reduce.cl
1019 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
/*
 * Binary operators, presumably intended as candidates for the OP macro
 * used by the reduction kernel (OP itself is not defined in this file).
 * Every argument is parenthesized so that compound expressions such as
 * MUL(a + b, c) expand with the intended precedence.
 * MIN/MAX forward to the min()/max() functions and evaluate each
 * argument exactly once.
 */
#define SUM(a, b) ((a) + (b))
#define MUL(a, b) ((a) * (b))
#define MIN(a, b) min((a), (b))
#define MAX(a, b) max((a), (b))
/*
 * Work-group reduction: every work-group folds its slice of `buffer`
 * with OP and stores a single value in `result`.
 *
 * OP and NEUTRAL are not defined in this file; presumably they are supplied
 * at build time (e.g. -D OP=SUM -D NEUTRAL=0.0f) -- confirm against the host
 * code. NEUTRAL has to be the identity element of OP (0 for a sum, 1 for a
 * product, +INFINITY for a minimum, -INFINITY for a maximum), because it
 * pads the work-items whose global index lies past `length`.
 *
 * buffer  - input values; only indices below `length` are read.
 * result  - receives one value per work-group, at index get_group_id(0).
 * scratch - local scratch area; must hold at least get_local_size(0) floats.
 * length  - number of valid elements in `buffer`.
 */
__kernel
void greduce(
    __global float* buffer,
    __global float* result,
    __local float* scratch,
    __const int length) {
    int global_index = get_global_id(0);
    int local_index = get_local_id(0);
    /* Hoisted: loop-invariant, and keeps the comparisons below signed. */
    int local_size = (int)get_local_size(0);

    /* Load data into local memory; pad the tail with the identity of OP. */
    if (global_index < length) {
        scratch[local_index] = buffer[global_index];
    } else {
        scratch[local_index] = NEUTRAL;
    }
    barrier(CLK_LOCAL_MEM_FENCE);

    /*
     * Pairwise tree reduction: at each step the work-items whose index is
     * a multiple of 2 * offset absorb the value `offset` slots to the right.
     */
    for (int offset = 1; offset < local_size; offset <<= 1) {
        int mask = (offset << 1) - 1;
        int partner = local_index + offset;
        /*
         * The partner check keeps the read inside `scratch` when the
         * work-group size is not a power of two; a work-item without a
         * partner simply keeps its current value for a later step.
         */
        if ((local_index & mask) == 0 && partner < local_size) {
            float other = scratch[partner];
            float mine = scratch[local_index];
            scratch[local_index] = OP(mine, other);
        }
        /* Reached by every work-item: the loop bounds are uniform. */
        barrier(CLK_LOCAL_MEM_FENCE);
    }

    /* Work-item 0 now holds the value for the whole group. */
    if (local_index == 0) {
        result[get_group_id(0)] = scratch[0];
    }
}