Blame view
kernels/plugins/reduce.cl
1019 Bytes
1b1e928cc initial import of... |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
/*
 * Binary operators that can be selected as OP for the reduction kernel below.
 * Arguments are parenthesized so that expression arguments expand safely.
 */
#define SUM(a, b) ((a) + (b))
#define MUL(a, b) ((a) * (b))
#define MIN(a, b) min((a), (b))
#define MAX(a, b) max((a), (b))

/*
 * Work-group level reduction over a float buffer.
 *
 * Each work-group combines the elements it covers with OP and stores a single
 * partial value in result[get_group_id(0)]. Combining the per-group partial
 * values into one final value is not done here.
 *
 * OP and NEUTRAL are not defined in this file and must be provided when the
 * program is built (presumably through build options such as
 * -DOP=SUM -DNEUTRAL=0.0f -- confirm against the host code). NEUTRAL has to be
 * the identity element of OP so that padding does not change the result.
 *
 * buffer  : input values in global memory
 * result  : output, one element per work-group
 * scratch : local memory, at least get_local_size(0) floats
 * length  : number of valid elements in buffer
 */
__kernel void greduce(
    __global float* buffer,
    __global float* result,
    __local float* scratch,
    const int length)
{
    const int global_index = get_global_id(0);
    const int local_index = get_local_id(0);
    const int local_size = get_local_size(0);

    // Load data into local memory; work-items beyond the end of the input
    // contribute the identity element of OP.
    if (global_index < length) {
        scratch[local_index] = buffer[global_index];
    } else {
        scratch[local_index] = NEUTRAL;
    }
    barrier(CLK_LOCAL_MEM_FENCE);

    // Pairwise tree reduction: in every round the work-items whose index is a
    // multiple of 2 * offset fold in the value located offset slots further.
    for (int offset = 1; offset < local_size; offset <<= 1) {
        const int mask = (offset << 1) - 1;
        const int partner = local_index + offset;

        // The partner check keeps the read inside scratch when the work-group
        // size is not a power of two; a missing partner simply means there is
        // nothing to fold in during this round.
        if ((local_index & mask) == 0 && partner < local_size) {
            float other = scratch[partner];
            float mine = scratch[local_index];
            scratch[local_index] = OP(mine, other);
        }

        // Executed by every work-item (outside the conditional) so the whole
        // group sees the updated values before the next round.
        barrier(CLK_LOCAL_MEM_FENCE);
    }

    // The first work-item of the group publishes the group's partial result.
    if (local_index == 0) {
        result[get_group_id(0)] = scratch[0];
    }
}