// reduce.cl
// Binary operators available for the reduction (presumably one of these is
// selected as OP at build time -- confirm against the host code).
// Every argument is parenthesized so that expression arguments such as
// MUL(x + y, z) keep their intended grouping after expansion.
// MIN/MAX forward to min()/max(), expected to be the OpenCL built-ins.
#define SUM(a, b) ((a) + (b))
#define MUL(a, b) ((a) * (b))
#define MIN(a, b) min((a), (b))
#define MAX(a, b) max((a), (b))

/*
 * Per-work-group reduction.
 *
 * Each work-item copies one element of `buffer` into `scratch`, then the
 * work-group combines the scratch values pairwise with OP until scratch[0]
 * holds the combined value, which work-item 0 writes to
 * result[get_group_id(0)].
 *
 * OP and NEUTRAL are not defined in this file; they must be provided when the
 * program is built (presumably through -D build options -- confirm against the
 * host code). NEUTRAL must be the identity element of OP (for example 0 for
 * SUM, 1 for MUL) so that padding slots do not affect the result.
 *
 * Parameters:
 *   buffer  - input values; elements with index < length are read.
 *   result  - output; one value is written per work-group.
 *   scratch - local scratch area indexed by local id, so it must hold at
 *             least get_local_size(0) floats.
 *   length  - number of valid elements in buffer.
 */
__kernel
void greduce(
            __global float* buffer,
            __global float* result,
            __local float* scratch,
            __const int length) {

  int global_index = get_global_id(0);
  int local_index = get_local_id(0);
  // The work-group size is loop-invariant; read it once and keep it as int so
  // comparisons against the int indices below stay signed.
  int local_size = get_local_size(0);

  // Load data into local memory
  if (global_index < length) {
    scratch[local_index] = buffer[global_index];
  } else {
    // Past the end of the input: pad with the identity element of OP
    scratch[local_index] = NEUTRAL;
  }

  // All loads must be visible before any work-item reads a neighbour's slot
  barrier(CLK_LOCAL_MEM_FENCE);

  // At each step, work-items whose index is a multiple of 2*offset fold in
  // the value located `offset` slots to their right.
  for (int offset = 1; offset < local_size; offset <<= 1) {
    int mask = (offset << 1) - 1;
    int partner = local_index + offset;
    // The partner check prevents reading beyond the work-group's slots when
    // the work-group size is not a power of two; a work-item without a
    // partner simply keeps its current value for the next step.
    if ((local_index & mask) == 0 && partner < local_size) {
      float other = scratch[partner];
      float mine = scratch[local_index];
      scratch[local_index] = OP(mine, other);
    }

    // Reached by every work-item (the loop bounds are uniform across the
    // group), so the barrier is safe inside the loop.
    barrier(CLK_LOCAL_MEM_FENCE);
  }

  // Work-item 0 publishes the group's combined value
  if (local_index == 0) {
    result[get_group_id(0)] = scratch[0];
  }
}