Blame view

kernels/plugins/reduce.cl 1019 Bytes
1b1e928cc   glaville   initial import of...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
  // Binary operators that can be selected as the reduction operator.
  // Arguments are parenthesized so that compound expressions passed as
  // operands keep their intended precedence after expansion.
  #define SUM(a, b) ((a) + (b))
  #define MUL(a, b) ((a) * (b))
  #define MIN(a, b) min((a), (b))
  #define MAX(a, b) max((a), (b))
  
  // Per-work-group reduction of `buffer` using the binary operator OP.
  //
  // Each work-item copies one element of `buffer` into `scratch`, the
  // work-group then combines the scratch entries pairwise, and work-item 0
  // writes the group's combined value to result[get_group_id(0)].
  //
  //   buffer  - input values; only indices below `length` are read
  //   result  - output, one element written per work-group
  //   scratch - local workspace, indexed by local id, so it needs at least
  //             get_local_size(0) floats
  //   length  - number of valid elements in `buffer`
  //
  // OP and NEUTRAL are not defined in this file; presumably they are
  // supplied when the program is built (e.g. OP=SUM, NEUTRAL=0) -- confirm
  // against the host code. NEUTRAL should be the identity element of OP so
  // that padding entries do not change the result.
  __kernel
  void greduce(
              __global float* buffer,
              __global float* result,
              __local float* scratch,
              __const int length) {
  
    int global_index = get_global_id(0);
    int local_index = get_local_id(0);
    // Hoisted so the loop compares int with int and does not re-query the
    // work-group size on every iteration.
    int local_size = get_local_size(0);
    
    // Load data into local memory
    if (global_index < length) {
      scratch[local_index] = buffer[global_index];
    } else {
      // Past the end of the input: pad with the neutral element of OP
      scratch[local_index] = NEUTRAL;
    }
    
    // Every scratch entry must be written before any work-item reads a
    // neighbour's entry.
    barrier(CLK_LOCAL_MEM_FENCE);
    
    for(int offset = 1; offset < local_size; offset <<= 1) {
      // Only work-items whose index is a multiple of 2*offset combine at
      // this step.
      int mask = (offset << 1) - 1;
      int partner = local_index + offset;
      // The partner check keeps reads inside the populated part of scratch
      // when the work-group size is not a power of two.
      if ((local_index & mask) == 0 && partner < local_size) {
        float other = scratch[partner];
        float mine = scratch[local_index];
        scratch[local_index] = OP(mine, other);
      }
      
      // Reached by every work-item (the loop bound is the same for all of
      // them), so this step's writes are visible before the next step.
      barrier(CLK_LOCAL_MEM_FENCE);
    }
    
    // scratch[0] now holds the combined value for the whole work-group.
    if (local_index == 0) {
      result[get_group_id(0)] = scratch[0];
    }
  }