// Source: offload/unittests/OffloadAPI/device_code/localmem_reduction.c
// (blob 8a9a46cfb6a112cb85e9d9fb83989a4d2c25fdcc)
#include <gpuintrin.h>
#include <stdint.h>

// Scratch array in the `__gpu_local` address space. It is declared here
// without a size and its storage is not defined in this file. The kernel
// below indexes it by __gpu_thread_id(0), so it must provide at least
// __gpu_num_threads(0) uint32_t elements.
// NOTE(review): presumably the size is supplied by the host at launch time
// (dynamic local/shared memory) - confirm against the launching test.
extern __gpu_local uint32_t shared_mem[];

// Test kernel: each thread stores the constant 2 into the shared_mem slot
// selected by its dimension-0 thread id. After the barrier, a thread whose
// dimension-0 id is 0 zeroes out[__gpu_block_id(0)] and then adds the first
// __gpu_num_threads(0) entries of shared_mem to it.
//
// out: output array, written at index __gpu_block_id(0); it must be
//      writable at that index for every block that runs this kernel.
__gpu_kernel void localmem_reduction(uint32_t *out) {
  const uint32_t lane = __gpu_thread_id(0);
  shared_mem[lane] = 2;

  // Every slot has to be populated before any of them is read back.
  __gpu_sync_threads();

  // Only the thread with id 0 performs the summation; the rest are done.
  if (lane != 0)
    return;

  uint32_t *result = &out[__gpu_block_id(0)];
  const uint32_t count = __gpu_num_threads(0);

  *result = 0;
  for (uint32_t slot = 0; slot != count; ++slot)
    *result += shared_mem[slot];
}