// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 // RUN: %clang_cc1 -triple spirv64-amd-amdhsa -x hip \ // RUN: -aux-triple x86_64-unknown-linux-gnu -fcuda-is-device -emit-llvm %s \ // RUN: -o - | FileCheck %s #define __device__ __attribute__((device)) typedef __attribute__((address_space(3))) float *LP; // CHECK-LABEL: define spir_func void @_Z22test_ds_atomic_add_f32Pff( // CHECK-SAME: ptr addrspace(4) noundef [[ADDR:%.*]], float noundef [[VAL:%.*]]) addrspace(4) #[[ATTR0:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr addrspace(4), align 8 // CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca float, align 4 // CHECK-NEXT: [[RTN:%.*]] = alloca ptr addrspace(4), align 8 // CHECK-NEXT: [[ADDR_ADDR_ASCAST:%.*]] = addrspacecast ptr [[ADDR_ADDR]] to ptr addrspace(4) // CHECK-NEXT: [[VAL_ADDR_ASCAST:%.*]] = addrspacecast ptr [[VAL_ADDR]] to ptr addrspace(4) // CHECK-NEXT: [[RTN_ASCAST:%.*]] = addrspacecast ptr [[RTN]] to ptr addrspace(4) // CHECK-NEXT: store ptr addrspace(4) [[ADDR]], ptr addrspace(4) [[ADDR_ADDR_ASCAST]], align 8 // CHECK-NEXT: store float [[VAL]], ptr addrspace(4) [[VAL_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[ADDR_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[TMP0]] to ptr addrspace(3) // CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(4) [[VAL_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP3:%.*]] = atomicrmw fadd ptr addrspace(3) [[TMP1]], float [[TMP2]] monotonic, align 4 // CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[RTN_ASCAST]], align 8 // CHECK-NEXT: store float [[TMP3]], ptr addrspace(4) [[TMP4]], align 4 // CHECK-NEXT: ret void // __device__ void test_ds_atomic_add_f32(float *addr, float val) { float *rtn; *rtn = __builtin_amdgcn_ds_faddf((LP)addr, val, 0, 0, 0); }