; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
; RUN: opt -aa-pipeline=basic-aa -passes=openmp-opt -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT
; RUN: opt -aa-pipeline=basic-aa -passes=openmp-opt-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC

target triple = "amdgcn-amd-amdhsa"

%struct.ConfigurationEnvironmentTy = type { i8, i8, i8, i32, i32, i32, i32, i32, i32 }
%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }

; Address space 3 globals that the test kernels store to and load from.
; Every one of them is internal and starts out as undef.
@G = internal addrspace(3) global i32 undef, align 4
@H = internal addrspace(3) global i32 undef, align 4
@X = internal addrspace(3) global i32 undef, align 4
@QA1 = internal addrspace(3) global i32 undef, align 4
@QB1 = internal addrspace(3) global i32 undef, align 4
@QC1 = internal addrspace(3) global i32 undef, align 4
@QD1 = internal addrspace(3) global i32 undef, align 4
@QA2 = internal addrspace(3) global i32 undef, align 4
@QB2 = internal addrspace(3) global i32 undef, align 4
@QC2 = internal addrspace(3) global i32 undef, align 4
@QD2 = internal addrspace(3) global i32 undef, align 4
@QA3 = internal addrspace(3) global i32 undef, align 4
@QB3 = internal addrspace(3) global i32 undef, align 4
@QC3 = internal addrspace(3) global i32 undef, align 4
@QD3 = internal addrspace(3) global i32 undef, align 4
@UAA1 = internal addrspace(3) global i32 undef, align 4
@UAA2 = internal addrspace(3) global i32 undef, align 4
@UAA3 = internal addrspace(3) global i32 undef, align 4
@UANA1 = internal addrspace(3) global i32 undef, align 4
; Address space 4 constant whose (addrspacecast) address is compared against null in @test_assume.
@str = private unnamed_addr addrspace(4) constant [1 x i8] c"\00", align 1
; Kernel environment object passed as the first argument of __kmpc_target_init in @kernel.
@kernel_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr null, ptr null }

; Make sure we do not delete the stores to @G without also replacing the load with `1`.
;.
; CHECK: @G = internal addrspace(3) global i32 undef, align 4
; CHECK: @H = internal addrspace(3) global i32 undef, align 4
; CHECK: @X = internal addrspace(3) global i32 undef, align 4
; CHECK: @QA1 = internal addrspace(3) global i32 undef, align 4
; CHECK: @QB1 = internal addrspace(3) global i32 undef, align 4
; CHECK: @QC1 = internal addrspace(3) global i32 undef, align 4
; CHECK: @QD1 = internal addrspace(3) global i32 undef, align 4
; CHECK: @QA2 = internal addrspace(3) global i32 undef, align 4
; CHECK: @QB2 = internal addrspace(3) global i32 undef, align 4
; CHECK: @QC2 = internal addrspace(3) global i32 undef, align 4
; CHECK: @QD2 = internal addrspace(3) global i32 undef, align 4
; CHECK: @QA3 = internal addrspace(3) global i32 undef, align 4
; CHECK: @QB3 = internal addrspace(3) global i32 undef, align 4
; CHECK: @QC3 = internal addrspace(3) global i32 undef, align 4
; CHECK: @QD3 = internal addrspace(3) global i32 undef, align 4
; CHECK: @UAA1 = internal addrspace(3) global i32 undef, align 4
; CHECK: @UAA2 = internal addrspace(3) global i32 undef, align 4
; CHECK: @UAA3 = internal addrspace(3) global i32 undef, align 4
; CHECK: @UANA1 = internal addrspace(3) global i32 undef, align 4
; CHECK: @str = private unnamed_addr addrspace(4) constant [1 x i8] zeroinitializer, align 1
; CHECK: @kernel_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr null, ptr null }
;.
; @G and @H are stored only on the path where __kmpc_target_init returns -1;
; the other path loads them between two @barrier calls. The expected output
; has no stores left, passes 1 (for @G) and 2 (for @H) to @use1, and turns the
; assumed comparison into `true`.
define amdgpu_kernel void @kernel(ptr %dyn) "kernel" {
;
; TUNIT: Function Attrs: norecurse
; TUNIT-LABEL: define {{[^@]+}}@kernel
; TUNIT-SAME: (ptr [[DYN:%.*]]) #[[ATTR0:[0-9]+]] {
; TUNIT-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(ptr @kernel_kernel_environment, ptr [[DYN]])
; TUNIT-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], -1
; TUNIT-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
; TUNIT: if.then:
; TUNIT-NEXT: br label [[IF_MERGE:%.*]]
; TUNIT: if.else:
; TUNIT-NEXT: call void @barrier() #[[ATTR6:[0-9]+]]
; TUNIT-NEXT: call void @use1(i32 1) #[[ATTR7:[0-9]+]]
; TUNIT-NEXT: call void @llvm.assume(i1 true)
; TUNIT-NEXT: call void @barrier() #[[ATTR6]]
; TUNIT-NEXT: br label [[IF_MERGE]]
; TUNIT: if.merge:
; TUNIT-NEXT: call void @use1(i32 2) #[[ATTR7]]
; TUNIT-NEXT: br i1 [[CMP]], label [[IF_THEN2:%.*]], label [[IF_END:%.*]]
; TUNIT: if.then2:
; TUNIT-NEXT: call void @barrier() #[[ATTR6]]
; TUNIT-NEXT: br label [[IF_END]]
; TUNIT: if.end:
; TUNIT-NEXT: call void @__kmpc_target_deinit()
; TUNIT-NEXT: ret void
;
; CGSCC: Function Attrs: norecurse
; CGSCC-LABEL: define {{[^@]+}}@kernel
; CGSCC-SAME: (ptr [[DYN:%.*]]) #[[ATTR0:[0-9]+]] {
; CGSCC-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(ptr @kernel_kernel_environment, ptr [[DYN]])
; CGSCC-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], -1
; CGSCC-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
; CGSCC: if.then:
; CGSCC-NEXT: br label [[IF_MERGE:%.*]]
; CGSCC: if.else:
; CGSCC-NEXT: call void @barrier() #[[ATTR6:[0-9]+]]
; CGSCC-NEXT: call void @use1(i32 1) #[[ATTR6]]
; CGSCC-NEXT: call void @llvm.assume(i1 true)
; CGSCC-NEXT: call void @barrier() #[[ATTR6]]
; CGSCC-NEXT: br label [[IF_MERGE]]
; CGSCC: if.merge:
; CGSCC-NEXT: call void @use1(i32 2) #[[ATTR6]]
; CGSCC-NEXT: br i1 [[CMP]], label [[IF_THEN2:%.*]], label [[IF_END:%.*]]
; CGSCC: if.then2:
; CGSCC-NEXT: call void @barrier() #[[ATTR6]]
; CGSCC-NEXT: br label [[IF_END]]
; CGSCC: if.end:
; CGSCC-NEXT: call void @__kmpc_target_deinit()
; CGSCC-NEXT: ret void
;
  %call = call i32 @__kmpc_target_init(ptr @kernel_kernel_environment, ptr %dyn)
  %cmp = icmp eq i32 %call, -1
  br i1 %cmp, label %if.then, label %if.else
if.then:
  store i32 1, ptr addrspace(3) @G
  store i32 2, ptr addrspace(3) @H
  br label %if.merge
if.else:
  call void @barrier();
  %l = load i32, ptr addrspace(3) @G
  call void @use1(i32 %l)
  %hv = load i32, ptr addrspace(3) @H
  %hc = icmp eq i32 %hv, 2
  call void @llvm.assume(i1 %hc)
  call void @barrier();
  br label %if.merge
if.merge:
  %hreload = load i32, ptr addrspace(3) @H
  call void @use1(i32 %hreload)
  br i1 %cmp, label %if.then2, label %if.end
if.then2:
  store i32 2, ptr addrspace(3) @G
  call void @barrier();
  br label %if.end
if.end:
  call void @__kmpc_target_deinit()
  ret void
}

; The assumed comparison of the addrspacecast'ed address of @str against null
; is expected to be left exactly as written.
define void @test_assume() {
; CHECK-LABEL: define {{[^@]+}}@test_assume() {
; CHECK-NEXT: [[CMP:%.*]] = icmp ne ptr addrspacecast (ptr addrspace(4) @str to ptr), null
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]])
; CHECK-NEXT: ret void
;
  %cmp = icmp ne ptr addrspacecast (ptr addrspace(4) @str to ptr), null
  call void @llvm.assume(i1 %cmp)
  ret void
}

; We can't ignore the sync, hence this might store 2 into %p
define amdgpu_kernel void @kernel2(ptr %p) "kernel" {
; CHECK-LABEL: define {{[^@]+}}@kernel2
; CHECK-SAME: (ptr [[P:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: store i32 1, ptr addrspace(3) @X, align 4
; CHECK-NEXT: call void @sync()
; CHECK-NEXT: [[V:%.*]] = load i32, ptr addrspace(3) @X, align 4
; CHECK-NEXT: store i32 2, ptr addrspace(3) @X, align 4
; CHECK-NEXT: store i32 [[V]], ptr [[P]], align 4
; CHECK-NEXT: ret void
;
  store i32 1, ptr addrspace(3) @X
  call void @sync()
  %v = load i32, ptr addrspace(3) @X
  store i32 2, ptr addrspace(3) @X
  store i32 %v, ptr %p
  ret void
}

; We can't ignore the sync, hence this might store 2 into %p
; Same body as @kernel2, except that @sync is reached through the wrapper @sync_def.
define amdgpu_kernel void @kernel3(ptr %p) "kernel" {
; TUNIT-LABEL: define {{[^@]+}}@kernel3
; TUNIT-SAME: (ptr [[P:%.*]]) #[[ATTR1]] {
; TUNIT-NEXT: store i32 1, ptr addrspace(3) @X, align 4
; TUNIT-NEXT: call void @sync_def.internalized()
; TUNIT-NEXT: [[V:%.*]] = load i32, ptr addrspace(3) @X, align 4
; TUNIT-NEXT: store i32 2, ptr addrspace(3) @X, align 4
; TUNIT-NEXT: store i32 [[V]], ptr [[P]], align 4
; TUNIT-NEXT: ret void
;
; CGSCC-LABEL: define {{[^@]+}}@kernel3
; CGSCC-SAME: (ptr [[P:%.*]]) #[[ATTR1]] {
; CGSCC-NEXT: store i32 1, ptr addrspace(3) @X, align 4
; CGSCC-NEXT: call void @sync_def()
; CGSCC-NEXT: [[V:%.*]] = load i32, ptr addrspace(3) @X, align 4
; CGSCC-NEXT: store i32 2, ptr addrspace(3) @X, align 4
; CGSCC-NEXT: store i32 [[V]], ptr [[P]], align 4
; CGSCC-NEXT: ret void
;
  store i32 1, ptr addrspace(3) @X
  call void @sync_def()
  %v = load i32, ptr addrspace(3) @X
  store i32 2, ptr addrspace(3) @X
  store i32 %v, ptr %p
  ret void
}

; Thin wrapper around @sync, called from @kernel3.
define void @sync_def() {
; CHECK-LABEL: define {{[^@]+}}@sync_def() {
; CHECK-NEXT: call void @sync()
; CHECK-NEXT: ret void
;
  call void @sync()
  ret void
}

; @QA1 is stored on entry and again in %S, and both branches call @sync.
; The expected output keeps both stores and the load.
define amdgpu_kernel void @kernel4a1(i1 %c) "kernel" {
; TUNIT-LABEL: define {{[^@]+}}@kernel4a1
; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
; TUNIT-NEXT: store i32 0, ptr addrspace(3) @QA1, align 4
; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; TUNIT: L:
; TUNIT-NEXT: call void @sync()
; TUNIT-NEXT: [[V:%.*]] = load i32, ptr addrspace(3) @QA1, align 4
; TUNIT-NEXT: call void @use1(i32 [[V]]) #[[ATTR7]]
; TUNIT-NEXT: ret void
; TUNIT: S:
; TUNIT-NEXT: store i32 2, ptr addrspace(3) @QA1, align 4
; TUNIT-NEXT: call void @sync()
; TUNIT-NEXT: ret void
;
; CGSCC-LABEL: define {{[^@]+}}@kernel4a1
; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
; CGSCC-NEXT: store i32 0, ptr addrspace(3) @QA1, align 4
; CGSCC-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; CGSCC: L:
; CGSCC-NEXT: call void @sync()
; CGSCC-NEXT: [[V:%.*]] = load i32, ptr addrspace(3) @QA1, align 4
; CGSCC-NEXT: call void @use1(i32 [[V]]) #[[ATTR6]]
; CGSCC-NEXT: ret void
; CGSCC: S:
; CGSCC-NEXT: store i32 2, ptr addrspace(3) @QA1, align 4
; CGSCC-NEXT: call void @sync()
; CGSCC-NEXT: ret void
;
  store i32 0, ptr addrspace(3) @QA1
  br i1 %c, label %S, label %L
L:
  call void @sync();
  %v = load i32, ptr addrspace(3) @QA1
  call void @use1(i32 %v)
  ret void
S:
  store i32 2, ptr addrspace(3) @QA1
  call void @sync();
  ret void
}

; We should not replace the load or delete the second store.
define amdgpu_kernel void @kernel4b1(i1 %c) "kernel" {
; TUNIT-LABEL: define {{[^@]+}}@kernel4b1
; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
; TUNIT-NEXT: store i32 0, ptr addrspace(3) @QB1, align 4
; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; TUNIT: L:
; TUNIT-NEXT: call void @sync()
; TUNIT-NEXT: [[V:%.*]] = load i32, ptr addrspace(3) @QB1, align 4
; TUNIT-NEXT: call void @use1(i32 [[V]]) #[[ATTR7]]
; TUNIT-NEXT: ret void
; TUNIT: S:
; TUNIT-NEXT: store i32 2, ptr addrspace(3) @QB1, align 4
; TUNIT-NEXT: ret void
;
; CGSCC-LABEL: define {{[^@]+}}@kernel4b1
; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
; CGSCC-NEXT: store i32 0, ptr addrspace(3) @QB1, align 4
; CGSCC-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; CGSCC: L:
; CGSCC-NEXT: call void @sync()
; CGSCC-NEXT: [[V:%.*]] = load i32, ptr addrspace(3) @QB1, align 4
; CGSCC-NEXT: call void @use1(i32 [[V]]) #[[ATTR6]]
; CGSCC-NEXT: ret void
; CGSCC: S:
; CGSCC-NEXT: store i32 2, ptr addrspace(3) @QB1, align 4
; CGSCC-NEXT: ret void
;
  store i32 0, ptr addrspace(3) @QB1
  br i1 %c, label %S, label %L
L:
  call void @sync();
  %v = load i32, ptr addrspace(3) @QB1
  call void @use1(i32 %v)
  ret void
S:
  store i32 2, ptr addrspace(3) @QB1
  ret void
}

; @QA2 has no store on entry; it is written only in %S, and both branches call
; @sync. The expected output passes 2 to @use1 and drops the load and the store.
define amdgpu_kernel void @kernel4a2(i1 %c) "kernel" {
; TUNIT-LABEL: define {{[^@]+}}@kernel4a2
; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; TUNIT: L:
; TUNIT-NEXT: call void @sync()
; TUNIT-NEXT: call void @use1(i32 2) #[[ATTR7]]
; TUNIT-NEXT: ret void
; TUNIT: S:
; TUNIT-NEXT: call void @sync()
; TUNIT-NEXT: ret void
;
; CGSCC-LABEL: define {{[^@]+}}@kernel4a2
; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
; CGSCC-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; CGSCC: L:
; CGSCC-NEXT: call void @sync()
; CGSCC-NEXT: call void @use1(i32 2) #[[ATTR6]]
; CGSCC-NEXT: ret void
; CGSCC: S:
; CGSCC-NEXT: call void @sync()
; CGSCC-NEXT: ret void
;
  br i1 %c, label %S, label %L
L:
  call void @sync();
  %v = load i32, ptr addrspace(3) @QA2
  call void @use1(i32 %v)
  ret void
S:
  store i32 2, ptr addrspace(3) @QA2
  call void @sync();
  ret void
}

; FIXME: We should not replace the load with undef.
; NOTE(review): the expected output below passes 2 (not undef) to @use1 and
; drops the store in %S; confirm whether the FIXME above is still accurate.
define amdgpu_kernel void @kernel4b2(i1 %c) "kernel" {
; TUNIT-LABEL: define {{[^@]+}}@kernel4b2
; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; TUNIT: L:
; TUNIT-NEXT: call void @sync()
; TUNIT-NEXT: call void @use1(i32 2) #[[ATTR7]]
; TUNIT-NEXT: ret void
; TUNIT: S:
; TUNIT-NEXT: ret void
;
; CGSCC-LABEL: define {{[^@]+}}@kernel4b2
; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
; CGSCC-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; CGSCC: L:
; CGSCC-NEXT: call void @sync()
; CGSCC-NEXT: call void @use1(i32 2) #[[ATTR6]]
; CGSCC-NEXT: ret void
; CGSCC: S:
; CGSCC-NEXT: ret void
;
  br i1 %c, label %S, label %L
L:
  call void @sync();
  %v = load i32, ptr addrspace(3) @QB2
  call void @use1(i32 %v)
  ret void
S:
  store i32 2, ptr addrspace(3) @QB2
  ret void
}

; Same shape as @kernel4a1 on @QA3, but %S calls @sync four times after the
; store. The expected output keeps both stores, the load and every @sync call.
define amdgpu_kernel void @kernel4a3(i1 %c) "kernel" {
; TUNIT-LABEL: define {{[^@]+}}@kernel4a3
; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
; TUNIT-NEXT: store i32 0, ptr addrspace(3) @QA3, align 4
; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; TUNIT: L:
; TUNIT-NEXT: call void @sync()
; TUNIT-NEXT: [[V:%.*]] = load i32, ptr addrspace(3) @QA3, align 4
; TUNIT-NEXT: call void @use1(i32 [[V]]) #[[ATTR7]]
; TUNIT-NEXT: ret void
; TUNIT: S:
; TUNIT-NEXT: store i32 2, ptr addrspace(3) @QA3, align 4
; TUNIT-NEXT: call void @sync()
; TUNIT-NEXT: call void @sync()
; TUNIT-NEXT: call void @sync()
; TUNIT-NEXT: call void @sync()
; TUNIT-NEXT: ret void
;
; CGSCC-LABEL: define {{[^@]+}}@kernel4a3
; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
; CGSCC-NEXT: store i32 0, ptr addrspace(3) @QA3, align 4
; CGSCC-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; CGSCC: L:
; CGSCC-NEXT: call void @sync()
; CGSCC-NEXT: [[V:%.*]] = load i32, ptr addrspace(3) @QA3, align 4
; CGSCC-NEXT: call void @use1(i32 [[V]]) #[[ATTR6]]
; CGSCC-NEXT: ret void
; CGSCC: S:
; CGSCC-NEXT: store i32 2, ptr addrspace(3) @QA3, align 4
; CGSCC-NEXT: call void @sync()
; CGSCC-NEXT: call void @sync()
; CGSCC-NEXT: call void @sync()
; CGSCC-NEXT: call void @sync()
; CGSCC-NEXT: ret void
;
  store i32 0, ptr addrspace(3) @QA3
  br i1 %c, label %S, label %L
L:
  call void @sync();
  %v = load i32, ptr addrspace(3) @QA3
  call void @use1(i32 %v)
  ret void
S:
  store i32 2, ptr addrspace(3) @QA3
  call void @sync();
  call void @sync();
  call void @sync();
  call void @sync();
  ret void
}

; The load of QB3 should not be simplified to 0.
define amdgpu_kernel void @kernel4b3(i1 %c) "kernel" {
; TUNIT-LABEL: define {{[^@]+}}@kernel4b3
; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
; TUNIT-NEXT: store i32 0, ptr addrspace(3) @QB3, align 4
; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; TUNIT: L:
; TUNIT-NEXT: call void @sync()
; TUNIT-NEXT: [[V:%.*]] = load i32, ptr addrspace(3) @QB3, align 4
; TUNIT-NEXT: call void @use1(i32 [[V]]) #[[ATTR7]]
; TUNIT-NEXT: ret void
; TUNIT: S:
; TUNIT-NEXT: store i32 2, ptr addrspace(3) @QB3, align 4
; TUNIT-NEXT: call void @use1(i32 0) #[[ATTR7]]
; TUNIT-NEXT: call void @use1(i32 1) #[[ATTR7]]
; TUNIT-NEXT: call void @use1(i32 2) #[[ATTR7]]
; TUNIT-NEXT: call void @use1(i32 3) #[[ATTR7]]
; TUNIT-NEXT: ret void
;
; CGSCC-LABEL: define {{[^@]+}}@kernel4b3
; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
; CGSCC-NEXT: store i32 0, ptr addrspace(3) @QB3, align 4
; CGSCC-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; CGSCC: L:
; CGSCC-NEXT: call void @sync()
; CGSCC-NEXT: [[V:%.*]] = load i32, ptr addrspace(3) @QB3, align 4
; CGSCC-NEXT: call void @use1(i32 [[V]]) #[[ATTR6]]
; CGSCC-NEXT: ret void
; CGSCC: S:
; CGSCC-NEXT: store i32 2, ptr addrspace(3) @QB3, align 4
; CGSCC-NEXT: call void @use1(i32 0) #[[ATTR6]]
; CGSCC-NEXT: call void @use1(i32 1) #[[ATTR6]]
; CGSCC-NEXT: call void @use1(i32 2) #[[ATTR6]]
; CGSCC-NEXT: call void @use1(i32 3) #[[ATTR6]]
; CGSCC-NEXT: ret void
;
  store i32 0, ptr addrspace(3) @QB3
  br i1 %c, label %S, label %L
L:
  call void @sync();
  %v = load i32, ptr addrspace(3) @QB3
  call void @use1(i32 %v)
  ret void
S:
  store i32 2, ptr addrspace(3) @QB3
  call void @use1(i32 0)
  call void @use1(i32 1)
  call void @use1(i32 2)
  call void @use1(i32 3)
  ret void
}

; Same shape as @kernel4a1 on @QC1, but with @barrier instead of @sync on both
; branches. The expected output removes both stores, the load and both
; @barrier calls, and passes 0 to @use1.
define amdgpu_kernel void @kernel4c1(i1 %c) "kernel" {
; TUNIT: Function Attrs: norecurse
; TUNIT-LABEL: define {{[^@]+}}@kernel4c1
; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; TUNIT: L:
; TUNIT-NEXT: call void @use1(i32 0) #[[ATTR7]]
; TUNIT-NEXT: ret void
; TUNIT: S:
; TUNIT-NEXT: ret void
;
; CGSCC: Function Attrs: norecurse
; CGSCC-LABEL: define {{[^@]+}}@kernel4c1
; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
; CGSCC-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; CGSCC: L:
; CGSCC-NEXT: call void @use1(i32 0) #[[ATTR6]]
; CGSCC-NEXT: ret void
; CGSCC: S:
; CGSCC-NEXT: ret void
;
  store i32 0, ptr addrspace(3) @QC1
  br i1 %c, label %S, label %L
L:
  call void @barrier();
  %v = load i32, ptr addrspace(3) @QC1
  call void @use1(i32 %v)
  ret void
S:
  store i32 2, ptr addrspace(3) @QC1
  call void @barrier();
  ret void
}

; We should not replace the load or delete the second store.
; @QD1 is stored on entry and again in %S; only %L calls @barrier. The expected
; output keeps both stores, the @barrier call and the load.
define amdgpu_kernel void @kernel4d1(i1 %c) "kernel" {
; TUNIT: Function Attrs: norecurse
; TUNIT-LABEL: define {{[^@]+}}@kernel4d1
; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
; TUNIT-NEXT: store i32 0, ptr addrspace(3) @QD1, align 4
; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; TUNIT: L:
; TUNIT-NEXT: call void @barrier() #[[ATTR7]]
; TUNIT-NEXT: [[V:%.*]] = load i32, ptr addrspace(3) @QD1, align 4
; TUNIT-NEXT: call void @use1(i32 [[V]]) #[[ATTR7]]
; TUNIT-NEXT: ret void
; TUNIT: S:
; TUNIT-NEXT: store i32 2, ptr addrspace(3) @QD1, align 4
; TUNIT-NEXT: ret void
;
; CGSCC: Function Attrs: norecurse
; CGSCC-LABEL: define {{[^@]+}}@kernel4d1
; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
; CGSCC-NEXT: store i32 0, ptr addrspace(3) @QD1, align 4
; CGSCC-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; CGSCC: L:
; CGSCC-NEXT: call void @barrier() #[[ATTR6]]
; CGSCC-NEXT: [[V:%.*]] = load i32, ptr addrspace(3) @QD1, align 4
; CGSCC-NEXT: call void @use1(i32 [[V]]) #[[ATTR6]]
; CGSCC-NEXT: ret void
; CGSCC: S:
; CGSCC-NEXT: store i32 2, ptr addrspace(3) @QD1, align 4
; CGSCC-NEXT: ret void
;
  store i32 0, ptr addrspace(3) @QD1
  br i1 %c, label %S, label %L
L:
  call void @barrier();
  %v = load i32, ptr addrspace(3) @QD1
  call void @use1(i32 %v)
  ret void
S:
  store i32 2, ptr addrspace(3) @QD1
  ret void
}

; @QC2 is written only in %S, and both branches call @barrier. The expected
; output removes the store, the load and both @barrier calls, and passes undef
; to @use1.
define amdgpu_kernel void @kernel4c2(i1 %c) "kernel" {
; TUNIT: Function Attrs: norecurse
; TUNIT-LABEL: define {{[^@]+}}@kernel4c2
; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; TUNIT: L:
; TUNIT-NEXT: call void @use1(i32 undef) #[[ATTR7]]
; TUNIT-NEXT: ret void
; TUNIT: S:
; TUNIT-NEXT: ret void
;
; CGSCC: Function Attrs: norecurse
; CGSCC-LABEL: define {{[^@]+}}@kernel4c2
; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
; CGSCC-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; CGSCC: L:
; CGSCC-NEXT: call void @use1(i32 undef) #[[ATTR6]]
; CGSCC-NEXT: ret void
; CGSCC: S:
; CGSCC-NEXT: ret void
;
  br i1 %c, label %S, label %L
L:
  call void @barrier();
  %v = load i32, ptr addrspace(3) @QC2
  call void @use1(i32 %v)
  ret void
S:
  store i32 2, ptr addrspace(3) @QC2
  call void @barrier();
  ret void
}

; We should not replace the load with undef.
define amdgpu_kernel void @kernel4d2(i1 %c) "kernel" {
; TUNIT: Function Attrs: norecurse
; TUNIT-LABEL: define {{[^@]+}}@kernel4d2
; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; TUNIT: L:
; TUNIT-NEXT: call void @use1(i32 2) #[[ATTR7]]
; TUNIT-NEXT: ret void
; TUNIT: S:
; TUNIT-NEXT: ret void
;
; CGSCC: Function Attrs: norecurse
; CGSCC-LABEL: define {{[^@]+}}@kernel4d2
; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
; CGSCC-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; CGSCC: L:
; CGSCC-NEXT: call void @use1(i32 2) #[[ATTR6]]
; CGSCC-NEXT: ret void
; CGSCC: S:
; CGSCC-NEXT: ret void
;
  br i1 %c, label %S, label %L
L:
  call void @barrier();
  %v = load i32, ptr addrspace(3) @QD2
  call void @use1(i32 %v)
  ret void
S:
  store i32 2, ptr addrspace(3) @QD2
  ret void
}

; Same body as @kernel4c2, operating on @QC3. The expected output again passes
; undef to @use1.
define amdgpu_kernel void @kernel4c3(i1 %c) "kernel" {
; TUNIT: Function Attrs: norecurse
; TUNIT-LABEL: define {{[^@]+}}@kernel4c3
; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; TUNIT: L:
; TUNIT-NEXT: call void @use1(i32 undef) #[[ATTR7]]
; TUNIT-NEXT: ret void
; TUNIT: S:
; TUNIT-NEXT: ret void
;
; CGSCC: Function Attrs: norecurse
; CGSCC-LABEL: define {{[^@]+}}@kernel4c3
; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
; CGSCC-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; CGSCC: L:
; CGSCC-NEXT: call void @use1(i32 undef) #[[ATTR6]]
; CGSCC-NEXT: ret void
; CGSCC: S:
; CGSCC-NEXT: ret void
;
  br i1 %c, label %S, label %L
L:
  call void @barrier();
  %v = load i32, ptr addrspace(3) @QC3
  call void @use1(i32 %v)
  ret void
S:
  store i32 2, ptr addrspace(3) @QC3
  call void @barrier();
  ret void
}

; We should not replace the load with undef.
; @QD3 is written only in %S, which has no barrier; %L calls @barrier before
; the load. The expected output passes 2 to @use1 and removes the store, the
; load and the @barrier call.
define amdgpu_kernel void @kernel4d3(i1 %c) "kernel" {
; TUNIT: Function Attrs: norecurse
; TUNIT-LABEL: define {{[^@]+}}@kernel4d3
; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; TUNIT: L:
; TUNIT-NEXT: call void @use1(i32 2) #[[ATTR7]]
; TUNIT-NEXT: ret void
; TUNIT: S:
; TUNIT-NEXT: ret void
;
; CGSCC: Function Attrs: norecurse
; CGSCC-LABEL: define {{[^@]+}}@kernel4d3
; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
; CGSCC-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; CGSCC: L:
; CGSCC-NEXT: call void @use1(i32 2) #[[ATTR6]]
; CGSCC-NEXT: ret void
; CGSCC: S:
; CGSCC-NEXT: ret void
;
  br i1 %c, label %S, label %L
L:
  call void @barrier();
  %v = load i32, ptr addrspace(3) @QD3
  call void @use1(i32 %v)
  ret void
S:
  store i32 2, ptr addrspace(3) @QD3
  ret void
}

; %L: @barrier then load of @UAA1. %S: @sync, store, @barrier, @sync.
; The expected output passes 2 to @use1, removes the store and the @barrier in
; %L, and keeps the @sync/@barrier/@sync sequence in %S.
define amdgpu_kernel void @kernel_unknown_and_aligned1(i1 %c) "kernel" {
; TUNIT-LABEL: define {{[^@]+}}@kernel_unknown_and_aligned1
; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; TUNIT: L:
; TUNIT-NEXT: call void @use1(i32 2) #[[ATTR7]]
; TUNIT-NEXT: ret void
; TUNIT: S:
; TUNIT-NEXT: call void @sync()
; TUNIT-NEXT: call void @barrier() #[[ATTR7]]
; TUNIT-NEXT: call void @sync()
; TUNIT-NEXT: ret void
;
; CGSCC-LABEL: define {{[^@]+}}@kernel_unknown_and_aligned1
; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
; CGSCC-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; CGSCC: L:
; CGSCC-NEXT: call void @use1(i32 2) #[[ATTR6]]
; CGSCC-NEXT: ret void
; CGSCC: S:
; CGSCC-NEXT: call void @sync()
; CGSCC-NEXT: call void @barrier() #[[ATTR6]]
; CGSCC-NEXT: call void @sync()
; CGSCC-NEXT: ret void
;
  br i1 %c, label %S, label %L
L:
  call void @barrier();
  %v = load i32, ptr addrspace(3) @UAA1
  call void @use1(i32 %v)
  ret void
S:
  call void @sync();
  store i32 2, ptr addrspace(3) @UAA1
  call void @barrier();
  call void @sync();
  ret void
}

; Same %S block as @kernel_unknown_and_aligned1 (on @UAA2), but %L calls @sync
; instead of @barrier before the load. The expected output passes 2 to @use1
; and removes the store.
define amdgpu_kernel void @kernel_unknown_and_aligned2(i1 %c) "kernel" {
; TUNIT-LABEL: define {{[^@]+}}@kernel_unknown_and_aligned2
; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; TUNIT: L:
; TUNIT-NEXT: call void @sync()
; TUNIT-NEXT: call void @use1(i32 2) #[[ATTR7]]
; TUNIT-NEXT: ret void
; TUNIT: S:
; TUNIT-NEXT: call void @sync()
; TUNIT-NEXT: call void @barrier() #[[ATTR7]]
; TUNIT-NEXT: call void @sync()
; TUNIT-NEXT: ret void
;
; CGSCC-LABEL: define {{[^@]+}}@kernel_unknown_and_aligned2
; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
; CGSCC-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; CGSCC: L:
; CGSCC-NEXT: call void @sync()
; CGSCC-NEXT: call void @use1(i32 2) #[[ATTR6]]
; CGSCC-NEXT: ret void
; CGSCC: S:
; CGSCC-NEXT: call void @sync()
; CGSCC-NEXT: call void @barrier() #[[ATTR6]]
; CGSCC-NEXT: call void @sync()
; CGSCC-NEXT: ret void
;
  br i1 %c, label %S, label %L
L:
  call void @sync();
  %v = load i32, ptr addrspace(3) @UAA2
  call void @use1(i32 %v)
  ret void
S:
  call void @sync();
  store i32 2, ptr addrspace(3) @UAA2
  call void @barrier();
  call void @sync();
  ret void
}

; %L: @sync, load of @UAA3, @use1, then @barrier. %S: @sync, store, @sync.
; The expected output passes 2 to @use1, removes the store, and keeps every
; @sync and @barrier call.
define amdgpu_kernel void @kernel_unknown_and_aligned3(i1 %c) "kernel" {
; TUNIT-LABEL: define {{[^@]+}}@kernel_unknown_and_aligned3
; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; TUNIT: L:
; TUNIT-NEXT: call void @sync()
; TUNIT-NEXT: call void @use1(i32 2) #[[ATTR7]]
; TUNIT-NEXT: call void @barrier() #[[ATTR7]]
; TUNIT-NEXT: ret void
; TUNIT: S:
; TUNIT-NEXT: call void @sync()
; TUNIT-NEXT: call void @sync()
; TUNIT-NEXT: ret void
;
; CGSCC-LABEL: define {{[^@]+}}@kernel_unknown_and_aligned3
; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
; CGSCC-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; CGSCC: L:
; CGSCC-NEXT: call void @sync()
; CGSCC-NEXT: call void @use1(i32 2) #[[ATTR6]]
; CGSCC-NEXT: call void @barrier() #[[ATTR6]]
; CGSCC-NEXT: ret void
; CGSCC: S:
; CGSCC-NEXT: call void @sync()
; CGSCC-NEXT: call void @sync()
; CGSCC-NEXT: ret void
;
  br i1 %c, label %S, label %L
L:
  call void @sync();
  %v = load i32, ptr addrspace(3) @UAA3
  call void @use1(i32 %v)
  call void @barrier();
  ret void
S:
  call void @sync();
  store i32 2, ptr addrspace(3) @UAA3
  call void @sync();
  ret void
}

; Only @sync is used (no @barrier). %L: @sync then load of @UANA1.
; %S: @sync, store, @sync. The expected output passes 2 to @use1 and removes
; the store.
define amdgpu_kernel void @kernel_unknown_and_not_aligned1(i1 %c) "kernel" {
; TUNIT-LABEL: define {{[^@]+}}@kernel_unknown_and_not_aligned1
; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
; TUNIT-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; TUNIT: L:
; TUNIT-NEXT: call void @sync()
; TUNIT-NEXT: call void @use1(i32 2) #[[ATTR7]]
; TUNIT-NEXT: ret void
; TUNIT: S:
; TUNIT-NEXT: call void @sync()
; TUNIT-NEXT: call void @sync()
; TUNIT-NEXT: ret void
;
; CGSCC-LABEL: define {{[^@]+}}@kernel_unknown_and_not_aligned1
; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
; CGSCC-NEXT: br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
; CGSCC: L:
; CGSCC-NEXT: call void @sync()
; CGSCC-NEXT: call void @use1(i32 2) #[[ATTR6]]
; CGSCC-NEXT: ret void
; CGSCC: S:
; CGSCC-NEXT: call void @sync()
; CGSCC-NEXT: call void @sync()
; CGSCC-NEXT: ret void
;
  br i1 %c, label %S, label %L
L:
  call void @sync();
  %v = load i32, ptr addrspace(3) @UANA1
  call void @use1(i32 %v)
  ret void
S:
  call void @sync();
  store i32 2, ptr addrspace(3) @UANA1
  call void @sync();
  ret void
}

; External declarations used by the kernels above.
; @sync carries no attributes at all.
declare void @sync()
; @barrier is annotated with the "ompx_aligned_barrier" assumption.
declare void @barrier() norecurse nounwind nocallback "llvm.assume"="ompx_aligned_barrier"
; @use1 is declared nosync.
declare void @use1(i32) nosync norecurse nounwind nocallback
declare i32 @__kmpc_target_init(ptr, ptr) nocallback
declare void @__kmpc_target_deinit() nocallback
declare void @llvm.assume(i1)

!llvm.module.flags = !{!0, !1}

!0 = !{i32 7, !"openmp", i32 50}
!1 = !{i32 7, !"openmp-device", i32 50}
;.
; TUNIT: attributes #[[ATTR0]] = { norecurse "kernel" } ; TUNIT: attributes #[[ATTR1]] = { "kernel" } ; TUNIT: attributes #[[ATTR2:[0-9]+]] = { nocallback norecurse nounwind "llvm.assume"="ompx_aligned_barrier" } ; TUNIT: attributes #[[ATTR3:[0-9]+]] = { nocallback norecurse nosync nounwind } ; TUNIT: attributes #[[ATTR4:[0-9]+]] = { nocallback } ; TUNIT: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) } ; TUNIT: attributes #[[ATTR6]] = { nounwind "llvm.assume"="ompx_aligned_barrier" } ; TUNIT: attributes #[[ATTR7]] = { nounwind } ;. ; CGSCC: attributes #[[ATTR0]] = { norecurse "kernel" } ; CGSCC: attributes #[[ATTR1]] = { "kernel" } ; CGSCC: attributes #[[ATTR2:[0-9]+]] = { nocallback norecurse nounwind "llvm.assume"="ompx_aligned_barrier" } ; CGSCC: attributes #[[ATTR3:[0-9]+]] = { nocallback norecurse nosync nounwind } ; CGSCC: attributes #[[ATTR4:[0-9]+]] = { nocallback } ; CGSCC: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) } ; CGSCC: attributes #[[ATTR6]] = { nounwind } ;. ; TUNIT: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50} ; TUNIT: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} ;. ; CGSCC: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50} ; CGSCC: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} ;.