Diffstat (limited to 'llvm/test/CodeGen')
22 files changed, 1494 insertions, 278 deletions
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-fconstant.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-fconstant.mir
index 6362ed6..9381f0f4 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-fconstant.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-fconstant.mir
@@ -1,11 +1,12 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
-# RUN: llc -debugify-and-strip-all-safe -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
+# RUN: llc -run-pass=aarch64-postlegalizer-lowering -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
+# RUN: llc -debugify-and-strip-all-safe -run-pass=aarch64-postlegalizer-lowering -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
 ...
 ---
 name: fconstant_to_constant_s32
 alignment: 4
 tracksRegLiveness: true
+legalized: true
 frameInfo:
   maxAlignment: 1
 machineFunctionInfo: {}
@@ -24,16 +25,17 @@ body: |
 ; CHECK-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p0) :: (store (s32))
 ; CHECK-NEXT: RET_ReallyLR
 %0:_(p0) = COPY $x0
- %3:_(s32) = G_FCONSTANT float 0x3FA99999A0000000
- %1:_(s64) = G_CONSTANT i64 524
- %2:_(p0) = G_PTR_ADD %0, %1(s64)
- G_STORE %3(s32), %2(p0) :: (store (s32))
+ %1:_(s32) = G_FCONSTANT float 0x3FA99999A0000000
+ %2:_(s64) = G_CONSTANT i64 524
+ %3:_(p0) = G_PTR_ADD %0, %2(s64)
+ G_STORE %1(s32), %3(p0) :: (store (s32))
 RET_ReallyLR
 ...
 ---
 name: fconstant_to_constant_s64
 alignment: 4
 tracksRegLiveness: true
+legalized: true
 frameInfo:
   maxAlignment: 1
 machineFunctionInfo: {}
@@ -48,7 +50,7 @@ body: |
 ; CHECK-NEXT: G_STORE %c(s64), %ptr(p0) :: (store (s64))
 ; CHECK-NEXT: RET_ReallyLR
 %ptr:_(p0) = COPY $x0
- %c:_(s64) = G_FCONSTANT double 0.0
+ %c:_(s64) = G_FCONSTANT double 0.000000e+00
 G_STORE %c(s64), %ptr(p0) :: (store (s64))
 RET_ReallyLR
 ...
@@ -56,6 +58,7 @@ body: |
 name: no_store_means_no_combine
 alignment: 4
 tracksRegLiveness: true
+legalized: true
 frameInfo:
   maxAlignment: 1
 machineFunctionInfo: {}
@@ -71,7 +74,7 @@ body: |
 ; CHECK-NEXT: %add:_(s64) = G_FADD %v, %c
 ; CHECK-NEXT: RET_ReallyLR implicit %add(s64)
 %v:_(s64) = COPY $x0
- %c:_(s64) = G_FCONSTANT double 0.0
+ %c:_(s64) = G_FCONSTANT double 0.000000e+00
 %add:_(s64) = G_FADD %v, %c
- RET_ReallyLR implicit %add
+ RET_ReallyLR implicit %add(s64)
 ...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir
index c301e76..c00ce22 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir
@@ -48,8 +48,9 @@ body: |
 ; CHECK-NEXT: $w0 = COPY [[C]](s32)
 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00
 ; CHECK-NEXT: $x0 = COPY [[C1]](s64)
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: $w0 = COPY [[C2]](s32)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C2]](s16)
+ ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
 %0:_(s32) = G_FCONSTANT float 1.0
 $w0 = COPY %0
 %1:_(s64) = G_FCONSTANT double 2.0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp16-fconstant.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp16-fconstant.mir
index ddf219d..c6df345 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp16-fconstant.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp16-fconstant.mir
@@ -8,7 +8,7 @@ tracksRegLiveness: true
 body: |
 bb.0:
 ; NO-FP16-LABEL: name: fp16
- ; NO-FP16: %cst:_(s16) = G_CONSTANT i16 0
+ ; NO-FP16: %cst:_(s16) = G_FCONSTANT half 0xH0000
 ; NO-FP16-NEXT: $h0 = COPY %cst(s16)
 ; NO-FP16-NEXT: RET_ReallyLR implicit $h0
 ;
@@ -26,7 +26,7 @@ tracksRegLiveness: true
 body: |
 bb.0:
 ; NO-FP16-LABEL: name: fp16_non_zero
- ; NO-FP16: %cst:_(s16) = G_CONSTANT i16 16384
+ ; NO-FP16: %cst:_(s16) = G_FCONSTANT half 0xH4000
 ; NO-FP16-NEXT: $h0 = COPY %cst(s16)
 ; NO-FP16-NEXT: RET_ReallyLR implicit $h0
 ;
@@ -44,7 +44,7 @@ tracksRegLiveness: true
 body: |
 bb.1.entry:
 ; NO-FP16-LABEL: name: nan
- ; NO-FP16: %cst:_(s16) = G_CONSTANT i16 31745
+ ; NO-FP16: %cst:_(s16) = G_FCONSTANT half 0xH7C01
 ; NO-FP16-NEXT: %ext:_(s32) = G_FPEXT %cst(s16)
 ; NO-FP16-NEXT: $w0 = COPY %ext(s32)
 ; NO-FP16-NEXT: RET_ReallyLR implicit $w0
diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
index cb5df07..322a96a 100644
--- a/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
@@ -739,15 +739,14 @@ define ptr @postidx32_shalf(ptr %src, ptr %out, half %a) {
 ;
 ; GISEL-LABEL: postidx32_shalf:
 ; GISEL: ; %bb.0:
-; GISEL-NEXT: mov w8, #0 ; =0x0
-; GISEL-NEXT: ldr h1, [x0], #4
-; GISEL-NEXT: fmov s2, w8
+; GISEL-NEXT: movi d1, #0000000000000000
+; GISEL-NEXT: ldr h2, [x0], #4
 ; GISEL-NEXT: ; kill: def $h0 killed $h0 def $s0
 ; GISEL-NEXT: fmov w9, s0
-; GISEL-NEXT: fcvt s3, h1
-; GISEL-NEXT: fmov w8, s1
-; GISEL-NEXT: fcvt s2, h2
-; GISEL-NEXT: fcmp s3, s2
+; GISEL-NEXT: fcvt s3, h2
+; GISEL-NEXT: fmov w8, s2
+; GISEL-NEXT: fcvt s1, h1
+; GISEL-NEXT: fcmp s3, s1
 ; GISEL-NEXT: csel w8, w8, w9, mi
 ; GISEL-NEXT: strh w8, [x1]
 ; GISEL-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/f16-instructions.ll b/llvm/test/CodeGen/AArch64/f16-instructions.ll
index adc536d..b234ef7 100644
--- a/llvm/test/CodeGen/AArch64/f16-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/f16-instructions.ll
@@ -782,18 +782,19 @@ define void @test_fccmp(half %in, ptr %out) {
 ;
 ; CHECK-CVT-GI-LABEL: test_fccmp:
 ; CHECK-CVT-GI: // %bb.0:
-; CHECK-CVT-GI-NEXT: mov w8, #17664 // =0x4500
-; CHECK-CVT-GI-NEXT: mov w9, #18432 // =0x4800
+; CHECK-CVT-GI-NEXT: adrp x8, .LCPI29_0
 ; CHECK-CVT-GI-NEXT: // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-GI-NEXT: fcvt s2, h0
-; CHECK-CVT-GI-NEXT: fmov s1, w8
-; CHECK-CVT-GI-NEXT: fmov s3, w9
-; CHECK-CVT-GI-NEXT: fmov w9, s0
-; CHECK-CVT-GI-NEXT: fcvt s1, h1
-; CHECK-CVT-GI-NEXT: fcvt s3, h3
-; CHECK-CVT-GI-NEXT: fcmp s2, s1
-; CHECK-CVT-GI-NEXT: fccmp s2, s3, #4, mi
-; CHECK-CVT-GI-NEXT: csel w8, w9, w8, gt
+; CHECK-CVT-GI-NEXT: ldr h1, [x8, :lo12:.LCPI29_0]
+; CHECK-CVT-GI-NEXT: adrp x8, .LCPI29_1
+; CHECK-CVT-GI-NEXT: ldr h4, [x8, :lo12:.LCPI29_1]
+; CHECK-CVT-GI-NEXT: fmov w8, s0
+; CHECK-CVT-GI-NEXT: fcvt s3, h1
+; CHECK-CVT-GI-NEXT: fmov w9, s1
+; CHECK-CVT-GI-NEXT: fcvt s4, h4
+; CHECK-CVT-GI-NEXT: fcmp s2, s3
+; CHECK-CVT-GI-NEXT: fccmp s2, s4, #4, mi
+; CHECK-CVT-GI-NEXT: csel w8, w8, w9, gt
 ; CHECK-CVT-GI-NEXT: strh w8, [x0]
 ; CHECK-CVT-GI-NEXT: ret
 ;
diff --git a/llvm/test/CodeGen/AArch64/fcvt-fixed.ll b/llvm/test/CodeGen/AArch64/fcvt-fixed.ll
index 51aad4fe..7409bfb 100644
--- a/llvm/test/CodeGen/AArch64/fcvt-fixed.ll
+++ b/llvm/test/CodeGen/AArch64/fcvt-fixed.ll
@@ -166,9 +166,9 @@ define i32 @fcvtzs_f16_i32_7(half %flt) {
 ;
 ; CHECK-GI-NO16-LABEL: fcvtzs_f16_i32_7:
 ; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI8_0
 ; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI8_0]
 ; CHECK-GI-NO16-NEXT: fcvt s1, h1
 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
 ; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -206,9 +206,9 @@ define i32 @fcvtzs_f16_i32_15(half %flt) {
 ;
 ; CHECK-GI-NO16-LABEL: fcvtzs_f16_i32_15:
 ; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI9_0
 ; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI9_0]
 ; CHECK-GI-NO16-NEXT: fcvt s1, h1
 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
 ; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -246,9 +246,9 @@ define i64 @fcvtzs_f16_i64_7(half %flt) {
 ;
 ; CHECK-GI-NO16-LABEL: fcvtzs_f16_i64_7:
 ; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI10_0
 ; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI10_0]
 ; CHECK-GI-NO16-NEXT: fcvt s1, h1
 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
 ; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -286,9 +286,9 @@ define i64 @fcvtzs_f16_i64_15(half %flt) {
 ;
 ; CHECK-GI-NO16-LABEL: fcvtzs_f16_i64_15:
 ; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI11_0
 ; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI11_0]
 ; CHECK-GI-NO16-NEXT: fcvt s1, h1
 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
 ; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -470,9 +470,9 @@ define i32 @fcvtzu_f16_i32_7(half %flt) {
 ;
 ; CHECK-GI-NO16-LABEL: fcvtzu_f16_i32_7:
 ; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI20_0
 ; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI20_0]
 ; CHECK-GI-NO16-NEXT: fcvt s1, h1
 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
 ; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -510,9 +510,9 @@ define i32 @fcvtzu_f16_i32_15(half %flt) {
 ;
 ; CHECK-GI-NO16-LABEL: fcvtzu_f16_i32_15:
 ; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI21_0
 ; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI21_0]
 ; CHECK-GI-NO16-NEXT: fcvt s1, h1
 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
 ; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -550,9 +550,9 @@ define i64 @fcvtzu_f16_i64_7(half %flt) {
 ;
 ; CHECK-GI-NO16-LABEL: fcvtzu_f16_i64_7:
 ; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI22_0
 ; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI22_0]
 ; CHECK-GI-NO16-NEXT: fcvt s1, h1
 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
 ; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -590,9 +590,9 @@ define i64 @fcvtzu_f16_i64_15(half %flt) {
 ;
 ; CHECK-GI-NO16-LABEL: fcvtzu_f16_i64_15:
 ; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI23_0
 ; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI23_0]
 ; CHECK-GI-NO16-NEXT: fcvt s1, h1
 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
 ; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -775,10 +775,10 @@ define half @scvtf_f16_i32_7(i32 %int) {
 ; CHECK-GI-NO16-LABEL: scvtf_f16_i32_7:
 ; CHECK-GI-NO16: // %bb.0:
 ; CHECK-GI-NO16-NEXT: scvtf s0, w0
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
-; CHECK-GI-NO16-NEXT: fmov s1, w8
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI32_0
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI32_0]
 ; CHECK-GI-NO16-NEXT: fcvt s1, h1
+; CHECK-GI-NO16-NEXT: fcvt h0, s0
 ; CHECK-GI-NO16-NEXT: fcvt s0, h0
 ; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1
 ; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -815,10 +815,10 @@ define half @scvtf_f16_i32_15(i32 %int) {
 ; CHECK-GI-NO16-LABEL: scvtf_f16_i32_15:
 ; CHECK-GI-NO16: // %bb.0:
 ; CHECK-GI-NO16-NEXT: scvtf s0, w0
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
-; CHECK-GI-NO16-NEXT: fmov s1, w8
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI33_0
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI33_0]
 ; CHECK-GI-NO16-NEXT: fcvt s1, h1
+; CHECK-GI-NO16-NEXT: fcvt h0, s0
 ; CHECK-GI-NO16-NEXT: fcvt s0, h0
 ; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1
 ; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -855,10 +855,10 @@ define half @scvtf_f16_i64_7(i64 %long) {
 ; CHECK-GI-NO16-LABEL: scvtf_f16_i64_7:
 ; CHECK-GI-NO16: // %bb.0:
 ; CHECK-GI-NO16-NEXT: scvtf s0, x0
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
-; CHECK-GI-NO16-NEXT: fmov s1, w8
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI34_0
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI34_0]
 ; CHECK-GI-NO16-NEXT: fcvt s1, h1
+; CHECK-GI-NO16-NEXT: fcvt h0, s0
 ; CHECK-GI-NO16-NEXT: fcvt s0, h0
 ; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1
 ; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -895,10 +895,10 @@ define half @scvtf_f16_i64_15(i64 %long) {
 ; CHECK-GI-NO16-LABEL: scvtf_f16_i64_15:
 ; CHECK-GI-NO16: // %bb.0:
 ; CHECK-GI-NO16-NEXT: scvtf s0, x0
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
-; CHECK-GI-NO16-NEXT: fmov s1, w8
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI35_0
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI35_0]
 ; CHECK-GI-NO16-NEXT: fcvt s1, h1
+; CHECK-GI-NO16-NEXT: fcvt h0, s0
 ; CHECK-GI-NO16-NEXT: fcvt s0, h0
 ; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1
 ; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -1079,10 +1079,10 @@ define half @ucvtf_f16_i32_7(i32 %int) {
 ; CHECK-GI-NO16-LABEL: ucvtf_f16_i32_7:
 ; CHECK-GI-NO16: // %bb.0:
 ; CHECK-GI-NO16-NEXT: ucvtf s0, w0
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
-; CHECK-GI-NO16-NEXT: fmov s1, w8
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI44_0
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI44_0]
 ; CHECK-GI-NO16-NEXT: fcvt s1, h1
+; CHECK-GI-NO16-NEXT: fcvt h0, s0
 ; CHECK-GI-NO16-NEXT: fcvt s0, h0
 ; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1
 ; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -1119,10 +1119,10 @@ define half @ucvtf_f16_i32_15(i32 %int) {
 ; CHECK-GI-NO16-LABEL: ucvtf_f16_i32_15:
 ; CHECK-GI-NO16: // %bb.0:
 ; CHECK-GI-NO16-NEXT: ucvtf s0, w0
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
-; CHECK-GI-NO16-NEXT: fmov s1, w8
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI45_0
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI45_0]
 ; CHECK-GI-NO16-NEXT: fcvt s1, h1
+; CHECK-GI-NO16-NEXT: fcvt h0, s0
 ; CHECK-GI-NO16-NEXT: fcvt s0, h0
 ; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1
 ; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -1159,10 +1159,10 @@ define half @ucvtf_f16_i64_7(i64 %long) {
 ; CHECK-GI-NO16-LABEL: ucvtf_f16_i64_7:
 ; CHECK-GI-NO16: // %bb.0:
 ; CHECK-GI-NO16-NEXT: ucvtf s0, x0
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
-; CHECK-GI-NO16-NEXT: fmov s1, w8
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI46_0
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI46_0]
 ; CHECK-GI-NO16-NEXT: fcvt s1, h1
+; CHECK-GI-NO16-NEXT: fcvt h0, s0
 ; CHECK-GI-NO16-NEXT: fcvt s0, h0
 ; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1
 ; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -1199,10 +1199,10 @@ define half @ucvtf_f16_i64_15(i64 %long) {
 ; CHECK-GI-NO16-LABEL: ucvtf_f16_i64_15:
 ; CHECK-GI-NO16: // %bb.0:
 ; CHECK-GI-NO16-NEXT: ucvtf s0, x0
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
-; CHECK-GI-NO16-NEXT: fmov s1, w8
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI47_0
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI47_0]
 ; CHECK-GI-NO16-NEXT: fcvt s1, h1
+; CHECK-GI-NO16-NEXT: fcvt h0, s0
 ; CHECK-GI-NO16-NEXT: fcvt s0, h0
 ; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1
 ; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -1373,9 +1373,9 @@ define i32 @fcvtzs_sat_f16_i32_7(half %dbl) {
 ;
 ; CHECK-GI-NO16-LABEL: fcvtzs_sat_f16_i32_7:
 ; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI55_0
 ; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI55_0]
 ; CHECK-GI-NO16-NEXT: fcvt s1, h1
 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
 ; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -1413,9 +1413,9 @@ define i32 @fcvtzs_sat_f16_i32_15(half %dbl) {
 ;
 ; CHECK-GI-NO16-LABEL: fcvtzs_sat_f16_i32_15:
 ; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI56_0
 ; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI56_0]
 ; CHECK-GI-NO16-NEXT: fcvt s1, h1
 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
 ; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -1453,9 +1453,9 @@ define i64 @fcvtzs_sat_f16_i64_7(half %dbl) {
 ;
 ; CHECK-GI-NO16-LABEL: fcvtzs_sat_f16_i64_7:
 ; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI57_0
 ; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI57_0]
 ; CHECK-GI-NO16-NEXT: fcvt s1, h1
 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
 ; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -1493,9 +1493,9 @@ define i64 @fcvtzs_sat_f16_i64_15(half %dbl) {
 ;
 ; CHECK-GI-NO16-LABEL: fcvtzs_sat_f16_i64_15:
 ; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI58_0
 ; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI58_0]
 ; CHECK-GI-NO16-NEXT: fcvt s1, h1
 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
 ; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -1667,9 +1667,9 @@ define i32 @fcvtzu_sat_f16_i32_7(half %dbl) {
 ;
 ; CHECK-GI-NO16-LABEL: fcvtzu_sat_f16_i32_7:
 ; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI66_0
 ; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI66_0]
 ; CHECK-GI-NO16-NEXT: fcvt s1, h1
 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
 ; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -1707,9 +1707,9 @@ define i32 @fcvtzu_sat_f16_i32_15(half %dbl) {
 ;
 ; CHECK-GI-NO16-LABEL: fcvtzu_sat_f16_i32_15:
 ; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI67_0
 ; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI67_0]
 ; CHECK-GI-NO16-NEXT: fcvt s1, h1
 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
 ; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -1747,9 +1747,9 @@ define i64 @fcvtzu_sat_f16_i64_7(half %dbl) {
 ;
 ; CHECK-GI-NO16-LABEL: fcvtzu_sat_f16_i64_7:
 ; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI68_0
 ; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI68_0]
 ; CHECK-GI-NO16-NEXT: fcvt s1, h1
 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
 ; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -1787,9 +1787,9 @@ define i64 @fcvtzu_sat_f16_i64_15(half %dbl) {
 ;
 ; CHECK-GI-NO16-LABEL: fcvtzu_sat_f16_i64_15:
 ; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI69_0
 ; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI69_0]
 ; CHECK-GI-NO16-NEXT: fcvt s1, h1
 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
 ; CHECK-GI-NO16-NEXT: fcvt h0, s0
diff --git a/llvm/test/CodeGen/AArch64/fdiv-combine.ll b/llvm/test/CodeGen/AArch64/fdiv-combine.ll
index 91bb8ac..9eacb61 100644
--- a/llvm/test/CodeGen/AArch64/fdiv-combine.ll
+++ b/llvm/test/CodeGen/AArch64/fdiv-combine.ll
@@ -12,22 +12,14 @@
 ; =>
 ; recip = 1.0 / D; a * recip; b * recip; c * recip;
 define void @three_fdiv_float(float %D, float %a, float %b, float %c) {
-; CHECK-SD-LABEL: three_fdiv_float:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: fmov s4, #1.00000000
-; CHECK-SD-NEXT: fdiv s4, s4, s0
-; CHECK-SD-NEXT: fmul s0, s1, s4
-; CHECK-SD-NEXT: fmul s1, s2, s4
-; CHECK-SD-NEXT: fmul s2, s3, s4
-; CHECK-SD-NEXT: b foo_3f
-;
-; CHECK-GI-LABEL: three_fdiv_float:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: fdiv s4, s1, s0
-; CHECK-GI-NEXT: fdiv s1, s2, s0
-; CHECK-GI-NEXT: fdiv s2, s3, s0
-; CHECK-GI-NEXT: fmov s0, s4
-; CHECK-GI-NEXT: b foo_3f
+; CHECK-LABEL: three_fdiv_float:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov s4, #1.00000000
+; CHECK-NEXT: fdiv s4, s4, s0
+; CHECK-NEXT: fmul s0, s1, s4
+; CHECK-NEXT: fmul s1, s2, s4
+; CHECK-NEXT: fmul s2, s3, s4
+; CHECK-NEXT: b foo_3f
 %div = fdiv arcp float %a, %D
 %div1 = fdiv arcp float %b, %D
 %div2 = fdiv arcp float %c, %D
@@ -36,22 +28,14 @@ define void @three_fdiv_float(float %D, float %a, float %b, float %c) {
 }
 define void @three_fdiv_double(double %D, double %a, double %b, double %c) {
-; CHECK-SD-LABEL: three_fdiv_double:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: fmov d4, #1.00000000
-; CHECK-SD-NEXT: fdiv d4, d4, d0
-; CHECK-SD-NEXT: fmul d0, d1, d4
-; CHECK-SD-NEXT: fmul d1, d2, d4
-; CHECK-SD-NEXT: fmul d2, d3, d4
-; CHECK-SD-NEXT: b foo_3d
-;
-; CHECK-GI-LABEL: three_fdiv_double:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: fdiv d4, d1, d0
-; CHECK-GI-NEXT: fdiv d1, d2, d0
-; CHECK-GI-NEXT: fdiv d2, d3, d0
-; CHECK-GI-NEXT: fmov d0, d4
-; CHECK-GI-NEXT: b foo_3d
+; CHECK-LABEL: three_fdiv_double:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov d4, #1.00000000
+; CHECK-NEXT: fdiv d4, d4, d0
+; CHECK-NEXT: fmul d0, d1, d4
+; CHECK-NEXT: fmul d1, d2, d4
+; CHECK-NEXT: fmul d2, d3, d4
+; CHECK-NEXT: b foo_3d
 %div = fdiv arcp double %a, %D
 %div1 = fdiv arcp double %b, %D
 %div2 = fdiv arcp double %c, %D
@@ -60,22 +44,14 @@ define void @three_fdiv_double(double %D, double %a, double %b, double %c) {
 }
 define void @three_fdiv_4xfloat(<4 x float> %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) {
-; CHECK-SD-LABEL: three_fdiv_4xfloat:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: fmov v4.4s, #1.00000000
-; CHECK-SD-NEXT: fdiv v4.4s, v4.4s, v0.4s
-; CHECK-SD-NEXT: fmul v0.4s, v1.4s, v4.4s
-; CHECK-SD-NEXT: fmul v1.4s, v2.4s, v4.4s
-; CHECK-SD-NEXT: fmul v2.4s, v3.4s, v4.4s
-; CHECK-SD-NEXT: b foo_3_4xf
-;
-; CHECK-GI-LABEL: three_fdiv_4xfloat:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: fdiv v4.4s, v1.4s, v0.4s
-; CHECK-GI-NEXT: fdiv v1.4s, v2.4s, v0.4s
-; CHECK-GI-NEXT: fdiv v2.4s, v3.4s, v0.4s
-; CHECK-GI-NEXT: mov v0.16b, v4.16b
-; CHECK-GI-NEXT: b foo_3_4xf
+; CHECK-LABEL: three_fdiv_4xfloat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov v4.4s, #1.00000000
+; CHECK-NEXT: fdiv v4.4s, v4.4s, v0.4s
+; CHECK-NEXT: fmul v0.4s, v1.4s, v4.4s
+; CHECK-NEXT: fmul v1.4s, v2.4s, v4.4s
+; CHECK-NEXT: fmul v2.4s, v3.4s, v4.4s
+; CHECK-NEXT: b foo_3_4xf
 %div = fdiv arcp <4 x float> %a, %D
 %div1 = fdiv arcp <4 x float> %b, %D
 %div2 = fdiv arcp <4 x float> %c, %D
@@ -84,22 +60,14 @@ define void @three_fdiv_4xfloat(<4 x float> %D, <4 x float> %a, <4 x float> %b,
 }
 define void @three_fdiv_2xdouble(<2 x double> %D, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
-; CHECK-SD-LABEL: three_fdiv_2xdouble:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: fmov v4.2d, #1.00000000
-; CHECK-SD-NEXT: fdiv v4.2d, v4.2d, v0.2d
-; CHECK-SD-NEXT: fmul v0.2d, v1.2d, v4.2d
-; CHECK-SD-NEXT: fmul v1.2d, v2.2d, v4.2d
-; CHECK-SD-NEXT: fmul v2.2d, v3.2d, v4.2d
-; CHECK-SD-NEXT: b foo_3_2xd
-;
-; CHECK-GI-LABEL: three_fdiv_2xdouble:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: fdiv v4.2d, v1.2d, v0.2d
-; CHECK-GI-NEXT: fdiv v1.2d, v2.2d, v0.2d
-; CHECK-GI-NEXT: fdiv v2.2d, v3.2d, v0.2d
-; CHECK-GI-NEXT: mov v0.16b, v4.16b
-; CHECK-GI-NEXT: b foo_3_2xd
+; CHECK-LABEL: three_fdiv_2xdouble:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov v4.2d, #1.00000000
+; CHECK-NEXT: fdiv v4.2d, v4.2d, v0.2d
+; CHECK-NEXT: fmul v0.2d, v1.2d, v4.2d
+; CHECK-NEXT: fmul v1.2d, v2.2d, v4.2d
+; CHECK-NEXT: fmul v2.2d, v3.2d, v4.2d
+; CHECK-NEXT: b foo_3_2xd
 %div = fdiv arcp <2 x double> %a, %D
 %div1 = fdiv arcp <2 x double> %b, %D
 %div2 = fdiv arcp <2 x double> %c, %D
@@ -135,26 +103,47 @@ define void @two_fdiv_double(double %D, double %a, double %b) {
 ret void
 }
-define void @splat_three_fdiv_4xfloat(float %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) {
-; CHECK-SD-LABEL: splat_three_fdiv_4xfloat:
+define void @four_fdiv_multi_float(float %D, float %a, float %b, float %c) #0 {
+; CHECK-SD-LABEL: four_fdiv_multi_float:
 ; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
-; CHECK-SD-NEXT: fmov v4.4s, #1.00000000
-; CHECK-SD-NEXT: dup v0.4s, v0.s[0]
-; CHECK-SD-NEXT: fdiv v4.4s, v4.4s, v0.4s
-; CHECK-SD-NEXT: fmul v0.4s, v1.4s, v4.4s
-; CHECK-SD-NEXT: fmul v1.4s, v2.4s, v4.4s
-; CHECK-SD-NEXT: fmul v2.4s, v3.4s, v4.4s
-; CHECK-SD-NEXT: b foo_3_4xf
+; CHECK-SD-NEXT: fmov s4, #1.00000000
+; CHECK-SD-NEXT: fdiv s5, s4, s0
+; CHECK-SD-NEXT: fmul s4, s1, s5
+; CHECK-SD-NEXT: fmul s1, s2, s5
+; CHECK-SD-NEXT: fmul s2, s3, s5
+; CHECK-SD-NEXT: fmul s3, s0, s5
+; CHECK-SD-NEXT: fmov s0, s4
+; CHECK-SD-NEXT: b foo_4f
 ;
-; CHECK-GI-LABEL: splat_three_fdiv_4xfloat:
+; CHECK-GI-LABEL: four_fdiv_multi_float:
 ; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
-; CHECK-GI-NEXT: dup v4.4s, v0.s[0]
-; CHECK-GI-NEXT: fdiv v0.4s, v1.4s, v4.4s
-; CHECK-GI-NEXT: fdiv v1.4s, v2.4s, v4.4s
-; CHECK-GI-NEXT: fdiv v2.4s, v3.4s, v4.4s
-; CHECK-GI-NEXT: b foo_3_4xf
+; CHECK-GI-NEXT: fmov s4, #1.00000000
+; CHECK-GI-NEXT: fdiv s5, s4, s0
+; CHECK-GI-NEXT: fdiv s4, s0, s0
+; CHECK-GI-NEXT: fmul s0, s1, s5
+; CHECK-GI-NEXT: fmul s1, s2, s5
+; CHECK-GI-NEXT: fmul s2, s3, s5
+; CHECK-GI-NEXT: fmov s3, s4
+; CHECK-GI-NEXT: b foo_4f
+ %div = fdiv arcp float %a, %D
+ %div1 = fdiv arcp float %b, %D
+ %div2 = fdiv arcp float %c, %D
+ %div3 = fdiv arcp float %D, %D
+ tail call void @foo_4f(float %div, float %div1, float %div2, float %div3)
+ ret void
+}
+
+define void @splat_three_fdiv_4xfloat(float %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) {
+; CHECK-LABEL: splat_three_fdiv_4xfloat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT: fmov v4.4s, #1.00000000
+; CHECK-NEXT: dup v0.4s, v0.s[0]
+; CHECK-NEXT: fdiv v4.4s, v4.4s, v0.4s
+; CHECK-NEXT: fmul v0.4s, v1.4s, v4.4s
+; CHECK-NEXT: fmul v1.4s, v2.4s, v4.4s
+; CHECK-NEXT: fmul v2.4s, v3.4s, v4.4s
+; CHECK-NEXT: b foo_3_4xf
 %D.ins = insertelement <4 x float> poison, float %D, i64 0
 %splat = shufflevector <4 x float> %D.ins, <4 x float> poison, <4 x i32> zeroinitializer
 %div = fdiv arcp <4 x float> %a, %splat
@@ -256,6 +245,7 @@ entry:
 }
 declare void @foo_3f(float, float, float)
+declare void @foo_4f(float, float, float, float)
 declare void @foo_3d(double, double, double)
 declare void @foo_3_4xf(<4 x float>, <4 x float>, <4 x float>)
 declare void @foo_3_2xd(<2 x double>, <2 x double>, <2 x double>)
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll b/llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll
index 594a3ab..be07978 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll
@@ -38,10 +38,10 @@ define half @add_v2HalfH(<2 x half> %bin.rdx) {
 ;
 ; CHECK-GI-NOFP16-LABEL: add_v2HalfH:
 ; CHECK-GI-NOFP16: // %bb.0:
-; CHECK-GI-NOFP16-NEXT: mov w8, #32768 // =0x8000
+; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI1_0
 ; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h0
-; CHECK-GI-NOFP16-NEXT: fmov s1, w8
+; CHECK-GI-NOFP16-NEXT: ldr h1, [x8, :lo12:.LCPI1_0]
 ; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[1]
 ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
 ; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
@@ -88,10 +88,10 @@ define half @add_v3HalfH(<3 x half> %bin.rdx) {
 ;
 ; CHECK-GI-NOFP16-LABEL: add_v3HalfH:
 ; CHECK-GI-NOFP16: // %bb.0:
-; CHECK-GI-NOFP16-NEXT: mov w8, #32768 // =0x8000
+; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI2_0
 ; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h0
-; CHECK-GI-NOFP16-NEXT: fmov s1, w8
+; CHECK-GI-NOFP16-NEXT: ldr h1, [x8, :lo12:.LCPI2_0]
 ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
 ; CHECK-GI-NOFP16-NEXT: fadd s1, s1, s2
 ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
@@ -152,10 +152,10 @@ define half @add_HalfH(<4 x half> %bin.rdx) {
 ;
 ; CHECK-GI-NOFP16-LABEL: add_HalfH:
 ; CHECK-GI-NOFP16: // %bb.0:
-; CHECK-GI-NOFP16-NEXT: mov w8, #32768 // =0x8000
+; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI3_0
 ; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h0
-; CHECK-GI-NOFP16-NEXT: fmov s1, w8
+; CHECK-GI-NOFP16-NEXT: ldr h1, [x8, :lo12:.LCPI3_0]
 ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
 ; CHECK-GI-NOFP16-NEXT: fadd s1, s1, s2
 ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
@@ -250,9 +250,9 @@ define half @add_H(<8 x half> %bin.rdx) {
 ;
 ; CHECK-GI-NOFP16-LABEL: add_H:
 ; CHECK-GI-NOFP16: // %bb.0:
-; CHECK-GI-NOFP16-NEXT: mov w8, #32768 // =0x8000
+; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI4_0
 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h0
-; CHECK-GI-NOFP16-NEXT: fmov s1, w8
+; CHECK-GI-NOFP16-NEXT: ldr h1, [x8, :lo12:.LCPI4_0]
 ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
 ; CHECK-GI-NOFP16-NEXT: fadd s1, s1, s2
 ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
@@ -448,9 +448,9 @@ define half @add_2H(<16 x half> %bin.rdx) {
 ;
 ; CHECK-GI-NOFP16-LABEL: add_2H:
 ; CHECK-GI-NOFP16: // %bb.0:
-; CHECK-GI-NOFP16-NEXT: mov w8, #32768 // =0x8000
+; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI7_0
 ; CHECK-GI-NOFP16-NEXT: fcvt s3, h0
-; CHECK-GI-NOFP16-NEXT: fmov s2, w8
+; CHECK-GI-NOFP16-NEXT: ldr h2, [x8, :lo12:.LCPI7_0]
 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
 ; CHECK-GI-NOFP16-NEXT: fadd s2, s2, s3
 ; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[1]
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll b/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
index 18f463c..40925da 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
@@ -405,26 +405,23 @@ define half @fadd_reduction_v4f16_in_loop(ptr %ptr.start) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fadd_reduction_v4f16_in_loop:
 ; CHECK-GI-NOFP16: // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT: movi d0, #0000000000000000
 ; CHECK-GI-NOFP16-NEXT: mov x8, xzr
-; CHECK-GI-NOFP16-NEXT: mov w9, #0 // =0x0
 ; CHECK-GI-NOFP16-NEXT: .LBB13_1: // %loop
 ; CHECK-GI-NOFP16-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-GI-NOFP16-NEXT: ldr d0, [x0, x8]
-; CHECK-GI-NOFP16-NEXT: fmov s1, w9
+; CHECK-GI-NOFP16-NEXT: ldr d1, [x0, x8]
+; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
 ; CHECK-GI-NOFP16-NEXT: add x8, x8, #8
 ; CHECK-GI-NOFP16-NEXT: cmp w8, #56
-; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-GI-NOFP16-NEXT: faddp v1.4s, v1.4s, v1.4s
+; CHECK-GI-NOFP16-NEXT: faddp s1, v1.2s
+; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
 ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT: faddp v0.4s, v0.4s, v0.4s
-; CHECK-GI-NOFP16-NEXT: faddp s0, v0.2s
-; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
-; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
-; CHECK-GI-NOFP16-NEXT: fadd s0, s0, s1
+; CHECK-GI-NOFP16-NEXT: fadd s0, s1, s0
 ; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
-; CHECK-GI-NOFP16-NEXT: fmov w9, s0
 ; CHECK-GI-NOFP16-NEXT: b.ne .LBB13_1
 ; CHECK-GI-NOFP16-NEXT: // %bb.2: // %exit
-; CHECK-GI-NOFP16-NEXT: // kill: def $h0 killed $h0 killed $s0
 ; CHECK-GI-NOFP16-NEXT: ret
 ;
 ; CHECK-GI-FP16-LABEL: fadd_reduction_v4f16_in_loop:
@@ -521,28 +518,25 @@ define half @fadd_reduction_v8f16_in_loop(ptr %ptr.start) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fadd_reduction_v8f16_in_loop:
 ; CHECK-GI-NOFP16: // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT: movi d0, #0000000000000000
 ; CHECK-GI-NOFP16-NEXT: mov x8, xzr
-; CHECK-GI-NOFP16-NEXT: mov w9, #0 // =0x0
 ; CHECK-GI-NOFP16-NEXT: .LBB14_1: // %loop
 ; CHECK-GI-NOFP16-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-GI-NOFP16-NEXT: ldr q0, [x0, x8]
+; CHECK-GI-NOFP16-NEXT: ldr q1, [x0, x8]
+; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
 ; CHECK-GI-NOFP16-NEXT: add x8, x8, #8
 ; CHECK-GI-NOFP16-NEXT: cmp w8, #56
-; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v0.4h
-; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
-; CHECK-GI-NOFP16-NEXT: fadd v0.4s, v1.4s, v0.4s
-; CHECK-GI-NOFP16-NEXT: fmov s1, w9
+; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v1.4h
+; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
+; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v2.4s, v1.4s
+; CHECK-GI-NOFP16-NEXT: faddp v1.4s, v1.4s, v1.4s
+; CHECK-GI-NOFP16-NEXT: faddp s1, v1.2s
+; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
 ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT: faddp v0.4s, v0.4s, v0.4s
-; CHECK-GI-NOFP16-NEXT: faddp s0, v0.2s
-; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
-; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
-; CHECK-GI-NOFP16-NEXT: fadd s0, s0, s1
+; CHECK-GI-NOFP16-NEXT: fadd s0, s1, s0
 ; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
-; CHECK-GI-NOFP16-NEXT: fmov w9, s0
 ; CHECK-GI-NOFP16-NEXT: b.ne .LBB14_1
 ; CHECK-GI-NOFP16-NEXT: // %bb.2: // %exit
-; CHECK-GI-NOFP16-NEXT: // kill: def $h0 killed $h0 killed $s0
 ; CHECK-GI-NOFP16-NEXT: ret
 ;
 ; CHECK-GI-FP16-LABEL: fadd_reduction_v8f16_in_loop:
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll
index e1b2170..c10d6e9 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll
@@ -52,10 +52,10 @@ define half @mul_HalfH(<4 x half> %bin.rdx) {
 ;
 ; CHECK-GI-NOFP16-LABEL: mul_HalfH:
 ; CHECK-GI-NOFP16: // %bb.0:
-; CHECK-GI-NOFP16-NEXT: mov w8, #15360 // =0x3c00
+; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI1_0
 ; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h0
-; CHECK-GI-NOFP16-NEXT: fmov s1, w8
+; CHECK-GI-NOFP16-NEXT: ldr h1, [x8, :lo12:.LCPI1_0]
 ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
 ; CHECK-GI-NOFP16-NEXT: fmul s1, s1, s2
 ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
@@ -144,9 +144,9 @@ define half @mul_H(<8 x half> %bin.rdx) {
 ;
 ; CHECK-GI-NOFP16-LABEL: mul_H:
 ; CHECK-GI-NOFP16: // %bb.0:
-; CHECK-GI-NOFP16-NEXT: mov w8, #15360 // =0x3c00
+; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI2_0
 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h0
-; CHECK-GI-NOFP16-NEXT: fmov s1, w8
+; CHECK-GI-NOFP16-NEXT: ldr h1, [x8, :lo12:.LCPI2_0]
 ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
 ; CHECK-GI-NOFP16-NEXT: fmul s1, s1, s2
 ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
@@ -321,9 +321,9 @@ define half @mul_2H(<16 x half> %bin.rdx) {
 ;
 ; CHECK-GI-NOFP16-LABEL: mul_2H:
 ; CHECK-GI-NOFP16: // %bb.0:
-; CHECK-GI-NOFP16-NEXT: mov w8, #15360 // =0x3c00
+; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI5_0
 ; CHECK-GI-NOFP16-NEXT: fcvt s3, h0
-; CHECK-GI-NOFP16-NEXT: fmov s2, w8
+; CHECK-GI-NOFP16-NEXT: ldr h2, [x8, :lo12:.LCPI5_0]
 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
 ; CHECK-GI-NOFP16-NEXT: fmul s2, s2, s3
 ; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[1]
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
index 664dfa2..2ad6e68 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
@@ -1,103 +1,166 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 4
 ; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -passes=amdgpu-attributor %s | FileCheck %s
+; Shrink result attribute list by preventing use of most attributes.
+define internal void @use_most() {
+; CHECK-LABEL: define internal void @use_most(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [256 x i8], align 1, addrspace(5)
+; CHECK-NEXT: [[ALLOCA_CAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
+; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
+; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
+; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
+; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z()
+; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.cluster.id.x()
+; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.amdgcn.cluster.id.y()
+; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.amdgcn.cluster.id.z()
+; CHECK-NEXT: [[TMP7:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+; CHECK-NEXT: [[TMP8:%.*]] = call ptr addrspace(4) @llvm.amdgcn.queue.ptr()
+; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.dispatch.id()
+; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
+; CHECK-NEXT: [[IMPLICIT_ARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+; CHECK-NEXT: call void @llvm.memcpy.p0.p4.i64(ptr [[ALLOCA_CAST]], ptr addrspace(4) [[IMPLICIT_ARG_PTR]], i64 256, i1 false)
+; CHECK-NEXT: ret void
+;
+ %alloca = alloca [256 x i8], addrspace(5)
+ %alloca.cast = addrspacecast ptr addrspace(5) %alloca to ptr
+ call i32 @llvm.amdgcn.workitem.id.x()
+ call i32 @llvm.amdgcn.workitem.id.y()
+ call i32 @llvm.amdgcn.workitem.id.z()
+ call i32 @llvm.amdgcn.workgroup.id.x()
+ call i32 @llvm.amdgcn.workgroup.id.y()
+ call i32 @llvm.amdgcn.workgroup.id.z()
+ call i32 @llvm.amdgcn.cluster.id.x()
+ call i32 @llvm.amdgcn.cluster.id.y()
+ call i32 @llvm.amdgcn.cluster.id.z()
+ call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+ call ptr addrspace(4) @llvm.amdgcn.queue.ptr()
+ call i64 @llvm.amdgcn.dispatch.id()
+ call i32 @llvm.amdgcn.lds.kernel.id()
+ %implicit.arg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+ call void @llvm.memcpy.p0.p4(ptr %alloca.cast, ptr addrspace(4) %implicit.arg.ptr, i64 256, i1 false)
+ ret void
+}
+
 define amdgpu_kernel void @kernel_uses_asm_virtreg() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg(
-; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
 ; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
 ; CHECK-NEXT: ret void
 ;
 call void asm sideeffect "; use $0", "a"(i32 poison)
+ call void @use_most()
 ret void
 }
 define amdgpu_kernel void @kernel_uses_asm_virtreg_def() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
 ; CHECK-NEXT: [[DEF:%.*]] = call i32 asm sideeffect "
+; CHECK-NEXT: call void @use_most()
 ; CHECK-NEXT: ret void
 ;
 %def = call i32 asm sideeffect "; def $0", "=a"()
+ call void @use_most()
 ret void
 }
 define amdgpu_kernel void @kernel_uses_asm_physreg_def_tuple() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg_def_tuple(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
 ; CHECK-NEXT: [[DEF:%.*]] = call i64 asm sideeffect "
+; CHECK-NEXT: call void @use_most()
 ; CHECK-NEXT: ret void
 ;
 %def = call i64 asm sideeffect "; def $0", "={a[0:1]}"()
+ call void @use_most()
 ret void
 }
 define amdgpu_kernel void @kernel_uses_asm_virtreg_second_arg() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_second_arg(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
 ; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
 ; CHECK-NEXT: ret void
 ;
 call void asm sideeffect "; use $0", "v,a"(i32 poison, i32 poison)
+ call void @use_most()
 ret void
 }
 define amdgpu_kernel void @kernel_uses_non_agpr_asm() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_non_agpr_asm(
-; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
+; CHECK-SAME: ) #[[ATTR0]] {
 ; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
 ; CHECK-NEXT: ret void
 ;
 call void asm sideeffect "; use $0", "v"(i32 poison)
+ call void @use_most()
 ret void
 }
 define amdgpu_kernel void @kernel_uses_asm_physreg() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
 ; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
 ; CHECK-NEXT: ret void
 ;
 call void asm sideeffect "; use $0", "{a0}"(i32 poison)
+ call void @use_most()
 ret void
 }
 define amdgpu_kernel void @kernel_uses_asm_physreg_tuple() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg_tuple(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
 ; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
 ; CHECK-NEXT: ret void
 ;
 call void asm sideeffect "; use $0", "{a[0:1]}"(i64 poison)
+ call void @use_most()
 ret void
 }
 define void @func_uses_asm_virtreg_agpr() {
 ; CHECK-LABEL: define void @func_uses_asm_virtreg_agpr(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
 ; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
 ; CHECK-NEXT: ret void
 ;
 call void asm sideeffect "; use $0", "a"(i32 poison)
+ call void @use_most()
 ret void
 }
 define void @func_uses_asm_physreg_agpr() {
 ; CHECK-LABEL: define void @func_uses_asm_physreg_agpr(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
 ; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
 ; CHECK-NEXT: ret void
 ;
 call void asm sideeffect "; use $0", "{a0}"(i32 poison)
+ call void @use_most()
 ret void
 }
 define void @func_uses_asm_physreg_agpr_tuple() {
 ; CHECK-LABEL: define void @func_uses_asm_physreg_agpr_tuple(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
 ; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
 ; CHECK-NEXT: ret void
 ;
 call void asm sideeffect "; use $0", "{a[0:1]}"(i64 poison)
+ call void @use_most()
 ret void
 }
@@ -105,99 +168,119 @@ declare void @unknown()
 define amdgpu_kernel void @kernel_calls_extern() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern(
-; CHECK-SAME: ) #[[ATTR2:[0-9]+]] {
+; CHECK-SAME: ) #[[ATTR1]] {
 ; CHECK-NEXT: call void @unknown()
+; CHECK-NEXT: call void @use_most()
 ; CHECK-NEXT: ret void
 ;
 call void @unknown()
+ call void @use_most()
 ret void
 }
 define amdgpu_kernel void @kernel_calls_extern_marked_callsite() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern_marked_callsite(
-; CHECK-SAME: ) #[[ATTR2]] {
-; CHECK-NEXT: call void @unknown() #[[ATTR6:[0-9]+]]
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: call void @unknown() #[[ATTR10:[0-9]+]]
+; CHECK-NEXT: call void @use_most()
 ; CHECK-NEXT: ret void
 ;
 call void @unknown() #0
+ call void @use_most()
 ret void
 }
 define amdgpu_kernel void @kernel_calls_indirect(ptr %indirect) {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_indirect(
-; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR2]] {
+; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR1]] {
 ; CHECK-NEXT: call void [[INDIRECT]]()
+; CHECK-NEXT: call void @use_most()
 ; CHECK-NEXT: ret void
 ;
 call void %indirect()
+ call void @use_most()
 ret void
 }
 define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(ptr %indirect) {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(
-; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR2]] {
-; CHECK-NEXT: call void [[INDIRECT]]() #[[ATTR6]]
+; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: call void [[INDIRECT]]() #[[ATTR10]]
+; CHECK-NEXT: call void @use_most()
 ; CHECK-NEXT: ret void
 ;
 call void %indirect() #0
+ call void @use_most()
 ret void
 }
 define amdgpu_kernel void @kernel_transitively_uses_agpr_asm() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_transitively_uses_agpr_asm(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
 ; CHECK-NEXT: call void @func_uses_asm_physreg_agpr()
+; CHECK-NEXT: call void @use_most()
 ; CHECK-NEXT: ret void
 ;
 call void @func_uses_asm_physreg_agpr()
+ call void @use_most()
 ret void
 }
 define void @empty() {
 ; CHECK-LABEL: define void @empty(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: call void @use_most()
 ; CHECK-NEXT: ret void
 ;
+ call void @use_most()
 ret void
 }
 define void @also_empty() {
 ; CHECK-LABEL: define void @also_empty(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: call void @use_most()
 ; CHECK-NEXT: ret void
 ;
+ call void @use_most()
 ret void
 }
 define amdgpu_kernel void @kernel_calls_empty() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_empty(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR0]] {
 ; CHECK-NEXT: call void @empty()
+; CHECK-NEXT: call void @use_most()
 ; CHECK-NEXT: ret void
 ;
 call void @empty()
+ call void @use_most()
 ret void
 }
 define amdgpu_kernel void @kernel_calls_non_agpr_and_agpr() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_non_agpr_and_agpr(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
 ; CHECK-NEXT: call void @empty()
 ; CHECK-NEXT: call void @func_uses_asm_physreg_agpr()
+; CHECK-NEXT: call void @use_most()
 ; CHECK-NEXT: ret void
 ;
 call void @empty()
 call void @func_uses_asm_physreg_agpr()
+ call void @use_most()
 ret void
 }
 define amdgpu_kernel void @kernel_calls_generic_intrinsic(ptr %ptr0, ptr %ptr1, i64 %size) {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_generic_intrinsic(
-; CHECK-SAME: ptr [[PTR0:%.*]], ptr [[PTR1:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] {
+; CHECK-SAME: ptr [[PTR0:%.*]], ptr [[PTR1:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[PTR0]], ptr [[PTR1]], i64 [[SIZE]], i1 false)
+; CHECK-NEXT: call void @use_most()
 ; CHECK-NEXT: ret void
 ;
 call void @llvm.memcpy.p0.p0.i64(ptr %ptr0, ptr %ptr1, i64 %size, i1 false)
+ call void @use_most()
 ret void
 }
@@ -205,31 +288,35 @@ declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x float>
 define amdgpu_kernel void @kernel_calls_mfma.f32.32x32x1f32(ptr addrspace(1) %out, float %a, float %b, <32 x float> %c) {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_mfma.f32.32x32x1f32(
-; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float [[B:%.*]], <32 x float> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float [[B:%.*]], <32 x float> [[C:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT: [[RESULT:%.*]] = call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float [[A]], float [[B]], <32 x float> [[C]], i32 0, i32 0, i32 0)
 ; CHECK-NEXT: store <32 x float> [[RESULT]], ptr addrspace(1) [[OUT]], align 128
+; CHECK-NEXT: call void @use_most()
 ; CHECK-NEXT: ret void
 ;
 %result = call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float %a, float %b, <32 x float> %c, i32 0, i32 0, i32 0)
 store <32 x float> %result, ptr addrspace(1) %out
+ call void @use_most()
 ret void
 }
 define amdgpu_kernel void @kernel_calls_workitem_id_x(ptr addrspace(1) %out) {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_workitem_id_x(
-; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1]] {
+; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT: [[RESULT:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
 ; CHECK-NEXT: store i32 [[RESULT]], ptr addrspace(1) [[OUT]], align 4
+; CHECK-NEXT: call void @use_most()
 ; CHECK-NEXT: ret void
 ;
 %result = call i32 @llvm.amdgcn.workitem.id.x()
 store i32 %result, ptr addrspace(1) %out
+ call void @use_most()
 ret void
 }
 define amdgpu_kernel void @indirect_calls_none_agpr(i1 %cond) {
 ; CHECK-LABEL: define amdgpu_kernel void @indirect_calls_none_agpr(
-; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR1]] {
 ; CHECK-NEXT: [[FPTR:%.*]] = select i1 [[COND]], ptr @empty, ptr @also_empty
 ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq ptr [[FPTR]], @also_empty
 ; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP3:%.*]]
@@ -244,21 +331,476 @@ define amdgpu_kernel void @indirect_calls_none_agpr(i1 %cond) {
 ; CHECK: 5:
 ; CHECK-NEXT: unreachable
 ; CHECK: 6:
+; CHECK-NEXT: call void @use_most()
 ; CHECK-NEXT: ret void
 ;
 %fptr = select i1 %cond, ptr @empty, ptr @also_empty
 call void %fptr()
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_def_struct_0() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def_struct_0(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: [[DEF:%.*]] = call { i32, i32 } asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ %def = call {i32, i32} asm sideeffect "; def $0", "=a,=a"()
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_use_struct_1() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_use_struct_1(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: [[DEF:%.*]] = call { i32, <2 x i32> } asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ %def = call {i32, <2 x i32>} asm sideeffect "; def $0", "=a,=a"()
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_use_struct_2() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_use_struct_2(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: [[DEF:%.*]] = call { i32, <2 x i32> } asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ %def = call {i32, <2 x i32>} asm sideeffect "; def $0", "=a,=v"()
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_ptr_ty() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_ptr_ty(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0", "a"(ptr poison)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_def_ptr_ty() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def_ptr_ty(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: [[DEF:%.*]] = call ptr asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ %def = call ptr asm sideeffect "; def $0", "=a"()
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_def_vector_ptr_ty() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def_vector_ptr_ty(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: [[DEF:%.*]] = call <2 x ptr> asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ %def = call <2 x ptr> asm sideeffect "; def $0", "=a"()
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_physreg_def_struct_0() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg_def_struct_0(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: [[DEF:%.*]] = call { i32, i32 } asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ %def = call {i32, i32} asm sideeffect "; def $0", "={a0},={a[4:5]}"()
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_clobber() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_clobber(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; clobber $0", "~{a4}"()
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_clobber_tuple() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_clobber_tuple(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; clobber $0", "~{a[10:13]}"()
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_clobber_oob() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_clobber_oob(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; clobber $0", "~{a256}"()
+ call void @use_most()
 ret void
 }
+define amdgpu_kernel void @kernel_uses_asm_clobber_max() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_clobber_max(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; clobber $0", "~{a255}"()
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_physreg_oob() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg_oob(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0", "{a256}"(i32 poison)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_def_max_ty() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def_max_ty(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: [[DEF:%.*]] = call <32 x i32> asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ %def = call <32 x i32> asm sideeffect "; def $0", "=a"()
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_use_max_ty() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_use_max_ty(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0", "a"(<32 x i32> poison)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_use_def_max_ty() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_use_def_max_ty(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: [[DEF:%.*]] = call <32 x i32> asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ %def = call <32 x i32> asm sideeffect "; use $0", "=a,a"(<32 x i32> poison)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @vreg_use_exceeds_register_file() {
+; CHECK-LABEL: define amdgpu_kernel void @vreg_use_exceeds_register_file(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0", "a"(<257 x i32> poison)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @vreg_def_exceeds_register_file() {
+; CHECK-LABEL: define amdgpu_kernel void @vreg_def_exceeds_register_file(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: [[DEF:%.*]] = call <257 x i32> asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ %def = call <257 x i32> asm sideeffect "; def $0", "=a"()
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @multiple() {
+; CHECK-LABEL: define amdgpu_kernel void @multiple(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: [[DEF:%.*]] = call { <16 x i32>, <8 x i32>, <8 x i32> } asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ %def = call {<16 x i32>, <8 x i32>, <8 x i32>} asm sideeffect "; def $0", "=a,=a,=a,a,a,a"(<4 x i32> splat (i32 0), <8 x i32> splat (i32 1), i64 999)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @earlyclobber_0() {
+; CHECK-LABEL: define amdgpu_kernel void @earlyclobber_0(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: [[DEF:%.*]] = call <8 x i32> asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ %def = call <8 x i32> asm sideeffect "; def $0", "=&a,a"(i32 0)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @earlyclobber_1() {
+; CHECK-LABEL: define amdgpu_kernel void @earlyclobber_1(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: [[DEF:%.*]] = call { <8 x i32>, <16 x i32> } asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ %def = call { <8 x i32>, <16 x i32 > } asm sideeffect "; def $0, $1", "=&a,=&a,a,a"(i32 0, <16 x i32> splat (i32 1))
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @physreg_a32__vreg_a256__vreg_a512() {
+; CHECK-LABEL: define amdgpu_kernel void @physreg_a32__vreg_a256__vreg_a512(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0, $1, $2", "{a16},a,a"(i32 poison, <8 x i32> poison, <16 x i32> poison)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @physreg_def_a32__def_vreg_a256__def_vreg_a512() {
+; CHECK-LABEL: define amdgpu_kernel void @physreg_def_a32__def_vreg_a256__def_vreg_a512(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call { i32, <8 x i32>, <16 x i32> } asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call {i32, <8 x i32>, <16 x i32>} asm sideeffect "; def $0, $1, $2", "={a16},=a,=a"()
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @physreg_def_a32___def_vreg_a512_use_vreg_a256() {
+; CHECK-LABEL: define amdgpu_kernel void @physreg_def_a32___def_vreg_a512_use_vreg_a256(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call { i32, <16 x i32> } asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call {i32, <16 x i32>} asm sideeffect "; def $0, $1, $2", "={a16},=a,a"(<8 x i32> poison)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @mixed_physreg_vreg_tuples_0() {
+; CHECK-LABEL: define amdgpu_kernel void @mixed_physreg_vreg_tuples_0(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0, $1", "{a[1:4]},a"(<4 x i32> poison, <4 x i32> poison)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @mixed_physreg_vreg_tuples_1() {
+; CHECK-LABEL: define amdgpu_kernel void @mixed_physreg_vreg_tuples_1(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0, $1", "a,{a[0:3]}"(<4 x i32> poison, <4 x i32> poison)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @physreg_raises_limit() {
+; CHECK-LABEL: define amdgpu_kernel void @physreg_raises_limit(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0, $1", "a,{a[5:8]}"(<4 x i32> poison, <4 x i32> poison)
+ call void @use_most()
+ ret void
+}
+
+; FIXME: This should require 9. We cannot allocate an a128 at a0.
+define amdgpu_kernel void @physreg_tuple_alignment_raises_limit() {
+; CHECK-LABEL: define amdgpu_kernel void @physreg_tuple_alignment_raises_limit(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0, $1", "a,{a[1:4]}"(<4 x i32> poison, <4 x i32> poison)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @align3_virtreg() {
+; CHECK-LABEL: define amdgpu_kernel void @align3_virtreg(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0, $1", "a,a"(<3 x i32> poison, <3 x i32> poison)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @align3_align4_virtreg() {
+; CHECK-LABEL: define amdgpu_kernel void @align3_align4_virtreg(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0, $1", "a,a"(<3 x i32> poison, <4 x i32> poison)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @align2_align4_virtreg() {
+; CHECK-LABEL: define amdgpu_kernel void @align2_align4_virtreg(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0, $1", "a,a"(<2 x i32> poison, <4 x i32> poison)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_write_register_a55() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_a55(
+; CHECK-SAME: ) #[[ATTR3:[0-9]+]] {
+; CHECK-NEXT: call void @llvm.write_register.i32(metadata [[META0:![0-9]+]], i32 0)
+; CHECK-NEXT: ret void
+;
@llvm.write_register.i64(metadata !0, i32 0) + ret void +} + +define amdgpu_kernel void @kernel_uses_write_register_v55() { +; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_v55( +; CHECK-SAME: ) #[[ATTR4:[0-9]+]] { +; CHECK-NEXT: call void @llvm.write_register.i32(metadata [[META1:![0-9]+]], i32 0) +; CHECK-NEXT: ret void +; + call void @llvm.write_register.i64(metadata !1, i32 0) + ret void +} + +define amdgpu_kernel void @kernel_uses_write_register_a55_57() { +; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_a55_57( +; CHECK-SAME: ) #[[ATTR3]] { +; CHECK-NEXT: call void @llvm.write_register.i96(metadata [[META2:![0-9]+]], i96 0) +; CHECK-NEXT: ret void +; + call void @llvm.write_register.i64(metadata !2, i96 0) + ret void +} + +define amdgpu_kernel void @kernel_uses_read_register_a55(ptr addrspace(1) %ptr) { +; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_read_register_a55( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: [[REG:%.*]] = call i32 @llvm.read_register.i32(metadata [[META0]]) +; CHECK-NEXT: store i32 [[REG]], ptr addrspace(1) [[PTR]], align 4 +; CHECK-NEXT: ret void +; + %reg = call i32 @llvm.read_register.i64(metadata !0) + store i32 %reg, ptr addrspace(1) %ptr + ret void +} + +define amdgpu_kernel void @kernel_uses_read_volatile_register_a55(ptr addrspace(1) %ptr) { +; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_read_volatile_register_a55( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: [[REG:%.*]] = call i32 @llvm.read_volatile_register.i32(metadata [[META0]]) +; CHECK-NEXT: store i32 [[REG]], ptr addrspace(1) [[PTR]], align 4 +; CHECK-NEXT: ret void +; + %reg = call i32 @llvm.read_volatile_register.i64(metadata !0) + store i32 %reg, ptr addrspace(1) %ptr + ret void +} + +define amdgpu_kernel void @kernel_uses_read_register_a56_59(ptr addrspace(1) %ptr) { +; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_read_register_a56_59( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: [[REG:%.*]] = call i128 @llvm.read_register.i128(metadata [[META3:![0-9]+]]) +; CHECK-NEXT: store i128 [[REG]], ptr addrspace(1) [[PTR]], align 8 +; CHECK-NEXT: ret void +; + %reg = call i128 @llvm.read_register.i64(metadata !3) + store i128 %reg, ptr addrspace(1) %ptr + ret void +} + +define amdgpu_kernel void @kernel_uses_write_register_out_of_bounds_a256() { +; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_out_of_bounds_a256( +; CHECK-SAME: ) #[[ATTR3]] { +; CHECK-NEXT: call void @llvm.write_register.i32(metadata [[META4:![0-9]+]], i32 0) +; CHECK-NEXT: ret void +; + call void @llvm.write_register.i64(metadata !4, i32 0) + ret void +} attributes #0 = { "amdgpu-agpr-alloc"="0" } + +!0 = !{!"a55"} +!1 = !{!"v55"} +!2 = !{!"a[55:57]"} +!3 = !{!"a[56:59]"} +!4 = !{!"a256"} + +;. 
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR1]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR2:[0-9]+]] = { convergent nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" } +; CHECK: attributes #[[ATTR3]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR4]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" } +; CHECK: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" } +; CHECK: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(read) "target-cpu"="gfx90a" } +; CHECK: attributes #[[ATTR8:[0-9]+]] = { nounwind "target-cpu"="gfx90a" } +; CHECK: attributes #[[ATTR9:[0-9]+]] = { nocallback nounwind "target-cpu"="gfx90a" } +; CHECK: attributes #[[ATTR10]] = { "amdgpu-agpr-alloc"="0" } ;. 
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR2:[0-9]+]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" } -; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" } -; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" } -; CHECK: attributes #[[ATTR6]] = { "amdgpu-agpr-alloc"="0" } +; CHECK: [[META0]] = !{!"a55"} +; CHECK: [[META1]] = !{!"v55"} +; CHECK: [[META2]] = !{!"a[55:57]"} +; CHECK: [[META3]] = !{!"a[56:59]"} +; CHECK: [[META4]] = !{!"a256"} ;. 
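The AMDGPU checks above exercise how "amdgpu-agpr-alloc" is inferred from inline-asm AGPR ("a") constraints and from read/write_register against named a-registers: kernels that touch AGPRs lose the "amdgpu-agpr-alloc"="0" marking. As a minimal sketch of the constraint in isolation (hypothetical kernel name, same gfx90a target the tests assume; not part of this patch):

; A single def-then-use through the "a" constraint is enough to keep a
; kernel from being marked "amdgpu-agpr-alloc"="0".
define amdgpu_kernel void @agpr_sketch() {
  %v = call i32 asm sideeffect "; def $0", "=a"()   ; defines one AGPR
  call void asm sideeffect "; use $0", "a"(i32 %v)  ; consumes it again
  ret void
}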
diff --git a/llvm/test/CodeGen/BPF/addr-space-memintrinsic-gep.ll b/llvm/test/CodeGen/BPF/addr-space-memintrinsic-gep.ll new file mode 100644 index 0000000..1db8391 --- /dev/null +++ b/llvm/test/CodeGen/BPF/addr-space-memintrinsic-gep.ll @@ -0,0 +1,60 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt --bpf-check-and-opt-ir -S -mtriple=bpf-pc-linux < %s | FileCheck %s + +@page1 = dso_local local_unnamed_addr addrspace(1) global [10 x ptr] zeroinitializer, align 8 +@page2 = dso_local local_unnamed_addr addrspace(1) global [10 x ptr] zeroinitializer, align 8 + +define dso_local void @test_memset() local_unnamed_addr { +; CHECK-LABEL: define dso_local void @test_memset() local_unnamed_addr { +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 addrspacecast (ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) @page1, i64 16) to ptr), i8 0, i64 16, i1 false) +; CHECK-NEXT: ret void +; + tail call void @llvm.memset.p1.i64(ptr addrspace(1) noundef nonnull align 8 dereferenceable(16) getelementptr inbounds nuw (i8, ptr addrspace(1) @page1, i64 16), i8 0, i64 16, i1 false) + ret void +} + +declare void @llvm.memset.p1.i64(ptr addrspace(1) writeonly captures(none), i8, i64, i1 immarg) + +define dso_local void @test_memcpy() local_unnamed_addr { +; CHECK-LABEL: define dso_local void @test_memcpy() local_unnamed_addr { +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 addrspacecast (ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) @page2, i64 8) to ptr), ptr align 8 addrspacecast (ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) @page1, i64 8) to ptr), i64 16, i1 false) +; CHECK-NEXT: ret void +; + tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 8 dereferenceable(16) getelementptr inbounds nuw (i8, ptr addrspace(1) @page2, i64 8), ptr addrspace(1) noundef nonnull align 8 dereferenceable(16) getelementptr inbounds nuw (i8, ptr addrspace(1) @page1, i64 8), i64 16, i1 false) + ret void +} + +declare void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noalias writeonly captures(none), ptr addrspace(1) noalias readonly captures(none), i64, i1 immarg) + +define dso_local void @test_memmove() local_unnamed_addr { +; CHECK-LABEL: define dso_local void @test_memmove() local_unnamed_addr { +; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 8 addrspacecast (ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) @page2, i64 16) to ptr), ptr align 8 addrspacecast (ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) @page2, i64 8) to ptr), i64 16, i1 false) +; CHECK-NEXT: ret void +; + tail call void @llvm.memmove.p1.p1.i64(ptr addrspace(1) noundef nonnull align 8 dereferenceable(16) getelementptr inbounds nuw (i8, ptr addrspace(1) @page2, i64 16), ptr addrspace(1) noundef nonnull align 8 dereferenceable(16) getelementptr inbounds nuw (i8, ptr addrspace(1) @page2, i64 8), i64 16, i1 false) + ret void +} + +declare void @llvm.memmove.p1.p1.i64(ptr addrspace(1) writeonly captures(none), ptr addrspace(1) readonly captures(none), i64, i1 immarg) + +define dso_local void @test_memset_inline() local_unnamed_addr { +; CHECK-LABEL: define dso_local void @test_memset_inline() local_unnamed_addr { +; CHECK-NEXT: call void @llvm.memset.inline.p0.i64(ptr align 8 addrspacecast (ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) @page1, i64 16) to ptr), i8 0, i64 16, i1 false) +; CHECK-NEXT: ret void +; + tail call void 
@llvm.memset.inline.p1.i64(ptr addrspace(1) nonnull align 8 getelementptr inbounds nuw (i8, ptr addrspace(1) @page1, i64 16), i8 0, i64 16, i1 false) + ret void +} + +declare void @llvm.memset.inline.p1.i64(ptr addrspace(1) writeonly captures(none), i8, i64, i1 immarg) + +define dso_local void @test_memcpy_inline() local_unnamed_addr { +; CHECK-LABEL: define dso_local void @test_memcpy_inline() local_unnamed_addr { +; CHECK-NEXT: call void @llvm.memcpy.inline.p0.p0.i64(ptr align 8 addrspacecast (ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) @page2, i64 8) to ptr), ptr align 8 addrspacecast (ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) @page1, i64 8) to ptr), i64 16, i1 false) +; CHECK-NEXT: ret void +; + tail call void @llvm.memcpy.inline.p1.p1.i64(ptr addrspace(1) nonnull align 8 getelementptr inbounds nuw (i8, ptr addrspace(1) @page2, i64 8), ptr addrspace(1) nonnull align 8 getelementptr inbounds nuw (i8, ptr addrspace(1) @page1, i64 8), i64 16, i1 false) + ret void +} + +declare void @llvm.memcpy.inline.p1.p1.i64(ptr addrspace(1) noalias writeonly captures(none), ptr addrspace(1) noalias readonly captures(none), i64, i1 immarg) diff --git a/llvm/test/CodeGen/BPF/addr-space-memintrinsic-no-gep.ll b/llvm/test/CodeGen/BPF/addr-space-memintrinsic-no-gep.ll new file mode 100644 index 0000000..62fa2e4 --- /dev/null +++ b/llvm/test/CodeGen/BPF/addr-space-memintrinsic-no-gep.ll @@ -0,0 +1,49 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt --bpf-check-and-opt-ir -S -mtriple=bpf-pc-linux < %s | FileCheck %s + +@page1 = dso_local local_unnamed_addr addrspace(1) global [10 x ptr] zeroinitializer, align 8 +@page2 = dso_local local_unnamed_addr addrspace(1) global [10 x ptr] zeroinitializer, align 8 + +define dso_local void @test_memset() local_unnamed_addr { +; CHECK-LABEL: define dso_local void @test_memset() local_unnamed_addr { +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 addrspacecast (ptr addrspace(1) @page1 to ptr), i8 0, i64 16, i1 false) +; CHECK-NEXT: ret void +; + tail call void @llvm.memset.p1.i64(ptr addrspace(1) noundef align 8 dereferenceable(16) @page1, i8 0, i64 16, i1 false) + ret void +} + +declare void @llvm.memset.p1.i64(ptr addrspace(1) writeonly captures(none), i8, i64, i1 immarg) + +define dso_local void @test_memcpy() local_unnamed_addr { +; CHECK-LABEL: define dso_local void @test_memcpy() local_unnamed_addr { +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 addrspacecast (ptr addrspace(1) @page2 to ptr), ptr align 8 addrspacecast (ptr addrspace(1) @page1 to ptr), i64 16, i1 false) +; CHECK-NEXT: ret void +; + tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef align 8 dereferenceable(16) @page2, ptr addrspace(1) noundef align 8 dereferenceable(16) @page1, i64 16, i1 false) + ret void +} + +declare void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noalias writeonly captures(none), ptr addrspace(1) noalias readonly captures(none), i64, i1 immarg) + +define dso_local void @test_memset_inline() local_unnamed_addr { +; CHECK-LABEL: define dso_local void @test_memset_inline() local_unnamed_addr { +; CHECK-NEXT: call void @llvm.memset.inline.p0.i64(ptr align 8 addrspacecast (ptr addrspace(1) @page1 to ptr), i8 0, i64 16, i1 false) +; CHECK-NEXT: ret void +; + tail call void @llvm.memset.inline.p1.i64(ptr addrspace(1) align 8 @page1, i8 0, i64 16, i1 false) + ret void +} + +declare void @llvm.memset.inline.p1.i64(ptr addrspace(1) writeonly 
captures(none), i8, i64, i1 immarg) + +define dso_local void @test_memcpy_inline() local_unnamed_addr { +; CHECK-LABEL: define dso_local void @test_memcpy_inline() local_unnamed_addr { +; CHECK-NEXT: call void @llvm.memcpy.inline.p0.p0.i64(ptr align 8 addrspacecast (ptr addrspace(1) @page2 to ptr), ptr align 8 addrspacecast (ptr addrspace(1) @page1 to ptr), i64 16, i1 false) +; CHECK-NEXT: ret void +; + tail call void @llvm.memcpy.inline.p1.p1.i64(ptr addrspace(1) align 8 @page2, ptr addrspace(1) align 8 @page1, i64 16, i1 false) + ret void +} + +declare void @llvm.memcpy.inline.p1.p1.i64(ptr addrspace(1) noalias writeonly captures(none), ptr addrspace(1) noalias readonly captures(none), i64, i1 immarg) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/atomic-load-store-fp.ll b/llvm/test/CodeGen/RISCV/GlobalISel/atomic-load-store-fp.ll index 4ad2d2c..4914357 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/atomic-load-store-fp.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/atomic-load-store-fp.ll @@ -23,6 +23,16 @@ ; RUN: llc -mtriple=riscv64 -global-isel -mattr=+d,+a,+ztso -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-TSO-TRAILING-FENCE %s +; RUN: llc -mtriple=riscv32 -global-isel -mattr=+d,+a,+experimental-zalasr -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-ZALASR,RV32IA-ZALASR-WMO %s +; RUN: llc -mtriple=riscv32 -global-isel -mattr=+d,+a,+experimental-zalasr,+ztso -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-ZALASR,RV32IA-ZALASR-TSO %s + +; RUN: llc -mtriple=riscv64 -global-isel -mattr=+d,+a,+experimental-zalasr -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-ZALASR,RV64IA-ZALASR-WMO %s +; RUN: llc -mtriple=riscv64 -global-isel -mattr=+d,+a,+experimental-zalasr,+ztso -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-ZALASR,RV64IA-ZALASR-TSO %s + define float @atomic_load_f32_unordered(ptr %a) nounwind { ; RV32I-LABEL: atomic_load_f32_unordered: @@ -171,6 +181,30 @@ define float @atomic_load_f32_acquire(ptr %a) nounwind { ; RV64IA-TSO-TRAILING-FENCE-NEXT: lw a0, 0(a0) ; RV64IA-TSO-TRAILING-FENCE-NEXT: fmv.w.x fa0, a0 ; RV64IA-TSO-TRAILING-FENCE-NEXT: ret +; +; RV32IA-ZALASR-WMO-LABEL: atomic_load_f32_acquire: +; RV32IA-ZALASR-WMO: # %bb.0: +; RV32IA-ZALASR-WMO-NEXT: lw.aq a0, (a0) +; RV32IA-ZALASR-WMO-NEXT: fmv.w.x fa0, a0 +; RV32IA-ZALASR-WMO-NEXT: ret +; +; RV32IA-ZALASR-TSO-LABEL: atomic_load_f32_acquire: +; RV32IA-ZALASR-TSO: # %bb.0: +; RV32IA-ZALASR-TSO-NEXT: lw a0, 0(a0) +; RV32IA-ZALASR-TSO-NEXT: fmv.w.x fa0, a0 +; RV32IA-ZALASR-TSO-NEXT: ret +; +; RV64IA-ZALASR-WMO-LABEL: atomic_load_f32_acquire: +; RV64IA-ZALASR-WMO: # %bb.0: +; RV64IA-ZALASR-WMO-NEXT: lw.aq a0, (a0) +; RV64IA-ZALASR-WMO-NEXT: fmv.w.x fa0, a0 +; RV64IA-ZALASR-WMO-NEXT: ret +; +; RV64IA-ZALASR-TSO-LABEL: atomic_load_f32_acquire: +; RV64IA-ZALASR-TSO: # %bb.0: +; RV64IA-ZALASR-TSO-NEXT: lw a0, 0(a0) +; RV64IA-ZALASR-TSO-NEXT: fmv.w.x fa0, a0 +; RV64IA-ZALASR-TSO-NEXT: ret %1 = load atomic float, ptr %a acquire, align 4 ret float %1 } @@ -256,6 +290,18 @@ define float @atomic_load_f32_seq_cst(ptr %a) nounwind { ; RV64IA-TSO-TRAILING-FENCE-NEXT: lw a0, 0(a0) ; RV64IA-TSO-TRAILING-FENCE-NEXT: fmv.w.x fa0, a0 ; RV64IA-TSO-TRAILING-FENCE-NEXT: ret +; +; RV32IA-ZALASR-LABEL: atomic_load_f32_seq_cst: +; RV32IA-ZALASR: # %bb.0: +; RV32IA-ZALASR-NEXT: lw.aq a0, (a0) +; RV32IA-ZALASR-NEXT: fmv.w.x fa0, a0 +; RV32IA-ZALASR-NEXT: ret +; +; RV64IA-ZALASR-LABEL: 
atomic_load_f32_seq_cst: +; RV64IA-ZALASR: # %bb.0: +; RV64IA-ZALASR-NEXT: lw.aq a0, (a0) +; RV64IA-ZALASR-NEXT: fmv.w.x fa0, a0 +; RV64IA-ZALASR-NEXT: ret %1 = load atomic float, ptr %a seq_cst, align 4 ret float %1 } @@ -414,6 +460,18 @@ define double @atomic_load_f64_acquire(ptr %a) nounwind { ; RV64IA-TSO-TRAILING-FENCE-NEXT: ld a0, 0(a0) ; RV64IA-TSO-TRAILING-FENCE-NEXT: fmv.d.x fa0, a0 ; RV64IA-TSO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-ZALASR-WMO-LABEL: atomic_load_f64_acquire: +; RV64IA-ZALASR-WMO: # %bb.0: +; RV64IA-ZALASR-WMO-NEXT: ld.aq a0, (a0) +; RV64IA-ZALASR-WMO-NEXT: fmv.d.x fa0, a0 +; RV64IA-ZALASR-WMO-NEXT: ret +; +; RV64IA-ZALASR-TSO-LABEL: atomic_load_f64_acquire: +; RV64IA-ZALASR-TSO: # %bb.0: +; RV64IA-ZALASR-TSO-NEXT: ld a0, 0(a0) +; RV64IA-ZALASR-TSO-NEXT: fmv.d.x fa0, a0 +; RV64IA-ZALASR-TSO-NEXT: ret %1 = load atomic double, ptr %a acquire, align 8 ret double %1 } @@ -484,6 +542,12 @@ define double @atomic_load_f64_seq_cst(ptr %a) nounwind { ; RV64IA-TSO-TRAILING-FENCE-NEXT: ld a0, 0(a0) ; RV64IA-TSO-TRAILING-FENCE-NEXT: fmv.d.x fa0, a0 ; RV64IA-TSO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-ZALASR-LABEL: atomic_load_f64_seq_cst: +; RV64IA-ZALASR: # %bb.0: +; RV64IA-ZALASR-NEXT: ld.aq a0, (a0) +; RV64IA-ZALASR-NEXT: fmv.d.x fa0, a0 +; RV64IA-ZALASR-NEXT: ret %1 = load atomic double, ptr %a seq_cst, align 8 ret double %1 } @@ -635,6 +699,30 @@ define void @atomic_store_f32_release(ptr %a, float %b) nounwind { ; RV64IA-TSO-TRAILING-FENCE-NEXT: fmv.x.w a1, fa0 ; RV64IA-TSO-TRAILING-FENCE-NEXT: sw a1, 0(a0) ; RV64IA-TSO-TRAILING-FENCE-NEXT: ret +; +; RV32IA-ZALASR-WMO-LABEL: atomic_store_f32_release: +; RV32IA-ZALASR-WMO: # %bb.0: +; RV32IA-ZALASR-WMO-NEXT: fmv.x.w a1, fa0 +; RV32IA-ZALASR-WMO-NEXT: sw.rl a1, (a0) +; RV32IA-ZALASR-WMO-NEXT: ret +; +; RV32IA-ZALASR-TSO-LABEL: atomic_store_f32_release: +; RV32IA-ZALASR-TSO: # %bb.0: +; RV32IA-ZALASR-TSO-NEXT: fmv.x.w a1, fa0 +; RV32IA-ZALASR-TSO-NEXT: sw a1, 0(a0) +; RV32IA-ZALASR-TSO-NEXT: ret +; +; RV64IA-ZALASR-WMO-LABEL: atomic_store_f32_release: +; RV64IA-ZALASR-WMO: # %bb.0: +; RV64IA-ZALASR-WMO-NEXT: fmv.x.w a1, fa0 +; RV64IA-ZALASR-WMO-NEXT: sw.rl a1, (a0) +; RV64IA-ZALASR-WMO-NEXT: ret +; +; RV64IA-ZALASR-TSO-LABEL: atomic_store_f32_release: +; RV64IA-ZALASR-TSO: # %bb.0: +; RV64IA-ZALASR-TSO-NEXT: fmv.x.w a1, fa0 +; RV64IA-ZALASR-TSO-NEXT: sw a1, 0(a0) +; RV64IA-ZALASR-TSO-NEXT: ret store atomic float %b, ptr %a release, align 4 ret void } @@ -718,6 +806,18 @@ define void @atomic_store_f32_seq_cst(ptr %a, float %b) nounwind { ; RV64IA-TSO-TRAILING-FENCE-NEXT: sw a1, 0(a0) ; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw ; RV64IA-TSO-TRAILING-FENCE-NEXT: ret +; +; RV32IA-ZALASR-LABEL: atomic_store_f32_seq_cst: +; RV32IA-ZALASR: # %bb.0: +; RV32IA-ZALASR-NEXT: fmv.x.w a1, fa0 +; RV32IA-ZALASR-NEXT: sw.rl a1, (a0) +; RV32IA-ZALASR-NEXT: ret +; +; RV64IA-ZALASR-LABEL: atomic_store_f32_seq_cst: +; RV64IA-ZALASR: # %bb.0: +; RV64IA-ZALASR-NEXT: fmv.x.w a1, fa0 +; RV64IA-ZALASR-NEXT: sw.rl a1, (a0) +; RV64IA-ZALASR-NEXT: ret store atomic float %b, ptr %a seq_cst, align 4 ret void } @@ -876,6 +976,18 @@ define void @atomic_store_f64_release(ptr %a, double %b) nounwind { ; RV64IA-TSO-TRAILING-FENCE-NEXT: fmv.x.d a1, fa0 ; RV64IA-TSO-TRAILING-FENCE-NEXT: sd a1, 0(a0) ; RV64IA-TSO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-ZALASR-WMO-LABEL: atomic_store_f64_release: +; RV64IA-ZALASR-WMO: # %bb.0: +; RV64IA-ZALASR-WMO-NEXT: fmv.x.d a1, fa0 +; RV64IA-ZALASR-WMO-NEXT: sd.rl a1, (a0) +; RV64IA-ZALASR-WMO-NEXT: ret +; +; 
RV64IA-ZALASR-TSO-LABEL: atomic_store_f64_release: +; RV64IA-ZALASR-TSO: # %bb.0: +; RV64IA-ZALASR-TSO-NEXT: fmv.x.d a1, fa0 +; RV64IA-ZALASR-TSO-NEXT: sd a1, 0(a0) +; RV64IA-ZALASR-TSO-NEXT: ret store atomic double %b, ptr %a release, align 8 ret void } @@ -945,6 +1057,12 @@ define void @atomic_store_f64_seq_cst(ptr %a, double %b) nounwind { ; RV64IA-TSO-TRAILING-FENCE-NEXT: sd a1, 0(a0) ; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw ; RV64IA-TSO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-ZALASR-LABEL: atomic_store_f64_seq_cst: +; RV64IA-ZALASR: # %bb.0: +; RV64IA-ZALASR-NEXT: fmv.x.d a1, fa0 +; RV64IA-ZALASR-NEXT: sd.rl a1, (a0) +; RV64IA-ZALASR-NEXT: ret store atomic double %b, ptr %a seq_cst, align 8 ret void } diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv32.mir index 74249c1..e2d3bff 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv32.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv32.mir @@ -17,7 +17,7 @@ body: | ; RV32IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 ; RV32IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0 ; RV32IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1 - ; RV32IA-ZABHA-NEXT: [[AMOCAS_B:%[0-9]+]]:gpr = AMOCAS_B [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s8)) + ; RV32IA-ZABHA-NEXT: [[AMOCAS_B:%[0-9]+]]:gpr = AMOCAS_B [[COPY1]], [[ADDI]], [[COPY]] :: (load store monotonic (s8)) ; RV32IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_B]] ; RV32IA-ZABHA-NEXT: PseudoRET implicit $x10 %0:gpr(p0) = COPY $x10 @@ -42,7 +42,7 @@ body: | ; RV32IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 ; RV32IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0 ; RV32IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1 - ; RV32IA-ZABHA-NEXT: [[AMOCAS_H:%[0-9]+]]:gpr = AMOCAS_H [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s16)) + ; RV32IA-ZABHA-NEXT: [[AMOCAS_H:%[0-9]+]]:gpr = AMOCAS_H [[COPY1]], [[ADDI]], [[COPY]] :: (load store monotonic (s16)) ; RV32IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_H]] ; RV32IA-ZABHA-NEXT: PseudoRET implicit $x10 %0:gpr(p0) = COPY $x10 @@ -67,7 +67,7 @@ body: | ; RV32IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 ; RV32IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0 ; RV32IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1 - ; RV32IA-ZABHA-NEXT: [[AMOCAS_W:%[0-9]+]]:gpr = AMOCAS_W [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s32)) + ; RV32IA-ZABHA-NEXT: [[AMOCAS_W:%[0-9]+]]:gpr = AMOCAS_W [[COPY1]], [[ADDI]], [[COPY]] :: (load store monotonic (s32)) ; RV32IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_W]] ; RV32IA-ZABHA-NEXT: PseudoRET implicit $x10 %0:gpr(p0) = COPY $x10 @@ -92,7 +92,7 @@ body: | ; RV32IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 ; RV32IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0 ; RV32IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1 - ; RV32IA-ZABHA-NEXT: [[AMOCAS_W:%[0-9]+]]:gpr = AMOCAS_W [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s32)) + ; RV32IA-ZABHA-NEXT: [[AMOCAS_W:%[0-9]+]]:gpr = AMOCAS_W [[COPY1]], [[ADDI]], [[COPY]] :: (load store monotonic (s32)) ; RV32IA-ZABHA-NEXT: [[SLTIU:%[0-9]+]]:gpr = SLTIU [[AMOCAS_W]], 1 ; RV32IA-ZABHA-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2 ; RV32IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_W]] diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv64.mir index a2f7e30..ab537ea 100644 --- 
a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv64.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv64.mir @@ -17,7 +17,7 @@ body: | ; RV64IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 ; RV64IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0 ; RV64IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1 - ; RV64IA-ZABHA-NEXT: [[AMOCAS_B:%[0-9]+]]:gpr = AMOCAS_B [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s8)) + ; RV64IA-ZABHA-NEXT: [[AMOCAS_B:%[0-9]+]]:gpr = AMOCAS_B [[COPY1]], [[ADDI]], [[COPY]] :: (load store monotonic (s8)) ; RV64IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_B]] ; RV64IA-ZABHA-NEXT: PseudoRET implicit $x10 %0:gpr(p0) = COPY $x10 @@ -42,7 +42,7 @@ body: | ; RV64IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 ; RV64IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0 ; RV64IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1 - ; RV64IA-ZABHA-NEXT: [[AMOCAS_H:%[0-9]+]]:gpr = AMOCAS_H [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s16)) + ; RV64IA-ZABHA-NEXT: [[AMOCAS_H:%[0-9]+]]:gpr = AMOCAS_H [[COPY1]], [[ADDI]], [[COPY]] :: (load store monotonic (s16)) ; RV64IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_H]] ; RV64IA-ZABHA-NEXT: PseudoRET implicit $x10 %0:gpr(p0) = COPY $x10 @@ -67,7 +67,7 @@ body: | ; RV64IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 ; RV64IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0 ; RV64IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1 - ; RV64IA-ZABHA-NEXT: [[AMOCAS_W:%[0-9]+]]:gpr = AMOCAS_W [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s32)) + ; RV64IA-ZABHA-NEXT: [[AMOCAS_W:%[0-9]+]]:gpr = AMOCAS_W [[COPY1]], [[ADDI]], [[COPY]] :: (load store monotonic (s32)) ; RV64IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_W]] ; RV64IA-ZABHA-NEXT: PseudoRET implicit $x10 %0:gpr(p0) = COPY $x10 @@ -92,7 +92,7 @@ body: | ; RV64IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 ; RV64IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0 ; RV64IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1 - ; RV64IA-ZABHA-NEXT: [[AMOCAS_D_RV64_:%[0-9]+]]:gpr = AMOCAS_D_RV64 [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s64)) + ; RV64IA-ZABHA-NEXT: [[AMOCAS_D_RV64_:%[0-9]+]]:gpr = AMOCAS_D_RV64 [[COPY1]], [[ADDI]], [[COPY]] :: (load store monotonic (s64)) ; RV64IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_D_RV64_]] ; RV64IA-ZABHA-NEXT: PseudoRET implicit $x10 %0:gpr(p0) = COPY $x10 @@ -116,7 +116,7 @@ body: | ; RV64IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 ; RV64IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0 ; RV64IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1 - ; RV64IA-ZABHA-NEXT: [[AMOCAS_D_RV64_:%[0-9]+]]:gpr = AMOCAS_D_RV64 [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s64)) + ; RV64IA-ZABHA-NEXT: [[AMOCAS_D_RV64_:%[0-9]+]]:gpr = AMOCAS_D_RV64 [[COPY1]], [[ADDI]], [[COPY]] :: (load store monotonic (s64)) ; RV64IA-ZABHA-NEXT: [[SLTIU:%[0-9]+]]:gpr = SLTIU [[AMOCAS_D_RV64_]], 1 ; RV64IA-ZABHA-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2 ; RV64IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_D_RV64_]] diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomicrmw-add-sub-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomicrmw-add-sub-rv32.mir index f7fdc33..e547972 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomicrmw-add-sub-rv32.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomicrmw-add-sub-rv32.mir @@ -15,7 +15,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 - 
; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[COPY]], [[COPY1]] :: (load store monotonic (s8)) + ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[COPY1]], [[COPY]] :: (load store monotonic (s8)) ; CHECK-NEXT: $x10 = COPY [[AMOADD_B]] ; CHECK-NEXT: PseudoRET implicit $x10 %0:gprb(p0) = COPY $x10 @@ -38,7 +38,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 - ; CHECK-NEXT: [[AMOADD_H:%[0-9]+]]:gpr = AMOADD_H [[COPY]], [[COPY1]] :: (load store monotonic (s16)) + ; CHECK-NEXT: [[AMOADD_H:%[0-9]+]]:gpr = AMOADD_H [[COPY1]], [[COPY]] :: (load store monotonic (s16)) ; CHECK-NEXT: $x10 = COPY [[AMOADD_H]] ; CHECK-NEXT: PseudoRET implicit $x10 %0:gprb(p0) = COPY $x10 @@ -61,7 +61,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 - ; CHECK-NEXT: [[AMOADD_W:%[0-9]+]]:gpr = AMOADD_W [[COPY]], [[COPY1]] :: (load store monotonic (s32)) + ; CHECK-NEXT: [[AMOADD_W:%[0-9]+]]:gpr = AMOADD_W [[COPY1]], [[COPY]] :: (load store monotonic (s32)) ; CHECK-NEXT: $x10 = COPY [[AMOADD_W]] ; CHECK-NEXT: PseudoRET implicit $x10 %0:gprb(p0) = COPY $x10 @@ -86,7 +86,7 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x0 ; CHECK-NEXT: [[SUB:%[0-9]+]]:gpr = SUB [[COPY2]], [[COPY1]] - ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[COPY]], [[SUB]] :: (load store monotonic (s8)) + ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[SUB]], [[COPY]] :: (load store monotonic (s8)) ; CHECK-NEXT: $x10 = COPY [[AMOADD_B]] ; CHECK-NEXT: PseudoRET implicit $x10 %0:gprb(p0) = COPY $x10 @@ -113,7 +113,7 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x0 ; CHECK-NEXT: [[SUB:%[0-9]+]]:gpr = SUB [[COPY2]], [[COPY1]] - ; CHECK-NEXT: [[AMOADD_H:%[0-9]+]]:gpr = AMOADD_H [[COPY]], [[SUB]] :: (load store monotonic (s16)) + ; CHECK-NEXT: [[AMOADD_H:%[0-9]+]]:gpr = AMOADD_H [[SUB]], [[COPY]] :: (load store monotonic (s16)) ; CHECK-NEXT: $x10 = COPY [[AMOADD_H]] ; CHECK-NEXT: PseudoRET implicit $x10 %0:gprb(p0) = COPY $x10 @@ -140,7 +140,7 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x0 ; CHECK-NEXT: [[SUB:%[0-9]+]]:gpr = SUB [[COPY2]], [[COPY1]] - ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[COPY]], [[SUB]] :: (load store monotonic (s8)) + ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[SUB]], [[COPY]] :: (load store monotonic (s8)) ; CHECK-NEXT: $x10 = COPY [[AMOADD_B]] ; CHECK-NEXT: PseudoRET implicit $x10 %0:gprb(p0) = COPY $x10 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomicrmw-add-sub-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomicrmw-add-sub-rv64.mir index 178586c..f34826c 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomicrmw-add-sub-rv64.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomicrmw-add-sub-rv64.mir @@ -15,7 +15,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 - ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[COPY]], [[COPY1]] :: (load store monotonic (s8)) + ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[COPY1]], [[COPY]] :: (load store monotonic (s8)) ; CHECK-NEXT: $x10 = COPY [[AMOADD_B]] ; CHECK-NEXT: PseudoRET implicit $x10 %0:gprb(p0) = COPY $x10 @@ -38,7 +38,7 @@ body: | ; CHECK-NEXT: {{ $}} ; 
CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 - ; CHECK-NEXT: [[AMOADD_H:%[0-9]+]]:gpr = AMOADD_H [[COPY]], [[COPY1]] :: (load store monotonic (s16)) + ; CHECK-NEXT: [[AMOADD_H:%[0-9]+]]:gpr = AMOADD_H [[COPY1]], [[COPY]] :: (load store monotonic (s16)) ; CHECK-NEXT: $x10 = COPY [[AMOADD_H]] ; CHECK-NEXT: PseudoRET implicit $x10 %0:gprb(p0) = COPY $x10 @@ -61,7 +61,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 - ; CHECK-NEXT: [[AMOADD_W:%[0-9]+]]:gpr = AMOADD_W [[COPY]], [[COPY1]] :: (load store monotonic (s32)) + ; CHECK-NEXT: [[AMOADD_W:%[0-9]+]]:gpr = AMOADD_W [[COPY1]], [[COPY]] :: (load store monotonic (s32)) ; CHECK-NEXT: $x10 = COPY [[AMOADD_W]] ; CHECK-NEXT: PseudoRET implicit $x10 %0:gprb(p0) = COPY $x10 @@ -84,7 +84,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 - ; CHECK-NEXT: [[AMOADD_D:%[0-9]+]]:gpr = AMOADD_D [[COPY]], [[COPY1]] :: (load store monotonic (s64)) + ; CHECK-NEXT: [[AMOADD_D:%[0-9]+]]:gpr = AMOADD_D [[COPY1]], [[COPY]] :: (load store monotonic (s64)) ; CHECK-NEXT: $x10 = COPY [[AMOADD_D]] ; CHECK-NEXT: PseudoRET implicit $x10 %0:gprb(p0) = COPY $x10 @@ -109,7 +109,7 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x0 ; CHECK-NEXT: [[SUB:%[0-9]+]]:gpr = SUB [[COPY2]], [[COPY1]] - ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[COPY]], [[SUB]] :: (load store monotonic (s8)) + ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[SUB]], [[COPY]] :: (load store monotonic (s8)) ; CHECK-NEXT: $x10 = COPY [[AMOADD_B]] ; CHECK-NEXT: PseudoRET implicit $x10 %0:gprb(p0) = COPY $x10 @@ -136,7 +136,7 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x0 ; CHECK-NEXT: [[SUB:%[0-9]+]]:gpr = SUB [[COPY2]], [[COPY1]] - ; CHECK-NEXT: [[AMOADD_H:%[0-9]+]]:gpr = AMOADD_H [[COPY]], [[SUB]] :: (load store monotonic (s16)) + ; CHECK-NEXT: [[AMOADD_H:%[0-9]+]]:gpr = AMOADD_H [[SUB]], [[COPY]] :: (load store monotonic (s16)) ; CHECK-NEXT: $x10 = COPY [[AMOADD_H]] ; CHECK-NEXT: PseudoRET implicit $x10 %0:gprb(p0) = COPY $x10 @@ -163,7 +163,7 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x0 ; CHECK-NEXT: [[SUB:%[0-9]+]]:gpr = SUB [[COPY2]], [[COPY1]] - ; CHECK-NEXT: [[AMOADD_W:%[0-9]+]]:gpr = AMOADD_W [[COPY]], [[SUB]] :: (load store monotonic (s32)) + ; CHECK-NEXT: [[AMOADD_W:%[0-9]+]]:gpr = AMOADD_W [[SUB]], [[COPY]] :: (load store monotonic (s32)) ; CHECK-NEXT: $x10 = COPY [[AMOADD_W]] ; CHECK-NEXT: PseudoRET implicit $x10 %0:gprb(p0) = COPY $x10 @@ -190,7 +190,7 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x0 ; CHECK-NEXT: [[SUB:%[0-9]+]]:gpr = SUB [[COPY2]], [[COPY1]] - ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[COPY]], [[SUB]] :: (load store monotonic (s8)) + ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[SUB]], [[COPY]] :: (load store monotonic (s8)) ; CHECK-NEXT: $x10 = COPY [[AMOADD_B]] ; CHECK-NEXT: PseudoRET implicit $x10 %0:gprb(p0) = COPY $x10 diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll index ead255b..f3529b1 100644 --- a/llvm/test/CodeGen/RISCV/attributes.ll +++ b/llvm/test/CodeGen/RISCV/attributes.ll @@ -443,7 +443,7 @@ ; RV32ZVFBFWMA: .attribute 5, 
"rv32i2p1_f2p2_zicsr2p0_zfbfmin1p0_zve32f1p0_zve32x1p0_zvfbfmin1p0_zvfbfwma1p0_zvl32b1p0" ; RV32ZVFOFP8MIN: .attribute 5, "rv32i2p1_f2p2_zicsr2p0_zve32f1p0_zve32x1p0_zvfofp8min0p2_zvl32b1p0" ; RV32ZACAS: .attribute 5, "rv32i2p1_zaamo1p0_zacas1p0" -; RV32ZALASR: .attribute 5, "rv32i2p1_zalasr0p1" +; RV32ZALASR: .attribute 5, "rv32i2p1_zalasr0p9" ; RV32ZAMA16B: .attribute 5, "rv32i2p1_zama16b1p0" ; RV32ZICFILP: .attribute 5, "rv32i2p1_zicfilp1p0_zicsr2p0" ; RV32ZABHA: .attribute 5, "rv32i2p1_zaamo1p0_zabha1p0" @@ -590,8 +590,8 @@ ; RV64ZVFBFWMA: .attribute 5, "rv64i2p1_f2p2_zicsr2p0_zfbfmin1p0_zve32f1p0_zve32x1p0_zvfbfmin1p0_zvfbfwma1p0_zvl32b1p0" ; RV64ZVFOFP8MIN: .attribute 5, "rv64i2p1_f2p2_zicsr2p0_zve32f1p0_zve32x1p0_zvfofp8min0p2_zvl32b1p0" ; RV64ZACAS: .attribute 5, "rv64i2p1_zaamo1p0_zacas1p0" -; RV64ZALASR: .attribute 5, "rv64i2p1_zalasr0p1" -; RV64ZALASRA: .attribute 5, "rv64i2p1_a2p1_zaamo1p0_zalasr0p1_zalrsc1p0" +; RV64ZALASR: .attribute 5, "rv64i2p1_zalasr0p9" +; RV64ZALASRA: .attribute 5, "rv64i2p1_a2p1_zaamo1p0_zalasr0p9_zalrsc1p0" ; RV64ZICFILP: .attribute 5, "rv64i2p1_zicfilp1p0_zicsr2p0" ; RV64ZABHA: .attribute 5, "rv64i2p1_zaamo1p0_zabha1p0" ; RV64ZVBC32E: .attribute 5, "rv64i2p1_zicsr2p0_zvbc32e0p7_zve32x1p0_zvl32b1p0" diff --git a/llvm/test/CodeGen/SPARC/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/SPARC/atomicrmw-uinc-udec-wrap.ll index 380a4a0..d1f1c46 100644 --- a/llvm/test/CodeGen/SPARC/atomicrmw-uinc-udec-wrap.ll +++ b/llvm/test/CodeGen/SPARC/atomicrmw-uinc-udec-wrap.ll @@ -5,7 +5,7 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) { ; CHECK-LABEL: atomicrmw_uinc_wrap_i8: ; CHECK: .cfi_startproc ; CHECK-NEXT: ! %bb.0: -; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore +; CHECK-NEXT: membar #LoadStore | #StoreStore ; CHECK-NEXT: and %o0, -4, %o2 ; CHECK-NEXT: mov 3, %o3 ; CHECK-NEXT: andn %o3, %o0, %o0 @@ -36,7 +36,7 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) { ; CHECK-NEXT: nop ; CHECK-NEXT: ! %bb.2: ! %atomicrmw.end ; CHECK-NEXT: srl %o4, %o0, %o0 -; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore +; CHECK-NEXT: membar #LoadLoad | #LoadStore ; CHECK-NEXT: retl ; CHECK-NEXT: nop %result = atomicrmw uinc_wrap ptr %ptr, i8 %val seq_cst @@ -47,7 +47,7 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { ; CHECK-LABEL: atomicrmw_uinc_wrap_i16: ; CHECK: .cfi_startproc ; CHECK-NEXT: ! %bb.0: -; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore +; CHECK-NEXT: membar #LoadStore | #StoreStore ; CHECK-NEXT: and %o0, -4, %o2 ; CHECK-NEXT: and %o0, 3, %o0 ; CHECK-NEXT: xor %o0, 2, %o0 @@ -79,7 +79,7 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { ; CHECK-NEXT: nop ; CHECK-NEXT: ! %bb.2: ! %atomicrmw.end ; CHECK-NEXT: srl %o5, %o0, %o0 -; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore +; CHECK-NEXT: membar #LoadLoad | #LoadStore ; CHECK-NEXT: retl ; CHECK-NEXT: nop %result = atomicrmw uinc_wrap ptr %ptr, i16 %val seq_cst @@ -90,7 +90,7 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) { ; CHECK-LABEL: atomicrmw_uinc_wrap_i32: ; CHECK: .cfi_startproc ; CHECK-NEXT: ! %bb.0: -; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore +; CHECK-NEXT: membar #LoadStore | #StoreStore ; CHECK-NEXT: ld [%o0], %o2 ; CHECK-NEXT: .LBB2_1: ! %atomicrmw.start ; CHECK-NEXT: ! 
=>This Inner Loop Header: Depth=1 @@ -106,7 +106,7 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) { ; CHECK-NEXT: bne %icc, .LBB2_1 ; CHECK-NEXT: nop ; CHECK-NEXT: ! %bb.2: ! %atomicrmw.end -; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore +; CHECK-NEXT: membar #LoadLoad | #LoadStore ; CHECK-NEXT: retl ; CHECK-NEXT: mov %o2, %o0 %result = atomicrmw uinc_wrap ptr %ptr, i32 %val seq_cst @@ -160,7 +160,7 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) { ; CHECK-LABEL: atomicrmw_udec_wrap_i8: ; CHECK: .cfi_startproc ; CHECK-NEXT: ! %bb.0: -; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore +; CHECK-NEXT: membar #LoadStore | #StoreStore ; CHECK-NEXT: and %o0, -4, %o2 ; CHECK-NEXT: mov 3, %o3 ; CHECK-NEXT: andn %o3, %o0, %o0 @@ -193,7 +193,7 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) { ; CHECK-NEXT: nop ; CHECK-NEXT: ! %bb.2: ! %atomicrmw.end ; CHECK-NEXT: srl %o5, %o0, %o0 -; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore +; CHECK-NEXT: membar #LoadLoad | #LoadStore ; CHECK-NEXT: retl ; CHECK-NEXT: nop %result = atomicrmw udec_wrap ptr %ptr, i8 %val seq_cst @@ -204,7 +204,7 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) { ; CHECK-LABEL: atomicrmw_udec_wrap_i16: ; CHECK: .cfi_startproc ; CHECK-NEXT: ! %bb.0: -; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore +; CHECK-NEXT: membar #LoadStore | #StoreStore ; CHECK-NEXT: and %o0, -4, %o2 ; CHECK-NEXT: and %o0, 3, %o0 ; CHECK-NEXT: xor %o0, 2, %o0 @@ -238,7 +238,7 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) { ; CHECK-NEXT: nop ; CHECK-NEXT: ! %bb.2: ! %atomicrmw.end ; CHECK-NEXT: srl %g2, %o0, %o0 -; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore +; CHECK-NEXT: membar #LoadLoad | #LoadStore ; CHECK-NEXT: retl ; CHECK-NEXT: nop %result = atomicrmw udec_wrap ptr %ptr, i16 %val seq_cst @@ -249,7 +249,7 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) { ; CHECK-LABEL: atomicrmw_udec_wrap_i32: ; CHECK: .cfi_startproc ; CHECK-NEXT: ! %bb.0: -; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore +; CHECK-NEXT: membar #LoadStore | #StoreStore ; CHECK-NEXT: ld [%o0], %o2 ; CHECK-NEXT: .LBB6_1: ! %atomicrmw.start ; CHECK-NEXT: ! =>This Inner Loop Header: Depth=1 @@ -267,7 +267,7 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) { ; CHECK-NEXT: bne %icc, .LBB6_1 ; CHECK-NEXT: nop ; CHECK-NEXT: ! %bb.2: ! %atomicrmw.end -; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore +; CHECK-NEXT: membar #LoadLoad | #LoadStore ; CHECK-NEXT: retl ; CHECK-NEXT: mov %o2, %o0 %result = atomicrmw udec_wrap ptr %ptr, i32 %val seq_cst diff --git a/llvm/test/CodeGen/SPARC/atomics-ordering.ll b/llvm/test/CodeGen/SPARC/atomics-ordering.ll new file mode 100644 index 0000000..7c13ac2 --- /dev/null +++ b/llvm/test/CodeGen/SPARC/atomics-ordering.ll @@ -0,0 +1,446 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=sparc -verify-machineinstrs | FileCheck %s --check-prefixes=SPARC32 +; RUN: llc < %s -mtriple=sparc -mcpu=leon4 -verify-machineinstrs | FileCheck %s --check-prefixes=SPARC32-LEON4 +; RUN: llc < %s -mtriple=sparc -mcpu=v9 -verify-machineinstrs | FileCheck %s --check-prefixes=SPARC32-V9 +; RUN: llc < %s -mtriple=sparcv9 -verify-machineinstrs | FileCheck %s --check-prefixes=SPARC64 + +define i32 @load_acq(ptr %0) nounwind { +; SPARC32-LABEL: load_acq: +; SPARC32: ! 
%bb.0: +; SPARC32-NEXT: save %sp, -96, %sp +; SPARC32-NEXT: mov %i0, %o0 +; SPARC32-NEXT: call __atomic_load_4 +; SPARC32-NEXT: mov 2, %o1 +; SPARC32-NEXT: ret +; SPARC32-NEXT: restore %g0, %o0, %o0 +; +; SPARC32-LEON4-LABEL: load_acq: +; SPARC32-LEON4: ! %bb.0: +; SPARC32-LEON4-NEXT: retl +; SPARC32-LEON4-NEXT: ld [%o0], %o0 +; +; SPARC32-V9-LABEL: load_acq: +; SPARC32-V9: ! %bb.0: +; SPARC32-V9-NEXT: ld [%o0], %o0 +; SPARC32-V9-NEXT: membar #LoadLoad | #LoadStore +; SPARC32-V9-NEXT: retl +; SPARC32-V9-NEXT: nop +; +; SPARC64-LABEL: load_acq: +; SPARC64: ! %bb.0: +; SPARC64-NEXT: ld [%o0], %o0 +; SPARC64-NEXT: membar #LoadLoad | #LoadStore +; SPARC64-NEXT: retl +; SPARC64-NEXT: nop + %2 = load atomic i32, ptr %0 acquire, align 4 + ret i32 %2 +} + +define i32 @load_sc(ptr %0) nounwind { +; SPARC32-LABEL: load_sc: +; SPARC32: ! %bb.0: +; SPARC32-NEXT: save %sp, -96, %sp +; SPARC32-NEXT: mov %i0, %o0 +; SPARC32-NEXT: call __atomic_load_4 +; SPARC32-NEXT: mov 5, %o1 +; SPARC32-NEXT: ret +; SPARC32-NEXT: restore %g0, %o0, %o0 +; +; SPARC32-LEON4-LABEL: load_sc: +; SPARC32-LEON4: ! %bb.0: +; SPARC32-LEON4-NEXT: retl +; SPARC32-LEON4-NEXT: ld [%o0], %o0 +; +; SPARC32-V9-LABEL: load_sc: +; SPARC32-V9: ! %bb.0: +; SPARC32-V9-NEXT: ld [%o0], %o0 +; SPARC32-V9-NEXT: membar #LoadLoad | #LoadStore +; SPARC32-V9-NEXT: retl +; SPARC32-V9-NEXT: nop +; +; SPARC64-LABEL: load_sc: +; SPARC64: ! %bb.0: +; SPARC64-NEXT: ld [%o0], %o0 +; SPARC64-NEXT: membar #LoadLoad | #LoadStore +; SPARC64-NEXT: retl +; SPARC64-NEXT: nop + %2 = load atomic i32, ptr %0 seq_cst, align 4 + ret i32 %2 +} + +define void @store_rel(ptr %0, i32 %1) nounwind { +; SPARC32-LABEL: store_rel: +; SPARC32: ! %bb.0: +; SPARC32-NEXT: save %sp, -96, %sp +; SPARC32-NEXT: mov %i1, %o1 +; SPARC32-NEXT: mov %i0, %o0 +; SPARC32-NEXT: call __atomic_store_4 +; SPARC32-NEXT: mov 3, %o2 +; SPARC32-NEXT: ret +; SPARC32-NEXT: restore +; +; SPARC32-LEON4-LABEL: store_rel: +; SPARC32-LEON4: ! %bb.0: +; SPARC32-LEON4-NEXT: stbar +; SPARC32-LEON4-NEXT: retl +; SPARC32-LEON4-NEXT: st %o1, [%o0] +; +; SPARC32-V9-LABEL: store_rel: +; SPARC32-V9: ! %bb.0: +; SPARC32-V9-NEXT: membar #LoadStore | #StoreStore +; SPARC32-V9-NEXT: retl +; SPARC32-V9-NEXT: st %o1, [%o0] +; +; SPARC64-LABEL: store_rel: +; SPARC64: ! %bb.0: +; SPARC64-NEXT: membar #LoadStore | #StoreStore +; SPARC64-NEXT: retl +; SPARC64-NEXT: st %o1, [%o0] + store atomic i32 %1, ptr %0 release, align 4 + ret void +} + +define void @store_sc(ptr %0, i32 %1) nounwind { +; SPARC32-LABEL: store_sc: +; SPARC32: ! %bb.0: +; SPARC32-NEXT: save %sp, -96, %sp +; SPARC32-NEXT: mov %i1, %o1 +; SPARC32-NEXT: mov %i0, %o0 +; SPARC32-NEXT: call __atomic_store_4 +; SPARC32-NEXT: mov 5, %o2 +; SPARC32-NEXT: ret +; SPARC32-NEXT: restore +; +; SPARC32-LEON4-LABEL: store_sc: +; SPARC32-LEON4: ! %bb.0: +; SPARC32-LEON4-NEXT: stbar +; SPARC32-LEON4-NEXT: st %o1, [%o0] +; SPARC32-LEON4-NEXT: stbar +; SPARC32-LEON4-NEXT: ldstub [%sp+-1], %g0 +; SPARC32-LEON4-NEXT: retl +; SPARC32-LEON4-NEXT: nop +; +; SPARC32-V9-LABEL: store_sc: +; SPARC32-V9: ! %bb.0: +; SPARC32-V9-NEXT: membar #LoadStore | #StoreStore +; SPARC32-V9-NEXT: st %o1, [%o0] +; SPARC32-V9-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore +; SPARC32-V9-NEXT: retl +; SPARC32-V9-NEXT: nop +; +; SPARC64-LABEL: store_sc: +; SPARC64: ! 
%bb.0: +; SPARC64-NEXT: membar #LoadStore | #StoreStore +; SPARC64-NEXT: st %o1, [%o0] +; SPARC64-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore +; SPARC64-NEXT: retl +; SPARC64-NEXT: nop + store atomic i32 %1, ptr %0 seq_cst, align 4 + ret void +} + +define i32 @rmw_acq(ptr %0, i32 %1) nounwind { +; SPARC32-LABEL: rmw_acq: +; SPARC32: ! %bb.0: +; SPARC32-NEXT: save %sp, -96, %sp +; SPARC32-NEXT: mov %i1, %o1 +; SPARC32-NEXT: mov %i0, %o0 +; SPARC32-NEXT: call __atomic_exchange_4 +; SPARC32-NEXT: mov 2, %o2 +; SPARC32-NEXT: ret +; SPARC32-NEXT: restore %g0, %o0, %o0 +; +; SPARC32-LEON4-LABEL: rmw_acq: +; SPARC32-LEON4: ! %bb.0: +; SPARC32-LEON4-NEXT: swap [%o0], %o1 +; SPARC32-LEON4-NEXT: retl +; SPARC32-LEON4-NEXT: mov %o1, %o0 +; +; SPARC32-V9-LABEL: rmw_acq: +; SPARC32-V9: ! %bb.0: +; SPARC32-V9-NEXT: swap [%o0], %o1 +; SPARC32-V9-NEXT: membar #LoadLoad | #LoadStore +; SPARC32-V9-NEXT: retl +; SPARC32-V9-NEXT: mov %o1, %o0 +; +; SPARC64-LABEL: rmw_acq: +; SPARC64: ! %bb.0: +; SPARC64-NEXT: swap [%o0], %o1 +; SPARC64-NEXT: membar #LoadLoad | #LoadStore +; SPARC64-NEXT: retl +; SPARC64-NEXT: mov %o1, %o0 + %3 = atomicrmw xchg ptr %0, i32 %1 acquire, align 4 + ret i32 %3 +} + +define i32 @rmw_rel(ptr %0, i32 %1) nounwind { +; SPARC32-LABEL: rmw_rel: +; SPARC32: ! %bb.0: +; SPARC32-NEXT: save %sp, -96, %sp +; SPARC32-NEXT: mov %i1, %o1 +; SPARC32-NEXT: mov %i0, %o0 +; SPARC32-NEXT: call __atomic_exchange_4 +; SPARC32-NEXT: mov 3, %o2 +; SPARC32-NEXT: ret +; SPARC32-NEXT: restore %g0, %o0, %o0 +; +; SPARC32-LEON4-LABEL: rmw_rel: +; SPARC32-LEON4: ! %bb.0: +; SPARC32-LEON4-NEXT: stbar +; SPARC32-LEON4-NEXT: swap [%o0], %o1 +; SPARC32-LEON4-NEXT: retl +; SPARC32-LEON4-NEXT: mov %o1, %o0 +; +; SPARC32-V9-LABEL: rmw_rel: +; SPARC32-V9: ! %bb.0: +; SPARC32-V9-NEXT: membar #LoadStore | #StoreStore +; SPARC32-V9-NEXT: swap [%o0], %o1 +; SPARC32-V9-NEXT: retl +; SPARC32-V9-NEXT: mov %o1, %o0 +; +; SPARC64-LABEL: rmw_rel: +; SPARC64: ! %bb.0: +; SPARC64-NEXT: membar #LoadStore | #StoreStore +; SPARC64-NEXT: swap [%o0], %o1 +; SPARC64-NEXT: retl +; SPARC64-NEXT: mov %o1, %o0 + %3 = atomicrmw xchg ptr %0, i32 %1 release, align 4 + ret i32 %3 +} + +define i32 @rmw_acq_rel(ptr %0, i32 %1) nounwind { +; SPARC32-LABEL: rmw_acq_rel: +; SPARC32: ! %bb.0: +; SPARC32-NEXT: save %sp, -96, %sp +; SPARC32-NEXT: mov %i1, %o1 +; SPARC32-NEXT: mov %i0, %o0 +; SPARC32-NEXT: call __atomic_exchange_4 +; SPARC32-NEXT: mov 4, %o2 +; SPARC32-NEXT: ret +; SPARC32-NEXT: restore %g0, %o0, %o0 +; +; SPARC32-LEON4-LABEL: rmw_acq_rel: +; SPARC32-LEON4: ! %bb.0: +; SPARC32-LEON4-NEXT: stbar +; SPARC32-LEON4-NEXT: swap [%o0], %o1 +; SPARC32-LEON4-NEXT: retl +; SPARC32-LEON4-NEXT: mov %o1, %o0 +; +; SPARC32-V9-LABEL: rmw_acq_rel: +; SPARC32-V9: ! %bb.0: +; SPARC32-V9-NEXT: membar #LoadStore | #StoreStore +; SPARC32-V9-NEXT: swap [%o0], %o1 +; SPARC32-V9-NEXT: membar #LoadLoad | #LoadStore +; SPARC32-V9-NEXT: retl +; SPARC32-V9-NEXT: mov %o1, %o0 +; +; SPARC64-LABEL: rmw_acq_rel: +; SPARC64: ! %bb.0: +; SPARC64-NEXT: membar #LoadStore | #StoreStore +; SPARC64-NEXT: swap [%o0], %o1 +; SPARC64-NEXT: membar #LoadLoad | #LoadStore +; SPARC64-NEXT: retl +; SPARC64-NEXT: mov %o1, %o0 + %3 = atomicrmw xchg ptr %0, i32 %1 acq_rel, align 4 + ret i32 %3 +} + +define i32 @rmw_sc(ptr %0, i32 %1) nounwind { +; SPARC32-LABEL: rmw_sc: +; SPARC32: ! 
%bb.0: +; SPARC32-NEXT: save %sp, -96, %sp +; SPARC32-NEXT: mov %i1, %o1 +; SPARC32-NEXT: mov %i0, %o0 +; SPARC32-NEXT: call __atomic_exchange_4 +; SPARC32-NEXT: mov 5, %o2 +; SPARC32-NEXT: ret +; SPARC32-NEXT: restore %g0, %o0, %o0 +; +; SPARC32-LEON4-LABEL: rmw_sc: +; SPARC32-LEON4: ! %bb.0: +; SPARC32-LEON4-NEXT: stbar +; SPARC32-LEON4-NEXT: swap [%o0], %o1 +; SPARC32-LEON4-NEXT: retl +; SPARC32-LEON4-NEXT: mov %o1, %o0 +; +; SPARC32-V9-LABEL: rmw_sc: +; SPARC32-V9: ! %bb.0: +; SPARC32-V9-NEXT: membar #LoadStore | #StoreStore +; SPARC32-V9-NEXT: swap [%o0], %o1 +; SPARC32-V9-NEXT: membar #LoadLoad | #LoadStore +; SPARC32-V9-NEXT: retl +; SPARC32-V9-NEXT: mov %o1, %o0 +; +; SPARC64-LABEL: rmw_sc: +; SPARC64: ! %bb.0: +; SPARC64-NEXT: membar #LoadStore | #StoreStore +; SPARC64-NEXT: swap [%o0], %o1 +; SPARC64-NEXT: membar #LoadLoad | #LoadStore +; SPARC64-NEXT: retl +; SPARC64-NEXT: mov %o1, %o0 + %3 = atomicrmw xchg ptr %0, i32 %1 seq_cst, align 4 + ret i32 %3 +} + +define i32 @cas_acq(ptr %0, i32 %1, i32 %2) nounwind { +; SPARC32-LABEL: cas_acq: +; SPARC32: ! %bb.0: +; SPARC32-NEXT: save %sp, -96, %sp +; SPARC32-NEXT: mov %i2, %o2 +; SPARC32-NEXT: mov %i0, %o0 +; SPARC32-NEXT: st %i1, [%fp+-4] +; SPARC32-NEXT: add %fp, -4, %o1 +; SPARC32-NEXT: mov 2, %o3 +; SPARC32-NEXT: call __atomic_compare_exchange_4 +; SPARC32-NEXT: mov %o3, %o4 +; SPARC32-NEXT: ld [%fp+-4], %i0 +; SPARC32-NEXT: ret +; SPARC32-NEXT: restore +; +; SPARC32-LEON4-LABEL: cas_acq: +; SPARC32-LEON4: ! %bb.0: +; SPARC32-LEON4-NEXT: casa [%o0] 10, %o1, %o2 +; SPARC32-LEON4-NEXT: retl +; SPARC32-LEON4-NEXT: mov %o2, %o0 +; +; SPARC32-V9-LABEL: cas_acq: +; SPARC32-V9: ! %bb.0: +; SPARC32-V9-NEXT: cas [%o0], %o1, %o2 +; SPARC32-V9-NEXT: membar #LoadLoad | #LoadStore +; SPARC32-V9-NEXT: retl +; SPARC32-V9-NEXT: mov %o2, %o0 +; +; SPARC64-LABEL: cas_acq: +; SPARC64: ! %bb.0: +; SPARC64-NEXT: cas [%o0], %o1, %o2 +; SPARC64-NEXT: membar #LoadLoad | #LoadStore +; SPARC64-NEXT: retl +; SPARC64-NEXT: mov %o2, %o0 + %4 = cmpxchg ptr %0, i32 %1, i32 %2 acquire acquire, align 4 + %5 = extractvalue { i32, i1 } %4, 0 + ret i32 %5 +} + +define i32 @cas_rel(ptr %0, i32 %1, i32 %2) nounwind { +; SPARC32-LABEL: cas_rel: +; SPARC32: ! %bb.0: +; SPARC32-NEXT: save %sp, -96, %sp +; SPARC32-NEXT: mov %i2, %o2 +; SPARC32-NEXT: mov %i0, %o0 +; SPARC32-NEXT: st %i1, [%fp+-4] +; SPARC32-NEXT: add %fp, -4, %o1 +; SPARC32-NEXT: mov 3, %o3 +; SPARC32-NEXT: call __atomic_compare_exchange_4 +; SPARC32-NEXT: mov %g0, %o4 +; SPARC32-NEXT: ld [%fp+-4], %i0 +; SPARC32-NEXT: ret +; SPARC32-NEXT: restore +; +; SPARC32-LEON4-LABEL: cas_rel: +; SPARC32-LEON4: ! %bb.0: +; SPARC32-LEON4-NEXT: stbar +; SPARC32-LEON4-NEXT: casa [%o0] 10, %o1, %o2 +; SPARC32-LEON4-NEXT: retl +; SPARC32-LEON4-NEXT: mov %o2, %o0 +; +; SPARC32-V9-LABEL: cas_rel: +; SPARC32-V9: ! %bb.0: +; SPARC32-V9-NEXT: membar #LoadStore | #StoreStore +; SPARC32-V9-NEXT: cas [%o0], %o1, %o2 +; SPARC32-V9-NEXT: retl +; SPARC32-V9-NEXT: mov %o2, %o0 +; +; SPARC64-LABEL: cas_rel: +; SPARC64: ! %bb.0: +; SPARC64-NEXT: membar #LoadStore | #StoreStore +; SPARC64-NEXT: cas [%o0], %o1, %o2 +; SPARC64-NEXT: retl +; SPARC64-NEXT: mov %o2, %o0 + %4 = cmpxchg ptr %0, i32 %1, i32 %2 release monotonic, align 4 + %5 = extractvalue { i32, i1 } %4, 0 + ret i32 %5 +} + +define i32 @cas_acq_rel(ptr %0, i32 %1, i32 %2) nounwind { +; SPARC32-LABEL: cas_acq_rel: +; SPARC32: ! 
%bb.0: +; SPARC32-NEXT: save %sp, -96, %sp +; SPARC32-NEXT: mov %i2, %o2 +; SPARC32-NEXT: mov %i0, %o0 +; SPARC32-NEXT: st %i1, [%fp+-4] +; SPARC32-NEXT: add %fp, -4, %o1 +; SPARC32-NEXT: mov 4, %o3 +; SPARC32-NEXT: call __atomic_compare_exchange_4 +; SPARC32-NEXT: mov 2, %o4 +; SPARC32-NEXT: ld [%fp+-4], %i0 +; SPARC32-NEXT: ret +; SPARC32-NEXT: restore +; +; SPARC32-LEON4-LABEL: cas_acq_rel: +; SPARC32-LEON4: ! %bb.0: +; SPARC32-LEON4-NEXT: stbar +; SPARC32-LEON4-NEXT: casa [%o0] 10, %o1, %o2 +; SPARC32-LEON4-NEXT: retl +; SPARC32-LEON4-NEXT: mov %o2, %o0 +; +; SPARC32-V9-LABEL: cas_acq_rel: +; SPARC32-V9: ! %bb.0: +; SPARC32-V9-NEXT: membar #LoadStore | #StoreStore +; SPARC32-V9-NEXT: cas [%o0], %o1, %o2 +; SPARC32-V9-NEXT: membar #LoadLoad | #LoadStore +; SPARC32-V9-NEXT: retl +; SPARC32-V9-NEXT: mov %o2, %o0 +; +; SPARC64-LABEL: cas_acq_rel: +; SPARC64: ! %bb.0: +; SPARC64-NEXT: membar #LoadStore | #StoreStore +; SPARC64-NEXT: cas [%o0], %o1, %o2 +; SPARC64-NEXT: membar #LoadLoad | #LoadStore +; SPARC64-NEXT: retl +; SPARC64-NEXT: mov %o2, %o0 + %4 = cmpxchg ptr %0, i32 %1, i32 %2 acq_rel acquire, align 4 + %5 = extractvalue { i32, i1 } %4, 0 + ret i32 %5 +} + +define i32 @cas_sc(ptr %0, i32 %1, i32 %2) nounwind { +; SPARC32-LABEL: cas_sc: +; SPARC32: ! %bb.0: +; SPARC32-NEXT: save %sp, -96, %sp +; SPARC32-NEXT: mov %i2, %o2 +; SPARC32-NEXT: mov %i0, %o0 +; SPARC32-NEXT: st %i1, [%fp+-4] +; SPARC32-NEXT: add %fp, -4, %o1 +; SPARC32-NEXT: mov 5, %o3 +; SPARC32-NEXT: call __atomic_compare_exchange_4 +; SPARC32-NEXT: mov %o3, %o4 +; SPARC32-NEXT: ld [%fp+-4], %i0 +; SPARC32-NEXT: ret +; SPARC32-NEXT: restore +; +; SPARC32-LEON4-LABEL: cas_sc: +; SPARC32-LEON4: ! %bb.0: +; SPARC32-LEON4-NEXT: stbar +; SPARC32-LEON4-NEXT: casa [%o0] 10, %o1, %o2 +; SPARC32-LEON4-NEXT: retl +; SPARC32-LEON4-NEXT: mov %o2, %o0 +; +; SPARC32-V9-LABEL: cas_sc: +; SPARC32-V9: ! %bb.0: +; SPARC32-V9-NEXT: membar #LoadStore | #StoreStore +; SPARC32-V9-NEXT: cas [%o0], %o1, %o2 +; SPARC32-V9-NEXT: membar #LoadLoad | #LoadStore +; SPARC32-V9-NEXT: retl +; SPARC32-V9-NEXT: mov %o2, %o0 +; +; SPARC64-LABEL: cas_sc: +; SPARC64: ! %bb.0: +; SPARC64-NEXT: membar #LoadStore | #StoreStore +; SPARC64-NEXT: cas [%o0], %o1, %o2 +; SPARC64-NEXT: membar #LoadLoad | #LoadStore +; SPARC64-NEXT: retl +; SPARC64-NEXT: mov %o2, %o0 + %4 = cmpxchg ptr %0, i32 %1, i32 %2 seq_cst seq_cst, align 4 + %5 = extractvalue { i32, i1 } %4, 0 + ret i32 %5 +} diff --git a/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll index 172ff53..e562c4a 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll @@ -132,4 +132,17 @@ define i32 @all_true_2_4_i32(<4 x i32> %v) { ret i32 %conv3 } +; Regression test for the intrinsic pattern matcher with nullary intrinsics +define i64 @other_intrinsic() #0 { +; CHECK-LABEL: other_intrinsic: +; CHECK: .functype other_intrinsic () -> (i64) +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: global.get $push0=, __tls_align +; CHECK-NEXT: return $pop0 +entry: + %0 = call i64 @llvm.wasm.tls.align.i64() + ret i64 %0 +} + +attributes #0 = { "target-features"="+atomics" }
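Read together, the new RISC-V Zalasr and SPARC atomics-ordering checks above pin down the same two lowerings from opposite ends: an acquire load and a release store. A small sketch (invented function name; not part of the patch) that would exercise both paths:

; Per the checks above: on SPARC-V9 the acquire load gets a trailing
; "membar #LoadLoad | #LoadStore" and the release store a leading
; "membar #LoadStore | #StoreStore"; with RISC-V +experimental-zalasr
; under WMO, the same IR selects lw.aq and sw.rl instead.
define i32 @acq_load_rel_store(ptr %p, i32 %v) nounwind {
  %x = load atomic i32, ptr %p acquire, align 4
  store atomic i32 %v, ptr %p release, align 4
  ret i32 %x
}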