diff options
Diffstat (limited to 'llvm/test/CodeGen/AArch64')
-rw-r--r-- | llvm/test/CodeGen/AArch64/GlobalISel/combine-fconstant.mir | 21 | ||||
-rw-r--r-- | llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir | 5 | ||||
-rw-r--r-- | llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp16-fconstant.mir | 6 | ||||
-rw-r--r-- | llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll | 13 | ||||
-rw-r--r-- | llvm/test/CodeGen/AArch64/arm64-saddlp1d-uaddlp1d.mir | 50 | ||||
-rw-r--r-- | llvm/test/CodeGen/AArch64/arm64-vadd.ll | 5 | ||||
-rw-r--r-- | llvm/test/CodeGen/AArch64/f16-instructions.ll | 21 | ||||
-rw-r--r-- | llvm/test/CodeGen/AArch64/fcvt-fixed.ll | 112 | ||||
-rw-r--r-- | llvm/test/CodeGen/AArch64/fdiv-combine.ll | 152 | ||||
-rw-r--r-- | llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll | 20 | ||||
-rw-r--r-- | llvm/test/CodeGen/AArch64/vecreduce-fadd.ll | 42 | ||||
-rw-r--r-- | llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll | 12 |
12 files changed, 247 insertions, 212 deletions
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-fconstant.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-fconstant.mir index 6362ed6..9381f0f4 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-fconstant.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-fconstant.mir @@ -1,11 +1,12 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s -# RUN: llc -debugify-and-strip-all-safe -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s +# RUN: llc -run-pass=aarch64-postlegalizer-lowering -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s +# RUN: llc -debugify-and-strip-all-safe -run-pass=aarch64-postlegalizer-lowering -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s ... --- name: fconstant_to_constant_s32 alignment: 4 tracksRegLiveness: true +legalized: true frameInfo: maxAlignment: 1 machineFunctionInfo: {} @@ -24,16 +25,17 @@ body: | ; CHECK-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p0) :: (store (s32)) ; CHECK-NEXT: RET_ReallyLR %0:_(p0) = COPY $x0 - %3:_(s32) = G_FCONSTANT float 0x3FA99999A0000000 - %1:_(s64) = G_CONSTANT i64 524 - %2:_(p0) = G_PTR_ADD %0, %1(s64) - G_STORE %3(s32), %2(p0) :: (store (s32)) + %1:_(s32) = G_FCONSTANT float 0x3FA99999A0000000 + %2:_(s64) = G_CONSTANT i64 524 + %3:_(p0) = G_PTR_ADD %0, %2(s64) + G_STORE %1(s32), %3(p0) :: (store (s32)) RET_ReallyLR ... --- name: fconstant_to_constant_s64 alignment: 4 tracksRegLiveness: true +legalized: true frameInfo: maxAlignment: 1 machineFunctionInfo: {} @@ -48,7 +50,7 @@ body: | ; CHECK-NEXT: G_STORE %c(s64), %ptr(p0) :: (store (s64)) ; CHECK-NEXT: RET_ReallyLR %ptr:_(p0) = COPY $x0 - %c:_(s64) = G_FCONSTANT double 0.0 + %c:_(s64) = G_FCONSTANT double 0.000000e+00 G_STORE %c(s64), %ptr(p0) :: (store (s64)) RET_ReallyLR ... @@ -56,6 +58,7 @@ body: | name: no_store_means_no_combine alignment: 4 tracksRegLiveness: true +legalized: true frameInfo: maxAlignment: 1 machineFunctionInfo: {} @@ -71,7 +74,7 @@ body: | ; CHECK-NEXT: %add:_(s64) = G_FADD %v, %c ; CHECK-NEXT: RET_ReallyLR implicit %add(s64) %v:_(s64) = COPY $x0 - %c:_(s64) = G_FCONSTANT double 0.0 + %c:_(s64) = G_FCONSTANT double 0.000000e+00 %add:_(s64) = G_FADD %v, %c - RET_ReallyLR implicit %add + RET_ReallyLR implicit %add(s64) ... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir index c301e76..c00ce22 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir @@ -48,8 +48,9 @@ body: | ; CHECK-NEXT: $w0 = COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00 ; CHECK-NEXT: $x0 = COPY [[C1]](s64) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: $w0 = COPY [[C2]](s32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C2]](s16) + ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32) %0:_(s32) = G_FCONSTANT float 1.0 $w0 = COPY %0 %1:_(s64) = G_FCONSTANT double 2.0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp16-fconstant.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp16-fconstant.mir index ddf219d..c6df345 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp16-fconstant.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp16-fconstant.mir @@ -8,7 +8,7 @@ tracksRegLiveness: true body: | bb.0: ; NO-FP16-LABEL: name: fp16 - ; NO-FP16: %cst:_(s16) = G_CONSTANT i16 0 + ; NO-FP16: %cst:_(s16) = G_FCONSTANT half 0xH0000 ; NO-FP16-NEXT: $h0 = COPY %cst(s16) ; NO-FP16-NEXT: RET_ReallyLR implicit $h0 ; @@ -26,7 +26,7 @@ tracksRegLiveness: true body: | bb.0: ; NO-FP16-LABEL: name: fp16_non_zero - ; NO-FP16: %cst:_(s16) = G_CONSTANT i16 16384 + ; NO-FP16: %cst:_(s16) = G_FCONSTANT half 0xH4000 ; NO-FP16-NEXT: $h0 = COPY %cst(s16) ; NO-FP16-NEXT: RET_ReallyLR implicit $h0 ; @@ -44,7 +44,7 @@ tracksRegLiveness: true body: | bb.1.entry: ; NO-FP16-LABEL: name: nan - ; NO-FP16: %cst:_(s16) = G_CONSTANT i16 31745 + ; NO-FP16: %cst:_(s16) = G_FCONSTANT half 0xH7C01 ; NO-FP16-NEXT: %ext:_(s32) = G_FPEXT %cst(s16) ; NO-FP16-NEXT: $w0 = COPY %ext(s32) ; NO-FP16-NEXT: RET_ReallyLR implicit $w0 diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll index cb5df07..322a96a 100644 --- a/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll +++ b/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll @@ -739,15 +739,14 @@ define ptr @postidx32_shalf(ptr %src, ptr %out, half %a) { ; ; GISEL-LABEL: postidx32_shalf: ; GISEL: ; %bb.0: -; GISEL-NEXT: mov w8, #0 ; =0x0 -; GISEL-NEXT: ldr h1, [x0], #4 -; GISEL-NEXT: fmov s2, w8 +; GISEL-NEXT: movi d1, #0000000000000000 +; GISEL-NEXT: ldr h2, [x0], #4 ; GISEL-NEXT: ; kill: def $h0 killed $h0 def $s0 ; GISEL-NEXT: fmov w9, s0 -; GISEL-NEXT: fcvt s3, h1 -; GISEL-NEXT: fmov w8, s1 -; GISEL-NEXT: fcvt s2, h2 -; GISEL-NEXT: fcmp s3, s2 +; GISEL-NEXT: fcvt s3, h2 +; GISEL-NEXT: fmov w8, s2 +; GISEL-NEXT: fcvt s1, h1 +; GISEL-NEXT: fcmp s3, s1 ; GISEL-NEXT: csel w8, w8, w9, mi ; GISEL-NEXT: strh w8, [x1] ; GISEL-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/arm64-saddlp1d-uaddlp1d.mir b/llvm/test/CodeGen/AArch64/arm64-saddlp1d-uaddlp1d.mir new file mode 100644 index 0000000..074f75a --- /dev/null +++ b/llvm/test/CodeGen/AArch64/arm64-saddlp1d-uaddlp1d.mir @@ -0,0 +1,50 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6 +# RUN: llc -mtriple=aarch64 -run-pass=regbankselect -verify-machineinstrs %s -o - | FileCheck %s + +--- +name: saddlp1d +legalized: true +regBankSelected: false +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $x0 + + ; CHECK-LABEL: name: saddlp1d + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(p0) = COPY $x0 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:fpr(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>)) + ; CHECK-NEXT: [[SADDLP:%[0-9]+]]:fpr(s64) = G_SADDLP [[LOAD]] + ; CHECK-NEXT: $d0 = COPY [[SADDLP]](s64) + ; CHECK-NEXT: RET_ReallyLR implicit $d0 + %0:_(p0) = COPY $x0 + %1:_(<2 x s32>) = G_LOAD %0(p0) :: (load (<2 x s32>)) + %2:_(s64) = G_SADDLP %1 + $d0 = COPY %2(s64) + RET_ReallyLR implicit $d0 +... +--- +name: uaddlp1d +legalized: true +regBankSelected: false +failedISel: false +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $x0 + + ; CHECK-LABEL: name: uaddlp1d + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(p0) = COPY $x0 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:fpr(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>)) + ; CHECK-NEXT: [[UADDLP:%[0-9]+]]:fpr(s64) = G_UADDLP [[LOAD]] + ; CHECK-NEXT: $d0 = COPY [[UADDLP]](s64) + ; CHECK-NEXT: RET_ReallyLR implicit $d0 + %0:_(p0) = COPY $x0 + %1:_(<2 x s32>) = G_LOAD %0(p0) :: (load (<2 x s32>)) + %2:_(s64) = G_UADDLP %1 + $d0 = COPY %2(s64) + RET_ReallyLR implicit $d0 +... diff --git a/llvm/test/CodeGen/AArch64/arm64-vadd.ll b/llvm/test/CodeGen/AArch64/arm64-vadd.ll index 938712a..3cf0115 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vadd.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vadd.ll @@ -1,9 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ; RUN: llc < %s -mtriple=arm64-eabi | FileCheck %s --check-prefixes=CHECK,CHECK-SD -; RUN: llc < %s -mtriple=arm64-eabi -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI - -; CHECK-GI: warning: Instruction selection used fallback path for saddlp1d -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uaddlp1d +; RUN: llc < %s -mtriple=arm64-eabi -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI define <8 x i8> @addhn8b(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: addhn8b: diff --git a/llvm/test/CodeGen/AArch64/f16-instructions.ll b/llvm/test/CodeGen/AArch64/f16-instructions.ll index adc536d..b234ef7 100644 --- a/llvm/test/CodeGen/AArch64/f16-instructions.ll +++ b/llvm/test/CodeGen/AArch64/f16-instructions.ll @@ -782,18 +782,19 @@ define void @test_fccmp(half %in, ptr %out) { ; ; CHECK-CVT-GI-LABEL: test_fccmp: ; CHECK-CVT-GI: // %bb.0: -; CHECK-CVT-GI-NEXT: mov w8, #17664 // =0x4500 -; CHECK-CVT-GI-NEXT: mov w9, #18432 // =0x4800 +; CHECK-CVT-GI-NEXT: adrp x8, .LCPI29_0 ; CHECK-CVT-GI-NEXT: // kill: def $h0 killed $h0 def $s0 ; CHECK-CVT-GI-NEXT: fcvt s2, h0 -; CHECK-CVT-GI-NEXT: fmov s1, w8 -; CHECK-CVT-GI-NEXT: fmov s3, w9 -; CHECK-CVT-GI-NEXT: fmov w9, s0 -; CHECK-CVT-GI-NEXT: fcvt s1, h1 -; CHECK-CVT-GI-NEXT: fcvt s3, h3 -; CHECK-CVT-GI-NEXT: fcmp s2, s1 -; CHECK-CVT-GI-NEXT: fccmp s2, s3, #4, mi -; CHECK-CVT-GI-NEXT: csel w8, w9, w8, gt +; CHECK-CVT-GI-NEXT: ldr h1, [x8, :lo12:.LCPI29_0] +; CHECK-CVT-GI-NEXT: adrp x8, .LCPI29_1 +; CHECK-CVT-GI-NEXT: ldr h4, [x8, :lo12:.LCPI29_1] +; CHECK-CVT-GI-NEXT: fmov w8, s0 +; CHECK-CVT-GI-NEXT: fcvt s3, h1 +; CHECK-CVT-GI-NEXT: fmov w9, s1 +; CHECK-CVT-GI-NEXT: fcvt s4, h4 +; CHECK-CVT-GI-NEXT: fcmp s2, s3 +; CHECK-CVT-GI-NEXT: fccmp s2, s4, #4, mi +; CHECK-CVT-GI-NEXT: csel w8, w8, w9, gt ; CHECK-CVT-GI-NEXT: strh w8, [x0] ; CHECK-CVT-GI-NEXT: ret ; diff --git a/llvm/test/CodeGen/AArch64/fcvt-fixed.ll b/llvm/test/CodeGen/AArch64/fcvt-fixed.ll index 51aad4fe..7409bfb 100644 --- a/llvm/test/CodeGen/AArch64/fcvt-fixed.ll +++ b/llvm/test/CodeGen/AArch64/fcvt-fixed.ll @@ -166,9 +166,9 @@ define i32 @fcvtzs_f16_i32_7(half %flt) { ; ; CHECK-GI-NO16-LABEL: fcvtzs_f16_i32_7: ; CHECK-GI-NO16: // %bb.0: -; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI8_0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 -; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI8_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -206,9 +206,9 @@ define i32 @fcvtzs_f16_i32_15(half %flt) { ; ; CHECK-GI-NO16-LABEL: fcvtzs_f16_i32_15: ; CHECK-GI-NO16: // %bb.0: -; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI9_0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 -; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI9_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -246,9 +246,9 @@ define i64 @fcvtzs_f16_i64_7(half %flt) { ; ; CHECK-GI-NO16-LABEL: fcvtzs_f16_i64_7: ; CHECK-GI-NO16: // %bb.0: -; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI10_0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 -; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI10_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -286,9 +286,9 @@ define i64 @fcvtzs_f16_i64_15(half %flt) { ; ; CHECK-GI-NO16-LABEL: fcvtzs_f16_i64_15: ; CHECK-GI-NO16: // %bb.0: -; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI11_0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 -; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI11_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -470,9 +470,9 @@ define i32 @fcvtzu_f16_i32_7(half %flt) { ; ; CHECK-GI-NO16-LABEL: fcvtzu_f16_i32_7: ; CHECK-GI-NO16: // %bb.0: -; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI20_0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 -; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI20_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -510,9 +510,9 @@ define i32 @fcvtzu_f16_i32_15(half %flt) { ; ; CHECK-GI-NO16-LABEL: fcvtzu_f16_i32_15: ; CHECK-GI-NO16: // %bb.0: -; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI21_0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 -; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI21_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -550,9 +550,9 @@ define i64 @fcvtzu_f16_i64_7(half %flt) { ; ; CHECK-GI-NO16-LABEL: fcvtzu_f16_i64_7: ; CHECK-GI-NO16: // %bb.0: -; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI22_0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 -; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI22_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -590,9 +590,9 @@ define i64 @fcvtzu_f16_i64_15(half %flt) { ; ; CHECK-GI-NO16-LABEL: fcvtzu_f16_i64_15: ; CHECK-GI-NO16: // %bb.0: -; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI23_0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 -; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI23_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -775,10 +775,10 @@ define half @scvtf_f16_i32_7(i32 %int) { ; CHECK-GI-NO16-LABEL: scvtf_f16_i32_7: ; CHECK-GI-NO16: // %bb.0: ; CHECK-GI-NO16-NEXT: scvtf s0, w0 -; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800 -; CHECK-GI-NO16-NEXT: fmov s1, w8 -; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI32_0 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI32_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 ; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -815,10 +815,10 @@ define half @scvtf_f16_i32_15(i32 %int) { ; CHECK-GI-NO16-LABEL: scvtf_f16_i32_15: ; CHECK-GI-NO16: // %bb.0: ; CHECK-GI-NO16-NEXT: scvtf s0, w0 -; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800 -; CHECK-GI-NO16-NEXT: fmov s1, w8 -; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI33_0 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI33_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 ; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -855,10 +855,10 @@ define half @scvtf_f16_i64_7(i64 %long) { ; CHECK-GI-NO16-LABEL: scvtf_f16_i64_7: ; CHECK-GI-NO16: // %bb.0: ; CHECK-GI-NO16-NEXT: scvtf s0, x0 -; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800 -; CHECK-GI-NO16-NEXT: fmov s1, w8 -; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI34_0 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI34_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 ; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -895,10 +895,10 @@ define half @scvtf_f16_i64_15(i64 %long) { ; CHECK-GI-NO16-LABEL: scvtf_f16_i64_15: ; CHECK-GI-NO16: // %bb.0: ; CHECK-GI-NO16-NEXT: scvtf s0, x0 -; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800 -; CHECK-GI-NO16-NEXT: fmov s1, w8 -; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI35_0 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI35_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 ; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -1079,10 +1079,10 @@ define half @ucvtf_f16_i32_7(i32 %int) { ; CHECK-GI-NO16-LABEL: ucvtf_f16_i32_7: ; CHECK-GI-NO16: // %bb.0: ; CHECK-GI-NO16-NEXT: ucvtf s0, w0 -; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800 -; CHECK-GI-NO16-NEXT: fmov s1, w8 -; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI44_0 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI44_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 ; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -1119,10 +1119,10 @@ define half @ucvtf_f16_i32_15(i32 %int) { ; CHECK-GI-NO16-LABEL: ucvtf_f16_i32_15: ; CHECK-GI-NO16: // %bb.0: ; CHECK-GI-NO16-NEXT: ucvtf s0, w0 -; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800 -; CHECK-GI-NO16-NEXT: fmov s1, w8 -; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI45_0 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI45_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 ; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -1159,10 +1159,10 @@ define half @ucvtf_f16_i64_7(i64 %long) { ; CHECK-GI-NO16-LABEL: ucvtf_f16_i64_7: ; CHECK-GI-NO16: // %bb.0: ; CHECK-GI-NO16-NEXT: ucvtf s0, x0 -; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800 -; CHECK-GI-NO16-NEXT: fmov s1, w8 -; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI46_0 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI46_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 ; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -1199,10 +1199,10 @@ define half @ucvtf_f16_i64_15(i64 %long) { ; CHECK-GI-NO16-LABEL: ucvtf_f16_i64_15: ; CHECK-GI-NO16: // %bb.0: ; CHECK-GI-NO16-NEXT: ucvtf s0, x0 -; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800 -; CHECK-GI-NO16-NEXT: fmov s1, w8 -; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI47_0 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI47_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 ; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -1373,9 +1373,9 @@ define i32 @fcvtzs_sat_f16_i32_7(half %dbl) { ; ; CHECK-GI-NO16-LABEL: fcvtzs_sat_f16_i32_7: ; CHECK-GI-NO16: // %bb.0: -; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI55_0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 -; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI55_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -1413,9 +1413,9 @@ define i32 @fcvtzs_sat_f16_i32_15(half %dbl) { ; ; CHECK-GI-NO16-LABEL: fcvtzs_sat_f16_i32_15: ; CHECK-GI-NO16: // %bb.0: -; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI56_0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 -; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI56_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -1453,9 +1453,9 @@ define i64 @fcvtzs_sat_f16_i64_7(half %dbl) { ; ; CHECK-GI-NO16-LABEL: fcvtzs_sat_f16_i64_7: ; CHECK-GI-NO16: // %bb.0: -; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI57_0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 -; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI57_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -1493,9 +1493,9 @@ define i64 @fcvtzs_sat_f16_i64_15(half %dbl) { ; ; CHECK-GI-NO16-LABEL: fcvtzs_sat_f16_i64_15: ; CHECK-GI-NO16: // %bb.0: -; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI58_0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 -; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI58_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -1667,9 +1667,9 @@ define i32 @fcvtzu_sat_f16_i32_7(half %dbl) { ; ; CHECK-GI-NO16-LABEL: fcvtzu_sat_f16_i32_7: ; CHECK-GI-NO16: // %bb.0: -; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI66_0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 -; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI66_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -1707,9 +1707,9 @@ define i32 @fcvtzu_sat_f16_i32_15(half %dbl) { ; ; CHECK-GI-NO16-LABEL: fcvtzu_sat_f16_i32_15: ; CHECK-GI-NO16: // %bb.0: -; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI67_0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 -; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI67_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -1747,9 +1747,9 @@ define i64 @fcvtzu_sat_f16_i64_7(half %dbl) { ; ; CHECK-GI-NO16-LABEL: fcvtzu_sat_f16_i64_7: ; CHECK-GI-NO16: // %bb.0: -; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI68_0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 -; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI68_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -1787,9 +1787,9 @@ define i64 @fcvtzu_sat_f16_i64_15(half %dbl) { ; ; CHECK-GI-NO16-LABEL: fcvtzu_sat_f16_i64_15: ; CHECK-GI-NO16: // %bb.0: -; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI69_0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 -; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI69_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 diff --git a/llvm/test/CodeGen/AArch64/fdiv-combine.ll b/llvm/test/CodeGen/AArch64/fdiv-combine.ll index 91bb8ac..9eacb61 100644 --- a/llvm/test/CodeGen/AArch64/fdiv-combine.ll +++ b/llvm/test/CodeGen/AArch64/fdiv-combine.ll @@ -12,22 +12,14 @@ ; => ; recip = 1.0 / D; a * recip; b * recip; c * recip; define void @three_fdiv_float(float %D, float %a, float %b, float %c) { -; CHECK-SD-LABEL: three_fdiv_float: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: fmov s4, #1.00000000 -; CHECK-SD-NEXT: fdiv s4, s4, s0 -; CHECK-SD-NEXT: fmul s0, s1, s4 -; CHECK-SD-NEXT: fmul s1, s2, s4 -; CHECK-SD-NEXT: fmul s2, s3, s4 -; CHECK-SD-NEXT: b foo_3f -; -; CHECK-GI-LABEL: three_fdiv_float: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: fdiv s4, s1, s0 -; CHECK-GI-NEXT: fdiv s1, s2, s0 -; CHECK-GI-NEXT: fdiv s2, s3, s0 -; CHECK-GI-NEXT: fmov s0, s4 -; CHECK-GI-NEXT: b foo_3f +; CHECK-LABEL: three_fdiv_float: +; CHECK: // %bb.0: +; CHECK-NEXT: fmov s4, #1.00000000 +; CHECK-NEXT: fdiv s4, s4, s0 +; CHECK-NEXT: fmul s0, s1, s4 +; CHECK-NEXT: fmul s1, s2, s4 +; CHECK-NEXT: fmul s2, s3, s4 +; CHECK-NEXT: b foo_3f %div = fdiv arcp float %a, %D %div1 = fdiv arcp float %b, %D %div2 = fdiv arcp float %c, %D @@ -36,22 +28,14 @@ define void @three_fdiv_float(float %D, float %a, float %b, float %c) { } define void @three_fdiv_double(double %D, double %a, double %b, double %c) { -; CHECK-SD-LABEL: three_fdiv_double: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: fmov d4, #1.00000000 -; CHECK-SD-NEXT: fdiv d4, d4, d0 -; CHECK-SD-NEXT: fmul d0, d1, d4 -; CHECK-SD-NEXT: fmul d1, d2, d4 -; CHECK-SD-NEXT: fmul d2, d3, d4 -; CHECK-SD-NEXT: b foo_3d -; -; CHECK-GI-LABEL: three_fdiv_double: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: fdiv d4, d1, d0 -; CHECK-GI-NEXT: fdiv d1, d2, d0 -; CHECK-GI-NEXT: fdiv d2, d3, d0 -; CHECK-GI-NEXT: fmov d0, d4 -; CHECK-GI-NEXT: b foo_3d +; CHECK-LABEL: three_fdiv_double: +; CHECK: // %bb.0: +; CHECK-NEXT: fmov d4, #1.00000000 +; CHECK-NEXT: fdiv d4, d4, d0 +; CHECK-NEXT: fmul d0, d1, d4 +; CHECK-NEXT: fmul d1, d2, d4 +; CHECK-NEXT: fmul d2, d3, d4 +; CHECK-NEXT: b foo_3d %div = fdiv arcp double %a, %D %div1 = fdiv arcp double %b, %D %div2 = fdiv arcp double %c, %D @@ -60,22 +44,14 @@ define void @three_fdiv_double(double %D, double %a, double %b, double %c) { } define void @three_fdiv_4xfloat(<4 x float> %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) { -; CHECK-SD-LABEL: three_fdiv_4xfloat: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: fmov v4.4s, #1.00000000 -; CHECK-SD-NEXT: fdiv v4.4s, v4.4s, v0.4s -; CHECK-SD-NEXT: fmul v0.4s, v1.4s, v4.4s -; CHECK-SD-NEXT: fmul v1.4s, v2.4s, v4.4s -; CHECK-SD-NEXT: fmul v2.4s, v3.4s, v4.4s -; CHECK-SD-NEXT: b foo_3_4xf -; -; CHECK-GI-LABEL: three_fdiv_4xfloat: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: fdiv v4.4s, v1.4s, v0.4s -; CHECK-GI-NEXT: fdiv v1.4s, v2.4s, v0.4s -; CHECK-GI-NEXT: fdiv v2.4s, v3.4s, v0.4s -; CHECK-GI-NEXT: mov v0.16b, v4.16b -; CHECK-GI-NEXT: b foo_3_4xf +; CHECK-LABEL: three_fdiv_4xfloat: +; CHECK: // %bb.0: +; CHECK-NEXT: fmov v4.4s, #1.00000000 +; CHECK-NEXT: fdiv v4.4s, v4.4s, v0.4s +; CHECK-NEXT: fmul v0.4s, v1.4s, v4.4s +; CHECK-NEXT: fmul v1.4s, v2.4s, v4.4s +; CHECK-NEXT: fmul v2.4s, v3.4s, v4.4s +; CHECK-NEXT: b foo_3_4xf %div = fdiv arcp <4 x float> %a, %D %div1 = fdiv arcp <4 x float> %b, %D %div2 = fdiv arcp <4 x float> %c, %D @@ -84,22 +60,14 @@ define void @three_fdiv_4xfloat(<4 x float> %D, <4 x float> %a, <4 x float> %b, } define void @three_fdiv_2xdouble(<2 x double> %D, <2 x double> %a, <2 x double> %b, <2 x double> %c) { -; CHECK-SD-LABEL: three_fdiv_2xdouble: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: fmov v4.2d, #1.00000000 -; CHECK-SD-NEXT: fdiv v4.2d, v4.2d, v0.2d -; CHECK-SD-NEXT: fmul v0.2d, v1.2d, v4.2d -; CHECK-SD-NEXT: fmul v1.2d, v2.2d, v4.2d -; CHECK-SD-NEXT: fmul v2.2d, v3.2d, v4.2d -; CHECK-SD-NEXT: b foo_3_2xd -; -; CHECK-GI-LABEL: three_fdiv_2xdouble: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: fdiv v4.2d, v1.2d, v0.2d -; CHECK-GI-NEXT: fdiv v1.2d, v2.2d, v0.2d -; CHECK-GI-NEXT: fdiv v2.2d, v3.2d, v0.2d -; CHECK-GI-NEXT: mov v0.16b, v4.16b -; CHECK-GI-NEXT: b foo_3_2xd +; CHECK-LABEL: three_fdiv_2xdouble: +; CHECK: // %bb.0: +; CHECK-NEXT: fmov v4.2d, #1.00000000 +; CHECK-NEXT: fdiv v4.2d, v4.2d, v0.2d +; CHECK-NEXT: fmul v0.2d, v1.2d, v4.2d +; CHECK-NEXT: fmul v1.2d, v2.2d, v4.2d +; CHECK-NEXT: fmul v2.2d, v3.2d, v4.2d +; CHECK-NEXT: b foo_3_2xd %div = fdiv arcp <2 x double> %a, %D %div1 = fdiv arcp <2 x double> %b, %D %div2 = fdiv arcp <2 x double> %c, %D @@ -135,26 +103,47 @@ define void @two_fdiv_double(double %D, double %a, double %b) { ret void } -define void @splat_three_fdiv_4xfloat(float %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) { -; CHECK-SD-LABEL: splat_three_fdiv_4xfloat: +define void @four_fdiv_multi_float(float %D, float %a, float %b, float %c) #0 { +; CHECK-SD-LABEL: four_fdiv_multi_float: ; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: fmov v4.4s, #1.00000000 -; CHECK-SD-NEXT: dup v0.4s, v0.s[0] -; CHECK-SD-NEXT: fdiv v4.4s, v4.4s, v0.4s -; CHECK-SD-NEXT: fmul v0.4s, v1.4s, v4.4s -; CHECK-SD-NEXT: fmul v1.4s, v2.4s, v4.4s -; CHECK-SD-NEXT: fmul v2.4s, v3.4s, v4.4s -; CHECK-SD-NEXT: b foo_3_4xf +; CHECK-SD-NEXT: fmov s4, #1.00000000 +; CHECK-SD-NEXT: fdiv s5, s4, s0 +; CHECK-SD-NEXT: fmul s4, s1, s5 +; CHECK-SD-NEXT: fmul s1, s2, s5 +; CHECK-SD-NEXT: fmul s2, s3, s5 +; CHECK-SD-NEXT: fmul s3, s0, s5 +; CHECK-SD-NEXT: fmov s0, s4 +; CHECK-SD-NEXT: b foo_4f ; -; CHECK-GI-LABEL: splat_three_fdiv_4xfloat: +; CHECK-GI-LABEL: four_fdiv_multi_float: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: dup v4.4s, v0.s[0] -; CHECK-GI-NEXT: fdiv v0.4s, v1.4s, v4.4s -; CHECK-GI-NEXT: fdiv v1.4s, v2.4s, v4.4s -; CHECK-GI-NEXT: fdiv v2.4s, v3.4s, v4.4s -; CHECK-GI-NEXT: b foo_3_4xf +; CHECK-GI-NEXT: fmov s4, #1.00000000 +; CHECK-GI-NEXT: fdiv s5, s4, s0 +; CHECK-GI-NEXT: fdiv s4, s0, s0 +; CHECK-GI-NEXT: fmul s0, s1, s5 +; CHECK-GI-NEXT: fmul s1, s2, s5 +; CHECK-GI-NEXT: fmul s2, s3, s5 +; CHECK-GI-NEXT: fmov s3, s4 +; CHECK-GI-NEXT: b foo_4f + %div = fdiv arcp float %a, %D + %div1 = fdiv arcp float %b, %D + %div2 = fdiv arcp float %c, %D + %div3 = fdiv arcp float %D, %D + tail call void @foo_4f(float %div, float %div1, float %div2, float %div3) + ret void +} + +define void @splat_three_fdiv_4xfloat(float %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) { +; CHECK-LABEL: splat_three_fdiv_4xfloat: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 +; CHECK-NEXT: fmov v4.4s, #1.00000000 +; CHECK-NEXT: dup v0.4s, v0.s[0] +; CHECK-NEXT: fdiv v4.4s, v4.4s, v0.4s +; CHECK-NEXT: fmul v0.4s, v1.4s, v4.4s +; CHECK-NEXT: fmul v1.4s, v2.4s, v4.4s +; CHECK-NEXT: fmul v2.4s, v3.4s, v4.4s +; CHECK-NEXT: b foo_3_4xf %D.ins = insertelement <4 x float> poison, float %D, i64 0 %splat = shufflevector <4 x float> %D.ins, <4 x float> poison, <4 x i32> zeroinitializer %div = fdiv arcp <4 x float> %a, %splat @@ -256,6 +245,7 @@ entry: } declare void @foo_3f(float, float, float) +declare void @foo_4f(float, float, float, float) declare void @foo_3d(double, double, double) declare void @foo_3_4xf(<4 x float>, <4 x float>, <4 x float>) declare void @foo_3_2xd(<2 x double>, <2 x double>, <2 x double>) diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll b/llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll index 594a3ab..be07978 100644 --- a/llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll @@ -38,10 +38,10 @@ define half @add_v2HalfH(<2 x half> %bin.rdx) { ; ; CHECK-GI-NOFP16-LABEL: add_v2HalfH: ; CHECK-GI-NOFP16: // %bb.0: -; CHECK-GI-NOFP16-NEXT: mov w8, #32768 // =0x8000 +; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI1_0 ; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h0 -; CHECK-GI-NOFP16-NEXT: fmov s1, w8 +; CHECK-GI-NOFP16-NEXT: ldr h1, [x8, :lo12:.LCPI1_0] ; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[1] ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 ; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 @@ -88,10 +88,10 @@ define half @add_v3HalfH(<3 x half> %bin.rdx) { ; ; CHECK-GI-NOFP16-LABEL: add_v3HalfH: ; CHECK-GI-NOFP16: // %bb.0: -; CHECK-GI-NOFP16-NEXT: mov w8, #32768 // =0x8000 +; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI2_0 ; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h0 -; CHECK-GI-NOFP16-NEXT: fmov s1, w8 +; CHECK-GI-NOFP16-NEXT: ldr h1, [x8, :lo12:.LCPI2_0] ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 ; CHECK-GI-NOFP16-NEXT: fadd s1, s1, s2 ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1] @@ -152,10 +152,10 @@ define half @add_HalfH(<4 x half> %bin.rdx) { ; ; CHECK-GI-NOFP16-LABEL: add_HalfH: ; CHECK-GI-NOFP16: // %bb.0: -; CHECK-GI-NOFP16-NEXT: mov w8, #32768 // =0x8000 +; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI3_0 ; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h0 -; CHECK-GI-NOFP16-NEXT: fmov s1, w8 +; CHECK-GI-NOFP16-NEXT: ldr h1, [x8, :lo12:.LCPI3_0] ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 ; CHECK-GI-NOFP16-NEXT: fadd s1, s1, s2 ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1] @@ -250,9 +250,9 @@ define half @add_H(<8 x half> %bin.rdx) { ; ; CHECK-GI-NOFP16-LABEL: add_H: ; CHECK-GI-NOFP16: // %bb.0: -; CHECK-GI-NOFP16-NEXT: mov w8, #32768 // =0x8000 +; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI4_0 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h0 -; CHECK-GI-NOFP16-NEXT: fmov s1, w8 +; CHECK-GI-NOFP16-NEXT: ldr h1, [x8, :lo12:.LCPI4_0] ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 ; CHECK-GI-NOFP16-NEXT: fadd s1, s1, s2 ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1] @@ -448,9 +448,9 @@ define half @add_2H(<16 x half> %bin.rdx) { ; ; CHECK-GI-NOFP16-LABEL: add_2H: ; CHECK-GI-NOFP16: // %bb.0: -; CHECK-GI-NOFP16-NEXT: mov w8, #32768 // =0x8000 +; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI7_0 ; CHECK-GI-NOFP16-NEXT: fcvt s3, h0 -; CHECK-GI-NOFP16-NEXT: fmov s2, w8 +; CHECK-GI-NOFP16-NEXT: ldr h2, [x8, :lo12:.LCPI7_0] ; CHECK-GI-NOFP16-NEXT: fcvt s2, h2 ; CHECK-GI-NOFP16-NEXT: fadd s2, s2, s3 ; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[1] diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll b/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll index 18f463c..40925da 100644 --- a/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll @@ -405,26 +405,23 @@ define half @fadd_reduction_v4f16_in_loop(ptr %ptr.start) { ; ; CHECK-GI-NOFP16-LABEL: fadd_reduction_v4f16_in_loop: ; CHECK-GI-NOFP16: // %bb.0: // %entry +; CHECK-GI-NOFP16-NEXT: movi d0, #0000000000000000 ; CHECK-GI-NOFP16-NEXT: mov x8, xzr -; CHECK-GI-NOFP16-NEXT: mov w9, #0 // =0x0 ; CHECK-GI-NOFP16-NEXT: .LBB13_1: // %loop ; CHECK-GI-NOFP16-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-GI-NOFP16-NEXT: ldr d0, [x0, x8] -; CHECK-GI-NOFP16-NEXT: fmov s1, w9 +; CHECK-GI-NOFP16-NEXT: ldr d1, [x0, x8] +; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 ; CHECK-GI-NOFP16-NEXT: add x8, x8, #8 ; CHECK-GI-NOFP16-NEXT: cmp w8, #56 -; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h +; CHECK-GI-NOFP16-NEXT: faddp v1.4s, v1.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: faddp s1, v1.2s +; CHECK-GI-NOFP16-NEXT: fcvt h1, s1 ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 -; CHECK-GI-NOFP16-NEXT: faddp v0.4s, v0.4s, v0.4s -; CHECK-GI-NOFP16-NEXT: faddp s0, v0.2s -; CHECK-GI-NOFP16-NEXT: fcvt h0, s0 -; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 -; CHECK-GI-NOFP16-NEXT: fadd s0, s0, s1 +; CHECK-GI-NOFP16-NEXT: fadd s0, s1, s0 ; CHECK-GI-NOFP16-NEXT: fcvt h0, s0 -; CHECK-GI-NOFP16-NEXT: fmov w9, s0 ; CHECK-GI-NOFP16-NEXT: b.ne .LBB13_1 ; CHECK-GI-NOFP16-NEXT: // %bb.2: // %exit -; CHECK-GI-NOFP16-NEXT: // kill: def $h0 killed $h0 killed $s0 ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: fadd_reduction_v4f16_in_loop: @@ -521,28 +518,25 @@ define half @fadd_reduction_v8f16_in_loop(ptr %ptr.start) { ; ; CHECK-GI-NOFP16-LABEL: fadd_reduction_v8f16_in_loop: ; CHECK-GI-NOFP16: // %bb.0: // %entry +; CHECK-GI-NOFP16-NEXT: movi d0, #0000000000000000 ; CHECK-GI-NOFP16-NEXT: mov x8, xzr -; CHECK-GI-NOFP16-NEXT: mov w9, #0 // =0x0 ; CHECK-GI-NOFP16-NEXT: .LBB14_1: // %loop ; CHECK-GI-NOFP16-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-GI-NOFP16-NEXT: ldr q0, [x0, x8] +; CHECK-GI-NOFP16-NEXT: ldr q1, [x0, x8] +; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 ; CHECK-GI-NOFP16-NEXT: add x8, x8, #8 ; CHECK-GI-NOFP16-NEXT: cmp w8, #56 -; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v0.4h -; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h -; CHECK-GI-NOFP16-NEXT: fadd v0.4s, v1.4s, v0.4s -; CHECK-GI-NOFP16-NEXT: fmov s1, w9 +; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v1.4h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v2.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: faddp v1.4s, v1.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: faddp s1, v1.2s +; CHECK-GI-NOFP16-NEXT: fcvt h1, s1 ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 -; CHECK-GI-NOFP16-NEXT: faddp v0.4s, v0.4s, v0.4s -; CHECK-GI-NOFP16-NEXT: faddp s0, v0.2s -; CHECK-GI-NOFP16-NEXT: fcvt h0, s0 -; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 -; CHECK-GI-NOFP16-NEXT: fadd s0, s0, s1 +; CHECK-GI-NOFP16-NEXT: fadd s0, s1, s0 ; CHECK-GI-NOFP16-NEXT: fcvt h0, s0 -; CHECK-GI-NOFP16-NEXT: fmov w9, s0 ; CHECK-GI-NOFP16-NEXT: b.ne .LBB14_1 ; CHECK-GI-NOFP16-NEXT: // %bb.2: // %exit -; CHECK-GI-NOFP16-NEXT: // kill: def $h0 killed $h0 killed $s0 ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: fadd_reduction_v8f16_in_loop: diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll index e1b2170..c10d6e9 100644 --- a/llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll @@ -52,10 +52,10 @@ define half @mul_HalfH(<4 x half> %bin.rdx) { ; ; CHECK-GI-NOFP16-LABEL: mul_HalfH: ; CHECK-GI-NOFP16: // %bb.0: -; CHECK-GI-NOFP16-NEXT: mov w8, #15360 // =0x3c00 +; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI1_0 ; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h0 -; CHECK-GI-NOFP16-NEXT: fmov s1, w8 +; CHECK-GI-NOFP16-NEXT: ldr h1, [x8, :lo12:.LCPI1_0] ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 ; CHECK-GI-NOFP16-NEXT: fmul s1, s1, s2 ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1] @@ -144,9 +144,9 @@ define half @mul_H(<8 x half> %bin.rdx) { ; ; CHECK-GI-NOFP16-LABEL: mul_H: ; CHECK-GI-NOFP16: // %bb.0: -; CHECK-GI-NOFP16-NEXT: mov w8, #15360 // =0x3c00 +; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI2_0 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h0 -; CHECK-GI-NOFP16-NEXT: fmov s1, w8 +; CHECK-GI-NOFP16-NEXT: ldr h1, [x8, :lo12:.LCPI2_0] ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 ; CHECK-GI-NOFP16-NEXT: fmul s1, s1, s2 ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1] @@ -321,9 +321,9 @@ define half @mul_2H(<16 x half> %bin.rdx) { ; ; CHECK-GI-NOFP16-LABEL: mul_2H: ; CHECK-GI-NOFP16: // %bb.0: -; CHECK-GI-NOFP16-NEXT: mov w8, #15360 // =0x3c00 +; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI5_0 ; CHECK-GI-NOFP16-NEXT: fcvt s3, h0 -; CHECK-GI-NOFP16-NEXT: fmov s2, w8 +; CHECK-GI-NOFP16-NEXT: ldr h2, [x8, :lo12:.LCPI5_0] ; CHECK-GI-NOFP16-NEXT: fcvt s2, h2 ; CHECK-GI-NOFP16-NEXT: fmul s2, s2, s3 ; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[1] |