diff options
Diffstat (limited to 'llvm/test')
22 files changed, 8945 insertions, 269 deletions
diff --git a/llvm/test/CodeGen/AArch64/neon-mov.ll b/llvm/test/CodeGen/AArch64/neon-mov.ll index 5be9394..4f65786 100644 --- a/llvm/test/CodeGen/AArch64/neon-mov.ll +++ b/llvm/test/CodeGen/AArch64/neon-mov.ll @@ -76,6 +76,15 @@ define <2 x i32> @movi2s_lsl16() { ret <2 x i32> <i32 16711680, i32 16711680> } +define <2 x i32> @movi2s_fneg() { +; CHECK-LABEL: movi2s_fneg: +; CHECK: // %bb.0: +; CHECK-NEXT: movi v0.2s, #240, lsl #8 +; CHECK-NEXT: fneg v0.2s, v0.2s +; CHECK-NEXT: ret + ret <2 x i32> <i32 2147545088, i32 2147545088> +} + define <2 x i32> @movi2s_lsl24() { ; CHECK-LABEL: movi2s_lsl24: ; CHECK: // %bb.0: @@ -149,6 +158,33 @@ define <4 x i16> @movi4h_lsl8() { ret <4 x i16> <i16 65280, i16 65280, i16 65280, i16 65280> } +define <4 x i16> @movi4h_fneg() { +; CHECK-NOFP16-SD-LABEL: movi4h_fneg: +; CHECK-NOFP16-SD: // %bb.0: +; CHECK-NOFP16-SD-NEXT: movi v0.4h, #127, lsl #8 +; CHECK-NOFP16-SD-NEXT: fneg v0.2s, v0.2s +; CHECK-NOFP16-SD-NEXT: ret +; +; CHECK-FP16-SD-LABEL: movi4h_fneg: +; CHECK-FP16-SD: // %bb.0: +; CHECK-FP16-SD-NEXT: movi v0.4h, #127, lsl #8 +; CHECK-FP16-SD-NEXT: fneg v0.2s, v0.2s +; CHECK-FP16-SD-NEXT: ret +; +; CHECK-NOFP16-GI-LABEL: movi4h_fneg: +; CHECK-NOFP16-GI: // %bb.0: +; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI18_0 +; CHECK-NOFP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI18_0] +; CHECK-NOFP16-GI-NEXT: ret +; +; CHECK-FP16-GI-LABEL: movi4h_fneg: +; CHECK-FP16-GI: // %bb.0: +; CHECK-FP16-GI-NEXT: adrp x8, .LCPI18_0 +; CHECK-FP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI18_0] +; CHECK-FP16-GI-NEXT: ret + ret <4 x i16> <i16 32512, i16 65280, i16 32512, i16 65280> +} + define <8 x i16> @movi8h_lsl0() { ; CHECK-LABEL: movi8h_lsl0: ; CHECK: // %bb.0: @@ -180,14 +216,14 @@ define <8 x i16> @movi8h_fneg() { ; ; CHECK-NOFP16-GI-LABEL: movi8h_fneg: ; CHECK-NOFP16-GI: // %bb.0: -; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI19_0 -; CHECK-NOFP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI19_0] +; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI21_0 +; CHECK-NOFP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI21_0] ; CHECK-NOFP16-GI-NEXT: ret ; ; CHECK-FP16-GI-LABEL: movi8h_fneg: ; CHECK-FP16-GI: // %bb.0: -; CHECK-FP16-GI-NEXT: adrp x8, .LCPI19_0 -; CHECK-FP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI19_0] +; CHECK-FP16-GI-NEXT: adrp x8, .LCPI21_0 +; CHECK-FP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI21_0] ; CHECK-FP16-GI-NEXT: ret ret <8 x i16> <i16 32512, i16 65280, i16 32512, i16 65280, i16 32512, i16 65280, i16 32512, i16 65280> } @@ -275,6 +311,27 @@ define <4 x i16> @mvni4h_lsl8() { ret <4 x i16> <i16 61439, i16 61439, i16 61439, i16 61439> } +define <4 x i16> @mvni4h_neg() { +; CHECK-NOFP16-SD-LABEL: mvni4h_neg: +; CHECK-NOFP16-SD: // %bb.0: +; CHECK-NOFP16-SD-NEXT: mov w8, #33008 // =0x80f0 +; CHECK-NOFP16-SD-NEXT: dup v0.4h, w8 +; CHECK-NOFP16-SD-NEXT: ret +; +; CHECK-FP16-LABEL: mvni4h_neg: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: movi v0.4h, #240 +; CHECK-FP16-NEXT: fneg v0.4h, v0.4h +; CHECK-FP16-NEXT: ret +; +; CHECK-NOFP16-GI-LABEL: mvni4h_neg: +; CHECK-NOFP16-GI: // %bb.0: +; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI32_0 +; CHECK-NOFP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI32_0] +; CHECK-NOFP16-GI-NEXT: ret + ret <4 x i16> <i16 33008, i16 33008, i16 33008, i16 33008> +} + define <8 x i16> @mvni8h_lsl0() { ; CHECK-LABEL: mvni8h_lsl0: ; CHECK: // %bb.0: @@ -306,8 +363,8 @@ define <8 x i16> @mvni8h_neg() { ; ; CHECK-NOFP16-GI-LABEL: mvni8h_neg: ; CHECK-NOFP16-GI: // %bb.0: -; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI32_0 -; CHECK-NOFP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI32_0] +; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI35_0 +; CHECK-NOFP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI35_0] ; CHECK-NOFP16-GI-NEXT: ret ret <8 x i16> <i16 33008, i16 33008, i16 33008, i16 33008, i16 33008, i16 33008, i16 33008, i16 33008> } @@ -486,6 +543,33 @@ define <2 x double> @fmov2d_neg0() { ret <2 x double> <double -0.0, double -0.0> } +define <1 x double> @fmov1d_neg0() { +; CHECK-NOFP16-SD-LABEL: fmov1d_neg0: +; CHECK-NOFP16-SD: // %bb.0: +; CHECK-NOFP16-SD-NEXT: movi d0, #0000000000000000 +; CHECK-NOFP16-SD-NEXT: fneg d0, d0 +; CHECK-NOFP16-SD-NEXT: ret +; +; CHECK-FP16-SD-LABEL: fmov1d_neg0: +; CHECK-FP16-SD: // %bb.0: +; CHECK-FP16-SD-NEXT: movi d0, #0000000000000000 +; CHECK-FP16-SD-NEXT: fneg d0, d0 +; CHECK-FP16-SD-NEXT: ret +; +; CHECK-NOFP16-GI-LABEL: fmov1d_neg0: +; CHECK-NOFP16-GI: // %bb.0: +; CHECK-NOFP16-GI-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000 +; CHECK-NOFP16-GI-NEXT: fmov d0, x8 +; CHECK-NOFP16-GI-NEXT: ret +; +; CHECK-FP16-GI-LABEL: fmov1d_neg0: +; CHECK-FP16-GI: // %bb.0: +; CHECK-FP16-GI-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000 +; CHECK-FP16-GI-NEXT: fmov d0, x8 +; CHECK-FP16-GI-NEXT: ret + ret <1 x double> <double -0.0> +} + define <2 x i32> @movi1d_1() { ; CHECK-NOFP16-SD-LABEL: movi1d_1: ; CHECK-NOFP16-SD: // %bb.0: @@ -499,14 +583,14 @@ define <2 x i32> @movi1d_1() { ; ; CHECK-NOFP16-GI-LABEL: movi1d_1: ; CHECK-NOFP16-GI: // %bb.0: -; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI52_0 -; CHECK-NOFP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI52_0] +; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI56_0 +; CHECK-NOFP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI56_0] ; CHECK-NOFP16-GI-NEXT: ret ; ; CHECK-FP16-GI-LABEL: movi1d_1: ; CHECK-FP16-GI: // %bb.0: -; CHECK-FP16-GI-NEXT: adrp x8, .LCPI52_0 -; CHECK-FP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI52_0] +; CHECK-FP16-GI-NEXT: adrp x8, .LCPI56_0 +; CHECK-FP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI56_0] ; CHECK-FP16-GI-NEXT: ret ret <2 x i32> <i32 -65536, i32 65535> } @@ -517,31 +601,31 @@ define <2 x i32> @movi1d() { ; CHECK-NOFP16-SD-LABEL: movi1d: ; CHECK-NOFP16-SD: // %bb.0: ; CHECK-NOFP16-SD-NEXT: movi d1, #0x00ffffffff0000 -; CHECK-NOFP16-SD-NEXT: adrp x8, .LCPI53_0 -; CHECK-NOFP16-SD-NEXT: ldr d0, [x8, :lo12:.LCPI53_0] +; CHECK-NOFP16-SD-NEXT: adrp x8, .LCPI57_0 +; CHECK-NOFP16-SD-NEXT: ldr d0, [x8, :lo12:.LCPI57_0] ; CHECK-NOFP16-SD-NEXT: b test_movi1d ; ; CHECK-FP16-SD-LABEL: movi1d: ; CHECK-FP16-SD: // %bb.0: ; CHECK-FP16-SD-NEXT: movi d1, #0x00ffffffff0000 -; CHECK-FP16-SD-NEXT: adrp x8, .LCPI53_0 -; CHECK-FP16-SD-NEXT: ldr d0, [x8, :lo12:.LCPI53_0] +; CHECK-FP16-SD-NEXT: adrp x8, .LCPI57_0 +; CHECK-FP16-SD-NEXT: ldr d0, [x8, :lo12:.LCPI57_0] ; CHECK-FP16-SD-NEXT: b test_movi1d ; ; CHECK-NOFP16-GI-LABEL: movi1d: ; CHECK-NOFP16-GI: // %bb.0: -; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI53_1 -; CHECK-NOFP16-GI-NEXT: adrp x9, .LCPI53_0 -; CHECK-NOFP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI53_1] -; CHECK-NOFP16-GI-NEXT: ldr d1, [x9, :lo12:.LCPI53_0] +; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI57_1 +; CHECK-NOFP16-GI-NEXT: adrp x9, .LCPI57_0 +; CHECK-NOFP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI57_1] +; CHECK-NOFP16-GI-NEXT: ldr d1, [x9, :lo12:.LCPI57_0] ; CHECK-NOFP16-GI-NEXT: b test_movi1d ; ; CHECK-FP16-GI-LABEL: movi1d: ; CHECK-FP16-GI: // %bb.0: -; CHECK-FP16-GI-NEXT: adrp x8, .LCPI53_1 -; CHECK-FP16-GI-NEXT: adrp x9, .LCPI53_0 -; CHECK-FP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI53_1] -; CHECK-FP16-GI-NEXT: ldr d1, [x9, :lo12:.LCPI53_0] +; CHECK-FP16-GI-NEXT: adrp x8, .LCPI57_1 +; CHECK-FP16-GI-NEXT: adrp x9, .LCPI57_0 +; CHECK-FP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI57_1] +; CHECK-FP16-GI-NEXT: ldr d1, [x9, :lo12:.LCPI57_0] ; CHECK-FP16-GI-NEXT: b test_movi1d %1 = tail call <2 x i32> @test_movi1d(<2 x i32> <i32 -2147483648, i32 2147450880>, <2 x i32> <i32 -65536, i32 65535>) ret <2 x i32> %1 diff --git a/llvm/test/CodeGen/Hexagon/and_mask_cmp0_sink.ll b/llvm/test/CodeGen/Hexagon/and_mask_cmp0_sink.ll new file mode 100644 index 0000000..b5c3399 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/and_mask_cmp0_sink.ll @@ -0,0 +1,68 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; Test that 'and' mask is sunk to the cmp use block only if it is masking a single bit +; RUN: llc -march=hexagon --verify-machineinstrs < %s | FileCheck %s + +@A = global i32 zeroinitializer + +define i32 @and_sink1(i32 %a) { +; CHECK-LABEL: and_sink1: +; CHECK: .cfi_startproc +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: p0 = !tstbit(r0,#11) +; CHECK-NEXT: r0 = ##A +; CHECK-NEXT: } +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .LBB0_1: // %bb0 +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: { +; CHECK-NEXT: if (p0) jump:nt .LBB0_1 +; CHECK-NEXT: memw(r0+#0) = #0 +; CHECK-NEXT: } +; CHECK-NEXT: // %bb.2: // %bb2 +; CHECK-NEXT: { +; CHECK-NEXT: r0 = #0 +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %and = and i32 %a, 2048 + br label %bb0 +bb0: + %cmp = icmp eq i32 %and, 0 + store i32 0, i32* @A + br i1 %cmp, label %bb0, label %bb2 +bb2: + ret i32 0 +} + +define i32 @and_sink2(i32 %a) { +; CHECK-LABEL: and_sink2: +; CHECK: .cfi_startproc +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: r1 = and(r0,##2049) +; CHECK-NEXT: r0 = ##A +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r1,#0) +; CHECK-NEXT: } +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .LBB1_1: // %bb0 +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: { +; CHECK-NEXT: if (p0) jump:nt .LBB1_1 +; CHECK-NEXT: memw(r0+#0) = #0 +; CHECK-NEXT: } +; CHECK-NEXT: // %bb.2: // %bb2 +; CHECK-NEXT: { +; CHECK-NEXT: r0 = #0 +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %and = and i32 %a, 2049 + br label %bb0 +bb0: + %cmp = icmp eq i32 %and, 0 + store i32 0, i32* @A + br i1 %cmp, label %bb0, label %bb2 +bb2: + ret i32 0 +} diff --git a/llvm/test/CodeGen/LoongArch/expandmemcmp-optsize.ll b/llvm/test/CodeGen/LoongArch/expandmemcmp-optsize.ll new file mode 100644 index 0000000..36670fa --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/expandmemcmp-optsize.ll @@ -0,0 +1,2239 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: sed 's/iGRLen/i32/g' %s | llc --mtriple=loongarch32 --mattr=+ual \ +; RUN: | FileCheck %s --check-prefixes=CHECK,LA32,LA32-UAL +; RUN: sed 's/iGRLen/i64/g' %s | llc --mtriple=loongarch64 --mattr=+ual \ +; RUN: | FileCheck %s --check-prefixes=CHECK,LA64,LA64-UAL +; RUN: sed 's/iGRLen/i32/g' %s | llc --mtriple=loongarch32 --mattr=-ual \ +; RUN: | FileCheck %s --check-prefixes=CHECK,LA32,LA32-NUAL +; RUN: sed 's/iGRLen/i64/g' %s | llc --mtriple=loongarch64 --mattr=-ual \ +; RUN: | FileCheck %s --check-prefixes=CHECK,LA64,LA64-NUAL + +declare signext i32 @bcmp(ptr, ptr, iGRLen) nounwind readonly +declare signext i32 @memcmp(ptr, ptr, iGRLen) nounwind readonly + +define signext i32 @bcmp_size_0(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: bcmp_size_0: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: move $a2, $zero +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_0: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: move $a2, $zero +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 0) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_1(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: bcmp_size_1: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.bu $a0, $a0, 0 +; LA32-UAL-NEXT: ld.bu $a1, $a1, 0 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_1: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.bu $a0, $a0, 0 +; LA64-UAL-NEXT: ld.bu $a1, $a1, 0 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_1: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 1 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_1: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 1 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 1) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_2(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: bcmp_size_2: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.hu $a0, $a0, 0 +; LA32-UAL-NEXT: ld.hu $a1, $a1, 0 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_2: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.hu $a0, $a0, 0 +; LA64-UAL-NEXT: ld.hu $a1, $a1, 0 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_2: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 2 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_2: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 2 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 2) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_3(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: bcmp_size_3: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.hu $a2, $a0, 0 +; LA32-UAL-NEXT: ld.hu $a3, $a1, 0 +; LA32-UAL-NEXT: ld.bu $a0, $a0, 2 +; LA32-UAL-NEXT: ld.bu $a1, $a1, 2 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a0, $a2, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_3: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.hu $a2, $a0, 0 +; LA64-UAL-NEXT: ld.hu $a3, $a1, 0 +; LA64-UAL-NEXT: ld.bu $a0, $a0, 2 +; LA64-UAL-NEXT: ld.bu $a1, $a1, 2 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a0, $a2, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_3: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 3 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_3: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 3 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 3) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_4(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: bcmp_size_4: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a0, $a0, 0 +; LA32-UAL-NEXT: ld.w $a1, $a1, 0 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_4: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a0, $a0, 0 +; LA64-UAL-NEXT: ld.w $a1, $a1, 0 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_4: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_4: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_5(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: bcmp_size_5: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.bu $a0, $a0, 4 +; LA32-UAL-NEXT: ld.bu $a1, $a1, 4 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a0, $a2, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_5: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a2, $a0, 0 +; LA64-UAL-NEXT: ld.w $a3, $a1, 0 +; LA64-UAL-NEXT: ld.bu $a0, $a0, 4 +; LA64-UAL-NEXT: ld.bu $a1, $a1, 4 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a0, $a2, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_5: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 5 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_5: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 5 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 5) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_6(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: bcmp_size_6: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.hu $a0, $a0, 4 +; LA32-UAL-NEXT: ld.hu $a1, $a1, 4 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a0, $a2, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_6: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a2, $a0, 0 +; LA64-UAL-NEXT: ld.w $a3, $a1, 0 +; LA64-UAL-NEXT: ld.hu $a0, $a0, 4 +; LA64-UAL-NEXT: ld.hu $a1, $a1, 4 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a0, $a2, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_6: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 6 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_6: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 6 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 6) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_7(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: bcmp_size_7: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.w $a0, $a0, 3 +; LA32-UAL-NEXT: ld.w $a1, $a1, 3 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a0, $a2, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_7: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a2, $a0, 0 +; LA64-UAL-NEXT: ld.w $a3, $a1, 0 +; LA64-UAL-NEXT: ld.w $a0, $a0, 3 +; LA64-UAL-NEXT: ld.w $a1, $a1, 3 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a0, $a2, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_7: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 7 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_7: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 7 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 7) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_8(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: bcmp_size_8: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.w $a0, $a0, 4 +; LA32-UAL-NEXT: ld.w $a1, $a1, 4 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a0, $a2, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_8: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a0, $a0, 0 +; LA64-UAL-NEXT: ld.d $a1, $a1, 0 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_8: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 8 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_8: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 8 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 8) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_15(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: bcmp_size_15: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.w $a4, $a0, 4 +; LA32-UAL-NEXT: ld.w $a5, $a1, 4 +; LA32-UAL-NEXT: ld.w $a6, $a0, 8 +; LA32-UAL-NEXT: ld.w $a7, $a1, 8 +; LA32-UAL-NEXT: ld.w $a0, $a0, 11 +; LA32-UAL-NEXT: ld.w $a1, $a1, 11 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a3, $a4, $a5 +; LA32-UAL-NEXT: xor $a4, $a6, $a7 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a1, $a2, $a3 +; LA32-UAL-NEXT: or $a0, $a4, $a0 +; LA32-UAL-NEXT: or $a0, $a1, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_15: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: ld.d $a0, $a0, 7 +; LA64-UAL-NEXT: ld.d $a1, $a1, 7 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a0, $a2, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_15: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 15 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_15: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 15 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 15) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_16(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: bcmp_size_16: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.w $a4, $a0, 4 +; LA32-UAL-NEXT: ld.w $a5, $a1, 4 +; LA32-UAL-NEXT: ld.w $a6, $a0, 8 +; LA32-UAL-NEXT: ld.w $a7, $a1, 8 +; LA32-UAL-NEXT: ld.w $a0, $a0, 12 +; LA32-UAL-NEXT: ld.w $a1, $a1, 12 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a3, $a4, $a5 +; LA32-UAL-NEXT: xor $a4, $a6, $a7 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a1, $a2, $a3 +; LA32-UAL-NEXT: or $a0, $a4, $a0 +; LA32-UAL-NEXT: or $a0, $a1, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_16: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: ld.d $a0, $a0, 8 +; LA64-UAL-NEXT: ld.d $a1, $a1, 8 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a0, $a2, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_16: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 16 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_16: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 16 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 16) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_31(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: bcmp_size_31: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 31 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_31: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: ld.d $a4, $a0, 8 +; LA64-UAL-NEXT: ld.d $a5, $a1, 8 +; LA64-UAL-NEXT: ld.d $a6, $a0, 16 +; LA64-UAL-NEXT: ld.d $a7, $a1, 16 +; LA64-UAL-NEXT: ld.d $a0, $a0, 23 +; LA64-UAL-NEXT: ld.d $a1, $a1, 23 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a3, $a4, $a5 +; LA64-UAL-NEXT: xor $a4, $a6, $a7 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a1, $a2, $a3 +; LA64-UAL-NEXT: or $a0, $a4, $a0 +; LA64-UAL-NEXT: or $a0, $a1, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_31: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 31 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 31) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_32(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: bcmp_size_32: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 32 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_32: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: ld.d $a4, $a0, 8 +; LA64-UAL-NEXT: ld.d $a5, $a1, 8 +; LA64-UAL-NEXT: ld.d $a6, $a0, 16 +; LA64-UAL-NEXT: ld.d $a7, $a1, 16 +; LA64-UAL-NEXT: ld.d $a0, $a0, 24 +; LA64-UAL-NEXT: ld.d $a1, $a1, 24 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a3, $a4, $a5 +; LA64-UAL-NEXT: xor $a4, $a6, $a7 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a1, $a2, $a3 +; LA64-UAL-NEXT: or $a0, $a4, $a0 +; LA64-UAL-NEXT: or $a0, $a1, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_32: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 32 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 32) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_63(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: bcmp_size_63: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 63 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_63: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 63 +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 63) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_64(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: bcmp_size_64: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 64 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_64: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 64 +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 64) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_127(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: bcmp_size_127: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 127 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_127: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 127 +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 127) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_128(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: bcmp_size_128: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 128 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_128: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 128 +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 128) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_runtime(ptr %s1, ptr %s2, iGRLen %len) nounwind optsize { +; LA32-LABEL: bcmp_size_runtime: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_runtime: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen %len) + ret i32 %bcmp +} + +define i1 @bcmp_eq_zero(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: bcmp_eq_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a0, $a0, 0 +; LA32-UAL-NEXT: ld.w $a1, $a1, 0 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: sltui $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_eq_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a0, $a0, 0 +; LA64-UAL-NEXT: ld.w $a1, $a1, 0 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: sltui $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_eq_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: sltui $a0, $a0, 1 +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_eq_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: sltui $a0, $a0, 1 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4) + %ret = icmp eq i32 %bcmp, 0 + ret i1 %ret +} + +define i1 @bcmp_lt_zero(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: bcmp_lt_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: move $a0, $zero +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_lt_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: move $a0, $zero +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_lt_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: srli.w $a0, $a0, 31 +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_lt_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: slti $a0, $a0, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4) + %ret = icmp slt i32 %bcmp, 0 + ret i1 %ret +} + +define i1 @bcmp_gt_zero(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: bcmp_gt_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a0, $a0, 0 +; LA32-UAL-NEXT: ld.w $a1, $a1, 0 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_gt_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a0, $a0, 0 +; LA64-UAL-NEXT: ld.w $a1, $a1, 0 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_gt_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: slt $a0, $zero, $a0 +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_gt_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: slt $a0, $zero, $a0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4) + %ret = icmp sgt i32 %bcmp, 0 + ret i1 %ret +} + +define signext i32 @memcmp_size_0(ptr %s1, ptr %s2) nounwind optsize { +; CHECK-LABEL: memcmp_size_0: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: move $a0, $zero +; CHECK-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 0) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_1(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: memcmp_size_1: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.bu $a0, $a0, 0 +; LA32-UAL-NEXT: ld.bu $a1, $a1, 0 +; LA32-UAL-NEXT: sub.w $a0, $a0, $a1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_1: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.bu $a0, $a0, 0 +; LA64-UAL-NEXT: ld.bu $a1, $a1, 0 +; LA64-UAL-NEXT: sub.d $a0, $a0, $a1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_1: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 1 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_1: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 1 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 1) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_2(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: memcmp_size_2: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.hu $a0, $a0, 0 +; LA32-UAL-NEXT: ld.hu $a1, $a1, 0 +; LA32-UAL-NEXT: srli.w $a2, $a0, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 8 +; LA32-UAL-NEXT: or $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a2, $a1, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 8 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: lu12i.w $a2, 15 +; LA32-UAL-NEXT: ori $a2, $a2, 4095 +; LA32-UAL-NEXT: and $a0, $a0, $a2 +; LA32-UAL-NEXT: and $a1, $a1, $a2 +; LA32-UAL-NEXT: sub.w $a0, $a0, $a1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_2: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.h $a0, $a0, 0 +; LA64-UAL-NEXT: ld.h $a1, $a1, 0 +; LA64-UAL-NEXT: revb.2h $a0, $a0 +; LA64-UAL-NEXT: revb.2h $a1, $a1 +; LA64-UAL-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-UAL-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-UAL-NEXT: sub.d $a0, $a0, $a1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_2: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 2 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_2: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 2 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 2) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_3(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: memcmp_size_3: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.bu $a2, $a0, 2 +; LA32-UAL-NEXT: ld.hu $a0, $a0, 0 +; LA32-UAL-NEXT: ld.bu $a3, $a1, 2 +; LA32-UAL-NEXT: ld.hu $a1, $a1, 0 +; LA32-UAL-NEXT: lu12i.w $a4, 15 +; LA32-UAL-NEXT: ori $a4, $a4, 3840 +; LA32-UAL-NEXT: and $a5, $a0, $a4 +; LA32-UAL-NEXT: or $a2, $a5, $a2 +; LA32-UAL-NEXT: slli.w $a2, $a2, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a2, $a0 +; LA32-UAL-NEXT: and $a2, $a1, $a4 +; LA32-UAL-NEXT: or $a2, $a2, $a3 +; LA32-UAL-NEXT: slli.w $a2, $a2, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a2, $a1 +; LA32-UAL-NEXT: sltu $a2, $a0, $a1 +; LA32-UAL-NEXT: sltu $a0, $a1, $a0 +; LA32-UAL-NEXT: sub.w $a0, $a0, $a2 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_3: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.bu $a2, $a0, 2 +; LA64-UAL-NEXT: ld.hu $a0, $a0, 0 +; LA64-UAL-NEXT: ld.bu $a3, $a1, 2 +; LA64-UAL-NEXT: ld.hu $a1, $a1, 0 +; LA64-UAL-NEXT: slli.d $a2, $a2, 16 +; LA64-UAL-NEXT: or $a0, $a0, $a2 +; LA64-UAL-NEXT: slli.d $a2, $a3, 16 +; LA64-UAL-NEXT: or $a1, $a1, $a2 +; LA64-UAL-NEXT: revb.2w $a0, $a0 +; LA64-UAL-NEXT: addi.w $a0, $a0, 0 +; LA64-UAL-NEXT: revb.2w $a1, $a1 +; LA64-UAL-NEXT: addi.w $a1, $a1, 0 +; LA64-UAL-NEXT: sltu $a2, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $a1, $a0 +; LA64-UAL-NEXT: sub.d $a0, $a0, $a2 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_3: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 3 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_3: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 3 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 3) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_4(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: memcmp_size_4: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a0, $a0, 0 +; LA32-UAL-NEXT: ld.w $a1, $a1, 0 +; LA32-UAL-NEXT: srli.w $a2, $a0, 8 +; LA32-UAL-NEXT: lu12i.w $a3, 15 +; LA32-UAL-NEXT: ori $a3, $a3, 3840 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a3 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a2, $a1, 8 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a3, $a1, $a3 +; LA32-UAL-NEXT: slli.w $a3, $a3, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a3 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: sltu $a2, $a0, $a1 +; LA32-UAL-NEXT: sltu $a0, $a1, $a0 +; LA32-UAL-NEXT: sub.w $a0, $a0, $a2 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_4: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a0, $a0, 0 +; LA64-UAL-NEXT: ld.w $a1, $a1, 0 +; LA64-UAL-NEXT: revb.2w $a0, $a0 +; LA64-UAL-NEXT: addi.w $a0, $a0, 0 +; LA64-UAL-NEXT: revb.2w $a1, $a1 +; LA64-UAL-NEXT: addi.w $a1, $a1, 0 +; LA64-UAL-NEXT: sltu $a2, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $a1, $a0 +; LA64-UAL-NEXT: sub.d $a0, $a0, $a2 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_4: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_4: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_5(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: memcmp_size_5: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: srli.w $a4, $a2, 8 +; LA32-UAL-NEXT: lu12i.w $a5, 15 +; LA32-UAL-NEXT: ori $a5, $a5, 3840 +; LA32-UAL-NEXT: and $a4, $a4, $a5 +; LA32-UAL-NEXT: srli.w $a6, $a2, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: and $a6, $a2, $a5 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a2, $a2, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a6 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: srli.w $a4, $a3, 8 +; LA32-UAL-NEXT: and $a4, $a4, $a5 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: and $a5, $a3, $a5 +; LA32-UAL-NEXT: slli.w $a5, $a5, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: or $a3, $a3, $a4 +; LA32-UAL-NEXT: bne $a2, $a3, .LBB26_2 +; LA32-UAL-NEXT: # %bb.1: # %loadbb1 +; LA32-UAL-NEXT: ld.bu $a0, $a0, 4 +; LA32-UAL-NEXT: ld.bu $a1, $a1, 4 +; LA32-UAL-NEXT: sub.w $a0, $a0, $a1 +; LA32-UAL-NEXT: ret +; LA32-UAL-NEXT: .LBB26_2: # %res_block +; LA32-UAL-NEXT: sltu $a0, $a2, $a3 +; LA32-UAL-NEXT: sub.w $a0, $zero, $a0 +; LA32-UAL-NEXT: ori $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_5: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.bu $a2, $a0, 4 +; LA64-UAL-NEXT: ld.wu $a0, $a0, 0 +; LA64-UAL-NEXT: ld.bu $a3, $a1, 4 +; LA64-UAL-NEXT: ld.wu $a1, $a1, 0 +; LA64-UAL-NEXT: slli.d $a2, $a2, 32 +; LA64-UAL-NEXT: or $a0, $a0, $a2 +; LA64-UAL-NEXT: slli.d $a2, $a3, 32 +; LA64-UAL-NEXT: or $a1, $a1, $a2 +; LA64-UAL-NEXT: revb.d $a0, $a0 +; LA64-UAL-NEXT: revb.d $a1, $a1 +; LA64-UAL-NEXT: sltu $a2, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $a1, $a0 +; LA64-UAL-NEXT: sub.d $a0, $a0, $a2 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_5: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 5 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_5: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 5 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 5) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_6(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: memcmp_size_6: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a3, $a0, 0 +; LA32-UAL-NEXT: ld.w $a4, $a1, 0 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: lu12i.w $a2, 15 +; LA32-UAL-NEXT: ori $a6, $a2, 3840 +; LA32-UAL-NEXT: and $a5, $a5, $a6 +; LA32-UAL-NEXT: srli.w $a7, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a7 +; LA32-UAL-NEXT: and $a7, $a3, $a6 +; LA32-UAL-NEXT: slli.w $a7, $a7, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a7 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a6 +; LA32-UAL-NEXT: srli.w $a7, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a7 +; LA32-UAL-NEXT: and $a6, $a4, $a6 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB27_3 +; LA32-UAL-NEXT: # %bb.1: # %loadbb1 +; LA32-UAL-NEXT: ld.hu $a0, $a0, 4 +; LA32-UAL-NEXT: ld.hu $a1, $a1, 4 +; LA32-UAL-NEXT: srli.w $a3, $a0, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 8 +; LA32-UAL-NEXT: or $a0, $a0, $a3 +; LA32-UAL-NEXT: srli.w $a3, $a1, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 8 +; LA32-UAL-NEXT: or $a1, $a1, $a3 +; LA32-UAL-NEXT: ori $a2, $a2, 4095 +; LA32-UAL-NEXT: and $a3, $a0, $a2 +; LA32-UAL-NEXT: and $a4, $a1, $a2 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB27_3 +; LA32-UAL-NEXT: # %bb.2: +; LA32-UAL-NEXT: move $a0, $zero +; LA32-UAL-NEXT: ret +; LA32-UAL-NEXT: .LBB27_3: # %res_block +; LA32-UAL-NEXT: sltu $a0, $a3, $a4 +; LA32-UAL-NEXT: sub.w $a0, $zero, $a0 +; LA32-UAL-NEXT: ori $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_6: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.hu $a2, $a0, 4 +; LA64-UAL-NEXT: ld.wu $a0, $a0, 0 +; LA64-UAL-NEXT: ld.hu $a3, $a1, 4 +; LA64-UAL-NEXT: ld.wu $a1, $a1, 0 +; LA64-UAL-NEXT: slli.d $a2, $a2, 32 +; LA64-UAL-NEXT: or $a0, $a0, $a2 +; LA64-UAL-NEXT: slli.d $a2, $a3, 32 +; LA64-UAL-NEXT: or $a1, $a1, $a2 +; LA64-UAL-NEXT: revb.d $a0, $a0 +; LA64-UAL-NEXT: revb.d $a1, $a1 +; LA64-UAL-NEXT: sltu $a2, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $a1, $a0 +; LA64-UAL-NEXT: sub.d $a0, $a0, $a2 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_6: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 6 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_6: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 6 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 6) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_7(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: memcmp_size_7: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a3, $a0, 0 +; LA32-UAL-NEXT: ld.w $a4, $a1, 0 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: lu12i.w $a2, 15 +; LA32-UAL-NEXT: ori $a2, $a2, 3840 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB28_3 +; LA32-UAL-NEXT: # %bb.1: # %loadbb1 +; LA32-UAL-NEXT: ld.w $a0, $a0, 3 +; LA32-UAL-NEXT: ld.w $a1, $a1, 3 +; LA32-UAL-NEXT: srli.w $a3, $a0, 8 +; LA32-UAL-NEXT: and $a3, $a3, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a2 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a3, $a0, $a3 +; LA32-UAL-NEXT: srli.w $a0, $a1, 8 +; LA32-UAL-NEXT: and $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: and $a2, $a1, $a2 +; LA32-UAL-NEXT: slli.w $a2, $a2, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: or $a4, $a1, $a0 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB28_3 +; LA32-UAL-NEXT: # %bb.2: +; LA32-UAL-NEXT: move $a0, $zero +; LA32-UAL-NEXT: ret +; LA32-UAL-NEXT: .LBB28_3: # %res_block +; LA32-UAL-NEXT: sltu $a0, $a3, $a4 +; LA32-UAL-NEXT: sub.w $a0, $zero, $a0 +; LA32-UAL-NEXT: ori $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_7: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a2, $a0, 0 +; LA64-UAL-NEXT: ld.w $a3, $a1, 0 +; LA64-UAL-NEXT: revb.2w $a2, $a2 +; LA64-UAL-NEXT: addi.w $a4, $a2, 0 +; LA64-UAL-NEXT: revb.2w $a3, $a3 +; LA64-UAL-NEXT: addi.w $a5, $a3, 0 +; LA64-UAL-NEXT: bne $a4, $a5, .LBB28_3 +; LA64-UAL-NEXT: # %bb.1: # %loadbb1 +; LA64-UAL-NEXT: ld.w $a0, $a0, 3 +; LA64-UAL-NEXT: ld.w $a1, $a1, 3 +; LA64-UAL-NEXT: revb.2w $a2, $a0 +; LA64-UAL-NEXT: addi.w $a0, $a2, 0 +; LA64-UAL-NEXT: revb.2w $a3, $a1 +; LA64-UAL-NEXT: addi.w $a1, $a3, 0 +; LA64-UAL-NEXT: bne $a0, $a1, .LBB28_3 +; LA64-UAL-NEXT: # %bb.2: +; LA64-UAL-NEXT: move $a0, $zero +; LA64-UAL-NEXT: ret +; LA64-UAL-NEXT: .LBB28_3: # %res_block +; LA64-UAL-NEXT: addi.w $a0, $a3, 0 +; LA64-UAL-NEXT: addi.w $a1, $a2, 0 +; LA64-UAL-NEXT: sltu $a0, $a1, $a0 +; LA64-UAL-NEXT: sub.d $a0, $zero, $a0 +; LA64-UAL-NEXT: ori $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_7: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 7 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_7: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 7 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 7) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_8(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: memcmp_size_8: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a3, $a0, 0 +; LA32-UAL-NEXT: ld.w $a4, $a1, 0 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: lu12i.w $a2, 15 +; LA32-UAL-NEXT: ori $a2, $a2, 3840 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB29_3 +; LA32-UAL-NEXT: # %bb.1: # %loadbb1 +; LA32-UAL-NEXT: ld.w $a0, $a0, 4 +; LA32-UAL-NEXT: ld.w $a1, $a1, 4 +; LA32-UAL-NEXT: srli.w $a3, $a0, 8 +; LA32-UAL-NEXT: and $a3, $a3, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a2 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a3, $a0, $a3 +; LA32-UAL-NEXT: srli.w $a0, $a1, 8 +; LA32-UAL-NEXT: and $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: and $a2, $a1, $a2 +; LA32-UAL-NEXT: slli.w $a2, $a2, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: or $a4, $a1, $a0 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB29_3 +; LA32-UAL-NEXT: # %bb.2: +; LA32-UAL-NEXT: move $a0, $zero +; LA32-UAL-NEXT: ret +; LA32-UAL-NEXT: .LBB29_3: # %res_block +; LA32-UAL-NEXT: sltu $a0, $a3, $a4 +; LA32-UAL-NEXT: sub.w $a0, $zero, $a0 +; LA32-UAL-NEXT: ori $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_8: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a0, $a0, 0 +; LA64-UAL-NEXT: ld.d $a1, $a1, 0 +; LA64-UAL-NEXT: revb.d $a0, $a0 +; LA64-UAL-NEXT: revb.d $a1, $a1 +; LA64-UAL-NEXT: sltu $a2, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $a1, $a0 +; LA64-UAL-NEXT: sub.d $a0, $a0, $a2 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_8: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 8 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_8: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 8 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 8) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_15(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: memcmp_size_15: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a3, $a0, 0 +; LA32-UAL-NEXT: ld.w $a4, $a1, 0 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: lu12i.w $a2, 15 +; LA32-UAL-NEXT: ori $a2, $a2, 3840 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB30_5 +; LA32-UAL-NEXT: # %bb.1: # %loadbb1 +; LA32-UAL-NEXT: ld.w $a3, $a0, 4 +; LA32-UAL-NEXT: ld.w $a4, $a1, 4 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB30_5 +; LA32-UAL-NEXT: # %bb.2: # %loadbb2 +; LA32-UAL-NEXT: ld.w $a3, $a0, 8 +; LA32-UAL-NEXT: ld.w $a4, $a1, 8 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB30_5 +; LA32-UAL-NEXT: # %bb.3: # %loadbb3 +; LA32-UAL-NEXT: ld.w $a0, $a0, 11 +; LA32-UAL-NEXT: ld.w $a1, $a1, 11 +; LA32-UAL-NEXT: srli.w $a3, $a0, 8 +; LA32-UAL-NEXT: and $a3, $a3, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a2 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a3, $a0, $a3 +; LA32-UAL-NEXT: srli.w $a0, $a1, 8 +; LA32-UAL-NEXT: and $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: and $a2, $a1, $a2 +; LA32-UAL-NEXT: slli.w $a2, $a2, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: or $a4, $a1, $a0 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB30_5 +; LA32-UAL-NEXT: # %bb.4: +; LA32-UAL-NEXT: move $a0, $zero +; LA32-UAL-NEXT: ret +; LA32-UAL-NEXT: .LBB30_5: # %res_block +; LA32-UAL-NEXT: sltu $a0, $a3, $a4 +; LA32-UAL-NEXT: sub.w $a0, $zero, $a0 +; LA32-UAL-NEXT: ori $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_15: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB30_3 +; LA64-UAL-NEXT: # %bb.1: # %loadbb1 +; LA64-UAL-NEXT: ld.d $a0, $a0, 7 +; LA64-UAL-NEXT: ld.d $a1, $a1, 7 +; LA64-UAL-NEXT: revb.d $a2, $a0 +; LA64-UAL-NEXT: revb.d $a3, $a1 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB30_3 +; LA64-UAL-NEXT: # %bb.2: +; LA64-UAL-NEXT: move $a0, $zero +; LA64-UAL-NEXT: ret +; LA64-UAL-NEXT: .LBB30_3: # %res_block +; LA64-UAL-NEXT: sltu $a0, $a2, $a3 +; LA64-UAL-NEXT: sub.d $a0, $zero, $a0 +; LA64-UAL-NEXT: ori $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_15: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 15 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_15: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 15 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 15) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_16(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: memcmp_size_16: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a3, $a0, 0 +; LA32-UAL-NEXT: ld.w $a4, $a1, 0 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: lu12i.w $a2, 15 +; LA32-UAL-NEXT: ori $a2, $a2, 3840 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB31_5 +; LA32-UAL-NEXT: # %bb.1: # %loadbb1 +; LA32-UAL-NEXT: ld.w $a3, $a0, 4 +; LA32-UAL-NEXT: ld.w $a4, $a1, 4 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB31_5 +; LA32-UAL-NEXT: # %bb.2: # %loadbb2 +; LA32-UAL-NEXT: ld.w $a3, $a0, 8 +; LA32-UAL-NEXT: ld.w $a4, $a1, 8 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB31_5 +; LA32-UAL-NEXT: # %bb.3: # %loadbb3 +; LA32-UAL-NEXT: ld.w $a0, $a0, 12 +; LA32-UAL-NEXT: ld.w $a1, $a1, 12 +; LA32-UAL-NEXT: srli.w $a3, $a0, 8 +; LA32-UAL-NEXT: and $a3, $a3, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a2 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a3, $a0, $a3 +; LA32-UAL-NEXT: srli.w $a0, $a1, 8 +; LA32-UAL-NEXT: and $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: and $a2, $a1, $a2 +; LA32-UAL-NEXT: slli.w $a2, $a2, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: or $a4, $a1, $a0 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB31_5 +; LA32-UAL-NEXT: # %bb.4: +; LA32-UAL-NEXT: move $a0, $zero +; LA32-UAL-NEXT: ret +; LA32-UAL-NEXT: .LBB31_5: # %res_block +; LA32-UAL-NEXT: sltu $a0, $a3, $a4 +; LA32-UAL-NEXT: sub.w $a0, $zero, $a0 +; LA32-UAL-NEXT: ori $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_16: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB31_3 +; LA64-UAL-NEXT: # %bb.1: # %loadbb1 +; LA64-UAL-NEXT: ld.d $a0, $a0, 8 +; LA64-UAL-NEXT: ld.d $a1, $a1, 8 +; LA64-UAL-NEXT: revb.d $a2, $a0 +; LA64-UAL-NEXT: revb.d $a3, $a1 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB31_3 +; LA64-UAL-NEXT: # %bb.2: +; LA64-UAL-NEXT: move $a0, $zero +; LA64-UAL-NEXT: ret +; LA64-UAL-NEXT: .LBB31_3: # %res_block +; LA64-UAL-NEXT: sltu $a0, $a2, $a3 +; LA64-UAL-NEXT: sub.d $a0, $zero, $a0 +; LA64-UAL-NEXT: ori $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_16: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 16 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_16: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 16 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 16) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_31(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: memcmp_size_31: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 31 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_31: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB32_5 +; LA64-UAL-NEXT: # %bb.1: # %loadbb1 +; LA64-UAL-NEXT: ld.d $a2, $a0, 8 +; LA64-UAL-NEXT: ld.d $a3, $a1, 8 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB32_5 +; LA64-UAL-NEXT: # %bb.2: # %loadbb2 +; LA64-UAL-NEXT: ld.d $a2, $a0, 16 +; LA64-UAL-NEXT: ld.d $a3, $a1, 16 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB32_5 +; LA64-UAL-NEXT: # %bb.3: # %loadbb3 +; LA64-UAL-NEXT: ld.d $a0, $a0, 23 +; LA64-UAL-NEXT: ld.d $a1, $a1, 23 +; LA64-UAL-NEXT: revb.d $a2, $a0 +; LA64-UAL-NEXT: revb.d $a3, $a1 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB32_5 +; LA64-UAL-NEXT: # %bb.4: +; LA64-UAL-NEXT: move $a0, $zero +; LA64-UAL-NEXT: ret +; LA64-UAL-NEXT: .LBB32_5: # %res_block +; LA64-UAL-NEXT: sltu $a0, $a2, $a3 +; LA64-UAL-NEXT: sub.d $a0, $zero, $a0 +; LA64-UAL-NEXT: ori $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_31: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 31 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 31) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_32(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: memcmp_size_32: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 32 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_32: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB33_5 +; LA64-UAL-NEXT: # %bb.1: # %loadbb1 +; LA64-UAL-NEXT: ld.d $a2, $a0, 8 +; LA64-UAL-NEXT: ld.d $a3, $a1, 8 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB33_5 +; LA64-UAL-NEXT: # %bb.2: # %loadbb2 +; LA64-UAL-NEXT: ld.d $a2, $a0, 16 +; LA64-UAL-NEXT: ld.d $a3, $a1, 16 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB33_5 +; LA64-UAL-NEXT: # %bb.3: # %loadbb3 +; LA64-UAL-NEXT: ld.d $a0, $a0, 24 +; LA64-UAL-NEXT: ld.d $a1, $a1, 24 +; LA64-UAL-NEXT: revb.d $a2, $a0 +; LA64-UAL-NEXT: revb.d $a3, $a1 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB33_5 +; LA64-UAL-NEXT: # %bb.4: +; LA64-UAL-NEXT: move $a0, $zero +; LA64-UAL-NEXT: ret +; LA64-UAL-NEXT: .LBB33_5: # %res_block +; LA64-UAL-NEXT: sltu $a0, $a2, $a3 +; LA64-UAL-NEXT: sub.d $a0, $zero, $a0 +; LA64-UAL-NEXT: ori $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_32: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 32 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 32) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_63(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: memcmp_size_63: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 63 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_size_63: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 63 +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 63) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_64(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: memcmp_size_64: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 64 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_size_64: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 64 +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 64) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_127(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: memcmp_size_127: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 127 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_size_127: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 127 +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 127) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_128(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: memcmp_size_128: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 128 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_size_128: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 128 +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 128) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_runtime(ptr %s1, ptr %s2, iGRLen %len) nounwind optsize { +; LA32-LABEL: memcmp_size_runtime: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_size_runtime: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen %len) + ret i32 %memcmp +} + +define i1 @memcmp_eq_zero(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: memcmp_eq_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a0, $a0, 0 +; LA32-UAL-NEXT: ld.w $a1, $a1, 0 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: sltui $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_eq_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a0, $a0, 0 +; LA64-UAL-NEXT: ld.w $a1, $a1, 0 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: sltui $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_eq_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: ld.bu $a2, $a1, 1 +; LA32-NUAL-NEXT: ld.bu $a3, $a1, 0 +; LA32-NUAL-NEXT: ld.bu $a4, $a1, 2 +; LA32-NUAL-NEXT: ld.bu $a1, $a1, 3 +; LA32-NUAL-NEXT: slli.w $a2, $a2, 8 +; LA32-NUAL-NEXT: or $a2, $a2, $a3 +; LA32-NUAL-NEXT: slli.w $a3, $a4, 16 +; LA32-NUAL-NEXT: slli.w $a1, $a1, 24 +; LA32-NUAL-NEXT: or $a1, $a1, $a3 +; LA32-NUAL-NEXT: or $a1, $a1, $a2 +; LA32-NUAL-NEXT: ld.bu $a2, $a0, 1 +; LA32-NUAL-NEXT: ld.bu $a3, $a0, 0 +; LA32-NUAL-NEXT: ld.bu $a4, $a0, 2 +; LA32-NUAL-NEXT: ld.bu $a0, $a0, 3 +; LA32-NUAL-NEXT: slli.w $a2, $a2, 8 +; LA32-NUAL-NEXT: or $a2, $a2, $a3 +; LA32-NUAL-NEXT: slli.w $a3, $a4, 16 +; LA32-NUAL-NEXT: slli.w $a0, $a0, 24 +; LA32-NUAL-NEXT: or $a0, $a0, $a3 +; LA32-NUAL-NEXT: or $a0, $a0, $a2 +; LA32-NUAL-NEXT: xor $a0, $a0, $a1 +; LA32-NUAL-NEXT: sltui $a0, $a0, 1 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_eq_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: ld.bu $a2, $a1, 1 +; LA64-NUAL-NEXT: ld.bu $a3, $a1, 0 +; LA64-NUAL-NEXT: ld.bu $a4, $a1, 2 +; LA64-NUAL-NEXT: ld.b $a1, $a1, 3 +; LA64-NUAL-NEXT: slli.d $a2, $a2, 8 +; LA64-NUAL-NEXT: or $a2, $a2, $a3 +; LA64-NUAL-NEXT: slli.d $a3, $a4, 16 +; LA64-NUAL-NEXT: slli.d $a1, $a1, 24 +; LA64-NUAL-NEXT: or $a1, $a1, $a3 +; LA64-NUAL-NEXT: or $a1, $a1, $a2 +; LA64-NUAL-NEXT: ld.bu $a2, $a0, 1 +; LA64-NUAL-NEXT: ld.bu $a3, $a0, 0 +; LA64-NUAL-NEXT: ld.bu $a4, $a0, 2 +; LA64-NUAL-NEXT: ld.b $a0, $a0, 3 +; LA64-NUAL-NEXT: slli.d $a2, $a2, 8 +; LA64-NUAL-NEXT: or $a2, $a2, $a3 +; LA64-NUAL-NEXT: slli.d $a3, $a4, 16 +; LA64-NUAL-NEXT: slli.d $a0, $a0, 24 +; LA64-NUAL-NEXT: or $a0, $a0, $a3 +; LA64-NUAL-NEXT: or $a0, $a0, $a2 +; LA64-NUAL-NEXT: xor $a0, $a0, $a1 +; LA64-NUAL-NEXT: sltui $a0, $a0, 1 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4) + %ret = icmp eq i32 %memcmp, 0 + ret i1 %ret +} + +define i1 @memcmp_lt_zero(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: memcmp_lt_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a0, $a0, 0 +; LA32-UAL-NEXT: ld.w $a1, $a1, 0 +; LA32-UAL-NEXT: srli.w $a2, $a0, 8 +; LA32-UAL-NEXT: lu12i.w $a3, 15 +; LA32-UAL-NEXT: ori $a3, $a3, 3840 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a3 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a2, $a1, 8 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a3, $a1, $a3 +; LA32-UAL-NEXT: slli.w $a3, $a3, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a3 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: sltu $a0, $a0, $a1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_lt_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a0, $a0, 0 +; LA64-UAL-NEXT: ld.w $a1, $a1, 0 +; LA64-UAL-NEXT: revb.2w $a0, $a0 +; LA64-UAL-NEXT: addi.w $a0, $a0, 0 +; LA64-UAL-NEXT: revb.2w $a1, $a1 +; LA64-UAL-NEXT: addi.w $a1, $a1, 0 +; LA64-UAL-NEXT: sltu $a0, $a0, $a1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_lt_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: srli.w $a0, $a0, 31 +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_lt_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: slti $a0, $a0, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4) + %ret = icmp slt i32 %memcmp, 0 + ret i1 %ret +} + +define i1 @memcmp_gt_zero(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: memcmp_gt_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a0, $a0, 0 +; LA32-UAL-NEXT: ld.w $a1, $a1, 0 +; LA32-UAL-NEXT: srli.w $a2, $a0, 8 +; LA32-UAL-NEXT: lu12i.w $a3, 15 +; LA32-UAL-NEXT: ori $a3, $a3, 3840 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a3 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a2, $a1, 8 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a3, $a1, $a3 +; LA32-UAL-NEXT: slli.w $a3, $a3, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a3 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: sltu $a0, $a1, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_gt_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a0, $a0, 0 +; LA64-UAL-NEXT: ld.w $a1, $a1, 0 +; LA64-UAL-NEXT: revb.2w $a0, $a0 +; LA64-UAL-NEXT: addi.w $a0, $a0, 0 +; LA64-UAL-NEXT: revb.2w $a1, $a1 +; LA64-UAL-NEXT: addi.w $a1, $a1, 0 +; LA64-UAL-NEXT: sltu $a0, $a1, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_gt_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: slt $a0, $zero, $a0 +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_gt_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: slt $a0, $zero, $a0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4) + %ret = icmp sgt i32 %memcmp, 0 + ret i1 %ret +} diff --git a/llvm/test/CodeGen/LoongArch/expandmemcmp.ll b/llvm/test/CodeGen/LoongArch/expandmemcmp.ll new file mode 100644 index 0000000..c1bf850 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/expandmemcmp.ll @@ -0,0 +1,3106 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: sed 's/iGRLen/i32/g' %s | llc --mtriple=loongarch32 --mattr=+ual \ +; RUN: | FileCheck %s --check-prefixes=CHECK,LA32,LA32-UAL +; RUN: sed 's/iGRLen/i64/g' %s | llc --mtriple=loongarch64 --mattr=+ual \ +; RUN: | FileCheck %s --check-prefixes=CHECK,LA64,LA64-UAL +; RUN: sed 's/iGRLen/i32/g' %s | llc --mtriple=loongarch32 --mattr=-ual \ +; RUN: | FileCheck %s --check-prefixes=CHECK,LA32,LA32-NUAL +; RUN: sed 's/iGRLen/i64/g' %s | llc --mtriple=loongarch64 --mattr=-ual \ +; RUN: | FileCheck %s --check-prefixes=CHECK,LA64,LA64-NUAL + +declare signext i32 @bcmp(ptr, ptr, iGRLen) nounwind readonly +declare signext i32 @memcmp(ptr, ptr, iGRLen) nounwind readonly + +define signext i32 @bcmp_size_0(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: bcmp_size_0: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: move $a2, $zero +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_0: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: move $a2, $zero +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 0) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_1(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: bcmp_size_1: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.bu $a0, $a0, 0 +; LA32-UAL-NEXT: ld.bu $a1, $a1, 0 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_1: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.bu $a0, $a0, 0 +; LA64-UAL-NEXT: ld.bu $a1, $a1, 0 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_1: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 1 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_1: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 1 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 1) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_2(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: bcmp_size_2: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.hu $a0, $a0, 0 +; LA32-UAL-NEXT: ld.hu $a1, $a1, 0 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_2: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.hu $a0, $a0, 0 +; LA64-UAL-NEXT: ld.hu $a1, $a1, 0 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_2: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 2 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_2: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 2 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 2) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_3(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: bcmp_size_3: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.hu $a2, $a0, 0 +; LA32-UAL-NEXT: ld.hu $a3, $a1, 0 +; LA32-UAL-NEXT: ld.bu $a0, $a0, 2 +; LA32-UAL-NEXT: ld.bu $a1, $a1, 2 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a0, $a2, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_3: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.hu $a2, $a0, 0 +; LA64-UAL-NEXT: ld.hu $a3, $a1, 0 +; LA64-UAL-NEXT: ld.bu $a0, $a0, 2 +; LA64-UAL-NEXT: ld.bu $a1, $a1, 2 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a0, $a2, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_3: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 3 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_3: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 3 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 3) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_4(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: bcmp_size_4: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a0, $a0, 0 +; LA32-UAL-NEXT: ld.w $a1, $a1, 0 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_4: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a0, $a0, 0 +; LA64-UAL-NEXT: ld.w $a1, $a1, 0 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_4: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_4: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_5(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: bcmp_size_5: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.bu $a0, $a0, 4 +; LA32-UAL-NEXT: ld.bu $a1, $a1, 4 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a0, $a2, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_5: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a2, $a0, 0 +; LA64-UAL-NEXT: ld.w $a3, $a1, 0 +; LA64-UAL-NEXT: ld.bu $a0, $a0, 4 +; LA64-UAL-NEXT: ld.bu $a1, $a1, 4 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a0, $a2, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_5: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 5 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_5: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 5 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 5) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_6(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: bcmp_size_6: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.hu $a0, $a0, 4 +; LA32-UAL-NEXT: ld.hu $a1, $a1, 4 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a0, $a2, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_6: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a2, $a0, 0 +; LA64-UAL-NEXT: ld.w $a3, $a1, 0 +; LA64-UAL-NEXT: ld.hu $a0, $a0, 4 +; LA64-UAL-NEXT: ld.hu $a1, $a1, 4 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a0, $a2, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_6: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 6 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_6: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 6 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 6) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_7(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: bcmp_size_7: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.w $a0, $a0, 3 +; LA32-UAL-NEXT: ld.w $a1, $a1, 3 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a0, $a2, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_7: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a2, $a0, 0 +; LA64-UAL-NEXT: ld.w $a3, $a1, 0 +; LA64-UAL-NEXT: ld.w $a0, $a0, 3 +; LA64-UAL-NEXT: ld.w $a1, $a1, 3 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a0, $a2, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_7: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 7 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_7: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 7 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 7) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_8(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: bcmp_size_8: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.w $a0, $a0, 4 +; LA32-UAL-NEXT: ld.w $a1, $a1, 4 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a0, $a2, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_8: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a0, $a0, 0 +; LA64-UAL-NEXT: ld.d $a1, $a1, 0 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_8: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 8 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_8: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 8 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 8) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_15(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: bcmp_size_15: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.w $a4, $a0, 4 +; LA32-UAL-NEXT: ld.w $a5, $a1, 4 +; LA32-UAL-NEXT: ld.w $a6, $a0, 8 +; LA32-UAL-NEXT: ld.w $a7, $a1, 8 +; LA32-UAL-NEXT: ld.w $a0, $a0, 11 +; LA32-UAL-NEXT: ld.w $a1, $a1, 11 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a3, $a4, $a5 +; LA32-UAL-NEXT: xor $a4, $a6, $a7 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a1, $a2, $a3 +; LA32-UAL-NEXT: or $a0, $a4, $a0 +; LA32-UAL-NEXT: or $a0, $a1, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_15: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: ld.d $a0, $a0, 7 +; LA64-UAL-NEXT: ld.d $a1, $a1, 7 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a0, $a2, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_15: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 15 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_15: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 15 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 15) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_16(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: bcmp_size_16: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.w $a4, $a0, 4 +; LA32-UAL-NEXT: ld.w $a5, $a1, 4 +; LA32-UAL-NEXT: ld.w $a6, $a0, 8 +; LA32-UAL-NEXT: ld.w $a7, $a1, 8 +; LA32-UAL-NEXT: ld.w $a0, $a0, 12 +; LA32-UAL-NEXT: ld.w $a1, $a1, 12 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a3, $a4, $a5 +; LA32-UAL-NEXT: xor $a4, $a6, $a7 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a1, $a2, $a3 +; LA32-UAL-NEXT: or $a0, $a4, $a0 +; LA32-UAL-NEXT: or $a0, $a1, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_16: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: ld.d $a0, $a0, 8 +; LA64-UAL-NEXT: ld.d $a1, $a1, 8 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a0, $a2, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_16: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 16 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_16: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 16 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 16) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_31(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: bcmp_size_31: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.w $a4, $a0, 4 +; LA32-UAL-NEXT: ld.w $a5, $a1, 4 +; LA32-UAL-NEXT: ld.w $a6, $a0, 8 +; LA32-UAL-NEXT: ld.w $a7, $a1, 8 +; LA32-UAL-NEXT: ld.w $t0, $a0, 12 +; LA32-UAL-NEXT: ld.w $t1, $a1, 12 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a3, $a4, $a5 +; LA32-UAL-NEXT: xor $a4, $a6, $a7 +; LA32-UAL-NEXT: xor $a5, $t0, $t1 +; LA32-UAL-NEXT: ld.w $a6, $a0, 16 +; LA32-UAL-NEXT: ld.w $a7, $a1, 16 +; LA32-UAL-NEXT: ld.w $t0, $a0, 20 +; LA32-UAL-NEXT: ld.w $t1, $a1, 20 +; LA32-UAL-NEXT: ld.w $t2, $a0, 24 +; LA32-UAL-NEXT: ld.w $t3, $a1, 24 +; LA32-UAL-NEXT: ld.w $a0, $a0, 27 +; LA32-UAL-NEXT: ld.w $a1, $a1, 27 +; LA32-UAL-NEXT: xor $a6, $a6, $a7 +; LA32-UAL-NEXT: xor $a7, $t0, $t1 +; LA32-UAL-NEXT: xor $t0, $t2, $t3 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a1, $a2, $a3 +; LA32-UAL-NEXT: or $a2, $a4, $a5 +; LA32-UAL-NEXT: or $a3, $a6, $a7 +; LA32-UAL-NEXT: or $a0, $t0, $a0 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: or $a0, $a3, $a0 +; LA32-UAL-NEXT: or $a0, $a1, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_31: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: ld.d $a4, $a0, 8 +; LA64-UAL-NEXT: ld.d $a5, $a1, 8 +; LA64-UAL-NEXT: ld.d $a6, $a0, 16 +; LA64-UAL-NEXT: ld.d $a7, $a1, 16 +; LA64-UAL-NEXT: ld.d $a0, $a0, 23 +; LA64-UAL-NEXT: ld.d $a1, $a1, 23 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a3, $a4, $a5 +; LA64-UAL-NEXT: xor $a4, $a6, $a7 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a1, $a2, $a3 +; LA64-UAL-NEXT: or $a0, $a4, $a0 +; LA64-UAL-NEXT: or $a0, $a1, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_31: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 31 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_31: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 31 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 31) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_32(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: bcmp_size_32: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.w $a4, $a0, 4 +; LA32-UAL-NEXT: ld.w $a5, $a1, 4 +; LA32-UAL-NEXT: ld.w $a6, $a0, 8 +; LA32-UAL-NEXT: ld.w $a7, $a1, 8 +; LA32-UAL-NEXT: ld.w $t0, $a0, 12 +; LA32-UAL-NEXT: ld.w $t1, $a1, 12 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a3, $a4, $a5 +; LA32-UAL-NEXT: xor $a4, $a6, $a7 +; LA32-UAL-NEXT: xor $a5, $t0, $t1 +; LA32-UAL-NEXT: ld.w $a6, $a0, 16 +; LA32-UAL-NEXT: ld.w $a7, $a1, 16 +; LA32-UAL-NEXT: ld.w $t0, $a0, 20 +; LA32-UAL-NEXT: ld.w $t1, $a1, 20 +; LA32-UAL-NEXT: ld.w $t2, $a0, 24 +; LA32-UAL-NEXT: ld.w $t3, $a1, 24 +; LA32-UAL-NEXT: ld.w $a0, $a0, 28 +; LA32-UAL-NEXT: ld.w $a1, $a1, 28 +; LA32-UAL-NEXT: xor $a6, $a6, $a7 +; LA32-UAL-NEXT: xor $a7, $t0, $t1 +; LA32-UAL-NEXT: xor $t0, $t2, $t3 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a1, $a2, $a3 +; LA32-UAL-NEXT: or $a2, $a4, $a5 +; LA32-UAL-NEXT: or $a3, $a6, $a7 +; LA32-UAL-NEXT: or $a0, $t0, $a0 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: or $a0, $a3, $a0 +; LA32-UAL-NEXT: or $a0, $a1, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_32: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: ld.d $a4, $a0, 8 +; LA64-UAL-NEXT: ld.d $a5, $a1, 8 +; LA64-UAL-NEXT: ld.d $a6, $a0, 16 +; LA64-UAL-NEXT: ld.d $a7, $a1, 16 +; LA64-UAL-NEXT: ld.d $a0, $a0, 24 +; LA64-UAL-NEXT: ld.d $a1, $a1, 24 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a3, $a4, $a5 +; LA64-UAL-NEXT: xor $a4, $a6, $a7 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a1, $a2, $a3 +; LA64-UAL-NEXT: or $a0, $a4, $a0 +; LA64-UAL-NEXT: or $a0, $a1, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_32: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 32 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_32: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 32 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 32) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_63(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: bcmp_size_63: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 63 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_63: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: ld.d $a4, $a0, 8 +; LA64-UAL-NEXT: ld.d $a5, $a1, 8 +; LA64-UAL-NEXT: ld.d $a6, $a0, 16 +; LA64-UAL-NEXT: ld.d $a7, $a1, 16 +; LA64-UAL-NEXT: ld.d $t0, $a0, 24 +; LA64-UAL-NEXT: ld.d $t1, $a1, 24 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a3, $a4, $a5 +; LA64-UAL-NEXT: xor $a4, $a6, $a7 +; LA64-UAL-NEXT: xor $a5, $t0, $t1 +; LA64-UAL-NEXT: ld.d $a6, $a0, 32 +; LA64-UAL-NEXT: ld.d $a7, $a1, 32 +; LA64-UAL-NEXT: ld.d $t0, $a0, 40 +; LA64-UAL-NEXT: ld.d $t1, $a1, 40 +; LA64-UAL-NEXT: ld.d $t2, $a0, 48 +; LA64-UAL-NEXT: ld.d $t3, $a1, 48 +; LA64-UAL-NEXT: ld.d $a0, $a0, 55 +; LA64-UAL-NEXT: ld.d $a1, $a1, 55 +; LA64-UAL-NEXT: xor $a6, $a6, $a7 +; LA64-UAL-NEXT: xor $a7, $t0, $t1 +; LA64-UAL-NEXT: xor $t0, $t2, $t3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a1, $a2, $a3 +; LA64-UAL-NEXT: or $a2, $a4, $a5 +; LA64-UAL-NEXT: or $a3, $a6, $a7 +; LA64-UAL-NEXT: or $a0, $t0, $a0 +; LA64-UAL-NEXT: or $a1, $a1, $a2 +; LA64-UAL-NEXT: or $a0, $a3, $a0 +; LA64-UAL-NEXT: or $a0, $a1, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_63: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 63 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 63) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_64(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: bcmp_size_64: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 64 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_64: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: ld.d $a4, $a0, 8 +; LA64-UAL-NEXT: ld.d $a5, $a1, 8 +; LA64-UAL-NEXT: ld.d $a6, $a0, 16 +; LA64-UAL-NEXT: ld.d $a7, $a1, 16 +; LA64-UAL-NEXT: ld.d $t0, $a0, 24 +; LA64-UAL-NEXT: ld.d $t1, $a1, 24 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a3, $a4, $a5 +; LA64-UAL-NEXT: xor $a4, $a6, $a7 +; LA64-UAL-NEXT: xor $a5, $t0, $t1 +; LA64-UAL-NEXT: ld.d $a6, $a0, 32 +; LA64-UAL-NEXT: ld.d $a7, $a1, 32 +; LA64-UAL-NEXT: ld.d $t0, $a0, 40 +; LA64-UAL-NEXT: ld.d $t1, $a1, 40 +; LA64-UAL-NEXT: ld.d $t2, $a0, 48 +; LA64-UAL-NEXT: ld.d $t3, $a1, 48 +; LA64-UAL-NEXT: ld.d $a0, $a0, 56 +; LA64-UAL-NEXT: ld.d $a1, $a1, 56 +; LA64-UAL-NEXT: xor $a6, $a6, $a7 +; LA64-UAL-NEXT: xor $a7, $t0, $t1 +; LA64-UAL-NEXT: xor $t0, $t2, $t3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a1, $a2, $a3 +; LA64-UAL-NEXT: or $a2, $a4, $a5 +; LA64-UAL-NEXT: or $a3, $a6, $a7 +; LA64-UAL-NEXT: or $a0, $t0, $a0 +; LA64-UAL-NEXT: or $a1, $a1, $a2 +; LA64-UAL-NEXT: or $a0, $a3, $a0 +; LA64-UAL-NEXT: or $a0, $a1, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_64: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 64 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 64) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_127(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: bcmp_size_127: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 127 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_127: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 127 +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 127) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_128(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: bcmp_size_128: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 128 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_128: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 128 +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 128) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_runtime(ptr %s1, ptr %s2, iGRLen %len) nounwind { +; LA32-LABEL: bcmp_size_runtime: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_runtime: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen %len) + ret i32 %bcmp +} + +define i1 @bcmp_eq_zero(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: bcmp_eq_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.w $a4, $a0, 4 +; LA32-UAL-NEXT: ld.w $a5, $a1, 4 +; LA32-UAL-NEXT: ld.w $a6, $a0, 8 +; LA32-UAL-NEXT: ld.w $a7, $a1, 8 +; LA32-UAL-NEXT: ld.w $a0, $a0, 12 +; LA32-UAL-NEXT: ld.w $a1, $a1, 12 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a3, $a4, $a5 +; LA32-UAL-NEXT: xor $a4, $a6, $a7 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a1, $a2, $a3 +; LA32-UAL-NEXT: or $a0, $a4, $a0 +; LA32-UAL-NEXT: or $a0, $a1, $a0 +; LA32-UAL-NEXT: sltui $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_eq_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: ld.d $a0, $a0, 8 +; LA64-UAL-NEXT: ld.d $a1, $a1, 8 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a0, $a2, $a0 +; LA64-UAL-NEXT: sltui $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_eq_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 16 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: sltui $a0, $a0, 1 +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_eq_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 16 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: sltui $a0, $a0, 1 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 16) + %ret = icmp eq i32 %bcmp, 0 + ret i1 %ret +} + +define i1 @bcmp_lt_zero(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: bcmp_lt_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: move $a0, $zero +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_lt_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: move $a0, $zero +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_lt_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: srli.w $a0, $a0, 31 +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_lt_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: slti $a0, $a0, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4) + %ret = icmp slt i32 %bcmp, 0 + ret i1 %ret +} + +define i1 @bcmp_gt_zero(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: bcmp_gt_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a0, $a0, 0 +; LA32-UAL-NEXT: ld.w $a1, $a1, 0 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_gt_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a0, $a0, 0 +; LA64-UAL-NEXT: ld.w $a1, $a1, 0 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_gt_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: slt $a0, $zero, $a0 +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_gt_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: slt $a0, $zero, $a0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4) + %ret = icmp sgt i32 %bcmp, 0 + ret i1 %ret +} + +define i1 @bcmp_le_zero(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: bcmp_le_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a0, $a0, 0 +; LA32-UAL-NEXT: ld.w $a1, $a1, 0 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: slti $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_le_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a0, $a0, 0 +; LA64-UAL-NEXT: ld.w $a1, $a1, 0 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: slti $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_le_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: slti $a0, $a0, 1 +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_le_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: slti $a0, $a0, 1 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4) + %ret = icmp slt i32 %bcmp, 1 + ret i1 %ret +} + +define i1 @bcmp_ge_zero(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: bcmp_ge_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ori $a0, $zero, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_ge_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ori $a0, $zero, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_ge_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: addi.w $a1, $zero, -1 +; LA32-NUAL-NEXT: slt $a0, $a1, $a0 +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_ge_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: addi.w $a1, $zero, -1 +; LA64-NUAL-NEXT: slt $a0, $a1, $a0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4) + %ret = icmp sgt i32 %bcmp, -1 + ret i1 %ret +} + +define signext i32 @memcmp_size_0(ptr %s1, ptr %s2) nounwind { +; CHECK-LABEL: memcmp_size_0: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: move $a0, $zero +; CHECK-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 0) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_1(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: memcmp_size_1: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.bu $a0, $a0, 0 +; LA32-UAL-NEXT: ld.bu $a1, $a1, 0 +; LA32-UAL-NEXT: sub.w $a0, $a0, $a1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_1: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.bu $a0, $a0, 0 +; LA64-UAL-NEXT: ld.bu $a1, $a1, 0 +; LA64-UAL-NEXT: sub.d $a0, $a0, $a1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_1: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 1 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_1: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 1 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 1) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_2(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: memcmp_size_2: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.hu $a0, $a0, 0 +; LA32-UAL-NEXT: ld.hu $a1, $a1, 0 +; LA32-UAL-NEXT: srli.w $a2, $a0, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 8 +; LA32-UAL-NEXT: or $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a2, $a1, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 8 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: lu12i.w $a2, 15 +; LA32-UAL-NEXT: ori $a2, $a2, 4095 +; LA32-UAL-NEXT: and $a0, $a0, $a2 +; LA32-UAL-NEXT: and $a1, $a1, $a2 +; LA32-UAL-NEXT: sub.w $a0, $a0, $a1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_2: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.h $a0, $a0, 0 +; LA64-UAL-NEXT: ld.h $a1, $a1, 0 +; LA64-UAL-NEXT: revb.2h $a0, $a0 +; LA64-UAL-NEXT: revb.2h $a1, $a1 +; LA64-UAL-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-UAL-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-UAL-NEXT: sub.d $a0, $a0, $a1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_2: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 2 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_2: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 2 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 2) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_3(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: memcmp_size_3: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.bu $a2, $a0, 2 +; LA32-UAL-NEXT: ld.hu $a0, $a0, 0 +; LA32-UAL-NEXT: ld.bu $a3, $a1, 2 +; LA32-UAL-NEXT: ld.hu $a1, $a1, 0 +; LA32-UAL-NEXT: lu12i.w $a4, 15 +; LA32-UAL-NEXT: ori $a4, $a4, 3840 +; LA32-UAL-NEXT: and $a5, $a0, $a4 +; LA32-UAL-NEXT: or $a2, $a5, $a2 +; LA32-UAL-NEXT: slli.w $a2, $a2, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a2, $a0 +; LA32-UAL-NEXT: and $a2, $a1, $a4 +; LA32-UAL-NEXT: or $a2, $a2, $a3 +; LA32-UAL-NEXT: slli.w $a2, $a2, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a2, $a1 +; LA32-UAL-NEXT: sltu $a2, $a0, $a1 +; LA32-UAL-NEXT: sltu $a0, $a1, $a0 +; LA32-UAL-NEXT: sub.w $a0, $a0, $a2 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_3: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.bu $a2, $a0, 2 +; LA64-UAL-NEXT: ld.hu $a0, $a0, 0 +; LA64-UAL-NEXT: ld.bu $a3, $a1, 2 +; LA64-UAL-NEXT: ld.hu $a1, $a1, 0 +; LA64-UAL-NEXT: slli.d $a2, $a2, 16 +; LA64-UAL-NEXT: or $a0, $a0, $a2 +; LA64-UAL-NEXT: slli.d $a2, $a3, 16 +; LA64-UAL-NEXT: or $a1, $a1, $a2 +; LA64-UAL-NEXT: revb.2w $a0, $a0 +; LA64-UAL-NEXT: addi.w $a0, $a0, 0 +; LA64-UAL-NEXT: revb.2w $a1, $a1 +; LA64-UAL-NEXT: addi.w $a1, $a1, 0 +; LA64-UAL-NEXT: sltu $a2, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $a1, $a0 +; LA64-UAL-NEXT: sub.d $a0, $a0, $a2 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_3: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 3 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_3: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 3 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 3) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_4(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: memcmp_size_4: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a0, $a0, 0 +; LA32-UAL-NEXT: ld.w $a1, $a1, 0 +; LA32-UAL-NEXT: srli.w $a2, $a0, 8 +; LA32-UAL-NEXT: lu12i.w $a3, 15 +; LA32-UAL-NEXT: ori $a3, $a3, 3840 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a3 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a2, $a1, 8 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a3, $a1, $a3 +; LA32-UAL-NEXT: slli.w $a3, $a3, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a3 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: sltu $a2, $a0, $a1 +; LA32-UAL-NEXT: sltu $a0, $a1, $a0 +; LA32-UAL-NEXT: sub.w $a0, $a0, $a2 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_4: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a0, $a0, 0 +; LA64-UAL-NEXT: ld.w $a1, $a1, 0 +; LA64-UAL-NEXT: revb.2w $a0, $a0 +; LA64-UAL-NEXT: addi.w $a0, $a0, 0 +; LA64-UAL-NEXT: revb.2w $a1, $a1 +; LA64-UAL-NEXT: addi.w $a1, $a1, 0 +; LA64-UAL-NEXT: sltu $a2, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $a1, $a0 +; LA64-UAL-NEXT: sub.d $a0, $a0, $a2 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_4: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_4: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_5(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: memcmp_size_5: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: srli.w $a4, $a2, 8 +; LA32-UAL-NEXT: lu12i.w $a5, 15 +; LA32-UAL-NEXT: ori $a5, $a5, 3840 +; LA32-UAL-NEXT: and $a4, $a4, $a5 +; LA32-UAL-NEXT: srli.w $a6, $a2, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: and $a6, $a2, $a5 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a2, $a2, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a6 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: srli.w $a4, $a3, 8 +; LA32-UAL-NEXT: and $a4, $a4, $a5 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: and $a5, $a3, $a5 +; LA32-UAL-NEXT: slli.w $a5, $a5, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: or $a3, $a3, $a4 +; LA32-UAL-NEXT: bne $a2, $a3, .LBB28_2 +; LA32-UAL-NEXT: # %bb.1: # %loadbb1 +; LA32-UAL-NEXT: ld.bu $a0, $a0, 4 +; LA32-UAL-NEXT: ld.bu $a1, $a1, 4 +; LA32-UAL-NEXT: sub.w $a0, $a0, $a1 +; LA32-UAL-NEXT: ret +; LA32-UAL-NEXT: .LBB28_2: # %res_block +; LA32-UAL-NEXT: sltu $a0, $a2, $a3 +; LA32-UAL-NEXT: sub.w $a0, $zero, $a0 +; LA32-UAL-NEXT: ori $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_5: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.bu $a2, $a0, 4 +; LA64-UAL-NEXT: ld.wu $a0, $a0, 0 +; LA64-UAL-NEXT: ld.bu $a3, $a1, 4 +; LA64-UAL-NEXT: ld.wu $a1, $a1, 0 +; LA64-UAL-NEXT: slli.d $a2, $a2, 32 +; LA64-UAL-NEXT: or $a0, $a0, $a2 +; LA64-UAL-NEXT: slli.d $a2, $a3, 32 +; LA64-UAL-NEXT: or $a1, $a1, $a2 +; LA64-UAL-NEXT: revb.d $a0, $a0 +; LA64-UAL-NEXT: revb.d $a1, $a1 +; LA64-UAL-NEXT: sltu $a2, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $a1, $a0 +; LA64-UAL-NEXT: sub.d $a0, $a0, $a2 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_5: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 5 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_5: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 5 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 5) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_6(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: memcmp_size_6: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a3, $a0, 0 +; LA32-UAL-NEXT: ld.w $a4, $a1, 0 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: lu12i.w $a2, 15 +; LA32-UAL-NEXT: ori $a6, $a2, 3840 +; LA32-UAL-NEXT: and $a5, $a5, $a6 +; LA32-UAL-NEXT: srli.w $a7, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a7 +; LA32-UAL-NEXT: and $a7, $a3, $a6 +; LA32-UAL-NEXT: slli.w $a7, $a7, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a7 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a6 +; LA32-UAL-NEXT: srli.w $a7, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a7 +; LA32-UAL-NEXT: and $a6, $a4, $a6 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB29_3 +; LA32-UAL-NEXT: # %bb.1: # %loadbb1 +; LA32-UAL-NEXT: ld.hu $a0, $a0, 4 +; LA32-UAL-NEXT: ld.hu $a1, $a1, 4 +; LA32-UAL-NEXT: srli.w $a3, $a0, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 8 +; LA32-UAL-NEXT: or $a0, $a0, $a3 +; LA32-UAL-NEXT: srli.w $a3, $a1, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 8 +; LA32-UAL-NEXT: or $a1, $a1, $a3 +; LA32-UAL-NEXT: ori $a2, $a2, 4095 +; LA32-UAL-NEXT: and $a3, $a0, $a2 +; LA32-UAL-NEXT: and $a4, $a1, $a2 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB29_3 +; LA32-UAL-NEXT: # %bb.2: +; LA32-UAL-NEXT: move $a0, $zero +; LA32-UAL-NEXT: ret +; LA32-UAL-NEXT: .LBB29_3: # %res_block +; LA32-UAL-NEXT: sltu $a0, $a3, $a4 +; LA32-UAL-NEXT: sub.w $a0, $zero, $a0 +; LA32-UAL-NEXT: ori $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_6: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.hu $a2, $a0, 4 +; LA64-UAL-NEXT: ld.wu $a0, $a0, 0 +; LA64-UAL-NEXT: ld.hu $a3, $a1, 4 +; LA64-UAL-NEXT: ld.wu $a1, $a1, 0 +; LA64-UAL-NEXT: slli.d $a2, $a2, 32 +; LA64-UAL-NEXT: or $a0, $a0, $a2 +; LA64-UAL-NEXT: slli.d $a2, $a3, 32 +; LA64-UAL-NEXT: or $a1, $a1, $a2 +; LA64-UAL-NEXT: revb.d $a0, $a0 +; LA64-UAL-NEXT: revb.d $a1, $a1 +; LA64-UAL-NEXT: sltu $a2, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $a1, $a0 +; LA64-UAL-NEXT: sub.d $a0, $a0, $a2 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_6: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 6 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_6: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 6 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 6) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_7(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: memcmp_size_7: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a3, $a0, 0 +; LA32-UAL-NEXT: ld.w $a4, $a1, 0 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: lu12i.w $a2, 15 +; LA32-UAL-NEXT: ori $a2, $a2, 3840 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB30_3 +; LA32-UAL-NEXT: # %bb.1: # %loadbb1 +; LA32-UAL-NEXT: ld.w $a0, $a0, 3 +; LA32-UAL-NEXT: ld.w $a1, $a1, 3 +; LA32-UAL-NEXT: srli.w $a3, $a0, 8 +; LA32-UAL-NEXT: and $a3, $a3, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a2 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a3, $a0, $a3 +; LA32-UAL-NEXT: srli.w $a0, $a1, 8 +; LA32-UAL-NEXT: and $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: and $a2, $a1, $a2 +; LA32-UAL-NEXT: slli.w $a2, $a2, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: or $a4, $a1, $a0 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB30_3 +; LA32-UAL-NEXT: # %bb.2: +; LA32-UAL-NEXT: move $a0, $zero +; LA32-UAL-NEXT: ret +; LA32-UAL-NEXT: .LBB30_3: # %res_block +; LA32-UAL-NEXT: sltu $a0, $a3, $a4 +; LA32-UAL-NEXT: sub.w $a0, $zero, $a0 +; LA32-UAL-NEXT: ori $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_7: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a2, $a0, 0 +; LA64-UAL-NEXT: ld.w $a3, $a1, 0 +; LA64-UAL-NEXT: revb.2w $a2, $a2 +; LA64-UAL-NEXT: addi.w $a4, $a2, 0 +; LA64-UAL-NEXT: revb.2w $a3, $a3 +; LA64-UAL-NEXT: addi.w $a5, $a3, 0 +; LA64-UAL-NEXT: bne $a4, $a5, .LBB30_3 +; LA64-UAL-NEXT: # %bb.1: # %loadbb1 +; LA64-UAL-NEXT: ld.w $a0, $a0, 3 +; LA64-UAL-NEXT: ld.w $a1, $a1, 3 +; LA64-UAL-NEXT: revb.2w $a2, $a0 +; LA64-UAL-NEXT: addi.w $a0, $a2, 0 +; LA64-UAL-NEXT: revb.2w $a3, $a1 +; LA64-UAL-NEXT: addi.w $a1, $a3, 0 +; LA64-UAL-NEXT: bne $a0, $a1, .LBB30_3 +; LA64-UAL-NEXT: # %bb.2: +; LA64-UAL-NEXT: move $a0, $zero +; LA64-UAL-NEXT: ret +; LA64-UAL-NEXT: .LBB30_3: # %res_block +; LA64-UAL-NEXT: addi.w $a0, $a3, 0 +; LA64-UAL-NEXT: addi.w $a1, $a2, 0 +; LA64-UAL-NEXT: sltu $a0, $a1, $a0 +; LA64-UAL-NEXT: sub.d $a0, $zero, $a0 +; LA64-UAL-NEXT: ori $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_7: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 7 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_7: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 7 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 7) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_8(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: memcmp_size_8: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a3, $a0, 0 +; LA32-UAL-NEXT: ld.w $a4, $a1, 0 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: lu12i.w $a2, 15 +; LA32-UAL-NEXT: ori $a2, $a2, 3840 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB31_3 +; LA32-UAL-NEXT: # %bb.1: # %loadbb1 +; LA32-UAL-NEXT: ld.w $a0, $a0, 4 +; LA32-UAL-NEXT: ld.w $a1, $a1, 4 +; LA32-UAL-NEXT: srli.w $a3, $a0, 8 +; LA32-UAL-NEXT: and $a3, $a3, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a2 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a3, $a0, $a3 +; LA32-UAL-NEXT: srli.w $a0, $a1, 8 +; LA32-UAL-NEXT: and $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: and $a2, $a1, $a2 +; LA32-UAL-NEXT: slli.w $a2, $a2, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: or $a4, $a1, $a0 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB31_3 +; LA32-UAL-NEXT: # %bb.2: +; LA32-UAL-NEXT: move $a0, $zero +; LA32-UAL-NEXT: ret +; LA32-UAL-NEXT: .LBB31_3: # %res_block +; LA32-UAL-NEXT: sltu $a0, $a3, $a4 +; LA32-UAL-NEXT: sub.w $a0, $zero, $a0 +; LA32-UAL-NEXT: ori $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_8: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a0, $a0, 0 +; LA64-UAL-NEXT: ld.d $a1, $a1, 0 +; LA64-UAL-NEXT: revb.d $a0, $a0 +; LA64-UAL-NEXT: revb.d $a1, $a1 +; LA64-UAL-NEXT: sltu $a2, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $a1, $a0 +; LA64-UAL-NEXT: sub.d $a0, $a0, $a2 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_8: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 8 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_8: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 8 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 8) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_15(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: memcmp_size_15: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a3, $a0, 0 +; LA32-UAL-NEXT: ld.w $a4, $a1, 0 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: lu12i.w $a2, 15 +; LA32-UAL-NEXT: ori $a2, $a2, 3840 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB32_5 +; LA32-UAL-NEXT: # %bb.1: # %loadbb1 +; LA32-UAL-NEXT: ld.w $a3, $a0, 4 +; LA32-UAL-NEXT: ld.w $a4, $a1, 4 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB32_5 +; LA32-UAL-NEXT: # %bb.2: # %loadbb2 +; LA32-UAL-NEXT: ld.w $a3, $a0, 8 +; LA32-UAL-NEXT: ld.w $a4, $a1, 8 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB32_5 +; LA32-UAL-NEXT: # %bb.3: # %loadbb3 +; LA32-UAL-NEXT: ld.w $a0, $a0, 11 +; LA32-UAL-NEXT: ld.w $a1, $a1, 11 +; LA32-UAL-NEXT: srli.w $a3, $a0, 8 +; LA32-UAL-NEXT: and $a3, $a3, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a2 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a3, $a0, $a3 +; LA32-UAL-NEXT: srli.w $a0, $a1, 8 +; LA32-UAL-NEXT: and $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: and $a2, $a1, $a2 +; LA32-UAL-NEXT: slli.w $a2, $a2, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: or $a4, $a1, $a0 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB32_5 +; LA32-UAL-NEXT: # %bb.4: +; LA32-UAL-NEXT: move $a0, $zero +; LA32-UAL-NEXT: ret +; LA32-UAL-NEXT: .LBB32_5: # %res_block +; LA32-UAL-NEXT: sltu $a0, $a3, $a4 +; LA32-UAL-NEXT: sub.w $a0, $zero, $a0 +; LA32-UAL-NEXT: ori $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_15: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB32_3 +; LA64-UAL-NEXT: # %bb.1: # %loadbb1 +; LA64-UAL-NEXT: ld.d $a0, $a0, 7 +; LA64-UAL-NEXT: ld.d $a1, $a1, 7 +; LA64-UAL-NEXT: revb.d $a2, $a0 +; LA64-UAL-NEXT: revb.d $a3, $a1 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB32_3 +; LA64-UAL-NEXT: # %bb.2: +; LA64-UAL-NEXT: move $a0, $zero +; LA64-UAL-NEXT: ret +; LA64-UAL-NEXT: .LBB32_3: # %res_block +; LA64-UAL-NEXT: sltu $a0, $a2, $a3 +; LA64-UAL-NEXT: sub.d $a0, $zero, $a0 +; LA64-UAL-NEXT: ori $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_15: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 15 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_15: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 15 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 15) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_16(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: memcmp_size_16: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a3, $a0, 0 +; LA32-UAL-NEXT: ld.w $a4, $a1, 0 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: lu12i.w $a2, 15 +; LA32-UAL-NEXT: ori $a2, $a2, 3840 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB33_5 +; LA32-UAL-NEXT: # %bb.1: # %loadbb1 +; LA32-UAL-NEXT: ld.w $a3, $a0, 4 +; LA32-UAL-NEXT: ld.w $a4, $a1, 4 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB33_5 +; LA32-UAL-NEXT: # %bb.2: # %loadbb2 +; LA32-UAL-NEXT: ld.w $a3, $a0, 8 +; LA32-UAL-NEXT: ld.w $a4, $a1, 8 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB33_5 +; LA32-UAL-NEXT: # %bb.3: # %loadbb3 +; LA32-UAL-NEXT: ld.w $a0, $a0, 12 +; LA32-UAL-NEXT: ld.w $a1, $a1, 12 +; LA32-UAL-NEXT: srli.w $a3, $a0, 8 +; LA32-UAL-NEXT: and $a3, $a3, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a2 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a3, $a0, $a3 +; LA32-UAL-NEXT: srli.w $a0, $a1, 8 +; LA32-UAL-NEXT: and $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: and $a2, $a1, $a2 +; LA32-UAL-NEXT: slli.w $a2, $a2, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: or $a4, $a1, $a0 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB33_5 +; LA32-UAL-NEXT: # %bb.4: +; LA32-UAL-NEXT: move $a0, $zero +; LA32-UAL-NEXT: ret +; LA32-UAL-NEXT: .LBB33_5: # %res_block +; LA32-UAL-NEXT: sltu $a0, $a3, $a4 +; LA32-UAL-NEXT: sub.w $a0, $zero, $a0 +; LA32-UAL-NEXT: ori $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_16: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB33_3 +; LA64-UAL-NEXT: # %bb.1: # %loadbb1 +; LA64-UAL-NEXT: ld.d $a0, $a0, 8 +; LA64-UAL-NEXT: ld.d $a1, $a1, 8 +; LA64-UAL-NEXT: revb.d $a2, $a0 +; LA64-UAL-NEXT: revb.d $a3, $a1 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB33_3 +; LA64-UAL-NEXT: # %bb.2: +; LA64-UAL-NEXT: move $a0, $zero +; LA64-UAL-NEXT: ret +; LA64-UAL-NEXT: .LBB33_3: # %res_block +; LA64-UAL-NEXT: sltu $a0, $a2, $a3 +; LA64-UAL-NEXT: sub.d $a0, $zero, $a0 +; LA64-UAL-NEXT: ori $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_16: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 16 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_16: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 16 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 16) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_31(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: memcmp_size_31: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a3, $a0, 0 +; LA32-UAL-NEXT: ld.w $a4, $a1, 0 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: lu12i.w $a2, 15 +; LA32-UAL-NEXT: ori $a2, $a2, 3840 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB34_9 +; LA32-UAL-NEXT: # %bb.1: # %loadbb1 +; LA32-UAL-NEXT: ld.w $a3, $a0, 4 +; LA32-UAL-NEXT: ld.w $a4, $a1, 4 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB34_9 +; LA32-UAL-NEXT: # %bb.2: # %loadbb2 +; LA32-UAL-NEXT: ld.w $a3, $a0, 8 +; LA32-UAL-NEXT: ld.w $a4, $a1, 8 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB34_9 +; LA32-UAL-NEXT: # %bb.3: # %loadbb3 +; LA32-UAL-NEXT: ld.w $a3, $a0, 12 +; LA32-UAL-NEXT: ld.w $a4, $a1, 12 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB34_9 +; LA32-UAL-NEXT: # %bb.4: # %loadbb4 +; LA32-UAL-NEXT: ld.w $a3, $a0, 16 +; LA32-UAL-NEXT: ld.w $a4, $a1, 16 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB34_9 +; LA32-UAL-NEXT: # %bb.5: # %loadbb5 +; LA32-UAL-NEXT: ld.w $a3, $a0, 20 +; LA32-UAL-NEXT: ld.w $a4, $a1, 20 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB34_9 +; LA32-UAL-NEXT: # %bb.6: # %loadbb6 +; LA32-UAL-NEXT: ld.w $a3, $a0, 24 +; LA32-UAL-NEXT: ld.w $a4, $a1, 24 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB34_9 +; LA32-UAL-NEXT: # %bb.7: # %loadbb7 +; LA32-UAL-NEXT: ld.w $a0, $a0, 27 +; LA32-UAL-NEXT: ld.w $a1, $a1, 27 +; LA32-UAL-NEXT: srli.w $a3, $a0, 8 +; LA32-UAL-NEXT: and $a3, $a3, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a2 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a3, $a0, $a3 +; LA32-UAL-NEXT: srli.w $a0, $a1, 8 +; LA32-UAL-NEXT: and $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: and $a2, $a1, $a2 +; LA32-UAL-NEXT: slli.w $a2, $a2, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: or $a4, $a1, $a0 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB34_9 +; LA32-UAL-NEXT: # %bb.8: +; LA32-UAL-NEXT: move $a0, $zero +; LA32-UAL-NEXT: ret +; LA32-UAL-NEXT: .LBB34_9: # %res_block +; LA32-UAL-NEXT: sltu $a0, $a3, $a4 +; LA32-UAL-NEXT: sub.w $a0, $zero, $a0 +; LA32-UAL-NEXT: ori $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_31: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB34_5 +; LA64-UAL-NEXT: # %bb.1: # %loadbb1 +; LA64-UAL-NEXT: ld.d $a2, $a0, 8 +; LA64-UAL-NEXT: ld.d $a3, $a1, 8 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB34_5 +; LA64-UAL-NEXT: # %bb.2: # %loadbb2 +; LA64-UAL-NEXT: ld.d $a2, $a0, 16 +; LA64-UAL-NEXT: ld.d $a3, $a1, 16 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB34_5 +; LA64-UAL-NEXT: # %bb.3: # %loadbb3 +; LA64-UAL-NEXT: ld.d $a0, $a0, 23 +; LA64-UAL-NEXT: ld.d $a1, $a1, 23 +; LA64-UAL-NEXT: revb.d $a2, $a0 +; LA64-UAL-NEXT: revb.d $a3, $a1 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB34_5 +; LA64-UAL-NEXT: # %bb.4: +; LA64-UAL-NEXT: move $a0, $zero +; LA64-UAL-NEXT: ret +; LA64-UAL-NEXT: .LBB34_5: # %res_block +; LA64-UAL-NEXT: sltu $a0, $a2, $a3 +; LA64-UAL-NEXT: sub.d $a0, $zero, $a0 +; LA64-UAL-NEXT: ori $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_31: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 31 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_31: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 31 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 31) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_32(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: memcmp_size_32: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a3, $a0, 0 +; LA32-UAL-NEXT: ld.w $a4, $a1, 0 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: lu12i.w $a2, 15 +; LA32-UAL-NEXT: ori $a2, $a2, 3840 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB35_9 +; LA32-UAL-NEXT: # %bb.1: # %loadbb1 +; LA32-UAL-NEXT: ld.w $a3, $a0, 4 +; LA32-UAL-NEXT: ld.w $a4, $a1, 4 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB35_9 +; LA32-UAL-NEXT: # %bb.2: # %loadbb2 +; LA32-UAL-NEXT: ld.w $a3, $a0, 8 +; LA32-UAL-NEXT: ld.w $a4, $a1, 8 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB35_9 +; LA32-UAL-NEXT: # %bb.3: # %loadbb3 +; LA32-UAL-NEXT: ld.w $a3, $a0, 12 +; LA32-UAL-NEXT: ld.w $a4, $a1, 12 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB35_9 +; LA32-UAL-NEXT: # %bb.4: # %loadbb4 +; LA32-UAL-NEXT: ld.w $a3, $a0, 16 +; LA32-UAL-NEXT: ld.w $a4, $a1, 16 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB35_9 +; LA32-UAL-NEXT: # %bb.5: # %loadbb5 +; LA32-UAL-NEXT: ld.w $a3, $a0, 20 +; LA32-UAL-NEXT: ld.w $a4, $a1, 20 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB35_9 +; LA32-UAL-NEXT: # %bb.6: # %loadbb6 +; LA32-UAL-NEXT: ld.w $a3, $a0, 24 +; LA32-UAL-NEXT: ld.w $a4, $a1, 24 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB35_9 +; LA32-UAL-NEXT: # %bb.7: # %loadbb7 +; LA32-UAL-NEXT: ld.w $a0, $a0, 28 +; LA32-UAL-NEXT: ld.w $a1, $a1, 28 +; LA32-UAL-NEXT: srli.w $a3, $a0, 8 +; LA32-UAL-NEXT: and $a3, $a3, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a2 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a3, $a0, $a3 +; LA32-UAL-NEXT: srli.w $a0, $a1, 8 +; LA32-UAL-NEXT: and $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: and $a2, $a1, $a2 +; LA32-UAL-NEXT: slli.w $a2, $a2, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: or $a4, $a1, $a0 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB35_9 +; LA32-UAL-NEXT: # %bb.8: +; LA32-UAL-NEXT: move $a0, $zero +; LA32-UAL-NEXT: ret +; LA32-UAL-NEXT: .LBB35_9: # %res_block +; LA32-UAL-NEXT: sltu $a0, $a3, $a4 +; LA32-UAL-NEXT: sub.w $a0, $zero, $a0 +; LA32-UAL-NEXT: ori $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_32: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB35_5 +; LA64-UAL-NEXT: # %bb.1: # %loadbb1 +; LA64-UAL-NEXT: ld.d $a2, $a0, 8 +; LA64-UAL-NEXT: ld.d $a3, $a1, 8 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB35_5 +; LA64-UAL-NEXT: # %bb.2: # %loadbb2 +; LA64-UAL-NEXT: ld.d $a2, $a0, 16 +; LA64-UAL-NEXT: ld.d $a3, $a1, 16 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB35_5 +; LA64-UAL-NEXT: # %bb.3: # %loadbb3 +; LA64-UAL-NEXT: ld.d $a0, $a0, 24 +; LA64-UAL-NEXT: ld.d $a1, $a1, 24 +; LA64-UAL-NEXT: revb.d $a2, $a0 +; LA64-UAL-NEXT: revb.d $a3, $a1 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB35_5 +; LA64-UAL-NEXT: # %bb.4: +; LA64-UAL-NEXT: move $a0, $zero +; LA64-UAL-NEXT: ret +; LA64-UAL-NEXT: .LBB35_5: # %res_block +; LA64-UAL-NEXT: sltu $a0, $a2, $a3 +; LA64-UAL-NEXT: sub.d $a0, $zero, $a0 +; LA64-UAL-NEXT: ori $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_32: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 32 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_32: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 32 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 32) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_63(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: memcmp_size_63: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 63 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_63: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB36_9 +; LA64-UAL-NEXT: # %bb.1: # %loadbb1 +; LA64-UAL-NEXT: ld.d $a2, $a0, 8 +; LA64-UAL-NEXT: ld.d $a3, $a1, 8 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB36_9 +; LA64-UAL-NEXT: # %bb.2: # %loadbb2 +; LA64-UAL-NEXT: ld.d $a2, $a0, 16 +; LA64-UAL-NEXT: ld.d $a3, $a1, 16 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB36_9 +; LA64-UAL-NEXT: # %bb.3: # %loadbb3 +; LA64-UAL-NEXT: ld.d $a2, $a0, 24 +; LA64-UAL-NEXT: ld.d $a3, $a1, 24 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB36_9 +; LA64-UAL-NEXT: # %bb.4: # %loadbb4 +; LA64-UAL-NEXT: ld.d $a2, $a0, 32 +; LA64-UAL-NEXT: ld.d $a3, $a1, 32 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB36_9 +; LA64-UAL-NEXT: # %bb.5: # %loadbb5 +; LA64-UAL-NEXT: ld.d $a2, $a0, 40 +; LA64-UAL-NEXT: ld.d $a3, $a1, 40 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB36_9 +; LA64-UAL-NEXT: # %bb.6: # %loadbb6 +; LA64-UAL-NEXT: ld.d $a2, $a0, 48 +; LA64-UAL-NEXT: ld.d $a3, $a1, 48 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB36_9 +; LA64-UAL-NEXT: # %bb.7: # %loadbb7 +; LA64-UAL-NEXT: ld.d $a0, $a0, 55 +; LA64-UAL-NEXT: ld.d $a1, $a1, 55 +; LA64-UAL-NEXT: revb.d $a2, $a0 +; LA64-UAL-NEXT: revb.d $a3, $a1 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB36_9 +; LA64-UAL-NEXT: # %bb.8: +; LA64-UAL-NEXT: move $a0, $zero +; LA64-UAL-NEXT: ret +; LA64-UAL-NEXT: .LBB36_9: # %res_block +; LA64-UAL-NEXT: sltu $a0, $a2, $a3 +; LA64-UAL-NEXT: sub.d $a0, $zero, $a0 +; LA64-UAL-NEXT: ori $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_63: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 63 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 63) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_64(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: memcmp_size_64: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 64 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_64: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB37_9 +; LA64-UAL-NEXT: # %bb.1: # %loadbb1 +; LA64-UAL-NEXT: ld.d $a2, $a0, 8 +; LA64-UAL-NEXT: ld.d $a3, $a1, 8 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB37_9 +; LA64-UAL-NEXT: # %bb.2: # %loadbb2 +; LA64-UAL-NEXT: ld.d $a2, $a0, 16 +; LA64-UAL-NEXT: ld.d $a3, $a1, 16 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB37_9 +; LA64-UAL-NEXT: # %bb.3: # %loadbb3 +; LA64-UAL-NEXT: ld.d $a2, $a0, 24 +; LA64-UAL-NEXT: ld.d $a3, $a1, 24 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB37_9 +; LA64-UAL-NEXT: # %bb.4: # %loadbb4 +; LA64-UAL-NEXT: ld.d $a2, $a0, 32 +; LA64-UAL-NEXT: ld.d $a3, $a1, 32 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB37_9 +; LA64-UAL-NEXT: # %bb.5: # %loadbb5 +; LA64-UAL-NEXT: ld.d $a2, $a0, 40 +; LA64-UAL-NEXT: ld.d $a3, $a1, 40 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB37_9 +; LA64-UAL-NEXT: # %bb.6: # %loadbb6 +; LA64-UAL-NEXT: ld.d $a2, $a0, 48 +; LA64-UAL-NEXT: ld.d $a3, $a1, 48 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB37_9 +; LA64-UAL-NEXT: # %bb.7: # %loadbb7 +; LA64-UAL-NEXT: ld.d $a0, $a0, 56 +; LA64-UAL-NEXT: ld.d $a1, $a1, 56 +; LA64-UAL-NEXT: revb.d $a2, $a0 +; LA64-UAL-NEXT: revb.d $a3, $a1 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB37_9 +; LA64-UAL-NEXT: # %bb.8: +; LA64-UAL-NEXT: move $a0, $zero +; LA64-UAL-NEXT: ret +; LA64-UAL-NEXT: .LBB37_9: # %res_block +; LA64-UAL-NEXT: sltu $a0, $a2, $a3 +; LA64-UAL-NEXT: sub.d $a0, $zero, $a0 +; LA64-UAL-NEXT: ori $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_64: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 64 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 64) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_127(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: memcmp_size_127: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 127 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_size_127: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 127 +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 127) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_128(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: memcmp_size_128: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 128 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_size_128: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 128 +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 128) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_runtime(ptr %s1, ptr %s2, iGRLen %len) nounwind { +; LA32-LABEL: memcmp_size_runtime: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_size_runtime: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen %len) + ret i32 %memcmp +} + +define i1 @memcmp_eq_zero(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: memcmp_eq_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.w $a4, $a0, 4 +; LA32-UAL-NEXT: ld.w $a5, $a1, 4 +; LA32-UAL-NEXT: ld.w $a6, $a0, 8 +; LA32-UAL-NEXT: ld.w $a7, $a1, 8 +; LA32-UAL-NEXT: ld.w $a0, $a0, 12 +; LA32-UAL-NEXT: ld.w $a1, $a1, 12 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a3, $a4, $a5 +; LA32-UAL-NEXT: xor $a4, $a6, $a7 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a1, $a2, $a3 +; LA32-UAL-NEXT: or $a0, $a4, $a0 +; LA32-UAL-NEXT: or $a0, $a1, $a0 +; LA32-UAL-NEXT: sltui $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_eq_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: ld.d $a0, $a0, 8 +; LA64-UAL-NEXT: ld.d $a1, $a1, 8 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a0, $a2, $a0 +; LA64-UAL-NEXT: sltui $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_eq_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 16 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: sltui $a0, $a0, 1 +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_eq_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 16 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: sltui $a0, $a0, 1 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 16) + %ret = icmp eq i32 %memcmp, 0 + ret i1 %ret +} + +define i1 @memcmp_lt_zero(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: memcmp_lt_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a0, $a0, 0 +; LA32-UAL-NEXT: ld.w $a1, $a1, 0 +; LA32-UAL-NEXT: srli.w $a2, $a0, 8 +; LA32-UAL-NEXT: lu12i.w $a3, 15 +; LA32-UAL-NEXT: ori $a3, $a3, 3840 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a3 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a2, $a1, 8 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a3, $a1, $a3 +; LA32-UAL-NEXT: slli.w $a3, $a3, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a3 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: sltu $a0, $a0, $a1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_lt_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a0, $a0, 0 +; LA64-UAL-NEXT: ld.w $a1, $a1, 0 +; LA64-UAL-NEXT: revb.2w $a0, $a0 +; LA64-UAL-NEXT: addi.w $a0, $a0, 0 +; LA64-UAL-NEXT: revb.2w $a1, $a1 +; LA64-UAL-NEXT: addi.w $a1, $a1, 0 +; LA64-UAL-NEXT: sltu $a0, $a0, $a1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_lt_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: srli.w $a0, $a0, 31 +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_lt_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: slti $a0, $a0, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4) + %ret = icmp slt i32 %memcmp, 0 + ret i1 %ret +} + +define i1 @memcmp_gt_zero(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: memcmp_gt_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a0, $a0, 0 +; LA32-UAL-NEXT: ld.w $a1, $a1, 0 +; LA32-UAL-NEXT: srli.w $a2, $a0, 8 +; LA32-UAL-NEXT: lu12i.w $a3, 15 +; LA32-UAL-NEXT: ori $a3, $a3, 3840 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a3 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a2, $a1, 8 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a3, $a1, $a3 +; LA32-UAL-NEXT: slli.w $a3, $a3, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a3 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: sltu $a0, $a1, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_gt_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a0, $a0, 0 +; LA64-UAL-NEXT: ld.w $a1, $a1, 0 +; LA64-UAL-NEXT: revb.2w $a0, $a0 +; LA64-UAL-NEXT: addi.w $a0, $a0, 0 +; LA64-UAL-NEXT: revb.2w $a1, $a1 +; LA64-UAL-NEXT: addi.w $a1, $a1, 0 +; LA64-UAL-NEXT: sltu $a0, $a1, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_gt_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: slt $a0, $zero, $a0 +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_gt_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: slt $a0, $zero, $a0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4) + %ret = icmp sgt i32 %memcmp, 0 + ret i1 %ret +} + +define i1 @memcmp_le_zero(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: memcmp_le_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a0, $a0, 0 +; LA32-UAL-NEXT: ld.w $a1, $a1, 0 +; LA32-UAL-NEXT: srli.w $a2, $a0, 8 +; LA32-UAL-NEXT: lu12i.w $a3, 15 +; LA32-UAL-NEXT: ori $a3, $a3, 3840 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a3 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a2, $a1, 8 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a3, $a1, $a3 +; LA32-UAL-NEXT: slli.w $a3, $a3, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a3 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: sltu $a0, $a1, $a0 +; LA32-UAL-NEXT: xori $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_le_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a0, $a0, 0 +; LA64-UAL-NEXT: ld.w $a1, $a1, 0 +; LA64-UAL-NEXT: revb.2w $a0, $a0 +; LA64-UAL-NEXT: addi.w $a0, $a0, 0 +; LA64-UAL-NEXT: revb.2w $a1, $a1 +; LA64-UAL-NEXT: addi.w $a1, $a1, 0 +; LA64-UAL-NEXT: sltu $a0, $a1, $a0 +; LA64-UAL-NEXT: xori $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_le_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: slti $a0, $a0, 1 +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_le_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: slti $a0, $a0, 1 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4) + %ret = icmp slt i32 %memcmp, 1 + ret i1 %ret +} + +define i1 @memcmp_ge_zero(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: memcmp_ge_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a0, $a0, 0 +; LA32-UAL-NEXT: ld.w $a1, $a1, 0 +; LA32-UAL-NEXT: srli.w $a2, $a0, 8 +; LA32-UAL-NEXT: lu12i.w $a3, 15 +; LA32-UAL-NEXT: ori $a3, $a3, 3840 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a3 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a2, $a1, 8 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a3, $a1, $a3 +; LA32-UAL-NEXT: slli.w $a3, $a3, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a3 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: sltu $a0, $a0, $a1 +; LA32-UAL-NEXT: xori $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_ge_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a0, $a0, 0 +; LA64-UAL-NEXT: ld.w $a1, $a1, 0 +; LA64-UAL-NEXT: revb.2w $a0, $a0 +; LA64-UAL-NEXT: addi.w $a0, $a0, 0 +; LA64-UAL-NEXT: revb.2w $a1, $a1 +; LA64-UAL-NEXT: addi.w $a1, $a1, 0 +; LA64-UAL-NEXT: sltu $a0, $a0, $a1 +; LA64-UAL-NEXT: xori $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_ge_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: addi.w $a1, $zero, -1 +; LA32-NUAL-NEXT: slt $a0, $a1, $a0 +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_ge_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: addi.w $a1, $zero, -1 +; LA64-NUAL-NEXT: slt $a0, $a1, $a0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4) + %ret = icmp sgt i32 %memcmp, -1 + ret i1 %ret +} diff --git a/llvm/test/CodeGen/LoongArch/memcmp.ll b/llvm/test/CodeGen/LoongArch/memcmp.ll index c4aaf9a..c3811c0 100644 --- a/llvm/test/CodeGen/LoongArch/memcmp.ll +++ b/llvm/test/CodeGen/LoongArch/memcmp.ll @@ -7,15 +7,24 @@ define signext i32 @test1(ptr %buffer1, ptr %buffer2) { ; CHECK-LABEL: test1: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi.d $sp, $sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; CHECK-NEXT: .cfi_offset 1, -8 -; CHECK-NEXT: ori $a2, $zero, 16 -; CHECK-NEXT: pcaddu18i $ra, %call36(memcmp) -; CHECK-NEXT: jirl $ra, $ra, 0 -; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ld.d $a2, $a0, 0 +; CHECK-NEXT: ld.d $a3, $a1, 0 +; CHECK-NEXT: revb.d $a2, $a2 +; CHECK-NEXT: revb.d $a3, $a3 +; CHECK-NEXT: bne $a2, $a3, .LBB0_3 +; CHECK-NEXT: # %bb.1: # %loadbb1 +; CHECK-NEXT: ld.d $a0, $a0, 8 +; CHECK-NEXT: ld.d $a1, $a1, 8 +; CHECK-NEXT: revb.d $a2, $a0 +; CHECK-NEXT: revb.d $a3, $a1 +; CHECK-NEXT: bne $a2, $a3, .LBB0_3 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $a0, $zero +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB0_3: # %res_block +; CHECK-NEXT: sltu $a0, $a2, $a3 +; CHECK-NEXT: sub.d $a0, $zero, $a0 +; CHECK-NEXT: ori $a0, $a0, 1 ; CHECK-NEXT: ret entry: %call = call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 16) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-sdnode.ll new file mode 100644 index 0000000..9cfed6a --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-sdnode.ll @@ -0,0 +1,56 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s + +define <2 x bfloat> @copysign_v2bf16(<2 x bfloat> %vm, <2 x bfloat> %vs) { +; CHECK-LABEL: copysign_v2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16alt, mf4, ta, ma +; CHECK-NEXT: vfsgnj.vv v8, v8, v9 +; CHECK-NEXT: ret + %r = call <2 x bfloat> @llvm.copysign.v2bf16(<2 x bfloat> %vm, <2 x bfloat> %vs) + ret <2 x bfloat> %r +} + +define <4 x bfloat> @copysign_v4bf16(<4 x bfloat> %vm, <4 x bfloat> %vs) { +; CHECK-LABEL: copysign_v4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e16alt, mf2, ta, ma +; CHECK-NEXT: vfsgnj.vv v8, v8, v9 +; CHECK-NEXT: ret + %r = call <4 x bfloat> @llvm.copysign.v4bf16(<4 x bfloat> %vm, <4 x bfloat> %vs) + ret <4 x bfloat> %r +} + +define <8 x bfloat> @copysign_v8bf16(<8 x bfloat> %vm, <8 x bfloat> %vs) { +; CHECK-LABEL: copysign_v8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16alt, m1, ta, ma +; CHECK-NEXT: vfsgnj.vv v8, v8, v9 +; CHECK-NEXT: ret + %r = call <8 x bfloat> @llvm.copysign.v8bf16(<8 x bfloat> %vm, <8 x bfloat> %vs) + ret <8 x bfloat> %r +} + +define <16 x bfloat> @copysign_v16bf16(<16 x bfloat> %vm, <16 x bfloat> %vs) { +; CHECK-LABEL: copysign_v16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16alt, m2, ta, ma +; CHECK-NEXT: vfsgnj.vv v8, v8, v10 +; CHECK-NEXT: ret + %r = call <16 x bfloat> @llvm.copysign.v16bf16(<16 x bfloat> %vm, <16 x bfloat> %vs) + ret <16 x bfloat> %r +} + +define <32 x bfloat> @copysign_v32bf32(<32 x bfloat> %vm, <32 x bfloat> %vs) { +; CHECK-LABEL: copysign_v32bf32: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vfsgnj.vv v8, v8, v12 +; CHECK-NEXT: ret + %r = call <32 x bfloat> @llvm.copysign.v32bf32(<32 x bfloat> %vm, <32 x bfloat> %vs) + ret <32 x bfloat> %r +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll index a2178e1..2455d87 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll @@ -1,8 +1,172 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfh,+zvfbfmin,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,ZVFH %s +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfh,+zvfbfmin,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,ZVFH %s +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfh,+experimental-zvfbfa,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,ZVFBFA %s +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfh,+experimental-zvfbfa,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,ZVFBFA %s + +define <2 x bfloat> @vfsgnj_vv_v2bf16(<2 x bfloat> %va, <2 x bfloat> %vb, <2 x i1> %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfsgnj_vv_v2bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vand.vx v9, v9, a1, v0.t +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFH-NEXT: vor.vv v8, v8, v9, v0.t +; ZVFH-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_v2bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9, v0.t +; ZVFBFA-NEXT: ret + %v = call <2 x bfloat> @llvm.vp.copysign.v2bf16(<2 x bfloat> %va, <2 x bfloat> %vb, <2 x i1> %m, i32 %evl) + ret <2 x bfloat> %v +} + +define <2 x bfloat> @vfsgnj_vv_v2bf16_unmasked(<2 x bfloat> %va, <2 x bfloat> %vb, i32 zeroext %evl) { +; ZVFH-LABEL: vfsgnj_vv_v2bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vand.vx v9, v9, a1 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vand.vx v8, v8, a1 +; ZVFH-NEXT: vor.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_v2bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9 +; ZVFBFA-NEXT: ret + %v = call <2 x bfloat> @llvm.vp.copysign.v2bf16(<2 x bfloat> %va, <2 x bfloat> %vb, <2 x i1> splat (i1 true), i32 %evl) + ret <2 x bfloat> %v +} + +define <4 x bfloat> @vfsgnj_vv_v4bf16(<4 x bfloat> %va, <4 x bfloat> %vb, <4 x i1> %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfsgnj_vv_v4bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vand.vx v9, v9, a1, v0.t +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFH-NEXT: vor.vv v8, v8, v9, v0.t +; ZVFH-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_v4bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9, v0.t +; ZVFBFA-NEXT: ret + %v = call <4 x bfloat> @llvm.vp.copysign.v4bf16(<4 x bfloat> %va, <4 x bfloat> %vb, <4 x i1> %m, i32 %evl) + ret <4 x bfloat> %v +} + +define <4 x bfloat> @vfsgnj_vv_v4bf16_unmasked(<4 x bfloat> %va, <4 x bfloat> %vb, i32 zeroext %evl) { +; ZVFH-LABEL: vfsgnj_vv_v4bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vand.vx v9, v9, a1 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vand.vx v8, v8, a1 +; ZVFH-NEXT: vor.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_v4bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9 +; ZVFBFA-NEXT: ret + %v = call <4 x bfloat> @llvm.vp.copysign.v4bf16(<4 x bfloat> %va, <4 x bfloat> %vb, <4 x i1> splat (i1 true), i32 %evl) + ret <4 x bfloat> %v +} + +define <8 x bfloat> @vfsgnj_vv_v8bf16(<8 x bfloat> %va, <8 x bfloat> %vb, <8 x i1> %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfsgnj_vv_v8bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vand.vx v9, v9, a1, v0.t +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFH-NEXT: vor.vv v8, v8, v9, v0.t +; ZVFH-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_v8bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9, v0.t +; ZVFBFA-NEXT: ret + %v = call <8 x bfloat> @llvm.vp.copysign.v8bf16(<8 x bfloat> %va, <8 x bfloat> %vb, <8 x i1> %m, i32 %evl) + ret <8 x bfloat> %v +} + +define <8 x bfloat> @vfsgnj_vv_v8bf16_unmasked(<8 x bfloat> %va, <8 x bfloat> %vb, i32 zeroext %evl) { +; ZVFH-LABEL: vfsgnj_vv_v8bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vand.vx v9, v9, a1 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vand.vx v8, v8, a1 +; ZVFH-NEXT: vor.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_v8bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9 +; ZVFBFA-NEXT: ret + %v = call <8 x bfloat> @llvm.vp.copysign.v8bf16(<8 x bfloat> %va, <8 x bfloat> %vb, <8 x i1> splat (i1 true), i32 %evl) + ret <8 x bfloat> %v +} + +define <16 x bfloat> @vfsgnj_vv_v16bf16(<16 x bfloat> %va, <16 x bfloat> %vb, <16 x i1> %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfsgnj_vv_v16bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vand.vx v10, v10, a1, v0.t +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFH-NEXT: vor.vv v8, v8, v10, v0.t +; ZVFH-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_v16bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v10, v0.t +; ZVFBFA-NEXT: ret + %v = call <16 x bfloat> @llvm.vp.copysign.v16bf16(<16 x bfloat> %va, <16 x bfloat> %vb, <16 x i1> %m, i32 %evl) + ret <16 x bfloat> %v +} + +define <16 x bfloat> @vfsgnj_vv_v16bf16_unmasked(<16 x bfloat> %va, <16 x bfloat> %vb, i32 zeroext %evl) { +; ZVFH-LABEL: vfsgnj_vv_v16bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vand.vx v10, v10, a1 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vand.vx v8, v8, a1 +; ZVFH-NEXT: vor.vv v8, v8, v10 +; ZVFH-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_v16bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v10 +; ZVFBFA-NEXT: ret + %v = call <16 x bfloat> @llvm.vp.copysign.v16bf16(<16 x bfloat> %va, <16 x bfloat> %vb, <16 x i1> splat (i1 true), i32 %evl) + ret <16 x bfloat> %v +} declare <2 x half> @llvm.vp.copysign.v2f16(<2 x half>, <2 x half>, <2 x i1>, i32) @@ -311,10 +475,10 @@ define <32 x double> @vfsgnj_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: mv a0, a2 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v7, v0, 2 -; CHECK-NEXT: bltu a2, a1, .LBB26_2 +; CHECK-NEXT: bltu a2, a1, .LBB34_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: .LBB34_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfsgnj.vv v8, v8, v24, v0.t ; CHECK-NEXT: addi a0, a2, -16 @@ -346,10 +510,10 @@ define <32 x double> @vfsgnj_vv_v32f64_unmasked(<32 x double> %va, <32 x double> ; CHECK-NEXT: vle64.v v0, (a0) ; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: mv a0, a2 -; CHECK-NEXT: bltu a2, a1, .LBB27_2 +; CHECK-NEXT: bltu a2, a1, .LBB35_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: .LBB27_2: +; CHECK-NEXT: .LBB35_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfsgnj.vv v8, v8, v0 ; CHECK-NEXT: addi a0, a2, -16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-sdnode.ll new file mode 100644 index 0000000..27c00de --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-sdnode.ll @@ -0,0 +1,66 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-zvfbfa,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zvfbfa,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s + +define <1 x bfloat> @v1bf16(<1 x bfloat> %v) { +; CHECK-LABEL: v1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e16alt, mf4, ta, ma +; CHECK-NEXT: vfabs.v v8, v8 +; CHECK-NEXT: ret + %r = call <1 x bfloat> @llvm.fabs.v1bf16(<1 x bfloat> %v) + ret <1 x bfloat> %r +} + +define <2 x bfloat> @v2bf16(<2 x bfloat> %v) { +; CHECK-LABEL: v2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16alt, mf4, ta, ma +; CHECK-NEXT: vfabs.v v8, v8 +; CHECK-NEXT: ret + %r = call <2 x bfloat> @llvm.fabs.v2bf16(<2 x bfloat> %v) + ret <2 x bfloat> %r +} + +define <4 x bfloat> @v4bf16(<4 x bfloat> %v) { +; CHECK-LABEL: v4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e16alt, mf2, ta, ma +; CHECK-NEXT: vfabs.v v8, v8 +; CHECK-NEXT: ret + %r = call <4 x bfloat> @llvm.fabs.v4bf16(<4 x bfloat> %v) + ret <4 x bfloat> %r +} + +define <8 x bfloat> @v8bf16(<8 x bfloat> %v) { +; CHECK-LABEL: v8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16alt, m1, ta, ma +; CHECK-NEXT: vfabs.v v8, v8 +; CHECK-NEXT: ret + %r = call <8 x bfloat> @llvm.fabs.v8bf16(<8 x bfloat> %v) + ret <8 x bfloat> %r +} + +define <16 x bfloat> @v16bf16(<16 x bfloat> %v) { +; CHECK-LABEL: v16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16alt, m2, ta, ma +; CHECK-NEXT: vfabs.v v8, v8 +; CHECK-NEXT: ret + %r = call <16 x bfloat> @llvm.fabs.v16bf16(<16 x bfloat> %v) + ret <16 x bfloat> %r +} + +define <32 x bfloat> @v32bf16(<32 x bfloat> %v) { +; CHECK-LABEL: v32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vfabs.v v8, v8 +; CHECK-NEXT: ret + %r = call <32 x bfloat> @llvm.fabs.v32bf16(<32 x bfloat> %v) + ret <32 x bfloat> %r +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll index 08f486b..01bd706 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll @@ -1,12 +1,224 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zvfbfmin,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zvfbfmin,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+zvfbfmin,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN -; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zvfbfmin,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA +; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA + +define <2 x bfloat> @vfabs_vv_v2bf16(<2 x bfloat> %va, <2 x i1> %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfabs_vv_v2bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_v2bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_v2bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8, v0.t +; ZVFBFA-NEXT: ret + %v = call <2 x bfloat> @llvm.vp.fabs.v2bf16(<2 x bfloat> %va, <2 x i1> %m, i32 %evl) + ret <2 x bfloat> %v +} + +define <2 x bfloat> @vfabs_vv_v2bf16_unmasked(<2 x bfloat> %va, i32 zeroext %evl) { +; ZVFH-LABEL: vfabs_vv_v2bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a1 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_v2bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_v2bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8 +; ZVFBFA-NEXT: ret + %v = call <2 x bfloat> @llvm.vp.fabs.v2bf16(<2 x bfloat> %va, <2 x i1> splat (i1 true), i32 %evl) + ret <2 x bfloat> %v +} + +define <4 x bfloat> @vfabs_vv_v4bf16(<4 x bfloat> %va, <4 x i1> %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfabs_vv_v4bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_v4bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_v4bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8, v0.t +; ZVFBFA-NEXT: ret + %v = call <4 x bfloat> @llvm.vp.fabs.v4bf16(<4 x bfloat> %va, <4 x i1> %m, i32 %evl) + ret <4 x bfloat> %v +} + +define <4 x bfloat> @vfabs_vv_v4bf16_unmasked(<4 x bfloat> %va, i32 zeroext %evl) { +; ZVFH-LABEL: vfabs_vv_v4bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a1 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_v4bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_v4bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8 +; ZVFBFA-NEXT: ret + %v = call <4 x bfloat> @llvm.vp.fabs.v4bf16(<4 x bfloat> %va, <4 x i1> splat (i1 true), i32 %evl) + ret <4 x bfloat> %v +} + +define <8 x bfloat> @vfabs_vv_v8bf16(<8 x bfloat> %va, <8 x i1> %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfabs_vv_v8bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_v8bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_v8bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8, v0.t +; ZVFBFA-NEXT: ret + %v = call <8 x bfloat> @llvm.vp.fabs.v8bf16(<8 x bfloat> %va, <8 x i1> %m, i32 %evl) + ret <8 x bfloat> %v +} + +define <8 x bfloat> @vfabs_vv_v8bf16_unmasked(<8 x bfloat> %va, i32 zeroext %evl) { +; ZVFH-LABEL: vfabs_vv_v8bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a1 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_v8bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_v8bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8 +; ZVFBFA-NEXT: ret + %v = call <8 x bfloat> @llvm.vp.fabs.v8bf16(<8 x bfloat> %va, <8 x i1> splat (i1 true), i32 %evl) + ret <8 x bfloat> %v +} + +define <16 x bfloat> @vfabs_vv_v16bf16(<16 x bfloat> %va, <16 x i1> %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfabs_vv_v16bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_v16bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_v16bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8, v0.t +; ZVFBFA-NEXT: ret + %v = call <16 x bfloat> @llvm.vp.fabs.v16bf16(<16 x bfloat> %va, <16 x i1> %m, i32 %evl) + ret <16 x bfloat> %v +} + +define <16 x bfloat> @vfabs_vv_v16bf16_unmasked(<16 x bfloat> %va, i32 zeroext %evl) { +; ZVFH-LABEL: vfabs_vv_v16bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a1 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_v16bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_v16bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8 +; ZVFBFA-NEXT: ret + %v = call <16 x bfloat> @llvm.vp.fabs.v16bf16(<16 x bfloat> %va, <16 x i1> splat (i1 true), i32 %evl) + ret <16 x bfloat> %v +} declare <2 x half> @llvm.vp.fabs.v2f16(<2 x half>, <2 x i1>, i32) @@ -24,6 +236,14 @@ define <2 x half> @vfabs_vv_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl) ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_v2f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFBFA-NEXT: ret %v = call <2 x half> @llvm.vp.fabs.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl) ret <2 x half> %v } @@ -42,6 +262,14 @@ define <2 x half> @vfabs_vv_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) { ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_v2f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: ret %v = call <2 x half> @llvm.vp.fabs.v2f16(<2 x half> %va, <2 x i1> splat (i1 true), i32 %evl) ret <2 x half> %v } @@ -62,6 +290,14 @@ define <4 x half> @vfabs_vv_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_v4f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFBFA-NEXT: ret %v = call <4 x half> @llvm.vp.fabs.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl) ret <4 x half> %v } @@ -80,6 +316,14 @@ define <4 x half> @vfabs_vv_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) { ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_v4f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: ret %v = call <4 x half> @llvm.vp.fabs.v4f16(<4 x half> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x half> %v } @@ -100,6 +344,14 @@ define <8 x half> @vfabs_vv_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_v8f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFBFA-NEXT: ret %v = call <8 x half> @llvm.vp.fabs.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl) ret <8 x half> %v } @@ -118,6 +370,14 @@ define <8 x half> @vfabs_vv_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) { ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_v8f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: ret %v = call <8 x half> @llvm.vp.fabs.v8f16(<8 x half> %va, <8 x i1> splat (i1 true), i32 %evl) ret <8 x half> %v } @@ -138,6 +398,14 @@ define <16 x half> @vfabs_vv_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext % ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_v16f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFBFA-NEXT: ret %v = call <16 x half> @llvm.vp.fabs.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl) ret <16 x half> %v } @@ -156,6 +424,14 @@ define <16 x half> @vfabs_vv_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_v16f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: ret %v = call <16 x half> @llvm.vp.fabs.v16f16(<16 x half> %va, <16 x i1> splat (i1 true), i32 %evl) ret <16 x half> %v } @@ -367,10 +643,10 @@ define <32 x double> @vfabs_vv_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroe ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v0, 2 ; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: bltu a0, a2, .LBB26_2 +; CHECK-NEXT: bltu a0, a2, .LBB34_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: .LBB34_2: ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v8, v8, v0.t ; CHECK-NEXT: addi a1, a0, -16 @@ -390,10 +666,10 @@ define <32 x double> @vfabs_vv_v32f64_unmasked(<32 x double> %va, i32 zeroext %e ; CHECK: # %bb.0: ; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: bltu a0, a2, .LBB27_2 +; CHECK-NEXT: bltu a0, a2, .LBB35_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: .LBB27_2: +; CHECK-NEXT: .LBB35_2: ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v8, v8 ; CHECK-NEXT: addi a1, a0, -16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-sdnode.ll new file mode 100644 index 0000000..b3b9a62 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-sdnode.ll @@ -0,0 +1,66 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-zvfbfa,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zvfbfa,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s + +define <1 x bfloat> @v1bf16(<1 x bfloat> %va) { +; CHECK-LABEL: v1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e16alt, mf4, ta, ma +; CHECK-NEXT: vfneg.v v8, v8 +; CHECK-NEXT: ret + %vb = fneg <1 x bfloat> %va + ret <1 x bfloat> %vb +} + +define <2 x bfloat> @v2bf16(<2 x bfloat> %va) { +; CHECK-LABEL: v2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16alt, mf4, ta, ma +; CHECK-NEXT: vfneg.v v8, v8 +; CHECK-NEXT: ret + %vb = fneg <2 x bfloat> %va + ret <2 x bfloat> %vb +} + +define <4 x bfloat> @v4bf16(<4 x bfloat> %va) { +; CHECK-LABEL: v4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e16alt, mf2, ta, ma +; CHECK-NEXT: vfneg.v v8, v8 +; CHECK-NEXT: ret + %vb = fneg <4 x bfloat> %va + ret <4 x bfloat> %vb +} + +define <8 x bfloat> @v8bf16(<8 x bfloat> %va) { +; CHECK-LABEL: v8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16alt, m1, ta, ma +; CHECK-NEXT: vfneg.v v8, v8 +; CHECK-NEXT: ret + %vb = fneg <8 x bfloat> %va + ret <8 x bfloat> %vb +} + +define <16 x bfloat> @v16bf16(<16 x bfloat> %va) { +; CHECK-LABEL: v16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16alt, m2, ta, ma +; CHECK-NEXT: vfneg.v v8, v8 +; CHECK-NEXT: ret + %vb = fneg <16 x bfloat> %va + ret <16 x bfloat> %vb +} + +define <32 x bfloat> @v32bf16(<32 x bfloat> %va) { +; CHECK-LABEL: v32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vfneg.v v8, v8 +; CHECK-NEXT: ret + %vb = fneg <32 x bfloat> %va + ret <32 x bfloat> %vb +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll index 968fd9f9..dede0e7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll @@ -1,12 +1,208 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zvfbfmin,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zvfbfmin,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+zvfbfmin,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN -; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zvfbfmin,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA +; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA + +define <2 x bfloat> @vfneg_vv_v2bf16(<2 x bfloat> %va, <2 x i1> %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfneg_vv_v2bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_v2bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_v2bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8, v0.t +; ZVFBFA-NEXT: ret + %v = call <2 x bfloat> @llvm.vp.fneg.v2bf16(<2 x bfloat> %va, <2 x i1> %m, i32 %evl) + ret <2 x bfloat> %v +} + +define <2 x bfloat> @vfneg_vv_v2bf16_unmasked(<2 x bfloat> %va, i32 zeroext %evl) { +; ZVFH-LABEL: vfneg_vv_v2bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a1 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_v2bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_v2bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8 +; ZVFBFA-NEXT: ret + %v = call <2 x bfloat> @llvm.vp.fneg.v2bf16(<2 x bfloat> %va, <2 x i1> splat (i1 true), i32 %evl) + ret <2 x bfloat> %v +} + +define <4 x bfloat> @vfneg_vv_v4bf16(<4 x bfloat> %va, <4 x i1> %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfneg_vv_v4bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_v4bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_v4bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8, v0.t +; ZVFBFA-NEXT: ret + %v = call <4 x bfloat> @llvm.vp.fneg.v4bf16(<4 x bfloat> %va, <4 x i1> %m, i32 %evl) + ret <4 x bfloat> %v +} + +define <4 x bfloat> @vfneg_vv_v4bf16_unmasked(<4 x bfloat> %va, i32 zeroext %evl) { +; ZVFH-LABEL: vfneg_vv_v4bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a1 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_v4bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_v4bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8 +; ZVFBFA-NEXT: ret + %v = call <4 x bfloat> @llvm.vp.fneg.v4bf16(<4 x bfloat> %va, <4 x i1> splat (i1 true), i32 %evl) + ret <4 x bfloat> %v +} + +define <8 x bfloat> @vfneg_vv_v8bf16(<8 x bfloat> %va, <8 x i1> %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfneg_vv_v8bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_v8bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_v8bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8, v0.t +; ZVFBFA-NEXT: ret + %v = call <8 x bfloat> @llvm.vp.fneg.v8bf16(<8 x bfloat> %va, <8 x i1> %m, i32 %evl) + ret <8 x bfloat> %v +} + +define <8 x bfloat> @vfneg_vv_v8bf16_unmasked(<8 x bfloat> %va, i32 zeroext %evl) { +; ZVFH-LABEL: vfneg_vv_v8bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a1 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_v8bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_v8bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8 +; ZVFBFA-NEXT: ret + %v = call <8 x bfloat> @llvm.vp.fneg.v8bf16(<8 x bfloat> %va, <8 x i1> splat (i1 true), i32 %evl) + ret <8 x bfloat> %v +} + +define <16 x bfloat> @vfneg_vv_v16bf16(<16 x bfloat> %va, <16 x i1> %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfneg_vv_v16bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_v16bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_v16bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8, v0.t +; ZVFBFA-NEXT: ret + %v = call <16 x bfloat> @llvm.vp.fneg.v16bf16(<16 x bfloat> %va, <16 x i1> %m, i32 %evl) + ret <16 x bfloat> %v +} + +define <16 x bfloat> @vfneg_vv_v16bf16_unmasked(<16 x bfloat> %va, i32 zeroext %evl) { +; ZVFH-LABEL: vfneg_vv_v16bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a1 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_v16bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_v16bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8 +; ZVFBFA-NEXT: ret + %v = call <16 x bfloat> @llvm.vp.fneg.v16bf16(<16 x bfloat> %va, <16 x i1> splat (i1 true), i32 %evl) + ret <16 x bfloat> %v +} declare <2 x half> @llvm.vp.fneg.v2f16(<2 x half>, <2 x i1>, i32) @@ -23,6 +219,13 @@ define <2 x half> @vfneg_vv_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl) ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_v2f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFBFA-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFBFA-NEXT: ret %v = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl) ret <2 x half> %v } @@ -40,6 +243,13 @@ define <2 x half> @vfneg_vv_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) { ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_v2f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFBFA-NEXT: vxor.vx v8, v8, a1 +; ZVFBFA-NEXT: ret %v = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %va, <2 x i1> splat (i1 true), i32 %evl) ret <2 x half> %v } @@ -59,6 +269,13 @@ define <4 x half> @vfneg_vv_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_v4f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFBFA-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFBFA-NEXT: ret %v = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl) ret <4 x half> %v } @@ -76,6 +293,13 @@ define <4 x half> @vfneg_vv_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) { ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_v4f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFBFA-NEXT: vxor.vx v8, v8, a1 +; ZVFBFA-NEXT: ret %v = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x half> %v } @@ -95,6 +319,13 @@ define <8 x half> @vfneg_vv_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_v8f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFBFA-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFBFA-NEXT: ret %v = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl) ret <8 x half> %v } @@ -112,6 +343,13 @@ define <8 x half> @vfneg_vv_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) { ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_v8f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFBFA-NEXT: vxor.vx v8, v8, a1 +; ZVFBFA-NEXT: ret %v = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %va, <8 x i1> splat (i1 true), i32 %evl) ret <8 x half> %v } @@ -131,6 +369,13 @@ define <16 x half> @vfneg_vv_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext % ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_v16f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFBFA-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFBFA-NEXT: ret %v = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl) ret <16 x half> %v } @@ -148,6 +393,13 @@ define <16 x half> @vfneg_vv_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_v16f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFBFA-NEXT: vxor.vx v8, v8, a1 +; ZVFBFA-NEXT: ret %v = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %va, <16 x i1> splat (i1 true), i32 %evl) ret <16 x half> %v } @@ -359,10 +611,10 @@ define <32 x double> @vfneg_vv_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroe ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v0, 2 ; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: bltu a0, a2, .LBB26_2 +; CHECK-NEXT: bltu a0, a2, .LBB34_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: .LBB34_2: ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfneg.v v8, v8, v0.t ; CHECK-NEXT: addi a1, a0, -16 @@ -382,10 +634,10 @@ define <32 x double> @vfneg_vv_v32f64_unmasked(<32 x double> %va, i32 zeroext %e ; CHECK: # %bb.0: ; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: bltu a0, a2, .LBB27_2 +; CHECK-NEXT: bltu a0, a2, .LBB35_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: .LBB27_2: +; CHECK-NEXT: .LBB35_2: ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfneg.v v8, v8 ; CHECK-NEXT: addi a1, a0, -16 diff --git a/llvm/test/CodeGen/RISCV/rvv/vcopysign-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vcopysign-vp.ll index ccf82b9..2f5fde3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vcopysign-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vcopysign-vp.ll @@ -1,12 +1,376 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zvfbfmin,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zvfbfmin,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zvfbfmin,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zvfbfmin,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA + +define <vscale x 1 x bfloat> @vfsgnj_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfsgnj_vv_nxv1bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vand.vx v9, v9, a1, v0.t +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFH-NEXT: vor.vv v8, v8, v9, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsgnj_vv_nxv1bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vand.vx v9, v9, a1, v0.t +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: vor.vv v8, v8, v9, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_nxv1bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9, v0.t +; ZVFBFA-NEXT: ret + %v = call <vscale x 1 x bfloat> @llvm.vp.copysign.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x bfloat> %v +} + +define <vscale x 1 x bfloat> @vfsgnj_vv_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, i32 zeroext %evl) { +; ZVFH-LABEL: vfsgnj_vv_nxv1bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vand.vx v9, v9, a1 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vand.vx v8, v8, a1 +; ZVFH-NEXT: vor.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsgnj_vv_nxv1bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vand.vx v9, v9, a1 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: vor.vv v8, v8, v9 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_nxv1bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9 +; ZVFBFA-NEXT: ret + %v = call <vscale x 1 x bfloat> @llvm.vp.copysign.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl) + ret <vscale x 1 x bfloat> %v +} + +define <vscale x 2 x bfloat> @vfsgnj_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfsgnj_vv_nxv2bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vand.vx v9, v9, a1, v0.t +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFH-NEXT: vor.vv v8, v8, v9, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsgnj_vv_nxv2bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vand.vx v9, v9, a1, v0.t +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: vor.vv v8, v8, v9, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_nxv2bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9, v0.t +; ZVFBFA-NEXT: ret + %v = call <vscale x 2 x bfloat> @llvm.vp.copysign.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x bfloat> %v +} + +define <vscale x 2 x bfloat> @vfsgnj_vv_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, i32 zeroext %evl) { +; ZVFH-LABEL: vfsgnj_vv_nxv2bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vand.vx v9, v9, a1 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vand.vx v8, v8, a1 +; ZVFH-NEXT: vor.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsgnj_vv_nxv2bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vand.vx v9, v9, a1 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: vor.vv v8, v8, v9 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_nxv2bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9 +; ZVFBFA-NEXT: ret + %v = call <vscale x 2 x bfloat> @llvm.vp.copysign.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl) + ret <vscale x 2 x bfloat> %v +} + +define <vscale x 4 x bfloat> @vfsgnj_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfsgnj_vv_nxv4bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vand.vx v9, v9, a1, v0.t +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFH-NEXT: vor.vv v8, v8, v9, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsgnj_vv_nxv4bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vand.vx v9, v9, a1, v0.t +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: vor.vv v8, v8, v9, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_nxv4bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9, v0.t +; ZVFBFA-NEXT: ret + %v = call <vscale x 4 x bfloat> @llvm.vp.copysign.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x bfloat> %v +} + +define <vscale x 4 x bfloat> @vfsgnj_vv_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, i32 zeroext %evl) { +; ZVFH-LABEL: vfsgnj_vv_nxv4bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vand.vx v9, v9, a1 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vand.vx v8, v8, a1 +; ZVFH-NEXT: vor.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsgnj_vv_nxv4bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vand.vx v9, v9, a1 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: vor.vv v8, v8, v9 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_nxv4bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9 +; ZVFBFA-NEXT: ret + %v = call <vscale x 4 x bfloat> @llvm.vp.copysign.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl) + ret <vscale x 4 x bfloat> %v +} + +define <vscale x 8 x bfloat> @vfsgnj_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfsgnj_vv_nxv8bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vand.vx v10, v10, a1, v0.t +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFH-NEXT: vor.vv v8, v8, v10, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsgnj_vv_nxv8bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vand.vx v10, v10, a1, v0.t +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: vor.vv v8, v8, v10, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_nxv8bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v10, v0.t +; ZVFBFA-NEXT: ret + %v = call <vscale x 8 x bfloat> @llvm.vp.copysign.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x bfloat> %v +} + +define <vscale x 8 x bfloat> @vfsgnj_vv_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, i32 zeroext %evl) { +; ZVFH-LABEL: vfsgnj_vv_nxv8bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vand.vx v10, v10, a1 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vand.vx v8, v8, a1 +; ZVFH-NEXT: vor.vv v8, v8, v10 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsgnj_vv_nxv8bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vand.vx v10, v10, a1 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: vor.vv v8, v8, v10 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_nxv8bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v10 +; ZVFBFA-NEXT: ret + %v = call <vscale x 8 x bfloat> @llvm.vp.copysign.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl) + ret <vscale x 8 x bfloat> %v +} + +define <vscale x 16 x bfloat> @vfsgnj_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfsgnj_vv_nxv16bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFH-NEXT: vand.vx v12, v12, a1, v0.t +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFH-NEXT: vor.vv v8, v8, v12, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsgnj_vv_nxv16bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFHMIN-NEXT: vand.vx v12, v12, a1, v0.t +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: vor.vv v8, v8, v12, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_nxv16bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v12, v0.t +; ZVFBFA-NEXT: ret + %v = call <vscale x 16 x bfloat> @llvm.vp.copysign.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x bfloat> %v +} + +define <vscale x 16 x bfloat> @vfsgnj_vv_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, i32 zeroext %evl) { +; ZVFH-LABEL: vfsgnj_vv_nxv16bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFH-NEXT: vand.vx v12, v12, a1 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vand.vx v8, v8, a1 +; ZVFH-NEXT: vor.vv v8, v8, v12 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsgnj_vv_nxv16bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFHMIN-NEXT: vand.vx v12, v12, a1 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: vor.vv v8, v8, v12 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_nxv16bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v12 +; ZVFBFA-NEXT: ret + %v = call <vscale x 16 x bfloat> @llvm.vp.copysign.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> splat (i1 true), i32 %evl) + ret <vscale x 16 x bfloat> %v +} + +define <vscale x 32 x bfloat> @vfsgnj_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfsgnj_vv_nxv32bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFH-NEXT: vand.vx v16, v16, a1, v0.t +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFH-NEXT: vor.vv v8, v8, v16, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsgnj_vv_nxv32bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFHMIN-NEXT: vand.vx v16, v16, a1, v0.t +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: vor.vv v8, v8, v16, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_nxv32bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v16, v0.t +; ZVFBFA-NEXT: ret + %v = call <vscale x 32 x bfloat> @llvm.vp.copysign.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x bfloat> %v +} + +define <vscale x 32 x bfloat> @vfsgnj_vv_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, i32 zeroext %evl) { +; ZVFH-LABEL: vfsgnj_vv_nxv32bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFH-NEXT: vand.vx v16, v16, a1 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vand.vx v8, v8, a1 +; ZVFH-NEXT: vor.vv v8, v8, v16 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfsgnj_vv_nxv32bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFHMIN-NEXT: vand.vx v16, v16, a1 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: vor.vv v8, v8, v16 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_nxv32bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v16 +; ZVFBFA-NEXT: ret + %v = call <vscale x 32 x bfloat> @llvm.vp.copysign.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> splat (i1 true), i32 %evl) + ret <vscale x 32 x bfloat> %v +} declare <vscale x 1 x half> @llvm.vp.copysign.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>, <vscale x 1 x i1>, i32) @@ -26,6 +390,16 @@ define <vscale x 1 x half> @vfsgnj_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x ; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t ; ZVFHMIN-NEXT: vor.vv v8, v8, v9, v0.t ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_nxv1f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFBFA-NEXT: vand.vx v9, v9, a1, v0.t +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFBFA-NEXT: vor.vv v8, v8, v9, v0.t +; ZVFBFA-NEXT: ret %v = call <vscale x 1 x half> @llvm.vp.copysign.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x i1> %m, i32 %evl) ret <vscale x 1 x half> %v } @@ -46,6 +420,16 @@ define <vscale x 1 x half> @vfsgnj_vv_nxv1f16_unmasked(<vscale x 1 x half> %va, ; ZVFHMIN-NEXT: vand.vx v8, v8, a1 ; ZVFHMIN-NEXT: vor.vv v8, v8, v9 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_nxv1f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFBFA-NEXT: vand.vx v9, v9, a1 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: vor.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %v = call <vscale x 1 x half> @llvm.vp.copysign.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl) ret <vscale x 1 x half> %v } @@ -68,6 +452,16 @@ define <vscale x 2 x half> @vfsgnj_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x ; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t ; ZVFHMIN-NEXT: vor.vv v8, v8, v9, v0.t ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_nxv2f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFBFA-NEXT: vand.vx v9, v9, a1, v0.t +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFBFA-NEXT: vor.vv v8, v8, v9, v0.t +; ZVFBFA-NEXT: ret %v = call <vscale x 2 x half> @llvm.vp.copysign.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, <vscale x 2 x i1> %m, i32 %evl) ret <vscale x 2 x half> %v } @@ -88,6 +482,16 @@ define <vscale x 2 x half> @vfsgnj_vv_nxv2f16_unmasked(<vscale x 2 x half> %va, ; ZVFHMIN-NEXT: vand.vx v8, v8, a1 ; ZVFHMIN-NEXT: vor.vv v8, v8, v9 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_nxv2f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFBFA-NEXT: vand.vx v9, v9, a1 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: vor.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %v = call <vscale x 2 x half> @llvm.vp.copysign.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl) ret <vscale x 2 x half> %v } @@ -110,6 +514,16 @@ define <vscale x 4 x half> @vfsgnj_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x ; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t ; ZVFHMIN-NEXT: vor.vv v8, v8, v9, v0.t ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_nxv4f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFBFA-NEXT: vand.vx v9, v9, a1, v0.t +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFBFA-NEXT: vor.vv v8, v8, v9, v0.t +; ZVFBFA-NEXT: ret %v = call <vscale x 4 x half> @llvm.vp.copysign.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, <vscale x 4 x i1> %m, i32 %evl) ret <vscale x 4 x half> %v } @@ -130,6 +544,16 @@ define <vscale x 4 x half> @vfsgnj_vv_nxv4f16_unmasked(<vscale x 4 x half> %va, ; ZVFHMIN-NEXT: vand.vx v8, v8, a1 ; ZVFHMIN-NEXT: vor.vv v8, v8, v9 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_nxv4f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFBFA-NEXT: vand.vx v9, v9, a1 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: vor.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %v = call <vscale x 4 x half> @llvm.vp.copysign.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl) ret <vscale x 4 x half> %v } @@ -152,6 +576,16 @@ define <vscale x 8 x half> @vfsgnj_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t ; ZVFHMIN-NEXT: vor.vv v8, v8, v10, v0.t ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_nxv8f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFBFA-NEXT: vand.vx v10, v10, a1, v0.t +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFBFA-NEXT: vor.vv v8, v8, v10, v0.t +; ZVFBFA-NEXT: ret %v = call <vscale x 8 x half> @llvm.vp.copysign.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x half> %v } @@ -172,6 +606,16 @@ define <vscale x 8 x half> @vfsgnj_vv_nxv8f16_unmasked(<vscale x 8 x half> %va, ; ZVFHMIN-NEXT: vand.vx v8, v8, a1 ; ZVFHMIN-NEXT: vor.vv v8, v8, v10 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_nxv8f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFBFA-NEXT: vand.vx v10, v10, a1 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: vor.vv v8, v8, v10 +; ZVFBFA-NEXT: ret %v = call <vscale x 8 x half> @llvm.vp.copysign.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl) ret <vscale x 8 x half> %v } @@ -194,6 +638,16 @@ define <vscale x 16 x half> @vfsgnj_vv_nxv16f16(<vscale x 16 x half> %va, <vscal ; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t ; ZVFHMIN-NEXT: vor.vv v8, v8, v12, v0.t ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_nxv16f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFBFA-NEXT: vand.vx v12, v12, a1, v0.t +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFBFA-NEXT: vor.vv v8, v8, v12, v0.t +; ZVFBFA-NEXT: ret %v = call <vscale x 16 x half> @llvm.vp.copysign.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, <vscale x 16 x i1> %m, i32 %evl) ret <vscale x 16 x half> %v } @@ -214,6 +668,16 @@ define <vscale x 16 x half> @vfsgnj_vv_nxv16f16_unmasked(<vscale x 16 x half> %v ; ZVFHMIN-NEXT: vand.vx v8, v8, a1 ; ZVFHMIN-NEXT: vor.vv v8, v8, v12 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_nxv16f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFBFA-NEXT: vand.vx v12, v12, a1 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: vor.vv v8, v8, v12 +; ZVFBFA-NEXT: ret %v = call <vscale x 16 x half> @llvm.vp.copysign.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, <vscale x 16 x i1> splat (i1 true), i32 %evl) ret <vscale x 16 x half> %v } @@ -236,6 +700,16 @@ define <vscale x 32 x half> @vfsgnj_vv_nxv32f16(<vscale x 32 x half> %va, <vscal ; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t ; ZVFHMIN-NEXT: vor.vv v8, v8, v16, v0.t ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_nxv32f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFBFA-NEXT: vand.vx v16, v16, a1, v0.t +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFBFA-NEXT: vor.vv v8, v8, v16, v0.t +; ZVFBFA-NEXT: ret %v = call <vscale x 32 x half> @llvm.vp.copysign.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, <vscale x 32 x i1> %m, i32 %evl) ret <vscale x 32 x half> %v } @@ -256,6 +730,16 @@ define <vscale x 32 x half> @vfsgnj_vv_nxv32f16_unmasked(<vscale x 32 x half> %v ; ZVFHMIN-NEXT: vand.vx v8, v8, a1 ; ZVFHMIN-NEXT: vor.vv v8, v8, v16 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfsgnj_vv_nxv32f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFBFA-NEXT: vand.vx v16, v16, a1 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: vor.vv v8, v8, v16 +; ZVFBFA-NEXT: ret %v = call <vscale x 32 x half> @llvm.vp.copysign.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, <vscale x 32 x i1> splat (i1 true), i32 %evl) ret <vscale x 32 x half> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfabs-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfabs-sdnode.ll index 1d86388..28426ad 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfabs-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfabs-sdnode.ll @@ -11,75 +11,165 @@ ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zvfbfmin,+v \ ; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ ; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFBFA +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFBFA define <vscale x 1 x bfloat> @nxv1bf16(<vscale x 1 x bfloat> %v) { -; CHECK-LABEL: nxv1bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 8 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: ret +; ZVFH-LABEL: nxv1bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, 8 +; ZVFH-NEXT: addi a0, a0, -1 +; ZVFH-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: nxv1bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: addi a0, a0, -1 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a0 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: nxv1bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8 +; ZVFBFA-NEXT: ret %r = call <vscale x 1 x bfloat> @llvm.fabs.nxv1bf16(<vscale x 1 x bfloat> %v) ret <vscale x 1 x bfloat> %r } define <vscale x 2 x bfloat> @nxv2bf16(<vscale x 2 x bfloat> %v) { -; CHECK-LABEL: nxv2bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 8 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: ret +; ZVFH-LABEL: nxv2bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, 8 +; ZVFH-NEXT: addi a0, a0, -1 +; ZVFH-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: nxv2bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: addi a0, a0, -1 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a0 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: nxv2bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8 +; ZVFBFA-NEXT: ret %r = call <vscale x 2 x bfloat> @llvm.fabs.nxv2bf16(<vscale x 2 x bfloat> %v) ret <vscale x 2 x bfloat> %r } define <vscale x 4 x bfloat> @nxv4bf16(<vscale x 4 x bfloat> %v) { -; CHECK-LABEL: nxv4bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 8 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: ret +; ZVFH-LABEL: nxv4bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, 8 +; ZVFH-NEXT: addi a0, a0, -1 +; ZVFH-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: nxv4bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: addi a0, a0, -1 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a0 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: nxv4bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8 +; ZVFBFA-NEXT: ret %r = call <vscale x 4 x bfloat> @llvm.fabs.nxv4bf16(<vscale x 4 x bfloat> %v) ret <vscale x 4 x bfloat> %r } define <vscale x 8 x bfloat> @nxv8bf16(<vscale x 8 x bfloat> %v) { -; CHECK-LABEL: nxv8bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 8 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: ret +; ZVFH-LABEL: nxv8bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, 8 +; ZVFH-NEXT: addi a0, a0, -1 +; ZVFH-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: nxv8bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: addi a0, a0, -1 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a0 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: nxv8bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8 +; ZVFBFA-NEXT: ret %r = call <vscale x 8 x bfloat> @llvm.fabs.nxv8bf16(<vscale x 8 x bfloat> %v) ret <vscale x 8 x bfloat> %r } define <vscale x 16 x bfloat> @nxv16bf16(<vscale x 16 x bfloat> %v) { -; CHECK-LABEL: nxv16bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 8 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: ret +; ZVFH-LABEL: nxv16bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, 8 +; ZVFH-NEXT: addi a0, a0, -1 +; ZVFH-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: nxv16bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: addi a0, a0, -1 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a0 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: nxv16bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8 +; ZVFBFA-NEXT: ret %r = call <vscale x 16 x bfloat> @llvm.fabs.nxv16bf16(<vscale x 16 x bfloat> %v) ret <vscale x 16 x bfloat> %r } define <vscale x 32 x bfloat> @nxv32bf16(<vscale x 32 x bfloat> %v) { -; CHECK-LABEL: nxv32bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 8 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: ret +; ZVFH-LABEL: nxv32bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, 8 +; ZVFH-NEXT: addi a0, a0, -1 +; ZVFH-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: nxv32bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: addi a0, a0, -1 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a0 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: nxv32bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m8, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8 +; ZVFBFA-NEXT: ret %r = call <vscale x 32 x bfloat> @llvm.fabs.nxv32bf16(<vscale x 32 x bfloat> %v) ret <vscale x 32 x bfloat> %r } @@ -100,6 +190,14 @@ define <vscale x 1 x half> @vfabs_nxv1f16(<vscale x 1 x half> %v) { ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a0 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_nxv1f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: addi a0, a0, -1 +; ZVFBFA-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: ret %r = call <vscale x 1 x half> @llvm.fabs.nxv1f16(<vscale x 1 x half> %v) ret <vscale x 1 x half> %r } @@ -120,6 +218,14 @@ define <vscale x 2 x half> @vfabs_nxv2f16(<vscale x 2 x half> %v) { ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a0 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_nxv2f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: addi a0, a0, -1 +; ZVFBFA-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: ret %r = call <vscale x 2 x half> @llvm.fabs.nxv2f16(<vscale x 2 x half> %v) ret <vscale x 2 x half> %r } @@ -140,6 +246,14 @@ define <vscale x 4 x half> @vfabs_nxv4f16(<vscale x 4 x half> %v) { ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a0 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_nxv4f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: addi a0, a0, -1 +; ZVFBFA-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: ret %r = call <vscale x 4 x half> @llvm.fabs.nxv4f16(<vscale x 4 x half> %v) ret <vscale x 4 x half> %r } @@ -160,6 +274,14 @@ define <vscale x 8 x half> @vfabs_nxv8f16(<vscale x 8 x half> %v) { ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a0 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_nxv8f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: addi a0, a0, -1 +; ZVFBFA-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: ret %r = call <vscale x 8 x half> @llvm.fabs.nxv8f16(<vscale x 8 x half> %v) ret <vscale x 8 x half> %r } @@ -180,6 +302,14 @@ define <vscale x 16 x half> @vfabs_nxv16f16(<vscale x 16 x half> %v) { ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a0 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_nxv16f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: addi a0, a0, -1 +; ZVFBFA-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: ret %r = call <vscale x 16 x half> @llvm.fabs.nxv16f16(<vscale x 16 x half> %v) ret <vscale x 16 x half> %r } @@ -200,6 +330,14 @@ define <vscale x 32 x half> @vfabs_nxv32f16(<vscale x 32 x half> %v) { ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a0 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_nxv32f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: addi a0, a0, -1 +; ZVFBFA-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: ret %r = call <vscale x 32 x half> @llvm.fabs.nxv32f16(<vscale x 32 x half> %v) ret <vscale x 32 x half> %r } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll index 8f9f9c4..c6888c0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll @@ -1,12 +1,328 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zvfbfmin,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zvfbfmin,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zvfbfmin,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zvfbfmin,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v,+experimental-zvfbfa -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v,+experimental-zvfbfa -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA + +define <vscale x 1 x bfloat> @vfabs_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfabs_vv_nxv1bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_nxv1bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_nxv1bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8, v0.t +; ZVFBFA-NEXT: ret + %v = call <vscale x 1 x bfloat> @llvm.vp.fabs.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x bfloat> %v +} + +define <vscale x 1 x bfloat> @vfabs_vv_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, i32 zeroext %evl) { +; ZVFH-LABEL: vfabs_vv_nxv1bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a1 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_nxv1bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_nxv1bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8 +; ZVFBFA-NEXT: ret + %v = call <vscale x 1 x bfloat> @llvm.vp.fabs.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl) + ret <vscale x 1 x bfloat> %v +} + +define <vscale x 2 x bfloat> @vfabs_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfabs_vv_nxv2bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_nxv2bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_nxv2bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8, v0.t +; ZVFBFA-NEXT: ret + %v = call <vscale x 2 x bfloat> @llvm.vp.fabs.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x bfloat> %v +} + +define <vscale x 2 x bfloat> @vfabs_vv_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, i32 zeroext %evl) { +; ZVFH-LABEL: vfabs_vv_nxv2bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a1 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_nxv2bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_nxv2bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8 +; ZVFBFA-NEXT: ret + %v = call <vscale x 2 x bfloat> @llvm.vp.fabs.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl) + ret <vscale x 2 x bfloat> %v +} + +define <vscale x 4 x bfloat> @vfabs_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfabs_vv_nxv4bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_nxv4bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_nxv4bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8, v0.t +; ZVFBFA-NEXT: ret + %v = call <vscale x 4 x bfloat> @llvm.vp.fabs.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x bfloat> %v +} + +define <vscale x 4 x bfloat> @vfabs_vv_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, i32 zeroext %evl) { +; ZVFH-LABEL: vfabs_vv_nxv4bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a1 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_nxv4bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_nxv4bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8 +; ZVFBFA-NEXT: ret + %v = call <vscale x 4 x bfloat> @llvm.vp.fabs.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl) + ret <vscale x 4 x bfloat> %v +} + +define <vscale x 8 x bfloat> @vfabs_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfabs_vv_nxv8bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_nxv8bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_nxv8bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8, v0.t +; ZVFBFA-NEXT: ret + %v = call <vscale x 8 x bfloat> @llvm.vp.fabs.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x bfloat> %v +} + +define <vscale x 8 x bfloat> @vfabs_vv_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, i32 zeroext %evl) { +; ZVFH-LABEL: vfabs_vv_nxv8bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a1 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_nxv8bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_nxv8bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8 +; ZVFBFA-NEXT: ret + %v = call <vscale x 8 x bfloat> @llvm.vp.fabs.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl) + ret <vscale x 8 x bfloat> %v +} + +define <vscale x 16 x bfloat> @vfabs_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfabs_vv_nxv16bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_nxv16bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_nxv16bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8, v0.t +; ZVFBFA-NEXT: ret + %v = call <vscale x 16 x bfloat> @llvm.vp.fabs.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x bfloat> %v +} + +define <vscale x 16 x bfloat> @vfabs_vv_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, i32 zeroext %evl) { +; ZVFH-LABEL: vfabs_vv_nxv16bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a1 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_nxv16bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_nxv16bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8 +; ZVFBFA-NEXT: ret + %v = call <vscale x 16 x bfloat> @llvm.vp.fabs.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl) + ret <vscale x 16 x bfloat> %v +} + +define <vscale x 32 x bfloat> @vfabs_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfabs_vv_nxv32bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_nxv32bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_nxv32bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8, v0.t +; ZVFBFA-NEXT: ret + %v = call <vscale x 32 x bfloat> @llvm.vp.fabs.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x bfloat> %v +} + +define <vscale x 32 x bfloat> @vfabs_vv_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, i32 zeroext %evl) { +; ZVFH-LABEL: vfabs_vv_nxv32bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFH-NEXT: vand.vx v8, v8, a1 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfabs_vv_nxv32bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_nxv32bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma +; ZVFBFA-NEXT: vfabs.v v8, v8 +; ZVFBFA-NEXT: ret + %v = call <vscale x 32 x bfloat> @llvm.vp.fabs.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl) + ret <vscale x 32 x bfloat> %v +} declare <vscale x 1 x half> @llvm.vp.fabs.nxv1f16(<vscale x 1 x half>, <vscale x 1 x i1>, i32) @@ -24,6 +340,14 @@ define <vscale x 1 x half> @vfabs_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_nxv1f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFBFA-NEXT: ret %v = call <vscale x 1 x half> @llvm.vp.fabs.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 %evl) ret <vscale x 1 x half> %v } @@ -42,6 +366,14 @@ define <vscale x 1 x half> @vfabs_vv_nxv1f16_unmasked(<vscale x 1 x half> %va, i ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_nxv1f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: ret %v = call <vscale x 1 x half> @llvm.vp.fabs.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl) ret <vscale x 1 x half> %v } @@ -62,6 +394,14 @@ define <vscale x 2 x half> @vfabs_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_nxv2f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFBFA-NEXT: ret %v = call <vscale x 2 x half> @llvm.vp.fabs.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 %evl) ret <vscale x 2 x half> %v } @@ -80,6 +420,14 @@ define <vscale x 2 x half> @vfabs_vv_nxv2f16_unmasked(<vscale x 2 x half> %va, i ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_nxv2f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: ret %v = call <vscale x 2 x half> @llvm.vp.fabs.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl) ret <vscale x 2 x half> %v } @@ -100,6 +448,14 @@ define <vscale x 4 x half> @vfabs_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_nxv4f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFBFA-NEXT: ret %v = call <vscale x 4 x half> @llvm.vp.fabs.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 %evl) ret <vscale x 4 x half> %v } @@ -118,6 +474,14 @@ define <vscale x 4 x half> @vfabs_vv_nxv4f16_unmasked(<vscale x 4 x half> %va, i ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_nxv4f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: ret %v = call <vscale x 4 x half> @llvm.vp.fabs.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl) ret <vscale x 4 x half> %v } @@ -138,6 +502,14 @@ define <vscale x 8 x half> @vfabs_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_nxv8f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFBFA-NEXT: ret %v = call <vscale x 8 x half> @llvm.vp.fabs.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x half> %v } @@ -156,6 +528,14 @@ define <vscale x 8 x half> @vfabs_vv_nxv8f16_unmasked(<vscale x 8 x half> %va, i ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_nxv8f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: ret %v = call <vscale x 8 x half> @llvm.vp.fabs.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl) ret <vscale x 8 x half> %v } @@ -176,6 +556,14 @@ define <vscale x 16 x half> @vfabs_vv_nxv16f16(<vscale x 16 x half> %va, <vscale ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_nxv16f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFBFA-NEXT: ret %v = call <vscale x 16 x half> @llvm.vp.fabs.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 %evl) ret <vscale x 16 x half> %v } @@ -194,6 +582,14 @@ define <vscale x 16 x half> @vfabs_vv_nxv16f16_unmasked(<vscale x 16 x half> %va ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_nxv16f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: ret %v = call <vscale x 16 x half> @llvm.vp.fabs.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl) ret <vscale x 16 x half> %v } @@ -214,6 +610,14 @@ define <vscale x 32 x half> @vfabs_vv_nxv32f16(<vscale x 32 x half> %va, <vscale ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_nxv32f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a1, v0.t +; ZVFBFA-NEXT: ret %v = call <vscale x 32 x half> @llvm.vp.fabs.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 %evl) ret <vscale x 32 x half> %v } @@ -232,6 +636,14 @@ define <vscale x 32 x half> @vfabs_vv_nxv32f16_unmasked(<vscale x 32 x half> %va ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFHMIN-NEXT: vand.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfabs_vv_nxv32f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: addi a1, a1, -1 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: ret %v = call <vscale x 32 x half> @llvm.vp.fabs.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl) ret <vscale x 32 x half> %v } @@ -473,10 +885,10 @@ define <vscale x 16 x double> @vfabs_vv_nxv16f64(<vscale x 16 x double> %va, <vs ; CHECK-NEXT: and a2, a2, a3 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB32_2 +; CHECK-NEXT: bltu a0, a1, .LBB44_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB32_2: +; CHECK-NEXT: .LBB44_2: ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v8, v8, v0.t @@ -495,10 +907,10 @@ define <vscale x 16 x double> @vfabs_vv_nxv16f64_unmasked(<vscale x 16 x double> ; CHECK-NEXT: and a2, a3, a2 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v16 -; CHECK-NEXT: bltu a0, a1, .LBB33_2 +; CHECK-NEXT: bltu a0, a1, .LBB45_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB33_2: +; CHECK-NEXT: .LBB45_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v8, v8 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll index 83f588c..bef2e8d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll @@ -11,87 +11,189 @@ ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zvfbfmin,+v \ ; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ ; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFBFA +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFBFA define <vscale x 1 x bfloat> @nxv1bf16(<vscale x 1 x bfloat> %vm, <vscale x 1 x bfloat> %vs) { -; CHECK-LABEL: nxv1bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 8 -; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; CHECK-NEXT: vand.vx v9, v9, a0 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: ret +; ZVFH-LABEL: nxv1bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, 8 +; ZVFH-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFH-NEXT: vand.vx v9, v9, a0 +; ZVFH-NEXT: addi a0, a0, -1 +; ZVFH-NEXT: vand.vx v8, v8, a0 +; ZVFH-NEXT: vor.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: nxv1bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vand.vx v9, v9, a0 +; ZVFHMIN-NEXT: addi a0, a0, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a0 +; ZVFHMIN-NEXT: vor.vv v8, v8, v9 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: nxv1bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %r = call <vscale x 1 x bfloat> @llvm.copysign.nxv1bf16(<vscale x 1 x bfloat> %vm, <vscale x 1 x bfloat> %vs) ret <vscale x 1 x bfloat> %r } define <vscale x 2 x bfloat> @nxv2bf16(<vscale x 2 x bfloat> %vm, <vscale x 2 x bfloat> %vs) { -; CHECK-LABEL: nxv2bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 8 -; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; CHECK-NEXT: vand.vx v9, v9, a0 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: ret +; ZVFH-LABEL: nxv2bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, 8 +; ZVFH-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFH-NEXT: vand.vx v9, v9, a0 +; ZVFH-NEXT: addi a0, a0, -1 +; ZVFH-NEXT: vand.vx v8, v8, a0 +; ZVFH-NEXT: vor.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: nxv2bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vand.vx v9, v9, a0 +; ZVFHMIN-NEXT: addi a0, a0, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a0 +; ZVFHMIN-NEXT: vor.vv v8, v8, v9 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: nxv2bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %r = call <vscale x 2 x bfloat> @llvm.copysign.nxv2bf16(<vscale x 2 x bfloat> %vm, <vscale x 2 x bfloat> %vs) ret <vscale x 2 x bfloat> %r } define <vscale x 4 x bfloat> @nxv4bf16(<vscale x 4 x bfloat> %vm, <vscale x 4 x bfloat> %vs) { -; CHECK-LABEL: nxv4bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 8 -; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; CHECK-NEXT: vand.vx v9, v9, a0 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: ret +; ZVFH-LABEL: nxv4bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, 8 +; ZVFH-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFH-NEXT: vand.vx v9, v9, a0 +; ZVFH-NEXT: addi a0, a0, -1 +; ZVFH-NEXT: vand.vx v8, v8, a0 +; ZVFH-NEXT: vor.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: nxv4bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vand.vx v9, v9, a0 +; ZVFHMIN-NEXT: addi a0, a0, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a0 +; ZVFHMIN-NEXT: vor.vv v8, v8, v9 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: nxv4bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %r = call <vscale x 4 x bfloat> @llvm.copysign.nxv4bf16(<vscale x 4 x bfloat> %vm, <vscale x 4 x bfloat> %vs) ret <vscale x 4 x bfloat> %r } define <vscale x 8 x bfloat> @nxv8bf16(<vscale x 8 x bfloat> %vm, <vscale x 8 x bfloat> %vs) { -; CHECK-LABEL: nxv8bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 8 -; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; CHECK-NEXT: vand.vx v10, v10, a0 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: vor.vv v8, v8, v10 -; CHECK-NEXT: ret +; ZVFH-LABEL: nxv8bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, 8 +; ZVFH-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFH-NEXT: vand.vx v10, v10, a0 +; ZVFH-NEXT: addi a0, a0, -1 +; ZVFH-NEXT: vand.vx v8, v8, a0 +; ZVFH-NEXT: vor.vv v8, v8, v10 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: nxv8bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vand.vx v10, v10, a0 +; ZVFHMIN-NEXT: addi a0, a0, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a0 +; ZVFHMIN-NEXT: vor.vv v8, v8, v10 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: nxv8bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v10 +; ZVFBFA-NEXT: ret %r = call <vscale x 8 x bfloat> @llvm.copysign.nxv8bf16(<vscale x 8 x bfloat> %vm, <vscale x 8 x bfloat> %vs) ret <vscale x 8 x bfloat> %r } define <vscale x 16 x bfloat> @nxv16bf16(<vscale x 16 x bfloat> %vm, <vscale x 16 x bfloat> %vs) { -; CHECK-LABEL: nxv16bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 8 -; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; CHECK-NEXT: vand.vx v12, v12, a0 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: vor.vv v8, v8, v12 -; CHECK-NEXT: ret +; ZVFH-LABEL: nxv16bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, 8 +; ZVFH-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFH-NEXT: vand.vx v12, v12, a0 +; ZVFH-NEXT: addi a0, a0, -1 +; ZVFH-NEXT: vand.vx v8, v8, a0 +; ZVFH-NEXT: vor.vv v8, v8, v12 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: nxv16bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vand.vx v12, v12, a0 +; ZVFHMIN-NEXT: addi a0, a0, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a0 +; ZVFHMIN-NEXT: vor.vv v8, v8, v12 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: nxv16bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v12 +; ZVFBFA-NEXT: ret %r = call <vscale x 16 x bfloat> @llvm.copysign.nxv16bf16(<vscale x 16 x bfloat> %vm, <vscale x 16 x bfloat> %vs) ret <vscale x 16 x bfloat> %r } define <vscale x 32 x bfloat> @nxv32bf32(<vscale x 32 x bfloat> %vm, <vscale x 32 x bfloat> %vs) { -; CHECK-LABEL: nxv32bf32: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 8 -; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma -; CHECK-NEXT: vand.vx v16, v16, a0 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vand.vx v8, v8, a0 -; CHECK-NEXT: vor.vv v8, v8, v16 -; CHECK-NEXT: ret +; ZVFH-LABEL: nxv32bf32: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, 8 +; ZVFH-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; ZVFH-NEXT: vand.vx v16, v16, a0 +; ZVFH-NEXT: addi a0, a0, -1 +; ZVFH-NEXT: vand.vx v8, v8, a0 +; ZVFH-NEXT: vor.vv v8, v8, v16 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: nxv32bf32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; ZVFHMIN-NEXT: vand.vx v16, v16, a0 +; ZVFHMIN-NEXT: addi a0, a0, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a0 +; ZVFHMIN-NEXT: vor.vv v8, v8, v16 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: nxv32bf32: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m8, ta, ma +; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v16 +; ZVFBFA-NEXT: ret %r = call <vscale x 32 x bfloat> @llvm.copysign.nxv32bf32(<vscale x 32 x bfloat> %vm, <vscale x 32 x bfloat> %vs) ret <vscale x 32 x bfloat> %r } @@ -114,6 +216,16 @@ define <vscale x 1 x half> @vfcopysign_vv_nxv1f16(<vscale x 1 x half> %vm, <vsca ; ZVFHMIN-NEXT: vand.vx v8, v8, a0 ; ZVFHMIN-NEXT: vor.vv v8, v8, v9 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopysign_vv_nxv1f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFBFA-NEXT: vand.vx v9, v9, a0 +; ZVFBFA-NEXT: addi a0, a0, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: vor.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %r = call <vscale x 1 x half> @llvm.copysign.nxv1f16(<vscale x 1 x half> %vm, <vscale x 1 x half> %vs) ret <vscale x 1 x half> %r } @@ -136,6 +248,18 @@ define <vscale x 1 x half> @vfcopysign_vf_nxv1f16(<vscale x 1 x half> %vm, half ; ZVFHMIN-NEXT: vand.vx v9, v9, a1 ; ZVFHMIN-NEXT: vor.vv v8, v8, v9 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopysign_vf_nxv1f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: fmv.x.h a0, fa0 +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli a2, zero, e16, mf4, ta, ma +; ZVFBFA-NEXT: vmv.v.x v9, a0 +; ZVFBFA-NEXT: addi a0, a1, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: vand.vx v9, v9, a1 +; ZVFBFA-NEXT: vor.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %head = insertelement <vscale x 1 x half> poison, half %s, i32 0 %splat = shufflevector <vscale x 1 x half> %head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer %r = call <vscale x 1 x half> @llvm.copysign.nxv1f16(<vscale x 1 x half> %vm, <vscale x 1 x half> %splat) @@ -159,6 +283,17 @@ define <vscale x 1 x half> @vfcopynsign_vv_nxv1f16(<vscale x 1 x half> %vm, <vsc ; ZVFHMIN-NEXT: vand.vx v8, v8, a0 ; ZVFHMIN-NEXT: vor.vv v8, v8, v9 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopynsign_vv_nxv1f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFBFA-NEXT: vxor.vx v9, v9, a0 +; ZVFBFA-NEXT: vand.vx v9, v9, a0 +; ZVFBFA-NEXT: addi a0, a0, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: vor.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %n = fneg <vscale x 1 x half> %vs %r = call <vscale x 1 x half> @llvm.copysign.nxv1f16(<vscale x 1 x half> %vm, <vscale x 1 x half> %n) ret <vscale x 1 x half> %r @@ -183,6 +318,19 @@ define <vscale x 1 x half> @vfcopynsign_vf_nxv1f16(<vscale x 1 x half> %vm, half ; ZVFHMIN-NEXT: vand.vx v9, v9, a1 ; ZVFHMIN-NEXT: vor.vv v8, v8, v9 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopynsign_vf_nxv1f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: fmv.x.h a0, fa0 +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli a2, zero, e16, mf4, ta, ma +; ZVFBFA-NEXT: vmv.v.x v9, a0 +; ZVFBFA-NEXT: addi a0, a1, -1 +; ZVFBFA-NEXT: vxor.vx v9, v9, a1 +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: vand.vx v9, v9, a1 +; ZVFBFA-NEXT: vor.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %head = insertelement <vscale x 1 x half> poison, half %s, i32 0 %splat = shufflevector <vscale x 1 x half> %head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer %n = fneg <vscale x 1 x half> %splat @@ -208,6 +356,17 @@ define <vscale x 1 x half> @vfcopysign_exttrunc_vv_nxv1f16_nxv1f32(<vscale x 1 x ; ZVFHMIN-NEXT: vand.vx v8, v8, a0 ; ZVFHMIN-NEXT: vor.vv v8, v8, v9 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopysign_exttrunc_vv_nxv1f16_nxv1f32: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFBFA-NEXT: vfncvt.f.f.w v10, v9 +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: vand.vx v9, v10, a0 +; ZVFBFA-NEXT: addi a0, a0, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: vor.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %e = fptrunc <vscale x 1 x float> %vs to <vscale x 1 x half> %r = call <vscale x 1 x half> @llvm.copysign.nxv1f16(<vscale x 1 x half> %vm, <vscale x 1 x half> %e) ret <vscale x 1 x half> %r @@ -235,6 +394,19 @@ define <vscale x 1 x half> @vfcopysign_exttrunc_vf_nxv1f16_nxv1f32(<vscale x 1 x ; ZVFHMIN-NEXT: vand.vx v9, v10, a0 ; ZVFHMIN-NEXT: vor.vv v8, v8, v9 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopysign_exttrunc_vf_nxv1f16_nxv1f32: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; ZVFBFA-NEXT: vfmv.v.f v9, fa0 +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFBFA-NEXT: vfncvt.f.f.w v10, v9 +; ZVFBFA-NEXT: addi a1, a0, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: vand.vx v9, v10, a0 +; ZVFBFA-NEXT: vor.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %head = insertelement <vscale x 1 x float> poison, float %s, i32 0 %splat = shufflevector <vscale x 1 x float> %head, <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer %esplat = fptrunc <vscale x 1 x float> %splat to <vscale x 1 x half> @@ -261,6 +433,18 @@ define <vscale x 1 x half> @vfcopynsign_exttrunc_vv_nxv1f16_nxv1f32(<vscale x 1 ; ZVFHMIN-NEXT: vand.vx v9, v9, a0 ; ZVFHMIN-NEXT: vor.vv v8, v8, v9 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopynsign_exttrunc_vv_nxv1f16_nxv1f32: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFBFA-NEXT: vfncvt.f.f.w v10, v9 +; ZVFBFA-NEXT: addi a1, a0, -1 +; ZVFBFA-NEXT: vxor.vx v9, v10, a0 +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: vand.vx v9, v9, a0 +; ZVFBFA-NEXT: vor.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %n = fneg <vscale x 1 x float> %vs %eneg = fptrunc <vscale x 1 x float> %n to <vscale x 1 x half> %r = call <vscale x 1 x half> @llvm.copysign.nxv1f16(<vscale x 1 x half> %vm, <vscale x 1 x half> %eneg) @@ -290,6 +474,20 @@ define <vscale x 1 x half> @vfcopynsign_exttrunc_vf_nxv1f16_nxv1f32(<vscale x 1 ; ZVFHMIN-NEXT: vand.vx v9, v9, a0 ; ZVFHMIN-NEXT: vor.vv v8, v8, v9 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopynsign_exttrunc_vf_nxv1f16_nxv1f32: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; ZVFBFA-NEXT: vfmv.v.f v9, fa0 +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: addi a1, a0, -1 +; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFBFA-NEXT: vfncvt.f.f.w v10, v9 +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: vxor.vx v9, v10, a0 +; ZVFBFA-NEXT: vand.vx v9, v9, a0 +; ZVFBFA-NEXT: vor.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %head = insertelement <vscale x 1 x float> poison, float %s, i32 0 %splat = shufflevector <vscale x 1 x float> %head, <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer %n = fneg <vscale x 1 x float> %splat @@ -320,6 +518,19 @@ define <vscale x 1 x half> @vfcopysign_exttrunc_vv_nxv1f16_nxv1f64(<vscale x 1 x ; ZVFHMIN-NEXT: vand.vx v9, v9, a0 ; ZVFHMIN-NEXT: vor.vv v8, v8, v9 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopysign_exttrunc_vv_nxv1f16_nxv1f64: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; ZVFBFA-NEXT: vfncvt.rod.f.f.w v10, v9 +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFBFA-NEXT: vfncvt.f.f.w v9, v10 +; ZVFBFA-NEXT: addi a1, a0, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: vand.vx v9, v9, a0 +; ZVFBFA-NEXT: vor.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %e = fptrunc <vscale x 1 x double> %vs to <vscale x 1 x half> %r = call <vscale x 1 x half> @llvm.copysign.nxv1f16(<vscale x 1 x half> %vm, <vscale x 1 x half> %e) ret <vscale x 1 x half> %r @@ -351,6 +562,21 @@ define <vscale x 1 x half> @vfcopysign_exttrunc_vf_nxv1f16_nxv1f64(<vscale x 1 x ; ZVFHMIN-NEXT: vand.vx v9, v9, a0 ; ZVFHMIN-NEXT: vor.vv v8, v8, v9 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopysign_exttrunc_vf_nxv1f16_nxv1f64: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; ZVFBFA-NEXT: vfmv.v.f v9, fa0 +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFBFA-NEXT: vfncvt.rod.f.f.w v10, v9 +; ZVFBFA-NEXT: addi a1, a0, -1 +; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFBFA-NEXT: vfncvt.f.f.w v9, v10 +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: vand.vx v9, v9, a0 +; ZVFBFA-NEXT: vor.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %head = insertelement <vscale x 1 x double> poison, double %s, i32 0 %splat = shufflevector <vscale x 1 x double> %head, <vscale x 1 x double> poison, <vscale x 1 x i32> zeroinitializer %esplat = fptrunc <vscale x 1 x double> %splat to <vscale x 1 x half> @@ -381,6 +607,20 @@ define <vscale x 1 x half> @vfcopynsign_exttrunc_vv_nxv1f16_nxv1f64(<vscale x 1 ; ZVFHMIN-NEXT: vand.vx v9, v9, a0 ; ZVFHMIN-NEXT: vor.vv v8, v8, v9 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopynsign_exttrunc_vv_nxv1f16_nxv1f64: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: vsetvli a1, zero, e32, mf2, ta, ma +; ZVFBFA-NEXT: vfncvt.rod.f.f.w v10, v9 +; ZVFBFA-NEXT: addi a1, a0, -1 +; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFBFA-NEXT: vfncvt.f.f.w v9, v10 +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: vxor.vx v9, v9, a0 +; ZVFBFA-NEXT: vand.vx v9, v9, a0 +; ZVFBFA-NEXT: vor.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %n = fneg <vscale x 1 x double> %vs %eneg = fptrunc <vscale x 1 x double> %n to <vscale x 1 x half> %r = call <vscale x 1 x half> @llvm.copysign.nxv1f16(<vscale x 1 x half> %vm, <vscale x 1 x half> %eneg) @@ -414,6 +654,22 @@ define <vscale x 1 x half> @vfcopynsign_exttrunc_vf_nxv1f16_nxv1f64(<vscale x 1 ; ZVFHMIN-NEXT: vand.vx v9, v9, a0 ; ZVFHMIN-NEXT: vor.vv v8, v8, v9 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopynsign_exttrunc_vf_nxv1f16_nxv1f64: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; ZVFBFA-NEXT: vfmv.v.f v9, fa0 +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: addi a1, a0, -1 +; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFBFA-NEXT: vfncvt.rod.f.f.w v10, v9 +; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: vfncvt.f.f.w v9, v10 +; ZVFBFA-NEXT: vxor.vx v9, v9, a0 +; ZVFBFA-NEXT: vand.vx v9, v9, a0 +; ZVFBFA-NEXT: vor.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %head = insertelement <vscale x 1 x double> poison, double %s, i32 0 %splat = shufflevector <vscale x 1 x double> %head, <vscale x 1 x double> poison, <vscale x 1 x i32> zeroinitializer %n = fneg <vscale x 1 x double> %splat @@ -440,6 +696,16 @@ define <vscale x 2 x half> @vfcopysign_vv_nxv2f16(<vscale x 2 x half> %vm, <vsca ; ZVFHMIN-NEXT: vand.vx v8, v8, a0 ; ZVFHMIN-NEXT: vor.vv v8, v8, v9 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopysign_vv_nxv2f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFBFA-NEXT: vand.vx v9, v9, a0 +; ZVFBFA-NEXT: addi a0, a0, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: vor.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %r = call <vscale x 2 x half> @llvm.copysign.nxv2f16(<vscale x 2 x half> %vm, <vscale x 2 x half> %vs) ret <vscale x 2 x half> %r } @@ -462,6 +728,18 @@ define <vscale x 2 x half> @vfcopysign_vf_nxv2f16(<vscale x 2 x half> %vm, half ; ZVFHMIN-NEXT: vand.vx v9, v9, a1 ; ZVFHMIN-NEXT: vor.vv v8, v8, v9 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopysign_vf_nxv2f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: fmv.x.h a0, fa0 +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; ZVFBFA-NEXT: vmv.v.x v9, a0 +; ZVFBFA-NEXT: addi a0, a1, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: vand.vx v9, v9, a1 +; ZVFBFA-NEXT: vor.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %head = insertelement <vscale x 2 x half> poison, half %s, i32 0 %splat = shufflevector <vscale x 2 x half> %head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer %r = call <vscale x 2 x half> @llvm.copysign.nxv2f16(<vscale x 2 x half> %vm, <vscale x 2 x half> %splat) @@ -485,6 +763,17 @@ define <vscale x 2 x half> @vfcopynsign_vv_nxv2f16(<vscale x 2 x half> %vm, <vsc ; ZVFHMIN-NEXT: vand.vx v8, v8, a0 ; ZVFHMIN-NEXT: vor.vv v8, v8, v9 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopynsign_vv_nxv2f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFBFA-NEXT: vxor.vx v9, v9, a0 +; ZVFBFA-NEXT: vand.vx v9, v9, a0 +; ZVFBFA-NEXT: addi a0, a0, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: vor.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %n = fneg <vscale x 2 x half> %vs %r = call <vscale x 2 x half> @llvm.copysign.nxv2f16(<vscale x 2 x half> %vm, <vscale x 2 x half> %n) ret <vscale x 2 x half> %r @@ -509,6 +798,19 @@ define <vscale x 2 x half> @vfcopynsign_vf_nxv2f16(<vscale x 2 x half> %vm, half ; ZVFHMIN-NEXT: vand.vx v9, v9, a1 ; ZVFHMIN-NEXT: vor.vv v8, v8, v9 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopynsign_vf_nxv2f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: fmv.x.h a0, fa0 +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; ZVFBFA-NEXT: vmv.v.x v9, a0 +; ZVFBFA-NEXT: addi a0, a1, -1 +; ZVFBFA-NEXT: vxor.vx v9, v9, a1 +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: vand.vx v9, v9, a1 +; ZVFBFA-NEXT: vor.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %head = insertelement <vscale x 2 x half> poison, half %s, i32 0 %splat = shufflevector <vscale x 2 x half> %head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer %n = fneg <vscale x 2 x half> %splat @@ -534,6 +836,16 @@ define <vscale x 4 x half> @vfcopysign_vv_nxv4f16(<vscale x 4 x half> %vm, <vsca ; ZVFHMIN-NEXT: vand.vx v8, v8, a0 ; ZVFHMIN-NEXT: vor.vv v8, v8, v9 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopysign_vv_nxv4f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFBFA-NEXT: vand.vx v9, v9, a0 +; ZVFBFA-NEXT: addi a0, a0, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: vor.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %r = call <vscale x 4 x half> @llvm.copysign.nxv4f16(<vscale x 4 x half> %vm, <vscale x 4 x half> %vs) ret <vscale x 4 x half> %r } @@ -556,6 +868,18 @@ define <vscale x 4 x half> @vfcopysign_vf_nxv4f16(<vscale x 4 x half> %vm, half ; ZVFHMIN-NEXT: vand.vx v9, v9, a1 ; ZVFHMIN-NEXT: vor.vv v8, v8, v9 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopysign_vf_nxv4f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: fmv.x.h a0, fa0 +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; ZVFBFA-NEXT: vmv.v.x v9, a0 +; ZVFBFA-NEXT: addi a0, a1, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: vand.vx v9, v9, a1 +; ZVFBFA-NEXT: vor.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %head = insertelement <vscale x 4 x half> poison, half %s, i32 0 %splat = shufflevector <vscale x 4 x half> %head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer %r = call <vscale x 4 x half> @llvm.copysign.nxv4f16(<vscale x 4 x half> %vm, <vscale x 4 x half> %splat) @@ -579,6 +903,17 @@ define <vscale x 4 x half> @vfcopynsign_vv_nxv4f16(<vscale x 4 x half> %vm, <vsc ; ZVFHMIN-NEXT: vand.vx v8, v8, a0 ; ZVFHMIN-NEXT: vor.vv v8, v8, v9 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopynsign_vv_nxv4f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFBFA-NEXT: vxor.vx v9, v9, a0 +; ZVFBFA-NEXT: vand.vx v9, v9, a0 +; ZVFBFA-NEXT: addi a0, a0, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: vor.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %n = fneg <vscale x 4 x half> %vs %r = call <vscale x 4 x half> @llvm.copysign.nxv4f16(<vscale x 4 x half> %vm, <vscale x 4 x half> %n) ret <vscale x 4 x half> %r @@ -603,6 +938,19 @@ define <vscale x 4 x half> @vfcopynsign_vf_nxv4f16(<vscale x 4 x half> %vm, half ; ZVFHMIN-NEXT: vand.vx v9, v9, a1 ; ZVFHMIN-NEXT: vor.vv v8, v8, v9 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopynsign_vf_nxv4f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: fmv.x.h a0, fa0 +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; ZVFBFA-NEXT: vmv.v.x v9, a0 +; ZVFBFA-NEXT: addi a0, a1, -1 +; ZVFBFA-NEXT: vxor.vx v9, v9, a1 +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: vand.vx v9, v9, a1 +; ZVFBFA-NEXT: vor.vv v8, v8, v9 +; ZVFBFA-NEXT: ret %head = insertelement <vscale x 4 x half> poison, half %s, i32 0 %splat = shufflevector <vscale x 4 x half> %head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer %n = fneg <vscale x 4 x half> %splat @@ -628,6 +976,16 @@ define <vscale x 8 x half> @vfcopysign_vv_nxv8f16(<vscale x 8 x half> %vm, <vsca ; ZVFHMIN-NEXT: vand.vx v8, v8, a0 ; ZVFHMIN-NEXT: vor.vv v8, v8, v10 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopysign_vv_nxv8f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFBFA-NEXT: vand.vx v10, v10, a0 +; ZVFBFA-NEXT: addi a0, a0, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: vor.vv v8, v8, v10 +; ZVFBFA-NEXT: ret %r = call <vscale x 8 x half> @llvm.copysign.nxv8f16(<vscale x 8 x half> %vm, <vscale x 8 x half> %vs) ret <vscale x 8 x half> %r } @@ -650,6 +1008,18 @@ define <vscale x 8 x half> @vfcopysign_vf_nxv8f16(<vscale x 8 x half> %vm, half ; ZVFHMIN-NEXT: vand.vx v10, v10, a1 ; ZVFHMIN-NEXT: vor.vv v8, v8, v10 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopysign_vf_nxv8f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: fmv.x.h a0, fa0 +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; ZVFBFA-NEXT: vmv.v.x v10, a0 +; ZVFBFA-NEXT: addi a0, a1, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: vand.vx v10, v10, a1 +; ZVFBFA-NEXT: vor.vv v8, v8, v10 +; ZVFBFA-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %s, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer %r = call <vscale x 8 x half> @llvm.copysign.nxv8f16(<vscale x 8 x half> %vm, <vscale x 8 x half> %splat) @@ -673,6 +1043,17 @@ define <vscale x 8 x half> @vfcopynsign_vv_nxv8f16(<vscale x 8 x half> %vm, <vsc ; ZVFHMIN-NEXT: vand.vx v8, v8, a0 ; ZVFHMIN-NEXT: vor.vv v8, v8, v10 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopynsign_vv_nxv8f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFBFA-NEXT: vxor.vx v10, v10, a0 +; ZVFBFA-NEXT: vand.vx v10, v10, a0 +; ZVFBFA-NEXT: addi a0, a0, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: vor.vv v8, v8, v10 +; ZVFBFA-NEXT: ret %n = fneg <vscale x 8 x half> %vs %r = call <vscale x 8 x half> @llvm.copysign.nxv8f16(<vscale x 8 x half> %vm, <vscale x 8 x half> %n) ret <vscale x 8 x half> %r @@ -697,6 +1078,19 @@ define <vscale x 8 x half> @vfcopynsign_vf_nxv8f16(<vscale x 8 x half> %vm, half ; ZVFHMIN-NEXT: vand.vx v10, v10, a1 ; ZVFHMIN-NEXT: vor.vv v8, v8, v10 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopynsign_vf_nxv8f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: fmv.x.h a0, fa0 +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; ZVFBFA-NEXT: vmv.v.x v10, a0 +; ZVFBFA-NEXT: addi a0, a1, -1 +; ZVFBFA-NEXT: vxor.vx v10, v10, a1 +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: vand.vx v10, v10, a1 +; ZVFBFA-NEXT: vor.vv v8, v8, v10 +; ZVFBFA-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %s, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer %n = fneg <vscale x 8 x half> %splat @@ -722,6 +1116,17 @@ define <vscale x 8 x half> @vfcopysign_exttrunc_vv_nxv8f16_nxv8f32(<vscale x 8 x ; ZVFHMIN-NEXT: vand.vx v8, v8, a0 ; ZVFHMIN-NEXT: vor.vv v8, v8, v10 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopysign_exttrunc_vv_nxv8f16_nxv8f32: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFBFA-NEXT: vfncvt.f.f.w v10, v12 +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: vand.vx v10, v10, a0 +; ZVFBFA-NEXT: addi a0, a0, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: vor.vv v8, v8, v10 +; ZVFBFA-NEXT: ret %e = fptrunc <vscale x 8 x float> %vs to <vscale x 8 x half> %r = call <vscale x 8 x half> @llvm.copysign.nxv8f16(<vscale x 8 x half> %vm, <vscale x 8 x half> %e) ret <vscale x 8 x half> %r @@ -749,6 +1154,19 @@ define <vscale x 8 x half> @vfcopysign_exttrunc_vf_nxv8f16_nxv8f32(<vscale x 8 x ; ZVFHMIN-NEXT: vand.vx v10, v10, a0 ; ZVFHMIN-NEXT: vor.vv v8, v8, v10 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopysign_exttrunc_vf_nxv8f16_nxv8f32: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; ZVFBFA-NEXT: vfmv.v.f v12, fa0 +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFBFA-NEXT: vfncvt.f.f.w v10, v12 +; ZVFBFA-NEXT: addi a1, a0, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: vand.vx v10, v10, a0 +; ZVFBFA-NEXT: vor.vv v8, v8, v10 +; ZVFBFA-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %s, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer %esplat = fptrunc <vscale x 8 x float> %splat to <vscale x 8 x half> @@ -775,6 +1193,18 @@ define <vscale x 8 x half> @vfcopynsign_exttrunc_vv_nxv8f16_nxv8f32(<vscale x 8 ; ZVFHMIN-NEXT: vand.vx v10, v10, a0 ; ZVFHMIN-NEXT: vor.vv v8, v8, v10 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopynsign_exttrunc_vv_nxv8f16_nxv8f32: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFBFA-NEXT: vfncvt.f.f.w v10, v12 +; ZVFBFA-NEXT: addi a1, a0, -1 +; ZVFBFA-NEXT: vxor.vx v10, v10, a0 +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: vand.vx v10, v10, a0 +; ZVFBFA-NEXT: vor.vv v8, v8, v10 +; ZVFBFA-NEXT: ret %n = fneg <vscale x 8 x float> %vs %eneg = fptrunc <vscale x 8 x float> %n to <vscale x 8 x half> %r = call <vscale x 8 x half> @llvm.copysign.nxv8f16(<vscale x 8 x half> %vm, <vscale x 8 x half> %eneg) @@ -804,6 +1234,20 @@ define <vscale x 8 x half> @vfcopynsign_exttrunc_vf_nxv8f16_nxv8f32(<vscale x 8 ; ZVFHMIN-NEXT: vand.vx v10, v10, a0 ; ZVFHMIN-NEXT: vor.vv v8, v8, v10 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopynsign_exttrunc_vf_nxv8f16_nxv8f32: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; ZVFBFA-NEXT: vfmv.v.f v12, fa0 +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: addi a1, a0, -1 +; ZVFBFA-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFBFA-NEXT: vfncvt.f.f.w v10, v12 +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: vxor.vx v10, v10, a0 +; ZVFBFA-NEXT: vand.vx v10, v10, a0 +; ZVFBFA-NEXT: vor.vv v8, v8, v10 +; ZVFBFA-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %s, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer %n = fneg <vscale x 8 x float> %splat @@ -834,6 +1278,19 @@ define <vscale x 8 x half> @vfcopysign_exttrunc_vv_nxv8f16_nxv8f64(<vscale x 8 x ; ZVFHMIN-NEXT: vand.vx v10, v10, a0 ; ZVFHMIN-NEXT: vor.vv v8, v8, v10 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopysign_exttrunc_vv_nxv8f16_nxv8f64: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; ZVFBFA-NEXT: vfncvt.rod.f.f.w v12, v16 +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFBFA-NEXT: vfncvt.f.f.w v10, v12 +; ZVFBFA-NEXT: addi a1, a0, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: vand.vx v10, v10, a0 +; ZVFBFA-NEXT: vor.vv v8, v8, v10 +; ZVFBFA-NEXT: ret %e = fptrunc <vscale x 8 x double> %vs to <vscale x 8 x half> %r = call <vscale x 8 x half> @llvm.copysign.nxv8f16(<vscale x 8 x half> %vm, <vscale x 8 x half> %e) ret <vscale x 8 x half> %r @@ -865,6 +1322,21 @@ define <vscale x 8 x half> @vfcopysign_exttrunc_vf_nxv8f16_nxv8f64(<vscale x 8 x ; ZVFHMIN-NEXT: vand.vx v10, v10, a0 ; ZVFHMIN-NEXT: vor.vv v8, v8, v10 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopysign_exttrunc_vf_nxv8f16_nxv8f64: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; ZVFBFA-NEXT: vfmv.v.f v16, fa0 +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFBFA-NEXT: vfncvt.rod.f.f.w v12, v16 +; ZVFBFA-NEXT: addi a1, a0, -1 +; ZVFBFA-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFBFA-NEXT: vfncvt.f.f.w v10, v12 +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: vand.vx v10, v10, a0 +; ZVFBFA-NEXT: vor.vv v8, v8, v10 +; ZVFBFA-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %s, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer %esplat = fptrunc <vscale x 8 x double> %splat to <vscale x 8 x half> @@ -895,6 +1367,20 @@ define <vscale x 8 x half> @vfcopynsign_exttrunc_vv_nxv8f16_nxv8f64(<vscale x 8 ; ZVFHMIN-NEXT: vand.vx v10, v10, a0 ; ZVFHMIN-NEXT: vor.vv v8, v8, v10 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopynsign_exttrunc_vv_nxv8f16_nxv8f64: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; ZVFBFA-NEXT: vfncvt.rod.f.f.w v12, v16 +; ZVFBFA-NEXT: addi a1, a0, -1 +; ZVFBFA-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFBFA-NEXT: vfncvt.f.f.w v10, v12 +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: vxor.vx v10, v10, a0 +; ZVFBFA-NEXT: vand.vx v10, v10, a0 +; ZVFBFA-NEXT: vor.vv v8, v8, v10 +; ZVFBFA-NEXT: ret %n = fneg <vscale x 8 x double> %vs %eneg = fptrunc <vscale x 8 x double> %n to <vscale x 8 x half> %r = call <vscale x 8 x half> @llvm.copysign.nxv8f16(<vscale x 8 x half> %vm, <vscale x 8 x half> %eneg) @@ -928,6 +1414,22 @@ define <vscale x 8 x half> @vfcopynsign_exttrunc_vf_nxv8f16_nxv8f64(<vscale x 8 ; ZVFHMIN-NEXT: vand.vx v10, v10, a0 ; ZVFHMIN-NEXT: vor.vv v8, v8, v10 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopynsign_exttrunc_vf_nxv8f16_nxv8f64: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; ZVFBFA-NEXT: vfmv.v.f v16, fa0 +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: addi a1, a0, -1 +; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFBFA-NEXT: vfncvt.rod.f.f.w v12, v16 +; ZVFBFA-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFBFA-NEXT: vand.vx v8, v8, a1 +; ZVFBFA-NEXT: vfncvt.f.f.w v10, v12 +; ZVFBFA-NEXT: vxor.vx v10, v10, a0 +; ZVFBFA-NEXT: vand.vx v10, v10, a0 +; ZVFBFA-NEXT: vor.vv v8, v8, v10 +; ZVFBFA-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %s, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer %n = fneg <vscale x 8 x double> %splat @@ -954,6 +1456,16 @@ define <vscale x 16 x half> @vfcopysign_vv_nxv16f16(<vscale x 16 x half> %vm, <v ; ZVFHMIN-NEXT: vand.vx v8, v8, a0 ; ZVFHMIN-NEXT: vor.vv v8, v8, v12 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopysign_vv_nxv16f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFBFA-NEXT: vand.vx v12, v12, a0 +; ZVFBFA-NEXT: addi a0, a0, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: vor.vv v8, v8, v12 +; ZVFBFA-NEXT: ret %r = call <vscale x 16 x half> @llvm.copysign.nxv16f16(<vscale x 16 x half> %vm, <vscale x 16 x half> %vs) ret <vscale x 16 x half> %r } @@ -976,6 +1488,18 @@ define <vscale x 16 x half> @vfcopysign_vf_nxv16f16(<vscale x 16 x half> %vm, ha ; ZVFHMIN-NEXT: vand.vx v12, v12, a1 ; ZVFHMIN-NEXT: vor.vv v8, v8, v12 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopysign_vf_nxv16f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: fmv.x.h a0, fa0 +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; ZVFBFA-NEXT: vmv.v.x v12, a0 +; ZVFBFA-NEXT: addi a0, a1, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: vand.vx v12, v12, a1 +; ZVFBFA-NEXT: vor.vv v8, v8, v12 +; ZVFBFA-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %s, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer %r = call <vscale x 16 x half> @llvm.copysign.nxv16f16(<vscale x 16 x half> %vm, <vscale x 16 x half> %splat) @@ -999,6 +1523,17 @@ define <vscale x 16 x half> @vfcopynsign_vv_nxv16f16(<vscale x 16 x half> %vm, < ; ZVFHMIN-NEXT: vand.vx v8, v8, a0 ; ZVFHMIN-NEXT: vor.vv v8, v8, v12 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopynsign_vv_nxv16f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFBFA-NEXT: vxor.vx v12, v12, a0 +; ZVFBFA-NEXT: vand.vx v12, v12, a0 +; ZVFBFA-NEXT: addi a0, a0, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: vor.vv v8, v8, v12 +; ZVFBFA-NEXT: ret %n = fneg <vscale x 16 x half> %vs %r = call <vscale x 16 x half> @llvm.copysign.nxv16f16(<vscale x 16 x half> %vm, <vscale x 16 x half> %n) ret <vscale x 16 x half> %r @@ -1023,6 +1558,19 @@ define <vscale x 16 x half> @vfcopynsign_vf_nxv16f16(<vscale x 16 x half> %vm, h ; ZVFHMIN-NEXT: vand.vx v12, v12, a1 ; ZVFHMIN-NEXT: vor.vv v8, v8, v12 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopynsign_vf_nxv16f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: fmv.x.h a0, fa0 +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; ZVFBFA-NEXT: vmv.v.x v12, a0 +; ZVFBFA-NEXT: addi a0, a1, -1 +; ZVFBFA-NEXT: vxor.vx v12, v12, a1 +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: vand.vx v12, v12, a1 +; ZVFBFA-NEXT: vor.vv v8, v8, v12 +; ZVFBFA-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %s, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer %n = fneg <vscale x 16 x half> %splat @@ -1048,6 +1596,16 @@ define <vscale x 32 x half> @vfcopysign_vv_nxv32f16(<vscale x 32 x half> %vm, <v ; ZVFHMIN-NEXT: vand.vx v8, v8, a0 ; ZVFHMIN-NEXT: vor.vv v8, v8, v16 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopysign_vv_nxv32f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; ZVFBFA-NEXT: vand.vx v16, v16, a0 +; ZVFBFA-NEXT: addi a0, a0, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: vor.vv v8, v8, v16 +; ZVFBFA-NEXT: ret %r = call <vscale x 32 x half> @llvm.copysign.nxv32f16(<vscale x 32 x half> %vm, <vscale x 32 x half> %vs) ret <vscale x 32 x half> %r } @@ -1070,6 +1628,18 @@ define <vscale x 32 x half> @vfcopysign_vf_nxv32f16(<vscale x 32 x half> %vm, ha ; ZVFHMIN-NEXT: vand.vx v16, v16, a1 ; ZVFHMIN-NEXT: vor.vv v8, v8, v16 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopysign_vf_nxv32f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: fmv.x.h a0, fa0 +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli a2, zero, e16, m8, ta, ma +; ZVFBFA-NEXT: vmv.v.x v16, a0 +; ZVFBFA-NEXT: addi a0, a1, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: vand.vx v16, v16, a1 +; ZVFBFA-NEXT: vor.vv v8, v8, v16 +; ZVFBFA-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %s, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer %r = call <vscale x 32 x half> @llvm.copysign.nxv32f16(<vscale x 32 x half> %vm, <vscale x 32 x half> %splat) @@ -1093,6 +1663,17 @@ define <vscale x 32 x half> @vfcopynsign_vv_nxv32f16(<vscale x 32 x half> %vm, < ; ZVFHMIN-NEXT: vand.vx v8, v8, a0 ; ZVFHMIN-NEXT: vor.vv v8, v8, v16 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopynsign_vv_nxv32f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; ZVFBFA-NEXT: vxor.vx v16, v16, a0 +; ZVFBFA-NEXT: vand.vx v16, v16, a0 +; ZVFBFA-NEXT: addi a0, a0, -1 +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: vor.vv v8, v8, v16 +; ZVFBFA-NEXT: ret %n = fneg <vscale x 32 x half> %vs %r = call <vscale x 32 x half> @llvm.copysign.nxv32f16(<vscale x 32 x half> %vm, <vscale x 32 x half> %n) ret <vscale x 32 x half> %r @@ -1117,6 +1698,19 @@ define <vscale x 32 x half> @vfcopynsign_vf_nxv32f16(<vscale x 32 x half> %vm, h ; ZVFHMIN-NEXT: vand.vx v16, v16, a1 ; ZVFHMIN-NEXT: vor.vv v8, v8, v16 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfcopynsign_vf_nxv32f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: fmv.x.h a0, fa0 +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli a2, zero, e16, m8, ta, ma +; ZVFBFA-NEXT: vmv.v.x v16, a0 +; ZVFBFA-NEXT: addi a0, a1, -1 +; ZVFBFA-NEXT: vxor.vx v16, v16, a1 +; ZVFBFA-NEXT: vand.vx v8, v8, a0 +; ZVFBFA-NEXT: vand.vx v16, v16, a1 +; ZVFBFA-NEXT: vor.vv v8, v8, v16 +; ZVFBFA-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %s, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer %n = fneg <vscale x 32 x half> %splat diff --git a/llvm/test/CodeGen/RISCV/rvv/vfneg-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfneg-sdnode.ll index 9f456e9..c0b4916 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfneg-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfneg-sdnode.ll @@ -11,69 +11,153 @@ ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zvfbfmin,+v \ ; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ ; RUN: --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \ +; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFBFA +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \ +; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \ +; RUN: --check-prefixes=CHECK,ZVFBFA define <vscale x 1 x bfloat> @nxv1bf16(<vscale x 1 x bfloat> %va) { -; CHECK-LABEL: nxv1bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 8 -; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; CHECK-NEXT: vxor.vx v8, v8, a0 -; CHECK-NEXT: ret +; ZVFH-LABEL: nxv1bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, 8 +; ZVFH-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: nxv1bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a0 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: nxv1bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8 +; ZVFBFA-NEXT: ret %vb = fneg <vscale x 1 x bfloat> %va ret <vscale x 1 x bfloat> %vb } define <vscale x 2 x bfloat> @nxv2bf16(<vscale x 2 x bfloat> %va) { -; CHECK-LABEL: nxv2bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 8 -; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; CHECK-NEXT: vxor.vx v8, v8, a0 -; CHECK-NEXT: ret +; ZVFH-LABEL: nxv2bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, 8 +; ZVFH-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: nxv2bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a0 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: nxv2bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8 +; ZVFBFA-NEXT: ret %vb = fneg <vscale x 2 x bfloat> %va ret <vscale x 2 x bfloat> %vb } define <vscale x 4 x bfloat> @nxv4bf16(<vscale x 4 x bfloat> %va) { -; CHECK-LABEL: nxv4bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 8 -; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; CHECK-NEXT: vxor.vx v8, v8, a0 -; CHECK-NEXT: ret +; ZVFH-LABEL: nxv4bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, 8 +; ZVFH-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: nxv4bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a0 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: nxv4bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8 +; ZVFBFA-NEXT: ret %vb = fneg <vscale x 4 x bfloat> %va ret <vscale x 4 x bfloat> %vb } define <vscale x 8 x bfloat> @nxv8bf16(<vscale x 8 x bfloat> %va) { -; CHECK-LABEL: nxv8bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 8 -; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; CHECK-NEXT: vxor.vx v8, v8, a0 -; CHECK-NEXT: ret +; ZVFH-LABEL: nxv8bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, 8 +; ZVFH-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: nxv8bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a0 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: nxv8bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8 +; ZVFBFA-NEXT: ret %vb = fneg <vscale x 8 x bfloat> %va ret <vscale x 8 x bfloat> %vb } define <vscale x 16 x bfloat> @nxv16bf16(<vscale x 16 x bfloat> %va) { -; CHECK-LABEL: nxv16bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 8 -; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; CHECK-NEXT: vxor.vx v8, v8, a0 -; CHECK-NEXT: ret +; ZVFH-LABEL: nxv16bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, 8 +; ZVFH-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: nxv16bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a0 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: nxv16bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8 +; ZVFBFA-NEXT: ret %vb = fneg <vscale x 16 x bfloat> %va ret <vscale x 16 x bfloat> %vb } define <vscale x 32 x bfloat> @nxv32bf16(<vscale x 32 x bfloat> %va) { -; CHECK-LABEL: nxv32bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 8 -; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma -; CHECK-NEXT: vxor.vx v8, v8, a0 -; CHECK-NEXT: ret +; ZVFH-LABEL: nxv32bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a0, 8 +; ZVFH-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a0 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: nxv32bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a0, 8 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a0 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: nxv32bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m8, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8 +; ZVFBFA-NEXT: ret %vb = fneg <vscale x 32 x bfloat> %va ret <vscale x 32 x bfloat> %vb } @@ -91,6 +175,13 @@ define <vscale x 1 x half> @vfneg_vv_nxv1f16(<vscale x 1 x half> %va) { ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a0 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv1f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; ZVFBFA-NEXT: vxor.vx v8, v8, a0 +; ZVFBFA-NEXT: ret %vb = fneg <vscale x 1 x half> %va ret <vscale x 1 x half> %vb } @@ -108,6 +199,13 @@ define <vscale x 2 x half> @vfneg_vv_nxv2f16(<vscale x 2 x half> %va) { ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a0 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv2f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; ZVFBFA-NEXT: vxor.vx v8, v8, a0 +; ZVFBFA-NEXT: ret %vb = fneg <vscale x 2 x half> %va ret <vscale x 2 x half> %vb } @@ -125,6 +223,13 @@ define <vscale x 4 x half> @vfneg_vv_nxv4f16(<vscale x 4 x half> %va) { ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a0 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv4f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVFBFA-NEXT: vxor.vx v8, v8, a0 +; ZVFBFA-NEXT: ret %vb = fneg <vscale x 4 x half> %va ret <vscale x 4 x half> %vb } @@ -142,6 +247,13 @@ define <vscale x 8 x half> @vfneg_vv_nxv8f16(<vscale x 8 x half> %va) { ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a0 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv8f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; ZVFBFA-NEXT: vxor.vx v8, v8, a0 +; ZVFBFA-NEXT: ret %vb = fneg <vscale x 8 x half> %va ret <vscale x 8 x half> %vb } @@ -159,6 +271,13 @@ define <vscale x 16 x half> @vfneg_vv_nxv16f16(<vscale x 16 x half> %va) { ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a0 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv16f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFBFA-NEXT: vxor.vx v8, v8, a0 +; ZVFBFA-NEXT: ret %vb = fneg <vscale x 16 x half> %va ret <vscale x 16 x half> %vb } @@ -176,6 +295,13 @@ define <vscale x 32 x half> @vfneg_vv_nxv32f16(<vscale x 32 x half> %va) { ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a0 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv32f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a0, 8 +; ZVFBFA-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; ZVFBFA-NEXT: vxor.vx v8, v8, a0 +; ZVFBFA-NEXT: ret %vb = fneg <vscale x 32 x half> %va ret <vscale x 32 x half> %vb } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll index bbab056..9bd24c4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll @@ -1,12 +1,304 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zvfbfmin,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zvfbfmin,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zvfbfmin,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zvfbfmin,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA + +define <vscale x 1 x bfloat> @vfneg_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfneg_vv_nxv1bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_nxv1bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv1bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8, v0.t +; ZVFBFA-NEXT: ret + %v = call <vscale x 1 x bfloat> @llvm.vp.fneg.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x bfloat> %v +} + +define <vscale x 1 x bfloat> @vfneg_vv_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, i32 zeroext %evl) { +; ZVFH-LABEL: vfneg_vv_nxv1bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a1 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_nxv1bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv1bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8 +; ZVFBFA-NEXT: ret + %v = call <vscale x 1 x bfloat> @llvm.vp.fneg.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl) + ret <vscale x 1 x bfloat> %v +} + +define <vscale x 2 x bfloat> @vfneg_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfneg_vv_nxv2bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_nxv2bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv2bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8, v0.t +; ZVFBFA-NEXT: ret + %v = call <vscale x 2 x bfloat> @llvm.vp.fneg.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x bfloat> %v +} + +define <vscale x 2 x bfloat> @vfneg_vv_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, i32 zeroext %evl) { +; ZVFH-LABEL: vfneg_vv_nxv2bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a1 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_nxv2bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv2bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8 +; ZVFBFA-NEXT: ret + %v = call <vscale x 2 x bfloat> @llvm.vp.fneg.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl) + ret <vscale x 2 x bfloat> %v +} + +define <vscale x 4 x bfloat> @vfneg_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfneg_vv_nxv4bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_nxv4bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv4bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8, v0.t +; ZVFBFA-NEXT: ret + %v = call <vscale x 4 x bfloat> @llvm.vp.fneg.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x bfloat> %v +} + +define <vscale x 4 x bfloat> @vfneg_vv_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, i32 zeroext %evl) { +; ZVFH-LABEL: vfneg_vv_nxv4bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a1 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_nxv4bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv4bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8 +; ZVFBFA-NEXT: ret + %v = call <vscale x 4 x bfloat> @llvm.vp.fneg.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl) + ret <vscale x 4 x bfloat> %v +} + +define <vscale x 8 x bfloat> @vfneg_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfneg_vv_nxv8bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_nxv8bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv8bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8, v0.t +; ZVFBFA-NEXT: ret + %v = call <vscale x 8 x bfloat> @llvm.vp.fneg.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x bfloat> %v +} + +define <vscale x 8 x bfloat> @vfneg_vv_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, i32 zeroext %evl) { +; ZVFH-LABEL: vfneg_vv_nxv8bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a1 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_nxv8bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv8bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8 +; ZVFBFA-NEXT: ret + %v = call <vscale x 8 x bfloat> @llvm.vp.fneg.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl) + ret <vscale x 8 x bfloat> %v +} + +define <vscale x 16 x bfloat> @vfneg_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfneg_vv_nxv16bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_nxv16bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv16bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8, v0.t +; ZVFBFA-NEXT: ret + %v = call <vscale x 16 x bfloat> @llvm.vp.fneg.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x bfloat> %v +} + +define <vscale x 16 x bfloat> @vfneg_vv_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, i32 zeroext %evl) { +; ZVFH-LABEL: vfneg_vv_nxv16bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a1 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_nxv16bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv16bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8 +; ZVFBFA-NEXT: ret + %v = call <vscale x 16 x bfloat> @llvm.vp.fneg.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl) + ret <vscale x 16 x bfloat> %v +} + +define <vscale x 32 x bfloat> @vfneg_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; ZVFH-LABEL: vfneg_vv_nxv32bf16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_nxv32bf16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv32bf16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8, v0.t +; ZVFBFA-NEXT: ret + %v = call <vscale x 32 x bfloat> @llvm.vp.fneg.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x bfloat> %v +} + +define <vscale x 32 x bfloat> @vfneg_vv_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, i32 zeroext %evl) { +; ZVFH-LABEL: vfneg_vv_nxv32bf16_unmasked: +; ZVFH: # %bb.0: +; ZVFH-NEXT: lui a1, 8 +; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFH-NEXT: vxor.vx v8, v8, a1 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfneg_vv_nxv32bf16_unmasked: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 +; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv32bf16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma +; ZVFBFA-NEXT: vfneg.v v8, v8 +; ZVFBFA-NEXT: ret + %v = call <vscale x 32 x bfloat> @llvm.vp.fneg.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl) + ret <vscale x 32 x bfloat> %v +} declare <vscale x 1 x half> @llvm.vp.fneg.nxv1f16(<vscale x 1 x half>, <vscale x 1 x i1>, i32) @@ -23,6 +315,13 @@ define <vscale x 1 x half> @vfneg_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv1f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFBFA-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFBFA-NEXT: ret %v = call <vscale x 1 x half> @llvm.vp.fneg.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 %evl) ret <vscale x 1 x half> %v } @@ -40,6 +339,13 @@ define <vscale x 1 x half> @vfneg_vv_nxv1f16_unmasked(<vscale x 1 x half> %va, i ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv1f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFBFA-NEXT: vxor.vx v8, v8, a1 +; ZVFBFA-NEXT: ret %v = call <vscale x 1 x half> @llvm.vp.fneg.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl) ret <vscale x 1 x half> %v } @@ -59,6 +365,13 @@ define <vscale x 2 x half> @vfneg_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv2f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFBFA-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFBFA-NEXT: ret %v = call <vscale x 2 x half> @llvm.vp.fneg.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 %evl) ret <vscale x 2 x half> %v } @@ -76,6 +389,13 @@ define <vscale x 2 x half> @vfneg_vv_nxv2f16_unmasked(<vscale x 2 x half> %va, i ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv2f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFBFA-NEXT: vxor.vx v8, v8, a1 +; ZVFBFA-NEXT: ret %v = call <vscale x 2 x half> @llvm.vp.fneg.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl) ret <vscale x 2 x half> %v } @@ -95,6 +415,13 @@ define <vscale x 4 x half> @vfneg_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv4f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFBFA-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFBFA-NEXT: ret %v = call <vscale x 4 x half> @llvm.vp.fneg.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 %evl) ret <vscale x 4 x half> %v } @@ -112,6 +439,13 @@ define <vscale x 4 x half> @vfneg_vv_nxv4f16_unmasked(<vscale x 4 x half> %va, i ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv4f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFBFA-NEXT: vxor.vx v8, v8, a1 +; ZVFBFA-NEXT: ret %v = call <vscale x 4 x half> @llvm.vp.fneg.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl) ret <vscale x 4 x half> %v } @@ -131,6 +465,13 @@ define <vscale x 8 x half> @vfneg_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv8f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFBFA-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFBFA-NEXT: ret %v = call <vscale x 8 x half> @llvm.vp.fneg.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x half> %v } @@ -148,6 +489,13 @@ define <vscale x 8 x half> @vfneg_vv_nxv8f16_unmasked(<vscale x 8 x half> %va, i ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv8f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFBFA-NEXT: vxor.vx v8, v8, a1 +; ZVFBFA-NEXT: ret %v = call <vscale x 8 x half> @llvm.vp.fneg.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl) ret <vscale x 8 x half> %v } @@ -167,6 +515,13 @@ define <vscale x 16 x half> @vfneg_vv_nxv16f16(<vscale x 16 x half> %va, <vscale ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv16f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFBFA-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFBFA-NEXT: ret %v = call <vscale x 16 x half> @llvm.vp.fneg.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 %evl) ret <vscale x 16 x half> %v } @@ -184,6 +539,13 @@ define <vscale x 16 x half> @vfneg_vv_nxv16f16_unmasked(<vscale x 16 x half> %va ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv16f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFBFA-NEXT: vxor.vx v8, v8, a1 +; ZVFBFA-NEXT: ret %v = call <vscale x 16 x half> @llvm.vp.fneg.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl) ret <vscale x 16 x half> %v } @@ -203,6 +565,13 @@ define <vscale x 32 x half> @vfneg_vv_nxv32f16(<vscale x 32 x half> %va, <vscale ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv32f16: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFBFA-NEXT: vxor.vx v8, v8, a1, v0.t +; ZVFBFA-NEXT: ret %v = call <vscale x 32 x half> @llvm.vp.fneg.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 %evl) ret <vscale x 32 x half> %v } @@ -220,6 +589,13 @@ define <vscale x 32 x half> @vfneg_vv_nxv32f16_unmasked(<vscale x 32 x half> %va ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret +; +; ZVFBFA-LABEL: vfneg_vv_nxv32f16_unmasked: +; ZVFBFA: # %bb.0: +; ZVFBFA-NEXT: lui a1, 8 +; ZVFBFA-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFBFA-NEXT: vxor.vx v8, v8, a1 +; ZVFBFA-NEXT: ret %v = call <vscale x 32 x half> @llvm.vp.fneg.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl) ret <vscale x 32 x half> %v } @@ -461,10 +837,10 @@ define <vscale x 16 x double> @vfneg_vv_nxv16f64(<vscale x 16 x double> %va, <vs ; CHECK-NEXT: and a2, a2, a3 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vfneg.v v16, v16, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB32_2 +; CHECK-NEXT: bltu a0, a1, .LBB44_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB32_2: +; CHECK-NEXT: .LBB44_2: ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfneg.v v8, v8, v0.t @@ -483,10 +859,10 @@ define <vscale x 16 x double> @vfneg_vv_nxv16f64_unmasked(<vscale x 16 x double> ; CHECK-NEXT: and a2, a3, a2 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vfneg.v v16, v16 -; CHECK-NEXT: bltu a0, a1, .LBB33_2 +; CHECK-NEXT: bltu a0, a1, .LBB45_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB33_2: +; CHECK-NEXT: .LBB45_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfneg.v v8, v8 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_16bit_atomics/atomicrmw_faddfsub_bfloat16.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_16bit_atomics/atomicrmw_faddfsub_bfloat16.ll new file mode 100644 index 0000000..a189b2a --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_16bit_atomics/atomicrmw_faddfsub_bfloat16.ll @@ -0,0 +1,34 @@ +; RUN: not llc -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_KHR_bfloat16 %s -o %t.spvt 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR1 +; RUN: not llc -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_EXT_shader_atomic_float_add,+SPV_KHR_bfloat16 %s -o %t.spvt 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR2 + +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_EXT_shader_atomic_float_add,+SPV_INTEL_16bit_atomics,+SPV_KHR_bfloat16,+SPV_INTEL_bfloat16_arithmetic %s -o - | FileCheck %s + +; CHECK-ERROR1: LLVM ERROR: The atomic float instruction requires the following SPIR-V extension: SPV_EXT_shader_atomic_float_add +; CHECK-ERROR2: LLVM ERROR: The atomic bfloat16 instruction requires the following SPIR-V extension: SPV_INTEL_16bit_atomics + +; CHECK: Capability BFloat16TypeKHR +; CHECK: Capability AtomicBFloat16AddINTEL +; CHECK: Extension "SPV_KHR_bfloat16" +; CHECK: Extension "SPV_EXT_shader_atomic_float_add" +; CHECK: Extension "SPV_INTEL_16bit_atomics" +; CHECK-DAG: %[[TyBF16:[0-9]+]] = OpTypeFloat 16 0 +; CHECK-DAG: %[[TyBF16Ptr:[0-9]+]] = OpTypePointer {{[a-zA-Z]+}} %[[TyBF16]] +; CHECK-DAG: %[[TyInt32:[0-9]+]] = OpTypeInt 32 0 +; CHECK-DAG: %[[ConstBF16:[0-9]+]] = OpConstant %[[TyBF16]] 16936{{$}} +; CHECK-DAG: %[[Const0:[0-9]+]] = OpConstantNull %[[TyBF16]] +; CHECK-DAG: %[[BF16Ptr:[0-9]+]] = OpVariable %[[TyBF16Ptr]] CrossWorkgroup %[[Const0]] +; CHECK-DAG: %[[ScopeAllSvmDevices:[0-9]+]] = OpConstantNull %[[TyInt32]] +; CHECK-DAG: %[[MemSeqCst:[0-9]+]] = OpConstant %[[TyInt32]] 16{{$}} +; CHECK: OpAtomicFAddEXT %[[TyBF16]] %[[BF16Ptr]] %[[ScopeAllSvmDevices]] %[[MemSeqCst]] %[[ConstBF16]] +; CHECK: %[[NegatedConstBF16:[0-9]+]] = OpFNegate %[[TyBF16]] %[[ConstBF16]] +; CHECK: OpAtomicFAddEXT %[[TyBF16]] %[[BF16Ptr]] %[[ScopeAllSvmDevices]] %[[MemSeqCst]] %[[NegatedConstBF16]] + + +@f = common dso_local local_unnamed_addr addrspace(1) global bfloat 0.000000e+00, align 8 + +define dso_local spir_func void @test1() local_unnamed_addr { +entry: + %addval = atomicrmw fadd ptr addrspace(1) @f, bfloat 42.000000e+00 seq_cst + %subval = atomicrmw fsub ptr addrspace(1) @f, bfloat 42.000000e+00 seq_cst + ret void +} diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_16bit_atomics/atomicrmw_fminfmax_bfloat16.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_16bit_atomics/atomicrmw_fminfmax_bfloat16.ll new file mode 100644 index 0000000..dd84480 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_16bit_atomics/atomicrmw_fminfmax_bfloat16.ll @@ -0,0 +1,28 @@ +; RUN: not llc -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_EXT_shader_atomic_float_min_max,+SPV_KHR_bfloat16 %s -o %t.spvt 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_EXT_shader_atomic_float_min_max,+SPV_INTEL_16bit_atomics,+SPV_KHR_bfloat16 %s -o - | FileCheck %s + +; CHECK-ERROR: LLVM ERROR: The atomic bfloat16 instruction requires the following SPIR-V extension: SPV_INTEL_16bit_atomics + +; CHECK: Capability AtomicBFloat16MinMaxINTEL +; CHECK: Extension "SPV_KHR_bfloat16" +; CHECK: Extension "SPV_EXT_shader_atomic_float_min_max" +; CHECK: Extension "SPV_INTEL_16bit_atomics" +; CHECK-DAG: %[[TyBF16:[0-9]+]] = OpTypeFloat 16 0 +; CHECK-DAG: %[[TyBF16Ptr:[0-9]+]] = OpTypePointer {{[a-zA-Z]+}} %[[TyBF16]] +; CHECK-DAG: %[[TyInt32:[0-9]+]] = OpTypeInt 32 0 +; CHECK-DAG: %[[ConstBF16:[0-9]+]] = OpConstant %[[TyBF16]] 16936{{$}} +; CHECK-DAG: %[[Const0:[0-9]+]] = OpConstantNull %[[TyBF16]] +; CHECK-DAG: %[[BF16Ptr:[0-9]+]] = OpVariable %[[TyBF16Ptr]] CrossWorkgroup %[[Const0]] +; CHECK-DAG: %[[ScopeAllSvmDevices:[0-9]+]] = OpConstantNull %[[TyInt32]] +; CHECK-DAG: %[[MemSeqCst:[0-9]+]] = OpConstant %[[TyInt32]] 16{{$}} +; CHECK: OpAtomicFMinEXT %[[TyBF16]] %[[BF16Ptr]] %[[ScopeAllSvmDevices]] %[[MemSeqCst]] %[[ConstBF16]] +; CHECK: OpAtomicFMaxEXT %[[TyBF16]] %[[BF16Ptr]] %[[ScopeAllSvmDevices]] %[[MemSeqCst]] %[[ConstBF16]] + +@f = common dso_local local_unnamed_addr addrspace(1) global bfloat 0.000000e+00, align 8 + +define spir_func void @test1() { +entry: + %minval = atomicrmw fmin ptr addrspace(1) @f, bfloat 42.0e+00 seq_cst + %maxval = atomicrmw fmax ptr addrspace(1) @f, bfloat 42.0e+00 seq_cst + ret void +} diff --git a/llvm/test/Transforms/InstCombine/sink-dereferenceable-assume.ll b/llvm/test/Transforms/InstCombine/sink-dereferenceable-assume.ll index 9531323..8ceb310 100644 --- a/llvm/test/Transforms/InstCombine/sink-dereferenceable-assume.ll +++ b/llvm/test/Transforms/InstCombine/sink-dereferenceable-assume.ll @@ -1,15 +1,44 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt -p instcombine -S %s | FileCheck %s -define i64 @test_sink_with_dereferenceable_assume(ptr %p, ptr %q, i1 %cond) { -; CHECK-LABEL: define i64 @test_sink_with_dereferenceable_assume( -; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i1 [[COND:%.*]]) { +define i64 @test_dereferenceable_assume(ptr %p, ptr %q, i1 %c.0) { +; CHECK-LABEL: define i64 @test_dereferenceable_assume( +; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i1 [[C_0:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]] +; CHECK-NEXT: [[P_INT:%.*]] = ptrtoint ptr [[P]] to i64 +; CHECK-NEXT: [[Q_INT:%.*]] = ptrtoint ptr [[Q]] to i64 +; CHECK-NEXT: [[DIFF:%.*]] = sub i64 [[Q_INT]], [[P_INT]] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[P]], i64 [[DIFF]]) ] +; CHECK-NEXT: br i1 [[C_0]], label %[[THEN:.*]], label %[[ELSE:.*]] +; CHECK: [[THEN]]: +; CHECK-NEXT: ret i64 [[DIFF]] +; CHECK: [[ELSE]]: +; CHECK-NEXT: ret i64 0 +; +entry: + %p_int = ptrtoint ptr %p to i64 + %q_int = ptrtoint ptr %q to i64 + %diff = sub i64 %q_int, %p_int + call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %p, i64 %diff) ] + br i1 %c.0, label %then, label %else + +then: + ret i64 %diff + +else: + ret i64 0 +} + +define i64 @test_sink_with_dereferenceable_assume_same_block_as_user(ptr %p, ptr %q, i1 %c.0) { +; CHECK-LABEL: define i64 @test_sink_with_dereferenceable_assume_same_block_as_user( +; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i1 [[C_0:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br i1 [[C_0]], label %[[THEN:.*]], label %[[ELSE:.*]] ; CHECK: [[THEN]]: ; CHECK-NEXT: [[Q_INT:%.*]] = ptrtoint ptr [[Q]] to i64 ; CHECK-NEXT: [[P_INT:%.*]] = ptrtoint ptr [[P]] to i64 ; CHECK-NEXT: [[DIFF:%.*]] = sub i64 [[Q_INT]], [[P_INT]] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[P]], i64 [[DIFF]]) ] ; CHECK-NEXT: ret i64 [[DIFF]] ; CHECK: [[ELSE]]: ; CHECK-NEXT: ret i64 0 @@ -18,14 +47,77 @@ entry: %p_int = ptrtoint ptr %p to i64 %q_int = ptrtoint ptr %q to i64 %diff = sub i64 %q_int, %p_int - call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %p, i64 %diff) ] - br i1 %cond, label %then, label %else + br i1 %c.0, label %then, label %else then: + call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %p, i64 %diff) ] ret i64 %diff else: ret i64 0 } +define i64 @test_sink_with_multiple_users_dominated_by_deref(ptr %p, ptr %q, i1 %c.0, i1 %c.1) { +; CHECK-LABEL: define i64 @test_sink_with_multiple_users_dominated_by_deref( +; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i1 [[C_0:%.*]], i1 [[C_1:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[P_INT:%.*]] = ptrtoint ptr [[P]] to i64 +; CHECK-NEXT: [[Q_INT:%.*]] = ptrtoint ptr [[Q]] to i64 +; CHECK-NEXT: [[DIFF:%.*]] = sub i64 [[Q_INT]], [[P_INT]] +; CHECK-NEXT: br i1 [[C_0]], label %[[THEN:.*]], label %[[ELSE:.*]] +; CHECK: [[THEN]]: +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[P]], i64 [[DIFF]]) ] +; CHECK-NEXT: br i1 [[C_1]], label %[[THEN_2:.*]], label %[[ELSE]] +; CHECK: [[THEN_2]]: +; CHECK-NEXT: [[DOUBLED:%.*]] = shl i64 [[DIFF]], 1 +; CHECK-NEXT: ret i64 [[DOUBLED]] +; CHECK: [[ELSE]]: +; CHECK-NEXT: ret i64 0 +; +entry: + %p_int = ptrtoint ptr %p to i64 + %q_int = ptrtoint ptr %q to i64 + %diff = sub i64 %q_int, %p_int + br i1 %c.0, label %then, label %else + +then: + call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %p, i64 %diff) ] + br i1 %c.1, label %then.2, label %else + +then.2: + %doubled = mul i64 %diff, 2 + ret i64 %doubled + +else: + ret i64 0 +} + +define i64 @test_deref_user_does_not_dominate_other_user(ptr %p, ptr %q, i1 %c.0) { +; CHECK-LABEL: define i64 @test_deref_user_does_not_dominate_other_user( +; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i1 [[C_0:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[P_INT:%.*]] = ptrtoint ptr [[P]] to i64 +; CHECK-NEXT: [[Q_INT:%.*]] = ptrtoint ptr [[Q]] to i64 +; CHECK-NEXT: [[DIFF:%.*]] = sub i64 [[Q_INT]], [[P_INT]] +; CHECK-NEXT: br i1 [[C_0]], label %[[MIDDLE:.*]], label %[[EXIT:.*]] +; CHECK: [[MIDDLE]]: +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[P]], i64 [[DIFF]]) ] +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret i64 [[DIFF]] +; +entry: + %p_int = ptrtoint ptr %p to i64 + %q_int = ptrtoint ptr %q to i64 + %diff = sub i64 %q_int, %p_int + br i1 %c.0, label %middle, label %exit + +middle: + call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %p, i64 %diff) ] + br label %exit + +exit: + ret i64 %diff +} + declare void @llvm.assume(i1 noundef) diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll index 33e3e83..e914979 100644 --- a/llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll @@ -133,75 +133,65 @@ define ptr @std_find_caller(ptr noundef %first, ptr noundef %last) { ; CHECK-LABEL: define noundef ptr @std_find_caller( ; CHECK-SAME: ptr noundef [[FIRST:%.*]], ptr noundef [[LAST:%.*]]) local_unnamed_addr #[[ATTR0]] { ; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[FIRST3:%.*]] = ptrtoint ptr [[FIRST]] to i64 +; CHECK-NEXT: [[LAST_I64:%.*]] = ptrtoint ptr [[LAST]] to i64 +; CHECK-NEXT: [[PTR_SUB:%.*]] = sub i64 [[LAST_I64]], [[FIRST3]] ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[FIRST]], i64 2) ] ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[LAST]], i64 2) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[FIRST]], i64 [[PTR_SUB]]) ] ; CHECK-NEXT: [[PRE_I:%.*]] = icmp eq ptr [[FIRST]], [[LAST]] ; CHECK-NEXT: br i1 [[PRE_I]], label %[[STD_FIND_GENERIC_IMPL_EXIT:.*]], label %[[LOOP_HEADER_I_PREHEADER:.*]] ; CHECK: [[LOOP_HEADER_I_PREHEADER]]: -; CHECK-NEXT: [[LAST2:%.*]] = ptrtoint ptr [[LAST]] to i64 -; CHECK-NEXT: [[FIRST3:%.*]] = ptrtoint ptr [[FIRST]] to i64 -; CHECK-NEXT: [[LAST_I64:%.*]] = ptrtoint ptr [[LAST]] to i64 -; CHECK-NEXT: [[FIRST1:%.*]] = ptrtoint ptr [[FIRST]] to i64 -; CHECK-NEXT: [[PTR_SUB:%.*]] = sub i64 [[LAST_I64]], [[FIRST1]] ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[PTR_SUB]] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[LAST2]], -2 +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[LAST_I64]], -2 ; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[FIRST3]] ; CHECK-NEXT: [[TMP2:%.*]] = lshr exact i64 [[TMP1]], 1 ; CHECK-NEXT: [[TMP3:%.*]] = add nuw i64 [[TMP2]], 1 -; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP3]], 3 -; CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP1]], 6 -; CHECK-NEXT: [[LCMP_MOD_NOT:%.*]] = icmp eq i64 [[TMP4]], 6 -; CHECK-NEXT: br i1 [[LCMP_MOD_NOT]], label %[[LOOP_HEADER_I_PROL_LOOPEXIT:.*]], label %[[LOOP_HEADER_I_PROL:.*]] -; CHECK: [[LOOP_HEADER_I_PROL]]: -; CHECK-NEXT: [[PTR_IV_I_PROL:%.*]] = phi ptr [ [[PTR_IV_NEXT_I_PROL:%.*]], %[[LOOP_LATCH_I_PROL:.*]] ], [ [[FIRST]], %[[LOOP_HEADER_I_PREHEADER]] ] -; CHECK-NEXT: [[PROL_ITER:%.*]] = phi i64 [ [[PROL_ITER_NEXT:%.*]], %[[LOOP_LATCH_I_PROL]] ], [ 0, %[[LOOP_HEADER_I_PREHEADER]] ] -; CHECK-NEXT: [[L_I_PROL:%.*]] = load i16, ptr [[PTR_IV_I_PROL]], align 2 -; CHECK-NEXT: [[C_1_I_PROL:%.*]] = icmp eq i16 [[L_I_PROL]], 1 -; CHECK-NEXT: br i1 [[C_1_I_PROL]], label %[[STD_FIND_GENERIC_IMPL_EXIT]], label %[[LOOP_LATCH_I_PROL]] -; CHECK: [[LOOP_LATCH_I_PROL]]: -; CHECK-NEXT: [[PTR_IV_NEXT_I_PROL]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_I_PROL]], i64 2 -; CHECK-NEXT: [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 158 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[LOOP_HEADER_I_PREHEADER2:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP3]], -8 +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[PROL_ITER_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 1 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[NEXT_GEP]], align 2 +; CHECK-NEXT: [[WIDE_LOAD_FR:%.*]] = freeze <8 x i16> [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <8 x i16> [[WIDE_LOAD_FR]], splat (i16 1) +; CHECK-NEXT: [[PROL_ITER_NEXT]] = add nuw i64 [[INDEX]], 8 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i1> [[TMP4]] to i8 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i8 [[TMP5]], 0 ; CHECK-NEXT: [[PROL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[PROL_ITER_NEXT]], [[XTRAITER]] -; CHECK-NEXT: br i1 [[PROL_ITER_CMP_NOT]], label %[[LOOP_HEADER_I_PROL_LOOPEXIT]], label %[[LOOP_HEADER_I_PROL]], !llvm.loop [[LOOP3:![0-9]+]] -; CHECK: [[LOOP_HEADER_I_PROL_LOOPEXIT]]: -; CHECK-NEXT: [[PTR_IV_I_UNR:%.*]] = phi ptr [ [[FIRST]], %[[LOOP_HEADER_I_PREHEADER]] ], [ [[PTR_IV_NEXT_I_PROL]], %[[LOOP_LATCH_I_PROL]] ] -; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP1]], 6 -; CHECK-NEXT: br i1 [[TMP5]], label %[[STD_FIND_GENERIC_IMPL_EXIT]], label %[[LOOP_HEADER_I:.*]] +; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[PROL_ITER_CMP_NOT]] +; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: [[MIDDLE_SPLIT]]: +; CHECK-NEXT: [[TMP9:%.*]] = shl i64 [[XTRAITER]], 1 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[TMP9]] +; CHECK-NEXT: br i1 [[TMP6]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[XTRAITER]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[STD_FIND_GENERIC_IMPL_EXIT]], label %[[LOOP_HEADER_I_PREHEADER2]] +; CHECK: [[LOOP_HEADER_I_PREHEADER2]]: +; CHECK-NEXT: [[PTR_IV_I_PH:%.*]] = phi ptr [ [[FIRST]], %[[LOOP_HEADER_I_PREHEADER]] ], [ [[TMP10]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label %[[LOOP_HEADER_I:.*]] +; CHECK: [[VECTOR_EARLY_EXIT]]: +; CHECK-NEXT: [[TMP11:%.*]] = tail call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP4]], i1 true) +; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = shl i64 [[TMP12]], 1 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[TMP13]] +; CHECK-NEXT: br label %[[STD_FIND_GENERIC_IMPL_EXIT]] ; CHECK: [[LOOP_HEADER_I]]: -; CHECK-NEXT: [[PTR_IV_I:%.*]] = phi ptr [ [[PTR_IV_NEXT_I_3:%.*]], %[[LOOP_LATCH_I_3:.*]] ], [ [[PTR_IV_I_UNR]], %[[LOOP_HEADER_I_PROL_LOOPEXIT]] ] +; CHECK-NEXT: [[PTR_IV_I:%.*]] = phi ptr [ [[PTR_IV_NEXT_I:%.*]], %[[LOOP_LATCH_I:.*]] ], [ [[PTR_IV_I_PH]], %[[LOOP_HEADER_I_PREHEADER2]] ] ; CHECK-NEXT: [[L_I:%.*]] = load i16, ptr [[PTR_IV_I]], align 2 ; CHECK-NEXT: [[C_1_I:%.*]] = icmp eq i16 [[L_I]], 1 -; CHECK-NEXT: br i1 [[C_1_I]], label %[[STD_FIND_GENERIC_IMPL_EXIT]], label %[[LOOP_LATCH_I:.*]] +; CHECK-NEXT: br i1 [[C_1_I]], label %[[STD_FIND_GENERIC_IMPL_EXIT]], label %[[LOOP_LATCH_I]] ; CHECK: [[LOOP_LATCH_I]]: -; CHECK-NEXT: [[PTR_IV_NEXT_I:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_I]], i64 2 -; CHECK-NEXT: [[L_I_1:%.*]] = load i16, ptr [[PTR_IV_NEXT_I]], align 2 -; CHECK-NEXT: [[C_1_I_1:%.*]] = icmp eq i16 [[L_I_1]], 1 -; CHECK-NEXT: br i1 [[C_1_I_1]], label %[[STD_FIND_GENERIC_IMPL_EXIT_LOOPEXIT_UNR_LCSSA_LOOPEXIT_SPLIT_LOOP_EXIT11:.*]], label %[[LOOP_LATCH_I_1:.*]] -; CHECK: [[LOOP_LATCH_I_1]]: -; CHECK-NEXT: [[PTR_IV_NEXT_I_1:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_I]], i64 4 -; CHECK-NEXT: [[L_I_2:%.*]] = load i16, ptr [[PTR_IV_NEXT_I_1]], align 2 -; CHECK-NEXT: [[C_1_I_2:%.*]] = icmp eq i16 [[L_I_2]], 1 -; CHECK-NEXT: br i1 [[C_1_I_2]], label %[[STD_FIND_GENERIC_IMPL_EXIT_LOOPEXIT_UNR_LCSSA_LOOPEXIT_SPLIT_LOOP_EXIT9:.*]], label %[[LOOP_LATCH_I_2:.*]] -; CHECK: [[LOOP_LATCH_I_2]]: -; CHECK-NEXT: [[PTR_IV_NEXT_I_2:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_I]], i64 6 -; CHECK-NEXT: [[L_I_3:%.*]] = load i16, ptr [[PTR_IV_NEXT_I_2]], align 2 -; CHECK-NEXT: [[C_1_I_3:%.*]] = icmp eq i16 [[L_I_3]], 1 -; CHECK-NEXT: br i1 [[C_1_I_3]], label %[[STD_FIND_GENERIC_IMPL_EXIT_LOOPEXIT_UNR_LCSSA_LOOPEXIT_SPLIT_LOOP_EXIT7:.*]], label %[[LOOP_LATCH_I_3]] -; CHECK: [[LOOP_LATCH_I_3]]: -; CHECK-NEXT: [[PTR_IV_NEXT_I_3]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_I]], i64 8 -; CHECK-NEXT: [[EC_I_3:%.*]] = icmp eq ptr [[PTR_IV_NEXT_I_3]], [[LAST]] -; CHECK-NEXT: br i1 [[EC_I_3]], label %[[STD_FIND_GENERIC_IMPL_EXIT]], label %[[LOOP_HEADER_I]] -; CHECK: [[STD_FIND_GENERIC_IMPL_EXIT_LOOPEXIT_UNR_LCSSA_LOOPEXIT_SPLIT_LOOP_EXIT7]]: -; CHECK-NEXT: [[PTR_IV_NEXT_I_2_LE:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_I]], i64 6 -; CHECK-NEXT: br label %[[STD_FIND_GENERIC_IMPL_EXIT]] -; CHECK: [[STD_FIND_GENERIC_IMPL_EXIT_LOOPEXIT_UNR_LCSSA_LOOPEXIT_SPLIT_LOOP_EXIT9]]: -; CHECK-NEXT: [[PTR_IV_NEXT_I_1_LE:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_I]], i64 4 -; CHECK-NEXT: br label %[[STD_FIND_GENERIC_IMPL_EXIT]] -; CHECK: [[STD_FIND_GENERIC_IMPL_EXIT_LOOPEXIT_UNR_LCSSA_LOOPEXIT_SPLIT_LOOP_EXIT11]]: -; CHECK-NEXT: [[PTR_IV_NEXT_I_LE:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_I]], i64 2 -; CHECK-NEXT: br label %[[STD_FIND_GENERIC_IMPL_EXIT]] +; CHECK-NEXT: [[PTR_IV_NEXT_I]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_I]], i64 2 +; CHECK-NEXT: [[EC_I:%.*]] = icmp eq ptr [[PTR_IV_NEXT_I]], [[LAST]] +; CHECK-NEXT: br i1 [[EC_I]], label %[[STD_FIND_GENERIC_IMPL_EXIT]], label %[[LOOP_HEADER_I]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[STD_FIND_GENERIC_IMPL_EXIT]]: -; CHECK-NEXT: [[RES_I:%.*]] = phi ptr [ [[FIRST]], %[[ENTRY]] ], [ [[SCEVGEP]], %[[LOOP_HEADER_I_PROL_LOOPEXIT]] ], [ [[PTR_IV_NEXT_I_2_LE]], %[[STD_FIND_GENERIC_IMPL_EXIT_LOOPEXIT_UNR_LCSSA_LOOPEXIT_SPLIT_LOOP_EXIT7]] ], [ [[PTR_IV_NEXT_I_1_LE]], %[[STD_FIND_GENERIC_IMPL_EXIT_LOOPEXIT_UNR_LCSSA_LOOPEXIT_SPLIT_LOOP_EXIT9]] ], [ [[PTR_IV_NEXT_I_LE]], %[[STD_FIND_GENERIC_IMPL_EXIT_LOOPEXIT_UNR_LCSSA_LOOPEXIT_SPLIT_LOOP_EXIT11]] ], [ [[SCEVGEP]], %[[LOOP_LATCH_I_3]] ], [ [[PTR_IV_I]], %[[LOOP_HEADER_I]] ], [ [[PTR_IV_I_PROL]], %[[LOOP_HEADER_I_PROL]] ] +; CHECK-NEXT: [[RES_I:%.*]] = phi ptr [ [[FIRST]], %[[ENTRY]] ], [ [[SCEVGEP]], %[[MIDDLE_BLOCK]] ], [ [[TMP14]], %[[VECTOR_EARLY_EXIT]] ], [ [[SCEVGEP]], %[[LOOP_LATCH_I]] ], [ [[PTR_IV_I]], %[[LOOP_HEADER_I]] ] ; CHECK-NEXT: ret ptr [[RES_I]] ; entry: @@ -241,6 +231,6 @@ declare void @llvm.assume(i1 noundef) ; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} ; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} ; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} -; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]} -; CHECK: [[META4]] = !{!"llvm.loop.unroll.disable"} +; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]} +; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META2]], [[META1]]} ;. diff --git a/llvm/test/tools/llc/save-stats.ll b/llvm/test/tools/llc/save-stats.ll new file mode 100644 index 0000000..acb0367 --- /dev/null +++ b/llvm/test/tools/llc/save-stats.ll @@ -0,0 +1,16 @@ +; REQUIRES: asserts +; REQUIRES: aarch64-registered-target + +; RUN: llc -mtriple=arm64-apple-macosx --save-stats=obj -o %t.s %s && cat %t.stats | FileCheck %s +; RUN: llc -mtriple=arm64-apple-macosx --save-stats=cwd -o %t.s %s && cat %{t:stem}.tmp.stats | FileCheck %s +; RUN: llc -mtriple=arm64-apple-macosx --save-stats -o %t.s %s && cat %{t:stem}.tmp.stats | FileCheck %s +; RUN: not llc -mtriple=arm64-apple-macosx --save-stats=invalid -o %t.s %s 2>&1 | FileCheck %s --check-prefix=INVALID_ARG + +; CHECK: { +; CHECK: "asm-printer.EmittedInsts": +; CHECK: } + +; INVALID_ARG: {{.*}}llc{{.*}}: for the --save-stats option: Cannot find option named 'invalid'! +define i32 @func() { + ret i32 0 +} |
