diff options
Diffstat (limited to 'llvm/test/CodeGen/LoongArch')
22 files changed, 7959 insertions, 452 deletions
diff --git a/llvm/test/CodeGen/LoongArch/O0-pipeline.ll b/llvm/test/CodeGen/LoongArch/O0-pipeline.ll index 9006b5c..5f4fccd 100644 --- a/llvm/test/CodeGen/LoongArch/O0-pipeline.ll +++ b/llvm/test/CodeGen/LoongArch/O0-pipeline.ll @@ -9,9 +9,11 @@ ; CHECK-LABEL: Pass Arguments: ; CHECK-NEXT: Target Library Information +; CHECK-NEXT: Runtime Library Function Analysis ; CHECK-NEXT: Target Pass Configuration ; CHECK-NEXT: Machine Module Information ; CHECK-NEXT: Target Transform Information +; CHECK-NEXT: Library Function Lowering Analysis ; CHECK-NEXT: Create Garbage Collector Module Metadata ; CHECK-NEXT: Assumption Cache Tracker ; CHECK-NEXT: Profile summary info diff --git a/llvm/test/CodeGen/LoongArch/expandmemcmp-optsize.ll b/llvm/test/CodeGen/LoongArch/expandmemcmp-optsize.ll new file mode 100644 index 0000000..36670fa --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/expandmemcmp-optsize.ll @@ -0,0 +1,2239 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: sed 's/iGRLen/i32/g' %s | llc --mtriple=loongarch32 --mattr=+ual \ +; RUN: | FileCheck %s --check-prefixes=CHECK,LA32,LA32-UAL +; RUN: sed 's/iGRLen/i64/g' %s | llc --mtriple=loongarch64 --mattr=+ual \ +; RUN: | FileCheck %s --check-prefixes=CHECK,LA64,LA64-UAL +; RUN: sed 's/iGRLen/i32/g' %s | llc --mtriple=loongarch32 --mattr=-ual \ +; RUN: | FileCheck %s --check-prefixes=CHECK,LA32,LA32-NUAL +; RUN: sed 's/iGRLen/i64/g' %s | llc --mtriple=loongarch64 --mattr=-ual \ +; RUN: | FileCheck %s --check-prefixes=CHECK,LA64,LA64-NUAL + +declare signext i32 @bcmp(ptr, ptr, iGRLen) nounwind readonly +declare signext i32 @memcmp(ptr, ptr, iGRLen) nounwind readonly + +define signext i32 @bcmp_size_0(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: bcmp_size_0: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: move $a2, $zero +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_0: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: move $a2, $zero +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 0) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_1(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: bcmp_size_1: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.bu $a0, $a0, 0 +; LA32-UAL-NEXT: ld.bu $a1, $a1, 0 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_1: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.bu $a0, $a0, 0 +; LA64-UAL-NEXT: ld.bu $a1, $a1, 0 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_1: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 1 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_1: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 1 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 1) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_2(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: bcmp_size_2: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.hu $a0, $a0, 0 +; LA32-UAL-NEXT: ld.hu $a1, $a1, 0 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_2: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.hu $a0, $a0, 0 +; LA64-UAL-NEXT: ld.hu $a1, $a1, 0 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_2: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 2 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_2: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 2 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 2) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_3(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: bcmp_size_3: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.hu $a2, $a0, 0 +; LA32-UAL-NEXT: ld.hu $a3, $a1, 0 +; LA32-UAL-NEXT: ld.bu $a0, $a0, 2 +; LA32-UAL-NEXT: ld.bu $a1, $a1, 2 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a0, $a2, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_3: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.hu $a2, $a0, 0 +; LA64-UAL-NEXT: ld.hu $a3, $a1, 0 +; LA64-UAL-NEXT: ld.bu $a0, $a0, 2 +; LA64-UAL-NEXT: ld.bu $a1, $a1, 2 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a0, $a2, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_3: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 3 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_3: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 3 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 3) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_4(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: bcmp_size_4: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a0, $a0, 0 +; LA32-UAL-NEXT: ld.w $a1, $a1, 0 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_4: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a0, $a0, 0 +; LA64-UAL-NEXT: ld.w $a1, $a1, 0 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_4: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_4: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_5(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: bcmp_size_5: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.bu $a0, $a0, 4 +; LA32-UAL-NEXT: ld.bu $a1, $a1, 4 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a0, $a2, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_5: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a2, $a0, 0 +; LA64-UAL-NEXT: ld.w $a3, $a1, 0 +; LA64-UAL-NEXT: ld.bu $a0, $a0, 4 +; LA64-UAL-NEXT: ld.bu $a1, $a1, 4 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a0, $a2, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_5: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 5 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_5: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 5 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 5) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_6(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: bcmp_size_6: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.hu $a0, $a0, 4 +; LA32-UAL-NEXT: ld.hu $a1, $a1, 4 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a0, $a2, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_6: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a2, $a0, 0 +; LA64-UAL-NEXT: ld.w $a3, $a1, 0 +; LA64-UAL-NEXT: ld.hu $a0, $a0, 4 +; LA64-UAL-NEXT: ld.hu $a1, $a1, 4 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a0, $a2, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_6: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 6 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_6: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 6 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 6) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_7(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: bcmp_size_7: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.w $a0, $a0, 3 +; LA32-UAL-NEXT: ld.w $a1, $a1, 3 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a0, $a2, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_7: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a2, $a0, 0 +; LA64-UAL-NEXT: ld.w $a3, $a1, 0 +; LA64-UAL-NEXT: ld.w $a0, $a0, 3 +; LA64-UAL-NEXT: ld.w $a1, $a1, 3 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a0, $a2, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_7: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 7 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_7: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 7 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 7) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_8(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: bcmp_size_8: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.w $a0, $a0, 4 +; LA32-UAL-NEXT: ld.w $a1, $a1, 4 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a0, $a2, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_8: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a0, $a0, 0 +; LA64-UAL-NEXT: ld.d $a1, $a1, 0 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_8: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 8 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_8: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 8 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 8) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_15(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: bcmp_size_15: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.w $a4, $a0, 4 +; LA32-UAL-NEXT: ld.w $a5, $a1, 4 +; LA32-UAL-NEXT: ld.w $a6, $a0, 8 +; LA32-UAL-NEXT: ld.w $a7, $a1, 8 +; LA32-UAL-NEXT: ld.w $a0, $a0, 11 +; LA32-UAL-NEXT: ld.w $a1, $a1, 11 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a3, $a4, $a5 +; LA32-UAL-NEXT: xor $a4, $a6, $a7 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a1, $a2, $a3 +; LA32-UAL-NEXT: or $a0, $a4, $a0 +; LA32-UAL-NEXT: or $a0, $a1, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_15: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: ld.d $a0, $a0, 7 +; LA64-UAL-NEXT: ld.d $a1, $a1, 7 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a0, $a2, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_15: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 15 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_15: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 15 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 15) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_16(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: bcmp_size_16: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.w $a4, $a0, 4 +; LA32-UAL-NEXT: ld.w $a5, $a1, 4 +; LA32-UAL-NEXT: ld.w $a6, $a0, 8 +; LA32-UAL-NEXT: ld.w $a7, $a1, 8 +; LA32-UAL-NEXT: ld.w $a0, $a0, 12 +; LA32-UAL-NEXT: ld.w $a1, $a1, 12 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a3, $a4, $a5 +; LA32-UAL-NEXT: xor $a4, $a6, $a7 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a1, $a2, $a3 +; LA32-UAL-NEXT: or $a0, $a4, $a0 +; LA32-UAL-NEXT: or $a0, $a1, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_16: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: ld.d $a0, $a0, 8 +; LA64-UAL-NEXT: ld.d $a1, $a1, 8 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a0, $a2, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_16: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 16 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_16: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 16 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 16) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_31(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: bcmp_size_31: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 31 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_31: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: ld.d $a4, $a0, 8 +; LA64-UAL-NEXT: ld.d $a5, $a1, 8 +; LA64-UAL-NEXT: ld.d $a6, $a0, 16 +; LA64-UAL-NEXT: ld.d $a7, $a1, 16 +; LA64-UAL-NEXT: ld.d $a0, $a0, 23 +; LA64-UAL-NEXT: ld.d $a1, $a1, 23 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a3, $a4, $a5 +; LA64-UAL-NEXT: xor $a4, $a6, $a7 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a1, $a2, $a3 +; LA64-UAL-NEXT: or $a0, $a4, $a0 +; LA64-UAL-NEXT: or $a0, $a1, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_31: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 31 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 31) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_32(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: bcmp_size_32: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 32 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_32: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: ld.d $a4, $a0, 8 +; LA64-UAL-NEXT: ld.d $a5, $a1, 8 +; LA64-UAL-NEXT: ld.d $a6, $a0, 16 +; LA64-UAL-NEXT: ld.d $a7, $a1, 16 +; LA64-UAL-NEXT: ld.d $a0, $a0, 24 +; LA64-UAL-NEXT: ld.d $a1, $a1, 24 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a3, $a4, $a5 +; LA64-UAL-NEXT: xor $a4, $a6, $a7 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a1, $a2, $a3 +; LA64-UAL-NEXT: or $a0, $a4, $a0 +; LA64-UAL-NEXT: or $a0, $a1, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_32: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 32 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 32) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_63(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: bcmp_size_63: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 63 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_63: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 63 +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 63) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_64(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: bcmp_size_64: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 64 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_64: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 64 +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 64) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_127(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: bcmp_size_127: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 127 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_127: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 127 +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 127) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_128(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: bcmp_size_128: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 128 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_128: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 128 +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 128) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_runtime(ptr %s1, ptr %s2, iGRLen %len) nounwind optsize { +; LA32-LABEL: bcmp_size_runtime: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_runtime: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen %len) + ret i32 %bcmp +} + +define i1 @bcmp_eq_zero(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: bcmp_eq_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a0, $a0, 0 +; LA32-UAL-NEXT: ld.w $a1, $a1, 0 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: sltui $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_eq_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a0, $a0, 0 +; LA64-UAL-NEXT: ld.w $a1, $a1, 0 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: sltui $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_eq_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: sltui $a0, $a0, 1 +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_eq_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: sltui $a0, $a0, 1 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4) + %ret = icmp eq i32 %bcmp, 0 + ret i1 %ret +} + +define i1 @bcmp_lt_zero(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: bcmp_lt_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: move $a0, $zero +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_lt_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: move $a0, $zero +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_lt_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: srli.w $a0, $a0, 31 +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_lt_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: slti $a0, $a0, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4) + %ret = icmp slt i32 %bcmp, 0 + ret i1 %ret +} + +define i1 @bcmp_gt_zero(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: bcmp_gt_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a0, $a0, 0 +; LA32-UAL-NEXT: ld.w $a1, $a1, 0 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_gt_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a0, $a0, 0 +; LA64-UAL-NEXT: ld.w $a1, $a1, 0 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_gt_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: slt $a0, $zero, $a0 +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_gt_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: slt $a0, $zero, $a0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4) + %ret = icmp sgt i32 %bcmp, 0 + ret i1 %ret +} + +define signext i32 @memcmp_size_0(ptr %s1, ptr %s2) nounwind optsize { +; CHECK-LABEL: memcmp_size_0: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: move $a0, $zero +; CHECK-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 0) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_1(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: memcmp_size_1: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.bu $a0, $a0, 0 +; LA32-UAL-NEXT: ld.bu $a1, $a1, 0 +; LA32-UAL-NEXT: sub.w $a0, $a0, $a1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_1: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.bu $a0, $a0, 0 +; LA64-UAL-NEXT: ld.bu $a1, $a1, 0 +; LA64-UAL-NEXT: sub.d $a0, $a0, $a1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_1: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 1 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_1: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 1 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 1) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_2(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: memcmp_size_2: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.hu $a0, $a0, 0 +; LA32-UAL-NEXT: ld.hu $a1, $a1, 0 +; LA32-UAL-NEXT: srli.w $a2, $a0, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 8 +; LA32-UAL-NEXT: or $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a2, $a1, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 8 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: lu12i.w $a2, 15 +; LA32-UAL-NEXT: ori $a2, $a2, 4095 +; LA32-UAL-NEXT: and $a0, $a0, $a2 +; LA32-UAL-NEXT: and $a1, $a1, $a2 +; LA32-UAL-NEXT: sub.w $a0, $a0, $a1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_2: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.h $a0, $a0, 0 +; LA64-UAL-NEXT: ld.h $a1, $a1, 0 +; LA64-UAL-NEXT: revb.2h $a0, $a0 +; LA64-UAL-NEXT: revb.2h $a1, $a1 +; LA64-UAL-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-UAL-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-UAL-NEXT: sub.d $a0, $a0, $a1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_2: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 2 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_2: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 2 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 2) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_3(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: memcmp_size_3: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.bu $a2, $a0, 2 +; LA32-UAL-NEXT: ld.hu $a0, $a0, 0 +; LA32-UAL-NEXT: ld.bu $a3, $a1, 2 +; LA32-UAL-NEXT: ld.hu $a1, $a1, 0 +; LA32-UAL-NEXT: lu12i.w $a4, 15 +; LA32-UAL-NEXT: ori $a4, $a4, 3840 +; LA32-UAL-NEXT: and $a5, $a0, $a4 +; LA32-UAL-NEXT: or $a2, $a5, $a2 +; LA32-UAL-NEXT: slli.w $a2, $a2, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a2, $a0 +; LA32-UAL-NEXT: and $a2, $a1, $a4 +; LA32-UAL-NEXT: or $a2, $a2, $a3 +; LA32-UAL-NEXT: slli.w $a2, $a2, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a2, $a1 +; LA32-UAL-NEXT: sltu $a2, $a0, $a1 +; LA32-UAL-NEXT: sltu $a0, $a1, $a0 +; LA32-UAL-NEXT: sub.w $a0, $a0, $a2 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_3: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.bu $a2, $a0, 2 +; LA64-UAL-NEXT: ld.hu $a0, $a0, 0 +; LA64-UAL-NEXT: ld.bu $a3, $a1, 2 +; LA64-UAL-NEXT: ld.hu $a1, $a1, 0 +; LA64-UAL-NEXT: slli.d $a2, $a2, 16 +; LA64-UAL-NEXT: or $a0, $a0, $a2 +; LA64-UAL-NEXT: slli.d $a2, $a3, 16 +; LA64-UAL-NEXT: or $a1, $a1, $a2 +; LA64-UAL-NEXT: revb.2w $a0, $a0 +; LA64-UAL-NEXT: addi.w $a0, $a0, 0 +; LA64-UAL-NEXT: revb.2w $a1, $a1 +; LA64-UAL-NEXT: addi.w $a1, $a1, 0 +; LA64-UAL-NEXT: sltu $a2, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $a1, $a0 +; LA64-UAL-NEXT: sub.d $a0, $a0, $a2 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_3: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 3 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_3: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 3 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 3) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_4(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: memcmp_size_4: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a0, $a0, 0 +; LA32-UAL-NEXT: ld.w $a1, $a1, 0 +; LA32-UAL-NEXT: srli.w $a2, $a0, 8 +; LA32-UAL-NEXT: lu12i.w $a3, 15 +; LA32-UAL-NEXT: ori $a3, $a3, 3840 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a3 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a2, $a1, 8 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a3, $a1, $a3 +; LA32-UAL-NEXT: slli.w $a3, $a3, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a3 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: sltu $a2, $a0, $a1 +; LA32-UAL-NEXT: sltu $a0, $a1, $a0 +; LA32-UAL-NEXT: sub.w $a0, $a0, $a2 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_4: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a0, $a0, 0 +; LA64-UAL-NEXT: ld.w $a1, $a1, 0 +; LA64-UAL-NEXT: revb.2w $a0, $a0 +; LA64-UAL-NEXT: addi.w $a0, $a0, 0 +; LA64-UAL-NEXT: revb.2w $a1, $a1 +; LA64-UAL-NEXT: addi.w $a1, $a1, 0 +; LA64-UAL-NEXT: sltu $a2, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $a1, $a0 +; LA64-UAL-NEXT: sub.d $a0, $a0, $a2 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_4: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_4: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_5(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: memcmp_size_5: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: srli.w $a4, $a2, 8 +; LA32-UAL-NEXT: lu12i.w $a5, 15 +; LA32-UAL-NEXT: ori $a5, $a5, 3840 +; LA32-UAL-NEXT: and $a4, $a4, $a5 +; LA32-UAL-NEXT: srli.w $a6, $a2, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: and $a6, $a2, $a5 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a2, $a2, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a6 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: srli.w $a4, $a3, 8 +; LA32-UAL-NEXT: and $a4, $a4, $a5 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: and $a5, $a3, $a5 +; LA32-UAL-NEXT: slli.w $a5, $a5, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: or $a3, $a3, $a4 +; LA32-UAL-NEXT: bne $a2, $a3, .LBB26_2 +; LA32-UAL-NEXT: # %bb.1: # %loadbb1 +; LA32-UAL-NEXT: ld.bu $a0, $a0, 4 +; LA32-UAL-NEXT: ld.bu $a1, $a1, 4 +; LA32-UAL-NEXT: sub.w $a0, $a0, $a1 +; LA32-UAL-NEXT: ret +; LA32-UAL-NEXT: .LBB26_2: # %res_block +; LA32-UAL-NEXT: sltu $a0, $a2, $a3 +; LA32-UAL-NEXT: sub.w $a0, $zero, $a0 +; LA32-UAL-NEXT: ori $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_5: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.bu $a2, $a0, 4 +; LA64-UAL-NEXT: ld.wu $a0, $a0, 0 +; LA64-UAL-NEXT: ld.bu $a3, $a1, 4 +; LA64-UAL-NEXT: ld.wu $a1, $a1, 0 +; LA64-UAL-NEXT: slli.d $a2, $a2, 32 +; LA64-UAL-NEXT: or $a0, $a0, $a2 +; LA64-UAL-NEXT: slli.d $a2, $a3, 32 +; LA64-UAL-NEXT: or $a1, $a1, $a2 +; LA64-UAL-NEXT: revb.d $a0, $a0 +; LA64-UAL-NEXT: revb.d $a1, $a1 +; LA64-UAL-NEXT: sltu $a2, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $a1, $a0 +; LA64-UAL-NEXT: sub.d $a0, $a0, $a2 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_5: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 5 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_5: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 5 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 5) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_6(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: memcmp_size_6: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a3, $a0, 0 +; LA32-UAL-NEXT: ld.w $a4, $a1, 0 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: lu12i.w $a2, 15 +; LA32-UAL-NEXT: ori $a6, $a2, 3840 +; LA32-UAL-NEXT: and $a5, $a5, $a6 +; LA32-UAL-NEXT: srli.w $a7, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a7 +; LA32-UAL-NEXT: and $a7, $a3, $a6 +; LA32-UAL-NEXT: slli.w $a7, $a7, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a7 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a6 +; LA32-UAL-NEXT: srli.w $a7, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a7 +; LA32-UAL-NEXT: and $a6, $a4, $a6 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB27_3 +; LA32-UAL-NEXT: # %bb.1: # %loadbb1 +; LA32-UAL-NEXT: ld.hu $a0, $a0, 4 +; LA32-UAL-NEXT: ld.hu $a1, $a1, 4 +; LA32-UAL-NEXT: srli.w $a3, $a0, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 8 +; LA32-UAL-NEXT: or $a0, $a0, $a3 +; LA32-UAL-NEXT: srli.w $a3, $a1, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 8 +; LA32-UAL-NEXT: or $a1, $a1, $a3 +; LA32-UAL-NEXT: ori $a2, $a2, 4095 +; LA32-UAL-NEXT: and $a3, $a0, $a2 +; LA32-UAL-NEXT: and $a4, $a1, $a2 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB27_3 +; LA32-UAL-NEXT: # %bb.2: +; LA32-UAL-NEXT: move $a0, $zero +; LA32-UAL-NEXT: ret +; LA32-UAL-NEXT: .LBB27_3: # %res_block +; LA32-UAL-NEXT: sltu $a0, $a3, $a4 +; LA32-UAL-NEXT: sub.w $a0, $zero, $a0 +; LA32-UAL-NEXT: ori $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_6: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.hu $a2, $a0, 4 +; LA64-UAL-NEXT: ld.wu $a0, $a0, 0 +; LA64-UAL-NEXT: ld.hu $a3, $a1, 4 +; LA64-UAL-NEXT: ld.wu $a1, $a1, 0 +; LA64-UAL-NEXT: slli.d $a2, $a2, 32 +; LA64-UAL-NEXT: or $a0, $a0, $a2 +; LA64-UAL-NEXT: slli.d $a2, $a3, 32 +; LA64-UAL-NEXT: or $a1, $a1, $a2 +; LA64-UAL-NEXT: revb.d $a0, $a0 +; LA64-UAL-NEXT: revb.d $a1, $a1 +; LA64-UAL-NEXT: sltu $a2, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $a1, $a0 +; LA64-UAL-NEXT: sub.d $a0, $a0, $a2 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_6: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 6 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_6: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 6 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 6) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_7(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: memcmp_size_7: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a3, $a0, 0 +; LA32-UAL-NEXT: ld.w $a4, $a1, 0 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: lu12i.w $a2, 15 +; LA32-UAL-NEXT: ori $a2, $a2, 3840 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB28_3 +; LA32-UAL-NEXT: # %bb.1: # %loadbb1 +; LA32-UAL-NEXT: ld.w $a0, $a0, 3 +; LA32-UAL-NEXT: ld.w $a1, $a1, 3 +; LA32-UAL-NEXT: srli.w $a3, $a0, 8 +; LA32-UAL-NEXT: and $a3, $a3, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a2 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a3, $a0, $a3 +; LA32-UAL-NEXT: srli.w $a0, $a1, 8 +; LA32-UAL-NEXT: and $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: and $a2, $a1, $a2 +; LA32-UAL-NEXT: slli.w $a2, $a2, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: or $a4, $a1, $a0 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB28_3 +; LA32-UAL-NEXT: # %bb.2: +; LA32-UAL-NEXT: move $a0, $zero +; LA32-UAL-NEXT: ret +; LA32-UAL-NEXT: .LBB28_3: # %res_block +; LA32-UAL-NEXT: sltu $a0, $a3, $a4 +; LA32-UAL-NEXT: sub.w $a0, $zero, $a0 +; LA32-UAL-NEXT: ori $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_7: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a2, $a0, 0 +; LA64-UAL-NEXT: ld.w $a3, $a1, 0 +; LA64-UAL-NEXT: revb.2w $a2, $a2 +; LA64-UAL-NEXT: addi.w $a4, $a2, 0 +; LA64-UAL-NEXT: revb.2w $a3, $a3 +; LA64-UAL-NEXT: addi.w $a5, $a3, 0 +; LA64-UAL-NEXT: bne $a4, $a5, .LBB28_3 +; LA64-UAL-NEXT: # %bb.1: # %loadbb1 +; LA64-UAL-NEXT: ld.w $a0, $a0, 3 +; LA64-UAL-NEXT: ld.w $a1, $a1, 3 +; LA64-UAL-NEXT: revb.2w $a2, $a0 +; LA64-UAL-NEXT: addi.w $a0, $a2, 0 +; LA64-UAL-NEXT: revb.2w $a3, $a1 +; LA64-UAL-NEXT: addi.w $a1, $a3, 0 +; LA64-UAL-NEXT: bne $a0, $a1, .LBB28_3 +; LA64-UAL-NEXT: # %bb.2: +; LA64-UAL-NEXT: move $a0, $zero +; LA64-UAL-NEXT: ret +; LA64-UAL-NEXT: .LBB28_3: # %res_block +; LA64-UAL-NEXT: addi.w $a0, $a3, 0 +; LA64-UAL-NEXT: addi.w $a1, $a2, 0 +; LA64-UAL-NEXT: sltu $a0, $a1, $a0 +; LA64-UAL-NEXT: sub.d $a0, $zero, $a0 +; LA64-UAL-NEXT: ori $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_7: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 7 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_7: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 7 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 7) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_8(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: memcmp_size_8: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a3, $a0, 0 +; LA32-UAL-NEXT: ld.w $a4, $a1, 0 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: lu12i.w $a2, 15 +; LA32-UAL-NEXT: ori $a2, $a2, 3840 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB29_3 +; LA32-UAL-NEXT: # %bb.1: # %loadbb1 +; LA32-UAL-NEXT: ld.w $a0, $a0, 4 +; LA32-UAL-NEXT: ld.w $a1, $a1, 4 +; LA32-UAL-NEXT: srli.w $a3, $a0, 8 +; LA32-UAL-NEXT: and $a3, $a3, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a2 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a3, $a0, $a3 +; LA32-UAL-NEXT: srli.w $a0, $a1, 8 +; LA32-UAL-NEXT: and $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: and $a2, $a1, $a2 +; LA32-UAL-NEXT: slli.w $a2, $a2, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: or $a4, $a1, $a0 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB29_3 +; LA32-UAL-NEXT: # %bb.2: +; LA32-UAL-NEXT: move $a0, $zero +; LA32-UAL-NEXT: ret +; LA32-UAL-NEXT: .LBB29_3: # %res_block +; LA32-UAL-NEXT: sltu $a0, $a3, $a4 +; LA32-UAL-NEXT: sub.w $a0, $zero, $a0 +; LA32-UAL-NEXT: ori $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_8: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a0, $a0, 0 +; LA64-UAL-NEXT: ld.d $a1, $a1, 0 +; LA64-UAL-NEXT: revb.d $a0, $a0 +; LA64-UAL-NEXT: revb.d $a1, $a1 +; LA64-UAL-NEXT: sltu $a2, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $a1, $a0 +; LA64-UAL-NEXT: sub.d $a0, $a0, $a2 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_8: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 8 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_8: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 8 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 8) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_15(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: memcmp_size_15: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a3, $a0, 0 +; LA32-UAL-NEXT: ld.w $a4, $a1, 0 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: lu12i.w $a2, 15 +; LA32-UAL-NEXT: ori $a2, $a2, 3840 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB30_5 +; LA32-UAL-NEXT: # %bb.1: # %loadbb1 +; LA32-UAL-NEXT: ld.w $a3, $a0, 4 +; LA32-UAL-NEXT: ld.w $a4, $a1, 4 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB30_5 +; LA32-UAL-NEXT: # %bb.2: # %loadbb2 +; LA32-UAL-NEXT: ld.w $a3, $a0, 8 +; LA32-UAL-NEXT: ld.w $a4, $a1, 8 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB30_5 +; LA32-UAL-NEXT: # %bb.3: # %loadbb3 +; LA32-UAL-NEXT: ld.w $a0, $a0, 11 +; LA32-UAL-NEXT: ld.w $a1, $a1, 11 +; LA32-UAL-NEXT: srli.w $a3, $a0, 8 +; LA32-UAL-NEXT: and $a3, $a3, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a2 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a3, $a0, $a3 +; LA32-UAL-NEXT: srli.w $a0, $a1, 8 +; LA32-UAL-NEXT: and $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: and $a2, $a1, $a2 +; LA32-UAL-NEXT: slli.w $a2, $a2, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: or $a4, $a1, $a0 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB30_5 +; LA32-UAL-NEXT: # %bb.4: +; LA32-UAL-NEXT: move $a0, $zero +; LA32-UAL-NEXT: ret +; LA32-UAL-NEXT: .LBB30_5: # %res_block +; LA32-UAL-NEXT: sltu $a0, $a3, $a4 +; LA32-UAL-NEXT: sub.w $a0, $zero, $a0 +; LA32-UAL-NEXT: ori $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_15: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB30_3 +; LA64-UAL-NEXT: # %bb.1: # %loadbb1 +; LA64-UAL-NEXT: ld.d $a0, $a0, 7 +; LA64-UAL-NEXT: ld.d $a1, $a1, 7 +; LA64-UAL-NEXT: revb.d $a2, $a0 +; LA64-UAL-NEXT: revb.d $a3, $a1 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB30_3 +; LA64-UAL-NEXT: # %bb.2: +; LA64-UAL-NEXT: move $a0, $zero +; LA64-UAL-NEXT: ret +; LA64-UAL-NEXT: .LBB30_3: # %res_block +; LA64-UAL-NEXT: sltu $a0, $a2, $a3 +; LA64-UAL-NEXT: sub.d $a0, $zero, $a0 +; LA64-UAL-NEXT: ori $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_15: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 15 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_15: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 15 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 15) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_16(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: memcmp_size_16: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a3, $a0, 0 +; LA32-UAL-NEXT: ld.w $a4, $a1, 0 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: lu12i.w $a2, 15 +; LA32-UAL-NEXT: ori $a2, $a2, 3840 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB31_5 +; LA32-UAL-NEXT: # %bb.1: # %loadbb1 +; LA32-UAL-NEXT: ld.w $a3, $a0, 4 +; LA32-UAL-NEXT: ld.w $a4, $a1, 4 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB31_5 +; LA32-UAL-NEXT: # %bb.2: # %loadbb2 +; LA32-UAL-NEXT: ld.w $a3, $a0, 8 +; LA32-UAL-NEXT: ld.w $a4, $a1, 8 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB31_5 +; LA32-UAL-NEXT: # %bb.3: # %loadbb3 +; LA32-UAL-NEXT: ld.w $a0, $a0, 12 +; LA32-UAL-NEXT: ld.w $a1, $a1, 12 +; LA32-UAL-NEXT: srli.w $a3, $a0, 8 +; LA32-UAL-NEXT: and $a3, $a3, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a2 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a3, $a0, $a3 +; LA32-UAL-NEXT: srli.w $a0, $a1, 8 +; LA32-UAL-NEXT: and $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: and $a2, $a1, $a2 +; LA32-UAL-NEXT: slli.w $a2, $a2, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: or $a4, $a1, $a0 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB31_5 +; LA32-UAL-NEXT: # %bb.4: +; LA32-UAL-NEXT: move $a0, $zero +; LA32-UAL-NEXT: ret +; LA32-UAL-NEXT: .LBB31_5: # %res_block +; LA32-UAL-NEXT: sltu $a0, $a3, $a4 +; LA32-UAL-NEXT: sub.w $a0, $zero, $a0 +; LA32-UAL-NEXT: ori $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_16: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB31_3 +; LA64-UAL-NEXT: # %bb.1: # %loadbb1 +; LA64-UAL-NEXT: ld.d $a0, $a0, 8 +; LA64-UAL-NEXT: ld.d $a1, $a1, 8 +; LA64-UAL-NEXT: revb.d $a2, $a0 +; LA64-UAL-NEXT: revb.d $a3, $a1 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB31_3 +; LA64-UAL-NEXT: # %bb.2: +; LA64-UAL-NEXT: move $a0, $zero +; LA64-UAL-NEXT: ret +; LA64-UAL-NEXT: .LBB31_3: # %res_block +; LA64-UAL-NEXT: sltu $a0, $a2, $a3 +; LA64-UAL-NEXT: sub.d $a0, $zero, $a0 +; LA64-UAL-NEXT: ori $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_16: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 16 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_16: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 16 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 16) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_31(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: memcmp_size_31: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 31 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_31: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB32_5 +; LA64-UAL-NEXT: # %bb.1: # %loadbb1 +; LA64-UAL-NEXT: ld.d $a2, $a0, 8 +; LA64-UAL-NEXT: ld.d $a3, $a1, 8 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB32_5 +; LA64-UAL-NEXT: # %bb.2: # %loadbb2 +; LA64-UAL-NEXT: ld.d $a2, $a0, 16 +; LA64-UAL-NEXT: ld.d $a3, $a1, 16 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB32_5 +; LA64-UAL-NEXT: # %bb.3: # %loadbb3 +; LA64-UAL-NEXT: ld.d $a0, $a0, 23 +; LA64-UAL-NEXT: ld.d $a1, $a1, 23 +; LA64-UAL-NEXT: revb.d $a2, $a0 +; LA64-UAL-NEXT: revb.d $a3, $a1 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB32_5 +; LA64-UAL-NEXT: # %bb.4: +; LA64-UAL-NEXT: move $a0, $zero +; LA64-UAL-NEXT: ret +; LA64-UAL-NEXT: .LBB32_5: # %res_block +; LA64-UAL-NEXT: sltu $a0, $a2, $a3 +; LA64-UAL-NEXT: sub.d $a0, $zero, $a0 +; LA64-UAL-NEXT: ori $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_31: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 31 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 31) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_32(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: memcmp_size_32: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 32 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_32: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB33_5 +; LA64-UAL-NEXT: # %bb.1: # %loadbb1 +; LA64-UAL-NEXT: ld.d $a2, $a0, 8 +; LA64-UAL-NEXT: ld.d $a3, $a1, 8 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB33_5 +; LA64-UAL-NEXT: # %bb.2: # %loadbb2 +; LA64-UAL-NEXT: ld.d $a2, $a0, 16 +; LA64-UAL-NEXT: ld.d $a3, $a1, 16 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB33_5 +; LA64-UAL-NEXT: # %bb.3: # %loadbb3 +; LA64-UAL-NEXT: ld.d $a0, $a0, 24 +; LA64-UAL-NEXT: ld.d $a1, $a1, 24 +; LA64-UAL-NEXT: revb.d $a2, $a0 +; LA64-UAL-NEXT: revb.d $a3, $a1 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB33_5 +; LA64-UAL-NEXT: # %bb.4: +; LA64-UAL-NEXT: move $a0, $zero +; LA64-UAL-NEXT: ret +; LA64-UAL-NEXT: .LBB33_5: # %res_block +; LA64-UAL-NEXT: sltu $a0, $a2, $a3 +; LA64-UAL-NEXT: sub.d $a0, $zero, $a0 +; LA64-UAL-NEXT: ori $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_32: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 32 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 32) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_63(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: memcmp_size_63: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 63 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_size_63: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 63 +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 63) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_64(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: memcmp_size_64: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 64 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_size_64: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 64 +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 64) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_127(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: memcmp_size_127: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 127 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_size_127: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 127 +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 127) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_128(ptr %s1, ptr %s2) nounwind optsize { +; LA32-LABEL: memcmp_size_128: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 128 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_size_128: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 128 +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 128) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_runtime(ptr %s1, ptr %s2, iGRLen %len) nounwind optsize { +; LA32-LABEL: memcmp_size_runtime: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_size_runtime: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen %len) + ret i32 %memcmp +} + +define i1 @memcmp_eq_zero(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: memcmp_eq_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a0, $a0, 0 +; LA32-UAL-NEXT: ld.w $a1, $a1, 0 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: sltui $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_eq_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a0, $a0, 0 +; LA64-UAL-NEXT: ld.w $a1, $a1, 0 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: sltui $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_eq_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: ld.bu $a2, $a1, 1 +; LA32-NUAL-NEXT: ld.bu $a3, $a1, 0 +; LA32-NUAL-NEXT: ld.bu $a4, $a1, 2 +; LA32-NUAL-NEXT: ld.bu $a1, $a1, 3 +; LA32-NUAL-NEXT: slli.w $a2, $a2, 8 +; LA32-NUAL-NEXT: or $a2, $a2, $a3 +; LA32-NUAL-NEXT: slli.w $a3, $a4, 16 +; LA32-NUAL-NEXT: slli.w $a1, $a1, 24 +; LA32-NUAL-NEXT: or $a1, $a1, $a3 +; LA32-NUAL-NEXT: or $a1, $a1, $a2 +; LA32-NUAL-NEXT: ld.bu $a2, $a0, 1 +; LA32-NUAL-NEXT: ld.bu $a3, $a0, 0 +; LA32-NUAL-NEXT: ld.bu $a4, $a0, 2 +; LA32-NUAL-NEXT: ld.bu $a0, $a0, 3 +; LA32-NUAL-NEXT: slli.w $a2, $a2, 8 +; LA32-NUAL-NEXT: or $a2, $a2, $a3 +; LA32-NUAL-NEXT: slli.w $a3, $a4, 16 +; LA32-NUAL-NEXT: slli.w $a0, $a0, 24 +; LA32-NUAL-NEXT: or $a0, $a0, $a3 +; LA32-NUAL-NEXT: or $a0, $a0, $a2 +; LA32-NUAL-NEXT: xor $a0, $a0, $a1 +; LA32-NUAL-NEXT: sltui $a0, $a0, 1 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_eq_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: ld.bu $a2, $a1, 1 +; LA64-NUAL-NEXT: ld.bu $a3, $a1, 0 +; LA64-NUAL-NEXT: ld.bu $a4, $a1, 2 +; LA64-NUAL-NEXT: ld.b $a1, $a1, 3 +; LA64-NUAL-NEXT: slli.d $a2, $a2, 8 +; LA64-NUAL-NEXT: or $a2, $a2, $a3 +; LA64-NUAL-NEXT: slli.d $a3, $a4, 16 +; LA64-NUAL-NEXT: slli.d $a1, $a1, 24 +; LA64-NUAL-NEXT: or $a1, $a1, $a3 +; LA64-NUAL-NEXT: or $a1, $a1, $a2 +; LA64-NUAL-NEXT: ld.bu $a2, $a0, 1 +; LA64-NUAL-NEXT: ld.bu $a3, $a0, 0 +; LA64-NUAL-NEXT: ld.bu $a4, $a0, 2 +; LA64-NUAL-NEXT: ld.b $a0, $a0, 3 +; LA64-NUAL-NEXT: slli.d $a2, $a2, 8 +; LA64-NUAL-NEXT: or $a2, $a2, $a3 +; LA64-NUAL-NEXT: slli.d $a3, $a4, 16 +; LA64-NUAL-NEXT: slli.d $a0, $a0, 24 +; LA64-NUAL-NEXT: or $a0, $a0, $a3 +; LA64-NUAL-NEXT: or $a0, $a0, $a2 +; LA64-NUAL-NEXT: xor $a0, $a0, $a1 +; LA64-NUAL-NEXT: sltui $a0, $a0, 1 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4) + %ret = icmp eq i32 %memcmp, 0 + ret i1 %ret +} + +define i1 @memcmp_lt_zero(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: memcmp_lt_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a0, $a0, 0 +; LA32-UAL-NEXT: ld.w $a1, $a1, 0 +; LA32-UAL-NEXT: srli.w $a2, $a0, 8 +; LA32-UAL-NEXT: lu12i.w $a3, 15 +; LA32-UAL-NEXT: ori $a3, $a3, 3840 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a3 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a2, $a1, 8 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a3, $a1, $a3 +; LA32-UAL-NEXT: slli.w $a3, $a3, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a3 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: sltu $a0, $a0, $a1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_lt_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a0, $a0, 0 +; LA64-UAL-NEXT: ld.w $a1, $a1, 0 +; LA64-UAL-NEXT: revb.2w $a0, $a0 +; LA64-UAL-NEXT: addi.w $a0, $a0, 0 +; LA64-UAL-NEXT: revb.2w $a1, $a1 +; LA64-UAL-NEXT: addi.w $a1, $a1, 0 +; LA64-UAL-NEXT: sltu $a0, $a0, $a1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_lt_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: srli.w $a0, $a0, 31 +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_lt_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: slti $a0, $a0, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4) + %ret = icmp slt i32 %memcmp, 0 + ret i1 %ret +} + +define i1 @memcmp_gt_zero(ptr %s1, ptr %s2) nounwind optsize { +; LA32-UAL-LABEL: memcmp_gt_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a0, $a0, 0 +; LA32-UAL-NEXT: ld.w $a1, $a1, 0 +; LA32-UAL-NEXT: srli.w $a2, $a0, 8 +; LA32-UAL-NEXT: lu12i.w $a3, 15 +; LA32-UAL-NEXT: ori $a3, $a3, 3840 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a3 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a2, $a1, 8 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a3, $a1, $a3 +; LA32-UAL-NEXT: slli.w $a3, $a3, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a3 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: sltu $a0, $a1, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_gt_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a0, $a0, 0 +; LA64-UAL-NEXT: ld.w $a1, $a1, 0 +; LA64-UAL-NEXT: revb.2w $a0, $a0 +; LA64-UAL-NEXT: addi.w $a0, $a0, 0 +; LA64-UAL-NEXT: revb.2w $a1, $a1 +; LA64-UAL-NEXT: addi.w $a1, $a1, 0 +; LA64-UAL-NEXT: sltu $a0, $a1, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_gt_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: slt $a0, $zero, $a0 +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_gt_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: slt $a0, $zero, $a0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4) + %ret = icmp sgt i32 %memcmp, 0 + ret i1 %ret +} diff --git a/llvm/test/CodeGen/LoongArch/expandmemcmp.ll b/llvm/test/CodeGen/LoongArch/expandmemcmp.ll new file mode 100644 index 0000000..c1bf850 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/expandmemcmp.ll @@ -0,0 +1,3106 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: sed 's/iGRLen/i32/g' %s | llc --mtriple=loongarch32 --mattr=+ual \ +; RUN: | FileCheck %s --check-prefixes=CHECK,LA32,LA32-UAL +; RUN: sed 's/iGRLen/i64/g' %s | llc --mtriple=loongarch64 --mattr=+ual \ +; RUN: | FileCheck %s --check-prefixes=CHECK,LA64,LA64-UAL +; RUN: sed 's/iGRLen/i32/g' %s | llc --mtriple=loongarch32 --mattr=-ual \ +; RUN: | FileCheck %s --check-prefixes=CHECK,LA32,LA32-NUAL +; RUN: sed 's/iGRLen/i64/g' %s | llc --mtriple=loongarch64 --mattr=-ual \ +; RUN: | FileCheck %s --check-prefixes=CHECK,LA64,LA64-NUAL + +declare signext i32 @bcmp(ptr, ptr, iGRLen) nounwind readonly +declare signext i32 @memcmp(ptr, ptr, iGRLen) nounwind readonly + +define signext i32 @bcmp_size_0(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: bcmp_size_0: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: move $a2, $zero +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_0: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: move $a2, $zero +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 0) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_1(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: bcmp_size_1: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.bu $a0, $a0, 0 +; LA32-UAL-NEXT: ld.bu $a1, $a1, 0 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_1: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.bu $a0, $a0, 0 +; LA64-UAL-NEXT: ld.bu $a1, $a1, 0 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_1: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 1 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_1: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 1 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 1) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_2(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: bcmp_size_2: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.hu $a0, $a0, 0 +; LA32-UAL-NEXT: ld.hu $a1, $a1, 0 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_2: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.hu $a0, $a0, 0 +; LA64-UAL-NEXT: ld.hu $a1, $a1, 0 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_2: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 2 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_2: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 2 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 2) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_3(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: bcmp_size_3: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.hu $a2, $a0, 0 +; LA32-UAL-NEXT: ld.hu $a3, $a1, 0 +; LA32-UAL-NEXT: ld.bu $a0, $a0, 2 +; LA32-UAL-NEXT: ld.bu $a1, $a1, 2 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a0, $a2, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_3: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.hu $a2, $a0, 0 +; LA64-UAL-NEXT: ld.hu $a3, $a1, 0 +; LA64-UAL-NEXT: ld.bu $a0, $a0, 2 +; LA64-UAL-NEXT: ld.bu $a1, $a1, 2 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a0, $a2, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_3: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 3 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_3: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 3 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 3) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_4(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: bcmp_size_4: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a0, $a0, 0 +; LA32-UAL-NEXT: ld.w $a1, $a1, 0 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_4: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a0, $a0, 0 +; LA64-UAL-NEXT: ld.w $a1, $a1, 0 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_4: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_4: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_5(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: bcmp_size_5: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.bu $a0, $a0, 4 +; LA32-UAL-NEXT: ld.bu $a1, $a1, 4 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a0, $a2, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_5: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a2, $a0, 0 +; LA64-UAL-NEXT: ld.w $a3, $a1, 0 +; LA64-UAL-NEXT: ld.bu $a0, $a0, 4 +; LA64-UAL-NEXT: ld.bu $a1, $a1, 4 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a0, $a2, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_5: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 5 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_5: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 5 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 5) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_6(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: bcmp_size_6: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.hu $a0, $a0, 4 +; LA32-UAL-NEXT: ld.hu $a1, $a1, 4 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a0, $a2, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_6: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a2, $a0, 0 +; LA64-UAL-NEXT: ld.w $a3, $a1, 0 +; LA64-UAL-NEXT: ld.hu $a0, $a0, 4 +; LA64-UAL-NEXT: ld.hu $a1, $a1, 4 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a0, $a2, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_6: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 6 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_6: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 6 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 6) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_7(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: bcmp_size_7: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.w $a0, $a0, 3 +; LA32-UAL-NEXT: ld.w $a1, $a1, 3 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a0, $a2, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_7: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a2, $a0, 0 +; LA64-UAL-NEXT: ld.w $a3, $a1, 0 +; LA64-UAL-NEXT: ld.w $a0, $a0, 3 +; LA64-UAL-NEXT: ld.w $a1, $a1, 3 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a0, $a2, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_7: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 7 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_7: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 7 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 7) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_8(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: bcmp_size_8: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.w $a0, $a0, 4 +; LA32-UAL-NEXT: ld.w $a1, $a1, 4 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a0, $a2, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_8: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a0, $a0, 0 +; LA64-UAL-NEXT: ld.d $a1, $a1, 0 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_8: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 8 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_8: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 8 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 8) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_15(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: bcmp_size_15: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.w $a4, $a0, 4 +; LA32-UAL-NEXT: ld.w $a5, $a1, 4 +; LA32-UAL-NEXT: ld.w $a6, $a0, 8 +; LA32-UAL-NEXT: ld.w $a7, $a1, 8 +; LA32-UAL-NEXT: ld.w $a0, $a0, 11 +; LA32-UAL-NEXT: ld.w $a1, $a1, 11 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a3, $a4, $a5 +; LA32-UAL-NEXT: xor $a4, $a6, $a7 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a1, $a2, $a3 +; LA32-UAL-NEXT: or $a0, $a4, $a0 +; LA32-UAL-NEXT: or $a0, $a1, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_15: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: ld.d $a0, $a0, 7 +; LA64-UAL-NEXT: ld.d $a1, $a1, 7 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a0, $a2, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_15: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 15 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_15: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 15 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 15) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_16(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: bcmp_size_16: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.w $a4, $a0, 4 +; LA32-UAL-NEXT: ld.w $a5, $a1, 4 +; LA32-UAL-NEXT: ld.w $a6, $a0, 8 +; LA32-UAL-NEXT: ld.w $a7, $a1, 8 +; LA32-UAL-NEXT: ld.w $a0, $a0, 12 +; LA32-UAL-NEXT: ld.w $a1, $a1, 12 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a3, $a4, $a5 +; LA32-UAL-NEXT: xor $a4, $a6, $a7 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a1, $a2, $a3 +; LA32-UAL-NEXT: or $a0, $a4, $a0 +; LA32-UAL-NEXT: or $a0, $a1, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_16: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: ld.d $a0, $a0, 8 +; LA64-UAL-NEXT: ld.d $a1, $a1, 8 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a0, $a2, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_16: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 16 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_16: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 16 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 16) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_31(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: bcmp_size_31: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.w $a4, $a0, 4 +; LA32-UAL-NEXT: ld.w $a5, $a1, 4 +; LA32-UAL-NEXT: ld.w $a6, $a0, 8 +; LA32-UAL-NEXT: ld.w $a7, $a1, 8 +; LA32-UAL-NEXT: ld.w $t0, $a0, 12 +; LA32-UAL-NEXT: ld.w $t1, $a1, 12 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a3, $a4, $a5 +; LA32-UAL-NEXT: xor $a4, $a6, $a7 +; LA32-UAL-NEXT: xor $a5, $t0, $t1 +; LA32-UAL-NEXT: ld.w $a6, $a0, 16 +; LA32-UAL-NEXT: ld.w $a7, $a1, 16 +; LA32-UAL-NEXT: ld.w $t0, $a0, 20 +; LA32-UAL-NEXT: ld.w $t1, $a1, 20 +; LA32-UAL-NEXT: ld.w $t2, $a0, 24 +; LA32-UAL-NEXT: ld.w $t3, $a1, 24 +; LA32-UAL-NEXT: ld.w $a0, $a0, 27 +; LA32-UAL-NEXT: ld.w $a1, $a1, 27 +; LA32-UAL-NEXT: xor $a6, $a6, $a7 +; LA32-UAL-NEXT: xor $a7, $t0, $t1 +; LA32-UAL-NEXT: xor $t0, $t2, $t3 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a1, $a2, $a3 +; LA32-UAL-NEXT: or $a2, $a4, $a5 +; LA32-UAL-NEXT: or $a3, $a6, $a7 +; LA32-UAL-NEXT: or $a0, $t0, $a0 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: or $a0, $a3, $a0 +; LA32-UAL-NEXT: or $a0, $a1, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_31: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: ld.d $a4, $a0, 8 +; LA64-UAL-NEXT: ld.d $a5, $a1, 8 +; LA64-UAL-NEXT: ld.d $a6, $a0, 16 +; LA64-UAL-NEXT: ld.d $a7, $a1, 16 +; LA64-UAL-NEXT: ld.d $a0, $a0, 23 +; LA64-UAL-NEXT: ld.d $a1, $a1, 23 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a3, $a4, $a5 +; LA64-UAL-NEXT: xor $a4, $a6, $a7 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a1, $a2, $a3 +; LA64-UAL-NEXT: or $a0, $a4, $a0 +; LA64-UAL-NEXT: or $a0, $a1, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_31: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 31 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_31: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 31 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 31) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_32(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: bcmp_size_32: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.w $a4, $a0, 4 +; LA32-UAL-NEXT: ld.w $a5, $a1, 4 +; LA32-UAL-NEXT: ld.w $a6, $a0, 8 +; LA32-UAL-NEXT: ld.w $a7, $a1, 8 +; LA32-UAL-NEXT: ld.w $t0, $a0, 12 +; LA32-UAL-NEXT: ld.w $t1, $a1, 12 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a3, $a4, $a5 +; LA32-UAL-NEXT: xor $a4, $a6, $a7 +; LA32-UAL-NEXT: xor $a5, $t0, $t1 +; LA32-UAL-NEXT: ld.w $a6, $a0, 16 +; LA32-UAL-NEXT: ld.w $a7, $a1, 16 +; LA32-UAL-NEXT: ld.w $t0, $a0, 20 +; LA32-UAL-NEXT: ld.w $t1, $a1, 20 +; LA32-UAL-NEXT: ld.w $t2, $a0, 24 +; LA32-UAL-NEXT: ld.w $t3, $a1, 24 +; LA32-UAL-NEXT: ld.w $a0, $a0, 28 +; LA32-UAL-NEXT: ld.w $a1, $a1, 28 +; LA32-UAL-NEXT: xor $a6, $a6, $a7 +; LA32-UAL-NEXT: xor $a7, $t0, $t1 +; LA32-UAL-NEXT: xor $t0, $t2, $t3 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a1, $a2, $a3 +; LA32-UAL-NEXT: or $a2, $a4, $a5 +; LA32-UAL-NEXT: or $a3, $a6, $a7 +; LA32-UAL-NEXT: or $a0, $t0, $a0 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: or $a0, $a3, $a0 +; LA32-UAL-NEXT: or $a0, $a1, $a0 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_32: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: ld.d $a4, $a0, 8 +; LA64-UAL-NEXT: ld.d $a5, $a1, 8 +; LA64-UAL-NEXT: ld.d $a6, $a0, 16 +; LA64-UAL-NEXT: ld.d $a7, $a1, 16 +; LA64-UAL-NEXT: ld.d $a0, $a0, 24 +; LA64-UAL-NEXT: ld.d $a1, $a1, 24 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a3, $a4, $a5 +; LA64-UAL-NEXT: xor $a4, $a6, $a7 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a1, $a2, $a3 +; LA64-UAL-NEXT: or $a0, $a4, $a0 +; LA64-UAL-NEXT: or $a0, $a1, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_size_32: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 32 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_32: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 32 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 32) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_63(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: bcmp_size_63: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 63 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_63: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: ld.d $a4, $a0, 8 +; LA64-UAL-NEXT: ld.d $a5, $a1, 8 +; LA64-UAL-NEXT: ld.d $a6, $a0, 16 +; LA64-UAL-NEXT: ld.d $a7, $a1, 16 +; LA64-UAL-NEXT: ld.d $t0, $a0, 24 +; LA64-UAL-NEXT: ld.d $t1, $a1, 24 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a3, $a4, $a5 +; LA64-UAL-NEXT: xor $a4, $a6, $a7 +; LA64-UAL-NEXT: xor $a5, $t0, $t1 +; LA64-UAL-NEXT: ld.d $a6, $a0, 32 +; LA64-UAL-NEXT: ld.d $a7, $a1, 32 +; LA64-UAL-NEXT: ld.d $t0, $a0, 40 +; LA64-UAL-NEXT: ld.d $t1, $a1, 40 +; LA64-UAL-NEXT: ld.d $t2, $a0, 48 +; LA64-UAL-NEXT: ld.d $t3, $a1, 48 +; LA64-UAL-NEXT: ld.d $a0, $a0, 55 +; LA64-UAL-NEXT: ld.d $a1, $a1, 55 +; LA64-UAL-NEXT: xor $a6, $a6, $a7 +; LA64-UAL-NEXT: xor $a7, $t0, $t1 +; LA64-UAL-NEXT: xor $t0, $t2, $t3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a1, $a2, $a3 +; LA64-UAL-NEXT: or $a2, $a4, $a5 +; LA64-UAL-NEXT: or $a3, $a6, $a7 +; LA64-UAL-NEXT: or $a0, $t0, $a0 +; LA64-UAL-NEXT: or $a1, $a1, $a2 +; LA64-UAL-NEXT: or $a0, $a3, $a0 +; LA64-UAL-NEXT: or $a0, $a1, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_63: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 63 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 63) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_64(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: bcmp_size_64: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 64 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_size_64: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: ld.d $a4, $a0, 8 +; LA64-UAL-NEXT: ld.d $a5, $a1, 8 +; LA64-UAL-NEXT: ld.d $a6, $a0, 16 +; LA64-UAL-NEXT: ld.d $a7, $a1, 16 +; LA64-UAL-NEXT: ld.d $t0, $a0, 24 +; LA64-UAL-NEXT: ld.d $t1, $a1, 24 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a3, $a4, $a5 +; LA64-UAL-NEXT: xor $a4, $a6, $a7 +; LA64-UAL-NEXT: xor $a5, $t0, $t1 +; LA64-UAL-NEXT: ld.d $a6, $a0, 32 +; LA64-UAL-NEXT: ld.d $a7, $a1, 32 +; LA64-UAL-NEXT: ld.d $t0, $a0, 40 +; LA64-UAL-NEXT: ld.d $t1, $a1, 40 +; LA64-UAL-NEXT: ld.d $t2, $a0, 48 +; LA64-UAL-NEXT: ld.d $t3, $a1, 48 +; LA64-UAL-NEXT: ld.d $a0, $a0, 56 +; LA64-UAL-NEXT: ld.d $a1, $a1, 56 +; LA64-UAL-NEXT: xor $a6, $a6, $a7 +; LA64-UAL-NEXT: xor $a7, $t0, $t1 +; LA64-UAL-NEXT: xor $t0, $t2, $t3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a1, $a2, $a3 +; LA64-UAL-NEXT: or $a2, $a4, $a5 +; LA64-UAL-NEXT: or $a3, $a6, $a7 +; LA64-UAL-NEXT: or $a0, $t0, $a0 +; LA64-UAL-NEXT: or $a1, $a1, $a2 +; LA64-UAL-NEXT: or $a0, $a3, $a0 +; LA64-UAL-NEXT: or $a0, $a1, $a0 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_size_64: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 64 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 64) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_127(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: bcmp_size_127: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 127 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_127: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 127 +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 127) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_128(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: bcmp_size_128: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 128 +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_128: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 128 +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 128) + ret i32 %bcmp +} + +define signext i32 @bcmp_size_runtime(ptr %s1, ptr %s2, iGRLen %len) nounwind { +; LA32-LABEL: bcmp_size_runtime: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: bl bcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: bcmp_size_runtime: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen %len) + ret i32 %bcmp +} + +define i1 @bcmp_eq_zero(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: bcmp_eq_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.w $a4, $a0, 4 +; LA32-UAL-NEXT: ld.w $a5, $a1, 4 +; LA32-UAL-NEXT: ld.w $a6, $a0, 8 +; LA32-UAL-NEXT: ld.w $a7, $a1, 8 +; LA32-UAL-NEXT: ld.w $a0, $a0, 12 +; LA32-UAL-NEXT: ld.w $a1, $a1, 12 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a3, $a4, $a5 +; LA32-UAL-NEXT: xor $a4, $a6, $a7 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a1, $a2, $a3 +; LA32-UAL-NEXT: or $a0, $a4, $a0 +; LA32-UAL-NEXT: or $a0, $a1, $a0 +; LA32-UAL-NEXT: sltui $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_eq_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: ld.d $a0, $a0, 8 +; LA64-UAL-NEXT: ld.d $a1, $a1, 8 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a0, $a2, $a0 +; LA64-UAL-NEXT: sltui $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_eq_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 16 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: sltui $a0, $a0, 1 +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_eq_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 16 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: sltui $a0, $a0, 1 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 16) + %ret = icmp eq i32 %bcmp, 0 + ret i1 %ret +} + +define i1 @bcmp_lt_zero(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: bcmp_lt_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: move $a0, $zero +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_lt_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: move $a0, $zero +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_lt_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: srli.w $a0, $a0, 31 +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_lt_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: slti $a0, $a0, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4) + %ret = icmp slt i32 %bcmp, 0 + ret i1 %ret +} + +define i1 @bcmp_gt_zero(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: bcmp_gt_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a0, $a0, 0 +; LA32-UAL-NEXT: ld.w $a1, $a1, 0 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_gt_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a0, $a0, 0 +; LA64-UAL-NEXT: ld.w $a1, $a1, 0 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_gt_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: slt $a0, $zero, $a0 +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_gt_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: slt $a0, $zero, $a0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4) + %ret = icmp sgt i32 %bcmp, 0 + ret i1 %ret +} + +define i1 @bcmp_le_zero(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: bcmp_le_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a0, $a0, 0 +; LA32-UAL-NEXT: ld.w $a1, $a1, 0 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: sltu $a0, $zero, $a0 +; LA32-UAL-NEXT: slti $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_le_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a0, $a0, 0 +; LA64-UAL-NEXT: ld.w $a1, $a1, 0 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $zero, $a0 +; LA64-UAL-NEXT: slti $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_le_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: slti $a0, $a0, 1 +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_le_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: slti $a0, $a0, 1 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4) + %ret = icmp slt i32 %bcmp, 1 + ret i1 %ret +} + +define i1 @bcmp_ge_zero(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: bcmp_ge_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ori $a0, $zero, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: bcmp_ge_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ori $a0, $zero, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: bcmp_ge_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl bcmp +; LA32-NUAL-NEXT: addi.w $a1, $zero, -1 +; LA32-NUAL-NEXT: slt $a0, $a1, $a0 +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: bcmp_ge_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: addi.w $a1, $zero, -1 +; LA64-NUAL-NEXT: slt $a0, $a1, $a0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4) + %ret = icmp sgt i32 %bcmp, -1 + ret i1 %ret +} + +define signext i32 @memcmp_size_0(ptr %s1, ptr %s2) nounwind { +; CHECK-LABEL: memcmp_size_0: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: move $a0, $zero +; CHECK-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 0) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_1(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: memcmp_size_1: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.bu $a0, $a0, 0 +; LA32-UAL-NEXT: ld.bu $a1, $a1, 0 +; LA32-UAL-NEXT: sub.w $a0, $a0, $a1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_1: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.bu $a0, $a0, 0 +; LA64-UAL-NEXT: ld.bu $a1, $a1, 0 +; LA64-UAL-NEXT: sub.d $a0, $a0, $a1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_1: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 1 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_1: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 1 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 1) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_2(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: memcmp_size_2: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.hu $a0, $a0, 0 +; LA32-UAL-NEXT: ld.hu $a1, $a1, 0 +; LA32-UAL-NEXT: srli.w $a2, $a0, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 8 +; LA32-UAL-NEXT: or $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a2, $a1, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 8 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: lu12i.w $a2, 15 +; LA32-UAL-NEXT: ori $a2, $a2, 4095 +; LA32-UAL-NEXT: and $a0, $a0, $a2 +; LA32-UAL-NEXT: and $a1, $a1, $a2 +; LA32-UAL-NEXT: sub.w $a0, $a0, $a1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_2: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.h $a0, $a0, 0 +; LA64-UAL-NEXT: ld.h $a1, $a1, 0 +; LA64-UAL-NEXT: revb.2h $a0, $a0 +; LA64-UAL-NEXT: revb.2h $a1, $a1 +; LA64-UAL-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-UAL-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-UAL-NEXT: sub.d $a0, $a0, $a1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_2: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 2 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_2: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 2 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 2) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_3(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: memcmp_size_3: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.bu $a2, $a0, 2 +; LA32-UAL-NEXT: ld.hu $a0, $a0, 0 +; LA32-UAL-NEXT: ld.bu $a3, $a1, 2 +; LA32-UAL-NEXT: ld.hu $a1, $a1, 0 +; LA32-UAL-NEXT: lu12i.w $a4, 15 +; LA32-UAL-NEXT: ori $a4, $a4, 3840 +; LA32-UAL-NEXT: and $a5, $a0, $a4 +; LA32-UAL-NEXT: or $a2, $a5, $a2 +; LA32-UAL-NEXT: slli.w $a2, $a2, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a2, $a0 +; LA32-UAL-NEXT: and $a2, $a1, $a4 +; LA32-UAL-NEXT: or $a2, $a2, $a3 +; LA32-UAL-NEXT: slli.w $a2, $a2, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a2, $a1 +; LA32-UAL-NEXT: sltu $a2, $a0, $a1 +; LA32-UAL-NEXT: sltu $a0, $a1, $a0 +; LA32-UAL-NEXT: sub.w $a0, $a0, $a2 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_3: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.bu $a2, $a0, 2 +; LA64-UAL-NEXT: ld.hu $a0, $a0, 0 +; LA64-UAL-NEXT: ld.bu $a3, $a1, 2 +; LA64-UAL-NEXT: ld.hu $a1, $a1, 0 +; LA64-UAL-NEXT: slli.d $a2, $a2, 16 +; LA64-UAL-NEXT: or $a0, $a0, $a2 +; LA64-UAL-NEXT: slli.d $a2, $a3, 16 +; LA64-UAL-NEXT: or $a1, $a1, $a2 +; LA64-UAL-NEXT: revb.2w $a0, $a0 +; LA64-UAL-NEXT: addi.w $a0, $a0, 0 +; LA64-UAL-NEXT: revb.2w $a1, $a1 +; LA64-UAL-NEXT: addi.w $a1, $a1, 0 +; LA64-UAL-NEXT: sltu $a2, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $a1, $a0 +; LA64-UAL-NEXT: sub.d $a0, $a0, $a2 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_3: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 3 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_3: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 3 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 3) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_4(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: memcmp_size_4: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a0, $a0, 0 +; LA32-UAL-NEXT: ld.w $a1, $a1, 0 +; LA32-UAL-NEXT: srli.w $a2, $a0, 8 +; LA32-UAL-NEXT: lu12i.w $a3, 15 +; LA32-UAL-NEXT: ori $a3, $a3, 3840 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a3 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a2, $a1, 8 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a3, $a1, $a3 +; LA32-UAL-NEXT: slli.w $a3, $a3, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a3 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: sltu $a2, $a0, $a1 +; LA32-UAL-NEXT: sltu $a0, $a1, $a0 +; LA32-UAL-NEXT: sub.w $a0, $a0, $a2 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_4: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a0, $a0, 0 +; LA64-UAL-NEXT: ld.w $a1, $a1, 0 +; LA64-UAL-NEXT: revb.2w $a0, $a0 +; LA64-UAL-NEXT: addi.w $a0, $a0, 0 +; LA64-UAL-NEXT: revb.2w $a1, $a1 +; LA64-UAL-NEXT: addi.w $a1, $a1, 0 +; LA64-UAL-NEXT: sltu $a2, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $a1, $a0 +; LA64-UAL-NEXT: sub.d $a0, $a0, $a2 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_4: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_4: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_5(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: memcmp_size_5: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: srli.w $a4, $a2, 8 +; LA32-UAL-NEXT: lu12i.w $a5, 15 +; LA32-UAL-NEXT: ori $a5, $a5, 3840 +; LA32-UAL-NEXT: and $a4, $a4, $a5 +; LA32-UAL-NEXT: srli.w $a6, $a2, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: and $a6, $a2, $a5 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a2, $a2, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a6 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: srli.w $a4, $a3, 8 +; LA32-UAL-NEXT: and $a4, $a4, $a5 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: and $a5, $a3, $a5 +; LA32-UAL-NEXT: slli.w $a5, $a5, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: or $a3, $a3, $a4 +; LA32-UAL-NEXT: bne $a2, $a3, .LBB28_2 +; LA32-UAL-NEXT: # %bb.1: # %loadbb1 +; LA32-UAL-NEXT: ld.bu $a0, $a0, 4 +; LA32-UAL-NEXT: ld.bu $a1, $a1, 4 +; LA32-UAL-NEXT: sub.w $a0, $a0, $a1 +; LA32-UAL-NEXT: ret +; LA32-UAL-NEXT: .LBB28_2: # %res_block +; LA32-UAL-NEXT: sltu $a0, $a2, $a3 +; LA32-UAL-NEXT: sub.w $a0, $zero, $a0 +; LA32-UAL-NEXT: ori $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_5: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.bu $a2, $a0, 4 +; LA64-UAL-NEXT: ld.wu $a0, $a0, 0 +; LA64-UAL-NEXT: ld.bu $a3, $a1, 4 +; LA64-UAL-NEXT: ld.wu $a1, $a1, 0 +; LA64-UAL-NEXT: slli.d $a2, $a2, 32 +; LA64-UAL-NEXT: or $a0, $a0, $a2 +; LA64-UAL-NEXT: slli.d $a2, $a3, 32 +; LA64-UAL-NEXT: or $a1, $a1, $a2 +; LA64-UAL-NEXT: revb.d $a0, $a0 +; LA64-UAL-NEXT: revb.d $a1, $a1 +; LA64-UAL-NEXT: sltu $a2, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $a1, $a0 +; LA64-UAL-NEXT: sub.d $a0, $a0, $a2 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_5: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 5 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_5: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 5 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 5) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_6(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: memcmp_size_6: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a3, $a0, 0 +; LA32-UAL-NEXT: ld.w $a4, $a1, 0 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: lu12i.w $a2, 15 +; LA32-UAL-NEXT: ori $a6, $a2, 3840 +; LA32-UAL-NEXT: and $a5, $a5, $a6 +; LA32-UAL-NEXT: srli.w $a7, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a7 +; LA32-UAL-NEXT: and $a7, $a3, $a6 +; LA32-UAL-NEXT: slli.w $a7, $a7, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a7 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a6 +; LA32-UAL-NEXT: srli.w $a7, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a7 +; LA32-UAL-NEXT: and $a6, $a4, $a6 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB29_3 +; LA32-UAL-NEXT: # %bb.1: # %loadbb1 +; LA32-UAL-NEXT: ld.hu $a0, $a0, 4 +; LA32-UAL-NEXT: ld.hu $a1, $a1, 4 +; LA32-UAL-NEXT: srli.w $a3, $a0, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 8 +; LA32-UAL-NEXT: or $a0, $a0, $a3 +; LA32-UAL-NEXT: srli.w $a3, $a1, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 8 +; LA32-UAL-NEXT: or $a1, $a1, $a3 +; LA32-UAL-NEXT: ori $a2, $a2, 4095 +; LA32-UAL-NEXT: and $a3, $a0, $a2 +; LA32-UAL-NEXT: and $a4, $a1, $a2 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB29_3 +; LA32-UAL-NEXT: # %bb.2: +; LA32-UAL-NEXT: move $a0, $zero +; LA32-UAL-NEXT: ret +; LA32-UAL-NEXT: .LBB29_3: # %res_block +; LA32-UAL-NEXT: sltu $a0, $a3, $a4 +; LA32-UAL-NEXT: sub.w $a0, $zero, $a0 +; LA32-UAL-NEXT: ori $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_6: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.hu $a2, $a0, 4 +; LA64-UAL-NEXT: ld.wu $a0, $a0, 0 +; LA64-UAL-NEXT: ld.hu $a3, $a1, 4 +; LA64-UAL-NEXT: ld.wu $a1, $a1, 0 +; LA64-UAL-NEXT: slli.d $a2, $a2, 32 +; LA64-UAL-NEXT: or $a0, $a0, $a2 +; LA64-UAL-NEXT: slli.d $a2, $a3, 32 +; LA64-UAL-NEXT: or $a1, $a1, $a2 +; LA64-UAL-NEXT: revb.d $a0, $a0 +; LA64-UAL-NEXT: revb.d $a1, $a1 +; LA64-UAL-NEXT: sltu $a2, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $a1, $a0 +; LA64-UAL-NEXT: sub.d $a0, $a0, $a2 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_6: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 6 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_6: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 6 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 6) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_7(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: memcmp_size_7: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a3, $a0, 0 +; LA32-UAL-NEXT: ld.w $a4, $a1, 0 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: lu12i.w $a2, 15 +; LA32-UAL-NEXT: ori $a2, $a2, 3840 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB30_3 +; LA32-UAL-NEXT: # %bb.1: # %loadbb1 +; LA32-UAL-NEXT: ld.w $a0, $a0, 3 +; LA32-UAL-NEXT: ld.w $a1, $a1, 3 +; LA32-UAL-NEXT: srli.w $a3, $a0, 8 +; LA32-UAL-NEXT: and $a3, $a3, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a2 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a3, $a0, $a3 +; LA32-UAL-NEXT: srli.w $a0, $a1, 8 +; LA32-UAL-NEXT: and $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: and $a2, $a1, $a2 +; LA32-UAL-NEXT: slli.w $a2, $a2, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: or $a4, $a1, $a0 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB30_3 +; LA32-UAL-NEXT: # %bb.2: +; LA32-UAL-NEXT: move $a0, $zero +; LA32-UAL-NEXT: ret +; LA32-UAL-NEXT: .LBB30_3: # %res_block +; LA32-UAL-NEXT: sltu $a0, $a3, $a4 +; LA32-UAL-NEXT: sub.w $a0, $zero, $a0 +; LA32-UAL-NEXT: ori $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_7: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a2, $a0, 0 +; LA64-UAL-NEXT: ld.w $a3, $a1, 0 +; LA64-UAL-NEXT: revb.2w $a2, $a2 +; LA64-UAL-NEXT: addi.w $a4, $a2, 0 +; LA64-UAL-NEXT: revb.2w $a3, $a3 +; LA64-UAL-NEXT: addi.w $a5, $a3, 0 +; LA64-UAL-NEXT: bne $a4, $a5, .LBB30_3 +; LA64-UAL-NEXT: # %bb.1: # %loadbb1 +; LA64-UAL-NEXT: ld.w $a0, $a0, 3 +; LA64-UAL-NEXT: ld.w $a1, $a1, 3 +; LA64-UAL-NEXT: revb.2w $a2, $a0 +; LA64-UAL-NEXT: addi.w $a0, $a2, 0 +; LA64-UAL-NEXT: revb.2w $a3, $a1 +; LA64-UAL-NEXT: addi.w $a1, $a3, 0 +; LA64-UAL-NEXT: bne $a0, $a1, .LBB30_3 +; LA64-UAL-NEXT: # %bb.2: +; LA64-UAL-NEXT: move $a0, $zero +; LA64-UAL-NEXT: ret +; LA64-UAL-NEXT: .LBB30_3: # %res_block +; LA64-UAL-NEXT: addi.w $a0, $a3, 0 +; LA64-UAL-NEXT: addi.w $a1, $a2, 0 +; LA64-UAL-NEXT: sltu $a0, $a1, $a0 +; LA64-UAL-NEXT: sub.d $a0, $zero, $a0 +; LA64-UAL-NEXT: ori $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_7: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 7 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_7: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 7 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 7) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_8(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: memcmp_size_8: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a3, $a0, 0 +; LA32-UAL-NEXT: ld.w $a4, $a1, 0 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: lu12i.w $a2, 15 +; LA32-UAL-NEXT: ori $a2, $a2, 3840 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB31_3 +; LA32-UAL-NEXT: # %bb.1: # %loadbb1 +; LA32-UAL-NEXT: ld.w $a0, $a0, 4 +; LA32-UAL-NEXT: ld.w $a1, $a1, 4 +; LA32-UAL-NEXT: srli.w $a3, $a0, 8 +; LA32-UAL-NEXT: and $a3, $a3, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a2 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a3, $a0, $a3 +; LA32-UAL-NEXT: srli.w $a0, $a1, 8 +; LA32-UAL-NEXT: and $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: and $a2, $a1, $a2 +; LA32-UAL-NEXT: slli.w $a2, $a2, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: or $a4, $a1, $a0 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB31_3 +; LA32-UAL-NEXT: # %bb.2: +; LA32-UAL-NEXT: move $a0, $zero +; LA32-UAL-NEXT: ret +; LA32-UAL-NEXT: .LBB31_3: # %res_block +; LA32-UAL-NEXT: sltu $a0, $a3, $a4 +; LA32-UAL-NEXT: sub.w $a0, $zero, $a0 +; LA32-UAL-NEXT: ori $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_8: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a0, $a0, 0 +; LA64-UAL-NEXT: ld.d $a1, $a1, 0 +; LA64-UAL-NEXT: revb.d $a0, $a0 +; LA64-UAL-NEXT: revb.d $a1, $a1 +; LA64-UAL-NEXT: sltu $a2, $a0, $a1 +; LA64-UAL-NEXT: sltu $a0, $a1, $a0 +; LA64-UAL-NEXT: sub.d $a0, $a0, $a2 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_8: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 8 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_8: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 8 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 8) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_15(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: memcmp_size_15: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a3, $a0, 0 +; LA32-UAL-NEXT: ld.w $a4, $a1, 0 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: lu12i.w $a2, 15 +; LA32-UAL-NEXT: ori $a2, $a2, 3840 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB32_5 +; LA32-UAL-NEXT: # %bb.1: # %loadbb1 +; LA32-UAL-NEXT: ld.w $a3, $a0, 4 +; LA32-UAL-NEXT: ld.w $a4, $a1, 4 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB32_5 +; LA32-UAL-NEXT: # %bb.2: # %loadbb2 +; LA32-UAL-NEXT: ld.w $a3, $a0, 8 +; LA32-UAL-NEXT: ld.w $a4, $a1, 8 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB32_5 +; LA32-UAL-NEXT: # %bb.3: # %loadbb3 +; LA32-UAL-NEXT: ld.w $a0, $a0, 11 +; LA32-UAL-NEXT: ld.w $a1, $a1, 11 +; LA32-UAL-NEXT: srli.w $a3, $a0, 8 +; LA32-UAL-NEXT: and $a3, $a3, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a2 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a3, $a0, $a3 +; LA32-UAL-NEXT: srli.w $a0, $a1, 8 +; LA32-UAL-NEXT: and $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: and $a2, $a1, $a2 +; LA32-UAL-NEXT: slli.w $a2, $a2, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: or $a4, $a1, $a0 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB32_5 +; LA32-UAL-NEXT: # %bb.4: +; LA32-UAL-NEXT: move $a0, $zero +; LA32-UAL-NEXT: ret +; LA32-UAL-NEXT: .LBB32_5: # %res_block +; LA32-UAL-NEXT: sltu $a0, $a3, $a4 +; LA32-UAL-NEXT: sub.w $a0, $zero, $a0 +; LA32-UAL-NEXT: ori $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_15: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB32_3 +; LA64-UAL-NEXT: # %bb.1: # %loadbb1 +; LA64-UAL-NEXT: ld.d $a0, $a0, 7 +; LA64-UAL-NEXT: ld.d $a1, $a1, 7 +; LA64-UAL-NEXT: revb.d $a2, $a0 +; LA64-UAL-NEXT: revb.d $a3, $a1 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB32_3 +; LA64-UAL-NEXT: # %bb.2: +; LA64-UAL-NEXT: move $a0, $zero +; LA64-UAL-NEXT: ret +; LA64-UAL-NEXT: .LBB32_3: # %res_block +; LA64-UAL-NEXT: sltu $a0, $a2, $a3 +; LA64-UAL-NEXT: sub.d $a0, $zero, $a0 +; LA64-UAL-NEXT: ori $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_15: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 15 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_15: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 15 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 15) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_16(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: memcmp_size_16: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a3, $a0, 0 +; LA32-UAL-NEXT: ld.w $a4, $a1, 0 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: lu12i.w $a2, 15 +; LA32-UAL-NEXT: ori $a2, $a2, 3840 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB33_5 +; LA32-UAL-NEXT: # %bb.1: # %loadbb1 +; LA32-UAL-NEXT: ld.w $a3, $a0, 4 +; LA32-UAL-NEXT: ld.w $a4, $a1, 4 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB33_5 +; LA32-UAL-NEXT: # %bb.2: # %loadbb2 +; LA32-UAL-NEXT: ld.w $a3, $a0, 8 +; LA32-UAL-NEXT: ld.w $a4, $a1, 8 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB33_5 +; LA32-UAL-NEXT: # %bb.3: # %loadbb3 +; LA32-UAL-NEXT: ld.w $a0, $a0, 12 +; LA32-UAL-NEXT: ld.w $a1, $a1, 12 +; LA32-UAL-NEXT: srli.w $a3, $a0, 8 +; LA32-UAL-NEXT: and $a3, $a3, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a2 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a3, $a0, $a3 +; LA32-UAL-NEXT: srli.w $a0, $a1, 8 +; LA32-UAL-NEXT: and $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: and $a2, $a1, $a2 +; LA32-UAL-NEXT: slli.w $a2, $a2, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: or $a4, $a1, $a0 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB33_5 +; LA32-UAL-NEXT: # %bb.4: +; LA32-UAL-NEXT: move $a0, $zero +; LA32-UAL-NEXT: ret +; LA32-UAL-NEXT: .LBB33_5: # %res_block +; LA32-UAL-NEXT: sltu $a0, $a3, $a4 +; LA32-UAL-NEXT: sub.w $a0, $zero, $a0 +; LA32-UAL-NEXT: ori $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_16: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB33_3 +; LA64-UAL-NEXT: # %bb.1: # %loadbb1 +; LA64-UAL-NEXT: ld.d $a0, $a0, 8 +; LA64-UAL-NEXT: ld.d $a1, $a1, 8 +; LA64-UAL-NEXT: revb.d $a2, $a0 +; LA64-UAL-NEXT: revb.d $a3, $a1 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB33_3 +; LA64-UAL-NEXT: # %bb.2: +; LA64-UAL-NEXT: move $a0, $zero +; LA64-UAL-NEXT: ret +; LA64-UAL-NEXT: .LBB33_3: # %res_block +; LA64-UAL-NEXT: sltu $a0, $a2, $a3 +; LA64-UAL-NEXT: sub.d $a0, $zero, $a0 +; LA64-UAL-NEXT: ori $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_16: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 16 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_16: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 16 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 16) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_31(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: memcmp_size_31: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a3, $a0, 0 +; LA32-UAL-NEXT: ld.w $a4, $a1, 0 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: lu12i.w $a2, 15 +; LA32-UAL-NEXT: ori $a2, $a2, 3840 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB34_9 +; LA32-UAL-NEXT: # %bb.1: # %loadbb1 +; LA32-UAL-NEXT: ld.w $a3, $a0, 4 +; LA32-UAL-NEXT: ld.w $a4, $a1, 4 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB34_9 +; LA32-UAL-NEXT: # %bb.2: # %loadbb2 +; LA32-UAL-NEXT: ld.w $a3, $a0, 8 +; LA32-UAL-NEXT: ld.w $a4, $a1, 8 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB34_9 +; LA32-UAL-NEXT: # %bb.3: # %loadbb3 +; LA32-UAL-NEXT: ld.w $a3, $a0, 12 +; LA32-UAL-NEXT: ld.w $a4, $a1, 12 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB34_9 +; LA32-UAL-NEXT: # %bb.4: # %loadbb4 +; LA32-UAL-NEXT: ld.w $a3, $a0, 16 +; LA32-UAL-NEXT: ld.w $a4, $a1, 16 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB34_9 +; LA32-UAL-NEXT: # %bb.5: # %loadbb5 +; LA32-UAL-NEXT: ld.w $a3, $a0, 20 +; LA32-UAL-NEXT: ld.w $a4, $a1, 20 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB34_9 +; LA32-UAL-NEXT: # %bb.6: # %loadbb6 +; LA32-UAL-NEXT: ld.w $a3, $a0, 24 +; LA32-UAL-NEXT: ld.w $a4, $a1, 24 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB34_9 +; LA32-UAL-NEXT: # %bb.7: # %loadbb7 +; LA32-UAL-NEXT: ld.w $a0, $a0, 27 +; LA32-UAL-NEXT: ld.w $a1, $a1, 27 +; LA32-UAL-NEXT: srli.w $a3, $a0, 8 +; LA32-UAL-NEXT: and $a3, $a3, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a2 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a3, $a0, $a3 +; LA32-UAL-NEXT: srli.w $a0, $a1, 8 +; LA32-UAL-NEXT: and $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: and $a2, $a1, $a2 +; LA32-UAL-NEXT: slli.w $a2, $a2, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: or $a4, $a1, $a0 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB34_9 +; LA32-UAL-NEXT: # %bb.8: +; LA32-UAL-NEXT: move $a0, $zero +; LA32-UAL-NEXT: ret +; LA32-UAL-NEXT: .LBB34_9: # %res_block +; LA32-UAL-NEXT: sltu $a0, $a3, $a4 +; LA32-UAL-NEXT: sub.w $a0, $zero, $a0 +; LA32-UAL-NEXT: ori $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_31: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB34_5 +; LA64-UAL-NEXT: # %bb.1: # %loadbb1 +; LA64-UAL-NEXT: ld.d $a2, $a0, 8 +; LA64-UAL-NEXT: ld.d $a3, $a1, 8 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB34_5 +; LA64-UAL-NEXT: # %bb.2: # %loadbb2 +; LA64-UAL-NEXT: ld.d $a2, $a0, 16 +; LA64-UAL-NEXT: ld.d $a3, $a1, 16 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB34_5 +; LA64-UAL-NEXT: # %bb.3: # %loadbb3 +; LA64-UAL-NEXT: ld.d $a0, $a0, 23 +; LA64-UAL-NEXT: ld.d $a1, $a1, 23 +; LA64-UAL-NEXT: revb.d $a2, $a0 +; LA64-UAL-NEXT: revb.d $a3, $a1 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB34_5 +; LA64-UAL-NEXT: # %bb.4: +; LA64-UAL-NEXT: move $a0, $zero +; LA64-UAL-NEXT: ret +; LA64-UAL-NEXT: .LBB34_5: # %res_block +; LA64-UAL-NEXT: sltu $a0, $a2, $a3 +; LA64-UAL-NEXT: sub.d $a0, $zero, $a0 +; LA64-UAL-NEXT: ori $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_31: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 31 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_31: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 31 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 31) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_32(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: memcmp_size_32: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a3, $a0, 0 +; LA32-UAL-NEXT: ld.w $a4, $a1, 0 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: lu12i.w $a2, 15 +; LA32-UAL-NEXT: ori $a2, $a2, 3840 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB35_9 +; LA32-UAL-NEXT: # %bb.1: # %loadbb1 +; LA32-UAL-NEXT: ld.w $a3, $a0, 4 +; LA32-UAL-NEXT: ld.w $a4, $a1, 4 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB35_9 +; LA32-UAL-NEXT: # %bb.2: # %loadbb2 +; LA32-UAL-NEXT: ld.w $a3, $a0, 8 +; LA32-UAL-NEXT: ld.w $a4, $a1, 8 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB35_9 +; LA32-UAL-NEXT: # %bb.3: # %loadbb3 +; LA32-UAL-NEXT: ld.w $a3, $a0, 12 +; LA32-UAL-NEXT: ld.w $a4, $a1, 12 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB35_9 +; LA32-UAL-NEXT: # %bb.4: # %loadbb4 +; LA32-UAL-NEXT: ld.w $a3, $a0, 16 +; LA32-UAL-NEXT: ld.w $a4, $a1, 16 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB35_9 +; LA32-UAL-NEXT: # %bb.5: # %loadbb5 +; LA32-UAL-NEXT: ld.w $a3, $a0, 20 +; LA32-UAL-NEXT: ld.w $a4, $a1, 20 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB35_9 +; LA32-UAL-NEXT: # %bb.6: # %loadbb6 +; LA32-UAL-NEXT: ld.w $a3, $a0, 24 +; LA32-UAL-NEXT: ld.w $a4, $a1, 24 +; LA32-UAL-NEXT: srli.w $a5, $a3, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a3, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a3, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a3, $a3, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a6 +; LA32-UAL-NEXT: or $a3, $a3, $a5 +; LA32-UAL-NEXT: srli.w $a5, $a4, 8 +; LA32-UAL-NEXT: and $a5, $a5, $a2 +; LA32-UAL-NEXT: srli.w $a6, $a4, 24 +; LA32-UAL-NEXT: or $a5, $a5, $a6 +; LA32-UAL-NEXT: and $a6, $a4, $a2 +; LA32-UAL-NEXT: slli.w $a6, $a6, 8 +; LA32-UAL-NEXT: slli.w $a4, $a4, 24 +; LA32-UAL-NEXT: or $a4, $a4, $a6 +; LA32-UAL-NEXT: or $a4, $a4, $a5 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB35_9 +; LA32-UAL-NEXT: # %bb.7: # %loadbb7 +; LA32-UAL-NEXT: ld.w $a0, $a0, 28 +; LA32-UAL-NEXT: ld.w $a1, $a1, 28 +; LA32-UAL-NEXT: srli.w $a3, $a0, 8 +; LA32-UAL-NEXT: and $a3, $a3, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a3, $a3, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a2 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a3, $a0, $a3 +; LA32-UAL-NEXT: srli.w $a0, $a1, 8 +; LA32-UAL-NEXT: and $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: and $a2, $a1, $a2 +; LA32-UAL-NEXT: slli.w $a2, $a2, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: or $a4, $a1, $a0 +; LA32-UAL-NEXT: bne $a3, $a4, .LBB35_9 +; LA32-UAL-NEXT: # %bb.8: +; LA32-UAL-NEXT: move $a0, $zero +; LA32-UAL-NEXT: ret +; LA32-UAL-NEXT: .LBB35_9: # %res_block +; LA32-UAL-NEXT: sltu $a0, $a3, $a4 +; LA32-UAL-NEXT: sub.w $a0, $zero, $a0 +; LA32-UAL-NEXT: ori $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_32: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB35_5 +; LA64-UAL-NEXT: # %bb.1: # %loadbb1 +; LA64-UAL-NEXT: ld.d $a2, $a0, 8 +; LA64-UAL-NEXT: ld.d $a3, $a1, 8 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB35_5 +; LA64-UAL-NEXT: # %bb.2: # %loadbb2 +; LA64-UAL-NEXT: ld.d $a2, $a0, 16 +; LA64-UAL-NEXT: ld.d $a3, $a1, 16 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB35_5 +; LA64-UAL-NEXT: # %bb.3: # %loadbb3 +; LA64-UAL-NEXT: ld.d $a0, $a0, 24 +; LA64-UAL-NEXT: ld.d $a1, $a1, 24 +; LA64-UAL-NEXT: revb.d $a2, $a0 +; LA64-UAL-NEXT: revb.d $a3, $a1 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB35_5 +; LA64-UAL-NEXT: # %bb.4: +; LA64-UAL-NEXT: move $a0, $zero +; LA64-UAL-NEXT: ret +; LA64-UAL-NEXT: .LBB35_5: # %res_block +; LA64-UAL-NEXT: sltu $a0, $a2, $a3 +; LA64-UAL-NEXT: sub.d $a0, $zero, $a0 +; LA64-UAL-NEXT: ori $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_size_32: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 32 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_32: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 32 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 32) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_63(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: memcmp_size_63: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 63 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_63: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB36_9 +; LA64-UAL-NEXT: # %bb.1: # %loadbb1 +; LA64-UAL-NEXT: ld.d $a2, $a0, 8 +; LA64-UAL-NEXT: ld.d $a3, $a1, 8 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB36_9 +; LA64-UAL-NEXT: # %bb.2: # %loadbb2 +; LA64-UAL-NEXT: ld.d $a2, $a0, 16 +; LA64-UAL-NEXT: ld.d $a3, $a1, 16 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB36_9 +; LA64-UAL-NEXT: # %bb.3: # %loadbb3 +; LA64-UAL-NEXT: ld.d $a2, $a0, 24 +; LA64-UAL-NEXT: ld.d $a3, $a1, 24 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB36_9 +; LA64-UAL-NEXT: # %bb.4: # %loadbb4 +; LA64-UAL-NEXT: ld.d $a2, $a0, 32 +; LA64-UAL-NEXT: ld.d $a3, $a1, 32 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB36_9 +; LA64-UAL-NEXT: # %bb.5: # %loadbb5 +; LA64-UAL-NEXT: ld.d $a2, $a0, 40 +; LA64-UAL-NEXT: ld.d $a3, $a1, 40 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB36_9 +; LA64-UAL-NEXT: # %bb.6: # %loadbb6 +; LA64-UAL-NEXT: ld.d $a2, $a0, 48 +; LA64-UAL-NEXT: ld.d $a3, $a1, 48 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB36_9 +; LA64-UAL-NEXT: # %bb.7: # %loadbb7 +; LA64-UAL-NEXT: ld.d $a0, $a0, 55 +; LA64-UAL-NEXT: ld.d $a1, $a1, 55 +; LA64-UAL-NEXT: revb.d $a2, $a0 +; LA64-UAL-NEXT: revb.d $a3, $a1 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB36_9 +; LA64-UAL-NEXT: # %bb.8: +; LA64-UAL-NEXT: move $a0, $zero +; LA64-UAL-NEXT: ret +; LA64-UAL-NEXT: .LBB36_9: # %res_block +; LA64-UAL-NEXT: sltu $a0, $a2, $a3 +; LA64-UAL-NEXT: sub.d $a0, $zero, $a0 +; LA64-UAL-NEXT: ori $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_63: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 63 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 63) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_64(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: memcmp_size_64: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 64 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_size_64: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB37_9 +; LA64-UAL-NEXT: # %bb.1: # %loadbb1 +; LA64-UAL-NEXT: ld.d $a2, $a0, 8 +; LA64-UAL-NEXT: ld.d $a3, $a1, 8 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB37_9 +; LA64-UAL-NEXT: # %bb.2: # %loadbb2 +; LA64-UAL-NEXT: ld.d $a2, $a0, 16 +; LA64-UAL-NEXT: ld.d $a3, $a1, 16 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB37_9 +; LA64-UAL-NEXT: # %bb.3: # %loadbb3 +; LA64-UAL-NEXT: ld.d $a2, $a0, 24 +; LA64-UAL-NEXT: ld.d $a3, $a1, 24 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB37_9 +; LA64-UAL-NEXT: # %bb.4: # %loadbb4 +; LA64-UAL-NEXT: ld.d $a2, $a0, 32 +; LA64-UAL-NEXT: ld.d $a3, $a1, 32 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB37_9 +; LA64-UAL-NEXT: # %bb.5: # %loadbb5 +; LA64-UAL-NEXT: ld.d $a2, $a0, 40 +; LA64-UAL-NEXT: ld.d $a3, $a1, 40 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB37_9 +; LA64-UAL-NEXT: # %bb.6: # %loadbb6 +; LA64-UAL-NEXT: ld.d $a2, $a0, 48 +; LA64-UAL-NEXT: ld.d $a3, $a1, 48 +; LA64-UAL-NEXT: revb.d $a2, $a2 +; LA64-UAL-NEXT: revb.d $a3, $a3 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB37_9 +; LA64-UAL-NEXT: # %bb.7: # %loadbb7 +; LA64-UAL-NEXT: ld.d $a0, $a0, 56 +; LA64-UAL-NEXT: ld.d $a1, $a1, 56 +; LA64-UAL-NEXT: revb.d $a2, $a0 +; LA64-UAL-NEXT: revb.d $a3, $a1 +; LA64-UAL-NEXT: bne $a2, $a3, .LBB37_9 +; LA64-UAL-NEXT: # %bb.8: +; LA64-UAL-NEXT: move $a0, $zero +; LA64-UAL-NEXT: ret +; LA64-UAL-NEXT: .LBB37_9: # %res_block +; LA64-UAL-NEXT: sltu $a0, $a2, $a3 +; LA64-UAL-NEXT: sub.d $a0, $zero, $a0 +; LA64-UAL-NEXT: ori $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_size_64: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 64 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 64) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_127(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: memcmp_size_127: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 127 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_size_127: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 127 +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 127) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_128(ptr %s1, ptr %s2) nounwind { +; LA32-LABEL: memcmp_size_128: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ori $a2, $zero, 128 +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_size_128: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: ori $a2, $zero, 128 +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 128) + ret i32 %memcmp +} + +define signext i32 @memcmp_size_runtime(ptr %s1, ptr %s2, iGRLen %len) nounwind { +; LA32-LABEL: memcmp_size_runtime: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: bl memcmp +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: memcmp_size_runtime: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen %len) + ret i32 %memcmp +} + +define i1 @memcmp_eq_zero(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: memcmp_eq_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a2, $a0, 0 +; LA32-UAL-NEXT: ld.w $a3, $a1, 0 +; LA32-UAL-NEXT: ld.w $a4, $a0, 4 +; LA32-UAL-NEXT: ld.w $a5, $a1, 4 +; LA32-UAL-NEXT: ld.w $a6, $a0, 8 +; LA32-UAL-NEXT: ld.w $a7, $a1, 8 +; LA32-UAL-NEXT: ld.w $a0, $a0, 12 +; LA32-UAL-NEXT: ld.w $a1, $a1, 12 +; LA32-UAL-NEXT: xor $a2, $a2, $a3 +; LA32-UAL-NEXT: xor $a3, $a4, $a5 +; LA32-UAL-NEXT: xor $a4, $a6, $a7 +; LA32-UAL-NEXT: xor $a0, $a0, $a1 +; LA32-UAL-NEXT: or $a1, $a2, $a3 +; LA32-UAL-NEXT: or $a0, $a4, $a0 +; LA32-UAL-NEXT: or $a0, $a1, $a0 +; LA32-UAL-NEXT: sltui $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_eq_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.d $a2, $a0, 0 +; LA64-UAL-NEXT: ld.d $a3, $a1, 0 +; LA64-UAL-NEXT: ld.d $a0, $a0, 8 +; LA64-UAL-NEXT: ld.d $a1, $a1, 8 +; LA64-UAL-NEXT: xor $a2, $a2, $a3 +; LA64-UAL-NEXT: xor $a0, $a0, $a1 +; LA64-UAL-NEXT: or $a0, $a2, $a0 +; LA64-UAL-NEXT: sltui $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_eq_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 16 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: sltui $a0, $a0, 1 +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_eq_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 16 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: sltui $a0, $a0, 1 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 16) + %ret = icmp eq i32 %memcmp, 0 + ret i1 %ret +} + +define i1 @memcmp_lt_zero(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: memcmp_lt_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a0, $a0, 0 +; LA32-UAL-NEXT: ld.w $a1, $a1, 0 +; LA32-UAL-NEXT: srli.w $a2, $a0, 8 +; LA32-UAL-NEXT: lu12i.w $a3, 15 +; LA32-UAL-NEXT: ori $a3, $a3, 3840 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a3 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a2, $a1, 8 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a3, $a1, $a3 +; LA32-UAL-NEXT: slli.w $a3, $a3, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a3 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: sltu $a0, $a0, $a1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_lt_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a0, $a0, 0 +; LA64-UAL-NEXT: ld.w $a1, $a1, 0 +; LA64-UAL-NEXT: revb.2w $a0, $a0 +; LA64-UAL-NEXT: addi.w $a0, $a0, 0 +; LA64-UAL-NEXT: revb.2w $a1, $a1 +; LA64-UAL-NEXT: addi.w $a1, $a1, 0 +; LA64-UAL-NEXT: sltu $a0, $a0, $a1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_lt_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: srli.w $a0, $a0, 31 +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_lt_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: slti $a0, $a0, 0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4) + %ret = icmp slt i32 %memcmp, 0 + ret i1 %ret +} + +define i1 @memcmp_gt_zero(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: memcmp_gt_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a0, $a0, 0 +; LA32-UAL-NEXT: ld.w $a1, $a1, 0 +; LA32-UAL-NEXT: srli.w $a2, $a0, 8 +; LA32-UAL-NEXT: lu12i.w $a3, 15 +; LA32-UAL-NEXT: ori $a3, $a3, 3840 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a3 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a2, $a1, 8 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a3, $a1, $a3 +; LA32-UAL-NEXT: slli.w $a3, $a3, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a3 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: sltu $a0, $a1, $a0 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_gt_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a0, $a0, 0 +; LA64-UAL-NEXT: ld.w $a1, $a1, 0 +; LA64-UAL-NEXT: revb.2w $a0, $a0 +; LA64-UAL-NEXT: addi.w $a0, $a0, 0 +; LA64-UAL-NEXT: revb.2w $a1, $a1 +; LA64-UAL-NEXT: addi.w $a1, $a1, 0 +; LA64-UAL-NEXT: sltu $a0, $a1, $a0 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_gt_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: slt $a0, $zero, $a0 +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_gt_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: slt $a0, $zero, $a0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4) + %ret = icmp sgt i32 %memcmp, 0 + ret i1 %ret +} + +define i1 @memcmp_le_zero(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: memcmp_le_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a0, $a0, 0 +; LA32-UAL-NEXT: ld.w $a1, $a1, 0 +; LA32-UAL-NEXT: srli.w $a2, $a0, 8 +; LA32-UAL-NEXT: lu12i.w $a3, 15 +; LA32-UAL-NEXT: ori $a3, $a3, 3840 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a3 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a2, $a1, 8 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a3, $a1, $a3 +; LA32-UAL-NEXT: slli.w $a3, $a3, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a3 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: sltu $a0, $a1, $a0 +; LA32-UAL-NEXT: xori $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_le_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a0, $a0, 0 +; LA64-UAL-NEXT: ld.w $a1, $a1, 0 +; LA64-UAL-NEXT: revb.2w $a0, $a0 +; LA64-UAL-NEXT: addi.w $a0, $a0, 0 +; LA64-UAL-NEXT: revb.2w $a1, $a1 +; LA64-UAL-NEXT: addi.w $a1, $a1, 0 +; LA64-UAL-NEXT: sltu $a0, $a1, $a0 +; LA64-UAL-NEXT: xori $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_le_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: slti $a0, $a0, 1 +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_le_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: slti $a0, $a0, 1 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4) + %ret = icmp slt i32 %memcmp, 1 + ret i1 %ret +} + +define i1 @memcmp_ge_zero(ptr %s1, ptr %s2) nounwind { +; LA32-UAL-LABEL: memcmp_ge_zero: +; LA32-UAL: # %bb.0: # %entry +; LA32-UAL-NEXT: ld.w $a0, $a0, 0 +; LA32-UAL-NEXT: ld.w $a1, $a1, 0 +; LA32-UAL-NEXT: srli.w $a2, $a0, 8 +; LA32-UAL-NEXT: lu12i.w $a3, 15 +; LA32-UAL-NEXT: ori $a3, $a3, 3840 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a0, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a4, $a0, $a3 +; LA32-UAL-NEXT: slli.w $a4, $a4, 8 +; LA32-UAL-NEXT: slli.w $a0, $a0, 24 +; LA32-UAL-NEXT: or $a0, $a0, $a4 +; LA32-UAL-NEXT: or $a0, $a0, $a2 +; LA32-UAL-NEXT: srli.w $a2, $a1, 8 +; LA32-UAL-NEXT: and $a2, $a2, $a3 +; LA32-UAL-NEXT: srli.w $a4, $a1, 24 +; LA32-UAL-NEXT: or $a2, $a2, $a4 +; LA32-UAL-NEXT: and $a3, $a1, $a3 +; LA32-UAL-NEXT: slli.w $a3, $a3, 8 +; LA32-UAL-NEXT: slli.w $a1, $a1, 24 +; LA32-UAL-NEXT: or $a1, $a1, $a3 +; LA32-UAL-NEXT: or $a1, $a1, $a2 +; LA32-UAL-NEXT: sltu $a0, $a0, $a1 +; LA32-UAL-NEXT: xori $a0, $a0, 1 +; LA32-UAL-NEXT: ret +; +; LA64-UAL-LABEL: memcmp_ge_zero: +; LA64-UAL: # %bb.0: # %entry +; LA64-UAL-NEXT: ld.w $a0, $a0, 0 +; LA64-UAL-NEXT: ld.w $a1, $a1, 0 +; LA64-UAL-NEXT: revb.2w $a0, $a0 +; LA64-UAL-NEXT: addi.w $a0, $a0, 0 +; LA64-UAL-NEXT: revb.2w $a1, $a1 +; LA64-UAL-NEXT: addi.w $a1, $a1, 0 +; LA64-UAL-NEXT: sltu $a0, $a0, $a1 +; LA64-UAL-NEXT: xori $a0, $a0, 1 +; LA64-UAL-NEXT: ret +; +; LA32-NUAL-LABEL: memcmp_ge_zero: +; LA32-NUAL: # %bb.0: # %entry +; LA32-NUAL-NEXT: addi.w $sp, $sp, -16 +; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NUAL-NEXT: ori $a2, $zero, 4 +; LA32-NUAL-NEXT: bl memcmp +; LA32-NUAL-NEXT: addi.w $a1, $zero, -1 +; LA32-NUAL-NEXT: slt $a0, $a1, $a0 +; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NUAL-NEXT: addi.w $sp, $sp, 16 +; LA32-NUAL-NEXT: ret +; +; LA64-NUAL-LABEL: memcmp_ge_zero: +; LA64-NUAL: # %bb.0: # %entry +; LA64-NUAL-NEXT: addi.d $sp, $sp, -16 +; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NUAL-NEXT: ori $a2, $zero, 4 +; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp) +; LA64-NUAL-NEXT: jirl $ra, $ra, 0 +; LA64-NUAL-NEXT: addi.w $a1, $zero, -1 +; LA64-NUAL-NEXT: slt $a0, $a1, $a0 +; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NUAL-NEXT: addi.d $sp, $sp, 16 +; LA64-NUAL-NEXT: ret +entry: + %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4) + %ret = icmp sgt i32 %memcmp, -1 + ret i1 %ret +} diff --git a/llvm/test/CodeGen/LoongArch/issue163681.ll b/llvm/test/CodeGen/LoongArch/issue163681.ll new file mode 100644 index 0000000..f6df349 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/issue163681.ll @@ -0,0 +1,56 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc --mtriple=loongarch64 -code-model=large --verify-machineinstrs < %s \ +; RUN: | FileCheck %s + +@.str = external constant [1 x i8] + +define void @caller(ptr %0) { +; CHECK-LABEL: caller: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset 1, -8 +; CHECK-NEXT: ld.w $a2, $zero, 0 +; CHECK-NEXT: ld.d $a1, $a0, 0 +; CHECK-NEXT: beqz $a2, .LBB0_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: pcalau12i $a0, %got_pc_hi20(.str) +; CHECK-NEXT: addi.d $a2, $zero, %got_pc_lo12(.str) +; CHECK-NEXT: lu32i.d $a2, %got64_pc_lo20(.str) +; CHECK-NEXT: lu52i.d $a2, $a2, %got64_pc_hi12(.str) +; CHECK-NEXT: ldx.d $a2, $a2, $a0 +; CHECK-NEXT: move $a0, $zero +; CHECK-NEXT: jirl $ra, $zero, 0 +; CHECK-NEXT: b .LBB0_3 +; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: pcalau12i $a0, %got_pc_hi20(.str) +; CHECK-NEXT: addi.d $a2, $zero, %got_pc_lo12(.str) +; CHECK-NEXT: lu32i.d $a2, %got64_pc_lo20(.str) +; CHECK-NEXT: lu52i.d $a2, $a2, %got64_pc_hi12(.str) +; CHECK-NEXT: ldx.d $a2, $a2, $a0 +; CHECK-NEXT: move $a0, $zero +; CHECK-NEXT: move $a3, $zero +; CHECK-NEXT: jirl $ra, $zero, 0 +; CHECK-NEXT: .LBB0_3: +; CHECK-NEXT: st.d $zero, $zero, 0 +; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret + %2 = load i32, ptr null, align 4 + %3 = icmp eq i32 %2, 0 + %4 = load i64, ptr %0, align 8 + br i1 %3, label %6, label %5 + +5: ; preds = %1 + call void null(ptr null, i64 %4, ptr @.str) + br label %7 + +6: ; preds = %1 + tail call void null(ptr null, i64 %4, ptr @.str, i32 0) + br label %7 + +7: ; preds = %6, %5 + store ptr null, ptr null, align 8 + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/and-not-combine.ll b/llvm/test/CodeGen/LoongArch/lasx/and-not-combine.ll index 6754959..5ed49d9 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/and-not-combine.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/and-not-combine.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 -; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s -; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64 define void @and_not_combine_v32i8(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ; CHECK-LABEL: and_not_combine_v32i8: @@ -85,3 +85,397 @@ entry: store <4 x i64> %and, ptr %res ret void } + +define void @pre_not_and_not_combine_v32i8(ptr %res, ptr %a, i8 %b) nounwind { +; CHECK-LABEL: pre_not_and_not_combine_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvreplgr2vr.b $xr1, $a2 +; CHECK-NEXT: xvnor.v $xr0, $xr0, $xr1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <32 x i8>, ptr %a + %b.not = xor i8 %b, -1 + %b.not.ele = insertelement <32 x i8> poison, i8 %b.not, i64 0 + %v1.not = shufflevector <32 x i8> %b.not.ele, <32 x i8> poison, <32 x i32> zeroinitializer + %v0.not = xor <32 x i8> %v0, splat (i8 -1) + %and = and <32 x i8> %v0.not, %v1.not + store <32 x i8> %and, ptr %res + ret void +} + +define void @post_not_and_not_combine_v32i8(ptr %res, ptr %a, i8 %b) nounwind { +; CHECK-LABEL: post_not_and_not_combine_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvreplgr2vr.b $xr1, $a2 +; CHECK-NEXT: xvnor.v $xr0, $xr0, $xr1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <32 x i8>, ptr %a + %b.ele = insertelement <32 x i8> poison, i8 %b, i64 0 + %v1 = shufflevector <32 x i8> %b.ele, <32 x i8> poison, <32 x i32> zeroinitializer + %v0.not = xor <32 x i8> %v0, splat (i8 -1) + %v1.not = xor <32 x i8> %v1, splat (i8 -1) + %and = and <32 x i8> %v0.not, %v1.not + store <32 x i8> %and, ptr %res + ret void +} + +define void @pre_not_and_not_combine_v16i16(ptr %res, ptr %a, i16 %b) nounwind { +; CHECK-LABEL: pre_not_and_not_combine_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvreplgr2vr.h $xr1, $a2 +; CHECK-NEXT: xvnor.v $xr0, $xr0, $xr1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i16>, ptr %a + %b.not = xor i16 %b, -1 + %b.not.ele = insertelement <16 x i16> poison, i16 %b.not, i64 0 + %v1.not = shufflevector <16 x i16> %b.not.ele, <16 x i16> poison, <16 x i32> zeroinitializer + %v0.not = xor <16 x i16> %v0, splat (i16 -1) + %and = and <16 x i16> %v0.not, %v1.not + store <16 x i16> %and, ptr %res + ret void +} + +define void @post_not_and_not_combine_v16i16(ptr %res, ptr %a, i16 %b) nounwind { +; CHECK-LABEL: post_not_and_not_combine_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvreplgr2vr.h $xr1, $a2 +; CHECK-NEXT: xvnor.v $xr0, $xr0, $xr1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i16>, ptr %a + %b.ele = insertelement <16 x i16> poison, i16 %b, i64 0 + %v1 = shufflevector <16 x i16> %b.ele, <16 x i16> poison, <16 x i32> zeroinitializer + %v0.not = xor <16 x i16> %v0, splat (i16 -1) + %v1.not = xor <16 x i16> %v1, splat (i16 -1) + %and = and <16 x i16> %v0.not, %v1.not + store <16 x i16> %and, ptr %res + ret void +} + +define void @pre_not_and_not_combine_v8i32(ptr %res, ptr %a, i32 %b) nounwind { +; CHECK-LABEL: pre_not_and_not_combine_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvreplgr2vr.w $xr1, $a2 +; CHECK-NEXT: xvnor.v $xr0, $xr0, $xr1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i32>, ptr %a + %b.not = xor i32 %b, -1 + %b.not.ele = insertelement <8 x i32> poison, i32 %b.not, i64 0 + %v1.not = shufflevector <8 x i32> %b.not.ele, <8 x i32> poison, <8 x i32> zeroinitializer + %v0.not = xor <8 x i32> %v0, splat (i32 -1) + %and = and <8 x i32> %v0.not, %v1.not + store <8 x i32> %and, ptr %res + ret void +} + +define void @post_not_and_not_combine_v8i32(ptr %res, ptr %a, i32 %b) nounwind { +; CHECK-LABEL: post_not_and_not_combine_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvreplgr2vr.w $xr1, $a2 +; CHECK-NEXT: xvnor.v $xr0, $xr0, $xr1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i32>, ptr %a + %b.ele = insertelement <8 x i32> poison, i32 %b, i64 0 + %v1 = shufflevector <8 x i32> %b.ele, <8 x i32> poison, <8 x i32> zeroinitializer + %v0.not = xor <8 x i32> %v0, splat (i32 -1) + %v1.not = xor <8 x i32> %v1, splat (i32 -1) + %and = and <8 x i32> %v0.not, %v1.not + store <8 x i32> %and, ptr %res + ret void +} + +define void @pre_not_and_not_combine_v4i64(ptr %res, ptr %a, i64 %b) nounwind { +; LA32-LABEL: pre_not_and_not_combine_v4i64: +; LA32: # %bb.0: +; LA32-NEXT: xvld $xr0, $a1, 0 +; LA32-NEXT: nor $a1, $a3, $zero +; LA32-NEXT: nor $a2, $a2, $zero +; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 0 +; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 1 +; LA32-NEXT: xvreplve0.d $xr1, $xr1 +; LA32-NEXT: xvandn.v $xr0, $xr0, $xr1 +; LA32-NEXT: xvst $xr0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: pre_not_and_not_combine_v4i64: +; LA64: # %bb.0: +; LA64-NEXT: xvld $xr0, $a1, 0 +; LA64-NEXT: xvreplgr2vr.d $xr1, $a2 +; LA64-NEXT: xvnor.v $xr0, $xr0, $xr1 +; LA64-NEXT: xvst $xr0, $a0, 0 +; LA64-NEXT: ret + %v0 = load <4 x i64>, ptr %a + %b.not = xor i64 %b, -1 + %b.not.ele = insertelement <4 x i64> poison, i64 %b.not, i64 0 + %v1.not = shufflevector <4 x i64> %b.not.ele, <4 x i64> poison, <4 x i32> zeroinitializer + %v0.not = xor <4 x i64> %v0, splat (i64 -1) + %and = and <4 x i64> %v0.not, %v1.not + store <4 x i64> %and, ptr %res + ret void +} + +define void @post_not_and_not_combine_v4i64(ptr %res, ptr %a, i64 %b) nounwind { +; LA32-LABEL: post_not_and_not_combine_v4i64: +; LA32: # %bb.0: +; LA32-NEXT: xvld $xr0, $a1, 0 +; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 0 +; LA32-NEXT: vinsgr2vr.w $vr1, $a3, 1 +; LA32-NEXT: xvreplve0.d $xr1, $xr1 +; LA32-NEXT: xvnor.v $xr0, $xr0, $xr1 +; LA32-NEXT: xvst $xr0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: post_not_and_not_combine_v4i64: +; LA64: # %bb.0: +; LA64-NEXT: xvld $xr0, $a1, 0 +; LA64-NEXT: xvreplgr2vr.d $xr1, $a2 +; LA64-NEXT: xvnor.v $xr0, $xr0, $xr1 +; LA64-NEXT: xvst $xr0, $a0, 0 +; LA64-NEXT: ret + %v0 = load <4 x i64>, ptr %a + %b.ele = insertelement <4 x i64> poison, i64 %b, i64 0 + %v1 = shufflevector <4 x i64> %b.ele, <4 x i64> poison, <4 x i32> zeroinitializer + %v0.not = xor <4 x i64> %v0, splat (i64 -1) + %v1.not = xor <4 x i64> %v1, splat (i64 -1) + %and = and <4 x i64> %v0.not, %v1.not + store <4 x i64> %and, ptr %res + ret void +} + +define void @and_not_combine_splatimm_v32i8(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: and_not_combine_splatimm_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvnori.b $xr0, $xr0, 3 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <32 x i8>, ptr %a0 + %and = and <32 x i8> %v0, splat (i8 -4) + %xor = xor <32 x i8> %and, splat (i8 -4) + store <32 x i8> %xor, ptr %res + ret void +} + +define void @and_not_combine_splatimm_v16i16(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: and_not_combine_splatimm_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvrepli.h $xr1, -4 +; CHECK-NEXT: xvandn.v $xr0, $xr0, $xr1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i16>, ptr %a0 + %and = and <16 x i16> %v0, splat (i16 -4) + %xor = xor <16 x i16> %and, splat (i16 -4) + store <16 x i16> %xor, ptr %res + ret void +} + +define void @and_not_combine_splatimm_v8i32(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: and_not_combine_splatimm_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvrepli.w $xr1, -4 +; CHECK-NEXT: xvandn.v $xr0, $xr0, $xr1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i32>, ptr %a0 + %and = and <8 x i32> %v0, splat (i32 -4) + %xor = xor <8 x i32> %and, splat (i32 -4) + store <8 x i32> %xor, ptr %res + ret void +} + +define void @and_not_combine_splatimm_v4i64(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: and_not_combine_splatimm_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvrepli.d $xr1, -4 +; CHECK-NEXT: xvandn.v $xr0, $xr0, $xr1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x i64>, ptr %a0 + %and = and <4 x i64> %v0, splat (i64 -4) + %xor = xor <4 x i64> %and, splat (i64 -4) + store <4 x i64> %xor, ptr %res + ret void +} + +define void @and_or_not_combine_v32i8(ptr %pa, ptr %pb, ptr %pv, ptr %dst) nounwind { +; CHECK-LABEL: and_or_not_combine_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: xvld $xr2, $a1, 0 +; CHECK-NEXT: xvseq.b $xr0, $xr1, $xr0 +; CHECK-NEXT: xvseq.b $xr1, $xr1, $xr2 +; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1 +; CHECK-NEXT: xvnori.b $xr0, $xr0, 251 +; CHECK-NEXT: xvst $xr0, $a3, 0 +; CHECK-NEXT: ret + %a = load <32 x i8>, ptr %pa + %b = load <32 x i8>, ptr %pb + %v = load <32 x i8>, ptr %pv + %ca = icmp ne <32 x i8> %v, %a + %cb = icmp ne <32 x i8> %v, %b + %or = or <32 x i1> %ca, %cb + %ext = sext <32 x i1> %or to <32 x i8> + %and = and <32 x i8> %ext, splat (i8 4) + store <32 x i8> %and, ptr %dst + ret void +} + +define void @and_or_not_combine_v16i16(ptr %pa, ptr %pb, ptr %pv, ptr %dst) nounwind { +; CHECK-LABEL: and_or_not_combine_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: xvld $xr2, $a1, 0 +; CHECK-NEXT: xvseq.h $xr0, $xr1, $xr0 +; CHECK-NEXT: xvseq.h $xr1, $xr1, $xr2 +; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1 +; CHECK-NEXT: xvrepli.h $xr1, 4 +; CHECK-NEXT: xvandn.v $xr0, $xr0, $xr1 +; CHECK-NEXT: xvst $xr0, $a3, 0 +; CHECK-NEXT: ret + %a = load <16 x i16>, ptr %pa + %b = load <16 x i16>, ptr %pb + %v = load <16 x i16>, ptr %pv + %ca = icmp ne <16 x i16> %v, %a + %cb = icmp ne <16 x i16> %v, %b + %or = or <16 x i1> %ca, %cb + %ext = sext <16 x i1> %or to <16 x i16> + %and = and <16 x i16> %ext, splat (i16 4) + store <16 x i16> %and, ptr %dst + ret void +} + +define void @and_or_not_combine_v8i32(ptr %pa, ptr %pb, ptr %pv, ptr %dst) nounwind { +; CHECK-LABEL: and_or_not_combine_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: xvld $xr2, $a1, 0 +; CHECK-NEXT: xvseq.w $xr0, $xr1, $xr0 +; CHECK-NEXT: xvseq.w $xr1, $xr1, $xr2 +; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1 +; CHECK-NEXT: xvrepli.w $xr1, 4 +; CHECK-NEXT: xvandn.v $xr0, $xr0, $xr1 +; CHECK-NEXT: xvst $xr0, $a3, 0 +; CHECK-NEXT: ret + %a = load <8 x i32>, ptr %pa + %b = load <8 x i32>, ptr %pb + %v = load <8 x i32>, ptr %pv + %ca = icmp ne <8 x i32> %v, %a + %cb = icmp ne <8 x i32> %v, %b + %or = or <8 x i1> %ca, %cb + %ext = sext <8 x i1> %or to <8 x i32> + %and = and <8 x i32> %ext, splat (i32 4) + store <8 x i32> %and, ptr %dst + ret void +} + +define void @and_or_not_combine_v4i64(ptr %pa, ptr %pb, ptr %pv, ptr %dst) nounwind { +; CHECK-LABEL: and_or_not_combine_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: xvld $xr2, $a1, 0 +; CHECK-NEXT: xvseq.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvseq.d $xr1, $xr1, $xr2 +; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1 +; CHECK-NEXT: xvrepli.d $xr1, 4 +; CHECK-NEXT: xvandn.v $xr0, $xr0, $xr1 +; CHECK-NEXT: xvst $xr0, $a3, 0 +; CHECK-NEXT: ret + %a = load <4 x i64>, ptr %pa + %b = load <4 x i64>, ptr %pb + %v = load <4 x i64>, ptr %pv + %ca = icmp ne <4 x i64> %v, %a + %cb = icmp ne <4 x i64> %v, %b + %or = or <4 x i1> %ca, %cb + %ext = sext <4 x i1> %or to <4 x i64> + %and = and <4 x i64> %ext, splat (i64 4) + store <4 x i64> %and, ptr %dst + ret void +} + +define void @and_extract_subvector_not_combine_v32i8(ptr %pa, ptr %dst) nounwind { +; CHECK-LABEL: and_extract_subvector_not_combine_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvpermi.q $xr0, $xr0, 1 +; CHECK-NEXT: vnori.b $vr0, $vr0, 251 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: ret + %a = load volatile <32 x i8>, ptr %pa + %a.not = xor <32 x i8> %a, splat (i8 -1) + %subv = shufflevector <32 x i8> %a.not, <32 x i8> poison, + <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, + i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> + %and = and <16 x i8> %subv, splat (i8 4) + store <16 x i8> %and, ptr %dst + ret void +} + +define void @and_extract_subvector_not_combine_v16i16(ptr %pa, ptr %dst) nounwind { +; CHECK-LABEL: and_extract_subvector_not_combine_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvpermi.q $xr0, $xr0, 1 +; CHECK-NEXT: vrepli.h $vr1, 4 +; CHECK-NEXT: vandn.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: ret + %a = load volatile <16 x i16>, ptr %pa + %a.not = xor <16 x i16> %a, splat (i16 -1) + %subv = shufflevector <16 x i16> %a.not, <16 x i16> poison, + <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %and = and <8 x i16> %subv, splat (i16 4) + store <8 x i16> %and, ptr %dst + ret void +} + +define void @and_extract_subvector_not_combine_v8i32(ptr %pa, ptr %dst) nounwind { +; CHECK-LABEL: and_extract_subvector_not_combine_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvpermi.q $xr0, $xr0, 1 +; CHECK-NEXT: vrepli.w $vr1, 4 +; CHECK-NEXT: vandn.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: ret + %a = load volatile <8 x i32>, ptr %pa + %a.not = xor <8 x i32> %a, splat (i32 -1) + %subv = shufflevector <8 x i32> %a.not, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %and = and <4 x i32> %subv, splat (i32 4) + store <4 x i32> %and, ptr %dst + ret void +} + +define void @and_extract_subvector_not_combine_v4i64(ptr %pa, ptr %dst) nounwind { +; CHECK-LABEL: and_extract_subvector_not_combine_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvpermi.q $xr0, $xr0, 1 +; CHECK-NEXT: vrepli.d $vr1, 4 +; CHECK-NEXT: vandn.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: ret + %a = load volatile <4 x i64>, ptr %pa + %a.not = xor <4 x i64> %a, splat (i64 -1) + %subv = shufflevector <4 x i64> %a.not, <4 x i64> poison, <2 x i32> <i32 2, i32 3> + %and = and <2 x i64> %subv, splat (i64 4) + store <2 x i64> %and, ptr %dst + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll b/llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll index ba2118f..b3155c9 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll @@ -106,6 +106,69 @@ define void @ctlz_v4i64(ptr %src, ptr %dst) nounwind { ret void } +define void @not_ctlz_v32i8(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: not_ctlz_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvxori.b $xr0, $xr0, 255 +; CHECK-NEXT: xvclz.b $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a1, 0 +; CHECK-NEXT: ret + %v = load <32 x i8>, ptr %src + %neg = xor <32 x i8> %v, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> + %res = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %neg, i1 false) + store <32 x i8> %res, ptr %dst + ret void +} + +define void @not_ctlz_v16i16(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: not_ctlz_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvrepli.b $xr1, -1 +; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 +; CHECK-NEXT: xvclz.h $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a1, 0 +; CHECK-NEXT: ret + %v = load <16 x i16>, ptr %src + %neg = xor <16 x i16> %v, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> + %res = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %neg, i1 false) + store <16 x i16> %res, ptr %dst + ret void +} + +define void @not_ctlz_v8i32(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: not_ctlz_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvrepli.b $xr1, -1 +; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 +; CHECK-NEXT: xvclz.w $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a1, 0 +; CHECK-NEXT: ret + %v = load <8 x i32>, ptr %src + %neg = xor <8 x i32> %v, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1> + %res = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %neg, i1 false) + store <8 x i32> %res, ptr %dst + ret void +} + +define void @not_ctlz_v4i64(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: not_ctlz_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvrepli.b $xr1, -1 +; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 +; CHECK-NEXT: xvclz.d $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a1, 0 +; CHECK-NEXT: ret + %v = load <4 x i64>, ptr %src + %neg = xor <4 x i64> %v, <i64 -1, i64 -1, i64 -1, i64 -1> + %res = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %neg, i1 false) + store <4 x i64> %res, ptr %dst + ret void +} + declare <32 x i8> @llvm.ctpop.v32i8(<32 x i8>) declare <16 x i16> @llvm.ctpop.v16i16(<16 x i16>) declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/fp-max-min.ll b/llvm/test/CodeGen/LoongArch/lasx/fp-max-min.ll index 48ec98c..8e08e1e 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/fp-max-min.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/fp-max-min.ll @@ -5,40 +5,10 @@ define void @minnum_v8f32(ptr %res, ptr %x, ptr %y) nounwind { ; CHECK-LABEL: minnum_v8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvld $xr0, $a2, 0 -; CHECK-NEXT: xvld $xr1, $a1, 0 -; CHECK-NEXT: xvpickve.w $xr2, $xr0, 5 -; CHECK-NEXT: xvpickve.w $xr3, $xr1, 5 -; CHECK-NEXT: fmin.s $fa2, $fa3, $fa2 -; CHECK-NEXT: xvpickve.w $xr3, $xr0, 4 -; CHECK-NEXT: xvpickve.w $xr4, $xr1, 4 -; CHECK-NEXT: fmin.s $fa3, $fa4, $fa3 -; CHECK-NEXT: vextrins.w $vr3, $vr2, 16 -; CHECK-NEXT: xvpickve.w $xr2, $xr0, 6 -; CHECK-NEXT: xvpickve.w $xr4, $xr1, 6 -; CHECK-NEXT: fmin.s $fa2, $fa4, $fa2 -; CHECK-NEXT: vextrins.w $vr3, $vr2, 32 -; CHECK-NEXT: xvpickve.w $xr2, $xr0, 7 -; CHECK-NEXT: xvpickve.w $xr4, $xr1, 7 -; CHECK-NEXT: fmin.s $fa2, $fa4, $fa2 -; CHECK-NEXT: vextrins.w $vr3, $vr2, 48 -; CHECK-NEXT: xvpickve.w $xr2, $xr0, 1 -; CHECK-NEXT: xvpickve.w $xr4, $xr1, 1 -; CHECK-NEXT: fmin.s $fa2, $fa4, $fa2 -; CHECK-NEXT: xvpickve.w $xr4, $xr0, 0 -; CHECK-NEXT: xvpickve.w $xr5, $xr1, 0 -; CHECK-NEXT: fmin.s $fa4, $fa5, $fa4 -; CHECK-NEXT: vextrins.w $vr4, $vr2, 16 -; CHECK-NEXT: xvpickve.w $xr2, $xr0, 2 -; CHECK-NEXT: xvpickve.w $xr5, $xr1, 2 -; CHECK-NEXT: fmin.s $fa2, $fa5, $fa2 -; CHECK-NEXT: vextrins.w $vr4, $vr2, 32 -; CHECK-NEXT: xvpickve.w $xr0, $xr0, 3 -; CHECK-NEXT: xvpickve.w $xr1, $xr1, 3 -; CHECK-NEXT: fmin.s $fa0, $fa1, $fa0 -; CHECK-NEXT: vextrins.w $vr4, $vr0, 48 -; CHECK-NEXT: xvpermi.q $xr4, $xr3, 2 -; CHECK-NEXT: xvst $xr4, $a0, 0 +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: xvfmin.s $xr0, $xr0, $xr1 +; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: %v0 = load <8 x float>, ptr %x @@ -51,23 +21,9 @@ entry: define void @minnum_v4f64(ptr %res, ptr %x, ptr %y) nounwind { ; CHECK-LABEL: minnum_v4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvld $xr0, $a2, 0 -; CHECK-NEXT: xvld $xr1, $a1, 0 -; CHECK-NEXT: xvpickve.d $xr2, $xr0, 3 -; CHECK-NEXT: xvpickve.d $xr3, $xr1, 3 -; CHECK-NEXT: fmin.d $fa2, $fa3, $fa2 -; CHECK-NEXT: xvpickve.d $xr3, $xr0, 2 -; CHECK-NEXT: xvpickve.d $xr4, $xr1, 2 -; CHECK-NEXT: fmin.d $fa3, $fa4, $fa3 -; CHECK-NEXT: vextrins.d $vr3, $vr2, 16 -; CHECK-NEXT: xvpickve.d $xr2, $xr0, 1 -; CHECK-NEXT: xvpickve.d $xr4, $xr1, 1 -; CHECK-NEXT: fmin.d $fa2, $fa4, $fa2 -; CHECK-NEXT: xvpickve.d $xr0, $xr0, 0 -; CHECK-NEXT: xvpickve.d $xr1, $xr1, 0 -; CHECK-NEXT: fmin.d $fa0, $fa1, $fa0 -; CHECK-NEXT: vextrins.d $vr0, $vr2, 16 -; CHECK-NEXT: xvpermi.q $xr0, $xr3, 2 +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: xvfmin.d $xr0, $xr0, $xr1 ; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -81,40 +37,10 @@ entry: define void @maxnum_v8f32(ptr %res, ptr %x, ptr %y) nounwind { ; CHECK-LABEL: maxnum_v8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvld $xr0, $a2, 0 -; CHECK-NEXT: xvld $xr1, $a1, 0 -; CHECK-NEXT: xvpickve.w $xr2, $xr0, 5 -; CHECK-NEXT: xvpickve.w $xr3, $xr1, 5 -; CHECK-NEXT: fmax.s $fa2, $fa3, $fa2 -; CHECK-NEXT: xvpickve.w $xr3, $xr0, 4 -; CHECK-NEXT: xvpickve.w $xr4, $xr1, 4 -; CHECK-NEXT: fmax.s $fa3, $fa4, $fa3 -; CHECK-NEXT: vextrins.w $vr3, $vr2, 16 -; CHECK-NEXT: xvpickve.w $xr2, $xr0, 6 -; CHECK-NEXT: xvpickve.w $xr4, $xr1, 6 -; CHECK-NEXT: fmax.s $fa2, $fa4, $fa2 -; CHECK-NEXT: vextrins.w $vr3, $vr2, 32 -; CHECK-NEXT: xvpickve.w $xr2, $xr0, 7 -; CHECK-NEXT: xvpickve.w $xr4, $xr1, 7 -; CHECK-NEXT: fmax.s $fa2, $fa4, $fa2 -; CHECK-NEXT: vextrins.w $vr3, $vr2, 48 -; CHECK-NEXT: xvpickve.w $xr2, $xr0, 1 -; CHECK-NEXT: xvpickve.w $xr4, $xr1, 1 -; CHECK-NEXT: fmax.s $fa2, $fa4, $fa2 -; CHECK-NEXT: xvpickve.w $xr4, $xr0, 0 -; CHECK-NEXT: xvpickve.w $xr5, $xr1, 0 -; CHECK-NEXT: fmax.s $fa4, $fa5, $fa4 -; CHECK-NEXT: vextrins.w $vr4, $vr2, 16 -; CHECK-NEXT: xvpickve.w $xr2, $xr0, 2 -; CHECK-NEXT: xvpickve.w $xr5, $xr1, 2 -; CHECK-NEXT: fmax.s $fa2, $fa5, $fa2 -; CHECK-NEXT: vextrins.w $vr4, $vr2, 32 -; CHECK-NEXT: xvpickve.w $xr0, $xr0, 3 -; CHECK-NEXT: xvpickve.w $xr1, $xr1, 3 -; CHECK-NEXT: fmax.s $fa0, $fa1, $fa0 -; CHECK-NEXT: vextrins.w $vr4, $vr0, 48 -; CHECK-NEXT: xvpermi.q $xr4, $xr3, 2 -; CHECK-NEXT: xvst $xr4, $a0, 0 +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: xvfmax.s $xr0, $xr0, $xr1 +; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: %v0 = load <8 x float>, ptr %x @@ -127,23 +53,9 @@ entry: define void @maxnum_v4f64(ptr %res, ptr %x, ptr %y) nounwind { ; CHECK-LABEL: maxnum_v4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvld $xr0, $a2, 0 -; CHECK-NEXT: xvld $xr1, $a1, 0 -; CHECK-NEXT: xvpickve.d $xr2, $xr0, 3 -; CHECK-NEXT: xvpickve.d $xr3, $xr1, 3 -; CHECK-NEXT: fmax.d $fa2, $fa3, $fa2 -; CHECK-NEXT: xvpickve.d $xr3, $xr0, 2 -; CHECK-NEXT: xvpickve.d $xr4, $xr1, 2 -; CHECK-NEXT: fmax.d $fa3, $fa4, $fa3 -; CHECK-NEXT: vextrins.d $vr3, $vr2, 16 -; CHECK-NEXT: xvpickve.d $xr2, $xr0, 1 -; CHECK-NEXT: xvpickve.d $xr4, $xr1, 1 -; CHECK-NEXT: fmax.d $fa2, $fa4, $fa2 -; CHECK-NEXT: xvpickve.d $xr0, $xr0, 0 -; CHECK-NEXT: xvpickve.d $xr1, $xr1, 0 -; CHECK-NEXT: fmax.d $fa0, $fa1, $fa0 -; CHECK-NEXT: vextrins.d $vr0, $vr2, 16 -; CHECK-NEXT: xvpermi.q $xr0, $xr3, 2 +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: xvfmax.d $xr0, $xr0, $xr1 ; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/LoongArch/lasx/fp-rounding.ll b/llvm/test/CodeGen/LoongArch/lasx/fp-rounding.ll index 79407c3..fa5f27e 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/fp-rounding.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/fp-rounding.ll @@ -7,38 +7,8 @@ define void @ceil_v8f32(ptr %res, ptr %a0) nounwind { ; CHECK-LABEL: ceil_v8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 5 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrp.s $vr1, $vr1 -; CHECK-NEXT: xvpickve.w $xr2, $xr0, 4 -; CHECK-NEXT: vreplvei.w $vr2, $vr2, 0 -; CHECK-NEXT: vfrintrp.s $vr2, $vr2 -; CHECK-NEXT: vextrins.w $vr2, $vr1, 16 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 6 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrp.s $vr1, $vr1 -; CHECK-NEXT: vextrins.w $vr2, $vr1, 32 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 7 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrp.s $vr1, $vr1 -; CHECK-NEXT: vextrins.w $vr2, $vr1, 48 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 1 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrp.s $vr1, $vr1 -; CHECK-NEXT: xvpickve.w $xr3, $xr0, 0 -; CHECK-NEXT: vreplvei.w $vr3, $vr3, 0 -; CHECK-NEXT: vfrintrp.s $vr3, $vr3 -; CHECK-NEXT: vextrins.w $vr3, $vr1, 16 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 2 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrp.s $vr1, $vr1 -; CHECK-NEXT: vextrins.w $vr3, $vr1, 32 -; CHECK-NEXT: xvpickve.w $xr0, $xr0, 3 -; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0 -; CHECK-NEXT: vfrintrp.s $vr0, $vr0 -; CHECK-NEXT: vextrins.w $vr3, $vr0, 48 -; CHECK-NEXT: xvpermi.q $xr3, $xr2, 2 -; CHECK-NEXT: xvst $xr3, $a0, 0 +; CHECK-NEXT: xvfrintrp.s $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: %v0 = load <8 x float>, ptr %a0 @@ -52,21 +22,7 @@ define void @ceil_v4f64(ptr %res, ptr %a0) nounwind { ; CHECK-LABEL: ceil_v4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 -; CHECK-NEXT: xvpickve.d $xr1, $xr0, 3 -; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrp.d $vr1, $vr1 -; CHECK-NEXT: xvpickve.d $xr2, $xr0, 2 -; CHECK-NEXT: vreplvei.d $vr2, $vr2, 0 -; CHECK-NEXT: vfrintrp.d $vr2, $vr2 -; CHECK-NEXT: vextrins.d $vr2, $vr1, 16 -; CHECK-NEXT: xvpickve.d $xr1, $xr0, 1 -; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrp.d $vr1, $vr1 -; CHECK-NEXT: xvpickve.d $xr0, $xr0, 0 -; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0 -; CHECK-NEXT: vfrintrp.d $vr0, $vr0 -; CHECK-NEXT: vextrins.d $vr0, $vr1, 16 -; CHECK-NEXT: xvpermi.q $xr0, $xr2, 2 +; CHECK-NEXT: xvfrintrp.d $xr0, $xr0 ; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -81,38 +37,8 @@ define void @floor_v8f32(ptr %res, ptr %a0) nounwind { ; CHECK-LABEL: floor_v8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 5 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrm.s $vr1, $vr1 -; CHECK-NEXT: xvpickve.w $xr2, $xr0, 4 -; CHECK-NEXT: vreplvei.w $vr2, $vr2, 0 -; CHECK-NEXT: vfrintrm.s $vr2, $vr2 -; CHECK-NEXT: vextrins.w $vr2, $vr1, 16 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 6 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrm.s $vr1, $vr1 -; CHECK-NEXT: vextrins.w $vr2, $vr1, 32 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 7 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrm.s $vr1, $vr1 -; CHECK-NEXT: vextrins.w $vr2, $vr1, 48 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 1 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrm.s $vr1, $vr1 -; CHECK-NEXT: xvpickve.w $xr3, $xr0, 0 -; CHECK-NEXT: vreplvei.w $vr3, $vr3, 0 -; CHECK-NEXT: vfrintrm.s $vr3, $vr3 -; CHECK-NEXT: vextrins.w $vr3, $vr1, 16 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 2 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrm.s $vr1, $vr1 -; CHECK-NEXT: vextrins.w $vr3, $vr1, 32 -; CHECK-NEXT: xvpickve.w $xr0, $xr0, 3 -; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0 -; CHECK-NEXT: vfrintrm.s $vr0, $vr0 -; CHECK-NEXT: vextrins.w $vr3, $vr0, 48 -; CHECK-NEXT: xvpermi.q $xr3, $xr2, 2 -; CHECK-NEXT: xvst $xr3, $a0, 0 +; CHECK-NEXT: xvfrintrm.s $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: %v0 = load <8 x float>, ptr %a0 @@ -126,21 +52,7 @@ define void @floor_v4f64(ptr %res, ptr %a0) nounwind { ; CHECK-LABEL: floor_v4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 -; CHECK-NEXT: xvpickve.d $xr1, $xr0, 3 -; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrm.d $vr1, $vr1 -; CHECK-NEXT: xvpickve.d $xr2, $xr0, 2 -; CHECK-NEXT: vreplvei.d $vr2, $vr2, 0 -; CHECK-NEXT: vfrintrm.d $vr2, $vr2 -; CHECK-NEXT: vextrins.d $vr2, $vr1, 16 -; CHECK-NEXT: xvpickve.d $xr1, $xr0, 1 -; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrm.d $vr1, $vr1 -; CHECK-NEXT: xvpickve.d $xr0, $xr0, 0 -; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0 -; CHECK-NEXT: vfrintrm.d $vr0, $vr0 -; CHECK-NEXT: vextrins.d $vr0, $vr1, 16 -; CHECK-NEXT: xvpermi.q $xr0, $xr2, 2 +; CHECK-NEXT: xvfrintrm.d $xr0, $xr0 ; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -155,38 +67,8 @@ define void @trunc_v8f32(ptr %res, ptr %a0) nounwind { ; CHECK-LABEL: trunc_v8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 5 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrz.s $vr1, $vr1 -; CHECK-NEXT: xvpickve.w $xr2, $xr0, 4 -; CHECK-NEXT: vreplvei.w $vr2, $vr2, 0 -; CHECK-NEXT: vfrintrz.s $vr2, $vr2 -; CHECK-NEXT: vextrins.w $vr2, $vr1, 16 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 6 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrz.s $vr1, $vr1 -; CHECK-NEXT: vextrins.w $vr2, $vr1, 32 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 7 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrz.s $vr1, $vr1 -; CHECK-NEXT: vextrins.w $vr2, $vr1, 48 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 1 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrz.s $vr1, $vr1 -; CHECK-NEXT: xvpickve.w $xr3, $xr0, 0 -; CHECK-NEXT: vreplvei.w $vr3, $vr3, 0 -; CHECK-NEXT: vfrintrz.s $vr3, $vr3 -; CHECK-NEXT: vextrins.w $vr3, $vr1, 16 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 2 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrz.s $vr1, $vr1 -; CHECK-NEXT: vextrins.w $vr3, $vr1, 32 -; CHECK-NEXT: xvpickve.w $xr0, $xr0, 3 -; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0 -; CHECK-NEXT: vfrintrz.s $vr0, $vr0 -; CHECK-NEXT: vextrins.w $vr3, $vr0, 48 -; CHECK-NEXT: xvpermi.q $xr3, $xr2, 2 -; CHECK-NEXT: xvst $xr3, $a0, 0 +; CHECK-NEXT: xvfrintrz.s $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: %v0 = load <8 x float>, ptr %a0 @@ -200,21 +82,7 @@ define void @trunc_v4f64(ptr %res, ptr %a0) nounwind { ; CHECK-LABEL: trunc_v4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 -; CHECK-NEXT: xvpickve.d $xr1, $xr0, 3 -; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrz.d $vr1, $vr1 -; CHECK-NEXT: xvpickve.d $xr2, $xr0, 2 -; CHECK-NEXT: vreplvei.d $vr2, $vr2, 0 -; CHECK-NEXT: vfrintrz.d $vr2, $vr2 -; CHECK-NEXT: vextrins.d $vr2, $vr1, 16 -; CHECK-NEXT: xvpickve.d $xr1, $xr0, 1 -; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrz.d $vr1, $vr1 -; CHECK-NEXT: xvpickve.d $xr0, $xr0, 0 -; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0 -; CHECK-NEXT: vfrintrz.d $vr0, $vr0 -; CHECK-NEXT: vextrins.d $vr0, $vr1, 16 -; CHECK-NEXT: xvpermi.q $xr0, $xr2, 2 +; CHECK-NEXT: xvfrintrz.d $xr0, $xr0 ; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -229,38 +97,8 @@ define void @roundeven_v8f32(ptr %res, ptr %a0) nounwind { ; CHECK-LABEL: roundeven_v8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 5 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrne.s $vr1, $vr1 -; CHECK-NEXT: xvpickve.w $xr2, $xr0, 4 -; CHECK-NEXT: vreplvei.w $vr2, $vr2, 0 -; CHECK-NEXT: vfrintrne.s $vr2, $vr2 -; CHECK-NEXT: vextrins.w $vr2, $vr1, 16 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 6 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrne.s $vr1, $vr1 -; CHECK-NEXT: vextrins.w $vr2, $vr1, 32 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 7 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrne.s $vr1, $vr1 -; CHECK-NEXT: vextrins.w $vr2, $vr1, 48 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 1 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrne.s $vr1, $vr1 -; CHECK-NEXT: xvpickve.w $xr3, $xr0, 0 -; CHECK-NEXT: vreplvei.w $vr3, $vr3, 0 -; CHECK-NEXT: vfrintrne.s $vr3, $vr3 -; CHECK-NEXT: vextrins.w $vr3, $vr1, 16 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 2 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrne.s $vr1, $vr1 -; CHECK-NEXT: vextrins.w $vr3, $vr1, 32 -; CHECK-NEXT: xvpickve.w $xr0, $xr0, 3 -; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0 -; CHECK-NEXT: vfrintrne.s $vr0, $vr0 -; CHECK-NEXT: vextrins.w $vr3, $vr0, 48 -; CHECK-NEXT: xvpermi.q $xr3, $xr2, 2 -; CHECK-NEXT: xvst $xr3, $a0, 0 +; CHECK-NEXT: xvfrintrne.s $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: %v0 = load <8 x float>, ptr %a0 @@ -274,21 +112,7 @@ define void @roundeven_v4f64(ptr %res, ptr %a0) nounwind { ; CHECK-LABEL: roundeven_v4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 -; CHECK-NEXT: xvpickve.d $xr1, $xr0, 3 -; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrne.d $vr1, $vr1 -; CHECK-NEXT: xvpickve.d $xr2, $xr0, 2 -; CHECK-NEXT: vreplvei.d $vr2, $vr2, 0 -; CHECK-NEXT: vfrintrne.d $vr2, $vr2 -; CHECK-NEXT: vextrins.d $vr2, $vr1, 16 -; CHECK-NEXT: xvpickve.d $xr1, $xr0, 1 -; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrne.d $vr1, $vr1 -; CHECK-NEXT: xvpickve.d $xr0, $xr0, 0 -; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0 -; CHECK-NEXT: vfrintrne.d $vr0, $vr0 -; CHECK-NEXT: vextrins.d $vr0, $vr1, 16 -; CHECK-NEXT: xvpermi.q $xr0, $xr2, 2 +; CHECK-NEXT: xvfrintrne.d $xr0, $xr0 ; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-conversion.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-conversion.ll new file mode 100644 index 0000000..006713c --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-conversion.ll @@ -0,0 +1,303 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <8 x float> @llvm.loongarch.lasx.cast.128.s(<4 x float>) + +define void @lasx_cast_128_s(ptr %vd, ptr %va) { +; CHECK-LABEL: lasx_cast_128_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %a = load <4 x float>, ptr %va + %b = call <8 x float> @llvm.loongarch.lasx.cast.128.s(<4 x float> %a) + store <8 x float> %b, ptr %vd + ret void +} + +declare <4 x double> @llvm.loongarch.lasx.cast.128.d(<2 x double>) + +define void @lasx_cast_128_d(ptr %vd, ptr %va) { +; CHECK-LABEL: lasx_cast_128_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %a = load <2 x double>, ptr %va + %b = call <4 x double> @llvm.loongarch.lasx.cast.128.d(<2 x double> %a) + store <4 x double> %b, ptr %vd + ret void +} + +declare <4 x i64> @llvm.loongarch.lasx.cast.128(<2 x i64>) + +define void @lasx_cast_128(ptr %vd, ptr %va) { +; CHECK-LABEL: lasx_cast_128: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %a = load <2 x i64>, ptr %va + %b = call <4 x i64> @llvm.loongarch.lasx.cast.128(<2 x i64> %a) + store <4 x i64> %b, ptr %vd + ret void +} + +declare <8 x float> @llvm.loongarch.lasx.concat.128.s(<4 x float>, <4 x float>) + +define void @lasx_concat_128_s(ptr %vd, ptr %va, ptr %vb) { +; CHECK-LABEL: lasx_concat_128_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %a = load <4 x float>, ptr %va + %b = load <4 x float>, ptr %vb + %c = call <8 x float> @llvm.loongarch.lasx.concat.128.s(<4 x float> %a, <4 x float> %b) + store <8 x float> %c, ptr %vd + ret void +} + +declare <4 x double> @llvm.loongarch.lasx.concat.128.d(<2 x double>, <2 x double>) + +define void @lasx_concat_128_d(ptr %vd, ptr %va, ptr %vb) { +; CHECK-LABEL: lasx_concat_128_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %a = load <2 x double>, ptr %va + %b = load <2 x double>, ptr %vb + %c = call <4 x double> @llvm.loongarch.lasx.concat.128.d(<2 x double> %a, <2 x double> %b) + store <4 x double> %c, ptr %vd + ret void +} + +declare <4 x i64> @llvm.loongarch.lasx.concat.128(<2 x i64>, <2 x i64>) + +define void @lasx_concat_128(ptr %vd, ptr %va, ptr %vb) { +; CHECK-LABEL: lasx_concat_128: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %a = load <2 x i64>, ptr %va + %b = load <2 x i64>, ptr %vb + %c = call <4 x i64> @llvm.loongarch.lasx.concat.128(<2 x i64> %a, <2 x i64> %b) + store <4 x i64> %c, ptr %vd + ret void +} + +declare <4 x float> @llvm.loongarch.lasx.extract.128.lo.s(<8 x float>) + +define void @lasx_extract_128_lo_s(ptr %vd, ptr %va) { +; CHECK-LABEL: lasx_extract_128_lo_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %a = load <8 x float>, ptr %va + %c = call <4 x float> @llvm.loongarch.lasx.extract.128.lo.s(<8 x float> %a) + store <4 x float> %c, ptr %vd + ret void +} + +declare <2 x double> @llvm.loongarch.lasx.extract.128.lo.d(<4 x double>) + +define void @lasx_extract_128_lo_d(ptr %vd, ptr %va) { +; CHECK-LABEL: lasx_extract_128_lo_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %a = load <4 x double>, ptr %va + %c = call <2 x double> @llvm.loongarch.lasx.extract.128.lo.d(<4 x double> %a) + store <2 x double> %c, ptr %vd + ret void +} + +declare <2 x i64> @llvm.loongarch.lasx.extract.128.lo(<4 x i64>) + +define void @lasx_extract_128_lo(ptr %vd, ptr %va) { +; CHECK-LABEL: lasx_extract_128_lo: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %a = load <4 x i64>, ptr %va + %c = call <2 x i64> @llvm.loongarch.lasx.extract.128.lo(<4 x i64> %a) + store <2 x i64> %c, ptr %vd + ret void +} + +declare <4 x float> @llvm.loongarch.lasx.extract.128.hi.s(<8 x float>) + +define void @lasx_extract_128_hi_s(ptr %vd, ptr %va) { +; CHECK-LABEL: lasx_extract_128_hi_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvpermi.q $xr0, $xr0, 1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %a = load <8 x float>, ptr %va + %c = call <4 x float> @llvm.loongarch.lasx.extract.128.hi.s(<8 x float> %a) + store <4 x float> %c, ptr %vd + ret void +} + +declare <2 x double> @llvm.loongarch.lasx.extract.128.hi.d(<4 x double>) + +define void @lasx_extract_128_hi_d(ptr %vd, ptr %va) { +; CHECK-LABEL: lasx_extract_128_hi_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvpermi.q $xr0, $xr0, 1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %a = load <4 x double>, ptr %va + %c = call <2 x double> @llvm.loongarch.lasx.extract.128.hi.d(<4 x double> %a) + store <2 x double> %c, ptr %vd + ret void +} + +declare <2 x i64> @llvm.loongarch.lasx.extract.128.hi(<4 x i64>) + +define void @lasx_extract_128_hi(ptr %vd, ptr %va) { +; CHECK-LABEL: lasx_extract_128_hi: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvpermi.q $xr0, $xr0, 1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %a = load <4 x i64>, ptr %va + %c = call <2 x i64> @llvm.loongarch.lasx.extract.128.hi(<4 x i64> %a) + store <2 x i64> %c, ptr %vd + ret void +} + +declare <8 x float> @llvm.loongarch.lasx.insert.128.lo.s(<8 x float>, <4 x float>) + +define void @lasx_insert_128_lo_s(ptr %vd, ptr %va, ptr %vb) { +; CHECK-LABEL: lasx_insert_128_lo_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 48 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %a = load <8 x float>, ptr %va + %b = load <4 x float>, ptr %vb + %c = call <8 x float> @llvm.loongarch.lasx.insert.128.lo.s(<8 x float> %a, <4 x float> %b) + store <8 x float> %c, ptr %vd + ret void +} + +declare <4 x double> @llvm.loongarch.lasx.insert.128.lo.d(<4 x double>, <2 x double>) + +define void @lasx_insert_128_lo_d(ptr %vd, ptr %va, ptr %vb) { +; CHECK-LABEL: lasx_insert_128_lo_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 48 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %a = load <4 x double>, ptr %va + %b = load <2 x double>, ptr %vb + %c = call <4 x double> @llvm.loongarch.lasx.insert.128.lo.d(<4 x double> %a, <2 x double> %b) + store <4 x double> %c, ptr %vd + ret void +} + +declare <4 x i64> @llvm.loongarch.lasx.insert.128.lo(<4 x i64>, <2 x i64>) + +define void @lasx_insert_128_lo(ptr %vd, ptr %va, ptr %vb) { +; CHECK-LABEL: lasx_insert_128_lo: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 48 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %a = load <4 x i64>, ptr %va + %b = load <2 x i64>, ptr %vb + %c = call <4 x i64> @llvm.loongarch.lasx.insert.128.lo(<4 x i64> %a, <2 x i64> %b) + store <4 x i64> %c, ptr %vd + ret void +} + +declare <8 x float> @llvm.loongarch.lasx.insert.128.hi.s(<8 x float>, <4 x float>) + +define void @lasx_insert_128_hi_s(ptr %vd, ptr %va, ptr %vb) { +; CHECK-LABEL: lasx_insert_128_hi_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %a = load <8 x float>, ptr %va + %b = load <4 x float>, ptr %vb + %c = call <8 x float> @llvm.loongarch.lasx.insert.128.hi.s(<8 x float> %a, <4 x float> %b) + store <8 x float> %c, ptr %vd + ret void +} + +declare <4 x double> @llvm.loongarch.lasx.insert.128.hi.d(<4 x double>, <2 x double>) + +define void @lasx_insert_128_hi_d(ptr %vd, ptr %va, ptr %vb) { +; CHECK-LABEL: lasx_insert_128_hi_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %a = load <4 x double>, ptr %va + %b = load <2 x double>, ptr %vb + %c = call <4 x double> @llvm.loongarch.lasx.insert.128.hi.d(<4 x double> %a, <2 x double> %b) + store <4 x double> %c, ptr %vd + ret void +} + +declare <4 x i64> @llvm.loongarch.lasx.insert.128.hi(<4 x i64>, <2 x i64>) + +define void @lasx_insert_128_hi(ptr %vd, ptr %va, ptr %vb) { +; CHECK-LABEL: lasx_insert_128_hi: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %a = load <4 x i64>, ptr %va + %b = load <2 x i64>, ptr %vb + %c = call <4 x i64> @llvm.loongarch.lasx.insert.128.hi(<4 x i64> %a, <2 x i64> %b) + store <4 x i64> %c, ptr %vd + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/rotl-rotr.ll b/llvm/test/CodeGen/LoongArch/lasx/rotl-rotr.ll new file mode 100644 index 0000000..6b8ab2c --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/rotl-rotr.ll @@ -0,0 +1,248 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64 + +define void @rotl_v32i8(ptr %dst, ptr %src, i8 signext %a0) nounwind { +; CHECK-LABEL: rotl_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvreplgr2vr.b $xr1, $a2 +; CHECK-NEXT: xvneg.b $xr1, $xr1 +; CHECK-NEXT: xvrotr.b $xr0, $xr0, $xr1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <32 x i8>, ptr %src + %v1.ele = insertelement <32 x i8> poison, i8 %a0, i8 0 + %v1 = shufflevector <32 x i8> %v1.ele, <32 x i8> poison, <32 x i32> zeroinitializer + %v1.sub = sub <32 x i8> splat (i8 8), %v1 + %b = shl <32 x i8> %v0, %v1 + %c = lshr <32 x i8> %v0, %v1.sub + %d = or <32 x i8> %b, %c + store <32 x i8> %d, ptr %dst + ret void +} + +define void @rotr_v32i8(ptr %dst, ptr %src, i8 signext %a0) nounwind { +; CHECK-LABEL: rotr_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvreplgr2vr.b $xr1, $a2 +; CHECK-NEXT: xvrotr.b $xr0, $xr0, $xr1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <32 x i8>, ptr %src + %v1.ele = insertelement <32 x i8> poison, i8 %a0, i8 0 + %v1 = shufflevector <32 x i8> %v1.ele, <32 x i8> poison, <32 x i32> zeroinitializer + %v1.sub = sub <32 x i8> splat (i8 8), %v1 + %b = lshr <32 x i8> %v0, %v1 + %c = shl <32 x i8> %v0, %v1.sub + %d = or <32 x i8> %b, %c + store <32 x i8> %d, ptr %dst + ret void +} + +define void @rotr_v32i8_imm(ptr %dst, ptr %src) nounwind { +; CHECK-LABEL: rotr_v32i8_imm: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvrotri.b $xr0, $xr0, 2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <32 x i8>, ptr %src + %b = lshr <32 x i8> %v0, splat (i8 2) + %c = shl <32 x i8> %v0, splat (i8 6) + %d = or <32 x i8> %b, %c + store <32 x i8> %d, ptr %dst + ret void +} + +define void @rotl_v16i16(ptr %dst, ptr %src, i16 signext %a0) nounwind { +; CHECK-LABEL: rotl_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvreplgr2vr.h $xr1, $a2 +; CHECK-NEXT: xvneg.h $xr1, $xr1 +; CHECK-NEXT: xvrotr.h $xr0, $xr0, $xr1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i16>, ptr %src + %v1.ele = insertelement <16 x i16> poison, i16 %a0, i16 0 + %v1 = shufflevector <16 x i16> %v1.ele, <16 x i16> poison, <16 x i32> zeroinitializer + %v1.sub = sub <16 x i16> splat (i16 16), %v1 + %b = shl <16 x i16> %v0, %v1 + %c = lshr <16 x i16> %v0, %v1.sub + %d = or <16 x i16> %b, %c + store <16 x i16> %d, ptr %dst + ret void +} + +define void @rotr_v16i16(ptr %dst, ptr %src, i16 signext %a0) nounwind { +; CHECK-LABEL: rotr_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvreplgr2vr.h $xr1, $a2 +; CHECK-NEXT: xvrotr.h $xr0, $xr0, $xr1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i16>, ptr %src + %v1.ele = insertelement <16 x i16> poison, i16 %a0, i16 0 + %v1 = shufflevector <16 x i16> %v1.ele, <16 x i16> poison, <16 x i32> zeroinitializer + %v1.sub = sub <16 x i16> splat (i16 16), %v1 + %b = lshr <16 x i16> %v0, %v1 + %c = shl <16 x i16> %v0, %v1.sub + %d = or <16 x i16> %b, %c + store <16 x i16> %d, ptr %dst + ret void +} + +define void @rotr_v16i16_imm(ptr %dst, ptr %src) nounwind { +; CHECK-LABEL: rotr_v16i16_imm: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvrotri.h $xr0, $xr0, 2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i16>, ptr %src + %b = lshr <16 x i16> %v0, splat (i16 2) + %c = shl <16 x i16> %v0, splat (i16 14) + %d = or <16 x i16> %b, %c + store <16 x i16> %d, ptr %dst + ret void +} + +define void @rotl_v8i32(ptr %dst, ptr %src, i32 signext %a0) nounwind { +; CHECK-LABEL: rotl_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvreplgr2vr.w $xr1, $a2 +; CHECK-NEXT: xvneg.w $xr1, $xr1 +; CHECK-NEXT: xvrotr.w $xr0, $xr0, $xr1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i32>, ptr %src + %v1.ele = insertelement <8 x i32> poison, i32 %a0, i32 0 + %v1 = shufflevector <8 x i32> %v1.ele, <8 x i32> poison, <8 x i32> zeroinitializer + %v1.sub = sub <8 x i32> splat (i32 32), %v1 + %b = shl <8 x i32> %v0, %v1 + %c = lshr <8 x i32> %v0, %v1.sub + %d = or <8 x i32> %b, %c + store <8 x i32> %d, ptr %dst + ret void +} + +define void @rotr_v8i32(ptr %dst, ptr %src, i32 signext %a0) nounwind { +; CHECK-LABEL: rotr_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvreplgr2vr.w $xr1, $a2 +; CHECK-NEXT: xvrotr.w $xr0, $xr0, $xr1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i32>, ptr %src + %v1.ele = insertelement <8 x i32> poison, i32 %a0, i32 0 + %v1 = shufflevector <8 x i32> %v1.ele, <8 x i32> poison, <8 x i32> zeroinitializer + %v1.sub = sub <8 x i32> splat (i32 32), %v1 + %b = lshr <8 x i32> %v0, %v1 + %c = shl <8 x i32> %v0, %v1.sub + %d = or <8 x i32> %b, %c + store <8 x i32> %d, ptr %dst + ret void +} + +define void @rotr_v8i32_imm(ptr %dst, ptr %src) nounwind { +; CHECK-LABEL: rotr_v8i32_imm: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvrotri.w $xr0, $xr0, 2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i32>, ptr %src + %b = lshr <8 x i32> %v0, splat (i32 2) + %c = shl <8 x i32> %v0, splat (i32 30) + %d = or <8 x i32> %b, %c + store <8 x i32> %d, ptr %dst + ret void +} + +define void @rotl_v4i64(ptr %dst, ptr %src, i64 %a0) nounwind { +; LA32-LABEL: rotl_v4i64: +; LA32: # %bb.0: +; LA32-NEXT: xvld $xr0, $a1, 0 +; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 0 +; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 2 +; LA32-NEXT: xvpermi.q $xr1, $xr1, 2 +; LA32-NEXT: xvneg.d $xr1, $xr1 +; LA32-NEXT: xvrotr.d $xr0, $xr0, $xr1 +; LA32-NEXT: xvst $xr0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: rotl_v4i64: +; LA64: # %bb.0: +; LA64-NEXT: xvld $xr0, $a1, 0 +; LA64-NEXT: xvreplgr2vr.d $xr1, $a2 +; LA64-NEXT: xvneg.d $xr1, $xr1 +; LA64-NEXT: xvrotr.d $xr0, $xr0, $xr1 +; LA64-NEXT: xvst $xr0, $a0, 0 +; LA64-NEXT: ret + %v0 = load <4 x i64>, ptr %src + %v1.ele = insertelement <4 x i64> poison, i64 %a0, i64 0 + %v1 = shufflevector <4 x i64> %v1.ele, <4 x i64> poison, <4 x i32> zeroinitializer + %v1.sub = sub <4 x i64> splat (i64 64), %v1 + %b = shl <4 x i64> %v0, %v1 + %c = lshr <4 x i64> %v0, %v1.sub + %d = or <4 x i64> %b, %c + store <4 x i64> %d, ptr %dst + ret void +} + +define void @rotr_v4i64(ptr %dst, ptr %src, i64 %a0) nounwind { +; LA32-LABEL: rotr_v4i64: +; LA32: # %bb.0: +; LA32-NEXT: xvld $xr0, $a1, 0 +; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 0 +; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 2 +; LA32-NEXT: xvpermi.q $xr1, $xr1, 2 +; LA32-NEXT: xvrotr.d $xr0, $xr0, $xr1 +; LA32-NEXT: xvst $xr0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: rotr_v4i64: +; LA64: # %bb.0: +; LA64-NEXT: xvld $xr0, $a1, 0 +; LA64-NEXT: xvreplgr2vr.d $xr1, $a2 +; LA64-NEXT: xvrotr.d $xr0, $xr0, $xr1 +; LA64-NEXT: xvst $xr0, $a0, 0 +; LA64-NEXT: ret + %v0 = load <4 x i64>, ptr %src + %v1.ele = insertelement <4 x i64> poison, i64 %a0, i64 0 + %v1 = shufflevector <4 x i64> %v1.ele, <4 x i64> poison, <4 x i32> zeroinitializer + %v1.sub = sub <4 x i64> splat (i64 64), %v1 + %b = lshr <4 x i64> %v0, %v1 + %c = shl <4 x i64> %v0, %v1.sub + %d = or <4 x i64> %b, %c + store <4 x i64> %d, ptr %dst + ret void +} + +define void @rotr_v4i64_imm(ptr %dst, ptr %src) nounwind { +; LA32-LABEL: rotr_v4i64_imm: +; LA32: # %bb.0: +; LA32-NEXT: xvld $xr0, $a1, 0 +; LA32-NEXT: xvrepli.w $xr1, -62 +; LA32-NEXT: xvrotr.d $xr0, $xr0, $xr1 +; LA32-NEXT: xvst $xr0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: rotr_v4i64_imm: +; LA64: # %bb.0: +; LA64-NEXT: xvld $xr0, $a1, 0 +; LA64-NEXT: xvrotri.d $xr0, $xr0, 2 +; LA64-NEXT: xvst $xr0, $a0, 0 +; LA64-NEXT: ret + %v0 = load <4 x i64>, ptr %src + %b = lshr <4 x i64> %v0, splat (i64 2) + %c = shl <4 x i64> %v0, splat (i64 62) + %d = or <4 x i64> %b, %c + store <4 x i64> %d, ptr %dst + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/shuffle-as-bswap.ll b/llvm/test/CodeGen/LoongArch/lasx/shuffle-as-bswap.ll new file mode 100644 index 0000000..1c9038a --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/shuffle-as-bswap.ll @@ -0,0 +1,47 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s + +define void @shufflevector_bswap_h(ptr %res, ptr %a) nounwind { +; CHECK-LABEL: shufflevector_bswap_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvshuf4i.b $xr0, $xr0, 177 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %va = load <32 x i8>, ptr %a + %b = shufflevector <32 x i8> %va, <32 x i8> poison, <32 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14, i32 17, i32 16, i32 19, i32 18, i32 21, i32 20, i32 23, i32 22, i32 25, i32 24, i32 27, i32 26, i32 29, i32 28, i32 31, i32 30> + store <32 x i8> %b, ptr %res + ret void +} + +define void @shufflevector_bswap_w(ptr %res, ptr %a) nounwind { +; CHECK-LABEL: shufflevector_bswap_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvshuf4i.b $xr0, $xr0, 27 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %va = load <32 x i8>, ptr %a + %b = shufflevector <32 x i8> %va, <32 x i8> poison, <32 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12, i32 19, i32 18, i32 17, i32 16, i32 23, i32 22, i32 21, i32 20, i32 27, i32 26, i32 25, i32 24, i32 31, i32 30, i32 29, i32 28> + store <32 x i8> %b, ptr %res + ret void +} + +define void @shufflevector_bswap_d(ptr %res, ptr %a) nounwind { +; CHECK-LABEL: shufflevector_bswap_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI2_0) +; CHECK-NEXT: xvld $xr1, $a1, %pc_lo12(.LCPI2_0) +; CHECK-NEXT: xvshuf.b $xr0, $xr0, $xr0, $xr1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %va = load <32 x i8>, ptr %a + %b = shufflevector <32 x i8> %va, <32 x i8> poison, <32 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24> + store <32 x i8> %b, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/ldptr.ll b/llvm/test/CodeGen/LoongArch/ldptr.ll index c3656a6..9bafa10 100644 --- a/llvm/test/CodeGen/LoongArch/ldptr.ll +++ b/llvm/test/CodeGen/LoongArch/ldptr.ll @@ -24,8 +24,7 @@ define signext i32 @ldptr_w(ptr %p) nounwind { ; LA32-LABEL: ldptr_w: ; LA32: # %bb.0: # %entry ; LA32-NEXT: addi.w $a0, $a0, 2047 -; LA32-NEXT: addi.w $a0, $a0, 1 -; LA32-NEXT: ld.w $a0, $a0, 0 +; LA32-NEXT: ld.w $a0, $a0, 1 ; LA32-NEXT: ret ; ; LA64-LABEL: ldptr_w: @@ -81,10 +80,9 @@ entry: define i64 @ldptr_d(ptr %p) nounwind { ; LA32-LABEL: ldptr_d: ; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $a0, $a0, 2047 -; LA32-NEXT: addi.w $a1, $a0, 1 -; LA32-NEXT: ld.w $a0, $a1, 0 -; LA32-NEXT: ld.w $a1, $a1, 4 +; LA32-NEXT: addi.w $a1, $a0, 2047 +; LA32-NEXT: ld.w $a0, $a1, 1 +; LA32-NEXT: ld.w $a1, $a1, 5 ; LA32-NEXT: ret ; ; LA64-LABEL: ldptr_d: diff --git a/llvm/test/CodeGen/LoongArch/lsx/and-not-combine.ll b/llvm/test/CodeGen/LoongArch/lsx/and-not-combine.ll index 3c6d345..f439a33 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/and-not-combine.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/and-not-combine.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 -; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s -; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA64 define void @and_not_combine_v16i8(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { ; CHECK-LABEL: and_not_combine_v16i8: @@ -85,3 +85,327 @@ entry: store <2 x i64> %and, ptr %res ret void } + +define void @pre_not_and_not_combine_v16i8(ptr %res, ptr %a, i8 %b) nounwind { +; CHECK-LABEL: pre_not_and_not_combine_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vreplgr2vr.b $vr1, $a2 +; CHECK-NEXT: vnor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i8>, ptr %a + %b.not = xor i8 %b, -1 + %b.not.ele = insertelement <16 x i8> poison, i8 %b.not, i64 0 + %v1.not = shufflevector <16 x i8> %b.not.ele, <16 x i8> poison, <16 x i32> zeroinitializer + %v0.not = xor <16 x i8> %v0, splat (i8 -1) + %and = and <16 x i8> %v0.not, %v1.not + store <16 x i8> %and, ptr %res + ret void +} + +define void @post_not_and_not_combine_v16i8(ptr %res, ptr %a, i8 %b) nounwind { +; CHECK-LABEL: post_not_and_not_combine_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vreplgr2vr.b $vr1, $a2 +; CHECK-NEXT: vnor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i8>, ptr %a + %b.ele = insertelement <16 x i8> poison, i8 %b, i64 0 + %v1 = shufflevector <16 x i8> %b.ele, <16 x i8> poison, <16 x i32> zeroinitializer + %v0.not = xor <16 x i8> %v0, splat (i8 -1) + %v1.not = xor <16 x i8> %v1, splat (i8 -1) + %and = and <16 x i8> %v0.not, %v1.not + store <16 x i8> %and, ptr %res + ret void +} + +define void @pre_not_and_not_combine_v8i16(ptr %res, ptr %a, i16 %b) nounwind { +; CHECK-LABEL: pre_not_and_not_combine_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vreplgr2vr.h $vr1, $a2 +; CHECK-NEXT: vnor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i16>, ptr %a + %b.not = xor i16 %b, -1 + %b.not.ele = insertelement <8 x i16> poison, i16 %b.not, i64 0 + %v1.not = shufflevector <8 x i16> %b.not.ele, <8 x i16> poison, <8 x i32> zeroinitializer + %v0.not = xor <8 x i16> %v0, splat (i16 -1) + %and = and <8 x i16> %v0.not, %v1.not + store <8 x i16> %and, ptr %res + ret void +} + +define void @post_not_and_not_combine_v8i16(ptr %res, ptr %a, i16 %b) nounwind { +; CHECK-LABEL: post_not_and_not_combine_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vreplgr2vr.h $vr1, $a2 +; CHECK-NEXT: vnor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i16>, ptr %a + %b.ele = insertelement <8 x i16> poison, i16 %b, i64 0 + %v1 = shufflevector <8 x i16> %b.ele, <8 x i16> poison, <8 x i32> zeroinitializer + %v0.not = xor <8 x i16> %v0, splat (i16 -1) + %v1.not = xor <8 x i16> %v1, splat (i16 -1) + %and = and <8 x i16> %v0.not, %v1.not + store <8 x i16> %and, ptr %res + ret void +} + +define void @pre_not_and_not_combine_v4i32(ptr %res, ptr %a, i32 %b) nounwind { +; CHECK-LABEL: pre_not_and_not_combine_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vreplgr2vr.w $vr1, $a2 +; CHECK-NEXT: vnor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x i32>, ptr %a + %b.not = xor i32 %b, -1 + %b.not.ele = insertelement <4 x i32> poison, i32 %b.not, i64 0 + %v1.not = shufflevector <4 x i32> %b.not.ele, <4 x i32> poison, <4 x i32> zeroinitializer + %v0.not = xor <4 x i32> %v0, splat (i32 -1) + %and = and <4 x i32> %v0.not, %v1.not + store <4 x i32> %and, ptr %res + ret void +} + +define void @post_not_and_not_combine_v4i32(ptr %res, ptr %a, i32 %b) nounwind { +; CHECK-LABEL: post_not_and_not_combine_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vreplgr2vr.w $vr1, $a2 +; CHECK-NEXT: vnor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x i32>, ptr %a + %b.ele = insertelement <4 x i32> poison, i32 %b, i64 0 + %v1 = shufflevector <4 x i32> %b.ele, <4 x i32> poison, <4 x i32> zeroinitializer + %v0.not = xor <4 x i32> %v0, splat (i32 -1) + %v1.not = xor <4 x i32> %v1, splat (i32 -1) + %and = and <4 x i32> %v0.not, %v1.not + store <4 x i32> %and, ptr %res + ret void +} + +define void @pre_not_and_not_combine_v2i64(ptr %res, ptr %a, i64 %b) nounwind { +; LA32-LABEL: pre_not_and_not_combine_v2i64: +; LA32: # %bb.0: +; LA32-NEXT: vld $vr0, $a1, 0 +; LA32-NEXT: nor $a1, $a3, $zero +; LA32-NEXT: nor $a2, $a2, $zero +; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 0 +; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 1 +; LA32-NEXT: vreplvei.d $vr1, $vr1, 0 +; LA32-NEXT: vandn.v $vr0, $vr0, $vr1 +; LA32-NEXT: vst $vr0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: pre_not_and_not_combine_v2i64: +; LA64: # %bb.0: +; LA64-NEXT: vld $vr0, $a1, 0 +; LA64-NEXT: vreplgr2vr.d $vr1, $a2 +; LA64-NEXT: vnor.v $vr0, $vr0, $vr1 +; LA64-NEXT: vst $vr0, $a0, 0 +; LA64-NEXT: ret + %v0 = load <2 x i64>, ptr %a + %b.not = xor i64 %b, -1 + %b.not.ele = insertelement <2 x i64> poison, i64 %b.not, i64 0 + %v1.not = shufflevector <2 x i64> %b.not.ele, <2 x i64> poison, <2 x i32> zeroinitializer + %v0.not = xor <2 x i64> %v0, splat (i64 -1) + %and = and <2 x i64> %v0.not, %v1.not + store <2 x i64> %and, ptr %res + ret void +} + +define void @post_not_and_not_combine_v2i64(ptr %res, ptr %a, i64 %b) nounwind { +; LA32-LABEL: post_not_and_not_combine_v2i64: +; LA32: # %bb.0: +; LA32-NEXT: vld $vr0, $a1, 0 +; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 0 +; LA32-NEXT: vinsgr2vr.w $vr1, $a3, 1 +; LA32-NEXT: vreplvei.d $vr1, $vr1, 0 +; LA32-NEXT: vnor.v $vr0, $vr0, $vr1 +; LA32-NEXT: vst $vr0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: post_not_and_not_combine_v2i64: +; LA64: # %bb.0: +; LA64-NEXT: vld $vr0, $a1, 0 +; LA64-NEXT: vreplgr2vr.d $vr1, $a2 +; LA64-NEXT: vnor.v $vr0, $vr0, $vr1 +; LA64-NEXT: vst $vr0, $a0, 0 +; LA64-NEXT: ret + %v0 = load <2 x i64>, ptr %a + %b.ele = insertelement <2 x i64> poison, i64 %b, i64 0 + %v1 = shufflevector <2 x i64> %b.ele, <2 x i64> poison, <2 x i32> zeroinitializer + %v0.not = xor <2 x i64> %v0, splat (i64 -1) + %v1.not = xor <2 x i64> %v1, splat (i64 -1) + %and = and <2 x i64> %v0.not, %v1.not + store <2 x i64> %and, ptr %res + ret void +} + +define void @and_not_combine_splatimm_v16i8(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: and_not_combine_splatimm_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vnori.b $vr0, $vr0, 3 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i8>, ptr %a0 + %and = and <16 x i8> %v0, splat (i8 -4) + %xor = xor <16 x i8> %and, splat (i8 -4) + store <16 x i8> %xor, ptr %res + ret void +} + +define void @and_not_combine_splatimm_v8i16(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: and_not_combine_splatimm_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vrepli.h $vr1, -4 +; CHECK-NEXT: vandn.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i16>, ptr %a0 + %and = and <8 x i16> %v0, splat (i16 -4) + %xor = xor <8 x i16> %and, splat (i16 -4) + store <8 x i16> %xor, ptr %res + ret void +} + +define void @and_not_combine_splatimm_v4i32(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: and_not_combine_splatimm_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vrepli.w $vr1, -4 +; CHECK-NEXT: vandn.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x i32>, ptr %a0 + %and = and <4 x i32> %v0, splat (i32 -4) + %xor = xor <4 x i32> %and, splat (i32 -4) + store <4 x i32> %xor, ptr %res + ret void +} + +define void @and_not_combine_splatimm_v2i64(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: and_not_combine_splatimm_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vrepli.d $vr1, -4 +; CHECK-NEXT: vandn.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x i64>, ptr %a0 + %and = and <2 x i64> %v0, splat (i64 -4) + %xor = xor <2 x i64> %and, splat (i64 -4) + store <2 x i64> %xor, ptr %res + ret void +} + +define void @and_or_not_combine_v16i8(ptr %pa, ptr %pb, ptr %pv, ptr %dst) nounwind { +; CHECK-LABEL: and_or_not_combine_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vld $vr2, $a1, 0 +; CHECK-NEXT: vseq.b $vr0, $vr1, $vr0 +; CHECK-NEXT: vseq.b $vr1, $vr1, $vr2 +; CHECK-NEXT: vand.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vnori.b $vr0, $vr0, 251 +; CHECK-NEXT: vst $vr0, $a3, 0 +; CHECK-NEXT: ret + %a = load <16 x i8>, ptr %pa + %b = load <16 x i8>, ptr %pb + %v = load <16 x i8>, ptr %pv + %ca = icmp ne <16 x i8> %v, %a + %cb = icmp ne <16 x i8> %v, %b + %or = or <16 x i1> %ca, %cb + %ext = sext <16 x i1> %or to <16 x i8> + %and = and <16 x i8> %ext, splat (i8 4) + store <16 x i8> %and, ptr %dst + ret void +} + +define void @and_or_not_combine_v8i16(ptr %pa, ptr %pb, ptr %pv, ptr %dst) nounwind { +; CHECK-LABEL: and_or_not_combine_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vld $vr2, $a1, 0 +; CHECK-NEXT: vseq.h $vr0, $vr1, $vr0 +; CHECK-NEXT: vseq.h $vr1, $vr1, $vr2 +; CHECK-NEXT: vand.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vrepli.h $vr1, 4 +; CHECK-NEXT: vandn.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vst $vr0, $a3, 0 +; CHECK-NEXT: ret + %a = load <8 x i16>, ptr %pa + %b = load <8 x i16>, ptr %pb + %v = load <8 x i16>, ptr %pv + %ca = icmp ne <8 x i16> %v, %a + %cb = icmp ne <8 x i16> %v, %b + %or = or <8 x i1> %ca, %cb + %ext = sext <8 x i1> %or to <8 x i16> + %and = and <8 x i16> %ext, splat (i16 4) + store <8 x i16> %and, ptr %dst + ret void +} + +define void @and_or_not_combine_v4i32(ptr %pa, ptr %pb, ptr %pv, ptr %dst) nounwind { +; CHECK-LABEL: and_or_not_combine_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vld $vr2, $a1, 0 +; CHECK-NEXT: vseq.w $vr0, $vr1, $vr0 +; CHECK-NEXT: vseq.w $vr1, $vr1, $vr2 +; CHECK-NEXT: vand.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vrepli.w $vr1, 4 +; CHECK-NEXT: vandn.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vst $vr0, $a3, 0 +; CHECK-NEXT: ret + %a = load <4 x i32>, ptr %pa + %b = load <4 x i32>, ptr %pb + %v = load <4 x i32>, ptr %pv + %ca = icmp ne <4 x i32> %v, %a + %cb = icmp ne <4 x i32> %v, %b + %or = or <4 x i1> %ca, %cb + %ext = sext <4 x i1> %or to <4 x i32> + %and = and <4 x i32> %ext, splat (i32 4) + store <4 x i32> %and, ptr %dst + ret void +} + +define void @and_or_not_combine_v2i64(ptr %pa, ptr %pb, ptr %pv, ptr %dst) nounwind { +; CHECK-LABEL: and_or_not_combine_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vld $vr2, $a1, 0 +; CHECK-NEXT: vseq.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vseq.d $vr1, $vr1, $vr2 +; CHECK-NEXT: vand.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vrepli.d $vr1, 4 +; CHECK-NEXT: vandn.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vst $vr0, $a3, 0 +; CHECK-NEXT: ret + %a = load <2 x i64>, ptr %pa + %b = load <2 x i64>, ptr %pb + %v = load <2 x i64>, ptr %pv + %ca = icmp ne <2 x i64> %v, %a + %cb = icmp ne <2 x i64> %v, %b + %or = or <2 x i1> %ca, %cb + %ext = sext <2 x i1> %or to <2 x i64> + %and = and <2 x i64> %ext, splat (i64 4) + store <2 x i64> %and, ptr %dst + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll b/llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll index a9a38e8..6ac7d51 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll @@ -106,6 +106,69 @@ define void @ctlz_v2i64(ptr %src, ptr %dst) nounwind { ret void } +define void @not_ctlz_v16i8(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: not_ctlz_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vxori.b $vr0, $vr0, 255 +; CHECK-NEXT: vclz.b $vr0, $vr0 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: ret + %v = load <16 x i8>, ptr %src + %neg = xor <16 x i8> %v, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> + %res = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %neg, i1 false) + store <16 x i8> %res, ptr %dst + ret void +} + +define void @not_ctlz_v8i16(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: not_ctlz_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vrepli.b $vr1, -1 +; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vclz.h $vr0, $vr0 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: ret + %v = load <8 x i16>, ptr %src + %neg = xor <8 x i16> %v, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> + %res = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %neg, i1 false) + store <8 x i16> %res, ptr %dst + ret void +} + +define void @not_ctlz_v4i32(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: not_ctlz_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vrepli.b $vr1, -1 +; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vclz.w $vr0, $vr0 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: ret + %v = load <4 x i32>, ptr %src + %neg = xor <4 x i32> %v, <i32 -1, i32 -1, i32 -1, i32 -1> + %res = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %neg, i1 false) + store <4 x i32> %res, ptr %dst + ret void +} + +define void @not_ctlz_v2i64(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: not_ctlz_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vrepli.b $vr1, -1 +; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vclz.d $vr0, $vr0 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: ret + %v = load <2 x i64>, ptr %src + %neg = xor <2 x i64> %v, <i64 -1, i64 -1> + %res = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %neg, i1 false) + store <2 x i64> %res, ptr %dst + ret void +} + declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>) declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>) declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/fp-max-min.ll b/llvm/test/CodeGen/LoongArch/lsx/fp-max-min.ll index 27ecb75..c173092 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/fp-max-min.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/fp-max-min.ll @@ -5,24 +5,10 @@ define void @minnum_v4f32(ptr %res, ptr %x, ptr %y) nounwind { ; CHECK-LABEL: minnum_v4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vld $vr0, $a2, 0 -; CHECK-NEXT: vld $vr1, $a1, 0 -; CHECK-NEXT: vreplvei.w $vr2, $vr0, 1 -; CHECK-NEXT: vreplvei.w $vr3, $vr1, 1 -; CHECK-NEXT: fmin.s $fa2, $fa3, $fa2 -; CHECK-NEXT: vreplvei.w $vr3, $vr0, 0 -; CHECK-NEXT: vreplvei.w $vr4, $vr1, 0 -; CHECK-NEXT: fmin.s $fa3, $fa4, $fa3 -; CHECK-NEXT: vextrins.w $vr3, $vr2, 16 -; CHECK-NEXT: vreplvei.w $vr2, $vr0, 2 -; CHECK-NEXT: vreplvei.w $vr4, $vr1, 2 -; CHECK-NEXT: fmin.s $fa2, $fa4, $fa2 -; CHECK-NEXT: vextrins.w $vr3, $vr2, 32 -; CHECK-NEXT: vreplvei.w $vr0, $vr0, 3 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 3 -; CHECK-NEXT: fmin.s $fa0, $fa1, $fa0 -; CHECK-NEXT: vextrins.w $vr3, $vr0, 48 -; CHECK-NEXT: vst $vr3, $a0, 0 +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vfmin.s $vr0, $vr0, $vr1 +; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: %v0 = load <4 x float>, ptr %x @@ -35,15 +21,9 @@ entry: define void @minnum_v2f64(ptr %res, ptr %x, ptr %y) nounwind { ; CHECK-LABEL: minnum_v2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vld $vr0, $a2, 0 -; CHECK-NEXT: vld $vr1, $a1, 0 -; CHECK-NEXT: vreplvei.d $vr2, $vr0, 1 -; CHECK-NEXT: vreplvei.d $vr3, $vr1, 1 -; CHECK-NEXT: fmin.d $fa2, $fa3, $fa2 -; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0 -; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0 -; CHECK-NEXT: fmin.d $fa0, $fa1, $fa0 -; CHECK-NEXT: vextrins.d $vr0, $vr2, 16 +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vfmin.d $vr0, $vr0, $vr1 ; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -57,24 +37,10 @@ entry: define void @maxnum_v4f32(ptr %res, ptr %x, ptr %y) nounwind { ; CHECK-LABEL: maxnum_v4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vld $vr0, $a2, 0 -; CHECK-NEXT: vld $vr1, $a1, 0 -; CHECK-NEXT: vreplvei.w $vr2, $vr0, 1 -; CHECK-NEXT: vreplvei.w $vr3, $vr1, 1 -; CHECK-NEXT: fmax.s $fa2, $fa3, $fa2 -; CHECK-NEXT: vreplvei.w $vr3, $vr0, 0 -; CHECK-NEXT: vreplvei.w $vr4, $vr1, 0 -; CHECK-NEXT: fmax.s $fa3, $fa4, $fa3 -; CHECK-NEXT: vextrins.w $vr3, $vr2, 16 -; CHECK-NEXT: vreplvei.w $vr2, $vr0, 2 -; CHECK-NEXT: vreplvei.w $vr4, $vr1, 2 -; CHECK-NEXT: fmax.s $fa2, $fa4, $fa2 -; CHECK-NEXT: vextrins.w $vr3, $vr2, 32 -; CHECK-NEXT: vreplvei.w $vr0, $vr0, 3 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 3 -; CHECK-NEXT: fmax.s $fa0, $fa1, $fa0 -; CHECK-NEXT: vextrins.w $vr3, $vr0, 48 -; CHECK-NEXT: vst $vr3, $a0, 0 +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vfmax.s $vr0, $vr0, $vr1 +; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: %v0 = load <4 x float>, ptr %x @@ -87,15 +53,9 @@ entry: define void @maxnum_v2f64(ptr %res, ptr %x, ptr %y) nounwind { ; CHECK-LABEL: maxnum_v2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vld $vr0, $a2, 0 -; CHECK-NEXT: vld $vr1, $a1, 0 -; CHECK-NEXT: vreplvei.d $vr2, $vr0, 1 -; CHECK-NEXT: vreplvei.d $vr3, $vr1, 1 -; CHECK-NEXT: fmax.d $fa2, $fa3, $fa2 -; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0 -; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0 -; CHECK-NEXT: fmax.d $fa0, $fa1, $fa0 -; CHECK-NEXT: vextrins.d $vr0, $vr2, 16 +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vfmax.d $vr0, $vr0, $vr1 ; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/LoongArch/lsx/fp-rounding.ll b/llvm/test/CodeGen/LoongArch/lsx/fp-rounding.ll index 1ca6290..cb01ac0 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/fp-rounding.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/fp-rounding.ll @@ -7,22 +7,8 @@ define void @ceil_v4f32(ptr %res, ptr %a0) nounwind { ; CHECK-LABEL: ceil_v4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a1, 0 -; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrp.s $vr1, $vr1 -; CHECK-NEXT: vreplvei.w $vr2, $vr0, 0 -; CHECK-NEXT: vreplvei.w $vr2, $vr2, 0 -; CHECK-NEXT: vfrintrp.s $vr2, $vr2 -; CHECK-NEXT: vextrins.w $vr2, $vr1, 16 -; CHECK-NEXT: vreplvei.w $vr1, $vr0, 2 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrp.s $vr1, $vr1 -; CHECK-NEXT: vextrins.w $vr2, $vr1, 32 -; CHECK-NEXT: vreplvei.w $vr0, $vr0, 3 -; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0 ; CHECK-NEXT: vfrintrp.s $vr0, $vr0 -; CHECK-NEXT: vextrins.w $vr2, $vr0, 48 -; CHECK-NEXT: vst $vr2, $a0, 0 +; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: %v0 = load <4 x float>, ptr %a0 @@ -36,13 +22,7 @@ define void @ceil_v2f64(ptr %res, ptr %a0) nounwind { ; CHECK-LABEL: ceil_v2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a1, 0 -; CHECK-NEXT: vreplvei.d $vr1, $vr0, 1 -; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrp.d $vr1, $vr1 -; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0 -; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0 ; CHECK-NEXT: vfrintrp.d $vr0, $vr0 -; CHECK-NEXT: vextrins.d $vr0, $vr1, 16 ; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -57,22 +37,8 @@ define void @floor_v4f32(ptr %res, ptr %a0) nounwind { ; CHECK-LABEL: floor_v4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a1, 0 -; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrm.s $vr1, $vr1 -; CHECK-NEXT: vreplvei.w $vr2, $vr0, 0 -; CHECK-NEXT: vreplvei.w $vr2, $vr2, 0 -; CHECK-NEXT: vfrintrm.s $vr2, $vr2 -; CHECK-NEXT: vextrins.w $vr2, $vr1, 16 -; CHECK-NEXT: vreplvei.w $vr1, $vr0, 2 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrm.s $vr1, $vr1 -; CHECK-NEXT: vextrins.w $vr2, $vr1, 32 -; CHECK-NEXT: vreplvei.w $vr0, $vr0, 3 -; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0 ; CHECK-NEXT: vfrintrm.s $vr0, $vr0 -; CHECK-NEXT: vextrins.w $vr2, $vr0, 48 -; CHECK-NEXT: vst $vr2, $a0, 0 +; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: %v0 = load <4 x float>, ptr %a0 @@ -86,13 +52,7 @@ define void @floor_v2f64(ptr %res, ptr %a0) nounwind { ; CHECK-LABEL: floor_v2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a1, 0 -; CHECK-NEXT: vreplvei.d $vr1, $vr0, 1 -; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrm.d $vr1, $vr1 -; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0 -; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0 ; CHECK-NEXT: vfrintrm.d $vr0, $vr0 -; CHECK-NEXT: vextrins.d $vr0, $vr1, 16 ; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -107,22 +67,8 @@ define void @trunc_v4f32(ptr %res, ptr %a0) nounwind { ; CHECK-LABEL: trunc_v4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a1, 0 -; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrz.s $vr1, $vr1 -; CHECK-NEXT: vreplvei.w $vr2, $vr0, 0 -; CHECK-NEXT: vreplvei.w $vr2, $vr2, 0 -; CHECK-NEXT: vfrintrz.s $vr2, $vr2 -; CHECK-NEXT: vextrins.w $vr2, $vr1, 16 -; CHECK-NEXT: vreplvei.w $vr1, $vr0, 2 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrz.s $vr1, $vr1 -; CHECK-NEXT: vextrins.w $vr2, $vr1, 32 -; CHECK-NEXT: vreplvei.w $vr0, $vr0, 3 -; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0 ; CHECK-NEXT: vfrintrz.s $vr0, $vr0 -; CHECK-NEXT: vextrins.w $vr2, $vr0, 48 -; CHECK-NEXT: vst $vr2, $a0, 0 +; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: %v0 = load <4 x float>, ptr %a0 @@ -136,13 +82,7 @@ define void @trunc_v2f64(ptr %res, ptr %a0) nounwind { ; CHECK-LABEL: trunc_v2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a1, 0 -; CHECK-NEXT: vreplvei.d $vr1, $vr0, 1 -; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrz.d $vr1, $vr1 -; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0 -; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0 ; CHECK-NEXT: vfrintrz.d $vr0, $vr0 -; CHECK-NEXT: vextrins.d $vr0, $vr1, 16 ; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -157,22 +97,8 @@ define void @roundeven_v4f32(ptr %res, ptr %a0) nounwind { ; CHECK-LABEL: roundeven_v4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a1, 0 -; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrne.s $vr1, $vr1 -; CHECK-NEXT: vreplvei.w $vr2, $vr0, 0 -; CHECK-NEXT: vreplvei.w $vr2, $vr2, 0 -; CHECK-NEXT: vfrintrne.s $vr2, $vr2 -; CHECK-NEXT: vextrins.w $vr2, $vr1, 16 -; CHECK-NEXT: vreplvei.w $vr1, $vr0, 2 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrne.s $vr1, $vr1 -; CHECK-NEXT: vextrins.w $vr2, $vr1, 32 -; CHECK-NEXT: vreplvei.w $vr0, $vr0, 3 -; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0 ; CHECK-NEXT: vfrintrne.s $vr0, $vr0 -; CHECK-NEXT: vextrins.w $vr2, $vr0, 48 -; CHECK-NEXT: vst $vr2, $a0, 0 +; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: %v0 = load <4 x float>, ptr %a0 @@ -186,13 +112,7 @@ define void @roundeven_v2f64(ptr %res, ptr %a0) nounwind { ; CHECK-LABEL: roundeven_v2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a1, 0 -; CHECK-NEXT: vreplvei.d $vr1, $vr0, 1 -; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrne.d $vr1, $vr1 -; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0 -; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0 ; CHECK-NEXT: vfrintrne.d $vr0, $vr0 -; CHECK-NEXT: vextrins.d $vr0, $vr1, 16 ; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/LoongArch/lsx/rotl-rotr.ll b/llvm/test/CodeGen/LoongArch/lsx/rotl-rotr.ll new file mode 100644 index 0000000..106a7b0 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/rotl-rotr.ll @@ -0,0 +1,246 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA64 + +define void @rotl_v16i8(ptr %dst, ptr %src, i8 signext %a0) nounwind { +; CHECK-LABEL: rotl_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vreplgr2vr.b $vr1, $a2 +; CHECK-NEXT: vneg.b $vr1, $vr1 +; CHECK-NEXT: vrotr.b $vr0, $vr0, $vr1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i8>, ptr %src + %v1.ele = insertelement <16 x i8> poison, i8 %a0, i8 0 + %v1 = shufflevector <16 x i8> %v1.ele, <16 x i8> poison, <16 x i32> zeroinitializer + %v1.sub = sub <16 x i8> splat (i8 8), %v1 + %b = shl <16 x i8> %v0, %v1 + %c = lshr <16 x i8> %v0, %v1.sub + %d = or <16 x i8> %b, %c + store <16 x i8> %d, ptr %dst + ret void +} + +define void @rotr_v16i8(ptr %dst, ptr %src, i8 signext %a0) nounwind { +; CHECK-LABEL: rotr_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vreplgr2vr.b $vr1, $a2 +; CHECK-NEXT: vrotr.b $vr0, $vr0, $vr1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i8>, ptr %src + %v1.ele = insertelement <16 x i8> poison, i8 %a0, i8 0 + %v1 = shufflevector <16 x i8> %v1.ele, <16 x i8> poison, <16 x i32> zeroinitializer + %v1.sub = sub <16 x i8> splat (i8 8), %v1 + %b = lshr <16 x i8> %v0, %v1 + %c = shl <16 x i8> %v0, %v1.sub + %d = or <16 x i8> %b, %c + store <16 x i8> %d, ptr %dst + ret void +} + +define void @rotr_v16i8_imm(ptr %dst, ptr %src) nounwind { +; CHECK-LABEL: rotr_v16i8_imm: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vrotri.b $vr0, $vr0, 2 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i8>, ptr %src + %b = lshr <16 x i8> %v0, splat (i8 2) + %c = shl <16 x i8> %v0, splat (i8 6) + %d = or <16 x i8> %b, %c + store <16 x i8> %d, ptr %dst + ret void +} + +define void @rotl_v8i16(ptr %dst, ptr %src, i16 signext %a0) nounwind { +; CHECK-LABEL: rotl_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vreplgr2vr.h $vr1, $a2 +; CHECK-NEXT: vneg.h $vr1, $vr1 +; CHECK-NEXT: vrotr.h $vr0, $vr0, $vr1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i16>, ptr %src + %v1.ele = insertelement <8 x i16> poison, i16 %a0, i16 0 + %v1 = shufflevector <8 x i16> %v1.ele, <8 x i16> poison, <8 x i32> zeroinitializer + %v1.sub = sub <8 x i16> splat (i16 16), %v1 + %b = shl <8 x i16> %v0, %v1 + %c = lshr <8 x i16> %v0, %v1.sub + %d = or <8 x i16> %b, %c + store <8 x i16> %d, ptr %dst + ret void +} + +define void @rotr_v8i16(ptr %dst, ptr %src, i16 signext %a0) nounwind { +; CHECK-LABEL: rotr_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vreplgr2vr.h $vr1, $a2 +; CHECK-NEXT: vrotr.h $vr0, $vr0, $vr1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i16>, ptr %src + %v1.ele = insertelement <8 x i16> poison, i16 %a0, i16 0 + %v1 = shufflevector <8 x i16> %v1.ele, <8 x i16> poison, <8 x i32> zeroinitializer + %v1.sub = sub <8 x i16> splat (i16 16), %v1 + %b = lshr <8 x i16> %v0, %v1 + %c = shl <8 x i16> %v0, %v1.sub + %d = or <8 x i16> %b, %c + store <8 x i16> %d, ptr %dst + ret void +} + +define void @rotr_v8i16_imm(ptr %dst, ptr %src) nounwind { +; CHECK-LABEL: rotr_v8i16_imm: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vrotri.h $vr0, $vr0, 2 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i16>, ptr %src + %b = lshr <8 x i16> %v0, splat (i16 2) + %c = shl <8 x i16> %v0, splat (i16 14) + %d = or <8 x i16> %b, %c + store <8 x i16> %d, ptr %dst + ret void +} + +define void @rotl_v4i32(ptr %dst, ptr %src, i32 signext %a0) nounwind { +; CHECK-LABEL: rotl_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vreplgr2vr.w $vr1, $a2 +; CHECK-NEXT: vneg.w $vr1, $vr1 +; CHECK-NEXT: vrotr.w $vr0, $vr0, $vr1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x i32>, ptr %src + %v1.ele = insertelement <4 x i32> poison, i32 %a0, i32 0 + %v1 = shufflevector <4 x i32> %v1.ele, <4 x i32> poison, <4 x i32> zeroinitializer + %v1.sub = sub <4 x i32> splat (i32 32), %v1 + %b = shl <4 x i32> %v0, %v1 + %c = lshr <4 x i32> %v0, %v1.sub + %d = or <4 x i32> %b, %c + store <4 x i32> %d, ptr %dst + ret void +} + +define void @rotr_v4i32(ptr %dst, ptr %src, i32 signext %a0) nounwind { +; CHECK-LABEL: rotr_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vreplgr2vr.w $vr1, $a2 +; CHECK-NEXT: vrotr.w $vr0, $vr0, $vr1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x i32>, ptr %src + %v1.ele = insertelement <4 x i32> poison, i32 %a0, i32 0 + %v1 = shufflevector <4 x i32> %v1.ele, <4 x i32> poison, <4 x i32> zeroinitializer + %v1.sub = sub <4 x i32> splat (i32 32), %v1 + %b = lshr <4 x i32> %v0, %v1 + %c = shl <4 x i32> %v0, %v1.sub + %d = or <4 x i32> %b, %c + store <4 x i32> %d, ptr %dst + ret void +} + +define void @rotr_v4i32_imm(ptr %dst, ptr %src) nounwind { +; CHECK-LABEL: rotr_v4i32_imm: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vrotri.w $vr0, $vr0, 2 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x i32>, ptr %src + %b = lshr <4 x i32> %v0, splat (i32 2) + %c = shl <4 x i32> %v0, splat (i32 30) + %d = or <4 x i32> %b, %c + store <4 x i32> %d, ptr %dst + ret void +} + +define void @rotl_v2i64(ptr %dst, ptr %src, i64 %a0) nounwind { +; LA32-LABEL: rotl_v2i64: +; LA32: # %bb.0: +; LA32-NEXT: vld $vr0, $a1, 0 +; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 0 +; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 2 +; LA32-NEXT: vneg.d $vr1, $vr1 +; LA32-NEXT: vrotr.d $vr0, $vr0, $vr1 +; LA32-NEXT: vst $vr0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: rotl_v2i64: +; LA64: # %bb.0: +; LA64-NEXT: vld $vr0, $a1, 0 +; LA64-NEXT: vreplgr2vr.d $vr1, $a2 +; LA64-NEXT: vneg.d $vr1, $vr1 +; LA64-NEXT: vrotr.d $vr0, $vr0, $vr1 +; LA64-NEXT: vst $vr0, $a0, 0 +; LA64-NEXT: ret + %v0 = load <2 x i64>, ptr %src + %v1.ele = insertelement <2 x i64> poison, i64 %a0, i64 0 + %v1 = shufflevector <2 x i64> %v1.ele, <2 x i64> poison, <2 x i32> zeroinitializer + %v1.sub = sub <2 x i64> splat (i64 64), %v1 + %b = shl <2 x i64> %v0, %v1 + %c = lshr <2 x i64> %v0, %v1.sub + %d = or <2 x i64> %b, %c + store <2 x i64> %d, ptr %dst + ret void +} + +define void @rotr_v2i64(ptr %dst, ptr %src, i64 %a0) nounwind { +; LA32-LABEL: rotr_v2i64: +; LA32: # %bb.0: +; LA32-NEXT: vld $vr0, $a1, 0 +; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 0 +; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 2 +; LA32-NEXT: vrotr.d $vr0, $vr0, $vr1 +; LA32-NEXT: vst $vr0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: rotr_v2i64: +; LA64: # %bb.0: +; LA64-NEXT: vld $vr0, $a1, 0 +; LA64-NEXT: vreplgr2vr.d $vr1, $a2 +; LA64-NEXT: vrotr.d $vr0, $vr0, $vr1 +; LA64-NEXT: vst $vr0, $a0, 0 +; LA64-NEXT: ret + %v0 = load <2 x i64>, ptr %src + %v1.ele = insertelement <2 x i64> poison, i64 %a0, i64 0 + %v1 = shufflevector <2 x i64> %v1.ele, <2 x i64> poison, <2 x i32> zeroinitializer + %v1.sub = sub <2 x i64> splat (i64 64), %v1 + %b = lshr <2 x i64> %v0, %v1 + %c = shl <2 x i64> %v0, %v1.sub + %d = or <2 x i64> %b, %c + store <2 x i64> %d, ptr %dst + ret void +} + +define void @rotr_v2i64_imm(ptr %dst, ptr %src) nounwind { +; LA32-LABEL: rotr_v2i64_imm: +; LA32: # %bb.0: +; LA32-NEXT: vld $vr0, $a1, 0 +; LA32-NEXT: vrepli.w $vr1, -62 +; LA32-NEXT: vrotr.d $vr0, $vr0, $vr1 +; LA32-NEXT: vst $vr0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: rotr_v2i64_imm: +; LA64: # %bb.0: +; LA64-NEXT: vld $vr0, $a1, 0 +; LA64-NEXT: vrotri.d $vr0, $vr0, 2 +; LA64-NEXT: vst $vr0, $a0, 0 +; LA64-NEXT: ret + %v0 = load <2 x i64>, ptr %src + %b = lshr <2 x i64> %v0, splat (i64 2) + %c = shl <2 x i64> %v0, splat (i64 62) + %d = or <2 x i64> %b, %c + store <2 x i64> %d, ptr %dst + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/shuffle-as-bswap.ll b/llvm/test/CodeGen/LoongArch/lsx/shuffle-as-bswap.ll new file mode 100644 index 0000000..a6b61dc --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/shuffle-as-bswap.ll @@ -0,0 +1,47 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s + +define void @shufflevector_bswap_h(ptr %res, ptr %a) nounwind { +; CHECK-LABEL: shufflevector_bswap_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vshuf4i.b $vr0, $vr0, 177 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %va = load <16 x i8>, ptr %a + %b = shufflevector <16 x i8> %va, <16 x i8> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14> + store <16 x i8> %b, ptr %res + ret void +} + +define void @shufflevector_bswap_w(ptr %res, ptr %a) nounwind { +; CHECK-LABEL: shufflevector_bswap_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vshuf4i.b $vr0, $vr0, 27 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %va = load <16 x i8>, ptr %a + %b = shufflevector <16 x i8> %va, <16 x i8> poison, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12> + store <16 x i8> %b, ptr %res + ret void +} + +define void @shufflevector_bswap_d(ptr %res, ptr %a) nounwind { +; CHECK-LABEL: shufflevector_bswap_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI2_0) +; CHECK-NEXT: vld $vr1, $a1, %pc_lo12(.LCPI2_0) +; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr0, $vr1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %va = load <16 x i8>, ptr %a + %b = shufflevector <16 x i8> %va, <16 x i8> poison, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8> + store <16 x i8> %b, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/memcmp.ll b/llvm/test/CodeGen/LoongArch/memcmp.ll index c4aaf9a..c3811c0 100644 --- a/llvm/test/CodeGen/LoongArch/memcmp.ll +++ b/llvm/test/CodeGen/LoongArch/memcmp.ll @@ -7,15 +7,24 @@ define signext i32 @test1(ptr %buffer1, ptr %buffer2) { ; CHECK-LABEL: test1: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi.d $sp, $sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; CHECK-NEXT: .cfi_offset 1, -8 -; CHECK-NEXT: ori $a2, $zero, 16 -; CHECK-NEXT: pcaddu18i $ra, %call36(memcmp) -; CHECK-NEXT: jirl $ra, $ra, 0 -; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ld.d $a2, $a0, 0 +; CHECK-NEXT: ld.d $a3, $a1, 0 +; CHECK-NEXT: revb.d $a2, $a2 +; CHECK-NEXT: revb.d $a3, $a3 +; CHECK-NEXT: bne $a2, $a3, .LBB0_3 +; CHECK-NEXT: # %bb.1: # %loadbb1 +; CHECK-NEXT: ld.d $a0, $a0, 8 +; CHECK-NEXT: ld.d $a1, $a1, 8 +; CHECK-NEXT: revb.d $a2, $a0 +; CHECK-NEXT: revb.d $a3, $a1 +; CHECK-NEXT: bne $a2, $a3, .LBB0_3 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $a0, $zero +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB0_3: # %res_block +; CHECK-NEXT: sltu $a0, $a2, $a3 +; CHECK-NEXT: sub.d $a0, $zero, $a0 +; CHECK-NEXT: ori $a0, $a0, 1 ; CHECK-NEXT: ret entry: %call = call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 16) diff --git a/llvm/test/CodeGen/LoongArch/opt-pipeline.ll b/llvm/test/CodeGen/LoongArch/opt-pipeline.ll index 661f67d..546ed6c 100644 --- a/llvm/test/CodeGen/LoongArch/opt-pipeline.ll +++ b/llvm/test/CodeGen/LoongArch/opt-pipeline.ll @@ -17,9 +17,11 @@ ; LAXX-LABEL: Pass Arguments: ; LAXX-NEXT: Target Library Information +; LAXX-NEXT: Runtime Library Function Analysis ; LAXX-NEXT: Target Pass Configuration ; LAXX-NEXT: Machine Module Information ; LAXX-NEXT: Target Transform Information +; LAXX-NEXT: Library Function Lowering Analysis ; LAXX-NEXT: Assumption Cache Tracker ; LAXX-NEXT: Type-Based Alias Analysis ; LAXX-NEXT: Scoped NoAlias Alias Analysis diff --git a/llvm/test/CodeGen/LoongArch/sink-fold-addi.ll b/llvm/test/CodeGen/LoongArch/sink-fold-addi.ll new file mode 100644 index 0000000..93f73e5 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/sink-fold-addi.ll @@ -0,0 +1,746 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx --verify-machineinstrs < %s \ +; RUN: | FileCheck --check-prefix=LA32 %s +; RUN: llc --mtriple=loongarch64 --mattr=+lasx --verify-machineinstrs < %s \ +; RUN: | FileCheck --check-prefix=LA64 %s + +%struct.S = type { i64, i64, i8 } +%struct.F = type { float, double, float } +%struct.V = type { <4 x i32>, <4 x i32>, <16 x i16> } + +define void @sink_fold_i64(i64 %k, i64 %n, ptr %a) nounwind { +; LA32-LABEL: sink_fold_i64: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -48 +; LA32-NEXT: st.w $ra, $sp, 44 # 4-byte Folded Spill +; LA32-NEXT: st.w $fp, $sp, 40 # 4-byte Folded Spill +; LA32-NEXT: st.w $s0, $sp, 36 # 4-byte Folded Spill +; LA32-NEXT: st.w $s1, $sp, 32 # 4-byte Folded Spill +; LA32-NEXT: st.w $s2, $sp, 28 # 4-byte Folded Spill +; LA32-NEXT: st.w $s3, $sp, 24 # 4-byte Folded Spill +; LA32-NEXT: st.w $s4, $sp, 20 # 4-byte Folded Spill +; LA32-NEXT: st.w $s5, $sp, 16 # 4-byte Folded Spill +; LA32-NEXT: st.w $s6, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: move $s0, $a3 +; LA32-NEXT: move $s1, $a2 +; LA32-NEXT: slli.w $a1, $a0, 4 +; LA32-NEXT: alsl.w $a0, $a0, $a1, 3 +; LA32-NEXT: sltui $a1, $a3, 1 +; LA32-NEXT: slti $a2, $a3, 0 +; LA32-NEXT: masknez $a2, $a2, $a1 +; LA32-NEXT: sltui $a3, $s1, 1 +; LA32-NEXT: maskeqz $a1, $a3, $a1 +; LA32-NEXT: or $a1, $a1, $a2 +; LA32-NEXT: add.w $s2, $a4, $a0 +; LA32-NEXT: bnez $a1, .LBB0_3 +; LA32-NEXT: # %bb.1: # %for.body.preheader +; LA32-NEXT: move $fp, $a4 +; LA32-NEXT: move $s4, $zero +; LA32-NEXT: move $s5, $zero +; LA32-NEXT: move $s3, $zero +; LA32-NEXT: move $s6, $zero +; LA32-NEXT: .p2align 4, , 16 +; LA32-NEXT: .LBB0_2: # %for.body +; LA32-NEXT: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: move $a0, $fp +; LA32-NEXT: bl f +; LA32-NEXT: ld.w $a0, $s2, 12 +; LA32-NEXT: ld.w $a1, $s2, 8 +; LA32-NEXT: add.w $a0, $a0, $s6 +; LA32-NEXT: add.w $s3, $a1, $s3 +; LA32-NEXT: sltu $a1, $s3, $a1 +; LA32-NEXT: addi.w $s4, $s4, 1 +; LA32-NEXT: sltui $a2, $s4, 1 +; LA32-NEXT: add.w $s5, $s5, $a2 +; LA32-NEXT: xor $a2, $s4, $s1 +; LA32-NEXT: xor $a3, $s5, $s0 +; LA32-NEXT: or $a2, $a2, $a3 +; LA32-NEXT: add.w $s6, $a0, $a1 +; LA32-NEXT: bnez $a2, .LBB0_2 +; LA32-NEXT: b .LBB0_4 +; LA32-NEXT: .LBB0_3: +; LA32-NEXT: move $s3, $zero +; LA32-NEXT: move $s6, $zero +; LA32-NEXT: .LBB0_4: # %for.cond.cleanup +; LA32-NEXT: st.w $s3, $s2, 8 +; LA32-NEXT: st.w $s6, $s2, 12 +; LA32-NEXT: ld.w $s6, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s5, $sp, 16 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s4, $sp, 20 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s3, $sp, 24 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s2, $sp, 28 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s1, $sp, 32 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s0, $sp, 36 # 4-byte Folded Reload +; LA32-NEXT: ld.w $fp, $sp, 40 # 4-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 44 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 48 +; LA32-NEXT: ret +; +; LA64-LABEL: sink_fold_i64: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -48 +; LA64-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill +; LA64-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill +; LA64-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill +; LA64-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill +; LA64-NEXT: st.d $s2, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: move $s0, $a1 +; LA64-NEXT: slli.d $a1, $a0, 4 +; LA64-NEXT: alsl.d $a0, $a0, $a1, 3 +; LA64-NEXT: add.d $s1, $a2, $a0 +; LA64-NEXT: blez $s0, .LBB0_3 +; LA64-NEXT: # %bb.1: # %for.body.preheader +; LA64-NEXT: move $fp, $a2 +; LA64-NEXT: move $s2, $zero +; LA64-NEXT: .p2align 4, , 16 +; LA64-NEXT: .LBB0_2: # %for.body +; LA64-NEXT: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: move $a0, $fp +; LA64-NEXT: pcaddu18i $ra, %call36(f) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $a0, $s1, 8 +; LA64-NEXT: addi.d $s0, $s0, -1 +; LA64-NEXT: add.d $s2, $a0, $s2 +; LA64-NEXT: bnez $s0, .LBB0_2 +; LA64-NEXT: b .LBB0_4 +; LA64-NEXT: .LBB0_3: +; LA64-NEXT: move $s2, $zero +; LA64-NEXT: .LBB0_4: # %for.cond.cleanup +; LA64-NEXT: st.d $s2, $s1, 8 +; LA64-NEXT: ld.d $s2, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload +; LA64-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload +; LA64-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload +; LA64-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 48 +; LA64-NEXT: ret +entry: + %y = getelementptr inbounds %struct.S, ptr %a, i64 %k, i32 1 + %cmp4 = icmp sgt i64 %n, 0 + br i1 %cmp4, label %for.body, label %for.cond.cleanup + +for.body: ; preds = %entry, %for.body + %i.06 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %s.05 = phi i64 [ 0, %entry ], [ %add, %for.body ] + call void @f(ptr %a) + %0 = load i64, ptr %y + %add = add nsw i64 %0, %s.05 + %inc = add nuw nsw i64 %i.06, 1 + %exitcond.not = icmp eq i64 %inc, %n + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + %s.0.lcssa = phi i64 [ 0, %entry ], [ %add, %for.body ] + store i64 %s.0.lcssa, ptr %y + ret void +} + +define void @sink_fold_f32(i64 %k, i64 %n, ptr %a) nounwind { +; LA32-LABEL: sink_fold_f32: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -48 +; LA32-NEXT: st.w $ra, $sp, 44 # 4-byte Folded Spill +; LA32-NEXT: st.w $fp, $sp, 40 # 4-byte Folded Spill +; LA32-NEXT: st.w $s0, $sp, 36 # 4-byte Folded Spill +; LA32-NEXT: st.w $s1, $sp, 32 # 4-byte Folded Spill +; LA32-NEXT: st.w $s2, $sp, 28 # 4-byte Folded Spill +; LA32-NEXT: st.w $s3, $sp, 24 # 4-byte Folded Spill +; LA32-NEXT: st.w $s4, $sp, 20 # 4-byte Folded Spill +; LA32-NEXT: fst.d $fs0, $sp, 8 # 8-byte Folded Spill +; LA32-NEXT: move $s0, $a3 +; LA32-NEXT: move $s1, $a2 +; LA32-NEXT: slli.w $a1, $a0, 4 +; LA32-NEXT: alsl.w $a0, $a0, $a1, 3 +; LA32-NEXT: sltui $a1, $a3, 1 +; LA32-NEXT: slti $a2, $a3, 0 +; LA32-NEXT: masknez $a2, $a2, $a1 +; LA32-NEXT: sltui $a3, $s1, 1 +; LA32-NEXT: maskeqz $a1, $a3, $a1 +; LA32-NEXT: or $a1, $a1, $a2 +; LA32-NEXT: add.w $s2, $a4, $a0 +; LA32-NEXT: bnez $a1, .LBB1_3 +; LA32-NEXT: # %bb.1: # %for.body.preheader +; LA32-NEXT: move $fp, $a4 +; LA32-NEXT: move $s3, $zero +; LA32-NEXT: move $s4, $zero +; LA32-NEXT: movgr2fr.w $fs0, $zero +; LA32-NEXT: .p2align 4, , 16 +; LA32-NEXT: .LBB1_2: # %for.body +; LA32-NEXT: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: move $a0, $fp +; LA32-NEXT: bl f +; LA32-NEXT: fld.s $fa0, $s2, 16 +; LA32-NEXT: addi.w $s3, $s3, 1 +; LA32-NEXT: sltui $a0, $s3, 1 +; LA32-NEXT: add.w $s4, $s4, $a0 +; LA32-NEXT: xor $a0, $s3, $s1 +; LA32-NEXT: xor $a1, $s4, $s0 +; LA32-NEXT: or $a0, $a0, $a1 +; LA32-NEXT: fadd.s $fs0, $fa0, $fs0 +; LA32-NEXT: bnez $a0, .LBB1_2 +; LA32-NEXT: b .LBB1_4 +; LA32-NEXT: .LBB1_3: +; LA32-NEXT: movgr2fr.w $fs0, $zero +; LA32-NEXT: .LBB1_4: # %for.cond.cleanup +; LA32-NEXT: fst.s $fs0, $s2, 16 +; LA32-NEXT: fld.d $fs0, $sp, 8 # 8-byte Folded Reload +; LA32-NEXT: ld.w $s4, $sp, 20 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s3, $sp, 24 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s2, $sp, 28 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s1, $sp, 32 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s0, $sp, 36 # 4-byte Folded Reload +; LA32-NEXT: ld.w $fp, $sp, 40 # 4-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 44 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 48 +; LA32-NEXT: ret +; +; LA64-LABEL: sink_fold_f32: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -48 +; LA64-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill +; LA64-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill +; LA64-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill +; LA64-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill +; LA64-NEXT: fst.d $fs0, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: move $s0, $a1 +; LA64-NEXT: slli.d $a1, $a0, 4 +; LA64-NEXT: alsl.d $a0, $a0, $a1, 3 +; LA64-NEXT: add.d $s1, $a2, $a0 +; LA64-NEXT: blez $s0, .LBB1_3 +; LA64-NEXT: # %bb.1: # %for.body.preheader +; LA64-NEXT: move $fp, $a2 +; LA64-NEXT: movgr2fr.w $fs0, $zero +; LA64-NEXT: .p2align 4, , 16 +; LA64-NEXT: .LBB1_2: # %for.body +; LA64-NEXT: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: move $a0, $fp +; LA64-NEXT: pcaddu18i $ra, %call36(f) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: fld.s $fa0, $s1, 16 +; LA64-NEXT: addi.d $s0, $s0, -1 +; LA64-NEXT: fadd.s $fs0, $fa0, $fs0 +; LA64-NEXT: bnez $s0, .LBB1_2 +; LA64-NEXT: b .LBB1_4 +; LA64-NEXT: .LBB1_3: +; LA64-NEXT: movgr2fr.w $fs0, $zero +; LA64-NEXT: .LBB1_4: # %for.cond.cleanup +; LA64-NEXT: fst.s $fs0, $s1, 16 +; LA64-NEXT: fld.d $fs0, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload +; LA64-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload +; LA64-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload +; LA64-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 48 +; LA64-NEXT: ret +entry: + %y = getelementptr inbounds %struct.F, ptr %a, i64 %k, i32 2 + %cmp4 = icmp sgt i64 %n, 0 + br i1 %cmp4, label %for.body, label %for.cond.cleanup + +for.body: ; preds = %entry, %for.body + %i.06 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %s.05 = phi float [ 0.0, %entry ], [ %add, %for.body ] + call void @f(ptr %a) + %0 = load float, ptr %y + %add = fadd float %0, %s.05 + %inc = add nuw nsw i64 %i.06, 1 + %exitcond.not = icmp eq i64 %inc, %n + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + %s.0.lcssa = phi float [ 0.0, %entry ], [ %add, %for.body ] + store float %s.0.lcssa, ptr %y + ret void +} + +define void @sink_fold_v4i32(i64 %k, i64 %n, ptr %a) nounwind { +; LA32-LABEL: sink_fold_v4i32: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -48 +; LA32-NEXT: st.w $ra, $sp, 44 # 4-byte Folded Spill +; LA32-NEXT: st.w $fp, $sp, 40 # 4-byte Folded Spill +; LA32-NEXT: st.w $s0, $sp, 36 # 4-byte Folded Spill +; LA32-NEXT: st.w $s1, $sp, 32 # 4-byte Folded Spill +; LA32-NEXT: st.w $s2, $sp, 28 # 4-byte Folded Spill +; LA32-NEXT: st.w $s3, $sp, 24 # 4-byte Folded Spill +; LA32-NEXT: st.w $s4, $sp, 20 # 4-byte Folded Spill +; LA32-NEXT: move $s0, $a3 +; LA32-NEXT: move $s1, $a2 +; LA32-NEXT: slli.w $a0, $a0, 6 +; LA32-NEXT: sltui $a1, $a3, 1 +; LA32-NEXT: slti $a2, $a3, 0 +; LA32-NEXT: masknez $a2, $a2, $a1 +; LA32-NEXT: sltui $a3, $s1, 1 +; LA32-NEXT: maskeqz $a1, $a3, $a1 +; LA32-NEXT: or $a1, $a1, $a2 +; LA32-NEXT: add.w $s2, $a4, $a0 +; LA32-NEXT: bnez $a1, .LBB2_3 +; LA32-NEXT: # %bb.1: # %for.body.preheader +; LA32-NEXT: move $fp, $a4 +; LA32-NEXT: move $s3, $zero +; LA32-NEXT: move $s4, $zero +; LA32-NEXT: vrepli.b $vr0, 0 +; LA32-NEXT: .p2align 4, , 16 +; LA32-NEXT: .LBB2_2: # %for.body +; LA32-NEXT: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill +; LA32-NEXT: move $a0, $fp +; LA32-NEXT: bl f +; LA32-NEXT: vld $vr0, $s2, 16 +; LA32-NEXT: addi.w $s3, $s3, 1 +; LA32-NEXT: sltui $a0, $s3, 1 +; LA32-NEXT: add.w $s4, $s4, $a0 +; LA32-NEXT: xor $a0, $s3, $s1 +; LA32-NEXT: xor $a1, $s4, $s0 +; LA32-NEXT: or $a0, $a0, $a1 +; LA32-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload +; LA32-NEXT: vadd.w $vr1, $vr0, $vr1 +; LA32-NEXT: vst $vr1, $sp, 0 # 16-byte Folded Spill +; LA32-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload +; LA32-NEXT: bnez $a0, .LBB2_2 +; LA32-NEXT: b .LBB2_4 +; LA32-NEXT: .LBB2_3: +; LA32-NEXT: vrepli.b $vr0, 0 +; LA32-NEXT: .LBB2_4: # %for.cond.cleanup +; LA32-NEXT: vst $vr0, $s2, 16 +; LA32-NEXT: ld.w $s4, $sp, 20 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s3, $sp, 24 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s2, $sp, 28 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s1, $sp, 32 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s0, $sp, 36 # 4-byte Folded Reload +; LA32-NEXT: ld.w $fp, $sp, 40 # 4-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 44 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 48 +; LA32-NEXT: ret +; +; LA64-LABEL: sink_fold_v4i32: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -48 +; LA64-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill +; LA64-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill +; LA64-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill +; LA64-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill +; LA64-NEXT: slli.d $a0, $a0, 6 +; LA64-NEXT: add.d $s1, $a2, $a0 +; LA64-NEXT: blez $a1, .LBB2_3 +; LA64-NEXT: # %bb.1: # %for.body.preheader +; LA64-NEXT: move $fp, $a2 +; LA64-NEXT: move $s0, $a1 +; LA64-NEXT: vrepli.b $vr0, 0 +; LA64-NEXT: .p2align 4, , 16 +; LA64-NEXT: .LBB2_2: # %for.body +; LA64-NEXT: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill +; LA64-NEXT: move $a0, $fp +; LA64-NEXT: pcaddu18i $ra, %call36(f) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: vld $vr0, $s1, 16 +; LA64-NEXT: addi.d $s0, $s0, -1 +; LA64-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload +; LA64-NEXT: vadd.w $vr1, $vr0, $vr1 +; LA64-NEXT: vst $vr1, $sp, 0 # 16-byte Folded Spill +; LA64-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload +; LA64-NEXT: bnez $s0, .LBB2_2 +; LA64-NEXT: b .LBB2_4 +; LA64-NEXT: .LBB2_3: +; LA64-NEXT: vrepli.b $vr0, 0 +; LA64-NEXT: .LBB2_4: # %for.cond.cleanup +; LA64-NEXT: vst $vr0, $s1, 16 +; LA64-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload +; LA64-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload +; LA64-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload +; LA64-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 48 +; LA64-NEXT: ret +entry: + %y = getelementptr inbounds %struct.V, ptr %a, i64 %k, i32 1 + %cmp = icmp sgt i64 %n, 0 + br i1 %cmp, label %for.body, label %for.cond.cleanup + +for.body: ; preds = %entry, %for.body + %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %sum.0 = phi <4 x i32> [ zeroinitializer, %entry ], [ %addv, %for.body ] + call void @f(ptr %a) + %v = load <4 x i32>, ptr %y + %addv = add <4 x i32> %v, %sum.0 + %inc = add nuw nsw i64 %i.0, 1 + %exitcond = icmp eq i64 %inc, %n + br i1 %exitcond, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + %sum.lcssa = phi <4 x i32> [ zeroinitializer, %entry ], [ %addv, %for.body ] + store <4 x i32> %sum.lcssa, ptr %y + ret void +} + +define void @sink_fold_v16i16(i64 %k, i64 %n, ptr %a) nounwind { +; LA32-LABEL: sink_fold_v16i16: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -80 +; LA32-NEXT: st.w $ra, $sp, 76 # 4-byte Folded Spill +; LA32-NEXT: st.w $fp, $sp, 72 # 4-byte Folded Spill +; LA32-NEXT: st.w $s0, $sp, 68 # 4-byte Folded Spill +; LA32-NEXT: st.w $s1, $sp, 64 # 4-byte Folded Spill +; LA32-NEXT: st.w $s2, $sp, 60 # 4-byte Folded Spill +; LA32-NEXT: st.w $s3, $sp, 56 # 4-byte Folded Spill +; LA32-NEXT: st.w $s4, $sp, 52 # 4-byte Folded Spill +; LA32-NEXT: move $s0, $a3 +; LA32-NEXT: move $s1, $a2 +; LA32-NEXT: slli.w $a0, $a0, 6 +; LA32-NEXT: sltui $a1, $a3, 1 +; LA32-NEXT: slti $a2, $a3, 0 +; LA32-NEXT: masknez $a2, $a2, $a1 +; LA32-NEXT: sltui $a3, $s1, 1 +; LA32-NEXT: maskeqz $a1, $a3, $a1 +; LA32-NEXT: or $a1, $a1, $a2 +; LA32-NEXT: add.w $s2, $a4, $a0 +; LA32-NEXT: bnez $a1, .LBB3_3 +; LA32-NEXT: # %bb.1: # %for.body.preheader +; LA32-NEXT: move $fp, $a4 +; LA32-NEXT: move $s3, $zero +; LA32-NEXT: move $s4, $zero +; LA32-NEXT: xvrepli.b $xr0, 0 +; LA32-NEXT: .p2align 4, , 16 +; LA32-NEXT: .LBB3_2: # %for.body +; LA32-NEXT: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: xvst $xr0, $sp, 16 # 32-byte Folded Spill +; LA32-NEXT: move $a0, $fp +; LA32-NEXT: bl f +; LA32-NEXT: xvld $xr0, $s2, 32 +; LA32-NEXT: addi.w $s3, $s3, 1 +; LA32-NEXT: sltui $a0, $s3, 1 +; LA32-NEXT: add.w $s4, $s4, $a0 +; LA32-NEXT: xor $a0, $s3, $s1 +; LA32-NEXT: xor $a1, $s4, $s0 +; LA32-NEXT: or $a0, $a0, $a1 +; LA32-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload +; LA32-NEXT: xvadd.h $xr1, $xr0, $xr1 +; LA32-NEXT: xvst $xr1, $sp, 16 # 32-byte Folded Spill +; LA32-NEXT: xvld $xr0, $sp, 16 # 32-byte Folded Reload +; LA32-NEXT: bnez $a0, .LBB3_2 +; LA32-NEXT: b .LBB3_4 +; LA32-NEXT: .LBB3_3: +; LA32-NEXT: xvrepli.b $xr0, 0 +; LA32-NEXT: .LBB3_4: # %for.cond.cleanup +; LA32-NEXT: xvst $xr0, $s2, 32 +; LA32-NEXT: ld.w $s4, $sp, 52 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s3, $sp, 56 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s2, $sp, 60 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s1, $sp, 64 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s0, $sp, 68 # 4-byte Folded Reload +; LA32-NEXT: ld.w $fp, $sp, 72 # 4-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 76 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 80 +; LA32-NEXT: ret +; +; LA64-LABEL: sink_fold_v16i16: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -80 +; LA64-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill +; LA64-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill +; LA64-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill +; LA64-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill +; LA64-NEXT: slli.d $a0, $a0, 6 +; LA64-NEXT: add.d $s1, $a2, $a0 +; LA64-NEXT: blez $a1, .LBB3_3 +; LA64-NEXT: # %bb.1: # %for.body.preheader +; LA64-NEXT: move $fp, $a2 +; LA64-NEXT: move $s0, $a1 +; LA64-NEXT: xvrepli.b $xr0, 0 +; LA64-NEXT: .p2align 4, , 16 +; LA64-NEXT: .LBB3_2: # %for.body +; LA64-NEXT: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: xvst $xr0, $sp, 16 # 32-byte Folded Spill +; LA64-NEXT: move $a0, $fp +; LA64-NEXT: pcaddu18i $ra, %call36(f) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: xvld $xr0, $s1, 32 +; LA64-NEXT: addi.d $s0, $s0, -1 +; LA64-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload +; LA64-NEXT: xvadd.h $xr1, $xr0, $xr1 +; LA64-NEXT: xvst $xr1, $sp, 16 # 32-byte Folded Spill +; LA64-NEXT: xvld $xr0, $sp, 16 # 32-byte Folded Reload +; LA64-NEXT: bnez $s0, .LBB3_2 +; LA64-NEXT: b .LBB3_4 +; LA64-NEXT: .LBB3_3: +; LA64-NEXT: xvrepli.b $xr0, 0 +; LA64-NEXT: .LBB3_4: # %for.cond.cleanup +; LA64-NEXT: xvst $xr0, $s1, 32 +; LA64-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload +; LA64-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload +; LA64-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload +; LA64-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 80 +; LA64-NEXT: ret +entry: + %y = getelementptr inbounds %struct.V, ptr %a, i64 %k, i32 2 + %cmp = icmp sgt i64 %n, 0 + br i1 %cmp, label %for.body, label %for.cond.cleanup + +for.body: ; preds = %entry, %for.body + %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %sum.0 = phi <16 x i16> [ zeroinitializer, %entry ], [ %addv, %for.body ] + call void @f(ptr %a) + %v = load <16 x i16>, ptr %y + %addv = add <16 x i16> %v, %sum.0 + %inc = add nuw nsw i64 %i.0, 1 + %exitcond = icmp eq i64 %inc, %n + br i1 %exitcond, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + %sum.lcssa = phi <16 x i16> [ zeroinitializer, %entry ], [ %addv, %for.body ] + store <16 x i16> %sum.lcssa, ptr %y + ret void +} + +define void @sink_fold_extracti8(i64 %k, i64 %n, ptr %a) nounwind { +; LA32-LABEL: sink_fold_extracti8: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -48 +; LA32-NEXT: st.w $ra, $sp, 44 # 4-byte Folded Spill +; LA32-NEXT: st.w $fp, $sp, 40 # 4-byte Folded Spill +; LA32-NEXT: st.w $s0, $sp, 36 # 4-byte Folded Spill +; LA32-NEXT: st.w $s1, $sp, 32 # 4-byte Folded Spill +; LA32-NEXT: st.w $s2, $sp, 28 # 4-byte Folded Spill +; LA32-NEXT: st.w $s3, $sp, 24 # 4-byte Folded Spill +; LA32-NEXT: st.w $s4, $sp, 20 # 4-byte Folded Spill +; LA32-NEXT: move $s0, $a3 +; LA32-NEXT: move $s1, $a2 +; LA32-NEXT: slli.w $a1, $a0, 4 +; LA32-NEXT: alsl.w $a0, $a0, $a1, 3 +; LA32-NEXT: sltui $a1, $a3, 1 +; LA32-NEXT: slti $a2, $a3, 0 +; LA32-NEXT: masknez $a2, $a2, $a1 +; LA32-NEXT: sltui $a3, $s1, 1 +; LA32-NEXT: maskeqz $a1, $a3, $a1 +; LA32-NEXT: or $a1, $a1, $a2 +; LA32-NEXT: add.w $s2, $a4, $a0 +; LA32-NEXT: bnez $a1, .LBB4_3 +; LA32-NEXT: # %bb.1: # %for.body.preheader +; LA32-NEXT: move $fp, $a4 +; LA32-NEXT: move $s3, $zero +; LA32-NEXT: move $s4, $zero +; LA32-NEXT: vrepli.b $vr0, 0 +; LA32-NEXT: .p2align 4, , 16 +; LA32-NEXT: .LBB4_2: # %for.body +; LA32-NEXT: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill +; LA32-NEXT: move $a0, $fp +; LA32-NEXT: bl f +; LA32-NEXT: vldrepl.b $vr0, $s2, 16 +; LA32-NEXT: addi.w $s3, $s3, 1 +; LA32-NEXT: sltui $a0, $s3, 1 +; LA32-NEXT: add.w $s4, $s4, $a0 +; LA32-NEXT: xor $a0, $s3, $s1 +; LA32-NEXT: xor $a1, $s4, $s0 +; LA32-NEXT: or $a0, $a0, $a1 +; LA32-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload +; LA32-NEXT: vadd.b $vr1, $vr0, $vr1 +; LA32-NEXT: vst $vr1, $sp, 0 # 16-byte Folded Spill +; LA32-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload +; LA32-NEXT: bnez $a0, .LBB4_2 +; LA32-NEXT: b .LBB4_4 +; LA32-NEXT: .LBB4_3: +; LA32-NEXT: vrepli.b $vr0, 0 +; LA32-NEXT: .LBB4_4: # %for.cond.cleanup +; LA32-NEXT: vstelm.b $vr0, $s2, 16, 1 +; LA32-NEXT: ld.w $s4, $sp, 20 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s3, $sp, 24 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s2, $sp, 28 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s1, $sp, 32 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s0, $sp, 36 # 4-byte Folded Reload +; LA32-NEXT: ld.w $fp, $sp, 40 # 4-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 44 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 48 +; LA32-NEXT: ret +; +; LA64-LABEL: sink_fold_extracti8: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -48 +; LA64-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill +; LA64-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill +; LA64-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill +; LA64-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill +; LA64-NEXT: move $s0, $a1 +; LA64-NEXT: slli.d $a1, $a0, 4 +; LA64-NEXT: alsl.d $a0, $a0, $a1, 3 +; LA64-NEXT: add.d $s1, $a2, $a0 +; LA64-NEXT: blez $s0, .LBB4_3 +; LA64-NEXT: # %bb.1: # %for.body.preheader +; LA64-NEXT: move $fp, $a2 +; LA64-NEXT: vrepli.b $vr0, 0 +; LA64-NEXT: .p2align 4, , 16 +; LA64-NEXT: .LBB4_2: # %for.body +; LA64-NEXT: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill +; LA64-NEXT: move $a0, $fp +; LA64-NEXT: pcaddu18i $ra, %call36(f) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: vldrepl.b $vr0, $s1, 16 +; LA64-NEXT: addi.d $s0, $s0, -1 +; LA64-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload +; LA64-NEXT: vadd.b $vr1, $vr0, $vr1 +; LA64-NEXT: vst $vr1, $sp, 0 # 16-byte Folded Spill +; LA64-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload +; LA64-NEXT: bnez $s0, .LBB4_2 +; LA64-NEXT: b .LBB4_4 +; LA64-NEXT: .LBB4_3: +; LA64-NEXT: vrepli.b $vr0, 0 +; LA64-NEXT: .LBB4_4: # %for.cond.cleanup +; LA64-NEXT: vstelm.b $vr0, $s1, 16, 1 +; LA64-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload +; LA64-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload +; LA64-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload +; LA64-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 48 +; LA64-NEXT: ret +entry: + %y = getelementptr inbounds %struct.S, ptr %a, i64 %k, i32 2 + %cmp = icmp sgt i64 %n, 0 + br i1 %cmp, label %for.body, label %for.cond.cleanup + +for.body: ; preds = %entry, %for.body + %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %sum.0 = phi <16 x i8> [ zeroinitializer, %entry ], [ %addv, %for.body ] + call void @f(ptr %a) + %e = load i8, ptr %y + %ins0 = insertelement <16 x i8> poison, i8 %e, i32 0 + %v = shufflevector <16 x i8> %ins0, <16 x i8> poison, <16 x i32> zeroinitializer + %addv = add <16 x i8> %v, %sum.0 + %inc = add nuw nsw i64 %i.0, 1 + %exitcond = icmp eq i64 %inc, %n + br i1 %exitcond, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + %sum.lcssa = phi <16 x i8> [ zeroinitializer, %entry ], [ %addv, %for.body ] + %res = extractelement <16 x i8> %sum.lcssa, i32 1 + store i8 %res, ptr %y + ret void +} + +define void @sink_fold_extractf64(i64 %k, i64 %n, ptr %a) nounwind { +; LA32-LABEL: sink_fold_extractf64: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -80 +; LA32-NEXT: st.w $ra, $sp, 76 # 4-byte Folded Spill +; LA32-NEXT: st.w $fp, $sp, 72 # 4-byte Folded Spill +; LA32-NEXT: st.w $s0, $sp, 68 # 4-byte Folded Spill +; LA32-NEXT: st.w $s1, $sp, 64 # 4-byte Folded Spill +; LA32-NEXT: st.w $s2, $sp, 60 # 4-byte Folded Spill +; LA32-NEXT: st.w $s3, $sp, 56 # 4-byte Folded Spill +; LA32-NEXT: st.w $s4, $sp, 52 # 4-byte Folded Spill +; LA32-NEXT: move $s0, $a3 +; LA32-NEXT: move $s1, $a2 +; LA32-NEXT: slli.w $a1, $a0, 4 +; LA32-NEXT: alsl.w $a0, $a0, $a1, 3 +; LA32-NEXT: sltui $a1, $a3, 1 +; LA32-NEXT: slti $a2, $a3, 0 +; LA32-NEXT: masknez $a2, $a2, $a1 +; LA32-NEXT: sltui $a3, $s1, 1 +; LA32-NEXT: maskeqz $a1, $a3, $a1 +; LA32-NEXT: or $a1, $a1, $a2 +; LA32-NEXT: add.w $s2, $a4, $a0 +; LA32-NEXT: bnez $a1, .LBB5_3 +; LA32-NEXT: # %bb.1: # %for.body.preheader +; LA32-NEXT: move $fp, $a4 +; LA32-NEXT: move $s3, $zero +; LA32-NEXT: move $s4, $zero +; LA32-NEXT: xvrepli.b $xr0, 0 +; LA32-NEXT: .p2align 4, , 16 +; LA32-NEXT: .LBB5_2: # %for.body +; LA32-NEXT: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: xvst $xr0, $sp, 16 # 32-byte Folded Spill +; LA32-NEXT: move $a0, $fp +; LA32-NEXT: bl f +; LA32-NEXT: xvldrepl.d $xr0, $s2, 8 +; LA32-NEXT: addi.w $s3, $s3, 1 +; LA32-NEXT: sltui $a0, $s3, 1 +; LA32-NEXT: add.w $s4, $s4, $a0 +; LA32-NEXT: xor $a0, $s3, $s1 +; LA32-NEXT: xor $a1, $s4, $s0 +; LA32-NEXT: or $a0, $a0, $a1 +; LA32-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload +; LA32-NEXT: xvfadd.d $xr1, $xr0, $xr1 +; LA32-NEXT: xvst $xr1, $sp, 16 # 32-byte Folded Spill +; LA32-NEXT: xvld $xr0, $sp, 16 # 32-byte Folded Reload +; LA32-NEXT: bnez $a0, .LBB5_2 +; LA32-NEXT: b .LBB5_4 +; LA32-NEXT: .LBB5_3: +; LA32-NEXT: xvrepli.b $xr0, 0 +; LA32-NEXT: .LBB5_4: # %for.cond.cleanup +; LA32-NEXT: xvstelm.d $xr0, $s2, 8, 1 +; LA32-NEXT: ld.w $s4, $sp, 52 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s3, $sp, 56 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s2, $sp, 60 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s1, $sp, 64 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s0, $sp, 68 # 4-byte Folded Reload +; LA32-NEXT: ld.w $fp, $sp, 72 # 4-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 76 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 80 +; LA32-NEXT: ret +; +; LA64-LABEL: sink_fold_extractf64: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -80 +; LA64-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill +; LA64-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill +; LA64-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill +; LA64-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill +; LA64-NEXT: move $s0, $a1 +; LA64-NEXT: slli.d $a1, $a0, 4 +; LA64-NEXT: alsl.d $a0, $a0, $a1, 3 +; LA64-NEXT: add.d $s1, $a2, $a0 +; LA64-NEXT: blez $s0, .LBB5_3 +; LA64-NEXT: # %bb.1: # %for.body.preheader +; LA64-NEXT: move $fp, $a2 +; LA64-NEXT: xvrepli.b $xr0, 0 +; LA64-NEXT: .p2align 4, , 16 +; LA64-NEXT: .LBB5_2: # %for.body +; LA64-NEXT: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: xvst $xr0, $sp, 16 # 32-byte Folded Spill +; LA64-NEXT: move $a0, $fp +; LA64-NEXT: pcaddu18i $ra, %call36(f) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: xvldrepl.d $xr0, $s1, 8 +; LA64-NEXT: addi.d $s0, $s0, -1 +; LA64-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload +; LA64-NEXT: xvfadd.d $xr1, $xr0, $xr1 +; LA64-NEXT: xvst $xr1, $sp, 16 # 32-byte Folded Spill +; LA64-NEXT: xvld $xr0, $sp, 16 # 32-byte Folded Reload +; LA64-NEXT: bnez $s0, .LBB5_2 +; LA64-NEXT: b .LBB5_4 +; LA64-NEXT: .LBB5_3: +; LA64-NEXT: xvrepli.b $xr0, 0 +; LA64-NEXT: .LBB5_4: # %for.cond.cleanup +; LA64-NEXT: xvstelm.d $xr0, $s1, 8, 1 +; LA64-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload +; LA64-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload +; LA64-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload +; LA64-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 80 +; LA64-NEXT: ret +entry: + %y = getelementptr inbounds %struct.F, ptr %a, i64 %k, i32 1 + %cmp = icmp sgt i64 %n, 0 + br i1 %cmp, label %for.body, label %for.cond.cleanup + +for.body: ; preds = %entry, %for.body + %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %sum.0 = phi <4 x double> [ zeroinitializer, %entry ], [ %addv, %for.body ] + call void @f(ptr %a) + %e = load double, ptr %y + %ins0 = insertelement <4 x double> poison, double %e, i32 0 + %v = shufflevector <4 x double> %ins0, <4 x double> poison, <4 x i32> zeroinitializer + %addv = fadd <4 x double> %v, %sum.0 + %inc = add nuw nsw i64 %i.0, 1 + %exitcond = icmp eq i64 %inc, %n + br i1 %exitcond, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + %sum.lcssa = phi <4 x double> [ zeroinitializer, %entry ], [ %addv, %for.body ] + %res = extractelement <4 x double> %sum.lcssa, i32 1 + store double %res, ptr %y + ret void +} + +declare void @f(ptr) diff --git a/llvm/test/CodeGen/LoongArch/stptr.ll b/llvm/test/CodeGen/LoongArch/stptr.ll index d70f9f4..23b433a 100644 --- a/llvm/test/CodeGen/LoongArch/stptr.ll +++ b/llvm/test/CodeGen/LoongArch/stptr.ll @@ -23,8 +23,7 @@ define void @stptr_w(ptr %p, i32 signext %val) nounwind { ; LA32-LABEL: stptr_w: ; LA32: # %bb.0: ; LA32-NEXT: addi.w $a0, $a0, 2047 -; LA32-NEXT: addi.w $a0, $a0, 1 -; LA32-NEXT: st.w $a1, $a0, 0 +; LA32-NEXT: st.w $a1, $a0, 1 ; LA32-NEXT: ret ; ; LA64-LABEL: stptr_w: @@ -77,9 +76,8 @@ define void @stptr_d(ptr %p, i64 %val) nounwind { ; LA32-LABEL: stptr_d: ; LA32: # %bb.0: ; LA32-NEXT: addi.w $a0, $a0, 2047 -; LA32-NEXT: addi.w $a0, $a0, 1 -; LA32-NEXT: st.w $a2, $a0, 4 -; LA32-NEXT: st.w $a1, $a0, 0 +; LA32-NEXT: st.w $a2, $a0, 5 +; LA32-NEXT: st.w $a1, $a0, 1 ; LA32-NEXT: ret ; ; LA64-LABEL: stptr_d: |
