Diffstat (limited to 'llvm/test/CodeGen/LoongArch')
-rw-r--r--  llvm/test/CodeGen/LoongArch/O0-pipeline.ll                 |    2
-rw-r--r--  llvm/test/CodeGen/LoongArch/expandmemcmp-optsize.ll        | 2239
-rw-r--r--  llvm/test/CodeGen/LoongArch/expandmemcmp.ll                | 3106
-rw-r--r--  llvm/test/CodeGen/LoongArch/issue163681.ll                 |   56
-rw-r--r--  llvm/test/CodeGen/LoongArch/lasx/and-not-combine.ll        |  398
-rw-r--r--  llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll             |   63
-rw-r--r--  llvm/test/CodeGen/LoongArch/lasx/fp-max-min.ll             |  116
-rw-r--r--  llvm/test/CodeGen/LoongArch/lasx/fp-rounding.ll            |  200
-rw-r--r--  llvm/test/CodeGen/LoongArch/lasx/intrinsic-conversion.ll   |  303
-rw-r--r--  llvm/test/CodeGen/LoongArch/lasx/rotl-rotr.ll              |  248
-rw-r--r--  llvm/test/CodeGen/LoongArch/lasx/shuffle-as-bswap.ll       |   47
-rw-r--r--  llvm/test/CodeGen/LoongArch/ldptr.ll                       |   10
-rw-r--r--  llvm/test/CodeGen/LoongArch/lsx/and-not-combine.ll         |  328
-rw-r--r--  llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll              |   63
-rw-r--r--  llvm/test/CodeGen/LoongArch/lsx/fp-max-min.ll              |   68
-rw-r--r--  llvm/test/CodeGen/LoongArch/lsx/fp-rounding.ll             |   88
-rw-r--r--  llvm/test/CodeGen/LoongArch/lsx/rotl-rotr.ll               |  246
-rw-r--r--  llvm/test/CodeGen/LoongArch/lsx/shuffle-as-bswap.ll        |   47
-rw-r--r--  llvm/test/CodeGen/LoongArch/memcmp.ll                      |   27
-rw-r--r--  llvm/test/CodeGen/LoongArch/opt-pipeline.ll                |    2
-rw-r--r--  llvm/test/CodeGen/LoongArch/sink-fold-addi.ll              |  746
-rw-r--r--  llvm/test/CodeGen/LoongArch/stptr.ll                       |    8
22 files changed, 7959 insertions, 452 deletions
diff --git a/llvm/test/CodeGen/LoongArch/O0-pipeline.ll b/llvm/test/CodeGen/LoongArch/O0-pipeline.ll
index 9006b5c..5f4fccd 100644
--- a/llvm/test/CodeGen/LoongArch/O0-pipeline.ll
+++ b/llvm/test/CodeGen/LoongArch/O0-pipeline.ll
@@ -9,9 +9,11 @@
; CHECK-LABEL: Pass Arguments:
; CHECK-NEXT: Target Library Information
+; CHECK-NEXT: Runtime Library Function Analysis
; CHECK-NEXT: Target Pass Configuration
; CHECK-NEXT: Machine Module Information
; CHECK-NEXT: Target Transform Information
+; CHECK-NEXT: Library Function Lowering Analysis
; CHECK-NEXT: Create Garbage Collector Module Metadata
; CHECK-NEXT: Assumption Cache Tracker
; CHECK-NEXT: Profile summary info
diff --git a/llvm/test/CodeGen/LoongArch/expandmemcmp-optsize.ll b/llvm/test/CodeGen/LoongArch/expandmemcmp-optsize.ll
new file mode 100644
index 0000000..36670fa
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/expandmemcmp-optsize.ll
@@ -0,0 +1,2239 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: sed 's/iGRLen/i32/g' %s | llc --mtriple=loongarch32 --mattr=+ual \
+; RUN: | FileCheck %s --check-prefixes=CHECK,LA32,LA32-UAL
+; RUN: sed 's/iGRLen/i64/g' %s | llc --mtriple=loongarch64 --mattr=+ual \
+; RUN: | FileCheck %s --check-prefixes=CHECK,LA64,LA64-UAL
+; RUN: sed 's/iGRLen/i32/g' %s | llc --mtriple=loongarch32 --mattr=-ual \
+; RUN: | FileCheck %s --check-prefixes=CHECK,LA32,LA32-NUAL
+; RUN: sed 's/iGRLen/i64/g' %s | llc --mtriple=loongarch64 --mattr=-ual \
+; RUN: | FileCheck %s --check-prefixes=CHECK,LA64,LA64-NUAL
+
+declare signext i32 @bcmp(ptr, ptr, iGRLen) nounwind readonly
+declare signext i32 @memcmp(ptr, ptr, iGRLen) nounwind readonly
+
+define signext i32 @bcmp_size_0(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-LABEL: bcmp_size_0:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: move $a2, $zero
+; LA32-NEXT: bl bcmp
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: bcmp_size_0:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: addi.d $sp, $sp, -16
+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: move $a2, $zero
+; LA64-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 0)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_1(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-UAL-LABEL: bcmp_size_1:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.bu $a0, $a0, 0
+; LA32-UAL-NEXT: ld.bu $a1, $a1, 0
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: sltu $a0, $zero, $a0
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_size_1:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.bu $a0, $a0, 0
+; LA64-UAL-NEXT: ld.bu $a1, $a1, 0
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: sltu $a0, $zero, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_size_1:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 1
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_size_1:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 1
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 1)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_2(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-UAL-LABEL: bcmp_size_2:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.hu $a0, $a0, 0
+; LA32-UAL-NEXT: ld.hu $a1, $a1, 0
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: sltu $a0, $zero, $a0
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_size_2:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.hu $a0, $a0, 0
+; LA64-UAL-NEXT: ld.hu $a1, $a1, 0
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: sltu $a0, $zero, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_size_2:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 2
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_size_2:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 2
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 2)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_3(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-UAL-LABEL: bcmp_size_3:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.hu $a2, $a0, 0
+; LA32-UAL-NEXT: ld.hu $a3, $a1, 0
+; LA32-UAL-NEXT: ld.bu $a0, $a0, 2
+; LA32-UAL-NEXT: ld.bu $a1, $a1, 2
+; LA32-UAL-NEXT: xor $a2, $a2, $a3
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: or $a0, $a2, $a0
+; LA32-UAL-NEXT: sltu $a0, $zero, $a0
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_size_3:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.hu $a2, $a0, 0
+; LA64-UAL-NEXT: ld.hu $a3, $a1, 0
+; LA64-UAL-NEXT: ld.bu $a0, $a0, 2
+; LA64-UAL-NEXT: ld.bu $a1, $a1, 2
+; LA64-UAL-NEXT: xor $a2, $a2, $a3
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: or $a0, $a2, $a0
+; LA64-UAL-NEXT: sltu $a0, $zero, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_size_3:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 3
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_size_3:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 3
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 3)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_4(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-UAL-LABEL: bcmp_size_4:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a0, $a0, 0
+; LA32-UAL-NEXT: ld.w $a1, $a1, 0
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: sltu $a0, $zero, $a0
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_size_4:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.w $a0, $a0, 0
+; LA64-UAL-NEXT: ld.w $a1, $a1, 0
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: sltu $a0, $zero, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_size_4:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 4
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_size_4:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 4
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_5(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-UAL-LABEL: bcmp_size_5:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a2, $a0, 0
+; LA32-UAL-NEXT: ld.w $a3, $a1, 0
+; LA32-UAL-NEXT: ld.bu $a0, $a0, 4
+; LA32-UAL-NEXT: ld.bu $a1, $a1, 4
+; LA32-UAL-NEXT: xor $a2, $a2, $a3
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: or $a0, $a2, $a0
+; LA32-UAL-NEXT: sltu $a0, $zero, $a0
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_size_5:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.w $a2, $a0, 0
+; LA64-UAL-NEXT: ld.w $a3, $a1, 0
+; LA64-UAL-NEXT: ld.bu $a0, $a0, 4
+; LA64-UAL-NEXT: ld.bu $a1, $a1, 4
+; LA64-UAL-NEXT: xor $a2, $a2, $a3
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: or $a0, $a2, $a0
+; LA64-UAL-NEXT: sltu $a0, $zero, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_size_5:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 5
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_size_5:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 5
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 5)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_6(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-UAL-LABEL: bcmp_size_6:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a2, $a0, 0
+; LA32-UAL-NEXT: ld.w $a3, $a1, 0
+; LA32-UAL-NEXT: ld.hu $a0, $a0, 4
+; LA32-UAL-NEXT: ld.hu $a1, $a1, 4
+; LA32-UAL-NEXT: xor $a2, $a2, $a3
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: or $a0, $a2, $a0
+; LA32-UAL-NEXT: sltu $a0, $zero, $a0
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_size_6:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.w $a2, $a0, 0
+; LA64-UAL-NEXT: ld.w $a3, $a1, 0
+; LA64-UAL-NEXT: ld.hu $a0, $a0, 4
+; LA64-UAL-NEXT: ld.hu $a1, $a1, 4
+; LA64-UAL-NEXT: xor $a2, $a2, $a3
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: or $a0, $a2, $a0
+; LA64-UAL-NEXT: sltu $a0, $zero, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_size_6:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 6
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_size_6:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 6
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 6)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_7(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-UAL-LABEL: bcmp_size_7:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a2, $a0, 0
+; LA32-UAL-NEXT: ld.w $a3, $a1, 0
+; LA32-UAL-NEXT: ld.w $a0, $a0, 3
+; LA32-UAL-NEXT: ld.w $a1, $a1, 3
+; LA32-UAL-NEXT: xor $a2, $a2, $a3
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: or $a0, $a2, $a0
+; LA32-UAL-NEXT: sltu $a0, $zero, $a0
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_size_7:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.w $a2, $a0, 0
+; LA64-UAL-NEXT: ld.w $a3, $a1, 0
+; LA64-UAL-NEXT: ld.w $a0, $a0, 3
+; LA64-UAL-NEXT: ld.w $a1, $a1, 3
+; LA64-UAL-NEXT: xor $a2, $a2, $a3
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: or $a0, $a2, $a0
+; LA64-UAL-NEXT: sltu $a0, $zero, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_size_7:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 7
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_size_7:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 7
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 7)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_8(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-UAL-LABEL: bcmp_size_8:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a2, $a0, 0
+; LA32-UAL-NEXT: ld.w $a3, $a1, 0
+; LA32-UAL-NEXT: ld.w $a0, $a0, 4
+; LA32-UAL-NEXT: ld.w $a1, $a1, 4
+; LA32-UAL-NEXT: xor $a2, $a2, $a3
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: or $a0, $a2, $a0
+; LA32-UAL-NEXT: sltu $a0, $zero, $a0
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_size_8:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.d $a0, $a0, 0
+; LA64-UAL-NEXT: ld.d $a1, $a1, 0
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: sltu $a0, $zero, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_size_8:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 8
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_size_8:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 8
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 8)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_15(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-UAL-LABEL: bcmp_size_15:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a2, $a0, 0
+; LA32-UAL-NEXT: ld.w $a3, $a1, 0
+; LA32-UAL-NEXT: ld.w $a4, $a0, 4
+; LA32-UAL-NEXT: ld.w $a5, $a1, 4
+; LA32-UAL-NEXT: ld.w $a6, $a0, 8
+; LA32-UAL-NEXT: ld.w $a7, $a1, 8
+; LA32-UAL-NEXT: ld.w $a0, $a0, 11
+; LA32-UAL-NEXT: ld.w $a1, $a1, 11
+; LA32-UAL-NEXT: xor $a2, $a2, $a3
+; LA32-UAL-NEXT: xor $a3, $a4, $a5
+; LA32-UAL-NEXT: xor $a4, $a6, $a7
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: or $a1, $a2, $a3
+; LA32-UAL-NEXT: or $a0, $a4, $a0
+; LA32-UAL-NEXT: or $a0, $a1, $a0
+; LA32-UAL-NEXT: sltu $a0, $zero, $a0
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_size_15:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.d $a2, $a0, 0
+; LA64-UAL-NEXT: ld.d $a3, $a1, 0
+; LA64-UAL-NEXT: ld.d $a0, $a0, 7
+; LA64-UAL-NEXT: ld.d $a1, $a1, 7
+; LA64-UAL-NEXT: xor $a2, $a2, $a3
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: or $a0, $a2, $a0
+; LA64-UAL-NEXT: sltu $a0, $zero, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_size_15:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 15
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_size_15:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 15
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 15)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_16(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-UAL-LABEL: bcmp_size_16:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a2, $a0, 0
+; LA32-UAL-NEXT: ld.w $a3, $a1, 0
+; LA32-UAL-NEXT: ld.w $a4, $a0, 4
+; LA32-UAL-NEXT: ld.w $a5, $a1, 4
+; LA32-UAL-NEXT: ld.w $a6, $a0, 8
+; LA32-UAL-NEXT: ld.w $a7, $a1, 8
+; LA32-UAL-NEXT: ld.w $a0, $a0, 12
+; LA32-UAL-NEXT: ld.w $a1, $a1, 12
+; LA32-UAL-NEXT: xor $a2, $a2, $a3
+; LA32-UAL-NEXT: xor $a3, $a4, $a5
+; LA32-UAL-NEXT: xor $a4, $a6, $a7
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: or $a1, $a2, $a3
+; LA32-UAL-NEXT: or $a0, $a4, $a0
+; LA32-UAL-NEXT: or $a0, $a1, $a0
+; LA32-UAL-NEXT: sltu $a0, $zero, $a0
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_size_16:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.d $a2, $a0, 0
+; LA64-UAL-NEXT: ld.d $a3, $a1, 0
+; LA64-UAL-NEXT: ld.d $a0, $a0, 8
+; LA64-UAL-NEXT: ld.d $a1, $a1, 8
+; LA64-UAL-NEXT: xor $a2, $a2, $a3
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: or $a0, $a2, $a0
+; LA64-UAL-NEXT: sltu $a0, $zero, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_size_16:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 16
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_size_16:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 16
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 16)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_31(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-LABEL: bcmp_size_31:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: ori $a2, $zero, 31
+; LA32-NEXT: bl bcmp
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_size_31:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.d $a2, $a0, 0
+; LA64-UAL-NEXT: ld.d $a3, $a1, 0
+; LA64-UAL-NEXT: ld.d $a4, $a0, 8
+; LA64-UAL-NEXT: ld.d $a5, $a1, 8
+; LA64-UAL-NEXT: ld.d $a6, $a0, 16
+; LA64-UAL-NEXT: ld.d $a7, $a1, 16
+; LA64-UAL-NEXT: ld.d $a0, $a0, 23
+; LA64-UAL-NEXT: ld.d $a1, $a1, 23
+; LA64-UAL-NEXT: xor $a2, $a2, $a3
+; LA64-UAL-NEXT: xor $a3, $a4, $a5
+; LA64-UAL-NEXT: xor $a4, $a6, $a7
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: or $a1, $a2, $a3
+; LA64-UAL-NEXT: or $a0, $a4, $a0
+; LA64-UAL-NEXT: or $a0, $a1, $a0
+; LA64-UAL-NEXT: sltu $a0, $zero, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_size_31:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 31
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 31)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_32(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-LABEL: bcmp_size_32:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: ori $a2, $zero, 32
+; LA32-NEXT: bl bcmp
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_size_32:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.d $a2, $a0, 0
+; LA64-UAL-NEXT: ld.d $a3, $a1, 0
+; LA64-UAL-NEXT: ld.d $a4, $a0, 8
+; LA64-UAL-NEXT: ld.d $a5, $a1, 8
+; LA64-UAL-NEXT: ld.d $a6, $a0, 16
+; LA64-UAL-NEXT: ld.d $a7, $a1, 16
+; LA64-UAL-NEXT: ld.d $a0, $a0, 24
+; LA64-UAL-NEXT: ld.d $a1, $a1, 24
+; LA64-UAL-NEXT: xor $a2, $a2, $a3
+; LA64-UAL-NEXT: xor $a3, $a4, $a5
+; LA64-UAL-NEXT: xor $a4, $a6, $a7
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: or $a1, $a2, $a3
+; LA64-UAL-NEXT: or $a0, $a4, $a0
+; LA64-UAL-NEXT: or $a0, $a1, $a0
+; LA64-UAL-NEXT: sltu $a0, $zero, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_size_32:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 32
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 32)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_63(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-LABEL: bcmp_size_63:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: ori $a2, $zero, 63
+; LA32-NEXT: bl bcmp
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: bcmp_size_63:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: addi.d $sp, $sp, -16
+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: ori $a2, $zero, 63
+; LA64-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 63)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_64(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-LABEL: bcmp_size_64:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: ori $a2, $zero, 64
+; LA32-NEXT: bl bcmp
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: bcmp_size_64:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: addi.d $sp, $sp, -16
+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: ori $a2, $zero, 64
+; LA64-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 64)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_127(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-LABEL: bcmp_size_127:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: ori $a2, $zero, 127
+; LA32-NEXT: bl bcmp
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: bcmp_size_127:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: addi.d $sp, $sp, -16
+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: ori $a2, $zero, 127
+; LA64-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 127)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_128(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-LABEL: bcmp_size_128:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: ori $a2, $zero, 128
+; LA32-NEXT: bl bcmp
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: bcmp_size_128:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: addi.d $sp, $sp, -16
+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: ori $a2, $zero, 128
+; LA64-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 128)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_runtime(ptr %s1, ptr %s2, iGRLen %len) nounwind optsize {
+; LA32-LABEL: bcmp_size_runtime:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: bl bcmp
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: bcmp_size_runtime:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: addi.d $sp, $sp, -16
+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen %len)
+ ret i32 %bcmp
+}
+
+define i1 @bcmp_eq_zero(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-UAL-LABEL: bcmp_eq_zero:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a0, $a0, 0
+; LA32-UAL-NEXT: ld.w $a1, $a1, 0
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: sltui $a0, $a0, 1
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_eq_zero:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.w $a0, $a0, 0
+; LA64-UAL-NEXT: ld.w $a1, $a1, 0
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: sltui $a0, $a0, 1
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_eq_zero:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 4
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: sltui $a0, $a0, 1
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_eq_zero:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 4
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: sltui $a0, $a0, 1
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4)
+ %ret = icmp eq i32 %bcmp, 0
+ ret i1 %ret
+}
+
+define i1 @bcmp_lt_zero(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-UAL-LABEL: bcmp_lt_zero:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: move $a0, $zero
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_lt_zero:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: move $a0, $zero
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_lt_zero:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 4
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: srli.w $a0, $a0, 31
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_lt_zero:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 4
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: slti $a0, $a0, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4)
+ %ret = icmp slt i32 %bcmp, 0
+ ret i1 %ret
+}
+
+define i1 @bcmp_gt_zero(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-UAL-LABEL: bcmp_gt_zero:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a0, $a0, 0
+; LA32-UAL-NEXT: ld.w $a1, $a1, 0
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: sltu $a0, $zero, $a0
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_gt_zero:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.w $a0, $a0, 0
+; LA64-UAL-NEXT: ld.w $a1, $a1, 0
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: sltu $a0, $zero, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_gt_zero:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 4
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: slt $a0, $zero, $a0
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_gt_zero:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 4
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: slt $a0, $zero, $a0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4)
+ %ret = icmp sgt i32 %bcmp, 0
+ ret i1 %ret
+}
+
+define signext i32 @memcmp_size_0(ptr %s1, ptr %s2) nounwind optsize {
+; CHECK-LABEL: memcmp_size_0:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: move $a0, $zero
+; CHECK-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 0)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_1(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-UAL-LABEL: memcmp_size_1:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.bu $a0, $a0, 0
+; LA32-UAL-NEXT: ld.bu $a1, $a1, 0
+; LA32-UAL-NEXT: sub.w $a0, $a0, $a1
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_size_1:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.bu $a0, $a0, 0
+; LA64-UAL-NEXT: ld.bu $a1, $a1, 0
+; LA64-UAL-NEXT: sub.d $a0, $a0, $a1
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_size_1:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 1
+; LA32-NUAL-NEXT: bl memcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_size_1:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 1
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 1)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_2(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-UAL-LABEL: memcmp_size_2:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.hu $a0, $a0, 0
+; LA32-UAL-NEXT: ld.hu $a1, $a1, 0
+; LA32-UAL-NEXT: srli.w $a2, $a0, 8
+; LA32-UAL-NEXT: slli.w $a0, $a0, 8
+; LA32-UAL-NEXT: or $a0, $a0, $a2
+; LA32-UAL-NEXT: srli.w $a2, $a1, 8
+; LA32-UAL-NEXT: slli.w $a1, $a1, 8
+; LA32-UAL-NEXT: or $a1, $a1, $a2
+; LA32-UAL-NEXT: lu12i.w $a2, 15
+; LA32-UAL-NEXT: ori $a2, $a2, 4095
+; LA32-UAL-NEXT: and $a0, $a0, $a2
+; LA32-UAL-NEXT: and $a1, $a1, $a2
+; LA32-UAL-NEXT: sub.w $a0, $a0, $a1
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_size_2:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.h $a0, $a0, 0
+; LA64-UAL-NEXT: ld.h $a1, $a1, 0
+; LA64-UAL-NEXT: revb.2h $a0, $a0
+; LA64-UAL-NEXT: revb.2h $a1, $a1
+; LA64-UAL-NEXT: bstrpick.d $a0, $a0, 15, 0
+; LA64-UAL-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-UAL-NEXT: sub.d $a0, $a0, $a1
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_size_2:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 2
+; LA32-NUAL-NEXT: bl memcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_size_2:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 2
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 2)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_3(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-UAL-LABEL: memcmp_size_3:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.bu $a2, $a0, 2
+; LA32-UAL-NEXT: ld.hu $a0, $a0, 0
+; LA32-UAL-NEXT: ld.bu $a3, $a1, 2
+; LA32-UAL-NEXT: ld.hu $a1, $a1, 0
+; LA32-UAL-NEXT: lu12i.w $a4, 15
+; LA32-UAL-NEXT: ori $a4, $a4, 3840
+; LA32-UAL-NEXT: and $a5, $a0, $a4
+; LA32-UAL-NEXT: or $a2, $a5, $a2
+; LA32-UAL-NEXT: slli.w $a2, $a2, 8
+; LA32-UAL-NEXT: slli.w $a0, $a0, 24
+; LA32-UAL-NEXT: or $a0, $a2, $a0
+; LA32-UAL-NEXT: and $a2, $a1, $a4
+; LA32-UAL-NEXT: or $a2, $a2, $a3
+; LA32-UAL-NEXT: slli.w $a2, $a2, 8
+; LA32-UAL-NEXT: slli.w $a1, $a1, 24
+; LA32-UAL-NEXT: or $a1, $a2, $a1
+; LA32-UAL-NEXT: sltu $a2, $a0, $a1
+; LA32-UAL-NEXT: sltu $a0, $a1, $a0
+; LA32-UAL-NEXT: sub.w $a0, $a0, $a2
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_size_3:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.bu $a2, $a0, 2
+; LA64-UAL-NEXT: ld.hu $a0, $a0, 0
+; LA64-UAL-NEXT: ld.bu $a3, $a1, 2
+; LA64-UAL-NEXT: ld.hu $a1, $a1, 0
+; LA64-UAL-NEXT: slli.d $a2, $a2, 16
+; LA64-UAL-NEXT: or $a0, $a0, $a2
+; LA64-UAL-NEXT: slli.d $a2, $a3, 16
+; LA64-UAL-NEXT: or $a1, $a1, $a2
+; LA64-UAL-NEXT: revb.2w $a0, $a0
+; LA64-UAL-NEXT: addi.w $a0, $a0, 0
+; LA64-UAL-NEXT: revb.2w $a1, $a1
+; LA64-UAL-NEXT: addi.w $a1, $a1, 0
+; LA64-UAL-NEXT: sltu $a2, $a0, $a1
+; LA64-UAL-NEXT: sltu $a0, $a1, $a0
+; LA64-UAL-NEXT: sub.d $a0, $a0, $a2
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_size_3:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 3
+; LA32-NUAL-NEXT: bl memcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_size_3:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 3
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 3)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_4(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-UAL-LABEL: memcmp_size_4:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a0, $a0, 0
+; LA32-UAL-NEXT: ld.w $a1, $a1, 0
+; LA32-UAL-NEXT: srli.w $a2, $a0, 8
+; LA32-UAL-NEXT: lu12i.w $a3, 15
+; LA32-UAL-NEXT: ori $a3, $a3, 3840
+; LA32-UAL-NEXT: and $a2, $a2, $a3
+; LA32-UAL-NEXT: srli.w $a4, $a0, 24
+; LA32-UAL-NEXT: or $a2, $a2, $a4
+; LA32-UAL-NEXT: and $a4, $a0, $a3
+; LA32-UAL-NEXT: slli.w $a4, $a4, 8
+; LA32-UAL-NEXT: slli.w $a0, $a0, 24
+; LA32-UAL-NEXT: or $a0, $a0, $a4
+; LA32-UAL-NEXT: or $a0, $a0, $a2
+; LA32-UAL-NEXT: srli.w $a2, $a1, 8
+; LA32-UAL-NEXT: and $a2, $a2, $a3
+; LA32-UAL-NEXT: srli.w $a4, $a1, 24
+; LA32-UAL-NEXT: or $a2, $a2, $a4
+; LA32-UAL-NEXT: and $a3, $a1, $a3
+; LA32-UAL-NEXT: slli.w $a3, $a3, 8
+; LA32-UAL-NEXT: slli.w $a1, $a1, 24
+; LA32-UAL-NEXT: or $a1, $a1, $a3
+; LA32-UAL-NEXT: or $a1, $a1, $a2
+; LA32-UAL-NEXT: sltu $a2, $a0, $a1
+; LA32-UAL-NEXT: sltu $a0, $a1, $a0
+; LA32-UAL-NEXT: sub.w $a0, $a0, $a2
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_size_4:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.w $a0, $a0, 0
+; LA64-UAL-NEXT: ld.w $a1, $a1, 0
+; LA64-UAL-NEXT: revb.2w $a0, $a0
+; LA64-UAL-NEXT: addi.w $a0, $a0, 0
+; LA64-UAL-NEXT: revb.2w $a1, $a1
+; LA64-UAL-NEXT: addi.w $a1, $a1, 0
+; LA64-UAL-NEXT: sltu $a2, $a0, $a1
+; LA64-UAL-NEXT: sltu $a0, $a1, $a0
+; LA64-UAL-NEXT: sub.d $a0, $a0, $a2
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_size_4:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 4
+; LA32-NUAL-NEXT: bl memcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_size_4:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 4
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_5(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-UAL-LABEL: memcmp_size_5:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a2, $a0, 0
+; LA32-UAL-NEXT: ld.w $a3, $a1, 0
+; LA32-UAL-NEXT: srli.w $a4, $a2, 8
+; LA32-UAL-NEXT: lu12i.w $a5, 15
+; LA32-UAL-NEXT: ori $a5, $a5, 3840
+; LA32-UAL-NEXT: and $a4, $a4, $a5
+; LA32-UAL-NEXT: srli.w $a6, $a2, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: and $a6, $a2, $a5
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a2, $a2, 24
+; LA32-UAL-NEXT: or $a2, $a2, $a6
+; LA32-UAL-NEXT: or $a2, $a2, $a4
+; LA32-UAL-NEXT: srli.w $a4, $a3, 8
+; LA32-UAL-NEXT: and $a4, $a4, $a5
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: and $a5, $a3, $a5
+; LA32-UAL-NEXT: slli.w $a5, $a5, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: or $a3, $a3, $a4
+; LA32-UAL-NEXT: bne $a2, $a3, .LBB26_2
+; LA32-UAL-NEXT: # %bb.1: # %loadbb1
+; LA32-UAL-NEXT: ld.bu $a0, $a0, 4
+; LA32-UAL-NEXT: ld.bu $a1, $a1, 4
+; LA32-UAL-NEXT: sub.w $a0, $a0, $a1
+; LA32-UAL-NEXT: ret
+; LA32-UAL-NEXT: .LBB26_2: # %res_block
+; LA32-UAL-NEXT: sltu $a0, $a2, $a3
+; LA32-UAL-NEXT: sub.w $a0, $zero, $a0
+; LA32-UAL-NEXT: ori $a0, $a0, 1
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_size_5:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.bu $a2, $a0, 4
+; LA64-UAL-NEXT: ld.wu $a0, $a0, 0
+; LA64-UAL-NEXT: ld.bu $a3, $a1, 4
+; LA64-UAL-NEXT: ld.wu $a1, $a1, 0
+; LA64-UAL-NEXT: slli.d $a2, $a2, 32
+; LA64-UAL-NEXT: or $a0, $a0, $a2
+; LA64-UAL-NEXT: slli.d $a2, $a3, 32
+; LA64-UAL-NEXT: or $a1, $a1, $a2
+; LA64-UAL-NEXT: revb.d $a0, $a0
+; LA64-UAL-NEXT: revb.d $a1, $a1
+; LA64-UAL-NEXT: sltu $a2, $a0, $a1
+; LA64-UAL-NEXT: sltu $a0, $a1, $a0
+; LA64-UAL-NEXT: sub.d $a0, $a0, $a2
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_size_5:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 5
+; LA32-NUAL-NEXT: bl memcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_size_5:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 5
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 5)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_6(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-UAL-LABEL: memcmp_size_6:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a3, $a0, 0
+; LA32-UAL-NEXT: ld.w $a4, $a1, 0
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: lu12i.w $a2, 15
+; LA32-UAL-NEXT: ori $a6, $a2, 3840
+; LA32-UAL-NEXT: and $a5, $a5, $a6
+; LA32-UAL-NEXT: srli.w $a7, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a7
+; LA32-UAL-NEXT: and $a7, $a3, $a6
+; LA32-UAL-NEXT: slli.w $a7, $a7, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a7
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a6
+; LA32-UAL-NEXT: srli.w $a7, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a7
+; LA32-UAL-NEXT: and $a6, $a4, $a6
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB27_3
+; LA32-UAL-NEXT: # %bb.1: # %loadbb1
+; LA32-UAL-NEXT: ld.hu $a0, $a0, 4
+; LA32-UAL-NEXT: ld.hu $a1, $a1, 4
+; LA32-UAL-NEXT: srli.w $a3, $a0, 8
+; LA32-UAL-NEXT: slli.w $a0, $a0, 8
+; LA32-UAL-NEXT: or $a0, $a0, $a3
+; LA32-UAL-NEXT: srli.w $a3, $a1, 8
+; LA32-UAL-NEXT: slli.w $a1, $a1, 8
+; LA32-UAL-NEXT: or $a1, $a1, $a3
+; LA32-UAL-NEXT: ori $a2, $a2, 4095
+; LA32-UAL-NEXT: and $a3, $a0, $a2
+; LA32-UAL-NEXT: and $a4, $a1, $a2
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB27_3
+; LA32-UAL-NEXT: # %bb.2:
+; LA32-UAL-NEXT: move $a0, $zero
+; LA32-UAL-NEXT: ret
+; LA32-UAL-NEXT: .LBB27_3: # %res_block
+; LA32-UAL-NEXT: sltu $a0, $a3, $a4
+; LA32-UAL-NEXT: sub.w $a0, $zero, $a0
+; LA32-UAL-NEXT: ori $a0, $a0, 1
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_size_6:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.hu $a2, $a0, 4
+; LA64-UAL-NEXT: ld.wu $a0, $a0, 0
+; LA64-UAL-NEXT: ld.hu $a3, $a1, 4
+; LA64-UAL-NEXT: ld.wu $a1, $a1, 0
+; LA64-UAL-NEXT: slli.d $a2, $a2, 32
+; LA64-UAL-NEXT: or $a0, $a0, $a2
+; LA64-UAL-NEXT: slli.d $a2, $a3, 32
+; LA64-UAL-NEXT: or $a1, $a1, $a2
+; LA64-UAL-NEXT: revb.d $a0, $a0
+; LA64-UAL-NEXT: revb.d $a1, $a1
+; LA64-UAL-NEXT: sltu $a2, $a0, $a1
+; LA64-UAL-NEXT: sltu $a0, $a1, $a0
+; LA64-UAL-NEXT: sub.d $a0, $a0, $a2
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_size_6:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 6
+; LA32-NUAL-NEXT: bl memcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_size_6:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 6
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 6)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_7(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-UAL-LABEL: memcmp_size_7:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a3, $a0, 0
+; LA32-UAL-NEXT: ld.w $a4, $a1, 0
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: lu12i.w $a2, 15
+; LA32-UAL-NEXT: ori $a2, $a2, 3840
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a3, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a6
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a4, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB28_3
+; LA32-UAL-NEXT: # %bb.1: # %loadbb1
+; LA32-UAL-NEXT: ld.w $a0, $a0, 3
+; LA32-UAL-NEXT: ld.w $a1, $a1, 3
+; LA32-UAL-NEXT: srli.w $a3, $a0, 8
+; LA32-UAL-NEXT: and $a3, $a3, $a2
+; LA32-UAL-NEXT: srli.w $a4, $a0, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a4
+; LA32-UAL-NEXT: and $a4, $a0, $a2
+; LA32-UAL-NEXT: slli.w $a4, $a4, 8
+; LA32-UAL-NEXT: slli.w $a0, $a0, 24
+; LA32-UAL-NEXT: or $a0, $a0, $a4
+; LA32-UAL-NEXT: or $a3, $a0, $a3
+; LA32-UAL-NEXT: srli.w $a0, $a1, 8
+; LA32-UAL-NEXT: and $a0, $a0, $a2
+; LA32-UAL-NEXT: srli.w $a4, $a1, 24
+; LA32-UAL-NEXT: or $a0, $a0, $a4
+; LA32-UAL-NEXT: and $a2, $a1, $a2
+; LA32-UAL-NEXT: slli.w $a2, $a2, 8
+; LA32-UAL-NEXT: slli.w $a1, $a1, 24
+; LA32-UAL-NEXT: or $a1, $a1, $a2
+; LA32-UAL-NEXT: or $a4, $a1, $a0
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB28_3
+; LA32-UAL-NEXT: # %bb.2:
+; LA32-UAL-NEXT: move $a0, $zero
+; LA32-UAL-NEXT: ret
+; LA32-UAL-NEXT: .LBB28_3: # %res_block
+; LA32-UAL-NEXT: sltu $a0, $a3, $a4
+; LA32-UAL-NEXT: sub.w $a0, $zero, $a0
+; LA32-UAL-NEXT: ori $a0, $a0, 1
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_size_7:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.w $a2, $a0, 0
+; LA64-UAL-NEXT: ld.w $a3, $a1, 0
+; LA64-UAL-NEXT: revb.2w $a2, $a2
+; LA64-UAL-NEXT: addi.w $a4, $a2, 0
+; LA64-UAL-NEXT: revb.2w $a3, $a3
+; LA64-UAL-NEXT: addi.w $a5, $a3, 0
+; LA64-UAL-NEXT: bne $a4, $a5, .LBB28_3
+; LA64-UAL-NEXT: # %bb.1: # %loadbb1
+; LA64-UAL-NEXT: ld.w $a0, $a0, 3
+; LA64-UAL-NEXT: ld.w $a1, $a1, 3
+; LA64-UAL-NEXT: revb.2w $a2, $a0
+; LA64-UAL-NEXT: addi.w $a0, $a2, 0
+; LA64-UAL-NEXT: revb.2w $a3, $a1
+; LA64-UAL-NEXT: addi.w $a1, $a3, 0
+; LA64-UAL-NEXT: bne $a0, $a1, .LBB28_3
+; LA64-UAL-NEXT: # %bb.2:
+; LA64-UAL-NEXT: move $a0, $zero
+; LA64-UAL-NEXT: ret
+; LA64-UAL-NEXT: .LBB28_3: # %res_block
+; LA64-UAL-NEXT: addi.w $a0, $a3, 0
+; LA64-UAL-NEXT: addi.w $a1, $a2, 0
+; LA64-UAL-NEXT: sltu $a0, $a1, $a0
+; LA64-UAL-NEXT: sub.d $a0, $zero, $a0
+; LA64-UAL-NEXT: ori $a0, $a0, 1
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_size_7:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 7
+; LA32-NUAL-NEXT: bl memcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_size_7:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 7
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 7)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_8(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-UAL-LABEL: memcmp_size_8:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a3, $a0, 0
+; LA32-UAL-NEXT: ld.w $a4, $a1, 0
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: lu12i.w $a2, 15
+; LA32-UAL-NEXT: ori $a2, $a2, 3840
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a3, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a6
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a4, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB29_3
+; LA32-UAL-NEXT: # %bb.1: # %loadbb1
+; LA32-UAL-NEXT: ld.w $a0, $a0, 4
+; LA32-UAL-NEXT: ld.w $a1, $a1, 4
+; LA32-UAL-NEXT: srli.w $a3, $a0, 8
+; LA32-UAL-NEXT: and $a3, $a3, $a2
+; LA32-UAL-NEXT: srli.w $a4, $a0, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a4
+; LA32-UAL-NEXT: and $a4, $a0, $a2
+; LA32-UAL-NEXT: slli.w $a4, $a4, 8
+; LA32-UAL-NEXT: slli.w $a0, $a0, 24
+; LA32-UAL-NEXT: or $a0, $a0, $a4
+; LA32-UAL-NEXT: or $a3, $a0, $a3
+; LA32-UAL-NEXT: srli.w $a0, $a1, 8
+; LA32-UAL-NEXT: and $a0, $a0, $a2
+; LA32-UAL-NEXT: srli.w $a4, $a1, 24
+; LA32-UAL-NEXT: or $a0, $a0, $a4
+; LA32-UAL-NEXT: and $a2, $a1, $a2
+; LA32-UAL-NEXT: slli.w $a2, $a2, 8
+; LA32-UAL-NEXT: slli.w $a1, $a1, 24
+; LA32-UAL-NEXT: or $a1, $a1, $a2
+; LA32-UAL-NEXT: or $a4, $a1, $a0
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB29_3
+; LA32-UAL-NEXT: # %bb.2:
+; LA32-UAL-NEXT: move $a0, $zero
+; LA32-UAL-NEXT: ret
+; LA32-UAL-NEXT: .LBB29_3: # %res_block
+; LA32-UAL-NEXT: sltu $a0, $a3, $a4
+; LA32-UAL-NEXT: sub.w $a0, $zero, $a0
+; LA32-UAL-NEXT: ori $a0, $a0, 1
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_size_8:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.d $a0, $a0, 0
+; LA64-UAL-NEXT: ld.d $a1, $a1, 0
+; LA64-UAL-NEXT: revb.d $a0, $a0
+; LA64-UAL-NEXT: revb.d $a1, $a1
+; LA64-UAL-NEXT: sltu $a2, $a0, $a1
+; LA64-UAL-NEXT: sltu $a0, $a1, $a0
+; LA64-UAL-NEXT: sub.d $a0, $a0, $a2
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_size_8:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 8
+; LA32-NUAL-NEXT: bl memcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_size_8:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 8
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 8)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_15(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-UAL-LABEL: memcmp_size_15:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a3, $a0, 0
+; LA32-UAL-NEXT: ld.w $a4, $a1, 0
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: lu12i.w $a2, 15
+; LA32-UAL-NEXT: ori $a2, $a2, 3840
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a3, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a6
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a4, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB30_5
+; LA32-UAL-NEXT: # %bb.1: # %loadbb1
+; LA32-UAL-NEXT: ld.w $a3, $a0, 4
+; LA32-UAL-NEXT: ld.w $a4, $a1, 4
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a3, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a6
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a4, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB30_5
+; LA32-UAL-NEXT: # %bb.2: # %loadbb2
+; LA32-UAL-NEXT: ld.w $a3, $a0, 8
+; LA32-UAL-NEXT: ld.w $a4, $a1, 8
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a3, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a6
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a4, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB30_5
+; LA32-UAL-NEXT: # %bb.3: # %loadbb3
+; LA32-UAL-NEXT: ld.w $a0, $a0, 11
+; LA32-UAL-NEXT: ld.w $a1, $a1, 11
+; LA32-UAL-NEXT: srli.w $a3, $a0, 8
+; LA32-UAL-NEXT: and $a3, $a3, $a2
+; LA32-UAL-NEXT: srli.w $a4, $a0, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a4
+; LA32-UAL-NEXT: and $a4, $a0, $a2
+; LA32-UAL-NEXT: slli.w $a4, $a4, 8
+; LA32-UAL-NEXT: slli.w $a0, $a0, 24
+; LA32-UAL-NEXT: or $a0, $a0, $a4
+; LA32-UAL-NEXT: or $a3, $a0, $a3
+; LA32-UAL-NEXT: srli.w $a0, $a1, 8
+; LA32-UAL-NEXT: and $a0, $a0, $a2
+; LA32-UAL-NEXT: srli.w $a4, $a1, 24
+; LA32-UAL-NEXT: or $a0, $a0, $a4
+; LA32-UAL-NEXT: and $a2, $a1, $a2
+; LA32-UAL-NEXT: slli.w $a2, $a2, 8
+; LA32-UAL-NEXT: slli.w $a1, $a1, 24
+; LA32-UAL-NEXT: or $a1, $a1, $a2
+; LA32-UAL-NEXT: or $a4, $a1, $a0
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB30_5
+; LA32-UAL-NEXT: # %bb.4:
+; LA32-UAL-NEXT: move $a0, $zero
+; LA32-UAL-NEXT: ret
+; LA32-UAL-NEXT: .LBB30_5: # %res_block
+; LA32-UAL-NEXT: sltu $a0, $a3, $a4
+; LA32-UAL-NEXT: sub.w $a0, $zero, $a0
+; LA32-UAL-NEXT: ori $a0, $a0, 1
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_size_15:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.d $a2, $a0, 0
+; LA64-UAL-NEXT: ld.d $a3, $a1, 0
+; LA64-UAL-NEXT: revb.d $a2, $a2
+; LA64-UAL-NEXT: revb.d $a3, $a3
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB30_3
+; LA64-UAL-NEXT: # %bb.1: # %loadbb1
+; LA64-UAL-NEXT: ld.d $a0, $a0, 7
+; LA64-UAL-NEXT: ld.d $a1, $a1, 7
+; LA64-UAL-NEXT: revb.d $a2, $a0
+; LA64-UAL-NEXT: revb.d $a3, $a1
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB30_3
+; LA64-UAL-NEXT: # %bb.2:
+; LA64-UAL-NEXT: move $a0, $zero
+; LA64-UAL-NEXT: ret
+; LA64-UAL-NEXT: .LBB30_3: # %res_block
+; LA64-UAL-NEXT: sltu $a0, $a2, $a3
+; LA64-UAL-NEXT: sub.d $a0, $zero, $a0
+; LA64-UAL-NEXT: ori $a0, $a0, 1
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_size_15:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 15
+; LA32-NUAL-NEXT: bl memcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_size_15:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 15
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 15)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_16(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-UAL-LABEL: memcmp_size_16:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a3, $a0, 0
+; LA32-UAL-NEXT: ld.w $a4, $a1, 0
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: lu12i.w $a2, 15
+; LA32-UAL-NEXT: ori $a2, $a2, 3840
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a3, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a6
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a4, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB31_5
+; LA32-UAL-NEXT: # %bb.1: # %loadbb1
+; LA32-UAL-NEXT: ld.w $a3, $a0, 4
+; LA32-UAL-NEXT: ld.w $a4, $a1, 4
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a3, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a6
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a4, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB31_5
+; LA32-UAL-NEXT: # %bb.2: # %loadbb2
+; LA32-UAL-NEXT: ld.w $a3, $a0, 8
+; LA32-UAL-NEXT: ld.w $a4, $a1, 8
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a3, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a6
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a4, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB31_5
+; LA32-UAL-NEXT: # %bb.3: # %loadbb3
+; LA32-UAL-NEXT: ld.w $a0, $a0, 12
+; LA32-UAL-NEXT: ld.w $a1, $a1, 12
+; LA32-UAL-NEXT: srli.w $a3, $a0, 8
+; LA32-UAL-NEXT: and $a3, $a3, $a2
+; LA32-UAL-NEXT: srli.w $a4, $a0, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a4
+; LA32-UAL-NEXT: and $a4, $a0, $a2
+; LA32-UAL-NEXT: slli.w $a4, $a4, 8
+; LA32-UAL-NEXT: slli.w $a0, $a0, 24
+; LA32-UAL-NEXT: or $a0, $a0, $a4
+; LA32-UAL-NEXT: or $a3, $a0, $a3
+; LA32-UAL-NEXT: srli.w $a0, $a1, 8
+; LA32-UAL-NEXT: and $a0, $a0, $a2
+; LA32-UAL-NEXT: srli.w $a4, $a1, 24
+; LA32-UAL-NEXT: or $a0, $a0, $a4
+; LA32-UAL-NEXT: and $a2, $a1, $a2
+; LA32-UAL-NEXT: slli.w $a2, $a2, 8
+; LA32-UAL-NEXT: slli.w $a1, $a1, 24
+; LA32-UAL-NEXT: or $a1, $a1, $a2
+; LA32-UAL-NEXT: or $a4, $a1, $a0
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB31_5
+; LA32-UAL-NEXT: # %bb.4:
+; LA32-UAL-NEXT: move $a0, $zero
+; LA32-UAL-NEXT: ret
+; LA32-UAL-NEXT: .LBB31_5: # %res_block
+; LA32-UAL-NEXT: sltu $a0, $a3, $a4
+; LA32-UAL-NEXT: sub.w $a0, $zero, $a0
+; LA32-UAL-NEXT: ori $a0, $a0, 1
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_size_16:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.d $a2, $a0, 0
+; LA64-UAL-NEXT: ld.d $a3, $a1, 0
+; LA64-UAL-NEXT: revb.d $a2, $a2
+; LA64-UAL-NEXT: revb.d $a3, $a3
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB31_3
+; LA64-UAL-NEXT: # %bb.1: # %loadbb1
+; LA64-UAL-NEXT: ld.d $a0, $a0, 8
+; LA64-UAL-NEXT: ld.d $a1, $a1, 8
+; LA64-UAL-NEXT: revb.d $a2, $a0
+; LA64-UAL-NEXT: revb.d $a3, $a1
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB31_3
+; LA64-UAL-NEXT: # %bb.2:
+; LA64-UAL-NEXT: move $a0, $zero
+; LA64-UAL-NEXT: ret
+; LA64-UAL-NEXT: .LBB31_3: # %res_block
+; LA64-UAL-NEXT: sltu $a0, $a2, $a3
+; LA64-UAL-NEXT: sub.d $a0, $zero, $a0
+; LA64-UAL-NEXT: ori $a0, $a0, 1
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_size_16:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 16
+; LA32-NUAL-NEXT: bl memcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_size_16:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 16
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 16)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_31(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-LABEL: memcmp_size_31:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: ori $a2, $zero, 31
+; LA32-NEXT: bl memcmp
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_size_31:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.d $a2, $a0, 0
+; LA64-UAL-NEXT: ld.d $a3, $a1, 0
+; LA64-UAL-NEXT: revb.d $a2, $a2
+; LA64-UAL-NEXT: revb.d $a3, $a3
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB32_5
+; LA64-UAL-NEXT: # %bb.1: # %loadbb1
+; LA64-UAL-NEXT: ld.d $a2, $a0, 8
+; LA64-UAL-NEXT: ld.d $a3, $a1, 8
+; LA64-UAL-NEXT: revb.d $a2, $a2
+; LA64-UAL-NEXT: revb.d $a3, $a3
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB32_5
+; LA64-UAL-NEXT: # %bb.2: # %loadbb2
+; LA64-UAL-NEXT: ld.d $a2, $a0, 16
+; LA64-UAL-NEXT: ld.d $a3, $a1, 16
+; LA64-UAL-NEXT: revb.d $a2, $a2
+; LA64-UAL-NEXT: revb.d $a3, $a3
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB32_5
+; LA64-UAL-NEXT: # %bb.3: # %loadbb3
+; LA64-UAL-NEXT: ld.d $a0, $a0, 23
+; LA64-UAL-NEXT: ld.d $a1, $a1, 23
+; LA64-UAL-NEXT: revb.d $a2, $a0
+; LA64-UAL-NEXT: revb.d $a3, $a1
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB32_5
+; LA64-UAL-NEXT: # %bb.4:
+; LA64-UAL-NEXT: move $a0, $zero
+; LA64-UAL-NEXT: ret
+; LA64-UAL-NEXT: .LBB32_5: # %res_block
+; LA64-UAL-NEXT: sltu $a0, $a2, $a3
+; LA64-UAL-NEXT: sub.d $a0, $zero, $a0
+; LA64-UAL-NEXT: ori $a0, $a0, 1
+; LA64-UAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_size_31:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 31
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 31)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_32(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-LABEL: memcmp_size_32:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: ori $a2, $zero, 32
+; LA32-NEXT: bl memcmp
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_size_32:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.d $a2, $a0, 0
+; LA64-UAL-NEXT: ld.d $a3, $a1, 0
+; LA64-UAL-NEXT: revb.d $a2, $a2
+; LA64-UAL-NEXT: revb.d $a3, $a3
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB33_5
+; LA64-UAL-NEXT: # %bb.1: # %loadbb1
+; LA64-UAL-NEXT: ld.d $a2, $a0, 8
+; LA64-UAL-NEXT: ld.d $a3, $a1, 8
+; LA64-UAL-NEXT: revb.d $a2, $a2
+; LA64-UAL-NEXT: revb.d $a3, $a3
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB33_5
+; LA64-UAL-NEXT: # %bb.2: # %loadbb2
+; LA64-UAL-NEXT: ld.d $a2, $a0, 16
+; LA64-UAL-NEXT: ld.d $a3, $a1, 16
+; LA64-UAL-NEXT: revb.d $a2, $a2
+; LA64-UAL-NEXT: revb.d $a3, $a3
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB33_5
+; LA64-UAL-NEXT: # %bb.3: # %loadbb3
+; LA64-UAL-NEXT: ld.d $a0, $a0, 24
+; LA64-UAL-NEXT: ld.d $a1, $a1, 24
+; LA64-UAL-NEXT: revb.d $a2, $a0
+; LA64-UAL-NEXT: revb.d $a3, $a1
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB33_5
+; LA64-UAL-NEXT: # %bb.4:
+; LA64-UAL-NEXT: move $a0, $zero
+; LA64-UAL-NEXT: ret
+; LA64-UAL-NEXT: .LBB33_5: # %res_block
+; LA64-UAL-NEXT: sltu $a0, $a2, $a3
+; LA64-UAL-NEXT: sub.d $a0, $zero, $a0
+; LA64-UAL-NEXT: ori $a0, $a0, 1
+; LA64-UAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_size_32:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 32
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 32)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_63(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-LABEL: memcmp_size_63:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: ori $a2, $zero, 63
+; LA32-NEXT: bl memcmp
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: memcmp_size_63:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: addi.d $sp, $sp, -16
+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: ori $a2, $zero, 63
+; LA64-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 63)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_64(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-LABEL: memcmp_size_64:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: ori $a2, $zero, 64
+; LA32-NEXT: bl memcmp
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: memcmp_size_64:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: addi.d $sp, $sp, -16
+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: ori $a2, $zero, 64
+; LA64-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 64)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_127(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-LABEL: memcmp_size_127:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: ori $a2, $zero, 127
+; LA32-NEXT: bl memcmp
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: memcmp_size_127:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: addi.d $sp, $sp, -16
+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: ori $a2, $zero, 127
+; LA64-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 127)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_128(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-LABEL: memcmp_size_128:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: ori $a2, $zero, 128
+; LA32-NEXT: bl memcmp
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: memcmp_size_128:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: addi.d $sp, $sp, -16
+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: ori $a2, $zero, 128
+; LA64-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 128)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_runtime(ptr %s1, ptr %s2, iGRLen %len) nounwind optsize {
+; LA32-LABEL: memcmp_size_runtime:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: bl memcmp
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: memcmp_size_runtime:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: addi.d $sp, $sp, -16
+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen %len)
+ ret i32 %memcmp
+}
+
+define i1 @memcmp_eq_zero(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-UAL-LABEL: memcmp_eq_zero:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a0, $a0, 0
+; LA32-UAL-NEXT: ld.w $a1, $a1, 0
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: sltui $a0, $a0, 1
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_eq_zero:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.w $a0, $a0, 0
+; LA64-UAL-NEXT: ld.w $a1, $a1, 0
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: sltui $a0, $a0, 1
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_eq_zero:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: ld.bu $a2, $a1, 1
+; LA32-NUAL-NEXT: ld.bu $a3, $a1, 0
+; LA32-NUAL-NEXT: ld.bu $a4, $a1, 2
+; LA32-NUAL-NEXT: ld.bu $a1, $a1, 3
+; LA32-NUAL-NEXT: slli.w $a2, $a2, 8
+; LA32-NUAL-NEXT: or $a2, $a2, $a3
+; LA32-NUAL-NEXT: slli.w $a3, $a4, 16
+; LA32-NUAL-NEXT: slli.w $a1, $a1, 24
+; LA32-NUAL-NEXT: or $a1, $a1, $a3
+; LA32-NUAL-NEXT: or $a1, $a1, $a2
+; LA32-NUAL-NEXT: ld.bu $a2, $a0, 1
+; LA32-NUAL-NEXT: ld.bu $a3, $a0, 0
+; LA32-NUAL-NEXT: ld.bu $a4, $a0, 2
+; LA32-NUAL-NEXT: ld.bu $a0, $a0, 3
+; LA32-NUAL-NEXT: slli.w $a2, $a2, 8
+; LA32-NUAL-NEXT: or $a2, $a2, $a3
+; LA32-NUAL-NEXT: slli.w $a3, $a4, 16
+; LA32-NUAL-NEXT: slli.w $a0, $a0, 24
+; LA32-NUAL-NEXT: or $a0, $a0, $a3
+; LA32-NUAL-NEXT: or $a0, $a0, $a2
+; LA32-NUAL-NEXT: xor $a0, $a0, $a1
+; LA32-NUAL-NEXT: sltui $a0, $a0, 1
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_eq_zero:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: ld.bu $a2, $a1, 1
+; LA64-NUAL-NEXT: ld.bu $a3, $a1, 0
+; LA64-NUAL-NEXT: ld.bu $a4, $a1, 2
+; LA64-NUAL-NEXT: ld.b $a1, $a1, 3
+; LA64-NUAL-NEXT: slli.d $a2, $a2, 8
+; LA64-NUAL-NEXT: or $a2, $a2, $a3
+; LA64-NUAL-NEXT: slli.d $a3, $a4, 16
+; LA64-NUAL-NEXT: slli.d $a1, $a1, 24
+; LA64-NUAL-NEXT: or $a1, $a1, $a3
+; LA64-NUAL-NEXT: or $a1, $a1, $a2
+; LA64-NUAL-NEXT: ld.bu $a2, $a0, 1
+; LA64-NUAL-NEXT: ld.bu $a3, $a0, 0
+; LA64-NUAL-NEXT: ld.bu $a4, $a0, 2
+; LA64-NUAL-NEXT: ld.b $a0, $a0, 3
+; LA64-NUAL-NEXT: slli.d $a2, $a2, 8
+; LA64-NUAL-NEXT: or $a2, $a2, $a3
+; LA64-NUAL-NEXT: slli.d $a3, $a4, 16
+; LA64-NUAL-NEXT: slli.d $a0, $a0, 24
+; LA64-NUAL-NEXT: or $a0, $a0, $a3
+; LA64-NUAL-NEXT: or $a0, $a0, $a2
+; LA64-NUAL-NEXT: xor $a0, $a0, $a1
+; LA64-NUAL-NEXT: sltui $a0, $a0, 1
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4)
+ %ret = icmp eq i32 %memcmp, 0
+ ret i1 %ret
+}
+
+define i1 @memcmp_lt_zero(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-UAL-LABEL: memcmp_lt_zero:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a0, $a0, 0
+; LA32-UAL-NEXT: ld.w $a1, $a1, 0
+; LA32-UAL-NEXT: srli.w $a2, $a0, 8
+; LA32-UAL-NEXT: lu12i.w $a3, 15
+; LA32-UAL-NEXT: ori $a3, $a3, 3840
+; LA32-UAL-NEXT: and $a2, $a2, $a3
+; LA32-UAL-NEXT: srli.w $a4, $a0, 24
+; LA32-UAL-NEXT: or $a2, $a2, $a4
+; LA32-UAL-NEXT: and $a4, $a0, $a3
+; LA32-UAL-NEXT: slli.w $a4, $a4, 8
+; LA32-UAL-NEXT: slli.w $a0, $a0, 24
+; LA32-UAL-NEXT: or $a0, $a0, $a4
+; LA32-UAL-NEXT: or $a0, $a0, $a2
+; LA32-UAL-NEXT: srli.w $a2, $a1, 8
+; LA32-UAL-NEXT: and $a2, $a2, $a3
+; LA32-UAL-NEXT: srli.w $a4, $a1, 24
+; LA32-UAL-NEXT: or $a2, $a2, $a4
+; LA32-UAL-NEXT: and $a3, $a1, $a3
+; LA32-UAL-NEXT: slli.w $a3, $a3, 8
+; LA32-UAL-NEXT: slli.w $a1, $a1, 24
+; LA32-UAL-NEXT: or $a1, $a1, $a3
+; LA32-UAL-NEXT: or $a1, $a1, $a2
+; LA32-UAL-NEXT: sltu $a0, $a0, $a1
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_lt_zero:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.w $a0, $a0, 0
+; LA64-UAL-NEXT: ld.w $a1, $a1, 0
+; LA64-UAL-NEXT: revb.2w $a0, $a0
+; LA64-UAL-NEXT: addi.w $a0, $a0, 0
+; LA64-UAL-NEXT: revb.2w $a1, $a1
+; LA64-UAL-NEXT: addi.w $a1, $a1, 0
+; LA64-UAL-NEXT: sltu $a0, $a0, $a1
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_lt_zero:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 4
+; LA32-NUAL-NEXT: bl memcmp
+; LA32-NUAL-NEXT: srli.w $a0, $a0, 31
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_lt_zero:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 4
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: slti $a0, $a0, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4)
+ %ret = icmp slt i32 %memcmp, 0
+ ret i1 %ret
+}
+
+define i1 @memcmp_gt_zero(ptr %s1, ptr %s2) nounwind optsize {
+; LA32-UAL-LABEL: memcmp_gt_zero:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a0, $a0, 0
+; LA32-UAL-NEXT: ld.w $a1, $a1, 0
+; LA32-UAL-NEXT: srli.w $a2, $a0, 8
+; LA32-UAL-NEXT: lu12i.w $a3, 15
+; LA32-UAL-NEXT: ori $a3, $a3, 3840
+; LA32-UAL-NEXT: and $a2, $a2, $a3
+; LA32-UAL-NEXT: srli.w $a4, $a0, 24
+; LA32-UAL-NEXT: or $a2, $a2, $a4
+; LA32-UAL-NEXT: and $a4, $a0, $a3
+; LA32-UAL-NEXT: slli.w $a4, $a4, 8
+; LA32-UAL-NEXT: slli.w $a0, $a0, 24
+; LA32-UAL-NEXT: or $a0, $a0, $a4
+; LA32-UAL-NEXT: or $a0, $a0, $a2
+; LA32-UAL-NEXT: srli.w $a2, $a1, 8
+; LA32-UAL-NEXT: and $a2, $a2, $a3
+; LA32-UAL-NEXT: srli.w $a4, $a1, 24
+; LA32-UAL-NEXT: or $a2, $a2, $a4
+; LA32-UAL-NEXT: and $a3, $a1, $a3
+; LA32-UAL-NEXT: slli.w $a3, $a3, 8
+; LA32-UAL-NEXT: slli.w $a1, $a1, 24
+; LA32-UAL-NEXT: or $a1, $a1, $a3
+; LA32-UAL-NEXT: or $a1, $a1, $a2
+; LA32-UAL-NEXT: sltu $a0, $a1, $a0
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_gt_zero:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.w $a0, $a0, 0
+; LA64-UAL-NEXT: ld.w $a1, $a1, 0
+; LA64-UAL-NEXT: revb.2w $a0, $a0
+; LA64-UAL-NEXT: addi.w $a0, $a0, 0
+; LA64-UAL-NEXT: revb.2w $a1, $a1
+; LA64-UAL-NEXT: addi.w $a1, $a1, 0
+; LA64-UAL-NEXT: sltu $a0, $a1, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_gt_zero:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 4
+; LA32-NUAL-NEXT: bl memcmp
+; LA32-NUAL-NEXT: slt $a0, $zero, $a0
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_gt_zero:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 4
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: slt $a0, $zero, $a0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4)
+ %ret = icmp sgt i32 %memcmp, 0
+ ret i1 %ret
+}
diff --git a/llvm/test/CodeGen/LoongArch/expandmemcmp.ll b/llvm/test/CodeGen/LoongArch/expandmemcmp.ll
new file mode 100644
index 0000000..c1bf850
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/expandmemcmp.ll
@@ -0,0 +1,3106 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: sed 's/iGRLen/i32/g' %s | llc --mtriple=loongarch32 --mattr=+ual \
+; RUN: | FileCheck %s --check-prefixes=CHECK,LA32,LA32-UAL
+; RUN: sed 's/iGRLen/i64/g' %s | llc --mtriple=loongarch64 --mattr=+ual \
+; RUN: | FileCheck %s --check-prefixes=CHECK,LA64,LA64-UAL
+; RUN: sed 's/iGRLen/i32/g' %s | llc --mtriple=loongarch32 --mattr=-ual \
+; RUN: | FileCheck %s --check-prefixes=CHECK,LA32,LA32-NUAL
+; RUN: sed 's/iGRLen/i64/g' %s | llc --mtriple=loongarch64 --mattr=-ual \
+; RUN: | FileCheck %s --check-prefixes=CHECK,LA64,LA64-NUAL
+
+declare signext i32 @bcmp(ptr, ptr, iGRLen) nounwind readonly
+declare signext i32 @memcmp(ptr, ptr, iGRLen) nounwind readonly
+
+define signext i32 @bcmp_size_0(ptr %s1, ptr %s2) nounwind {
+; LA32-LABEL: bcmp_size_0:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: move $a2, $zero
+; LA32-NEXT: bl bcmp
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: bcmp_size_0:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: addi.d $sp, $sp, -16
+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: move $a2, $zero
+; LA64-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 0)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_1(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: bcmp_size_1:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.bu $a0, $a0, 0
+; LA32-UAL-NEXT: ld.bu $a1, $a1, 0
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: sltu $a0, $zero, $a0
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_size_1:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.bu $a0, $a0, 0
+; LA64-UAL-NEXT: ld.bu $a1, $a1, 0
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: sltu $a0, $zero, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_size_1:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 1
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_size_1:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 1
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 1)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_2(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: bcmp_size_2:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.hu $a0, $a0, 0
+; LA32-UAL-NEXT: ld.hu $a1, $a1, 0
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: sltu $a0, $zero, $a0
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_size_2:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.hu $a0, $a0, 0
+; LA64-UAL-NEXT: ld.hu $a1, $a1, 0
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: sltu $a0, $zero, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_size_2:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 2
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_size_2:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 2
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 2)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_3(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: bcmp_size_3:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.hu $a2, $a0, 0
+; LA32-UAL-NEXT: ld.hu $a3, $a1, 0
+; LA32-UAL-NEXT: ld.bu $a0, $a0, 2
+; LA32-UAL-NEXT: ld.bu $a1, $a1, 2
+; LA32-UAL-NEXT: xor $a2, $a2, $a3
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: or $a0, $a2, $a0
+; LA32-UAL-NEXT: sltu $a0, $zero, $a0
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_size_3:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.hu $a2, $a0, 0
+; LA64-UAL-NEXT: ld.hu $a3, $a1, 0
+; LA64-UAL-NEXT: ld.bu $a0, $a0, 2
+; LA64-UAL-NEXT: ld.bu $a1, $a1, 2
+; LA64-UAL-NEXT: xor $a2, $a2, $a3
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: or $a0, $a2, $a0
+; LA64-UAL-NEXT: sltu $a0, $zero, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_size_3:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 3
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_size_3:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 3
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 3)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_4(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: bcmp_size_4:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a0, $a0, 0
+; LA32-UAL-NEXT: ld.w $a1, $a1, 0
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: sltu $a0, $zero, $a0
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_size_4:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.w $a0, $a0, 0
+; LA64-UAL-NEXT: ld.w $a1, $a1, 0
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: sltu $a0, $zero, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_size_4:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 4
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_size_4:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 4
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_5(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: bcmp_size_5:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a2, $a0, 0
+; LA32-UAL-NEXT: ld.w $a3, $a1, 0
+; LA32-UAL-NEXT: ld.bu $a0, $a0, 4
+; LA32-UAL-NEXT: ld.bu $a1, $a1, 4
+; LA32-UAL-NEXT: xor $a2, $a2, $a3
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: or $a0, $a2, $a0
+; LA32-UAL-NEXT: sltu $a0, $zero, $a0
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_size_5:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.w $a2, $a0, 0
+; LA64-UAL-NEXT: ld.w $a3, $a1, 0
+; LA64-UAL-NEXT: ld.bu $a0, $a0, 4
+; LA64-UAL-NEXT: ld.bu $a1, $a1, 4
+; LA64-UAL-NEXT: xor $a2, $a2, $a3
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: or $a0, $a2, $a0
+; LA64-UAL-NEXT: sltu $a0, $zero, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_size_5:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 5
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_size_5:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 5
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 5)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_6(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: bcmp_size_6:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a2, $a0, 0
+; LA32-UAL-NEXT: ld.w $a3, $a1, 0
+; LA32-UAL-NEXT: ld.hu $a0, $a0, 4
+; LA32-UAL-NEXT: ld.hu $a1, $a1, 4
+; LA32-UAL-NEXT: xor $a2, $a2, $a3
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: or $a0, $a2, $a0
+; LA32-UAL-NEXT: sltu $a0, $zero, $a0
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_size_6:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.w $a2, $a0, 0
+; LA64-UAL-NEXT: ld.w $a3, $a1, 0
+; LA64-UAL-NEXT: ld.hu $a0, $a0, 4
+; LA64-UAL-NEXT: ld.hu $a1, $a1, 4
+; LA64-UAL-NEXT: xor $a2, $a2, $a3
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: or $a0, $a2, $a0
+; LA64-UAL-NEXT: sltu $a0, $zero, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_size_6:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 6
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_size_6:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 6
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 6)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_7(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: bcmp_size_7:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a2, $a0, 0
+; LA32-UAL-NEXT: ld.w $a3, $a1, 0
+; LA32-UAL-NEXT: ld.w $a0, $a0, 3
+; LA32-UAL-NEXT: ld.w $a1, $a1, 3
+; LA32-UAL-NEXT: xor $a2, $a2, $a3
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: or $a0, $a2, $a0
+; LA32-UAL-NEXT: sltu $a0, $zero, $a0
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_size_7:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.w $a2, $a0, 0
+; LA64-UAL-NEXT: ld.w $a3, $a1, 0
+; LA64-UAL-NEXT: ld.w $a0, $a0, 3
+; LA64-UAL-NEXT: ld.w $a1, $a1, 3
+; LA64-UAL-NEXT: xor $a2, $a2, $a3
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: or $a0, $a2, $a0
+; LA64-UAL-NEXT: sltu $a0, $zero, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_size_7:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 7
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_size_7:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 7
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 7)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_8(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: bcmp_size_8:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a2, $a0, 0
+; LA32-UAL-NEXT: ld.w $a3, $a1, 0
+; LA32-UAL-NEXT: ld.w $a0, $a0, 4
+; LA32-UAL-NEXT: ld.w $a1, $a1, 4
+; LA32-UAL-NEXT: xor $a2, $a2, $a3
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: or $a0, $a2, $a0
+; LA32-UAL-NEXT: sltu $a0, $zero, $a0
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_size_8:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.d $a0, $a0, 0
+; LA64-UAL-NEXT: ld.d $a1, $a1, 0
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: sltu $a0, $zero, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_size_8:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 8
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_size_8:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 8
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 8)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_15(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: bcmp_size_15:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a2, $a0, 0
+; LA32-UAL-NEXT: ld.w $a3, $a1, 0
+; LA32-UAL-NEXT: ld.w $a4, $a0, 4
+; LA32-UAL-NEXT: ld.w $a5, $a1, 4
+; LA32-UAL-NEXT: ld.w $a6, $a0, 8
+; LA32-UAL-NEXT: ld.w $a7, $a1, 8
+; LA32-UAL-NEXT: ld.w $a0, $a0, 11
+; LA32-UAL-NEXT: ld.w $a1, $a1, 11
+; LA32-UAL-NEXT: xor $a2, $a2, $a3
+; LA32-UAL-NEXT: xor $a3, $a4, $a5
+; LA32-UAL-NEXT: xor $a4, $a6, $a7
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: or $a1, $a2, $a3
+; LA32-UAL-NEXT: or $a0, $a4, $a0
+; LA32-UAL-NEXT: or $a0, $a1, $a0
+; LA32-UAL-NEXT: sltu $a0, $zero, $a0
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_size_15:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.d $a2, $a0, 0
+; LA64-UAL-NEXT: ld.d $a3, $a1, 0
+; LA64-UAL-NEXT: ld.d $a0, $a0, 7
+; LA64-UAL-NEXT: ld.d $a1, $a1, 7
+; LA64-UAL-NEXT: xor $a2, $a2, $a3
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: or $a0, $a2, $a0
+; LA64-UAL-NEXT: sltu $a0, $zero, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_size_15:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 15
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_size_15:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 15
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 15)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_16(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: bcmp_size_16:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a2, $a0, 0
+; LA32-UAL-NEXT: ld.w $a3, $a1, 0
+; LA32-UAL-NEXT: ld.w $a4, $a0, 4
+; LA32-UAL-NEXT: ld.w $a5, $a1, 4
+; LA32-UAL-NEXT: ld.w $a6, $a0, 8
+; LA32-UAL-NEXT: ld.w $a7, $a1, 8
+; LA32-UAL-NEXT: ld.w $a0, $a0, 12
+; LA32-UAL-NEXT: ld.w $a1, $a1, 12
+; LA32-UAL-NEXT: xor $a2, $a2, $a3
+; LA32-UAL-NEXT: xor $a3, $a4, $a5
+; LA32-UAL-NEXT: xor $a4, $a6, $a7
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: or $a1, $a2, $a3
+; LA32-UAL-NEXT: or $a0, $a4, $a0
+; LA32-UAL-NEXT: or $a0, $a1, $a0
+; LA32-UAL-NEXT: sltu $a0, $zero, $a0
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_size_16:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.d $a2, $a0, 0
+; LA64-UAL-NEXT: ld.d $a3, $a1, 0
+; LA64-UAL-NEXT: ld.d $a0, $a0, 8
+; LA64-UAL-NEXT: ld.d $a1, $a1, 8
+; LA64-UAL-NEXT: xor $a2, $a2, $a3
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: or $a0, $a2, $a0
+; LA64-UAL-NEXT: sltu $a0, $zero, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_size_16:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 16
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_size_16:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 16
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 16)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_31(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: bcmp_size_31:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a2, $a0, 0
+; LA32-UAL-NEXT: ld.w $a3, $a1, 0
+; LA32-UAL-NEXT: ld.w $a4, $a0, 4
+; LA32-UAL-NEXT: ld.w $a5, $a1, 4
+; LA32-UAL-NEXT: ld.w $a6, $a0, 8
+; LA32-UAL-NEXT: ld.w $a7, $a1, 8
+; LA32-UAL-NEXT: ld.w $t0, $a0, 12
+; LA32-UAL-NEXT: ld.w $t1, $a1, 12
+; LA32-UAL-NEXT: xor $a2, $a2, $a3
+; LA32-UAL-NEXT: xor $a3, $a4, $a5
+; LA32-UAL-NEXT: xor $a4, $a6, $a7
+; LA32-UAL-NEXT: xor $a5, $t0, $t1
+; LA32-UAL-NEXT: ld.w $a6, $a0, 16
+; LA32-UAL-NEXT: ld.w $a7, $a1, 16
+; LA32-UAL-NEXT: ld.w $t0, $a0, 20
+; LA32-UAL-NEXT: ld.w $t1, $a1, 20
+; LA32-UAL-NEXT: ld.w $t2, $a0, 24
+; LA32-UAL-NEXT: ld.w $t3, $a1, 24
+; LA32-UAL-NEXT: ld.w $a0, $a0, 27
+; LA32-UAL-NEXT: ld.w $a1, $a1, 27
+; LA32-UAL-NEXT: xor $a6, $a6, $a7
+; LA32-UAL-NEXT: xor $a7, $t0, $t1
+; LA32-UAL-NEXT: xor $t0, $t2, $t3
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: or $a1, $a2, $a3
+; LA32-UAL-NEXT: or $a2, $a4, $a5
+; LA32-UAL-NEXT: or $a3, $a6, $a7
+; LA32-UAL-NEXT: or $a0, $t0, $a0
+; LA32-UAL-NEXT: or $a1, $a1, $a2
+; LA32-UAL-NEXT: or $a0, $a3, $a0
+; LA32-UAL-NEXT: or $a0, $a1, $a0
+; LA32-UAL-NEXT: sltu $a0, $zero, $a0
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_size_31:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.d $a2, $a0, 0
+; LA64-UAL-NEXT: ld.d $a3, $a1, 0
+; LA64-UAL-NEXT: ld.d $a4, $a0, 8
+; LA64-UAL-NEXT: ld.d $a5, $a1, 8
+; LA64-UAL-NEXT: ld.d $a6, $a0, 16
+; LA64-UAL-NEXT: ld.d $a7, $a1, 16
+; LA64-UAL-NEXT: ld.d $a0, $a0, 23
+; LA64-UAL-NEXT: ld.d $a1, $a1, 23
+; LA64-UAL-NEXT: xor $a2, $a2, $a3
+; LA64-UAL-NEXT: xor $a3, $a4, $a5
+; LA64-UAL-NEXT: xor $a4, $a6, $a7
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: or $a1, $a2, $a3
+; LA64-UAL-NEXT: or $a0, $a4, $a0
+; LA64-UAL-NEXT: or $a0, $a1, $a0
+; LA64-UAL-NEXT: sltu $a0, $zero, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_size_31:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 31
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_size_31:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 31
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 31)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_32(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: bcmp_size_32:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a2, $a0, 0
+; LA32-UAL-NEXT: ld.w $a3, $a1, 0
+; LA32-UAL-NEXT: ld.w $a4, $a0, 4
+; LA32-UAL-NEXT: ld.w $a5, $a1, 4
+; LA32-UAL-NEXT: ld.w $a6, $a0, 8
+; LA32-UAL-NEXT: ld.w $a7, $a1, 8
+; LA32-UAL-NEXT: ld.w $t0, $a0, 12
+; LA32-UAL-NEXT: ld.w $t1, $a1, 12
+; LA32-UAL-NEXT: xor $a2, $a2, $a3
+; LA32-UAL-NEXT: xor $a3, $a4, $a5
+; LA32-UAL-NEXT: xor $a4, $a6, $a7
+; LA32-UAL-NEXT: xor $a5, $t0, $t1
+; LA32-UAL-NEXT: ld.w $a6, $a0, 16
+; LA32-UAL-NEXT: ld.w $a7, $a1, 16
+; LA32-UAL-NEXT: ld.w $t0, $a0, 20
+; LA32-UAL-NEXT: ld.w $t1, $a1, 20
+; LA32-UAL-NEXT: ld.w $t2, $a0, 24
+; LA32-UAL-NEXT: ld.w $t3, $a1, 24
+; LA32-UAL-NEXT: ld.w $a0, $a0, 28
+; LA32-UAL-NEXT: ld.w $a1, $a1, 28
+; LA32-UAL-NEXT: xor $a6, $a6, $a7
+; LA32-UAL-NEXT: xor $a7, $t0, $t1
+; LA32-UAL-NEXT: xor $t0, $t2, $t3
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: or $a1, $a2, $a3
+; LA32-UAL-NEXT: or $a2, $a4, $a5
+; LA32-UAL-NEXT: or $a3, $a6, $a7
+; LA32-UAL-NEXT: or $a0, $t0, $a0
+; LA32-UAL-NEXT: or $a1, $a1, $a2
+; LA32-UAL-NEXT: or $a0, $a3, $a0
+; LA32-UAL-NEXT: or $a0, $a1, $a0
+; LA32-UAL-NEXT: sltu $a0, $zero, $a0
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_size_32:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.d $a2, $a0, 0
+; LA64-UAL-NEXT: ld.d $a3, $a1, 0
+; LA64-UAL-NEXT: ld.d $a4, $a0, 8
+; LA64-UAL-NEXT: ld.d $a5, $a1, 8
+; LA64-UAL-NEXT: ld.d $a6, $a0, 16
+; LA64-UAL-NEXT: ld.d $a7, $a1, 16
+; LA64-UAL-NEXT: ld.d $a0, $a0, 24
+; LA64-UAL-NEXT: ld.d $a1, $a1, 24
+; LA64-UAL-NEXT: xor $a2, $a2, $a3
+; LA64-UAL-NEXT: xor $a3, $a4, $a5
+; LA64-UAL-NEXT: xor $a4, $a6, $a7
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: or $a1, $a2, $a3
+; LA64-UAL-NEXT: or $a0, $a4, $a0
+; LA64-UAL-NEXT: or $a0, $a1, $a0
+; LA64-UAL-NEXT: sltu $a0, $zero, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_size_32:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 32
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_size_32:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 32
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 32)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_63(ptr %s1, ptr %s2) nounwind {
+; LA32-LABEL: bcmp_size_63:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: ori $a2, $zero, 63
+; LA32-NEXT: bl bcmp
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_size_63:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.d $a2, $a0, 0
+; LA64-UAL-NEXT: ld.d $a3, $a1, 0
+; LA64-UAL-NEXT: ld.d $a4, $a0, 8
+; LA64-UAL-NEXT: ld.d $a5, $a1, 8
+; LA64-UAL-NEXT: ld.d $a6, $a0, 16
+; LA64-UAL-NEXT: ld.d $a7, $a1, 16
+; LA64-UAL-NEXT: ld.d $t0, $a0, 24
+; LA64-UAL-NEXT: ld.d $t1, $a1, 24
+; LA64-UAL-NEXT: xor $a2, $a2, $a3
+; LA64-UAL-NEXT: xor $a3, $a4, $a5
+; LA64-UAL-NEXT: xor $a4, $a6, $a7
+; LA64-UAL-NEXT: xor $a5, $t0, $t1
+; LA64-UAL-NEXT: ld.d $a6, $a0, 32
+; LA64-UAL-NEXT: ld.d $a7, $a1, 32
+; LA64-UAL-NEXT: ld.d $t0, $a0, 40
+; LA64-UAL-NEXT: ld.d $t1, $a1, 40
+; LA64-UAL-NEXT: ld.d $t2, $a0, 48
+; LA64-UAL-NEXT: ld.d $t3, $a1, 48
+; LA64-UAL-NEXT: ld.d $a0, $a0, 55
+; LA64-UAL-NEXT: ld.d $a1, $a1, 55
+; LA64-UAL-NEXT: xor $a6, $a6, $a7
+; LA64-UAL-NEXT: xor $a7, $t0, $t1
+; LA64-UAL-NEXT: xor $t0, $t2, $t3
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: or $a1, $a2, $a3
+; LA64-UAL-NEXT: or $a2, $a4, $a5
+; LA64-UAL-NEXT: or $a3, $a6, $a7
+; LA64-UAL-NEXT: or $a0, $t0, $a0
+; LA64-UAL-NEXT: or $a1, $a1, $a2
+; LA64-UAL-NEXT: or $a0, $a3, $a0
+; LA64-UAL-NEXT: or $a0, $a1, $a0
+; LA64-UAL-NEXT: sltu $a0, $zero, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_size_63:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 63
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 63)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_64(ptr %s1, ptr %s2) nounwind {
+; LA32-LABEL: bcmp_size_64:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: ori $a2, $zero, 64
+; LA32-NEXT: bl bcmp
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_size_64:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.d $a2, $a0, 0
+; LA64-UAL-NEXT: ld.d $a3, $a1, 0
+; LA64-UAL-NEXT: ld.d $a4, $a0, 8
+; LA64-UAL-NEXT: ld.d $a5, $a1, 8
+; LA64-UAL-NEXT: ld.d $a6, $a0, 16
+; LA64-UAL-NEXT: ld.d $a7, $a1, 16
+; LA64-UAL-NEXT: ld.d $t0, $a0, 24
+; LA64-UAL-NEXT: ld.d $t1, $a1, 24
+; LA64-UAL-NEXT: xor $a2, $a2, $a3
+; LA64-UAL-NEXT: xor $a3, $a4, $a5
+; LA64-UAL-NEXT: xor $a4, $a6, $a7
+; LA64-UAL-NEXT: xor $a5, $t0, $t1
+; LA64-UAL-NEXT: ld.d $a6, $a0, 32
+; LA64-UAL-NEXT: ld.d $a7, $a1, 32
+; LA64-UAL-NEXT: ld.d $t0, $a0, 40
+; LA64-UAL-NEXT: ld.d $t1, $a1, 40
+; LA64-UAL-NEXT: ld.d $t2, $a0, 48
+; LA64-UAL-NEXT: ld.d $t3, $a1, 48
+; LA64-UAL-NEXT: ld.d $a0, $a0, 56
+; LA64-UAL-NEXT: ld.d $a1, $a1, 56
+; LA64-UAL-NEXT: xor $a6, $a6, $a7
+; LA64-UAL-NEXT: xor $a7, $t0, $t1
+; LA64-UAL-NEXT: xor $t0, $t2, $t3
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: or $a1, $a2, $a3
+; LA64-UAL-NEXT: or $a2, $a4, $a5
+; LA64-UAL-NEXT: or $a3, $a6, $a7
+; LA64-UAL-NEXT: or $a0, $t0, $a0
+; LA64-UAL-NEXT: or $a1, $a1, $a2
+; LA64-UAL-NEXT: or $a0, $a3, $a0
+; LA64-UAL-NEXT: or $a0, $a1, $a0
+; LA64-UAL-NEXT: sltu $a0, $zero, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_size_64:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 64
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 64)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_127(ptr %s1, ptr %s2) nounwind {
+; LA32-LABEL: bcmp_size_127:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: ori $a2, $zero, 127
+; LA32-NEXT: bl bcmp
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: bcmp_size_127:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: addi.d $sp, $sp, -16
+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: ori $a2, $zero, 127
+; LA64-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 127)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_128(ptr %s1, ptr %s2) nounwind {
+; LA32-LABEL: bcmp_size_128:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: ori $a2, $zero, 128
+; LA32-NEXT: bl bcmp
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: bcmp_size_128:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: addi.d $sp, $sp, -16
+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: ori $a2, $zero, 128
+; LA64-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 128)
+ ret i32 %bcmp
+}
+
+define signext i32 @bcmp_size_runtime(ptr %s1, ptr %s2, iGRLen %len) nounwind {
+; LA32-LABEL: bcmp_size_runtime:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: bl bcmp
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: bcmp_size_runtime:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: addi.d $sp, $sp, -16
+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen %len)
+ ret i32 %bcmp
+}
+
+define i1 @bcmp_eq_zero(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: bcmp_eq_zero:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a2, $a0, 0
+; LA32-UAL-NEXT: ld.w $a3, $a1, 0
+; LA32-UAL-NEXT: ld.w $a4, $a0, 4
+; LA32-UAL-NEXT: ld.w $a5, $a1, 4
+; LA32-UAL-NEXT: ld.w $a6, $a0, 8
+; LA32-UAL-NEXT: ld.w $a7, $a1, 8
+; LA32-UAL-NEXT: ld.w $a0, $a0, 12
+; LA32-UAL-NEXT: ld.w $a1, $a1, 12
+; LA32-UAL-NEXT: xor $a2, $a2, $a3
+; LA32-UAL-NEXT: xor $a3, $a4, $a5
+; LA32-UAL-NEXT: xor $a4, $a6, $a7
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: or $a1, $a2, $a3
+; LA32-UAL-NEXT: or $a0, $a4, $a0
+; LA32-UAL-NEXT: or $a0, $a1, $a0
+; LA32-UAL-NEXT: sltui $a0, $a0, 1
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_eq_zero:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.d $a2, $a0, 0
+; LA64-UAL-NEXT: ld.d $a3, $a1, 0
+; LA64-UAL-NEXT: ld.d $a0, $a0, 8
+; LA64-UAL-NEXT: ld.d $a1, $a1, 8
+; LA64-UAL-NEXT: xor $a2, $a2, $a3
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: or $a0, $a2, $a0
+; LA64-UAL-NEXT: sltui $a0, $a0, 1
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_eq_zero:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 16
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: sltui $a0, $a0, 1
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_eq_zero:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 16
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: sltui $a0, $a0, 1
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 16)
+ %ret = icmp eq i32 %bcmp, 0
+ ret i1 %ret
+}
+
+define i1 @bcmp_lt_zero(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: bcmp_lt_zero:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: move $a0, $zero
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_lt_zero:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: move $a0, $zero
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_lt_zero:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 4
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: srli.w $a0, $a0, 31
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_lt_zero:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 4
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: slti $a0, $a0, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4)
+ %ret = icmp slt i32 %bcmp, 0
+ ret i1 %ret
+}
+
+define i1 @bcmp_gt_zero(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: bcmp_gt_zero:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a0, $a0, 0
+; LA32-UAL-NEXT: ld.w $a1, $a1, 0
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: sltu $a0, $zero, $a0
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_gt_zero:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.w $a0, $a0, 0
+; LA64-UAL-NEXT: ld.w $a1, $a1, 0
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: sltu $a0, $zero, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_gt_zero:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 4
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: slt $a0, $zero, $a0
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_gt_zero:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 4
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: slt $a0, $zero, $a0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4)
+ %ret = icmp sgt i32 %bcmp, 0
+ ret i1 %ret
+}
+
+define i1 @bcmp_le_zero(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: bcmp_le_zero:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a0, $a0, 0
+; LA32-UAL-NEXT: ld.w $a1, $a1, 0
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: sltu $a0, $zero, $a0
+; LA32-UAL-NEXT: slti $a0, $a0, 1
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_le_zero:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.w $a0, $a0, 0
+; LA64-UAL-NEXT: ld.w $a1, $a1, 0
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: sltu $a0, $zero, $a0
+; LA64-UAL-NEXT: slti $a0, $a0, 1
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_le_zero:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 4
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: slti $a0, $a0, 1
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_le_zero:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 4
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: slti $a0, $a0, 1
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4)
+ %ret = icmp slt i32 %bcmp, 1
+ ret i1 %ret
+}
+
+define i1 @bcmp_ge_zero(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: bcmp_ge_zero:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ori $a0, $zero, 1
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: bcmp_ge_zero:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ori $a0, $zero, 1
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: bcmp_ge_zero:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 4
+; LA32-NUAL-NEXT: bl bcmp
+; LA32-NUAL-NEXT: addi.w $a1, $zero, -1
+; LA32-NUAL-NEXT: slt $a0, $a1, $a0
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: bcmp_ge_zero:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 4
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(bcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: addi.w $a1, $zero, -1
+; LA64-NUAL-NEXT: slt $a0, $a1, $a0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iGRLen 4)
+ %ret = icmp sgt i32 %bcmp, -1
+ ret i1 %ret
+}
+
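+; Unlike bcmp, memcmp must return an ordered result, so the inline expansion
+; byte-swaps each loaded block and compares the blocks as big-endian integers.
+; Without +ual the calls below stay as libcalls.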
+define signext i32 @memcmp_size_0(ptr %s1, ptr %s2) nounwind {
+; CHECK-LABEL: memcmp_size_0:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: move $a0, $zero
+; CHECK-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 0)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_1(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: memcmp_size_1:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.bu $a0, $a0, 0
+; LA32-UAL-NEXT: ld.bu $a1, $a1, 0
+; LA32-UAL-NEXT: sub.w $a0, $a0, $a1
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_size_1:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.bu $a0, $a0, 0
+; LA64-UAL-NEXT: ld.bu $a1, $a1, 0
+; LA64-UAL-NEXT: sub.d $a0, $a0, $a1
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_size_1:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 1
+; LA32-NUAL-NEXT: bl memcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_size_1:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 1
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 1)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_2(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: memcmp_size_2:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.hu $a0, $a0, 0
+; LA32-UAL-NEXT: ld.hu $a1, $a1, 0
+; LA32-UAL-NEXT: srli.w $a2, $a0, 8
+; LA32-UAL-NEXT: slli.w $a0, $a0, 8
+; LA32-UAL-NEXT: or $a0, $a0, $a2
+; LA32-UAL-NEXT: srli.w $a2, $a1, 8
+; LA32-UAL-NEXT: slli.w $a1, $a1, 8
+; LA32-UAL-NEXT: or $a1, $a1, $a2
+; LA32-UAL-NEXT: lu12i.w $a2, 15
+; LA32-UAL-NEXT: ori $a2, $a2, 4095
+; LA32-UAL-NEXT: and $a0, $a0, $a2
+; LA32-UAL-NEXT: and $a1, $a1, $a2
+; LA32-UAL-NEXT: sub.w $a0, $a0, $a1
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_size_2:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.h $a0, $a0, 0
+; LA64-UAL-NEXT: ld.h $a1, $a1, 0
+; LA64-UAL-NEXT: revb.2h $a0, $a0
+; LA64-UAL-NEXT: revb.2h $a1, $a1
+; LA64-UAL-NEXT: bstrpick.d $a0, $a0, 15, 0
+; LA64-UAL-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-UAL-NEXT: sub.d $a0, $a0, $a1
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_size_2:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 2
+; LA32-NUAL-NEXT: bl memcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_size_2:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 2
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 2)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_3(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: memcmp_size_3:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.bu $a2, $a0, 2
+; LA32-UAL-NEXT: ld.hu $a0, $a0, 0
+; LA32-UAL-NEXT: ld.bu $a3, $a1, 2
+; LA32-UAL-NEXT: ld.hu $a1, $a1, 0
+; LA32-UAL-NEXT: lu12i.w $a4, 15
+; LA32-UAL-NEXT: ori $a4, $a4, 3840
+; LA32-UAL-NEXT: and $a5, $a0, $a4
+; LA32-UAL-NEXT: or $a2, $a5, $a2
+; LA32-UAL-NEXT: slli.w $a2, $a2, 8
+; LA32-UAL-NEXT: slli.w $a0, $a0, 24
+; LA32-UAL-NEXT: or $a0, $a2, $a0
+; LA32-UAL-NEXT: and $a2, $a1, $a4
+; LA32-UAL-NEXT: or $a2, $a2, $a3
+; LA32-UAL-NEXT: slli.w $a2, $a2, 8
+; LA32-UAL-NEXT: slli.w $a1, $a1, 24
+; LA32-UAL-NEXT: or $a1, $a2, $a1
+; LA32-UAL-NEXT: sltu $a2, $a0, $a1
+; LA32-UAL-NEXT: sltu $a0, $a1, $a0
+; LA32-UAL-NEXT: sub.w $a0, $a0, $a2
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_size_3:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.bu $a2, $a0, 2
+; LA64-UAL-NEXT: ld.hu $a0, $a0, 0
+; LA64-UAL-NEXT: ld.bu $a3, $a1, 2
+; LA64-UAL-NEXT: ld.hu $a1, $a1, 0
+; LA64-UAL-NEXT: slli.d $a2, $a2, 16
+; LA64-UAL-NEXT: or $a0, $a0, $a2
+; LA64-UAL-NEXT: slli.d $a2, $a3, 16
+; LA64-UAL-NEXT: or $a1, $a1, $a2
+; LA64-UAL-NEXT: revb.2w $a0, $a0
+; LA64-UAL-NEXT: addi.w $a0, $a0, 0
+; LA64-UAL-NEXT: revb.2w $a1, $a1
+; LA64-UAL-NEXT: addi.w $a1, $a1, 0
+; LA64-UAL-NEXT: sltu $a2, $a0, $a1
+; LA64-UAL-NEXT: sltu $a0, $a1, $a0
+; LA64-UAL-NEXT: sub.d $a0, $a0, $a2
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_size_3:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 3
+; LA32-NUAL-NEXT: bl memcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_size_3:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 3
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 3)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_4(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: memcmp_size_4:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a0, $a0, 0
+; LA32-UAL-NEXT: ld.w $a1, $a1, 0
+; LA32-UAL-NEXT: srli.w $a2, $a0, 8
+; LA32-UAL-NEXT: lu12i.w $a3, 15
+; LA32-UAL-NEXT: ori $a3, $a3, 3840
+; LA32-UAL-NEXT: and $a2, $a2, $a3
+; LA32-UAL-NEXT: srli.w $a4, $a0, 24
+; LA32-UAL-NEXT: or $a2, $a2, $a4
+; LA32-UAL-NEXT: and $a4, $a0, $a3
+; LA32-UAL-NEXT: slli.w $a4, $a4, 8
+; LA32-UAL-NEXT: slli.w $a0, $a0, 24
+; LA32-UAL-NEXT: or $a0, $a0, $a4
+; LA32-UAL-NEXT: or $a0, $a0, $a2
+; LA32-UAL-NEXT: srli.w $a2, $a1, 8
+; LA32-UAL-NEXT: and $a2, $a2, $a3
+; LA32-UAL-NEXT: srli.w $a4, $a1, 24
+; LA32-UAL-NEXT: or $a2, $a2, $a4
+; LA32-UAL-NEXT: and $a3, $a1, $a3
+; LA32-UAL-NEXT: slli.w $a3, $a3, 8
+; LA32-UAL-NEXT: slli.w $a1, $a1, 24
+; LA32-UAL-NEXT: or $a1, $a1, $a3
+; LA32-UAL-NEXT: or $a1, $a1, $a2
+; LA32-UAL-NEXT: sltu $a2, $a0, $a1
+; LA32-UAL-NEXT: sltu $a0, $a1, $a0
+; LA32-UAL-NEXT: sub.w $a0, $a0, $a2
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_size_4:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.w $a0, $a0, 0
+; LA64-UAL-NEXT: ld.w $a1, $a1, 0
+; LA64-UAL-NEXT: revb.2w $a0, $a0
+; LA64-UAL-NEXT: addi.w $a0, $a0, 0
+; LA64-UAL-NEXT: revb.2w $a1, $a1
+; LA64-UAL-NEXT: addi.w $a1, $a1, 0
+; LA64-UAL-NEXT: sltu $a2, $a0, $a1
+; LA64-UAL-NEXT: sltu $a0, $a1, $a0
+; LA64-UAL-NEXT: sub.d $a0, $a0, $a2
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_size_4:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 4
+; LA32-NUAL-NEXT: bl memcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_size_4:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 4
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_5(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: memcmp_size_5:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a2, $a0, 0
+; LA32-UAL-NEXT: ld.w $a3, $a1, 0
+; LA32-UAL-NEXT: srli.w $a4, $a2, 8
+; LA32-UAL-NEXT: lu12i.w $a5, 15
+; LA32-UAL-NEXT: ori $a5, $a5, 3840
+; LA32-UAL-NEXT: and $a4, $a4, $a5
+; LA32-UAL-NEXT: srli.w $a6, $a2, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: and $a6, $a2, $a5
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a2, $a2, 24
+; LA32-UAL-NEXT: or $a2, $a2, $a6
+; LA32-UAL-NEXT: or $a2, $a2, $a4
+; LA32-UAL-NEXT: srli.w $a4, $a3, 8
+; LA32-UAL-NEXT: and $a4, $a4, $a5
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: and $a5, $a3, $a5
+; LA32-UAL-NEXT: slli.w $a5, $a5, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: or $a3, $a3, $a4
+; LA32-UAL-NEXT: bne $a2, $a3, .LBB28_2
+; LA32-UAL-NEXT: # %bb.1: # %loadbb1
+; LA32-UAL-NEXT: ld.bu $a0, $a0, 4
+; LA32-UAL-NEXT: ld.bu $a1, $a1, 4
+; LA32-UAL-NEXT: sub.w $a0, $a0, $a1
+; LA32-UAL-NEXT: ret
+; LA32-UAL-NEXT: .LBB28_2: # %res_block
+; LA32-UAL-NEXT: sltu $a0, $a2, $a3
+; LA32-UAL-NEXT: sub.w $a0, $zero, $a0
+; LA32-UAL-NEXT: ori $a0, $a0, 1
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_size_5:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.bu $a2, $a0, 4
+; LA64-UAL-NEXT: ld.wu $a0, $a0, 0
+; LA64-UAL-NEXT: ld.bu $a3, $a1, 4
+; LA64-UAL-NEXT: ld.wu $a1, $a1, 0
+; LA64-UAL-NEXT: slli.d $a2, $a2, 32
+; LA64-UAL-NEXT: or $a0, $a0, $a2
+; LA64-UAL-NEXT: slli.d $a2, $a3, 32
+; LA64-UAL-NEXT: or $a1, $a1, $a2
+; LA64-UAL-NEXT: revb.d $a0, $a0
+; LA64-UAL-NEXT: revb.d $a1, $a1
+; LA64-UAL-NEXT: sltu $a2, $a0, $a1
+; LA64-UAL-NEXT: sltu $a0, $a1, $a0
+; LA64-UAL-NEXT: sub.d $a0, $a0, $a2
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_size_5:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 5
+; LA32-NUAL-NEXT: bl memcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_size_5:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 5
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 5)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_6(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: memcmp_size_6:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a3, $a0, 0
+; LA32-UAL-NEXT: ld.w $a4, $a1, 0
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: lu12i.w $a2, 15
+; LA32-UAL-NEXT: ori $a6, $a2, 3840
+; LA32-UAL-NEXT: and $a5, $a5, $a6
+; LA32-UAL-NEXT: srli.w $a7, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a7
+; LA32-UAL-NEXT: and $a7, $a3, $a6
+; LA32-UAL-NEXT: slli.w $a7, $a7, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a7
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a6
+; LA32-UAL-NEXT: srli.w $a7, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a7
+; LA32-UAL-NEXT: and $a6, $a4, $a6
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB29_3
+; LA32-UAL-NEXT: # %bb.1: # %loadbb1
+; LA32-UAL-NEXT: ld.hu $a0, $a0, 4
+; LA32-UAL-NEXT: ld.hu $a1, $a1, 4
+; LA32-UAL-NEXT: srli.w $a3, $a0, 8
+; LA32-UAL-NEXT: slli.w $a0, $a0, 8
+; LA32-UAL-NEXT: or $a0, $a0, $a3
+; LA32-UAL-NEXT: srli.w $a3, $a1, 8
+; LA32-UAL-NEXT: slli.w $a1, $a1, 8
+; LA32-UAL-NEXT: or $a1, $a1, $a3
+; LA32-UAL-NEXT: ori $a2, $a2, 4095
+; LA32-UAL-NEXT: and $a3, $a0, $a2
+; LA32-UAL-NEXT: and $a4, $a1, $a2
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB29_3
+; LA32-UAL-NEXT: # %bb.2:
+; LA32-UAL-NEXT: move $a0, $zero
+; LA32-UAL-NEXT: ret
+; LA32-UAL-NEXT: .LBB29_3: # %res_block
+; LA32-UAL-NEXT: sltu $a0, $a3, $a4
+; LA32-UAL-NEXT: sub.w $a0, $zero, $a0
+; LA32-UAL-NEXT: ori $a0, $a0, 1
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_size_6:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.hu $a2, $a0, 4
+; LA64-UAL-NEXT: ld.wu $a0, $a0, 0
+; LA64-UAL-NEXT: ld.hu $a3, $a1, 4
+; LA64-UAL-NEXT: ld.wu $a1, $a1, 0
+; LA64-UAL-NEXT: slli.d $a2, $a2, 32
+; LA64-UAL-NEXT: or $a0, $a0, $a2
+; LA64-UAL-NEXT: slli.d $a2, $a3, 32
+; LA64-UAL-NEXT: or $a1, $a1, $a2
+; LA64-UAL-NEXT: revb.d $a0, $a0
+; LA64-UAL-NEXT: revb.d $a1, $a1
+; LA64-UAL-NEXT: sltu $a2, $a0, $a1
+; LA64-UAL-NEXT: sltu $a0, $a1, $a0
+; LA64-UAL-NEXT: sub.d $a0, $a0, $a2
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_size_6:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 6
+; LA32-NUAL-NEXT: bl memcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_size_6:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 6
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 6)
+ ret i32 %memcmp
+}
+
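+; When the size is not a multiple of the block width, the final block is an
+; overlapping load (offsets 0 and 3 here) rather than a chain of narrower tail
+; loads.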
+define signext i32 @memcmp_size_7(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: memcmp_size_7:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a3, $a0, 0
+; LA32-UAL-NEXT: ld.w $a4, $a1, 0
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: lu12i.w $a2, 15
+; LA32-UAL-NEXT: ori $a2, $a2, 3840
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a3, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a6
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a4, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB30_3
+; LA32-UAL-NEXT: # %bb.1: # %loadbb1
+; LA32-UAL-NEXT: ld.w $a0, $a0, 3
+; LA32-UAL-NEXT: ld.w $a1, $a1, 3
+; LA32-UAL-NEXT: srli.w $a3, $a0, 8
+; LA32-UAL-NEXT: and $a3, $a3, $a2
+; LA32-UAL-NEXT: srli.w $a4, $a0, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a4
+; LA32-UAL-NEXT: and $a4, $a0, $a2
+; LA32-UAL-NEXT: slli.w $a4, $a4, 8
+; LA32-UAL-NEXT: slli.w $a0, $a0, 24
+; LA32-UAL-NEXT: or $a0, $a0, $a4
+; LA32-UAL-NEXT: or $a3, $a0, $a3
+; LA32-UAL-NEXT: srli.w $a0, $a1, 8
+; LA32-UAL-NEXT: and $a0, $a0, $a2
+; LA32-UAL-NEXT: srli.w $a4, $a1, 24
+; LA32-UAL-NEXT: or $a0, $a0, $a4
+; LA32-UAL-NEXT: and $a2, $a1, $a2
+; LA32-UAL-NEXT: slli.w $a2, $a2, 8
+; LA32-UAL-NEXT: slli.w $a1, $a1, 24
+; LA32-UAL-NEXT: or $a1, $a1, $a2
+; LA32-UAL-NEXT: or $a4, $a1, $a0
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB30_3
+; LA32-UAL-NEXT: # %bb.2:
+; LA32-UAL-NEXT: move $a0, $zero
+; LA32-UAL-NEXT: ret
+; LA32-UAL-NEXT: .LBB30_3: # %res_block
+; LA32-UAL-NEXT: sltu $a0, $a3, $a4
+; LA32-UAL-NEXT: sub.w $a0, $zero, $a0
+; LA32-UAL-NEXT: ori $a0, $a0, 1
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_size_7:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.w $a2, $a0, 0
+; LA64-UAL-NEXT: ld.w $a3, $a1, 0
+; LA64-UAL-NEXT: revb.2w $a2, $a2
+; LA64-UAL-NEXT: addi.w $a4, $a2, 0
+; LA64-UAL-NEXT: revb.2w $a3, $a3
+; LA64-UAL-NEXT: addi.w $a5, $a3, 0
+; LA64-UAL-NEXT: bne $a4, $a5, .LBB30_3
+; LA64-UAL-NEXT: # %bb.1: # %loadbb1
+; LA64-UAL-NEXT: ld.w $a0, $a0, 3
+; LA64-UAL-NEXT: ld.w $a1, $a1, 3
+; LA64-UAL-NEXT: revb.2w $a2, $a0
+; LA64-UAL-NEXT: addi.w $a0, $a2, 0
+; LA64-UAL-NEXT: revb.2w $a3, $a1
+; LA64-UAL-NEXT: addi.w $a1, $a3, 0
+; LA64-UAL-NEXT: bne $a0, $a1, .LBB30_3
+; LA64-UAL-NEXT: # %bb.2:
+; LA64-UAL-NEXT: move $a0, $zero
+; LA64-UAL-NEXT: ret
+; LA64-UAL-NEXT: .LBB30_3: # %res_block
+; LA64-UAL-NEXT: addi.w $a0, $a3, 0
+; LA64-UAL-NEXT: addi.w $a1, $a2, 0
+; LA64-UAL-NEXT: sltu $a0, $a1, $a0
+; LA64-UAL-NEXT: sub.d $a0, $zero, $a0
+; LA64-UAL-NEXT: ori $a0, $a0, 1
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_size_7:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 7
+; LA32-NUAL-NEXT: bl memcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_size_7:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 7
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 7)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_8(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: memcmp_size_8:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a3, $a0, 0
+; LA32-UAL-NEXT: ld.w $a4, $a1, 0
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: lu12i.w $a2, 15
+; LA32-UAL-NEXT: ori $a2, $a2, 3840
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a3, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a6
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a4, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB31_3
+; LA32-UAL-NEXT: # %bb.1: # %loadbb1
+; LA32-UAL-NEXT: ld.w $a0, $a0, 4
+; LA32-UAL-NEXT: ld.w $a1, $a1, 4
+; LA32-UAL-NEXT: srli.w $a3, $a0, 8
+; LA32-UAL-NEXT: and $a3, $a3, $a2
+; LA32-UAL-NEXT: srli.w $a4, $a0, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a4
+; LA32-UAL-NEXT: and $a4, $a0, $a2
+; LA32-UAL-NEXT: slli.w $a4, $a4, 8
+; LA32-UAL-NEXT: slli.w $a0, $a0, 24
+; LA32-UAL-NEXT: or $a0, $a0, $a4
+; LA32-UAL-NEXT: or $a3, $a0, $a3
+; LA32-UAL-NEXT: srli.w $a0, $a1, 8
+; LA32-UAL-NEXT: and $a0, $a0, $a2
+; LA32-UAL-NEXT: srli.w $a4, $a1, 24
+; LA32-UAL-NEXT: or $a0, $a0, $a4
+; LA32-UAL-NEXT: and $a2, $a1, $a2
+; LA32-UAL-NEXT: slli.w $a2, $a2, 8
+; LA32-UAL-NEXT: slli.w $a1, $a1, 24
+; LA32-UAL-NEXT: or $a1, $a1, $a2
+; LA32-UAL-NEXT: or $a4, $a1, $a0
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB31_3
+; LA32-UAL-NEXT: # %bb.2:
+; LA32-UAL-NEXT: move $a0, $zero
+; LA32-UAL-NEXT: ret
+; LA32-UAL-NEXT: .LBB31_3: # %res_block
+; LA32-UAL-NEXT: sltu $a0, $a3, $a4
+; LA32-UAL-NEXT: sub.w $a0, $zero, $a0
+; LA32-UAL-NEXT: ori $a0, $a0, 1
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_size_8:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.d $a0, $a0, 0
+; LA64-UAL-NEXT: ld.d $a1, $a1, 0
+; LA64-UAL-NEXT: revb.d $a0, $a0
+; LA64-UAL-NEXT: revb.d $a1, $a1
+; LA64-UAL-NEXT: sltu $a2, $a0, $a1
+; LA64-UAL-NEXT: sltu $a0, $a1, $a0
+; LA64-UAL-NEXT: sub.d $a0, $a0, $a2
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_size_8:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 8
+; LA32-NUAL-NEXT: bl memcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_size_8:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 8
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 8)
+ ret i32 %memcmp
+}
+
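+; Larger sizes expand to a chain of per-block compares that branch to a shared
+; %res_block, which derives the ordered result from the first mismatching pair.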
+define signext i32 @memcmp_size_15(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: memcmp_size_15:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a3, $a0, 0
+; LA32-UAL-NEXT: ld.w $a4, $a1, 0
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: lu12i.w $a2, 15
+; LA32-UAL-NEXT: ori $a2, $a2, 3840
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a3, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a6
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a4, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB32_5
+; LA32-UAL-NEXT: # %bb.1: # %loadbb1
+; LA32-UAL-NEXT: ld.w $a3, $a0, 4
+; LA32-UAL-NEXT: ld.w $a4, $a1, 4
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a3, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a6
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a4, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB32_5
+; LA32-UAL-NEXT: # %bb.2: # %loadbb2
+; LA32-UAL-NEXT: ld.w $a3, $a0, 8
+; LA32-UAL-NEXT: ld.w $a4, $a1, 8
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a3, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a6
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a4, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB32_5
+; LA32-UAL-NEXT: # %bb.3: # %loadbb3
+; LA32-UAL-NEXT: ld.w $a0, $a0, 11
+; LA32-UAL-NEXT: ld.w $a1, $a1, 11
+; LA32-UAL-NEXT: srli.w $a3, $a0, 8
+; LA32-UAL-NEXT: and $a3, $a3, $a2
+; LA32-UAL-NEXT: srli.w $a4, $a0, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a4
+; LA32-UAL-NEXT: and $a4, $a0, $a2
+; LA32-UAL-NEXT: slli.w $a4, $a4, 8
+; LA32-UAL-NEXT: slli.w $a0, $a0, 24
+; LA32-UAL-NEXT: or $a0, $a0, $a4
+; LA32-UAL-NEXT: or $a3, $a0, $a3
+; LA32-UAL-NEXT: srli.w $a0, $a1, 8
+; LA32-UAL-NEXT: and $a0, $a0, $a2
+; LA32-UAL-NEXT: srli.w $a4, $a1, 24
+; LA32-UAL-NEXT: or $a0, $a0, $a4
+; LA32-UAL-NEXT: and $a2, $a1, $a2
+; LA32-UAL-NEXT: slli.w $a2, $a2, 8
+; LA32-UAL-NEXT: slli.w $a1, $a1, 24
+; LA32-UAL-NEXT: or $a1, $a1, $a2
+; LA32-UAL-NEXT: or $a4, $a1, $a0
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB32_5
+; LA32-UAL-NEXT: # %bb.4:
+; LA32-UAL-NEXT: move $a0, $zero
+; LA32-UAL-NEXT: ret
+; LA32-UAL-NEXT: .LBB32_5: # %res_block
+; LA32-UAL-NEXT: sltu $a0, $a3, $a4
+; LA32-UAL-NEXT: sub.w $a0, $zero, $a0
+; LA32-UAL-NEXT: ori $a0, $a0, 1
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_size_15:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.d $a2, $a0, 0
+; LA64-UAL-NEXT: ld.d $a3, $a1, 0
+; LA64-UAL-NEXT: revb.d $a2, $a2
+; LA64-UAL-NEXT: revb.d $a3, $a3
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB32_3
+; LA64-UAL-NEXT: # %bb.1: # %loadbb1
+; LA64-UAL-NEXT: ld.d $a0, $a0, 7
+; LA64-UAL-NEXT: ld.d $a1, $a1, 7
+; LA64-UAL-NEXT: revb.d $a2, $a0
+; LA64-UAL-NEXT: revb.d $a3, $a1
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB32_3
+; LA64-UAL-NEXT: # %bb.2:
+; LA64-UAL-NEXT: move $a0, $zero
+; LA64-UAL-NEXT: ret
+; LA64-UAL-NEXT: .LBB32_3: # %res_block
+; LA64-UAL-NEXT: sltu $a0, $a2, $a3
+; LA64-UAL-NEXT: sub.d $a0, $zero, $a0
+; LA64-UAL-NEXT: ori $a0, $a0, 1
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_size_15:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 15
+; LA32-NUAL-NEXT: bl memcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_size_15:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 15
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 15)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_16(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: memcmp_size_16:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a3, $a0, 0
+; LA32-UAL-NEXT: ld.w $a4, $a1, 0
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: lu12i.w $a2, 15
+; LA32-UAL-NEXT: ori $a2, $a2, 3840
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a3, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a6
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a4, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB33_5
+; LA32-UAL-NEXT: # %bb.1: # %loadbb1
+; LA32-UAL-NEXT: ld.w $a3, $a0, 4
+; LA32-UAL-NEXT: ld.w $a4, $a1, 4
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a3, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a6
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a4, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB33_5
+; LA32-UAL-NEXT: # %bb.2: # %loadbb2
+; LA32-UAL-NEXT: ld.w $a3, $a0, 8
+; LA32-UAL-NEXT: ld.w $a4, $a1, 8
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a3, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a6
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a4, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB33_5
+; LA32-UAL-NEXT: # %bb.3: # %loadbb3
+; LA32-UAL-NEXT: ld.w $a0, $a0, 12
+; LA32-UAL-NEXT: ld.w $a1, $a1, 12
+; LA32-UAL-NEXT: srli.w $a3, $a0, 8
+; LA32-UAL-NEXT: and $a3, $a3, $a2
+; LA32-UAL-NEXT: srli.w $a4, $a0, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a4
+; LA32-UAL-NEXT: and $a4, $a0, $a2
+; LA32-UAL-NEXT: slli.w $a4, $a4, 8
+; LA32-UAL-NEXT: slli.w $a0, $a0, 24
+; LA32-UAL-NEXT: or $a0, $a0, $a4
+; LA32-UAL-NEXT: or $a3, $a0, $a3
+; LA32-UAL-NEXT: srli.w $a0, $a1, 8
+; LA32-UAL-NEXT: and $a0, $a0, $a2
+; LA32-UAL-NEXT: srli.w $a4, $a1, 24
+; LA32-UAL-NEXT: or $a0, $a0, $a4
+; LA32-UAL-NEXT: and $a2, $a1, $a2
+; LA32-UAL-NEXT: slli.w $a2, $a2, 8
+; LA32-UAL-NEXT: slli.w $a1, $a1, 24
+; LA32-UAL-NEXT: or $a1, $a1, $a2
+; LA32-UAL-NEXT: or $a4, $a1, $a0
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB33_5
+; LA32-UAL-NEXT: # %bb.4:
+; LA32-UAL-NEXT: move $a0, $zero
+; LA32-UAL-NEXT: ret
+; LA32-UAL-NEXT: .LBB33_5: # %res_block
+; LA32-UAL-NEXT: sltu $a0, $a3, $a4
+; LA32-UAL-NEXT: sub.w $a0, $zero, $a0
+; LA32-UAL-NEXT: ori $a0, $a0, 1
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_size_16:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.d $a2, $a0, 0
+; LA64-UAL-NEXT: ld.d $a3, $a1, 0
+; LA64-UAL-NEXT: revb.d $a2, $a2
+; LA64-UAL-NEXT: revb.d $a3, $a3
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB33_3
+; LA64-UAL-NEXT: # %bb.1: # %loadbb1
+; LA64-UAL-NEXT: ld.d $a0, $a0, 8
+; LA64-UAL-NEXT: ld.d $a1, $a1, 8
+; LA64-UAL-NEXT: revb.d $a2, $a0
+; LA64-UAL-NEXT: revb.d $a3, $a1
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB33_3
+; LA64-UAL-NEXT: # %bb.2:
+; LA64-UAL-NEXT: move $a0, $zero
+; LA64-UAL-NEXT: ret
+; LA64-UAL-NEXT: .LBB33_3: # %res_block
+; LA64-UAL-NEXT: sltu $a0, $a2, $a3
+; LA64-UAL-NEXT: sub.d $a0, $zero, $a0
+; LA64-UAL-NEXT: ori $a0, $a0, 1
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_size_16:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 16
+; LA32-NUAL-NEXT: bl memcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_size_16:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 16
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 16)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_31(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: memcmp_size_31:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a3, $a0, 0
+; LA32-UAL-NEXT: ld.w $a4, $a1, 0
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: lu12i.w $a2, 15
+; LA32-UAL-NEXT: ori $a2, $a2, 3840
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a3, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a6
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a4, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB34_9
+; LA32-UAL-NEXT: # %bb.1: # %loadbb1
+; LA32-UAL-NEXT: ld.w $a3, $a0, 4
+; LA32-UAL-NEXT: ld.w $a4, $a1, 4
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a3, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a6
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a4, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB34_9
+; LA32-UAL-NEXT: # %bb.2: # %loadbb2
+; LA32-UAL-NEXT: ld.w $a3, $a0, 8
+; LA32-UAL-NEXT: ld.w $a4, $a1, 8
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a3, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a6
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a4, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB34_9
+; LA32-UAL-NEXT: # %bb.3: # %loadbb3
+; LA32-UAL-NEXT: ld.w $a3, $a0, 12
+; LA32-UAL-NEXT: ld.w $a4, $a1, 12
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a3, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a6
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a4, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB34_9
+; LA32-UAL-NEXT: # %bb.4: # %loadbb4
+; LA32-UAL-NEXT: ld.w $a3, $a0, 16
+; LA32-UAL-NEXT: ld.w $a4, $a1, 16
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a3, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a6
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a4, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB34_9
+; LA32-UAL-NEXT: # %bb.5: # %loadbb5
+; LA32-UAL-NEXT: ld.w $a3, $a0, 20
+; LA32-UAL-NEXT: ld.w $a4, $a1, 20
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a3, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a6
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a4, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB34_9
+; LA32-UAL-NEXT: # %bb.6: # %loadbb6
+; LA32-UAL-NEXT: ld.w $a3, $a0, 24
+; LA32-UAL-NEXT: ld.w $a4, $a1, 24
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a3, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a6
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a4, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB34_9
+; LA32-UAL-NEXT: # %bb.7: # %loadbb7
+; LA32-UAL-NEXT: ld.w $a0, $a0, 27
+; LA32-UAL-NEXT: ld.w $a1, $a1, 27
+; LA32-UAL-NEXT: srli.w $a3, $a0, 8
+; LA32-UAL-NEXT: and $a3, $a3, $a2
+; LA32-UAL-NEXT: srli.w $a4, $a0, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a4
+; LA32-UAL-NEXT: and $a4, $a0, $a2
+; LA32-UAL-NEXT: slli.w $a4, $a4, 8
+; LA32-UAL-NEXT: slli.w $a0, $a0, 24
+; LA32-UAL-NEXT: or $a0, $a0, $a4
+; LA32-UAL-NEXT: or $a3, $a0, $a3
+; LA32-UAL-NEXT: srli.w $a0, $a1, 8
+; LA32-UAL-NEXT: and $a0, $a0, $a2
+; LA32-UAL-NEXT: srli.w $a4, $a1, 24
+; LA32-UAL-NEXT: or $a0, $a0, $a4
+; LA32-UAL-NEXT: and $a2, $a1, $a2
+; LA32-UAL-NEXT: slli.w $a2, $a2, 8
+; LA32-UAL-NEXT: slli.w $a1, $a1, 24
+; LA32-UAL-NEXT: or $a1, $a1, $a2
+; LA32-UAL-NEXT: or $a4, $a1, $a0
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB34_9
+; LA32-UAL-NEXT: # %bb.8:
+; LA32-UAL-NEXT: move $a0, $zero
+; LA32-UAL-NEXT: ret
+; LA32-UAL-NEXT: .LBB34_9: # %res_block
+; LA32-UAL-NEXT: sltu $a0, $a3, $a4
+; LA32-UAL-NEXT: sub.w $a0, $zero, $a0
+; LA32-UAL-NEXT: ori $a0, $a0, 1
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_size_31:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.d $a2, $a0, 0
+; LA64-UAL-NEXT: ld.d $a3, $a1, 0
+; LA64-UAL-NEXT: revb.d $a2, $a2
+; LA64-UAL-NEXT: revb.d $a3, $a3
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB34_5
+; LA64-UAL-NEXT: # %bb.1: # %loadbb1
+; LA64-UAL-NEXT: ld.d $a2, $a0, 8
+; LA64-UAL-NEXT: ld.d $a3, $a1, 8
+; LA64-UAL-NEXT: revb.d $a2, $a2
+; LA64-UAL-NEXT: revb.d $a3, $a3
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB34_5
+; LA64-UAL-NEXT: # %bb.2: # %loadbb2
+; LA64-UAL-NEXT: ld.d $a2, $a0, 16
+; LA64-UAL-NEXT: ld.d $a3, $a1, 16
+; LA64-UAL-NEXT: revb.d $a2, $a2
+; LA64-UAL-NEXT: revb.d $a3, $a3
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB34_5
+; LA64-UAL-NEXT: # %bb.3: # %loadbb3
+; LA64-UAL-NEXT: ld.d $a0, $a0, 23
+; LA64-UAL-NEXT: ld.d $a1, $a1, 23
+; LA64-UAL-NEXT: revb.d $a2, $a0
+; LA64-UAL-NEXT: revb.d $a3, $a1
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB34_5
+; LA64-UAL-NEXT: # %bb.4:
+; LA64-UAL-NEXT: move $a0, $zero
+; LA64-UAL-NEXT: ret
+; LA64-UAL-NEXT: .LBB34_5: # %res_block
+; LA64-UAL-NEXT: sltu $a0, $a2, $a3
+; LA64-UAL-NEXT: sub.d $a0, $zero, $a0
+; LA64-UAL-NEXT: ori $a0, $a0, 1
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_size_31:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 31
+; LA32-NUAL-NEXT: bl memcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_size_31:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 31
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 31)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_32(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: memcmp_size_32:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a3, $a0, 0
+; LA32-UAL-NEXT: ld.w $a4, $a1, 0
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: lu12i.w $a2, 15
+; LA32-UAL-NEXT: ori $a2, $a2, 3840
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a3, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a6
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a4, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB35_9
+; LA32-UAL-NEXT: # %bb.1: # %loadbb1
+; LA32-UAL-NEXT: ld.w $a3, $a0, 4
+; LA32-UAL-NEXT: ld.w $a4, $a1, 4
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a3, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a6
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a4, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB35_9
+; LA32-UAL-NEXT: # %bb.2: # %loadbb2
+; LA32-UAL-NEXT: ld.w $a3, $a0, 8
+; LA32-UAL-NEXT: ld.w $a4, $a1, 8
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a3, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a6
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a4, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB35_9
+; LA32-UAL-NEXT: # %bb.3: # %loadbb3
+; LA32-UAL-NEXT: ld.w $a3, $a0, 12
+; LA32-UAL-NEXT: ld.w $a4, $a1, 12
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a3, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a6
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a4, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB35_9
+; LA32-UAL-NEXT: # %bb.4: # %loadbb4
+; LA32-UAL-NEXT: ld.w $a3, $a0, 16
+; LA32-UAL-NEXT: ld.w $a4, $a1, 16
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a3, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a6
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a4, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB35_9
+; LA32-UAL-NEXT: # %bb.5: # %loadbb5
+; LA32-UAL-NEXT: ld.w $a3, $a0, 20
+; LA32-UAL-NEXT: ld.w $a4, $a1, 20
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a3, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a6
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a4, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB35_9
+; LA32-UAL-NEXT: # %bb.6: # %loadbb6
+; LA32-UAL-NEXT: ld.w $a3, $a0, 24
+; LA32-UAL-NEXT: ld.w $a4, $a1, 24
+; LA32-UAL-NEXT: srli.w $a5, $a3, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a3, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a3, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a3, $a3, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a6
+; LA32-UAL-NEXT: or $a3, $a3, $a5
+; LA32-UAL-NEXT: srli.w $a5, $a4, 8
+; LA32-UAL-NEXT: and $a5, $a5, $a2
+; LA32-UAL-NEXT: srli.w $a6, $a4, 24
+; LA32-UAL-NEXT: or $a5, $a5, $a6
+; LA32-UAL-NEXT: and $a6, $a4, $a2
+; LA32-UAL-NEXT: slli.w $a6, $a6, 8
+; LA32-UAL-NEXT: slli.w $a4, $a4, 24
+; LA32-UAL-NEXT: or $a4, $a4, $a6
+; LA32-UAL-NEXT: or $a4, $a4, $a5
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB35_9
+; LA32-UAL-NEXT: # %bb.7: # %loadbb7
+; LA32-UAL-NEXT: ld.w $a0, $a0, 28
+; LA32-UAL-NEXT: ld.w $a1, $a1, 28
+; LA32-UAL-NEXT: srli.w $a3, $a0, 8
+; LA32-UAL-NEXT: and $a3, $a3, $a2
+; LA32-UAL-NEXT: srli.w $a4, $a0, 24
+; LA32-UAL-NEXT: or $a3, $a3, $a4
+; LA32-UAL-NEXT: and $a4, $a0, $a2
+; LA32-UAL-NEXT: slli.w $a4, $a4, 8
+; LA32-UAL-NEXT: slli.w $a0, $a0, 24
+; LA32-UAL-NEXT: or $a0, $a0, $a4
+; LA32-UAL-NEXT: or $a3, $a0, $a3
+; LA32-UAL-NEXT: srli.w $a0, $a1, 8
+; LA32-UAL-NEXT: and $a0, $a0, $a2
+; LA32-UAL-NEXT: srli.w $a4, $a1, 24
+; LA32-UAL-NEXT: or $a0, $a0, $a4
+; LA32-UAL-NEXT: and $a2, $a1, $a2
+; LA32-UAL-NEXT: slli.w $a2, $a2, 8
+; LA32-UAL-NEXT: slli.w $a1, $a1, 24
+; LA32-UAL-NEXT: or $a1, $a1, $a2
+; LA32-UAL-NEXT: or $a4, $a1, $a0
+; LA32-UAL-NEXT: bne $a3, $a4, .LBB35_9
+; LA32-UAL-NEXT: # %bb.8:
+; LA32-UAL-NEXT: move $a0, $zero
+; LA32-UAL-NEXT: ret
+; LA32-UAL-NEXT: .LBB35_9: # %res_block
+; LA32-UAL-NEXT: sltu $a0, $a3, $a4
+; LA32-UAL-NEXT: sub.w $a0, $zero, $a0
+; LA32-UAL-NEXT: ori $a0, $a0, 1
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_size_32:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.d $a2, $a0, 0
+; LA64-UAL-NEXT: ld.d $a3, $a1, 0
+; LA64-UAL-NEXT: revb.d $a2, $a2
+; LA64-UAL-NEXT: revb.d $a3, $a3
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB35_5
+; LA64-UAL-NEXT: # %bb.1: # %loadbb1
+; LA64-UAL-NEXT: ld.d $a2, $a0, 8
+; LA64-UAL-NEXT: ld.d $a3, $a1, 8
+; LA64-UAL-NEXT: revb.d $a2, $a2
+; LA64-UAL-NEXT: revb.d $a3, $a3
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB35_5
+; LA64-UAL-NEXT: # %bb.2: # %loadbb2
+; LA64-UAL-NEXT: ld.d $a2, $a0, 16
+; LA64-UAL-NEXT: ld.d $a3, $a1, 16
+; LA64-UAL-NEXT: revb.d $a2, $a2
+; LA64-UAL-NEXT: revb.d $a3, $a3
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB35_5
+; LA64-UAL-NEXT: # %bb.3: # %loadbb3
+; LA64-UAL-NEXT: ld.d $a0, $a0, 24
+; LA64-UAL-NEXT: ld.d $a1, $a1, 24
+; LA64-UAL-NEXT: revb.d $a2, $a0
+; LA64-UAL-NEXT: revb.d $a3, $a1
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB35_5
+; LA64-UAL-NEXT: # %bb.4:
+; LA64-UAL-NEXT: move $a0, $zero
+; LA64-UAL-NEXT: ret
+; LA64-UAL-NEXT: .LBB35_5: # %res_block
+; LA64-UAL-NEXT: sltu $a0, $a2, $a3
+; LA64-UAL-NEXT: sub.d $a0, $zero, $a0
+; LA64-UAL-NEXT: ori $a0, $a0, 1
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_size_32:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 32
+; LA32-NUAL-NEXT: bl memcmp
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_size_32:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 32
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 32)
+ ret i32 %memcmp
+}
+
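+; LA32 stops expanding at 32 bytes and emits a libcall for everything below;
+; LA64 with +ual still expands 63 and 64 bytes inline.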
+define signext i32 @memcmp_size_63(ptr %s1, ptr %s2) nounwind {
+; LA32-LABEL: memcmp_size_63:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: ori $a2, $zero, 63
+; LA32-NEXT: bl memcmp
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_size_63:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.d $a2, $a0, 0
+; LA64-UAL-NEXT: ld.d $a3, $a1, 0
+; LA64-UAL-NEXT: revb.d $a2, $a2
+; LA64-UAL-NEXT: revb.d $a3, $a3
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB36_9
+; LA64-UAL-NEXT: # %bb.1: # %loadbb1
+; LA64-UAL-NEXT: ld.d $a2, $a0, 8
+; LA64-UAL-NEXT: ld.d $a3, $a1, 8
+; LA64-UAL-NEXT: revb.d $a2, $a2
+; LA64-UAL-NEXT: revb.d $a3, $a3
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB36_9
+; LA64-UAL-NEXT: # %bb.2: # %loadbb2
+; LA64-UAL-NEXT: ld.d $a2, $a0, 16
+; LA64-UAL-NEXT: ld.d $a3, $a1, 16
+; LA64-UAL-NEXT: revb.d $a2, $a2
+; LA64-UAL-NEXT: revb.d $a3, $a3
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB36_9
+; LA64-UAL-NEXT: # %bb.3: # %loadbb3
+; LA64-UAL-NEXT: ld.d $a2, $a0, 24
+; LA64-UAL-NEXT: ld.d $a3, $a1, 24
+; LA64-UAL-NEXT: revb.d $a2, $a2
+; LA64-UAL-NEXT: revb.d $a3, $a3
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB36_9
+; LA64-UAL-NEXT: # %bb.4: # %loadbb4
+; LA64-UAL-NEXT: ld.d $a2, $a0, 32
+; LA64-UAL-NEXT: ld.d $a3, $a1, 32
+; LA64-UAL-NEXT: revb.d $a2, $a2
+; LA64-UAL-NEXT: revb.d $a3, $a3
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB36_9
+; LA64-UAL-NEXT: # %bb.5: # %loadbb5
+; LA64-UAL-NEXT: ld.d $a2, $a0, 40
+; LA64-UAL-NEXT: ld.d $a3, $a1, 40
+; LA64-UAL-NEXT: revb.d $a2, $a2
+; LA64-UAL-NEXT: revb.d $a3, $a3
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB36_9
+; LA64-UAL-NEXT: # %bb.6: # %loadbb6
+; LA64-UAL-NEXT: ld.d $a2, $a0, 48
+; LA64-UAL-NEXT: ld.d $a3, $a1, 48
+; LA64-UAL-NEXT: revb.d $a2, $a2
+; LA64-UAL-NEXT: revb.d $a3, $a3
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB36_9
+; LA64-UAL-NEXT: # %bb.7: # %loadbb7
+; LA64-UAL-NEXT: ld.d $a0, $a0, 55
+; LA64-UAL-NEXT: ld.d $a1, $a1, 55
+; LA64-UAL-NEXT: revb.d $a2, $a0
+; LA64-UAL-NEXT: revb.d $a3, $a1
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB36_9
+; LA64-UAL-NEXT: # %bb.8:
+; LA64-UAL-NEXT: move $a0, $zero
+; LA64-UAL-NEXT: ret
+; LA64-UAL-NEXT: .LBB36_9: # %res_block
+; LA64-UAL-NEXT: sltu $a0, $a2, $a3
+; LA64-UAL-NEXT: sub.d $a0, $zero, $a0
+; LA64-UAL-NEXT: ori $a0, $a0, 1
+; LA64-UAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_size_63:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 63
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 63)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_64(ptr %s1, ptr %s2) nounwind {
+; LA32-LABEL: memcmp_size_64:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: ori $a2, $zero, 64
+; LA32-NEXT: bl memcmp
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_size_64:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.d $a2, $a0, 0
+; LA64-UAL-NEXT: ld.d $a3, $a1, 0
+; LA64-UAL-NEXT: revb.d $a2, $a2
+; LA64-UAL-NEXT: revb.d $a3, $a3
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB37_9
+; LA64-UAL-NEXT: # %bb.1: # %loadbb1
+; LA64-UAL-NEXT: ld.d $a2, $a0, 8
+; LA64-UAL-NEXT: ld.d $a3, $a1, 8
+; LA64-UAL-NEXT: revb.d $a2, $a2
+; LA64-UAL-NEXT: revb.d $a3, $a3
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB37_9
+; LA64-UAL-NEXT: # %bb.2: # %loadbb2
+; LA64-UAL-NEXT: ld.d $a2, $a0, 16
+; LA64-UAL-NEXT: ld.d $a3, $a1, 16
+; LA64-UAL-NEXT: revb.d $a2, $a2
+; LA64-UAL-NEXT: revb.d $a3, $a3
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB37_9
+; LA64-UAL-NEXT: # %bb.3: # %loadbb3
+; LA64-UAL-NEXT: ld.d $a2, $a0, 24
+; LA64-UAL-NEXT: ld.d $a3, $a1, 24
+; LA64-UAL-NEXT: revb.d $a2, $a2
+; LA64-UAL-NEXT: revb.d $a3, $a3
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB37_9
+; LA64-UAL-NEXT: # %bb.4: # %loadbb4
+; LA64-UAL-NEXT: ld.d $a2, $a0, 32
+; LA64-UAL-NEXT: ld.d $a3, $a1, 32
+; LA64-UAL-NEXT: revb.d $a2, $a2
+; LA64-UAL-NEXT: revb.d $a3, $a3
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB37_9
+; LA64-UAL-NEXT: # %bb.5: # %loadbb5
+; LA64-UAL-NEXT: ld.d $a2, $a0, 40
+; LA64-UAL-NEXT: ld.d $a3, $a1, 40
+; LA64-UAL-NEXT: revb.d $a2, $a2
+; LA64-UAL-NEXT: revb.d $a3, $a3
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB37_9
+; LA64-UAL-NEXT: # %bb.6: # %loadbb6
+; LA64-UAL-NEXT: ld.d $a2, $a0, 48
+; LA64-UAL-NEXT: ld.d $a3, $a1, 48
+; LA64-UAL-NEXT: revb.d $a2, $a2
+; LA64-UAL-NEXT: revb.d $a3, $a3
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB37_9
+; LA64-UAL-NEXT: # %bb.7: # %loadbb7
+; LA64-UAL-NEXT: ld.d $a0, $a0, 56
+; LA64-UAL-NEXT: ld.d $a1, $a1, 56
+; LA64-UAL-NEXT: revb.d $a2, $a0
+; LA64-UAL-NEXT: revb.d $a3, $a1
+; LA64-UAL-NEXT: bne $a2, $a3, .LBB37_9
+; LA64-UAL-NEXT: # %bb.8:
+; LA64-UAL-NEXT: move $a0, $zero
+; LA64-UAL-NEXT: ret
+; LA64-UAL-NEXT: .LBB37_9: # %res_block
+; LA64-UAL-NEXT: sltu $a0, $a2, $a3
+; LA64-UAL-NEXT: sub.d $a0, $zero, $a0
+; LA64-UAL-NEXT: ori $a0, $a0, 1
+; LA64-UAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_size_64:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 64
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 64)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_127(ptr %s1, ptr %s2) nounwind {
+; LA32-LABEL: memcmp_size_127:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: ori $a2, $zero, 127
+; LA32-NEXT: bl memcmp
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: memcmp_size_127:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: addi.d $sp, $sp, -16
+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: ori $a2, $zero, 127
+; LA64-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 127)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_128(ptr %s1, ptr %s2) nounwind {
+; LA32-LABEL: memcmp_size_128:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: ori $a2, $zero, 128
+; LA32-NEXT: bl memcmp
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: memcmp_size_128:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: addi.d $sp, $sp, -16
+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: ori $a2, $zero, 128
+; LA64-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 128)
+ ret i32 %memcmp
+}
+
+define signext i32 @memcmp_size_runtime(ptr %s1, ptr %s2, iGRLen %len) nounwind {
+; LA32-LABEL: memcmp_size_runtime:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: bl memcmp
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: memcmp_size_runtime:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: addi.d $sp, $sp, -16
+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen %len)
+ ret i32 %memcmp
+}
+
+define i1 @memcmp_eq_zero(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: memcmp_eq_zero:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a2, $a0, 0
+; LA32-UAL-NEXT: ld.w $a3, $a1, 0
+; LA32-UAL-NEXT: ld.w $a4, $a0, 4
+; LA32-UAL-NEXT: ld.w $a5, $a1, 4
+; LA32-UAL-NEXT: ld.w $a6, $a0, 8
+; LA32-UAL-NEXT: ld.w $a7, $a1, 8
+; LA32-UAL-NEXT: ld.w $a0, $a0, 12
+; LA32-UAL-NEXT: ld.w $a1, $a1, 12
+; LA32-UAL-NEXT: xor $a2, $a2, $a3
+; LA32-UAL-NEXT: xor $a3, $a4, $a5
+; LA32-UAL-NEXT: xor $a4, $a6, $a7
+; LA32-UAL-NEXT: xor $a0, $a0, $a1
+; LA32-UAL-NEXT: or $a1, $a2, $a3
+; LA32-UAL-NEXT: or $a0, $a4, $a0
+; LA32-UAL-NEXT: or $a0, $a1, $a0
+; LA32-UAL-NEXT: sltui $a0, $a0, 1
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_eq_zero:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.d $a2, $a0, 0
+; LA64-UAL-NEXT: ld.d $a3, $a1, 0
+; LA64-UAL-NEXT: ld.d $a0, $a0, 8
+; LA64-UAL-NEXT: ld.d $a1, $a1, 8
+; LA64-UAL-NEXT: xor $a2, $a2, $a3
+; LA64-UAL-NEXT: xor $a0, $a0, $a1
+; LA64-UAL-NEXT: or $a0, $a2, $a0
+; LA64-UAL-NEXT: sltui $a0, $a0, 1
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_eq_zero:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 16
+; LA32-NUAL-NEXT: bl memcmp
+; LA32-NUAL-NEXT: sltui $a0, $a0, 1
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_eq_zero:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 16
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: sltui $a0, $a0, 1
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 16)
+ %ret = icmp eq i32 %memcmp, 0
+ ret i1 %ret
+}
+
+define i1 @memcmp_lt_zero(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: memcmp_lt_zero:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a0, $a0, 0
+; LA32-UAL-NEXT: ld.w $a1, $a1, 0
+; LA32-UAL-NEXT: srli.w $a2, $a0, 8
+; LA32-UAL-NEXT: lu12i.w $a3, 15
+; LA32-UAL-NEXT: ori $a3, $a3, 3840
+; LA32-UAL-NEXT: and $a2, $a2, $a3
+; LA32-UAL-NEXT: srli.w $a4, $a0, 24
+; LA32-UAL-NEXT: or $a2, $a2, $a4
+; LA32-UAL-NEXT: and $a4, $a0, $a3
+; LA32-UAL-NEXT: slli.w $a4, $a4, 8
+; LA32-UAL-NEXT: slli.w $a0, $a0, 24
+; LA32-UAL-NEXT: or $a0, $a0, $a4
+; LA32-UAL-NEXT: or $a0, $a0, $a2
+; LA32-UAL-NEXT: srli.w $a2, $a1, 8
+; LA32-UAL-NEXT: and $a2, $a2, $a3
+; LA32-UAL-NEXT: srli.w $a4, $a1, 24
+; LA32-UAL-NEXT: or $a2, $a2, $a4
+; LA32-UAL-NEXT: and $a3, $a1, $a3
+; LA32-UAL-NEXT: slli.w $a3, $a3, 8
+; LA32-UAL-NEXT: slli.w $a1, $a1, 24
+; LA32-UAL-NEXT: or $a1, $a1, $a3
+; LA32-UAL-NEXT: or $a1, $a1, $a2
+; LA32-UAL-NEXT: sltu $a0, $a0, $a1
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_lt_zero:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.w $a0, $a0, 0
+; LA64-UAL-NEXT: ld.w $a1, $a1, 0
+; LA64-UAL-NEXT: revb.2w $a0, $a0
+; LA64-UAL-NEXT: addi.w $a0, $a0, 0
+; LA64-UAL-NEXT: revb.2w $a1, $a1
+; LA64-UAL-NEXT: addi.w $a1, $a1, 0
+; LA64-UAL-NEXT: sltu $a0, $a0, $a1
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_lt_zero:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 4
+; LA32-NUAL-NEXT: bl memcmp
+; LA32-NUAL-NEXT: srli.w $a0, $a0, 31
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_lt_zero:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 4
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: slti $a0, $a0, 0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4)
+ %ret = icmp slt i32 %memcmp, 0
+ ret i1 %ret
+}
+
+define i1 @memcmp_gt_zero(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: memcmp_gt_zero:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a0, $a0, 0
+; LA32-UAL-NEXT: ld.w $a1, $a1, 0
+; LA32-UAL-NEXT: srli.w $a2, $a0, 8
+; LA32-UAL-NEXT: lu12i.w $a3, 15
+; LA32-UAL-NEXT: ori $a3, $a3, 3840
+; LA32-UAL-NEXT: and $a2, $a2, $a3
+; LA32-UAL-NEXT: srli.w $a4, $a0, 24
+; LA32-UAL-NEXT: or $a2, $a2, $a4
+; LA32-UAL-NEXT: and $a4, $a0, $a3
+; LA32-UAL-NEXT: slli.w $a4, $a4, 8
+; LA32-UAL-NEXT: slli.w $a0, $a0, 24
+; LA32-UAL-NEXT: or $a0, $a0, $a4
+; LA32-UAL-NEXT: or $a0, $a0, $a2
+; LA32-UAL-NEXT: srli.w $a2, $a1, 8
+; LA32-UAL-NEXT: and $a2, $a2, $a3
+; LA32-UAL-NEXT: srli.w $a4, $a1, 24
+; LA32-UAL-NEXT: or $a2, $a2, $a4
+; LA32-UAL-NEXT: and $a3, $a1, $a3
+; LA32-UAL-NEXT: slli.w $a3, $a3, 8
+; LA32-UAL-NEXT: slli.w $a1, $a1, 24
+; LA32-UAL-NEXT: or $a1, $a1, $a3
+; LA32-UAL-NEXT: or $a1, $a1, $a2
+; LA32-UAL-NEXT: sltu $a0, $a1, $a0
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_gt_zero:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.w $a0, $a0, 0
+; LA64-UAL-NEXT: ld.w $a1, $a1, 0
+; LA64-UAL-NEXT: revb.2w $a0, $a0
+; LA64-UAL-NEXT: addi.w $a0, $a0, 0
+; LA64-UAL-NEXT: revb.2w $a1, $a1
+; LA64-UAL-NEXT: addi.w $a1, $a1, 0
+; LA64-UAL-NEXT: sltu $a0, $a1, $a0
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_gt_zero:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 4
+; LA32-NUAL-NEXT: bl memcmp
+; LA32-NUAL-NEXT: slt $a0, $zero, $a0
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_gt_zero:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 4
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: slt $a0, $zero, $a0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4)
+ %ret = icmp sgt i32 %memcmp, 0
+ ret i1 %ret
+}
+
+define i1 @memcmp_le_zero(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: memcmp_le_zero:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a0, $a0, 0
+; LA32-UAL-NEXT: ld.w $a1, $a1, 0
+; LA32-UAL-NEXT: srli.w $a2, $a0, 8
+; LA32-UAL-NEXT: lu12i.w $a3, 15
+; LA32-UAL-NEXT: ori $a3, $a3, 3840
+; LA32-UAL-NEXT: and $a2, $a2, $a3
+; LA32-UAL-NEXT: srli.w $a4, $a0, 24
+; LA32-UAL-NEXT: or $a2, $a2, $a4
+; LA32-UAL-NEXT: and $a4, $a0, $a3
+; LA32-UAL-NEXT: slli.w $a4, $a4, 8
+; LA32-UAL-NEXT: slli.w $a0, $a0, 24
+; LA32-UAL-NEXT: or $a0, $a0, $a4
+; LA32-UAL-NEXT: or $a0, $a0, $a2
+; LA32-UAL-NEXT: srli.w $a2, $a1, 8
+; LA32-UAL-NEXT: and $a2, $a2, $a3
+; LA32-UAL-NEXT: srli.w $a4, $a1, 24
+; LA32-UAL-NEXT: or $a2, $a2, $a4
+; LA32-UAL-NEXT: and $a3, $a1, $a3
+; LA32-UAL-NEXT: slli.w $a3, $a3, 8
+; LA32-UAL-NEXT: slli.w $a1, $a1, 24
+; LA32-UAL-NEXT: or $a1, $a1, $a3
+; LA32-UAL-NEXT: or $a1, $a1, $a2
+; LA32-UAL-NEXT: sltu $a0, $a1, $a0
+; LA32-UAL-NEXT: xori $a0, $a0, 1
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_le_zero:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.w $a0, $a0, 0
+; LA64-UAL-NEXT: ld.w $a1, $a1, 0
+; LA64-UAL-NEXT: revb.2w $a0, $a0
+; LA64-UAL-NEXT: addi.w $a0, $a0, 0
+; LA64-UAL-NEXT: revb.2w $a1, $a1
+; LA64-UAL-NEXT: addi.w $a1, $a1, 0
+; LA64-UAL-NEXT: sltu $a0, $a1, $a0
+; LA64-UAL-NEXT: xori $a0, $a0, 1
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_le_zero:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 4
+; LA32-NUAL-NEXT: bl memcmp
+; LA32-NUAL-NEXT: slti $a0, $a0, 1
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_le_zero:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 4
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: slti $a0, $a0, 1
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4)
+ %ret = icmp slt i32 %memcmp, 1
+ ret i1 %ret
+}
+
+define i1 @memcmp_ge_zero(ptr %s1, ptr %s2) nounwind {
+; LA32-UAL-LABEL: memcmp_ge_zero:
+; LA32-UAL: # %bb.0: # %entry
+; LA32-UAL-NEXT: ld.w $a0, $a0, 0
+; LA32-UAL-NEXT: ld.w $a1, $a1, 0
+; LA32-UAL-NEXT: srli.w $a2, $a0, 8
+; LA32-UAL-NEXT: lu12i.w $a3, 15
+; LA32-UAL-NEXT: ori $a3, $a3, 3840
+; LA32-UAL-NEXT: and $a2, $a2, $a3
+; LA32-UAL-NEXT: srli.w $a4, $a0, 24
+; LA32-UAL-NEXT: or $a2, $a2, $a4
+; LA32-UAL-NEXT: and $a4, $a0, $a3
+; LA32-UAL-NEXT: slli.w $a4, $a4, 8
+; LA32-UAL-NEXT: slli.w $a0, $a0, 24
+; LA32-UAL-NEXT: or $a0, $a0, $a4
+; LA32-UAL-NEXT: or $a0, $a0, $a2
+; LA32-UAL-NEXT: srli.w $a2, $a1, 8
+; LA32-UAL-NEXT: and $a2, $a2, $a3
+; LA32-UAL-NEXT: srli.w $a4, $a1, 24
+; LA32-UAL-NEXT: or $a2, $a2, $a4
+; LA32-UAL-NEXT: and $a3, $a1, $a3
+; LA32-UAL-NEXT: slli.w $a3, $a3, 8
+; LA32-UAL-NEXT: slli.w $a1, $a1, 24
+; LA32-UAL-NEXT: or $a1, $a1, $a3
+; LA32-UAL-NEXT: or $a1, $a1, $a2
+; LA32-UAL-NEXT: sltu $a0, $a0, $a1
+; LA32-UAL-NEXT: xori $a0, $a0, 1
+; LA32-UAL-NEXT: ret
+;
+; LA64-UAL-LABEL: memcmp_ge_zero:
+; LA64-UAL: # %bb.0: # %entry
+; LA64-UAL-NEXT: ld.w $a0, $a0, 0
+; LA64-UAL-NEXT: ld.w $a1, $a1, 0
+; LA64-UAL-NEXT: revb.2w $a0, $a0
+; LA64-UAL-NEXT: addi.w $a0, $a0, 0
+; LA64-UAL-NEXT: revb.2w $a1, $a1
+; LA64-UAL-NEXT: addi.w $a1, $a1, 0
+; LA64-UAL-NEXT: sltu $a0, $a0, $a1
+; LA64-UAL-NEXT: xori $a0, $a0, 1
+; LA64-UAL-NEXT: ret
+;
+; LA32-NUAL-LABEL: memcmp_ge_zero:
+; LA32-NUAL: # %bb.0: # %entry
+; LA32-NUAL-NEXT: addi.w $sp, $sp, -16
+; LA32-NUAL-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NUAL-NEXT: ori $a2, $zero, 4
+; LA32-NUAL-NEXT: bl memcmp
+; LA32-NUAL-NEXT: addi.w $a1, $zero, -1
+; LA32-NUAL-NEXT: slt $a0, $a1, $a0
+; LA32-NUAL-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NUAL-NEXT: addi.w $sp, $sp, 16
+; LA32-NUAL-NEXT: ret
+;
+; LA64-NUAL-LABEL: memcmp_ge_zero:
+; LA64-NUAL: # %bb.0: # %entry
+; LA64-NUAL-NEXT: addi.d $sp, $sp, -16
+; LA64-NUAL-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NUAL-NEXT: ori $a2, $zero, 4
+; LA64-NUAL-NEXT: pcaddu18i $ra, %call36(memcmp)
+; LA64-NUAL-NEXT: jirl $ra, $ra, 0
+; LA64-NUAL-NEXT: addi.w $a1, $zero, -1
+; LA64-NUAL-NEXT: slt $a0, $a1, $a0
+; LA64-NUAL-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NUAL-NEXT: addi.d $sp, $sp, 16
+; LA64-NUAL-NEXT: ret
+entry:
+ %memcmp = call signext i32 @memcmp(ptr %s1, ptr %s2, iGRLen 4)
+ %ret = icmp sgt i32 %memcmp, -1
+ ret i1 %ret
+}
diff --git a/llvm/test/CodeGen/LoongArch/issue163681.ll b/llvm/test/CodeGen/LoongArch/issue163681.ll
new file mode 100644
index 0000000..f6df349
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/issue163681.ll
@@ -0,0 +1,56 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc --mtriple=loongarch64 -code-model=large --verify-machineinstrs < %s \
+; RUN: | FileCheck %s
+
+@.str = external constant [1 x i8]
+
+define void @caller(ptr %0) {
+; CHECK-LABEL: caller:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.d $sp, $sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: ld.w $a2, $zero, 0
+; CHECK-NEXT: ld.d $a1, $a0, 0
+; CHECK-NEXT: beqz $a2, .LBB0_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: pcalau12i $a0, %got_pc_hi20(.str)
+; CHECK-NEXT: addi.d $a2, $zero, %got_pc_lo12(.str)
+; CHECK-NEXT: lu32i.d $a2, %got64_pc_lo20(.str)
+; CHECK-NEXT: lu52i.d $a2, $a2, %got64_pc_hi12(.str)
+; CHECK-NEXT: ldx.d $a2, $a2, $a0
+; CHECK-NEXT: move $a0, $zero
+; CHECK-NEXT: jirl $ra, $zero, 0
+; CHECK-NEXT: b .LBB0_3
+; CHECK-NEXT: .LBB0_2:
+; CHECK-NEXT: pcalau12i $a0, %got_pc_hi20(.str)
+; CHECK-NEXT: addi.d $a2, $zero, %got_pc_lo12(.str)
+; CHECK-NEXT: lu32i.d $a2, %got64_pc_lo20(.str)
+; CHECK-NEXT: lu52i.d $a2, $a2, %got64_pc_hi12(.str)
+; CHECK-NEXT: ldx.d $a2, $a2, $a0
+; CHECK-NEXT: move $a0, $zero
+; CHECK-NEXT: move $a3, $zero
+; CHECK-NEXT: jirl $ra, $zero, 0
+; CHECK-NEXT: .LBB0_3:
+; CHECK-NEXT: st.d $zero, $zero, 0
+; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 16
+; CHECK-NEXT: ret
+ %2 = load i32, ptr null, align 4
+ %3 = icmp eq i32 %2, 0
+ %4 = load i64, ptr %0, align 8
+ br i1 %3, label %6, label %5
+
+5: ; preds = %1
+ call void null(ptr null, i64 %4, ptr @.str)
+ br label %7
+
+6: ; preds = %1
+ tail call void null(ptr null, i64 %4, ptr @.str, i32 0)
+ br label %7
+
+7: ; preds = %6, %5
+ store ptr null, ptr null, align 8
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/and-not-combine.ll b/llvm/test/CodeGen/LoongArch/lasx/and-not-combine.ll
index 6754959..5ed49d9 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/and-not-combine.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/and-not-combine.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
-; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s
-; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64
define void @and_not_combine_v32i8(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
; CHECK-LABEL: and_not_combine_v32i8:
@@ -85,3 +85,397 @@ entry:
store <4 x i64> %and, ptr %res
ret void
}
+
+define void @pre_not_and_not_combine_v32i8(ptr %res, ptr %a, i8 %b) nounwind {
+; CHECK-LABEL: pre_not_and_not_combine_v32i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvreplgr2vr.b $xr1, $a2
+; CHECK-NEXT: xvnor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <32 x i8>, ptr %a
+ %b.not = xor i8 %b, -1
+ %b.not.ele = insertelement <32 x i8> poison, i8 %b.not, i64 0
+ %v1.not = shufflevector <32 x i8> %b.not.ele, <32 x i8> poison, <32 x i32> zeroinitializer
+ %v0.not = xor <32 x i8> %v0, splat (i8 -1)
+ %and = and <32 x i8> %v0.not, %v1.not
+ store <32 x i8> %and, ptr %res
+ ret void
+}
+
+define void @post_not_and_not_combine_v32i8(ptr %res, ptr %a, i8 %b) nounwind {
+; CHECK-LABEL: post_not_and_not_combine_v32i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvreplgr2vr.b $xr1, $a2
+; CHECK-NEXT: xvnor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <32 x i8>, ptr %a
+ %b.ele = insertelement <32 x i8> poison, i8 %b, i64 0
+ %v1 = shufflevector <32 x i8> %b.ele, <32 x i8> poison, <32 x i32> zeroinitializer
+ %v0.not = xor <32 x i8> %v0, splat (i8 -1)
+ %v1.not = xor <32 x i8> %v1, splat (i8 -1)
+ %and = and <32 x i8> %v0.not, %v1.not
+ store <32 x i8> %and, ptr %res
+ ret void
+}
+
+define void @pre_not_and_not_combine_v16i16(ptr %res, ptr %a, i16 %b) nounwind {
+; CHECK-LABEL: pre_not_and_not_combine_v16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvreplgr2vr.h $xr1, $a2
+; CHECK-NEXT: xvnor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <16 x i16>, ptr %a
+ %b.not = xor i16 %b, -1
+ %b.not.ele = insertelement <16 x i16> poison, i16 %b.not, i64 0
+ %v1.not = shufflevector <16 x i16> %b.not.ele, <16 x i16> poison, <16 x i32> zeroinitializer
+ %v0.not = xor <16 x i16> %v0, splat (i16 -1)
+ %and = and <16 x i16> %v0.not, %v1.not
+ store <16 x i16> %and, ptr %res
+ ret void
+}
+
+define void @post_not_and_not_combine_v16i16(ptr %res, ptr %a, i16 %b) nounwind {
+; CHECK-LABEL: post_not_and_not_combine_v16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvreplgr2vr.h $xr1, $a2
+; CHECK-NEXT: xvnor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <16 x i16>, ptr %a
+ %b.ele = insertelement <16 x i16> poison, i16 %b, i64 0
+ %v1 = shufflevector <16 x i16> %b.ele, <16 x i16> poison, <16 x i32> zeroinitializer
+ %v0.not = xor <16 x i16> %v0, splat (i16 -1)
+ %v1.not = xor <16 x i16> %v1, splat (i16 -1)
+ %and = and <16 x i16> %v0.not, %v1.not
+ store <16 x i16> %and, ptr %res
+ ret void
+}
+
+define void @pre_not_and_not_combine_v8i32(ptr %res, ptr %a, i32 %b) nounwind {
+; CHECK-LABEL: pre_not_and_not_combine_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvreplgr2vr.w $xr1, $a2
+; CHECK-NEXT: xvnor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <8 x i32>, ptr %a
+ %b.not = xor i32 %b, -1
+ %b.not.ele = insertelement <8 x i32> poison, i32 %b.not, i64 0
+ %v1.not = shufflevector <8 x i32> %b.not.ele, <8 x i32> poison, <8 x i32> zeroinitializer
+ %v0.not = xor <8 x i32> %v0, splat (i32 -1)
+ %and = and <8 x i32> %v0.not, %v1.not
+ store <8 x i32> %and, ptr %res
+ ret void
+}
+
+define void @post_not_and_not_combine_v8i32(ptr %res, ptr %a, i32 %b) nounwind {
+; CHECK-LABEL: post_not_and_not_combine_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvreplgr2vr.w $xr1, $a2
+; CHECK-NEXT: xvnor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <8 x i32>, ptr %a
+ %b.ele = insertelement <8 x i32> poison, i32 %b, i64 0
+ %v1 = shufflevector <8 x i32> %b.ele, <8 x i32> poison, <8 x i32> zeroinitializer
+ %v0.not = xor <8 x i32> %v0, splat (i32 -1)
+ %v1.not = xor <8 x i32> %v1, splat (i32 -1)
+ %and = and <8 x i32> %v0.not, %v1.not
+ store <8 x i32> %and, ptr %res
+ ret void
+}
+
+define void @pre_not_and_not_combine_v4i64(ptr %res, ptr %a, i64 %b) nounwind {
+; LA32-LABEL: pre_not_and_not_combine_v4i64:
+; LA32: # %bb.0:
+; LA32-NEXT: xvld $xr0, $a1, 0
+; LA32-NEXT: nor $a1, $a3, $zero
+; LA32-NEXT: nor $a2, $a2, $zero
+; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 1
+; LA32-NEXT: xvreplve0.d $xr1, $xr1
+; LA32-NEXT: xvandn.v $xr0, $xr0, $xr1
+; LA32-NEXT: xvst $xr0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: pre_not_and_not_combine_v4i64:
+; LA64: # %bb.0:
+; LA64-NEXT: xvld $xr0, $a1, 0
+; LA64-NEXT: xvreplgr2vr.d $xr1, $a2
+; LA64-NEXT: xvnor.v $xr0, $xr0, $xr1
+; LA64-NEXT: xvst $xr0, $a0, 0
+; LA64-NEXT: ret
+ %v0 = load <4 x i64>, ptr %a
+ %b.not = xor i64 %b, -1
+ %b.not.ele = insertelement <4 x i64> poison, i64 %b.not, i64 0
+ %v1.not = shufflevector <4 x i64> %b.not.ele, <4 x i64> poison, <4 x i32> zeroinitializer
+ %v0.not = xor <4 x i64> %v0, splat (i64 -1)
+ %and = and <4 x i64> %v0.not, %v1.not
+ store <4 x i64> %and, ptr %res
+ ret void
+}
+
+define void @post_not_and_not_combine_v4i64(ptr %res, ptr %a, i64 %b) nounwind {
+; LA32-LABEL: post_not_and_not_combine_v4i64:
+; LA32: # %bb.0:
+; LA32-NEXT: xvld $xr0, $a1, 0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a3, 1
+; LA32-NEXT: xvreplve0.d $xr1, $xr1
+; LA32-NEXT: xvnor.v $xr0, $xr0, $xr1
+; LA32-NEXT: xvst $xr0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: post_not_and_not_combine_v4i64:
+; LA64: # %bb.0:
+; LA64-NEXT: xvld $xr0, $a1, 0
+; LA64-NEXT: xvreplgr2vr.d $xr1, $a2
+; LA64-NEXT: xvnor.v $xr0, $xr0, $xr1
+; LA64-NEXT: xvst $xr0, $a0, 0
+; LA64-NEXT: ret
+ %v0 = load <4 x i64>, ptr %a
+ %b.ele = insertelement <4 x i64> poison, i64 %b, i64 0
+ %v1 = shufflevector <4 x i64> %b.ele, <4 x i64> poison, <4 x i32> zeroinitializer
+ %v0.not = xor <4 x i64> %v0, splat (i64 -1)
+ %v1.not = xor <4 x i64> %v1, splat (i64 -1)
+ %and = and <4 x i64> %v0.not, %v1.not
+ store <4 x i64> %and, ptr %res
+ ret void
+}
+
+define void @and_not_combine_splatimm_v32i8(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: and_not_combine_splatimm_v32i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvnori.b $xr0, $xr0, 3
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <32 x i8>, ptr %a0
+ %and = and <32 x i8> %v0, splat (i8 -4)
+ %xor = xor <32 x i8> %and, splat (i8 -4)
+ store <32 x i8> %xor, ptr %res
+ ret void
+}
+
+define void @and_not_combine_splatimm_v16i16(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: and_not_combine_splatimm_v16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvrepli.h $xr1, -4
+; CHECK-NEXT: xvandn.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <16 x i16>, ptr %a0
+ %and = and <16 x i16> %v0, splat (i16 -4)
+ %xor = xor <16 x i16> %and, splat (i16 -4)
+ store <16 x i16> %xor, ptr %res
+ ret void
+}
+
+define void @and_not_combine_splatimm_v8i32(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: and_not_combine_splatimm_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvrepli.w $xr1, -4
+; CHECK-NEXT: xvandn.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <8 x i32>, ptr %a0
+ %and = and <8 x i32> %v0, splat (i32 -4)
+ %xor = xor <8 x i32> %and, splat (i32 -4)
+ store <8 x i32> %xor, ptr %res
+ ret void
+}
+
+define void @and_not_combine_splatimm_v4i64(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: and_not_combine_splatimm_v4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvrepli.d $xr1, -4
+; CHECK-NEXT: xvandn.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <4 x i64>, ptr %a0
+ %and = and <4 x i64> %v0, splat (i64 -4)
+ %xor = xor <4 x i64> %and, splat (i64 -4)
+ store <4 x i64> %xor, ptr %res
+ ret void
+}
+
+define void @and_or_not_combine_v32i8(ptr %pa, ptr %pb, ptr %pv, ptr %dst) nounwind {
+; CHECK-LABEL: and_or_not_combine_v32i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a0, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvld $xr2, $a1, 0
+; CHECK-NEXT: xvseq.b $xr0, $xr1, $xr0
+; CHECK-NEXT: xvseq.b $xr1, $xr1, $xr2
+; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvnori.b $xr0, $xr0, 251
+; CHECK-NEXT: xvst $xr0, $a3, 0
+; CHECK-NEXT: ret
+ %a = load <32 x i8>, ptr %pa
+ %b = load <32 x i8>, ptr %pb
+ %v = load <32 x i8>, ptr %pv
+ %ca = icmp ne <32 x i8> %v, %a
+ %cb = icmp ne <32 x i8> %v, %b
+ %or = or <32 x i1> %ca, %cb
+ %ext = sext <32 x i1> %or to <32 x i8>
+ %and = and <32 x i8> %ext, splat (i8 4)
+ store <32 x i8> %and, ptr %dst
+ ret void
+}
+
+define void @and_or_not_combine_v16i16(ptr %pa, ptr %pb, ptr %pv, ptr %dst) nounwind {
+; CHECK-LABEL: and_or_not_combine_v16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a0, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvld $xr2, $a1, 0
+; CHECK-NEXT: xvseq.h $xr0, $xr1, $xr0
+; CHECK-NEXT: xvseq.h $xr1, $xr1, $xr2
+; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvrepli.h $xr1, 4
+; CHECK-NEXT: xvandn.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a3, 0
+; CHECK-NEXT: ret
+ %a = load <16 x i16>, ptr %pa
+ %b = load <16 x i16>, ptr %pb
+ %v = load <16 x i16>, ptr %pv
+ %ca = icmp ne <16 x i16> %v, %a
+ %cb = icmp ne <16 x i16> %v, %b
+ %or = or <16 x i1> %ca, %cb
+ %ext = sext <16 x i1> %or to <16 x i16>
+ %and = and <16 x i16> %ext, splat (i16 4)
+ store <16 x i16> %and, ptr %dst
+ ret void
+}
+
+define void @and_or_not_combine_v8i32(ptr %pa, ptr %pb, ptr %pv, ptr %dst) nounwind {
+; CHECK-LABEL: and_or_not_combine_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a0, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvld $xr2, $a1, 0
+; CHECK-NEXT: xvseq.w $xr0, $xr1, $xr0
+; CHECK-NEXT: xvseq.w $xr1, $xr1, $xr2
+; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvrepli.w $xr1, 4
+; CHECK-NEXT: xvandn.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a3, 0
+; CHECK-NEXT: ret
+ %a = load <8 x i32>, ptr %pa
+ %b = load <8 x i32>, ptr %pb
+ %v = load <8 x i32>, ptr %pv
+ %ca = icmp ne <8 x i32> %v, %a
+ %cb = icmp ne <8 x i32> %v, %b
+ %or = or <8 x i1> %ca, %cb
+ %ext = sext <8 x i1> %or to <8 x i32>
+ %and = and <8 x i32> %ext, splat (i32 4)
+ store <8 x i32> %and, ptr %dst
+ ret void
+}
+
+define void @and_or_not_combine_v4i64(ptr %pa, ptr %pb, ptr %pv, ptr %dst) nounwind {
+; CHECK-LABEL: and_or_not_combine_v4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a0, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvld $xr2, $a1, 0
+; CHECK-NEXT: xvseq.d $xr0, $xr1, $xr0
+; CHECK-NEXT: xvseq.d $xr1, $xr1, $xr2
+; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvrepli.d $xr1, 4
+; CHECK-NEXT: xvandn.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a3, 0
+; CHECK-NEXT: ret
+ %a = load <4 x i64>, ptr %pa
+ %b = load <4 x i64>, ptr %pb
+ %v = load <4 x i64>, ptr %pv
+ %ca = icmp ne <4 x i64> %v, %a
+ %cb = icmp ne <4 x i64> %v, %b
+ %or = or <4 x i1> %ca, %cb
+ %ext = sext <4 x i1> %or to <4 x i64>
+ %and = and <4 x i64> %ext, splat (i64 4)
+ store <4 x i64> %and, ptr %dst
+ ret void
+}
+
+define void @and_extract_subvector_not_combine_v32i8(ptr %pa, ptr %dst) nounwind {
+; CHECK-LABEL: and_extract_subvector_not_combine_v32i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a0, 0
+; CHECK-NEXT: xvpermi.q $xr0, $xr0, 1
+; CHECK-NEXT: vnori.b $vr0, $vr0, 251
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: ret
+ %a = load volatile <32 x i8>, ptr %pa
+ %a.not = xor <32 x i8> %a, splat (i8 -1)
+ %subv = shufflevector <32 x i8> %a.not, <32 x i8> poison,
+ <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23,
+ i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %and = and <16 x i8> %subv, splat (i8 4)
+ store <16 x i8> %and, ptr %dst
+ ret void
+}
+
+define void @and_extract_subvector_not_combine_v16i16(ptr %pa, ptr %dst) nounwind {
+; CHECK-LABEL: and_extract_subvector_not_combine_v16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a0, 0
+; CHECK-NEXT: xvpermi.q $xr0, $xr0, 1
+; CHECK-NEXT: vrepli.h $vr1, 4
+; CHECK-NEXT: vandn.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: ret
+ %a = load volatile <16 x i16>, ptr %pa
+ %a.not = xor <16 x i16> %a, splat (i16 -1)
+ %subv = shufflevector <16 x i16> %a.not, <16 x i16> poison,
+ <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %and = and <8 x i16> %subv, splat (i16 4)
+ store <8 x i16> %and, ptr %dst
+ ret void
+}
+
+define void @and_extract_subvector_not_combine_v8i32(ptr %pa, ptr %dst) nounwind {
+; CHECK-LABEL: and_extract_subvector_not_combine_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a0, 0
+; CHECK-NEXT: xvpermi.q $xr0, $xr0, 1
+; CHECK-NEXT: vrepli.w $vr1, 4
+; CHECK-NEXT: vandn.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: ret
+ %a = load volatile <8 x i32>, ptr %pa
+ %a.not = xor <8 x i32> %a, splat (i32 -1)
+ %subv = shufflevector <8 x i32> %a.not, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %and = and <4 x i32> %subv, splat (i32 4)
+ store <4 x i32> %and, ptr %dst
+ ret void
+}
+
+define void @and_extract_subvector_not_combine_v4i64(ptr %pa, ptr %dst) nounwind {
+; CHECK-LABEL: and_extract_subvector_not_combine_v4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a0, 0
+; CHECK-NEXT: xvpermi.q $xr0, $xr0, 1
+; CHECK-NEXT: vrepli.d $vr1, 4
+; CHECK-NEXT: vandn.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: ret
+ %a = load volatile <4 x i64>, ptr %pa
+ %a.not = xor <4 x i64> %a, splat (i64 -1)
+ %subv = shufflevector <4 x i64> %a.not, <4 x i64> poison, <2 x i32> <i32 2, i32 3>
+ %and = and <2 x i64> %subv, splat (i64 4)
+ store <2 x i64> %and, ptr %dst
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll b/llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll
index ba2118f..b3155c9 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll
@@ -106,6 +106,69 @@ define void @ctlz_v4i64(ptr %src, ptr %dst) nounwind {
ret void
}
+define void @not_ctlz_v32i8(ptr %src, ptr %dst) nounwind {
+; CHECK-LABEL: not_ctlz_v32i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a0, 0
+; CHECK-NEXT: xvxori.b $xr0, $xr0, 255
+; CHECK-NEXT: xvclz.b $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a1, 0
+; CHECK-NEXT: ret
+ %v = load <32 x i8>, ptr %src
+ %neg = xor <32 x i8> %v, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+ %res = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %neg, i1 false)
+ store <32 x i8> %res, ptr %dst
+ ret void
+}
+
+define void @not_ctlz_v16i16(ptr %src, ptr %dst) nounwind {
+; CHECK-LABEL: not_ctlz_v16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a0, 0
+; CHECK-NEXT: xvrepli.b $xr1, -1
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvclz.h $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a1, 0
+; CHECK-NEXT: ret
+ %v = load <16 x i16>, ptr %src
+ %neg = xor <16 x i16> %v, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+ %res = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %neg, i1 false)
+ store <16 x i16> %res, ptr %dst
+ ret void
+}
+
+define void @not_ctlz_v8i32(ptr %src, ptr %dst) nounwind {
+; CHECK-LABEL: not_ctlz_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a0, 0
+; CHECK-NEXT: xvrepli.b $xr1, -1
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvclz.w $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a1, 0
+; CHECK-NEXT: ret
+ %v = load <8 x i32>, ptr %src
+ %neg = xor <8 x i32> %v, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+ %res = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %neg, i1 false)
+ store <8 x i32> %res, ptr %dst
+ ret void
+}
+
+define void @not_ctlz_v4i64(ptr %src, ptr %dst) nounwind {
+; CHECK-LABEL: not_ctlz_v4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a0, 0
+; CHECK-NEXT: xvrepli.b $xr1, -1
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvclz.d $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a1, 0
+; CHECK-NEXT: ret
+ %v = load <4 x i64>, ptr %src
+ %neg = xor <4 x i64> %v, <i64 -1, i64 -1, i64 -1, i64 -1>
+ %res = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %neg, i1 false)
+ store <4 x i64> %res, ptr %dst
+ ret void
+}
+
declare <32 x i8> @llvm.ctpop.v32i8(<32 x i8>)
declare <16 x i16> @llvm.ctpop.v16i16(<16 x i16>)
declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>)
diff --git a/llvm/test/CodeGen/LoongArch/lasx/fp-max-min.ll b/llvm/test/CodeGen/LoongArch/lasx/fp-max-min.ll
index 48ec98c..8e08e1e 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/fp-max-min.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/fp-max-min.ll
@@ -5,40 +5,10 @@
define void @minnum_v8f32(ptr %res, ptr %x, ptr %y) nounwind {
; CHECK-LABEL: minnum_v8f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvld $xr0, $a2, 0
-; CHECK-NEXT: xvld $xr1, $a1, 0
-; CHECK-NEXT: xvpickve.w $xr2, $xr0, 5
-; CHECK-NEXT: xvpickve.w $xr3, $xr1, 5
-; CHECK-NEXT: fmin.s $fa2, $fa3, $fa2
-; CHECK-NEXT: xvpickve.w $xr3, $xr0, 4
-; CHECK-NEXT: xvpickve.w $xr4, $xr1, 4
-; CHECK-NEXT: fmin.s $fa3, $fa4, $fa3
-; CHECK-NEXT: vextrins.w $vr3, $vr2, 16
-; CHECK-NEXT: xvpickve.w $xr2, $xr0, 6
-; CHECK-NEXT: xvpickve.w $xr4, $xr1, 6
-; CHECK-NEXT: fmin.s $fa2, $fa4, $fa2
-; CHECK-NEXT: vextrins.w $vr3, $vr2, 32
-; CHECK-NEXT: xvpickve.w $xr2, $xr0, 7
-; CHECK-NEXT: xvpickve.w $xr4, $xr1, 7
-; CHECK-NEXT: fmin.s $fa2, $fa4, $fa2
-; CHECK-NEXT: vextrins.w $vr3, $vr2, 48
-; CHECK-NEXT: xvpickve.w $xr2, $xr0, 1
-; CHECK-NEXT: xvpickve.w $xr4, $xr1, 1
-; CHECK-NEXT: fmin.s $fa2, $fa4, $fa2
-; CHECK-NEXT: xvpickve.w $xr4, $xr0, 0
-; CHECK-NEXT: xvpickve.w $xr5, $xr1, 0
-; CHECK-NEXT: fmin.s $fa4, $fa5, $fa4
-; CHECK-NEXT: vextrins.w $vr4, $vr2, 16
-; CHECK-NEXT: xvpickve.w $xr2, $xr0, 2
-; CHECK-NEXT: xvpickve.w $xr5, $xr1, 2
-; CHECK-NEXT: fmin.s $fa2, $fa5, $fa2
-; CHECK-NEXT: vextrins.w $vr4, $vr2, 32
-; CHECK-NEXT: xvpickve.w $xr0, $xr0, 3
-; CHECK-NEXT: xvpickve.w $xr1, $xr1, 3
-; CHECK-NEXT: fmin.s $fa0, $fa1, $fa0
-; CHECK-NEXT: vextrins.w $vr4, $vr0, 48
-; CHECK-NEXT: xvpermi.q $xr4, $xr3, 2
-; CHECK-NEXT: xvst $xr4, $a0, 0
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvfmin.s $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
%v0 = load <8 x float>, ptr %x
@@ -51,23 +21,9 @@ entry:
define void @minnum_v4f64(ptr %res, ptr %x, ptr %y) nounwind {
; CHECK-LABEL: minnum_v4f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvld $xr0, $a2, 0
-; CHECK-NEXT: xvld $xr1, $a1, 0
-; CHECK-NEXT: xvpickve.d $xr2, $xr0, 3
-; CHECK-NEXT: xvpickve.d $xr3, $xr1, 3
-; CHECK-NEXT: fmin.d $fa2, $fa3, $fa2
-; CHECK-NEXT: xvpickve.d $xr3, $xr0, 2
-; CHECK-NEXT: xvpickve.d $xr4, $xr1, 2
-; CHECK-NEXT: fmin.d $fa3, $fa4, $fa3
-; CHECK-NEXT: vextrins.d $vr3, $vr2, 16
-; CHECK-NEXT: xvpickve.d $xr2, $xr0, 1
-; CHECK-NEXT: xvpickve.d $xr4, $xr1, 1
-; CHECK-NEXT: fmin.d $fa2, $fa4, $fa2
-; CHECK-NEXT: xvpickve.d $xr0, $xr0, 0
-; CHECK-NEXT: xvpickve.d $xr1, $xr1, 0
-; CHECK-NEXT: fmin.d $fa0, $fa1, $fa0
-; CHECK-NEXT: vextrins.d $vr0, $vr2, 16
-; CHECK-NEXT: xvpermi.q $xr0, $xr3, 2
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvfmin.d $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -81,40 +37,10 @@ entry:
define void @maxnum_v8f32(ptr %res, ptr %x, ptr %y) nounwind {
; CHECK-LABEL: maxnum_v8f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvld $xr0, $a2, 0
-; CHECK-NEXT: xvld $xr1, $a1, 0
-; CHECK-NEXT: xvpickve.w $xr2, $xr0, 5
-; CHECK-NEXT: xvpickve.w $xr3, $xr1, 5
-; CHECK-NEXT: fmax.s $fa2, $fa3, $fa2
-; CHECK-NEXT: xvpickve.w $xr3, $xr0, 4
-; CHECK-NEXT: xvpickve.w $xr4, $xr1, 4
-; CHECK-NEXT: fmax.s $fa3, $fa4, $fa3
-; CHECK-NEXT: vextrins.w $vr3, $vr2, 16
-; CHECK-NEXT: xvpickve.w $xr2, $xr0, 6
-; CHECK-NEXT: xvpickve.w $xr4, $xr1, 6
-; CHECK-NEXT: fmax.s $fa2, $fa4, $fa2
-; CHECK-NEXT: vextrins.w $vr3, $vr2, 32
-; CHECK-NEXT: xvpickve.w $xr2, $xr0, 7
-; CHECK-NEXT: xvpickve.w $xr4, $xr1, 7
-; CHECK-NEXT: fmax.s $fa2, $fa4, $fa2
-; CHECK-NEXT: vextrins.w $vr3, $vr2, 48
-; CHECK-NEXT: xvpickve.w $xr2, $xr0, 1
-; CHECK-NEXT: xvpickve.w $xr4, $xr1, 1
-; CHECK-NEXT: fmax.s $fa2, $fa4, $fa2
-; CHECK-NEXT: xvpickve.w $xr4, $xr0, 0
-; CHECK-NEXT: xvpickve.w $xr5, $xr1, 0
-; CHECK-NEXT: fmax.s $fa4, $fa5, $fa4
-; CHECK-NEXT: vextrins.w $vr4, $vr2, 16
-; CHECK-NEXT: xvpickve.w $xr2, $xr0, 2
-; CHECK-NEXT: xvpickve.w $xr5, $xr1, 2
-; CHECK-NEXT: fmax.s $fa2, $fa5, $fa2
-; CHECK-NEXT: vextrins.w $vr4, $vr2, 32
-; CHECK-NEXT: xvpickve.w $xr0, $xr0, 3
-; CHECK-NEXT: xvpickve.w $xr1, $xr1, 3
-; CHECK-NEXT: fmax.s $fa0, $fa1, $fa0
-; CHECK-NEXT: vextrins.w $vr4, $vr0, 48
-; CHECK-NEXT: xvpermi.q $xr4, $xr3, 2
-; CHECK-NEXT: xvst $xr4, $a0, 0
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvfmax.s $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
%v0 = load <8 x float>, ptr %x
@@ -127,23 +53,9 @@ entry:
define void @maxnum_v4f64(ptr %res, ptr %x, ptr %y) nounwind {
; CHECK-LABEL: maxnum_v4f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvld $xr0, $a2, 0
-; CHECK-NEXT: xvld $xr1, $a1, 0
-; CHECK-NEXT: xvpickve.d $xr2, $xr0, 3
-; CHECK-NEXT: xvpickve.d $xr3, $xr1, 3
-; CHECK-NEXT: fmax.d $fa2, $fa3, $fa2
-; CHECK-NEXT: xvpickve.d $xr3, $xr0, 2
-; CHECK-NEXT: xvpickve.d $xr4, $xr1, 2
-; CHECK-NEXT: fmax.d $fa3, $fa4, $fa3
-; CHECK-NEXT: vextrins.d $vr3, $vr2, 16
-; CHECK-NEXT: xvpickve.d $xr2, $xr0, 1
-; CHECK-NEXT: xvpickve.d $xr4, $xr1, 1
-; CHECK-NEXT: fmax.d $fa2, $fa4, $fa2
-; CHECK-NEXT: xvpickve.d $xr0, $xr0, 0
-; CHECK-NEXT: xvpickve.d $xr1, $xr1, 0
-; CHECK-NEXT: fmax.d $fa0, $fa1, $fa0
-; CHECK-NEXT: vextrins.d $vr0, $vr2, 16
-; CHECK-NEXT: xvpermi.q $xr0, $xr3, 2
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvfmax.d $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
diff --git a/llvm/test/CodeGen/LoongArch/lasx/fp-rounding.ll b/llvm/test/CodeGen/LoongArch/lasx/fp-rounding.ll
index 79407c3..fa5f27e 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/fp-rounding.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/fp-rounding.ll
@@ -7,38 +7,8 @@ define void @ceil_v8f32(ptr %res, ptr %a0) nounwind {
; CHECK-LABEL: ceil_v8f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
-; CHECK-NEXT: xvpickve.w $xr1, $xr0, 5
-; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
-; CHECK-NEXT: vfrintrp.s $vr1, $vr1
-; CHECK-NEXT: xvpickve.w $xr2, $xr0, 4
-; CHECK-NEXT: vreplvei.w $vr2, $vr2, 0
-; CHECK-NEXT: vfrintrp.s $vr2, $vr2
-; CHECK-NEXT: vextrins.w $vr2, $vr1, 16
-; CHECK-NEXT: xvpickve.w $xr1, $xr0, 6
-; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
-; CHECK-NEXT: vfrintrp.s $vr1, $vr1
-; CHECK-NEXT: vextrins.w $vr2, $vr1, 32
-; CHECK-NEXT: xvpickve.w $xr1, $xr0, 7
-; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
-; CHECK-NEXT: vfrintrp.s $vr1, $vr1
-; CHECK-NEXT: vextrins.w $vr2, $vr1, 48
-; CHECK-NEXT: xvpickve.w $xr1, $xr0, 1
-; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
-; CHECK-NEXT: vfrintrp.s $vr1, $vr1
-; CHECK-NEXT: xvpickve.w $xr3, $xr0, 0
-; CHECK-NEXT: vreplvei.w $vr3, $vr3, 0
-; CHECK-NEXT: vfrintrp.s $vr3, $vr3
-; CHECK-NEXT: vextrins.w $vr3, $vr1, 16
-; CHECK-NEXT: xvpickve.w $xr1, $xr0, 2
-; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
-; CHECK-NEXT: vfrintrp.s $vr1, $vr1
-; CHECK-NEXT: vextrins.w $vr3, $vr1, 32
-; CHECK-NEXT: xvpickve.w $xr0, $xr0, 3
-; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0
-; CHECK-NEXT: vfrintrp.s $vr0, $vr0
-; CHECK-NEXT: vextrins.w $vr3, $vr0, 48
-; CHECK-NEXT: xvpermi.q $xr3, $xr2, 2
-; CHECK-NEXT: xvst $xr3, $a0, 0
+; CHECK-NEXT: xvfrintrp.s $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
%v0 = load <8 x float>, ptr %a0
@@ -52,21 +22,7 @@ define void @ceil_v4f64(ptr %res, ptr %a0) nounwind {
; CHECK-LABEL: ceil_v4f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
-; CHECK-NEXT: xvpickve.d $xr1, $xr0, 3
-; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0
-; CHECK-NEXT: vfrintrp.d $vr1, $vr1
-; CHECK-NEXT: xvpickve.d $xr2, $xr0, 2
-; CHECK-NEXT: vreplvei.d $vr2, $vr2, 0
-; CHECK-NEXT: vfrintrp.d $vr2, $vr2
-; CHECK-NEXT: vextrins.d $vr2, $vr1, 16
-; CHECK-NEXT: xvpickve.d $xr1, $xr0, 1
-; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0
-; CHECK-NEXT: vfrintrp.d $vr1, $vr1
-; CHECK-NEXT: xvpickve.d $xr0, $xr0, 0
-; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0
-; CHECK-NEXT: vfrintrp.d $vr0, $vr0
-; CHECK-NEXT: vextrins.d $vr0, $vr1, 16
-; CHECK-NEXT: xvpermi.q $xr0, $xr2, 2
+; CHECK-NEXT: xvfrintrp.d $xr0, $xr0
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -81,38 +37,8 @@ define void @floor_v8f32(ptr %res, ptr %a0) nounwind {
; CHECK-LABEL: floor_v8f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
-; CHECK-NEXT: xvpickve.w $xr1, $xr0, 5
-; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
-; CHECK-NEXT: vfrintrm.s $vr1, $vr1
-; CHECK-NEXT: xvpickve.w $xr2, $xr0, 4
-; CHECK-NEXT: vreplvei.w $vr2, $vr2, 0
-; CHECK-NEXT: vfrintrm.s $vr2, $vr2
-; CHECK-NEXT: vextrins.w $vr2, $vr1, 16
-; CHECK-NEXT: xvpickve.w $xr1, $xr0, 6
-; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
-; CHECK-NEXT: vfrintrm.s $vr1, $vr1
-; CHECK-NEXT: vextrins.w $vr2, $vr1, 32
-; CHECK-NEXT: xvpickve.w $xr1, $xr0, 7
-; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
-; CHECK-NEXT: vfrintrm.s $vr1, $vr1
-; CHECK-NEXT: vextrins.w $vr2, $vr1, 48
-; CHECK-NEXT: xvpickve.w $xr1, $xr0, 1
-; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
-; CHECK-NEXT: vfrintrm.s $vr1, $vr1
-; CHECK-NEXT: xvpickve.w $xr3, $xr0, 0
-; CHECK-NEXT: vreplvei.w $vr3, $vr3, 0
-; CHECK-NEXT: vfrintrm.s $vr3, $vr3
-; CHECK-NEXT: vextrins.w $vr3, $vr1, 16
-; CHECK-NEXT: xvpickve.w $xr1, $xr0, 2
-; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
-; CHECK-NEXT: vfrintrm.s $vr1, $vr1
-; CHECK-NEXT: vextrins.w $vr3, $vr1, 32
-; CHECK-NEXT: xvpickve.w $xr0, $xr0, 3
-; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0
-; CHECK-NEXT: vfrintrm.s $vr0, $vr0
-; CHECK-NEXT: vextrins.w $vr3, $vr0, 48
-; CHECK-NEXT: xvpermi.q $xr3, $xr2, 2
-; CHECK-NEXT: xvst $xr3, $a0, 0
+; CHECK-NEXT: xvfrintrm.s $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
%v0 = load <8 x float>, ptr %a0
@@ -126,21 +52,7 @@ define void @floor_v4f64(ptr %res, ptr %a0) nounwind {
; CHECK-LABEL: floor_v4f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
-; CHECK-NEXT: xvpickve.d $xr1, $xr0, 3
-; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0
-; CHECK-NEXT: vfrintrm.d $vr1, $vr1
-; CHECK-NEXT: xvpickve.d $xr2, $xr0, 2
-; CHECK-NEXT: vreplvei.d $vr2, $vr2, 0
-; CHECK-NEXT: vfrintrm.d $vr2, $vr2
-; CHECK-NEXT: vextrins.d $vr2, $vr1, 16
-; CHECK-NEXT: xvpickve.d $xr1, $xr0, 1
-; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0
-; CHECK-NEXT: vfrintrm.d $vr1, $vr1
-; CHECK-NEXT: xvpickve.d $xr0, $xr0, 0
-; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0
-; CHECK-NEXT: vfrintrm.d $vr0, $vr0
-; CHECK-NEXT: vextrins.d $vr0, $vr1, 16
-; CHECK-NEXT: xvpermi.q $xr0, $xr2, 2
+; CHECK-NEXT: xvfrintrm.d $xr0, $xr0
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -155,38 +67,8 @@ define void @trunc_v8f32(ptr %res, ptr %a0) nounwind {
; CHECK-LABEL: trunc_v8f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
-; CHECK-NEXT: xvpickve.w $xr1, $xr0, 5
-; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
-; CHECK-NEXT: vfrintrz.s $vr1, $vr1
-; CHECK-NEXT: xvpickve.w $xr2, $xr0, 4
-; CHECK-NEXT: vreplvei.w $vr2, $vr2, 0
-; CHECK-NEXT: vfrintrz.s $vr2, $vr2
-; CHECK-NEXT: vextrins.w $vr2, $vr1, 16
-; CHECK-NEXT: xvpickve.w $xr1, $xr0, 6
-; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
-; CHECK-NEXT: vfrintrz.s $vr1, $vr1
-; CHECK-NEXT: vextrins.w $vr2, $vr1, 32
-; CHECK-NEXT: xvpickve.w $xr1, $xr0, 7
-; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
-; CHECK-NEXT: vfrintrz.s $vr1, $vr1
-; CHECK-NEXT: vextrins.w $vr2, $vr1, 48
-; CHECK-NEXT: xvpickve.w $xr1, $xr0, 1
-; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
-; CHECK-NEXT: vfrintrz.s $vr1, $vr1
-; CHECK-NEXT: xvpickve.w $xr3, $xr0, 0
-; CHECK-NEXT: vreplvei.w $vr3, $vr3, 0
-; CHECK-NEXT: vfrintrz.s $vr3, $vr3
-; CHECK-NEXT: vextrins.w $vr3, $vr1, 16
-; CHECK-NEXT: xvpickve.w $xr1, $xr0, 2
-; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
-; CHECK-NEXT: vfrintrz.s $vr1, $vr1
-; CHECK-NEXT: vextrins.w $vr3, $vr1, 32
-; CHECK-NEXT: xvpickve.w $xr0, $xr0, 3
-; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0
-; CHECK-NEXT: vfrintrz.s $vr0, $vr0
-; CHECK-NEXT: vextrins.w $vr3, $vr0, 48
-; CHECK-NEXT: xvpermi.q $xr3, $xr2, 2
-; CHECK-NEXT: xvst $xr3, $a0, 0
+; CHECK-NEXT: xvfrintrz.s $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
%v0 = load <8 x float>, ptr %a0
@@ -200,21 +82,7 @@ define void @trunc_v4f64(ptr %res, ptr %a0) nounwind {
; CHECK-LABEL: trunc_v4f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
-; CHECK-NEXT: xvpickve.d $xr1, $xr0, 3
-; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0
-; CHECK-NEXT: vfrintrz.d $vr1, $vr1
-; CHECK-NEXT: xvpickve.d $xr2, $xr0, 2
-; CHECK-NEXT: vreplvei.d $vr2, $vr2, 0
-; CHECK-NEXT: vfrintrz.d $vr2, $vr2
-; CHECK-NEXT: vextrins.d $vr2, $vr1, 16
-; CHECK-NEXT: xvpickve.d $xr1, $xr0, 1
-; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0
-; CHECK-NEXT: vfrintrz.d $vr1, $vr1
-; CHECK-NEXT: xvpickve.d $xr0, $xr0, 0
-; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0
-; CHECK-NEXT: vfrintrz.d $vr0, $vr0
-; CHECK-NEXT: vextrins.d $vr0, $vr1, 16
-; CHECK-NEXT: xvpermi.q $xr0, $xr2, 2
+; CHECK-NEXT: xvfrintrz.d $xr0, $xr0
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -229,38 +97,8 @@ define void @roundeven_v8f32(ptr %res, ptr %a0) nounwind {
; CHECK-LABEL: roundeven_v8f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
-; CHECK-NEXT: xvpickve.w $xr1, $xr0, 5
-; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
-; CHECK-NEXT: vfrintrne.s $vr1, $vr1
-; CHECK-NEXT: xvpickve.w $xr2, $xr0, 4
-; CHECK-NEXT: vreplvei.w $vr2, $vr2, 0
-; CHECK-NEXT: vfrintrne.s $vr2, $vr2
-; CHECK-NEXT: vextrins.w $vr2, $vr1, 16
-; CHECK-NEXT: xvpickve.w $xr1, $xr0, 6
-; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
-; CHECK-NEXT: vfrintrne.s $vr1, $vr1
-; CHECK-NEXT: vextrins.w $vr2, $vr1, 32
-; CHECK-NEXT: xvpickve.w $xr1, $xr0, 7
-; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
-; CHECK-NEXT: vfrintrne.s $vr1, $vr1
-; CHECK-NEXT: vextrins.w $vr2, $vr1, 48
-; CHECK-NEXT: xvpickve.w $xr1, $xr0, 1
-; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
-; CHECK-NEXT: vfrintrne.s $vr1, $vr1
-; CHECK-NEXT: xvpickve.w $xr3, $xr0, 0
-; CHECK-NEXT: vreplvei.w $vr3, $vr3, 0
-; CHECK-NEXT: vfrintrne.s $vr3, $vr3
-; CHECK-NEXT: vextrins.w $vr3, $vr1, 16
-; CHECK-NEXT: xvpickve.w $xr1, $xr0, 2
-; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
-; CHECK-NEXT: vfrintrne.s $vr1, $vr1
-; CHECK-NEXT: vextrins.w $vr3, $vr1, 32
-; CHECK-NEXT: xvpickve.w $xr0, $xr0, 3
-; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0
-; CHECK-NEXT: vfrintrne.s $vr0, $vr0
-; CHECK-NEXT: vextrins.w $vr3, $vr0, 48
-; CHECK-NEXT: xvpermi.q $xr3, $xr2, 2
-; CHECK-NEXT: xvst $xr3, $a0, 0
+; CHECK-NEXT: xvfrintrne.s $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
%v0 = load <8 x float>, ptr %a0
@@ -274,21 +112,7 @@ define void @roundeven_v4f64(ptr %res, ptr %a0) nounwind {
; CHECK-LABEL: roundeven_v4f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
-; CHECK-NEXT: xvpickve.d $xr1, $xr0, 3
-; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0
-; CHECK-NEXT: vfrintrne.d $vr1, $vr1
-; CHECK-NEXT: xvpickve.d $xr2, $xr0, 2
-; CHECK-NEXT: vreplvei.d $vr2, $vr2, 0
-; CHECK-NEXT: vfrintrne.d $vr2, $vr2
-; CHECK-NEXT: vextrins.d $vr2, $vr1, 16
-; CHECK-NEXT: xvpickve.d $xr1, $xr0, 1
-; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0
-; CHECK-NEXT: vfrintrne.d $vr1, $vr1
-; CHECK-NEXT: xvpickve.d $xr0, $xr0, 0
-; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0
-; CHECK-NEXT: vfrintrne.d $vr0, $vr0
-; CHECK-NEXT: vextrins.d $vr0, $vr1, 16
-; CHECK-NEXT: xvpermi.q $xr0, $xr2, 2
+; CHECK-NEXT: xvfrintrne.d $xr0, $xr0
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-conversion.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-conversion.ll
new file mode 100644
index 0000000..006713c
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-conversion.ll
@@ -0,0 +1,303 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <8 x float> @llvm.loongarch.lasx.cast.128.s(<4 x float>)
+
+define void @lasx_cast_128_s(ptr %vd, ptr %va) {
+; CHECK-LABEL: lasx_cast_128_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %a = load <4 x float>, ptr %va
+ %b = call <8 x float> @llvm.loongarch.lasx.cast.128.s(<4 x float> %a)
+ store <8 x float> %b, ptr %vd
+ ret void
+}
+
+declare <4 x double> @llvm.loongarch.lasx.cast.128.d(<2 x double>)
+
+define void @lasx_cast_128_d(ptr %vd, ptr %va) {
+; CHECK-LABEL: lasx_cast_128_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %a = load <2 x double>, ptr %va
+ %b = call <4 x double> @llvm.loongarch.lasx.cast.128.d(<2 x double> %a)
+ store <4 x double> %b, ptr %vd
+ ret void
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.cast.128(<2 x i64>)
+
+define void @lasx_cast_128(ptr %vd, ptr %va) {
+; CHECK-LABEL: lasx_cast_128:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %a = load <2 x i64>, ptr %va
+ %b = call <4 x i64> @llvm.loongarch.lasx.cast.128(<2 x i64> %a)
+ store <4 x i64> %b, ptr %vd
+ ret void
+}
+
+declare <8 x float> @llvm.loongarch.lasx.concat.128.s(<4 x float>, <4 x float>)
+
+define void @lasx_concat_128_s(ptr %vd, ptr %va, ptr %vb) {
+; CHECK-LABEL: lasx_concat_128_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %a = load <4 x float>, ptr %va
+ %b = load <4 x float>, ptr %vb
+ %c = call <8 x float> @llvm.loongarch.lasx.concat.128.s(<4 x float> %a, <4 x float> %b)
+ store <8 x float> %c, ptr %vd
+ ret void
+}
+
+declare <4 x double> @llvm.loongarch.lasx.concat.128.d(<2 x double>, <2 x double>)
+
+define void @lasx_concat_128_d(ptr %vd, ptr %va, ptr %vb) {
+; CHECK-LABEL: lasx_concat_128_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %a = load <2 x double>, ptr %va
+ %b = load <2 x double>, ptr %vb
+ %c = call <4 x double> @llvm.loongarch.lasx.concat.128.d(<2 x double> %a, <2 x double> %b)
+ store <4 x double> %c, ptr %vd
+ ret void
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.concat.128(<2 x i64>, <2 x i64>)
+
+define void @lasx_concat_128(ptr %vd, ptr %va, ptr %vb) {
+; CHECK-LABEL: lasx_concat_128:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %a = load <2 x i64>, ptr %va
+ %b = load <2 x i64>, ptr %vb
+ %c = call <4 x i64> @llvm.loongarch.lasx.concat.128(<2 x i64> %a, <2 x i64> %b)
+ store <4 x i64> %c, ptr %vd
+ ret void
+}
+
+declare <4 x float> @llvm.loongarch.lasx.extract.128.lo.s(<8 x float>)
+
+define void @lasx_extract_128_lo_s(ptr %vd, ptr %va) {
+; CHECK-LABEL: lasx_extract_128_lo_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %a = load <8 x float>, ptr %va
+ %c = call <4 x float> @llvm.loongarch.lasx.extract.128.lo.s(<8 x float> %a)
+ store <4 x float> %c, ptr %vd
+ ret void
+}
+
+declare <2 x double> @llvm.loongarch.lasx.extract.128.lo.d(<4 x double>)
+
+define void @lasx_extract_128_lo_d(ptr %vd, ptr %va) {
+; CHECK-LABEL: lasx_extract_128_lo_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %a = load <4 x double>, ptr %va
+ %c = call <2 x double> @llvm.loongarch.lasx.extract.128.lo.d(<4 x double> %a)
+ store <2 x double> %c, ptr %vd
+ ret void
+}
+
+declare <2 x i64> @llvm.loongarch.lasx.extract.128.lo(<4 x i64>)
+
+define void @lasx_extract_128_lo(ptr %vd, ptr %va) {
+; CHECK-LABEL: lasx_extract_128_lo:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %a = load <4 x i64>, ptr %va
+ %c = call <2 x i64> @llvm.loongarch.lasx.extract.128.lo(<4 x i64> %a)
+ store <2 x i64> %c, ptr %vd
+ ret void
+}
+
+declare <4 x float> @llvm.loongarch.lasx.extract.128.hi.s(<8 x float>)
+
+define void @lasx_extract_128_hi_s(ptr %vd, ptr %va) {
+; CHECK-LABEL: lasx_extract_128_hi_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvpermi.q $xr0, $xr0, 1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %a = load <8 x float>, ptr %va
+ %c = call <4 x float> @llvm.loongarch.lasx.extract.128.hi.s(<8 x float> %a)
+ store <4 x float> %c, ptr %vd
+ ret void
+}
+
+declare <2 x double> @llvm.loongarch.lasx.extract.128.hi.d(<4 x double>)
+
+define void @lasx_extract_128_hi_d(ptr %vd, ptr %va) {
+; CHECK-LABEL: lasx_extract_128_hi_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvpermi.q $xr0, $xr0, 1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %a = load <4 x double>, ptr %va
+ %c = call <2 x double> @llvm.loongarch.lasx.extract.128.hi.d(<4 x double> %a)
+ store <2 x double> %c, ptr %vd
+ ret void
+}
+
+declare <2 x i64> @llvm.loongarch.lasx.extract.128.hi(<4 x i64>)
+
+define void @lasx_extract_128_hi(ptr %vd, ptr %va) {
+; CHECK-LABEL: lasx_extract_128_hi:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvpermi.q $xr0, $xr0, 1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %a = load <4 x i64>, ptr %va
+ %c = call <2 x i64> @llvm.loongarch.lasx.extract.128.hi(<4 x i64> %a)
+ store <2 x i64> %c, ptr %vd
+ ret void
+}
+
+declare <8 x float> @llvm.loongarch.lasx.insert.128.lo.s(<8 x float>, <4 x float>)
+
+define void @lasx_insert_128_lo_s(ptr %vd, ptr %va, ptr %vb) {
+; CHECK-LABEL: lasx_insert_128_lo_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 48
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %a = load <8 x float>, ptr %va
+ %b = load <4 x float>, ptr %vb
+ %c = call <8 x float> @llvm.loongarch.lasx.insert.128.lo.s(<8 x float> %a, <4 x float> %b)
+ store <8 x float> %c, ptr %vd
+ ret void
+}
+
+declare <4 x double> @llvm.loongarch.lasx.insert.128.lo.d(<4 x double>, <2 x double>)
+
+define void @lasx_insert_128_lo_d(ptr %vd, ptr %va, ptr %vb) {
+; CHECK-LABEL: lasx_insert_128_lo_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 48
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %a = load <4 x double>, ptr %va
+ %b = load <2 x double>, ptr %vb
+ %c = call <4 x double> @llvm.loongarch.lasx.insert.128.lo.d(<4 x double> %a, <2 x double> %b)
+ store <4 x double> %c, ptr %vd
+ ret void
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.insert.128.lo(<4 x i64>, <2 x i64>)
+
+define void @lasx_insert_128_lo(ptr %vd, ptr %va, ptr %vb) {
+; CHECK-LABEL: lasx_insert_128_lo:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 48
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %a = load <4 x i64>, ptr %va
+ %b = load <2 x i64>, ptr %vb
+ %c = call <4 x i64> @llvm.loongarch.lasx.insert.128.lo(<4 x i64> %a, <2 x i64> %b)
+ store <4 x i64> %c, ptr %vd
+ ret void
+}
+
+declare <8 x float> @llvm.loongarch.lasx.insert.128.hi.s(<8 x float>, <4 x float>)
+
+define void @lasx_insert_128_hi_s(ptr %vd, ptr %va, ptr %vb) {
+; CHECK-LABEL: lasx_insert_128_hi_s:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %a = load <8 x float>, ptr %va
+ %b = load <4 x float>, ptr %vb
+ %c = call <8 x float> @llvm.loongarch.lasx.insert.128.hi.s(<8 x float> %a, <4 x float> %b)
+ store <8 x float> %c, ptr %vd
+ ret void
+}
+
+declare <4 x double> @llvm.loongarch.lasx.insert.128.hi.d(<4 x double>, <2 x double>)
+
+define void @lasx_insert_128_hi_d(ptr %vd, ptr %va, ptr %vb) {
+; CHECK-LABEL: lasx_insert_128_hi_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %a = load <4 x double>, ptr %va
+ %b = load <2 x double>, ptr %vb
+ %c = call <4 x double> @llvm.loongarch.lasx.insert.128.hi.d(<4 x double> %a, <2 x double> %b)
+ store <4 x double> %c, ptr %vd
+ ret void
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.insert.128.hi(<4 x i64>, <2 x i64>)
+
+define void @lasx_insert_128_hi(ptr %vd, ptr %va, ptr %vb) {
+; CHECK-LABEL: lasx_insert_128_hi:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %a = load <4 x i64>, ptr %va
+ %b = load <2 x i64>, ptr %vb
+ %c = call <4 x i64> @llvm.loongarch.lasx.insert.128.hi(<4 x i64> %a, <2 x i64> %b)
+ store <4 x i64> %c, ptr %vd
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/rotl-rotr.ll b/llvm/test/CodeGen/LoongArch/lasx/rotl-rotr.ll
new file mode 100644
index 0000000..6b8ab2c
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/rotl-rotr.ll
@@ -0,0 +1,248 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64
+
+define void @rotl_v32i8(ptr %dst, ptr %src, i8 signext %a0) nounwind {
+; CHECK-LABEL: rotl_v32i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvreplgr2vr.b $xr1, $a2
+; CHECK-NEXT: xvneg.b $xr1, $xr1
+; CHECK-NEXT: xvrotr.b $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <32 x i8>, ptr %src
+ %v1.ele = insertelement <32 x i8> poison, i8 %a0, i8 0
+ %v1 = shufflevector <32 x i8> %v1.ele, <32 x i8> poison, <32 x i32> zeroinitializer
+ %v1.sub = sub <32 x i8> splat (i8 8), %v1
+ %b = shl <32 x i8> %v0, %v1
+ %c = lshr <32 x i8> %v0, %v1.sub
+ %d = or <32 x i8> %b, %c
+ store <32 x i8> %d, ptr %dst
+ ret void
+}
+
+define void @rotr_v32i8(ptr %dst, ptr %src, i8 signext %a0) nounwind {
+; CHECK-LABEL: rotr_v32i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvreplgr2vr.b $xr1, $a2
+; CHECK-NEXT: xvrotr.b $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <32 x i8>, ptr %src
+ %v1.ele = insertelement <32 x i8> poison, i8 %a0, i8 0
+ %v1 = shufflevector <32 x i8> %v1.ele, <32 x i8> poison, <32 x i32> zeroinitializer
+ %v1.sub = sub <32 x i8> splat (i8 8), %v1
+ %b = lshr <32 x i8> %v0, %v1
+ %c = shl <32 x i8> %v0, %v1.sub
+ %d = or <32 x i8> %b, %c
+ store <32 x i8> %d, ptr %dst
+ ret void
+}
+
+define void @rotr_v32i8_imm(ptr %dst, ptr %src) nounwind {
+; CHECK-LABEL: rotr_v32i8_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvrotri.b $xr0, $xr0, 2
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <32 x i8>, ptr %src
+ %b = lshr <32 x i8> %v0, splat (i8 2)
+ %c = shl <32 x i8> %v0, splat (i8 6)
+ %d = or <32 x i8> %b, %c
+ store <32 x i8> %d, ptr %dst
+ ret void
+}
+
+define void @rotl_v16i16(ptr %dst, ptr %src, i16 signext %a0) nounwind {
+; CHECK-LABEL: rotl_v16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvreplgr2vr.h $xr1, $a2
+; CHECK-NEXT: xvneg.h $xr1, $xr1
+; CHECK-NEXT: xvrotr.h $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <16 x i16>, ptr %src
+ %v1.ele = insertelement <16 x i16> poison, i16 %a0, i16 0
+ %v1 = shufflevector <16 x i16> %v1.ele, <16 x i16> poison, <16 x i32> zeroinitializer
+ %v1.sub = sub <16 x i16> splat (i16 16), %v1
+ %b = shl <16 x i16> %v0, %v1
+ %c = lshr <16 x i16> %v0, %v1.sub
+ %d = or <16 x i16> %b, %c
+ store <16 x i16> %d, ptr %dst
+ ret void
+}
+
+define void @rotr_v16i16(ptr %dst, ptr %src, i16 signext %a0) nounwind {
+; CHECK-LABEL: rotr_v16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvreplgr2vr.h $xr1, $a2
+; CHECK-NEXT: xvrotr.h $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <16 x i16>, ptr %src
+ %v1.ele = insertelement <16 x i16> poison, i16 %a0, i16 0
+ %v1 = shufflevector <16 x i16> %v1.ele, <16 x i16> poison, <16 x i32> zeroinitializer
+ %v1.sub = sub <16 x i16> splat (i16 16), %v1
+ %b = lshr <16 x i16> %v0, %v1
+ %c = shl <16 x i16> %v0, %v1.sub
+ %d = or <16 x i16> %b, %c
+ store <16 x i16> %d, ptr %dst
+ ret void
+}
+
+define void @rotr_v16i16_imm(ptr %dst, ptr %src) nounwind {
+; CHECK-LABEL: rotr_v16i16_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvrotri.h $xr0, $xr0, 2
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <16 x i16>, ptr %src
+ %b = lshr <16 x i16> %v0, splat (i16 2)
+ %c = shl <16 x i16> %v0, splat (i16 14)
+ %d = or <16 x i16> %b, %c
+ store <16 x i16> %d, ptr %dst
+ ret void
+}
+
+define void @rotl_v8i32(ptr %dst, ptr %src, i32 signext %a0) nounwind {
+; CHECK-LABEL: rotl_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvreplgr2vr.w $xr1, $a2
+; CHECK-NEXT: xvneg.w $xr1, $xr1
+; CHECK-NEXT: xvrotr.w $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <8 x i32>, ptr %src
+ %v1.ele = insertelement <8 x i32> poison, i32 %a0, i32 0
+ %v1 = shufflevector <8 x i32> %v1.ele, <8 x i32> poison, <8 x i32> zeroinitializer
+ %v1.sub = sub <8 x i32> splat (i32 32), %v1
+ %b = shl <8 x i32> %v0, %v1
+ %c = lshr <8 x i32> %v0, %v1.sub
+ %d = or <8 x i32> %b, %c
+ store <8 x i32> %d, ptr %dst
+ ret void
+}
+
+define void @rotr_v8i32(ptr %dst, ptr %src, i32 signext %a0) nounwind {
+; CHECK-LABEL: rotr_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvreplgr2vr.w $xr1, $a2
+; CHECK-NEXT: xvrotr.w $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <8 x i32>, ptr %src
+ %v1.ele = insertelement <8 x i32> poison, i32 %a0, i32 0
+ %v1 = shufflevector <8 x i32> %v1.ele, <8 x i32> poison, <8 x i32> zeroinitializer
+ %v1.sub = sub <8 x i32> splat (i32 32), %v1
+ %b = lshr <8 x i32> %v0, %v1
+ %c = shl <8 x i32> %v0, %v1.sub
+ %d = or <8 x i32> %b, %c
+ store <8 x i32> %d, ptr %dst
+ ret void
+}
+
+define void @rotr_v8i32_imm(ptr %dst, ptr %src) nounwind {
+; CHECK-LABEL: rotr_v8i32_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvrotri.w $xr0, $xr0, 2
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <8 x i32>, ptr %src
+ %b = lshr <8 x i32> %v0, splat (i32 2)
+ %c = shl <8 x i32> %v0, splat (i32 30)
+ %d = or <8 x i32> %b, %c
+ store <8 x i32> %d, ptr %dst
+ ret void
+}
+
+define void @rotl_v4i64(ptr %dst, ptr %src, i64 %a0) nounwind {
+; LA32-LABEL: rotl_v4i64:
+; LA32: # %bb.0:
+; LA32-NEXT: xvld $xr0, $a1, 0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 2
+; LA32-NEXT: xvpermi.q $xr1, $xr1, 2
+; LA32-NEXT: xvneg.d $xr1, $xr1
+; LA32-NEXT: xvrotr.d $xr0, $xr0, $xr1
+; LA32-NEXT: xvst $xr0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: rotl_v4i64:
+; LA64: # %bb.0:
+; LA64-NEXT: xvld $xr0, $a1, 0
+; LA64-NEXT: xvreplgr2vr.d $xr1, $a2
+; LA64-NEXT: xvneg.d $xr1, $xr1
+; LA64-NEXT: xvrotr.d $xr0, $xr0, $xr1
+; LA64-NEXT: xvst $xr0, $a0, 0
+; LA64-NEXT: ret
+ %v0 = load <4 x i64>, ptr %src
+ %v1.ele = insertelement <4 x i64> poison, i64 %a0, i64 0
+ %v1 = shufflevector <4 x i64> %v1.ele, <4 x i64> poison, <4 x i32> zeroinitializer
+ %v1.sub = sub <4 x i64> splat (i64 64), %v1
+ %b = shl <4 x i64> %v0, %v1
+ %c = lshr <4 x i64> %v0, %v1.sub
+ %d = or <4 x i64> %b, %c
+ store <4 x i64> %d, ptr %dst
+ ret void
+}
+
+define void @rotr_v4i64(ptr %dst, ptr %src, i64 %a0) nounwind {
+; LA32-LABEL: rotr_v4i64:
+; LA32: # %bb.0:
+; LA32-NEXT: xvld $xr0, $a1, 0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 2
+; LA32-NEXT: xvpermi.q $xr1, $xr1, 2
+; LA32-NEXT: xvrotr.d $xr0, $xr0, $xr1
+; LA32-NEXT: xvst $xr0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: rotr_v4i64:
+; LA64: # %bb.0:
+; LA64-NEXT: xvld $xr0, $a1, 0
+; LA64-NEXT: xvreplgr2vr.d $xr1, $a2
+; LA64-NEXT: xvrotr.d $xr0, $xr0, $xr1
+; LA64-NEXT: xvst $xr0, $a0, 0
+; LA64-NEXT: ret
+ %v0 = load <4 x i64>, ptr %src
+ %v1.ele = insertelement <4 x i64> poison, i64 %a0, i64 0
+ %v1 = shufflevector <4 x i64> %v1.ele, <4 x i64> poison, <4 x i32> zeroinitializer
+ %v1.sub = sub <4 x i64> splat (i64 64), %v1
+ %b = lshr <4 x i64> %v0, %v1
+ %c = shl <4 x i64> %v0, %v1.sub
+ %d = or <4 x i64> %b, %c
+ store <4 x i64> %d, ptr %dst
+ ret void
+}
+
+define void @rotr_v4i64_imm(ptr %dst, ptr %src) nounwind {
+; LA32-LABEL: rotr_v4i64_imm:
+; LA32: # %bb.0:
+; LA32-NEXT: xvld $xr0, $a1, 0
+; LA32-NEXT: xvrepli.w $xr1, -62
+; LA32-NEXT: xvrotr.d $xr0, $xr0, $xr1
+; LA32-NEXT: xvst $xr0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: rotr_v4i64_imm:
+; LA64: # %bb.0:
+; LA64-NEXT: xvld $xr0, $a1, 0
+; LA64-NEXT: xvrotri.d $xr0, $xr0, 2
+; LA64-NEXT: xvst $xr0, $a0, 0
+; LA64-NEXT: ret
+ %v0 = load <4 x i64>, ptr %src
+ %b = lshr <4 x i64> %v0, splat (i64 2)
+ %c = shl <4 x i64> %v0, splat (i64 62)
+ %d = or <4 x i64> %b, %c
+ store <4 x i64> %d, ptr %dst
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/shuffle-as-bswap.ll b/llvm/test/CodeGen/LoongArch/lasx/shuffle-as-bswap.ll
new file mode 100644
index 0000000..1c9038a
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/shuffle-as-bswap.ll
@@ -0,0 +1,47 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s
+
+define void @shufflevector_bswap_h(ptr %res, ptr %a) nounwind {
+; CHECK-LABEL: shufflevector_bswap_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvshuf4i.b $xr0, $xr0, 177
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <32 x i8>, ptr %a
+ %b = shufflevector <32 x i8> %va, <32 x i8> poison, <32 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14, i32 17, i32 16, i32 19, i32 18, i32 21, i32 20, i32 23, i32 22, i32 25, i32 24, i32 27, i32 26, i32 29, i32 28, i32 31, i32 30>
+ store <32 x i8> %b, ptr %res
+ ret void
+}
+
+define void @shufflevector_bswap_w(ptr %res, ptr %a) nounwind {
+; CHECK-LABEL: shufflevector_bswap_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvshuf4i.b $xr0, $xr0, 27
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <32 x i8>, ptr %a
+ %b = shufflevector <32 x i8> %va, <32 x i8> poison, <32 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12, i32 19, i32 18, i32 17, i32 16, i32 23, i32 22, i32 21, i32 20, i32 27, i32 26, i32 25, i32 24, i32 31, i32 30, i32 29, i32 28>
+ store <32 x i8> %b, ptr %res
+ ret void
+}
+
+define void @shufflevector_bswap_d(ptr %res, ptr %a) nounwind {
+; CHECK-LABEL: shufflevector_bswap_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI2_0)
+; CHECK-NEXT: xvld $xr1, $a1, %pc_lo12(.LCPI2_0)
+; CHECK-NEXT: xvshuf.b $xr0, $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <32 x i8>, ptr %a
+ %b = shufflevector <32 x i8> %va, <32 x i8> poison, <32 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24>
+ store <32 x i8> %b, ptr %res
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/ldptr.ll b/llvm/test/CodeGen/LoongArch/ldptr.ll
index c3656a6..9bafa10 100644
--- a/llvm/test/CodeGen/LoongArch/ldptr.ll
+++ b/llvm/test/CodeGen/LoongArch/ldptr.ll
@@ -24,8 +24,7 @@ define signext i32 @ldptr_w(ptr %p) nounwind {
; LA32-LABEL: ldptr_w:
; LA32: # %bb.0: # %entry
; LA32-NEXT: addi.w $a0, $a0, 2047
-; LA32-NEXT: addi.w $a0, $a0, 1
-; LA32-NEXT: ld.w $a0, $a0, 0
+; LA32-NEXT: ld.w $a0, $a0, 1
; LA32-NEXT: ret
;
; LA64-LABEL: ldptr_w:
@@ -81,10 +80,9 @@ entry:
define i64 @ldptr_d(ptr %p) nounwind {
; LA32-LABEL: ldptr_d:
; LA32: # %bb.0: # %entry
-; LA32-NEXT: addi.w $a0, $a0, 2047
-; LA32-NEXT: addi.w $a1, $a0, 1
-; LA32-NEXT: ld.w $a0, $a1, 0
-; LA32-NEXT: ld.w $a1, $a1, 4
+; LA32-NEXT: addi.w $a1, $a0, 2047
+; LA32-NEXT: ld.w $a0, $a1, 1
+; LA32-NEXT: ld.w $a1, $a1, 5
; LA32-NEXT: ret
;
; LA64-LABEL: ldptr_d:
diff --git a/llvm/test/CodeGen/LoongArch/lsx/and-not-combine.ll b/llvm/test/CodeGen/LoongArch/lsx/and-not-combine.ll
index 3c6d345..f439a33 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/and-not-combine.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/and-not-combine.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
-; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s
-; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA64
 
 define void @and_not_combine_v16i8(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind {
; CHECK-LABEL: and_not_combine_v16i8:
@@ -85,3 +85,327 @@ entry:
store <2 x i64> %and, ptr %res
ret void
}
+
+define void @pre_not_and_not_combine_v16i8(ptr %res, ptr %a, i8 %b) nounwind {
+; CHECK-LABEL: pre_not_and_not_combine_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vreplgr2vr.b $vr1, $a2
+; CHECK-NEXT: vnor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <16 x i8>, ptr %a
+ %b.not = xor i8 %b, -1
+ %b.not.ele = insertelement <16 x i8> poison, i8 %b.not, i64 0
+ %v1.not = shufflevector <16 x i8> %b.not.ele, <16 x i8> poison, <16 x i32> zeroinitializer
+ %v0.not = xor <16 x i8> %v0, splat (i8 -1)
+ %and = and <16 x i8> %v0.not, %v1.not
+ store <16 x i8> %and, ptr %res
+ ret void
+}
+
+define void @post_not_and_not_combine_v16i8(ptr %res, ptr %a, i8 %b) nounwind {
+; CHECK-LABEL: post_not_and_not_combine_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vreplgr2vr.b $vr1, $a2
+; CHECK-NEXT: vnor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <16 x i8>, ptr %a
+ %b.ele = insertelement <16 x i8> poison, i8 %b, i64 0
+ %v1 = shufflevector <16 x i8> %b.ele, <16 x i8> poison, <16 x i32> zeroinitializer
+ %v0.not = xor <16 x i8> %v0, splat (i8 -1)
+ %v1.not = xor <16 x i8> %v1, splat (i8 -1)
+ %and = and <16 x i8> %v0.not, %v1.not
+ store <16 x i8> %and, ptr %res
+ ret void
+}
+
+define void @pre_not_and_not_combine_v8i16(ptr %res, ptr %a, i16 %b) nounwind {
+; CHECK-LABEL: pre_not_and_not_combine_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vreplgr2vr.h $vr1, $a2
+; CHECK-NEXT: vnor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <8 x i16>, ptr %a
+ %b.not = xor i16 %b, -1
+ %b.not.ele = insertelement <8 x i16> poison, i16 %b.not, i64 0
+ %v1.not = shufflevector <8 x i16> %b.not.ele, <8 x i16> poison, <8 x i32> zeroinitializer
+ %v0.not = xor <8 x i16> %v0, splat (i16 -1)
+ %and = and <8 x i16> %v0.not, %v1.not
+ store <8 x i16> %and, ptr %res
+ ret void
+}
+
+define void @post_not_and_not_combine_v8i16(ptr %res, ptr %a, i16 %b) nounwind {
+; CHECK-LABEL: post_not_and_not_combine_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vreplgr2vr.h $vr1, $a2
+; CHECK-NEXT: vnor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <8 x i16>, ptr %a
+ %b.ele = insertelement <8 x i16> poison, i16 %b, i64 0
+ %v1 = shufflevector <8 x i16> %b.ele, <8 x i16> poison, <8 x i32> zeroinitializer
+ %v0.not = xor <8 x i16> %v0, splat (i16 -1)
+ %v1.not = xor <8 x i16> %v1, splat (i16 -1)
+ %and = and <8 x i16> %v0.not, %v1.not
+ store <8 x i16> %and, ptr %res
+ ret void
+}
+
+define void @pre_not_and_not_combine_v4i32(ptr %res, ptr %a, i32 %b) nounwind {
+; CHECK-LABEL: pre_not_and_not_combine_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vreplgr2vr.w $vr1, $a2
+; CHECK-NEXT: vnor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <4 x i32>, ptr %a
+ %b.not = xor i32 %b, -1
+ %b.not.ele = insertelement <4 x i32> poison, i32 %b.not, i64 0
+ %v1.not = shufflevector <4 x i32> %b.not.ele, <4 x i32> poison, <4 x i32> zeroinitializer
+ %v0.not = xor <4 x i32> %v0, splat (i32 -1)
+ %and = and <4 x i32> %v0.not, %v1.not
+ store <4 x i32> %and, ptr %res
+ ret void
+}
+
+define void @post_not_and_not_combine_v4i32(ptr %res, ptr %a, i32 %b) nounwind {
+; CHECK-LABEL: post_not_and_not_combine_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vreplgr2vr.w $vr1, $a2
+; CHECK-NEXT: vnor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <4 x i32>, ptr %a
+ %b.ele = insertelement <4 x i32> poison, i32 %b, i64 0
+ %v1 = shufflevector <4 x i32> %b.ele, <4 x i32> poison, <4 x i32> zeroinitializer
+ %v0.not = xor <4 x i32> %v0, splat (i32 -1)
+ %v1.not = xor <4 x i32> %v1, splat (i32 -1)
+ %and = and <4 x i32> %v0.not, %v1.not
+ store <4 x i32> %and, ptr %res
+ ret void
+}
+
+define void @pre_not_and_not_combine_v2i64(ptr %res, ptr %a, i64 %b) nounwind {
+; LA32-LABEL: pre_not_and_not_combine_v2i64:
+; LA32: # %bb.0:
+; LA32-NEXT: vld $vr0, $a1, 0
+; LA32-NEXT: nor $a1, $a3, $zero
+; LA32-NEXT: nor $a2, $a2, $zero
+; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 1
+; LA32-NEXT: vreplvei.d $vr1, $vr1, 0
+; LA32-NEXT: vandn.v $vr0, $vr0, $vr1
+; LA32-NEXT: vst $vr0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: pre_not_and_not_combine_v2i64:
+; LA64: # %bb.0:
+; LA64-NEXT: vld $vr0, $a1, 0
+; LA64-NEXT: vreplgr2vr.d $vr1, $a2
+; LA64-NEXT: vnor.v $vr0, $vr0, $vr1
+; LA64-NEXT: vst $vr0, $a0, 0
+; LA64-NEXT: ret
+ %v0 = load <2 x i64>, ptr %a
+ %b.not = xor i64 %b, -1
+ %b.not.ele = insertelement <2 x i64> poison, i64 %b.not, i64 0
+ %v1.not = shufflevector <2 x i64> %b.not.ele, <2 x i64> poison, <2 x i32> zeroinitializer
+ %v0.not = xor <2 x i64> %v0, splat (i64 -1)
+ %and = and <2 x i64> %v0.not, %v1.not
+ store <2 x i64> %and, ptr %res
+ ret void
+}
+
+define void @post_not_and_not_combine_v2i64(ptr %res, ptr %a, i64 %b) nounwind {
+; LA32-LABEL: post_not_and_not_combine_v2i64:
+; LA32: # %bb.0:
+; LA32-NEXT: vld $vr0, $a1, 0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a3, 1
+; LA32-NEXT: vreplvei.d $vr1, $vr1, 0
+; LA32-NEXT: vnor.v $vr0, $vr0, $vr1
+; LA32-NEXT: vst $vr0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: post_not_and_not_combine_v2i64:
+; LA64: # %bb.0:
+; LA64-NEXT: vld $vr0, $a1, 0
+; LA64-NEXT: vreplgr2vr.d $vr1, $a2
+; LA64-NEXT: vnor.v $vr0, $vr0, $vr1
+; LA64-NEXT: vst $vr0, $a0, 0
+; LA64-NEXT: ret
+ %v0 = load <2 x i64>, ptr %a
+ %b.ele = insertelement <2 x i64> poison, i64 %b, i64 0
+ %v1 = shufflevector <2 x i64> %b.ele, <2 x i64> poison, <2 x i32> zeroinitializer
+ %v0.not = xor <2 x i64> %v0, splat (i64 -1)
+ %v1.not = xor <2 x i64> %v1, splat (i64 -1)
+ %and = and <2 x i64> %v0.not, %v1.not
+ store <2 x i64> %and, ptr %res
+ ret void
+}
+
+define void @and_not_combine_splatimm_v16i8(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: and_not_combine_splatimm_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vnori.b $vr0, $vr0, 3
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <16 x i8>, ptr %a0
+ %and = and <16 x i8> %v0, splat (i8 -4)
+ %xor = xor <16 x i8> %and, splat (i8 -4)
+ store <16 x i8> %xor, ptr %res
+ ret void
+}
+
+define void @and_not_combine_splatimm_v8i16(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: and_not_combine_splatimm_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vrepli.h $vr1, -4
+; CHECK-NEXT: vandn.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <8 x i16>, ptr %a0
+ %and = and <8 x i16> %v0, splat (i16 -4)
+ %xor = xor <8 x i16> %and, splat (i16 -4)
+ store <8 x i16> %xor, ptr %res
+ ret void
+}
+
+define void @and_not_combine_splatimm_v4i32(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: and_not_combine_splatimm_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vrepli.w $vr1, -4
+; CHECK-NEXT: vandn.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <4 x i32>, ptr %a0
+ %and = and <4 x i32> %v0, splat (i32 -4)
+ %xor = xor <4 x i32> %and, splat (i32 -4)
+ store <4 x i32> %xor, ptr %res
+ ret void
+}
+
+define void @and_not_combine_splatimm_v2i64(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: and_not_combine_splatimm_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vrepli.d $vr1, -4
+; CHECK-NEXT: vandn.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <2 x i64>, ptr %a0
+ %and = and <2 x i64> %v0, splat (i64 -4)
+ %xor = xor <2 x i64> %and, splat (i64 -4)
+ store <2 x i64> %xor, ptr %res
+ ret void
+}
+
+define void @and_or_not_combine_v16i8(ptr %pa, ptr %pb, ptr %pv, ptr %dst) nounwind {
+; CHECK-LABEL: and_or_not_combine_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a0, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vld $vr2, $a1, 0
+; CHECK-NEXT: vseq.b $vr0, $vr1, $vr0
+; CHECK-NEXT: vseq.b $vr1, $vr1, $vr2
+; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vnori.b $vr0, $vr0, 251
+; CHECK-NEXT: vst $vr0, $a3, 0
+; CHECK-NEXT: ret
+ %a = load <16 x i8>, ptr %pa
+ %b = load <16 x i8>, ptr %pb
+ %v = load <16 x i8>, ptr %pv
+ %ca = icmp ne <16 x i8> %v, %a
+ %cb = icmp ne <16 x i8> %v, %b
+ %or = or <16 x i1> %ca, %cb
+ %ext = sext <16 x i1> %or to <16 x i8>
+ %and = and <16 x i8> %ext, splat (i8 4)
+ store <16 x i8> %and, ptr %dst
+ ret void
+}
+
+define void @and_or_not_combine_v8i16(ptr %pa, ptr %pb, ptr %pv, ptr %dst) nounwind {
+; CHECK-LABEL: and_or_not_combine_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a0, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vld $vr2, $a1, 0
+; CHECK-NEXT: vseq.h $vr0, $vr1, $vr0
+; CHECK-NEXT: vseq.h $vr1, $vr1, $vr2
+; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vrepli.h $vr1, 4
+; CHECK-NEXT: vandn.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vst $vr0, $a3, 0
+; CHECK-NEXT: ret
+ %a = load <8 x i16>, ptr %pa
+ %b = load <8 x i16>, ptr %pb
+ %v = load <8 x i16>, ptr %pv
+ %ca = icmp ne <8 x i16> %v, %a
+ %cb = icmp ne <8 x i16> %v, %b
+ %or = or <8 x i1> %ca, %cb
+ %ext = sext <8 x i1> %or to <8 x i16>
+ %and = and <8 x i16> %ext, splat (i16 4)
+ store <8 x i16> %and, ptr %dst
+ ret void
+}
+
+define void @and_or_not_combine_v4i32(ptr %pa, ptr %pb, ptr %pv, ptr %dst) nounwind {
+; CHECK-LABEL: and_or_not_combine_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a0, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vld $vr2, $a1, 0
+; CHECK-NEXT: vseq.w $vr0, $vr1, $vr0
+; CHECK-NEXT: vseq.w $vr1, $vr1, $vr2
+; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vrepli.w $vr1, 4
+; CHECK-NEXT: vandn.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vst $vr0, $a3, 0
+; CHECK-NEXT: ret
+ %a = load <4 x i32>, ptr %pa
+ %b = load <4 x i32>, ptr %pb
+ %v = load <4 x i32>, ptr %pv
+ %ca = icmp ne <4 x i32> %v, %a
+ %cb = icmp ne <4 x i32> %v, %b
+ %or = or <4 x i1> %ca, %cb
+ %ext = sext <4 x i1> %or to <4 x i32>
+ %and = and <4 x i32> %ext, splat (i32 4)
+ store <4 x i32> %and, ptr %dst
+ ret void
+}
+
+define void @and_or_not_combine_v2i64(ptr %pa, ptr %pb, ptr %pv, ptr %dst) nounwind {
+; CHECK-LABEL: and_or_not_combine_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a0, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vld $vr2, $a1, 0
+; CHECK-NEXT: vseq.d $vr0, $vr1, $vr0
+; CHECK-NEXT: vseq.d $vr1, $vr1, $vr2
+; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vrepli.d $vr1, 4
+; CHECK-NEXT: vandn.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vst $vr0, $a3, 0
+; CHECK-NEXT: ret
+ %a = load <2 x i64>, ptr %pa
+ %b = load <2 x i64>, ptr %pb
+ %v = load <2 x i64>, ptr %pv
+ %ca = icmp ne <2 x i64> %v, %a
+ %cb = icmp ne <2 x i64> %v, %b
+ %or = or <2 x i1> %ca, %cb
+ %ext = sext <2 x i1> %or to <2 x i64>
+ %and = and <2 x i64> %ext, splat (i64 4)
+ store <2 x i64> %and, ptr %dst
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll b/llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll
index a9a38e8..6ac7d51 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll
@@ -106,6 +106,69 @@ define void @ctlz_v2i64(ptr %src, ptr %dst) nounwind {
ret void
 }
 
+define void @not_ctlz_v16i8(ptr %src, ptr %dst) nounwind {
+; CHECK-LABEL: not_ctlz_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a0, 0
+; CHECK-NEXT: vxori.b $vr0, $vr0, 255
+; CHECK-NEXT: vclz.b $vr0, $vr0
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: ret
+ %v = load <16 x i8>, ptr %src
+ %neg = xor <16 x i8> %v, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+ %res = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %neg, i1 false)
+ store <16 x i8> %res, ptr %dst
+ ret void
+}
+
+define void @not_ctlz_v8i16(ptr %src, ptr %dst) nounwind {
+; CHECK-LABEL: not_ctlz_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a0, 0
+; CHECK-NEXT: vrepli.b $vr1, -1
+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vclz.h $vr0, $vr0
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: ret
+ %v = load <8 x i16>, ptr %src
+ %neg = xor <8 x i16> %v, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+ %res = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %neg, i1 false)
+ store <8 x i16> %res, ptr %dst
+ ret void
+}
+
+define void @not_ctlz_v4i32(ptr %src, ptr %dst) nounwind {
+; CHECK-LABEL: not_ctlz_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a0, 0
+; CHECK-NEXT: vrepli.b $vr1, -1
+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vclz.w $vr0, $vr0
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: ret
+ %v = load <4 x i32>, ptr %src
+ %neg = xor <4 x i32> %v, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %res = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %neg, i1 false)
+ store <4 x i32> %res, ptr %dst
+ ret void
+}
+
+define void @not_ctlz_v2i64(ptr %src, ptr %dst) nounwind {
+; CHECK-LABEL: not_ctlz_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a0, 0
+; CHECK-NEXT: vrepli.b $vr1, -1
+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vclz.d $vr0, $vr0
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: ret
+ %v = load <2 x i64>, ptr %src
+ %neg = xor <2 x i64> %v, <i64 -1, i64 -1>
+ %res = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %neg, i1 false)
+ store <2 x i64> %res, ptr %dst
+ ret void
+}
+
declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>)
declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>)
declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>)
diff --git a/llvm/test/CodeGen/LoongArch/lsx/fp-max-min.ll b/llvm/test/CodeGen/LoongArch/lsx/fp-max-min.ll
index 27ecb75..c173092 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/fp-max-min.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/fp-max-min.ll
@@ -5,24 +5,10 @@
define void @minnum_v4f32(ptr %res, ptr %x, ptr %y) nounwind {
; CHECK-LABEL: minnum_v4f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vld $vr0, $a2, 0
-; CHECK-NEXT: vld $vr1, $a1, 0
-; CHECK-NEXT: vreplvei.w $vr2, $vr0, 1
-; CHECK-NEXT: vreplvei.w $vr3, $vr1, 1
-; CHECK-NEXT: fmin.s $fa2, $fa3, $fa2
-; CHECK-NEXT: vreplvei.w $vr3, $vr0, 0
-; CHECK-NEXT: vreplvei.w $vr4, $vr1, 0
-; CHECK-NEXT: fmin.s $fa3, $fa4, $fa3
-; CHECK-NEXT: vextrins.w $vr3, $vr2, 16
-; CHECK-NEXT: vreplvei.w $vr2, $vr0, 2
-; CHECK-NEXT: vreplvei.w $vr4, $vr1, 2
-; CHECK-NEXT: fmin.s $fa2, $fa4, $fa2
-; CHECK-NEXT: vextrins.w $vr3, $vr2, 32
-; CHECK-NEXT: vreplvei.w $vr0, $vr0, 3
-; CHECK-NEXT: vreplvei.w $vr1, $vr1, 3
-; CHECK-NEXT: fmin.s $fa0, $fa1, $fa0
-; CHECK-NEXT: vextrins.w $vr3, $vr0, 48
-; CHECK-NEXT: vst $vr3, $a0, 0
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vfmin.s $vr0, $vr0, $vr1
+; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
%v0 = load <4 x float>, ptr %x
@@ -35,15 +21,9 @@ entry:
define void @minnum_v2f64(ptr %res, ptr %x, ptr %y) nounwind {
; CHECK-LABEL: minnum_v2f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vld $vr0, $a2, 0
-; CHECK-NEXT: vld $vr1, $a1, 0
-; CHECK-NEXT: vreplvei.d $vr2, $vr0, 1
-; CHECK-NEXT: vreplvei.d $vr3, $vr1, 1
-; CHECK-NEXT: fmin.d $fa2, $fa3, $fa2
-; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0
-; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0
-; CHECK-NEXT: fmin.d $fa0, $fa1, $fa0
-; CHECK-NEXT: vextrins.d $vr0, $vr2, 16
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vfmin.d $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -57,24 +37,10 @@ entry:
define void @maxnum_v4f32(ptr %res, ptr %x, ptr %y) nounwind {
; CHECK-LABEL: maxnum_v4f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vld $vr0, $a2, 0
-; CHECK-NEXT: vld $vr1, $a1, 0
-; CHECK-NEXT: vreplvei.w $vr2, $vr0, 1
-; CHECK-NEXT: vreplvei.w $vr3, $vr1, 1
-; CHECK-NEXT: fmax.s $fa2, $fa3, $fa2
-; CHECK-NEXT: vreplvei.w $vr3, $vr0, 0
-; CHECK-NEXT: vreplvei.w $vr4, $vr1, 0
-; CHECK-NEXT: fmax.s $fa3, $fa4, $fa3
-; CHECK-NEXT: vextrins.w $vr3, $vr2, 16
-; CHECK-NEXT: vreplvei.w $vr2, $vr0, 2
-; CHECK-NEXT: vreplvei.w $vr4, $vr1, 2
-; CHECK-NEXT: fmax.s $fa2, $fa4, $fa2
-; CHECK-NEXT: vextrins.w $vr3, $vr2, 32
-; CHECK-NEXT: vreplvei.w $vr0, $vr0, 3
-; CHECK-NEXT: vreplvei.w $vr1, $vr1, 3
-; CHECK-NEXT: fmax.s $fa0, $fa1, $fa0
-; CHECK-NEXT: vextrins.w $vr3, $vr0, 48
-; CHECK-NEXT: vst $vr3, $a0, 0
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vfmax.s $vr0, $vr0, $vr1
+; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
%v0 = load <4 x float>, ptr %x
@@ -87,15 +53,9 @@ entry:
define void @maxnum_v2f64(ptr %res, ptr %x, ptr %y) nounwind {
; CHECK-LABEL: maxnum_v2f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vld $vr0, $a2, 0
-; CHECK-NEXT: vld $vr1, $a1, 0
-; CHECK-NEXT: vreplvei.d $vr2, $vr0, 1
-; CHECK-NEXT: vreplvei.d $vr3, $vr1, 1
-; CHECK-NEXT: fmax.d $fa2, $fa3, $fa2
-; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0
-; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0
-; CHECK-NEXT: fmax.d $fa0, $fa1, $fa0
-; CHECK-NEXT: vextrins.d $vr0, $vr2, 16
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vfmax.d $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
diff --git a/llvm/test/CodeGen/LoongArch/lsx/fp-rounding.ll b/llvm/test/CodeGen/LoongArch/lsx/fp-rounding.ll
index 1ca6290..cb01ac0 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/fp-rounding.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/fp-rounding.ll
@@ -7,22 +7,8 @@ define void @ceil_v4f32(ptr %res, ptr %a0) nounwind {
; CHECK-LABEL: ceil_v4f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
-; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1
-; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
-; CHECK-NEXT: vfrintrp.s $vr1, $vr1
-; CHECK-NEXT: vreplvei.w $vr2, $vr0, 0
-; CHECK-NEXT: vreplvei.w $vr2, $vr2, 0
-; CHECK-NEXT: vfrintrp.s $vr2, $vr2
-; CHECK-NEXT: vextrins.w $vr2, $vr1, 16
-; CHECK-NEXT: vreplvei.w $vr1, $vr0, 2
-; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
-; CHECK-NEXT: vfrintrp.s $vr1, $vr1
-; CHECK-NEXT: vextrins.w $vr2, $vr1, 32
-; CHECK-NEXT: vreplvei.w $vr0, $vr0, 3
-; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0
; CHECK-NEXT: vfrintrp.s $vr0, $vr0
-; CHECK-NEXT: vextrins.w $vr2, $vr0, 48
-; CHECK-NEXT: vst $vr2, $a0, 0
+; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
%v0 = load <4 x float>, ptr %a0
@@ -36,13 +22,7 @@ define void @ceil_v2f64(ptr %res, ptr %a0) nounwind {
; CHECK-LABEL: ceil_v2f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
-; CHECK-NEXT: vreplvei.d $vr1, $vr0, 1
-; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0
-; CHECK-NEXT: vfrintrp.d $vr1, $vr1
-; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0
-; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0
; CHECK-NEXT: vfrintrp.d $vr0, $vr0
-; CHECK-NEXT: vextrins.d $vr0, $vr1, 16
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -57,22 +37,8 @@ define void @floor_v4f32(ptr %res, ptr %a0) nounwind {
; CHECK-LABEL: floor_v4f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
-; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1
-; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
-; CHECK-NEXT: vfrintrm.s $vr1, $vr1
-; CHECK-NEXT: vreplvei.w $vr2, $vr0, 0
-; CHECK-NEXT: vreplvei.w $vr2, $vr2, 0
-; CHECK-NEXT: vfrintrm.s $vr2, $vr2
-; CHECK-NEXT: vextrins.w $vr2, $vr1, 16
-; CHECK-NEXT: vreplvei.w $vr1, $vr0, 2
-; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
-; CHECK-NEXT: vfrintrm.s $vr1, $vr1
-; CHECK-NEXT: vextrins.w $vr2, $vr1, 32
-; CHECK-NEXT: vreplvei.w $vr0, $vr0, 3
-; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0
; CHECK-NEXT: vfrintrm.s $vr0, $vr0
-; CHECK-NEXT: vextrins.w $vr2, $vr0, 48
-; CHECK-NEXT: vst $vr2, $a0, 0
+; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
%v0 = load <4 x float>, ptr %a0
@@ -86,13 +52,7 @@ define void @floor_v2f64(ptr %res, ptr %a0) nounwind {
; CHECK-LABEL: floor_v2f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
-; CHECK-NEXT: vreplvei.d $vr1, $vr0, 1
-; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0
-; CHECK-NEXT: vfrintrm.d $vr1, $vr1
-; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0
-; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0
; CHECK-NEXT: vfrintrm.d $vr0, $vr0
-; CHECK-NEXT: vextrins.d $vr0, $vr1, 16
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -107,22 +67,8 @@ define void @trunc_v4f32(ptr %res, ptr %a0) nounwind {
; CHECK-LABEL: trunc_v4f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
-; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1
-; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
-; CHECK-NEXT: vfrintrz.s $vr1, $vr1
-; CHECK-NEXT: vreplvei.w $vr2, $vr0, 0
-; CHECK-NEXT: vreplvei.w $vr2, $vr2, 0
-; CHECK-NEXT: vfrintrz.s $vr2, $vr2
-; CHECK-NEXT: vextrins.w $vr2, $vr1, 16
-; CHECK-NEXT: vreplvei.w $vr1, $vr0, 2
-; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
-; CHECK-NEXT: vfrintrz.s $vr1, $vr1
-; CHECK-NEXT: vextrins.w $vr2, $vr1, 32
-; CHECK-NEXT: vreplvei.w $vr0, $vr0, 3
-; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0
; CHECK-NEXT: vfrintrz.s $vr0, $vr0
-; CHECK-NEXT: vextrins.w $vr2, $vr0, 48
-; CHECK-NEXT: vst $vr2, $a0, 0
+; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
%v0 = load <4 x float>, ptr %a0
@@ -136,13 +82,7 @@ define void @trunc_v2f64(ptr %res, ptr %a0) nounwind {
; CHECK-LABEL: trunc_v2f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
-; CHECK-NEXT: vreplvei.d $vr1, $vr0, 1
-; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0
-; CHECK-NEXT: vfrintrz.d $vr1, $vr1
-; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0
-; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0
; CHECK-NEXT: vfrintrz.d $vr0, $vr0
-; CHECK-NEXT: vextrins.d $vr0, $vr1, 16
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -157,22 +97,8 @@ define void @roundeven_v4f32(ptr %res, ptr %a0) nounwind {
; CHECK-LABEL: roundeven_v4f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
-; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1
-; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
-; CHECK-NEXT: vfrintrne.s $vr1, $vr1
-; CHECK-NEXT: vreplvei.w $vr2, $vr0, 0
-; CHECK-NEXT: vreplvei.w $vr2, $vr2, 0
-; CHECK-NEXT: vfrintrne.s $vr2, $vr2
-; CHECK-NEXT: vextrins.w $vr2, $vr1, 16
-; CHECK-NEXT: vreplvei.w $vr1, $vr0, 2
-; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
-; CHECK-NEXT: vfrintrne.s $vr1, $vr1
-; CHECK-NEXT: vextrins.w $vr2, $vr1, 32
-; CHECK-NEXT: vreplvei.w $vr0, $vr0, 3
-; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0
; CHECK-NEXT: vfrintrne.s $vr0, $vr0
-; CHECK-NEXT: vextrins.w $vr2, $vr0, 48
-; CHECK-NEXT: vst $vr2, $a0, 0
+; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
%v0 = load <4 x float>, ptr %a0
@@ -186,13 +112,7 @@ define void @roundeven_v2f64(ptr %res, ptr %a0) nounwind {
; CHECK-LABEL: roundeven_v2f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
-; CHECK-NEXT: vreplvei.d $vr1, $vr0, 1
-; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0
-; CHECK-NEXT: vfrintrne.d $vr1, $vr1
-; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0
-; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0
; CHECK-NEXT: vfrintrne.d $vr0, $vr0
-; CHECK-NEXT: vextrins.d $vr0, $vr1, 16
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
diff --git a/llvm/test/CodeGen/LoongArch/lsx/rotl-rotr.ll b/llvm/test/CodeGen/LoongArch/lsx/rotl-rotr.ll
new file mode 100644
index 0000000..106a7b0
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/rotl-rotr.ll
@@ -0,0 +1,246 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA64
+
+define void @rotl_v16i8(ptr %dst, ptr %src, i8 signext %a0) nounwind {
+; CHECK-LABEL: rotl_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vreplgr2vr.b $vr1, $a2
+; CHECK-NEXT: vneg.b $vr1, $vr1
+; CHECK-NEXT: vrotr.b $vr0, $vr0, $vr1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <16 x i8>, ptr %src
+ %v1.ele = insertelement <16 x i8> poison, i8 %a0, i8 0
+ %v1 = shufflevector <16 x i8> %v1.ele, <16 x i8> poison, <16 x i32> zeroinitializer
+ %v1.sub = sub <16 x i8> splat (i8 8), %v1
+ %b = shl <16 x i8> %v0, %v1
+ %c = lshr <16 x i8> %v0, %v1.sub
+ %d = or <16 x i8> %b, %c
+ store <16 x i8> %d, ptr %dst
+ ret void
+}
+
+define void @rotr_v16i8(ptr %dst, ptr %src, i8 signext %a0) nounwind {
+; CHECK-LABEL: rotr_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vreplgr2vr.b $vr1, $a2
+; CHECK-NEXT: vrotr.b $vr0, $vr0, $vr1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <16 x i8>, ptr %src
+ %v1.ele = insertelement <16 x i8> poison, i8 %a0, i8 0
+ %v1 = shufflevector <16 x i8> %v1.ele, <16 x i8> poison, <16 x i32> zeroinitializer
+ %v1.sub = sub <16 x i8> splat (i8 8), %v1
+ %b = lshr <16 x i8> %v0, %v1
+ %c = shl <16 x i8> %v0, %v1.sub
+ %d = or <16 x i8> %b, %c
+ store <16 x i8> %d, ptr %dst
+ ret void
+}
+
+define void @rotr_v16i8_imm(ptr %dst, ptr %src) nounwind {
+; CHECK-LABEL: rotr_v16i8_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vrotri.b $vr0, $vr0, 2
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <16 x i8>, ptr %src
+ %b = lshr <16 x i8> %v0, splat (i8 2)
+ %c = shl <16 x i8> %v0, splat (i8 6)
+ %d = or <16 x i8> %b, %c
+ store <16 x i8> %d, ptr %dst
+ ret void
+}
+
+define void @rotl_v8i16(ptr %dst, ptr %src, i16 signext %a0) nounwind {
+; CHECK-LABEL: rotl_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vreplgr2vr.h $vr1, $a2
+; CHECK-NEXT: vneg.h $vr1, $vr1
+; CHECK-NEXT: vrotr.h $vr0, $vr0, $vr1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <8 x i16>, ptr %src
+ %v1.ele = insertelement <8 x i16> poison, i16 %a0, i16 0
+ %v1 = shufflevector <8 x i16> %v1.ele, <8 x i16> poison, <8 x i32> zeroinitializer
+ %v1.sub = sub <8 x i16> splat (i16 16), %v1
+ %b = shl <8 x i16> %v0, %v1
+ %c = lshr <8 x i16> %v0, %v1.sub
+ %d = or <8 x i16> %b, %c
+ store <8 x i16> %d, ptr %dst
+ ret void
+}
+
+define void @rotr_v8i16(ptr %dst, ptr %src, i16 signext %a0) nounwind {
+; CHECK-LABEL: rotr_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vreplgr2vr.h $vr1, $a2
+; CHECK-NEXT: vrotr.h $vr0, $vr0, $vr1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <8 x i16>, ptr %src
+ %v1.ele = insertelement <8 x i16> poison, i16 %a0, i16 0
+ %v1 = shufflevector <8 x i16> %v1.ele, <8 x i16> poison, <8 x i32> zeroinitializer
+ %v1.sub = sub <8 x i16> splat (i16 16), %v1
+ %b = lshr <8 x i16> %v0, %v1
+ %c = shl <8 x i16> %v0, %v1.sub
+ %d = or <8 x i16> %b, %c
+ store <8 x i16> %d, ptr %dst
+ ret void
+}
+
+define void @rotr_v8i16_imm(ptr %dst, ptr %src) nounwind {
+; CHECK-LABEL: rotr_v8i16_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vrotri.h $vr0, $vr0, 2
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <8 x i16>, ptr %src
+ %b = lshr <8 x i16> %v0, splat (i16 2)
+ %c = shl <8 x i16> %v0, splat (i16 14)
+ %d = or <8 x i16> %b, %c
+ store <8 x i16> %d, ptr %dst
+ ret void
+}
+
+define void @rotl_v4i32(ptr %dst, ptr %src, i32 signext %a0) nounwind {
+; CHECK-LABEL: rotl_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vreplgr2vr.w $vr1, $a2
+; CHECK-NEXT: vneg.w $vr1, $vr1
+; CHECK-NEXT: vrotr.w $vr0, $vr0, $vr1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <4 x i32>, ptr %src
+ %v1.ele = insertelement <4 x i32> poison, i32 %a0, i32 0
+ %v1 = shufflevector <4 x i32> %v1.ele, <4 x i32> poison, <4 x i32> zeroinitializer
+ %v1.sub = sub <4 x i32> splat (i32 32), %v1
+ %b = shl <4 x i32> %v0, %v1
+ %c = lshr <4 x i32> %v0, %v1.sub
+ %d = or <4 x i32> %b, %c
+ store <4 x i32> %d, ptr %dst
+ ret void
+}
+
+define void @rotr_v4i32(ptr %dst, ptr %src, i32 signext %a0) nounwind {
+; CHECK-LABEL: rotr_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vreplgr2vr.w $vr1, $a2
+; CHECK-NEXT: vrotr.w $vr0, $vr0, $vr1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <4 x i32>, ptr %src
+ %v1.ele = insertelement <4 x i32> poison, i32 %a0, i32 0
+ %v1 = shufflevector <4 x i32> %v1.ele, <4 x i32> poison, <4 x i32> zeroinitializer
+ %v1.sub = sub <4 x i32> splat (i32 32), %v1
+ %b = lshr <4 x i32> %v0, %v1
+ %c = shl <4 x i32> %v0, %v1.sub
+ %d = or <4 x i32> %b, %c
+ store <4 x i32> %d, ptr %dst
+ ret void
+}
+
+define void @rotr_v4i32_imm(ptr %dst, ptr %src) nounwind {
+; CHECK-LABEL: rotr_v4i32_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vrotri.w $vr0, $vr0, 2
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <4 x i32>, ptr %src
+ %b = lshr <4 x i32> %v0, splat (i32 2)
+ %c = shl <4 x i32> %v0, splat (i32 30)
+ %d = or <4 x i32> %b, %c
+ store <4 x i32> %d, ptr %dst
+ ret void
+}
+
+define void @rotl_v2i64(ptr %dst, ptr %src, i64 %a0) nounwind {
+; LA32-LABEL: rotl_v2i64:
+; LA32: # %bb.0:
+; LA32-NEXT: vld $vr0, $a1, 0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 2
+; LA32-NEXT: vneg.d $vr1, $vr1
+; LA32-NEXT: vrotr.d $vr0, $vr0, $vr1
+; LA32-NEXT: vst $vr0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: rotl_v2i64:
+; LA64: # %bb.0:
+; LA64-NEXT: vld $vr0, $a1, 0
+; LA64-NEXT: vreplgr2vr.d $vr1, $a2
+; LA64-NEXT: vneg.d $vr1, $vr1
+; LA64-NEXT: vrotr.d $vr0, $vr0, $vr1
+; LA64-NEXT: vst $vr0, $a0, 0
+; LA64-NEXT: ret
+ %v0 = load <2 x i64>, ptr %src
+ %v1.ele = insertelement <2 x i64> poison, i64 %a0, i64 0
+ %v1 = shufflevector <2 x i64> %v1.ele, <2 x i64> poison, <2 x i32> zeroinitializer
+ %v1.sub = sub <2 x i64> splat (i64 64), %v1
+ %b = shl <2 x i64> %v0, %v1
+ %c = lshr <2 x i64> %v0, %v1.sub
+ %d = or <2 x i64> %b, %c
+ store <2 x i64> %d, ptr %dst
+ ret void
+}
+
+define void @rotr_v2i64(ptr %dst, ptr %src, i64 %a0) nounwind {
+; LA32-LABEL: rotr_v2i64:
+; LA32: # %bb.0:
+; LA32-NEXT: vld $vr0, $a1, 0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 2
+; LA32-NEXT: vrotr.d $vr0, $vr0, $vr1
+; LA32-NEXT: vst $vr0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: rotr_v2i64:
+; LA64: # %bb.0:
+; LA64-NEXT: vld $vr0, $a1, 0
+; LA64-NEXT: vreplgr2vr.d $vr1, $a2
+; LA64-NEXT: vrotr.d $vr0, $vr0, $vr1
+; LA64-NEXT: vst $vr0, $a0, 0
+; LA64-NEXT: ret
+ %v0 = load <2 x i64>, ptr %src
+ %v1.ele = insertelement <2 x i64> poison, i64 %a0, i64 0
+ %v1 = shufflevector <2 x i64> %v1.ele, <2 x i64> poison, <2 x i32> zeroinitializer
+ %v1.sub = sub <2 x i64> splat (i64 64), %v1
+ %b = lshr <2 x i64> %v0, %v1
+ %c = shl <2 x i64> %v0, %v1.sub
+ %d = or <2 x i64> %b, %c
+ store <2 x i64> %d, ptr %dst
+ ret void
+}
+
+define void @rotr_v2i64_imm(ptr %dst, ptr %src) nounwind {
+; LA32-LABEL: rotr_v2i64_imm:
+; LA32: # %bb.0:
+; LA32-NEXT: vld $vr0, $a1, 0
+; LA32-NEXT: vrepli.w $vr1, -62
+; LA32-NEXT: vrotr.d $vr0, $vr0, $vr1
+; LA32-NEXT: vst $vr0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: rotr_v2i64_imm:
+; LA64: # %bb.0:
+; LA64-NEXT: vld $vr0, $a1, 0
+; LA64-NEXT: vrotri.d $vr0, $vr0, 2
+; LA64-NEXT: vst $vr0, $a0, 0
+; LA64-NEXT: ret
+ %v0 = load <2 x i64>, ptr %src
+ %b = lshr <2 x i64> %v0, splat (i64 2)
+ %c = shl <2 x i64> %v0, splat (i64 62)
+ %d = or <2 x i64> %b, %c
+ store <2 x i64> %d, ptr %dst
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/shuffle-as-bswap.ll b/llvm/test/CodeGen/LoongArch/lsx/shuffle-as-bswap.ll
new file mode 100644
index 0000000..a6b61dc
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/shuffle-as-bswap.ll
@@ -0,0 +1,47 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s
+
+define void @shufflevector_bswap_h(ptr %res, ptr %a) nounwind {
+; CHECK-LABEL: shufflevector_bswap_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vshuf4i.b $vr0, $vr0, 177
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <16 x i8>, ptr %a
+ %b = shufflevector <16 x i8> %va, <16 x i8> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
+ store <16 x i8> %b, ptr %res
+ ret void
+}
+
+define void @shufflevector_bswap_w(ptr %res, ptr %a) nounwind {
+; CHECK-LABEL: shufflevector_bswap_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vshuf4i.b $vr0, $vr0, 27
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <16 x i8>, ptr %a
+ %b = shufflevector <16 x i8> %va, <16 x i8> poison, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
+ store <16 x i8> %b, ptr %res
+ ret void
+}
+
+define void @shufflevector_bswap_d(ptr %res, ptr %a) nounwind {
+; CHECK-LABEL: shufflevector_bswap_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI2_0)
+; CHECK-NEXT: vld $vr1, $a1, %pc_lo12(.LCPI2_0)
+; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr0, $vr1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %va = load <16 x i8>, ptr %a
+ %b = shufflevector <16 x i8> %va, <16 x i8> poison, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
+ store <16 x i8> %b, ptr %res
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/memcmp.ll b/llvm/test/CodeGen/LoongArch/memcmp.ll
index c4aaf9a..c3811c0 100644
--- a/llvm/test/CodeGen/LoongArch/memcmp.ll
+++ b/llvm/test/CodeGen/LoongArch/memcmp.ll
@@ -7,15 +7,24 @@
define signext i32 @test1(ptr %buffer1, ptr %buffer2) {
; CHECK-LABEL: test1:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi.d $sp, $sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset 1, -8
-; CHECK-NEXT: ori $a2, $zero, 16
-; CHECK-NEXT: pcaddu18i $ra, %call36(memcmp)
-; CHECK-NEXT: jirl $ra, $ra, 0
-; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 16
+; CHECK-NEXT: ld.d $a2, $a0, 0
+; CHECK-NEXT: ld.d $a3, $a1, 0
+; CHECK-NEXT: revb.d $a2, $a2
+; CHECK-NEXT: revb.d $a3, $a3
+; CHECK-NEXT: bne $a2, $a3, .LBB0_3
+; CHECK-NEXT: # %bb.1: # %loadbb1
+; CHECK-NEXT: ld.d $a0, $a0, 8
+; CHECK-NEXT: ld.d $a1, $a1, 8
+; CHECK-NEXT: revb.d $a2, $a0
+; CHECK-NEXT: revb.d $a3, $a1
+; CHECK-NEXT: bne $a2, $a3, .LBB0_3
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: move $a0, $zero
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB0_3: # %res_block
+; CHECK-NEXT: sltu $a0, $a2, $a3
+; CHECK-NEXT: sub.d $a0, $zero, $a0
+; CHECK-NEXT: ori $a0, $a0, 1
; CHECK-NEXT: ret
entry:
%call = call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 16)
diff --git a/llvm/test/CodeGen/LoongArch/opt-pipeline.ll b/llvm/test/CodeGen/LoongArch/opt-pipeline.ll
index 661f67d..546ed6c 100644
--- a/llvm/test/CodeGen/LoongArch/opt-pipeline.ll
+++ b/llvm/test/CodeGen/LoongArch/opt-pipeline.ll
@@ -17,9 +17,11 @@
 
 ; LAXX-LABEL: Pass Arguments:
; LAXX-NEXT: Target Library Information
+; LAXX-NEXT: Runtime Library Function Analysis
; LAXX-NEXT: Target Pass Configuration
; LAXX-NEXT: Machine Module Information
; LAXX-NEXT: Target Transform Information
+; LAXX-NEXT: Library Function Lowering Analysis
; LAXX-NEXT: Assumption Cache Tracker
; LAXX-NEXT: Type-Based Alias Analysis
; LAXX-NEXT: Scoped NoAlias Alias Analysis
diff --git a/llvm/test/CodeGen/LoongArch/sink-fold-addi.ll b/llvm/test/CodeGen/LoongArch/sink-fold-addi.ll
new file mode 100644
index 0000000..93f73e5
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/sink-fold-addi.ll
@@ -0,0 +1,746 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx --verify-machineinstrs < %s \
+; RUN: | FileCheck --check-prefix=LA32 %s
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx --verify-machineinstrs < %s \
+; RUN: | FileCheck --check-prefix=LA64 %s
+
+%struct.S = type { i64, i64, i8 }
+%struct.F = type { float, double, float }
+%struct.V = type { <4 x i32>, <4 x i32>, <16 x i16> }
+
+define void @sink_fold_i64(i64 %k, i64 %n, ptr %a) nounwind {
+; LA32-LABEL: sink_fold_i64:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -48
+; LA32-NEXT: st.w $ra, $sp, 44 # 4-byte Folded Spill
+; LA32-NEXT: st.w $fp, $sp, 40 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s0, $sp, 36 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s1, $sp, 32 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s2, $sp, 28 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s3, $sp, 24 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s4, $sp, 20 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s5, $sp, 16 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s6, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: move $s0, $a3
+; LA32-NEXT: move $s1, $a2
+; LA32-NEXT: slli.w $a1, $a0, 4
+; LA32-NEXT: alsl.w $a0, $a0, $a1, 3
+; LA32-NEXT: sltui $a1, $a3, 1
+; LA32-NEXT: slti $a2, $a3, 0
+; LA32-NEXT: masknez $a2, $a2, $a1
+; LA32-NEXT: sltui $a3, $s1, 1
+; LA32-NEXT: maskeqz $a1, $a3, $a1
+; LA32-NEXT: or $a1, $a1, $a2
+; LA32-NEXT: add.w $s2, $a4, $a0
+; LA32-NEXT: bnez $a1, .LBB0_3
+; LA32-NEXT: # %bb.1: # %for.body.preheader
+; LA32-NEXT: move $fp, $a4
+; LA32-NEXT: move $s4, $zero
+; LA32-NEXT: move $s5, $zero
+; LA32-NEXT: move $s3, $zero
+; LA32-NEXT: move $s6, $zero
+; LA32-NEXT: .p2align 4, , 16
+; LA32-NEXT: .LBB0_2: # %for.body
+; LA32-NEXT: # =>This Inner Loop Header: Depth=1
+; LA32-NEXT: move $a0, $fp
+; LA32-NEXT: bl f
+; LA32-NEXT: ld.w $a0, $s2, 12
+; LA32-NEXT: ld.w $a1, $s2, 8
+; LA32-NEXT: add.w $a0, $a0, $s6
+; LA32-NEXT: add.w $s3, $a1, $s3
+; LA32-NEXT: sltu $a1, $s3, $a1
+; LA32-NEXT: addi.w $s4, $s4, 1
+; LA32-NEXT: sltui $a2, $s4, 1
+; LA32-NEXT: add.w $s5, $s5, $a2
+; LA32-NEXT: xor $a2, $s4, $s1
+; LA32-NEXT: xor $a3, $s5, $s0
+; LA32-NEXT: or $a2, $a2, $a3
+; LA32-NEXT: add.w $s6, $a0, $a1
+; LA32-NEXT: bnez $a2, .LBB0_2
+; LA32-NEXT: b .LBB0_4
+; LA32-NEXT: .LBB0_3:
+; LA32-NEXT: move $s3, $zero
+; LA32-NEXT: move $s6, $zero
+; LA32-NEXT: .LBB0_4: # %for.cond.cleanup
+; LA32-NEXT: st.w $s3, $s2, 8
+; LA32-NEXT: st.w $s6, $s2, 12
+; LA32-NEXT: ld.w $s6, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $s5, $sp, 16 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $s4, $sp, 20 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $s3, $sp, 24 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $s2, $sp, 28 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $s1, $sp, 32 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $s0, $sp, 36 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $fp, $sp, 40 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $ra, $sp, 44 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 48
+; LA32-NEXT: ret
+;
+; LA64-LABEL: sink_fold_i64:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: addi.d $sp, $sp, -48
+; LA64-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill
+; LA64-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill
+; LA64-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill
+; LA64-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill
+; LA64-NEXT: st.d $s2, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: move $s0, $a1
+; LA64-NEXT: slli.d $a1, $a0, 4
+; LA64-NEXT: alsl.d $a0, $a0, $a1, 3
+; LA64-NEXT: add.d $s1, $a2, $a0
+; LA64-NEXT: blez $s0, .LBB0_3
+; LA64-NEXT: # %bb.1: # %for.body.preheader
+; LA64-NEXT: move $fp, $a2
+; LA64-NEXT: move $s2, $zero
+; LA64-NEXT: .p2align 4, , 16
+; LA64-NEXT: .LBB0_2: # %for.body
+; LA64-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: move $a0, $fp
+; LA64-NEXT: pcaddu18i $ra, %call36(f)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: ld.d $a0, $s1, 8
+; LA64-NEXT: addi.d $s0, $s0, -1
+; LA64-NEXT: add.d $s2, $a0, $s2
+; LA64-NEXT: bnez $s0, .LBB0_2
+; LA64-NEXT: b .LBB0_4
+; LA64-NEXT: .LBB0_3:
+; LA64-NEXT: move $s2, $zero
+; LA64-NEXT: .LBB0_4: # %for.cond.cleanup
+; LA64-NEXT: st.d $s2, $s1, 8
+; LA64-NEXT: ld.d $s2, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 48
+; LA64-NEXT: ret
+entry:
+ %y = getelementptr inbounds %struct.S, ptr %a, i64 %k, i32 1
+ %cmp4 = icmp sgt i64 %n, 0
+ br i1 %cmp4, label %for.body, label %for.cond.cleanup
+
+for.body: ; preds = %entry, %for.body
+ %i.06 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %s.05 = phi i64 [ 0, %entry ], [ %add, %for.body ]
+ call void @f(ptr %a)
+ %0 = load i64, ptr %y
+ %add = add nsw i64 %0, %s.05
+ %inc = add nuw nsw i64 %i.06, 1
+ %exitcond.not = icmp eq i64 %inc, %n
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ %s.0.lcssa = phi i64 [ 0, %entry ], [ %add, %for.body ]
+ store i64 %s.0.lcssa, ptr %y
+ ret void
+}
+
+define void @sink_fold_f32(i64 %k, i64 %n, ptr %a) nounwind {
+; LA32-LABEL: sink_fold_f32:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -48
+; LA32-NEXT: st.w $ra, $sp, 44 # 4-byte Folded Spill
+; LA32-NEXT: st.w $fp, $sp, 40 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s0, $sp, 36 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s1, $sp, 32 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s2, $sp, 28 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s3, $sp, 24 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s4, $sp, 20 # 4-byte Folded Spill
+; LA32-NEXT: fst.d $fs0, $sp, 8 # 8-byte Folded Spill
+; LA32-NEXT: move $s0, $a3
+; LA32-NEXT: move $s1, $a2
+; LA32-NEXT: slli.w $a1, $a0, 4
+; LA32-NEXT: alsl.w $a0, $a0, $a1, 3
+; LA32-NEXT: sltui $a1, $a3, 1
+; LA32-NEXT: slti $a2, $a3, 0
+; LA32-NEXT: masknez $a2, $a2, $a1
+; LA32-NEXT: sltui $a3, $s1, 1
+; LA32-NEXT: maskeqz $a1, $a3, $a1
+; LA32-NEXT: or $a1, $a1, $a2
+; LA32-NEXT: add.w $s2, $a4, $a0
+; LA32-NEXT: bnez $a1, .LBB1_3
+; LA32-NEXT: # %bb.1: # %for.body.preheader
+; LA32-NEXT: move $fp, $a4
+; LA32-NEXT: move $s3, $zero
+; LA32-NEXT: move $s4, $zero
+; LA32-NEXT: movgr2fr.w $fs0, $zero
+; LA32-NEXT: .p2align 4, , 16
+; LA32-NEXT: .LBB1_2: # %for.body
+; LA32-NEXT: # =>This Inner Loop Header: Depth=1
+; LA32-NEXT: move $a0, $fp
+; LA32-NEXT: bl f
+; LA32-NEXT: fld.s $fa0, $s2, 16
+; LA32-NEXT: addi.w $s3, $s3, 1
+; LA32-NEXT: sltui $a0, $s3, 1
+; LA32-NEXT: add.w $s4, $s4, $a0
+; LA32-NEXT: xor $a0, $s3, $s1
+; LA32-NEXT: xor $a1, $s4, $s0
+; LA32-NEXT: or $a0, $a0, $a1
+; LA32-NEXT: fadd.s $fs0, $fa0, $fs0
+; LA32-NEXT: bnez $a0, .LBB1_2
+; LA32-NEXT: b .LBB1_4
+; LA32-NEXT: .LBB1_3:
+; LA32-NEXT: movgr2fr.w $fs0, $zero
+; LA32-NEXT: .LBB1_4: # %for.cond.cleanup
+; LA32-NEXT: fst.s $fs0, $s2, 16
+; LA32-NEXT: fld.d $fs0, $sp, 8 # 8-byte Folded Reload
+; LA32-NEXT: ld.w $s4, $sp, 20 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $s3, $sp, 24 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $s2, $sp, 28 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $s1, $sp, 32 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $s0, $sp, 36 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $fp, $sp, 40 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $ra, $sp, 44 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 48
+; LA32-NEXT: ret
+;
+; LA64-LABEL: sink_fold_f32:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: addi.d $sp, $sp, -48
+; LA64-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill
+; LA64-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill
+; LA64-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill
+; LA64-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill
+; LA64-NEXT: fst.d $fs0, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: move $s0, $a1
+; LA64-NEXT: slli.d $a1, $a0, 4
+; LA64-NEXT: alsl.d $a0, $a0, $a1, 3
+; LA64-NEXT: add.d $s1, $a2, $a0
+; LA64-NEXT: blez $s0, .LBB1_3
+; LA64-NEXT: # %bb.1: # %for.body.preheader
+; LA64-NEXT: move $fp, $a2
+; LA64-NEXT: movgr2fr.w $fs0, $zero
+; LA64-NEXT: .p2align 4, , 16
+; LA64-NEXT: .LBB1_2: # %for.body
+; LA64-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: move $a0, $fp
+; LA64-NEXT: pcaddu18i $ra, %call36(f)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: fld.s $fa0, $s1, 16
+; LA64-NEXT: addi.d $s0, $s0, -1
+; LA64-NEXT: fadd.s $fs0, $fa0, $fs0
+; LA64-NEXT: bnez $s0, .LBB1_2
+; LA64-NEXT: b .LBB1_4
+; LA64-NEXT: .LBB1_3:
+; LA64-NEXT: movgr2fr.w $fs0, $zero
+; LA64-NEXT: .LBB1_4: # %for.cond.cleanup
+; LA64-NEXT: fst.s $fs0, $s1, 16
+; LA64-NEXT: fld.d $fs0, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 48
+; LA64-NEXT: ret
+entry:
+ %y = getelementptr inbounds %struct.F, ptr %a, i64 %k, i32 2
+ %cmp4 = icmp sgt i64 %n, 0
+ br i1 %cmp4, label %for.body, label %for.cond.cleanup
+
+for.body: ; preds = %entry, %for.body
+ %i.06 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %s.05 = phi float [ 0.0, %entry ], [ %add, %for.body ]
+ call void @f(ptr %a)
+ %0 = load float, ptr %y
+ %add = fadd float %0, %s.05
+ %inc = add nuw nsw i64 %i.06, 1
+ %exitcond.not = icmp eq i64 %inc, %n
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ %s.0.lcssa = phi float [ 0.0, %entry ], [ %add, %for.body ]
+ store float %s.0.lcssa, ptr %y
+ ret void
+}
+
+define void @sink_fold_v4i32(i64 %k, i64 %n, ptr %a) nounwind {
+; LA32-LABEL: sink_fold_v4i32:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -48
+; LA32-NEXT: st.w $ra, $sp, 44 # 4-byte Folded Spill
+; LA32-NEXT: st.w $fp, $sp, 40 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s0, $sp, 36 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s1, $sp, 32 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s2, $sp, 28 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s3, $sp, 24 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s4, $sp, 20 # 4-byte Folded Spill
+; LA32-NEXT: move $s0, $a3
+; LA32-NEXT: move $s1, $a2
+; LA32-NEXT: slli.w $a0, $a0, 6
+; LA32-NEXT: sltui $a1, $a3, 1
+; LA32-NEXT: slti $a2, $a3, 0
+; LA32-NEXT: masknez $a2, $a2, $a1
+; LA32-NEXT: sltui $a3, $s1, 1
+; LA32-NEXT: maskeqz $a1, $a3, $a1
+; LA32-NEXT: or $a1, $a1, $a2
+; LA32-NEXT: add.w $s2, $a4, $a0
+; LA32-NEXT: bnez $a1, .LBB2_3
+; LA32-NEXT: # %bb.1: # %for.body.preheader
+; LA32-NEXT: move $fp, $a4
+; LA32-NEXT: move $s3, $zero
+; LA32-NEXT: move $s4, $zero
+; LA32-NEXT: vrepli.b $vr0, 0
+; LA32-NEXT: .p2align 4, , 16
+; LA32-NEXT: .LBB2_2: # %for.body
+; LA32-NEXT: # =>This Inner Loop Header: Depth=1
+; LA32-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill
+; LA32-NEXT: move $a0, $fp
+; LA32-NEXT: bl f
+; LA32-NEXT: vld $vr0, $s2, 16
+; LA32-NEXT: addi.w $s3, $s3, 1
+; LA32-NEXT: sltui $a0, $s3, 1
+; LA32-NEXT: add.w $s4, $s4, $a0
+; LA32-NEXT: xor $a0, $s3, $s1
+; LA32-NEXT: xor $a1, $s4, $s0
+; LA32-NEXT: or $a0, $a0, $a1
+; LA32-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload
+; LA32-NEXT: vadd.w $vr1, $vr0, $vr1
+; LA32-NEXT: vst $vr1, $sp, 0 # 16-byte Folded Spill
+; LA32-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload
+; LA32-NEXT: bnez $a0, .LBB2_2
+; LA32-NEXT: b .LBB2_4
+; LA32-NEXT: .LBB2_3:
+; LA32-NEXT: vrepli.b $vr0, 0
+; LA32-NEXT: .LBB2_4: # %for.cond.cleanup
+; LA32-NEXT: vst $vr0, $s2, 16
+; LA32-NEXT: ld.w $s4, $sp, 20 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $s3, $sp, 24 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $s2, $sp, 28 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $s1, $sp, 32 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $s0, $sp, 36 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $fp, $sp, 40 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $ra, $sp, 44 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 48
+; LA32-NEXT: ret
+;
+; LA64-LABEL: sink_fold_v4i32:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: addi.d $sp, $sp, -48
+; LA64-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill
+; LA64-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill
+; LA64-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill
+; LA64-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill
+; LA64-NEXT: slli.d $a0, $a0, 6
+; LA64-NEXT: add.d $s1, $a2, $a0
+; LA64-NEXT: blez $a1, .LBB2_3
+; LA64-NEXT: # %bb.1: # %for.body.preheader
+; LA64-NEXT: move $fp, $a2
+; LA64-NEXT: move $s0, $a1
+; LA64-NEXT: vrepli.b $vr0, 0
+; LA64-NEXT: .p2align 4, , 16
+; LA64-NEXT: .LBB2_2: # %for.body
+; LA64-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill
+; LA64-NEXT: move $a0, $fp
+; LA64-NEXT: pcaddu18i $ra, %call36(f)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: vld $vr0, $s1, 16
+; LA64-NEXT: addi.d $s0, $s0, -1
+; LA64-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload
+; LA64-NEXT: vadd.w $vr1, $vr0, $vr1
+; LA64-NEXT: vst $vr1, $sp, 0 # 16-byte Folded Spill
+; LA64-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload
+; LA64-NEXT: bnez $s0, .LBB2_2
+; LA64-NEXT: b .LBB2_4
+; LA64-NEXT: .LBB2_3:
+; LA64-NEXT: vrepli.b $vr0, 0
+; LA64-NEXT: .LBB2_4: # %for.cond.cleanup
+; LA64-NEXT: vst $vr0, $s1, 16
+; LA64-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 48
+; LA64-NEXT: ret
+entry:
+ %y = getelementptr inbounds %struct.V, ptr %a, i64 %k, i32 1
+ %cmp = icmp sgt i64 %n, 0
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.body: ; preds = %entry, %for.body
+ %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %sum.0 = phi <4 x i32> [ zeroinitializer, %entry ], [ %addv, %for.body ]
+ call void @f(ptr %a)
+ %v = load <4 x i32>, ptr %y
+ %addv = add <4 x i32> %v, %sum.0
+ %inc = add nuw nsw i64 %i.0, 1
+ %exitcond = icmp eq i64 %inc, %n
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ %sum.lcssa = phi <4 x i32> [ zeroinitializer, %entry ], [ %addv, %for.body ]
+ store <4 x i32> %sum.lcssa, ptr %y
+ ret void
+}
+
+define void @sink_fold_v16i16(i64 %k, i64 %n, ptr %a) nounwind {
+; LA32-LABEL: sink_fold_v16i16:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -80
+; LA32-NEXT: st.w $ra, $sp, 76 # 4-byte Folded Spill
+; LA32-NEXT: st.w $fp, $sp, 72 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s0, $sp, 68 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s1, $sp, 64 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s2, $sp, 60 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s3, $sp, 56 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s4, $sp, 52 # 4-byte Folded Spill
+; LA32-NEXT: move $s0, $a3
+; LA32-NEXT: move $s1, $a2
+; LA32-NEXT: slli.w $a0, $a0, 6
+; LA32-NEXT: sltui $a1, $a3, 1
+; LA32-NEXT: slti $a2, $a3, 0
+; LA32-NEXT: masknez $a2, $a2, $a1
+; LA32-NEXT: sltui $a3, $s1, 1
+; LA32-NEXT: maskeqz $a1, $a3, $a1
+; LA32-NEXT: or $a1, $a1, $a2
+; LA32-NEXT: add.w $s2, $a4, $a0
+; LA32-NEXT: bnez $a1, .LBB3_3
+; LA32-NEXT: # %bb.1: # %for.body.preheader
+; LA32-NEXT: move $fp, $a4
+; LA32-NEXT: move $s3, $zero
+; LA32-NEXT: move $s4, $zero
+; LA32-NEXT: xvrepli.b $xr0, 0
+; LA32-NEXT: .p2align 4, , 16
+; LA32-NEXT: .LBB3_2: # %for.body
+; LA32-NEXT: # =>This Inner Loop Header: Depth=1
+; LA32-NEXT: xvst $xr0, $sp, 16 # 32-byte Folded Spill
+; LA32-NEXT: move $a0, $fp
+; LA32-NEXT: bl f
+; LA32-NEXT: xvld $xr0, $s2, 32
+; LA32-NEXT: addi.w $s3, $s3, 1
+; LA32-NEXT: sltui $a0, $s3, 1
+; LA32-NEXT: add.w $s4, $s4, $a0
+; LA32-NEXT: xor $a0, $s3, $s1
+; LA32-NEXT: xor $a1, $s4, $s0
+; LA32-NEXT: or $a0, $a0, $a1
+; LA32-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload
+; LA32-NEXT: xvadd.h $xr1, $xr0, $xr1
+; LA32-NEXT: xvst $xr1, $sp, 16 # 32-byte Folded Spill
+; LA32-NEXT: xvld $xr0, $sp, 16 # 32-byte Folded Reload
+; LA32-NEXT: bnez $a0, .LBB3_2
+; LA32-NEXT: b .LBB3_4
+; LA32-NEXT: .LBB3_3:
+; LA32-NEXT: xvrepli.b $xr0, 0
+; LA32-NEXT: .LBB3_4: # %for.cond.cleanup
+; LA32-NEXT: xvst $xr0, $s2, 32
+; LA32-NEXT: ld.w $s4, $sp, 52 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $s3, $sp, 56 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $s2, $sp, 60 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $s1, $sp, 64 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $s0, $sp, 68 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $fp, $sp, 72 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $ra, $sp, 76 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 80
+; LA32-NEXT: ret
+;
+; LA64-LABEL: sink_fold_v16i16:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: addi.d $sp, $sp, -80
+; LA64-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill
+; LA64-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill
+; LA64-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill
+; LA64-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill
+; LA64-NEXT: slli.d $a0, $a0, 6
+; LA64-NEXT: add.d $s1, $a2, $a0
+; LA64-NEXT: blez $a1, .LBB3_3
+; LA64-NEXT: # %bb.1: # %for.body.preheader
+; LA64-NEXT: move $fp, $a2
+; LA64-NEXT: move $s0, $a1
+; LA64-NEXT: xvrepli.b $xr0, 0
+; LA64-NEXT: .p2align 4, , 16
+; LA64-NEXT: .LBB3_2: # %for.body
+; LA64-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: xvst $xr0, $sp, 16 # 32-byte Folded Spill
+; LA64-NEXT: move $a0, $fp
+; LA64-NEXT: pcaddu18i $ra, %call36(f)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: xvld $xr0, $s1, 32
+; LA64-NEXT: addi.d $s0, $s0, -1
+; LA64-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload
+; LA64-NEXT: xvadd.h $xr1, $xr0, $xr1
+; LA64-NEXT: xvst $xr1, $sp, 16 # 32-byte Folded Spill
+; LA64-NEXT: xvld $xr0, $sp, 16 # 32-byte Folded Reload
+; LA64-NEXT: bnez $s0, .LBB3_2
+; LA64-NEXT: b .LBB3_4
+; LA64-NEXT: .LBB3_3:
+; LA64-NEXT: xvrepli.b $xr0, 0
+; LA64-NEXT: .LBB3_4: # %for.cond.cleanup
+; LA64-NEXT: xvst $xr0, $s1, 32
+; LA64-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 80
+; LA64-NEXT: ret
+entry:
+ %y = getelementptr inbounds %struct.V, ptr %a, i64 %k, i32 2
+ %cmp = icmp sgt i64 %n, 0
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.body: ; preds = %entry, %for.body
+ %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %sum.0 = phi <16 x i16> [ zeroinitializer, %entry ], [ %addv, %for.body ]
+ call void @f(ptr %a)
+ %v = load <16 x i16>, ptr %y
+ %addv = add <16 x i16> %v, %sum.0
+ %inc = add nuw nsw i64 %i.0, 1
+ %exitcond = icmp eq i64 %inc, %n
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ %sum.lcssa = phi <16 x i16> [ zeroinitializer, %entry ], [ %addv, %for.body ]
+ store <16 x i16> %sum.lcssa, ptr %y
+ ret void
+}
+
+define void @sink_fold_extracti8(i64 %k, i64 %n, ptr %a) nounwind {
+; LA32-LABEL: sink_fold_extracti8:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -48
+; LA32-NEXT: st.w $ra, $sp, 44 # 4-byte Folded Spill
+; LA32-NEXT: st.w $fp, $sp, 40 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s0, $sp, 36 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s1, $sp, 32 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s2, $sp, 28 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s3, $sp, 24 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s4, $sp, 20 # 4-byte Folded Spill
+; LA32-NEXT: move $s0, $a3
+; LA32-NEXT: move $s1, $a2
+; LA32-NEXT: slli.w $a1, $a0, 4
+; LA32-NEXT: alsl.w $a0, $a0, $a1, 3
+; LA32-NEXT: sltui $a1, $a3, 1
+; LA32-NEXT: slti $a2, $a3, 0
+; LA32-NEXT: masknez $a2, $a2, $a1
+; LA32-NEXT: sltui $a3, $s1, 1
+; LA32-NEXT: maskeqz $a1, $a3, $a1
+; LA32-NEXT: or $a1, $a1, $a2
+; LA32-NEXT: add.w $s2, $a4, $a0
+; LA32-NEXT: bnez $a1, .LBB4_3
+; LA32-NEXT: # %bb.1: # %for.body.preheader
+; LA32-NEXT: move $fp, $a4
+; LA32-NEXT: move $s3, $zero
+; LA32-NEXT: move $s4, $zero
+; LA32-NEXT: vrepli.b $vr0, 0
+; LA32-NEXT: .p2align 4, , 16
+; LA32-NEXT: .LBB4_2: # %for.body
+; LA32-NEXT: # =>This Inner Loop Header: Depth=1
+; LA32-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill
+; LA32-NEXT: move $a0, $fp
+; LA32-NEXT: bl f
+; LA32-NEXT: vldrepl.b $vr0, $s2, 16
+; LA32-NEXT: addi.w $s3, $s3, 1
+; LA32-NEXT: sltui $a0, $s3, 1
+; LA32-NEXT: add.w $s4, $s4, $a0
+; LA32-NEXT: xor $a0, $s3, $s1
+; LA32-NEXT: xor $a1, $s4, $s0
+; LA32-NEXT: or $a0, $a0, $a1
+; LA32-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload
+; LA32-NEXT: vadd.b $vr1, $vr0, $vr1
+; LA32-NEXT: vst $vr1, $sp, 0 # 16-byte Folded Spill
+; LA32-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload
+; LA32-NEXT: bnez $a0, .LBB4_2
+; LA32-NEXT: b .LBB4_4
+; LA32-NEXT: .LBB4_3:
+; LA32-NEXT: vrepli.b $vr0, 0
+; LA32-NEXT: .LBB4_4: # %for.cond.cleanup
+; LA32-NEXT: vstelm.b $vr0, $s2, 16, 1
+; LA32-NEXT: ld.w $s4, $sp, 20 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $s3, $sp, 24 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $s2, $sp, 28 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $s1, $sp, 32 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $s0, $sp, 36 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $fp, $sp, 40 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $ra, $sp, 44 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 48
+; LA32-NEXT: ret
+;
+; LA64-LABEL: sink_fold_extracti8:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: addi.d $sp, $sp, -48
+; LA64-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill
+; LA64-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill
+; LA64-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill
+; LA64-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill
+; LA64-NEXT: move $s0, $a1
+; LA64-NEXT: slli.d $a1, $a0, 4
+; LA64-NEXT: alsl.d $a0, $a0, $a1, 3
+; LA64-NEXT: add.d $s1, $a2, $a0
+; LA64-NEXT: blez $s0, .LBB4_3
+; LA64-NEXT: # %bb.1: # %for.body.preheader
+; LA64-NEXT: move $fp, $a2
+; LA64-NEXT: vrepli.b $vr0, 0
+; LA64-NEXT: .p2align 4, , 16
+; LA64-NEXT: .LBB4_2: # %for.body
+; LA64-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill
+; LA64-NEXT: move $a0, $fp
+; LA64-NEXT: pcaddu18i $ra, %call36(f)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: vldrepl.b $vr0, $s1, 16
+; LA64-NEXT: addi.d $s0, $s0, -1
+; LA64-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload
+; LA64-NEXT: vadd.b $vr1, $vr0, $vr1
+; LA64-NEXT: vst $vr1, $sp, 0 # 16-byte Folded Spill
+; LA64-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload
+; LA64-NEXT: bnez $s0, .LBB4_2
+; LA64-NEXT: b .LBB4_4
+; LA64-NEXT: .LBB4_3:
+; LA64-NEXT: vrepli.b $vr0, 0
+; LA64-NEXT: .LBB4_4: # %for.cond.cleanup
+; LA64-NEXT: vstelm.b $vr0, $s1, 16, 1
+; LA64-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 48
+; LA64-NEXT: ret
+entry:
+ %y = getelementptr inbounds %struct.S, ptr %a, i64 %k, i32 2
+ %cmp = icmp sgt i64 %n, 0
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.body: ; preds = %entry, %for.body
+ %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %sum.0 = phi <16 x i8> [ zeroinitializer, %entry ], [ %addv, %for.body ]
+ call void @f(ptr %a)
+ %e = load i8, ptr %y
+ %ins0 = insertelement <16 x i8> poison, i8 %e, i32 0
+ %v = shufflevector <16 x i8> %ins0, <16 x i8> poison, <16 x i32> zeroinitializer
+ %addv = add <16 x i8> %v, %sum.0
+ %inc = add nuw nsw i64 %i.0, 1
+ %exitcond = icmp eq i64 %inc, %n
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ %sum.lcssa = phi <16 x i8> [ zeroinitializer, %entry ], [ %addv, %for.body ]
+ %res = extractelement <16 x i8> %sum.lcssa, i32 1
+ store i8 %res, ptr %y
+ ret void
+}
+
+define void @sink_fold_extractf64(i64 %k, i64 %n, ptr %a) nounwind {
+; LA32-LABEL: sink_fold_extractf64:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -80
+; LA32-NEXT: st.w $ra, $sp, 76 # 4-byte Folded Spill
+; LA32-NEXT: st.w $fp, $sp, 72 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s0, $sp, 68 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s1, $sp, 64 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s2, $sp, 60 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s3, $sp, 56 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s4, $sp, 52 # 4-byte Folded Spill
+; LA32-NEXT: move $s0, $a3
+; LA32-NEXT: move $s1, $a2
+; LA32-NEXT: slli.w $a1, $a0, 4
+; LA32-NEXT: alsl.w $a0, $a0, $a1, 3
+; LA32-NEXT: sltui $a1, $a3, 1
+; LA32-NEXT: slti $a2, $a3, 0
+; LA32-NEXT: masknez $a2, $a2, $a1
+; LA32-NEXT: sltui $a3, $s1, 1
+; LA32-NEXT: maskeqz $a1, $a3, $a1
+; LA32-NEXT: or $a1, $a1, $a2
+; LA32-NEXT: add.w $s2, $a4, $a0
+; LA32-NEXT: bnez $a1, .LBB5_3
+; LA32-NEXT: # %bb.1: # %for.body.preheader
+; LA32-NEXT: move $fp, $a4
+; LA32-NEXT: move $s3, $zero
+; LA32-NEXT: move $s4, $zero
+; LA32-NEXT: xvrepli.b $xr0, 0
+; LA32-NEXT: .p2align 4, , 16
+; LA32-NEXT: .LBB5_2: # %for.body
+; LA32-NEXT: # =>This Inner Loop Header: Depth=1
+; LA32-NEXT: xvst $xr0, $sp, 16 # 32-byte Folded Spill
+; LA32-NEXT: move $a0, $fp
+; LA32-NEXT: bl f
+; LA32-NEXT: xvldrepl.d $xr0, $s2, 8
+; LA32-NEXT: addi.w $s3, $s3, 1
+; LA32-NEXT: sltui $a0, $s3, 1
+; LA32-NEXT: add.w $s4, $s4, $a0
+; LA32-NEXT: xor $a0, $s3, $s1
+; LA32-NEXT: xor $a1, $s4, $s0
+; LA32-NEXT: or $a0, $a0, $a1
+; LA32-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload
+; LA32-NEXT: xvfadd.d $xr1, $xr0, $xr1
+; LA32-NEXT: xvst $xr1, $sp, 16 # 32-byte Folded Spill
+; LA32-NEXT: xvld $xr0, $sp, 16 # 32-byte Folded Reload
+; LA32-NEXT: bnez $a0, .LBB5_2
+; LA32-NEXT: b .LBB5_4
+; LA32-NEXT: .LBB5_3:
+; LA32-NEXT: xvrepli.b $xr0, 0
+; LA32-NEXT: .LBB5_4: # %for.cond.cleanup
+; LA32-NEXT: xvstelm.d $xr0, $s2, 8, 1
+; LA32-NEXT: ld.w $s4, $sp, 52 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $s3, $sp, 56 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $s2, $sp, 60 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $s1, $sp, 64 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $s0, $sp, 68 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $fp, $sp, 72 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $ra, $sp, 76 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 80
+; LA32-NEXT: ret
+;
+; LA64-LABEL: sink_fold_extractf64:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: addi.d $sp, $sp, -80
+; LA64-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill
+; LA64-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill
+; LA64-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill
+; LA64-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill
+; LA64-NEXT: move $s0, $a1
+; LA64-NEXT: slli.d $a1, $a0, 4
+; LA64-NEXT: alsl.d $a0, $a0, $a1, 3
+; LA64-NEXT: add.d $s1, $a2, $a0
+; LA64-NEXT: blez $s0, .LBB5_3
+; LA64-NEXT: # %bb.1: # %for.body.preheader
+; LA64-NEXT: move $fp, $a2
+; LA64-NEXT: xvrepli.b $xr0, 0
+; LA64-NEXT: .p2align 4, , 16
+; LA64-NEXT: .LBB5_2: # %for.body
+; LA64-NEXT: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: xvst $xr0, $sp, 16 # 32-byte Folded Spill
+; LA64-NEXT: move $a0, $fp
+; LA64-NEXT: pcaddu18i $ra, %call36(f)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: xvldrepl.d $xr0, $s1, 8
+; LA64-NEXT: addi.d $s0, $s0, -1
+; LA64-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload
+; LA64-NEXT: xvfadd.d $xr1, $xr0, $xr1
+; LA64-NEXT: xvst $xr1, $sp, 16 # 32-byte Folded Spill
+; LA64-NEXT: xvld $xr0, $sp, 16 # 32-byte Folded Reload
+; LA64-NEXT: bnez $s0, .LBB5_2
+; LA64-NEXT: b .LBB5_4
+; LA64-NEXT: .LBB5_3:
+; LA64-NEXT: xvrepli.b $xr0, 0
+; LA64-NEXT: .LBB5_4: # %for.cond.cleanup
+; LA64-NEXT: xvstelm.d $xr0, $s1, 8, 1
+; LA64-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 80
+; LA64-NEXT: ret
+entry:
+ %y = getelementptr inbounds %struct.F, ptr %a, i64 %k, i32 1
+ %cmp = icmp sgt i64 %n, 0
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.body: ; preds = %entry, %for.body
+ %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %sum.0 = phi <4 x double> [ zeroinitializer, %entry ], [ %addv, %for.body ]
+ call void @f(ptr %a)
+ %e = load double, ptr %y
+ %ins0 = insertelement <4 x double> poison, double %e, i32 0
+ %v = shufflevector <4 x double> %ins0, <4 x double> poison, <4 x i32> zeroinitializer
+ %addv = fadd <4 x double> %v, %sum.0
+ %inc = add nuw nsw i64 %i.0, 1
+ %exitcond = icmp eq i64 %inc, %n
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ %sum.lcssa = phi <4 x double> [ zeroinitializer, %entry ], [ %addv, %for.body ]
+ %res = extractelement <4 x double> %sum.lcssa, i32 1
+ store double %res, ptr %y
+ ret void
+}
+
+declare void @f(ptr)
diff --git a/llvm/test/CodeGen/LoongArch/stptr.ll b/llvm/test/CodeGen/LoongArch/stptr.ll
index d70f9f4..23b433a 100644
--- a/llvm/test/CodeGen/LoongArch/stptr.ll
+++ b/llvm/test/CodeGen/LoongArch/stptr.ll
@@ -23,8 +23,7 @@ define void @stptr_w(ptr %p, i32 signext %val) nounwind {
; LA32-LABEL: stptr_w:
; LA32: # %bb.0:
; LA32-NEXT: addi.w $a0, $a0, 2047
-; LA32-NEXT: addi.w $a0, $a0, 1
-; LA32-NEXT: st.w $a1, $a0, 0
+; LA32-NEXT: st.w $a1, $a0, 1
; LA32-NEXT: ret
;
; LA64-LABEL: stptr_w:
@@ -77,9 +76,8 @@ define void @stptr_d(ptr %p, i64 %val) nounwind {
; LA32-LABEL: stptr_d:
; LA32: # %bb.0:
; LA32-NEXT: addi.w $a0, $a0, 2047
-; LA32-NEXT: addi.w $a0, $a0, 1
-; LA32-NEXT: st.w $a2, $a0, 4
-; LA32-NEXT: st.w $a1, $a0, 0
+; LA32-NEXT: st.w $a2, $a0, 5
+; LA32-NEXT: st.w $a1, $a0, 1
; LA32-NEXT: ret
;
; LA64-LABEL: stptr_d: