Diffstat (limited to 'llvm/test/CodeGen/RISCV')
-rw-r--r-- llvm/test/CodeGen/RISCV/features-info.ll | 10
-rw-r--r-- llvm/test/CodeGen/RISCV/half-convert.ll | 108
-rw-r--r-- llvm/test/CodeGen/RISCV/macro-fusions.mir | 1376
-rw-r--r-- llvm/test/CodeGen/RISCV/misched-load-clustering.ll | 47
-rw-r--r-- llvm/test/CodeGen/RISCV/misched-mem-clustering.mir | 6
-rw-r--r-- llvm/test/CodeGen/RISCV/misched-store-clustering.ll | 83
-rw-r--r-- llvm/test/CodeGen/RISCV/rv32zbkb.ll | 71
-rw-r--r-- llvm/test/CodeGen/RISCV/rv64-half-convert.ll | 21
-rw-r--r-- llvm/test/CodeGen/RISCV/rv64zbkb.ll | 244
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ssegN-store.ll | 72
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll | 2
-rw-r--r-- llvm/test/CodeGen/RISCV/unaligned-load-store.ll | 20
12 files changed, 1975 insertions(+), 85 deletions(-)
diff --git a/llvm/test/CodeGen/RISCV/features-info.ll b/llvm/test/CodeGen/RISCV/features-info.ll
index b94665b..fb53921 100644
--- a/llvm/test/CodeGen/RISCV/features-info.ll
+++ b/llvm/test/CodeGen/RISCV/features-info.ll
@@ -6,13 +6,21 @@
; CHECK-NEXT: 32bit - Implements RV32.
; CHECK-NEXT: 64bit - Implements RV64.
; CHECK-NEXT: a - 'A' (Atomic Instructions).
+; CHECK-NEXT: add-load-fusion - Enable ADD(.UW) + load macrofusion.
+; CHECK-NEXT: addi-load-fusion - Enable ADDI + load macrofusion.
; CHECK-NEXT: andes45 - Andes 45-Series processors.
; CHECK-NEXT: auipc-addi-fusion - Enable AUIPC+ADDI macrofusion.
+; CHECK-NEXT: auipc-load-fusion - Enable AUIPC + load macrofusion.
; CHECK-NEXT: b - 'B' (the collection of the Zba, Zbb, Zbs extensions).
+; CHECK-NEXT: bfext-fusion - Enable SLLI+SRLI (bitfield extract) macrofusion.
; CHECK-NEXT: c - 'C' (Compressed Instructions).
; CHECK-NEXT: conditional-cmv-fusion - Enable branch+c.mv fusion.
; CHECK-NEXT: d - 'D' (Double-Precision Floating-Point).
; CHECK-NEXT: disable-latency-sched-heuristic - Disable latency scheduling heuristic.
+; CHECK-NEXT: disable-misched-load-clustering - Disable load clustering in the machine scheduler.
+; CHECK-NEXT: disable-misched-store-clustering - Disable store clustering in the machine scheduler.
+; CHECK-NEXT: disable-postmisched-load-clustering - Disable PostRA load clustering in the machine scheduler.
+; CHECK-NEXT: disable-postmisched-store-clustering - Disable PostRA store clustering in the machine scheduler.
; CHECK-NEXT: dlen-factor-2 - Vector unit DLEN(data path width) is half of VLEN.
; CHECK-NEXT: e - 'E' (Embedded Instruction Set with 16 GPRs).
; CHECK-NEXT: exact-asm - Enable Exact Assembly (Disables Compression and Relaxation).
@@ -58,6 +66,7 @@
; CHECK-NEXT: ld-add-fusion - Enable LD+ADD macrofusion.
; CHECK-NEXT: log-vrgather - Has vrgather.vv with LMUL*log2(LMUL) latency
; CHECK-NEXT: lui-addi-fusion - Enable LUI+ADDI macro fusion.
+; CHECK-NEXT: lui-load-fusion - Enable LUI + load macrofusion.
; CHECK-NEXT: m - 'M' (Integer Multiplication and Division).
; CHECK-NEXT: mips-p8700 - MIPS p8700 processor.
; CHECK-NEXT: no-default-unroll - Disable default unroll preference..
@@ -130,6 +139,7 @@
; CHECK-NEXT: shvsatpa - 'Shvsatpa' (vsatp supports all modes supported by satp).
; CHECK-NEXT: shvstvala - 'Shvstvala' (vstval provides all needed values).
; CHECK-NEXT: shvstvecd - 'Shvstvecd' (vstvec supports Direct mode).
+; CHECK-NEXT: shxadd-load-fusion - Enable SH(1|2|3)ADD(.UW) + load macrofusion.
; CHECK-NEXT: sifive7 - SiFive 7-Series processors.
; CHECK-NEXT: smaia - 'Smaia' (Advanced Interrupt Architecture Machine Level).
; CHECK-NEXT: smcdeleg - 'Smcdeleg' (Counter Delegation Machine Level).
diff --git a/llvm/test/CodeGen/RISCV/half-convert.ll b/llvm/test/CodeGen/RISCV/half-convert.ll
index facb544..0c152e6 100644
--- a/llvm/test/CodeGen/RISCV/half-convert.ll
+++ b/llvm/test/CodeGen/RISCV/half-convert.ll
@@ -2262,12 +2262,12 @@ define i64 @fcvt_l_h_sat(half %a) nounwind {
; RV32IZHINX-NEXT: addi a2, a3, -1
; RV32IZHINX-NEXT: .LBB10_4: # %start
; RV32IZHINX-NEXT: feq.s a3, s0, s0
-; RV32IZHINX-NEXT: neg a4, a1
-; RV32IZHINX-NEXT: neg a1, s1
+; RV32IZHINX-NEXT: neg a4, s1
+; RV32IZHINX-NEXT: neg a5, a1
; RV32IZHINX-NEXT: neg a3, a3
-; RV32IZHINX-NEXT: and a0, a1, a0
+; RV32IZHINX-NEXT: and a0, a4, a0
; RV32IZHINX-NEXT: and a1, a3, a2
-; RV32IZHINX-NEXT: or a0, a4, a0
+; RV32IZHINX-NEXT: or a0, a5, a0
; RV32IZHINX-NEXT: and a0, a3, a0
; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZHINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
@@ -2309,12 +2309,12 @@ define i64 @fcvt_l_h_sat(half %a) nounwind {
; RV32IZDINXZHINX-NEXT: addi a2, a3, -1
; RV32IZDINXZHINX-NEXT: .LBB10_4: # %start
; RV32IZDINXZHINX-NEXT: feq.s a3, s0, s0
-; RV32IZDINXZHINX-NEXT: neg a4, a1
-; RV32IZDINXZHINX-NEXT: neg a1, s1
+; RV32IZDINXZHINX-NEXT: neg a4, s1
+; RV32IZDINXZHINX-NEXT: neg a5, a1
; RV32IZDINXZHINX-NEXT: neg a3, a3
-; RV32IZDINXZHINX-NEXT: and a0, a1, a0
+; RV32IZDINXZHINX-NEXT: and a0, a4, a0
; RV32IZDINXZHINX-NEXT: and a1, a3, a2
-; RV32IZDINXZHINX-NEXT: or a0, a4, a0
+; RV32IZDINXZHINX-NEXT: or a0, a5, a0
; RV32IZDINXZHINX-NEXT: and a0, a3, a0
; RV32IZDINXZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZDINXZHINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
@@ -2653,12 +2653,12 @@ define i64 @fcvt_l_h_sat(half %a) nounwind {
; CHECK32-IZHINXMIN-NEXT: addi a2, a3, -1
; CHECK32-IZHINXMIN-NEXT: .LBB10_4: # %start
; CHECK32-IZHINXMIN-NEXT: feq.s a3, s0, s0
-; CHECK32-IZHINXMIN-NEXT: neg a4, a1
-; CHECK32-IZHINXMIN-NEXT: neg a1, s1
+; CHECK32-IZHINXMIN-NEXT: neg a4, s1
+; CHECK32-IZHINXMIN-NEXT: neg a5, a1
; CHECK32-IZHINXMIN-NEXT: neg a3, a3
-; CHECK32-IZHINXMIN-NEXT: and a0, a1, a0
+; CHECK32-IZHINXMIN-NEXT: and a0, a4, a0
; CHECK32-IZHINXMIN-NEXT: and a1, a3, a2
-; CHECK32-IZHINXMIN-NEXT: or a0, a4, a0
+; CHECK32-IZHINXMIN-NEXT: or a0, a5, a0
; CHECK32-IZHINXMIN-NEXT: and a0, a3, a0
; CHECK32-IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; CHECK32-IZHINXMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
@@ -2701,12 +2701,12 @@ define i64 @fcvt_l_h_sat(half %a) nounwind {
; CHECK32-IZDINXZHINXMIN-NEXT: addi a2, a3, -1
; CHECK32-IZDINXZHINXMIN-NEXT: .LBB10_4: # %start
; CHECK32-IZDINXZHINXMIN-NEXT: feq.s a3, s0, s0
-; CHECK32-IZDINXZHINXMIN-NEXT: neg a4, a1
-; CHECK32-IZDINXZHINXMIN-NEXT: neg a1, s1
+; CHECK32-IZDINXZHINXMIN-NEXT: neg a4, s1
+; CHECK32-IZDINXZHINXMIN-NEXT: neg a5, a1
; CHECK32-IZDINXZHINXMIN-NEXT: neg a3, a3
-; CHECK32-IZDINXZHINXMIN-NEXT: and a0, a1, a0
+; CHECK32-IZDINXZHINXMIN-NEXT: and a0, a4, a0
; CHECK32-IZDINXZHINXMIN-NEXT: and a1, a3, a2
-; CHECK32-IZDINXZHINXMIN-NEXT: or a0, a4, a0
+; CHECK32-IZDINXZHINXMIN-NEXT: or a0, a5, a0
; CHECK32-IZDINXZHINXMIN-NEXT: and a0, a3, a0
; CHECK32-IZDINXZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; CHECK32-IZDINXZHINXMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
@@ -2972,18 +2972,19 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind {
; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZHINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32IZHINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32IZHINX-NEXT: fcvt.s.h a0, a0
-; RV32IZHINX-NEXT: lui a1, 391168
-; RV32IZHINX-NEXT: addi a1, a1, -1
-; RV32IZHINX-NEXT: fle.s a2, zero, a0
-; RV32IZHINX-NEXT: flt.s a1, a1, a0
-; RV32IZHINX-NEXT: neg s0, a1
-; RV32IZHINX-NEXT: neg s1, a2
+; RV32IZHINX-NEXT: fcvt.s.h s0, a0
+; RV32IZHINX-NEXT: fle.s a0, zero, s0
+; RV32IZHINX-NEXT: neg s1, a0
+; RV32IZHINX-NEXT: mv a0, s0
; RV32IZHINX-NEXT: call __fixunssfdi
; RV32IZHINX-NEXT: and a0, s1, a0
+; RV32IZHINX-NEXT: lui a2, 391168
; RV32IZHINX-NEXT: and a1, s1, a1
-; RV32IZHINX-NEXT: or a0, s0, a0
-; RV32IZHINX-NEXT: or a1, s0, a1
+; RV32IZHINX-NEXT: addi a2, a2, -1
+; RV32IZHINX-NEXT: flt.s a2, a2, s0
+; RV32IZHINX-NEXT: neg a2, a2
+; RV32IZHINX-NEXT: or a0, a2, a0
+; RV32IZHINX-NEXT: or a1, a2, a1
; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZHINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
@@ -3005,18 +3006,19 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind {
; RV32IZDINXZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZDINXZHINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32IZDINXZHINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32IZDINXZHINX-NEXT: fcvt.s.h a0, a0
-; RV32IZDINXZHINX-NEXT: lui a1, 391168
-; RV32IZDINXZHINX-NEXT: addi a1, a1, -1
-; RV32IZDINXZHINX-NEXT: fle.s a2, zero, a0
-; RV32IZDINXZHINX-NEXT: flt.s a1, a1, a0
-; RV32IZDINXZHINX-NEXT: neg s0, a1
-; RV32IZDINXZHINX-NEXT: neg s1, a2
+; RV32IZDINXZHINX-NEXT: fcvt.s.h s0, a0
+; RV32IZDINXZHINX-NEXT: fle.s a0, zero, s0
+; RV32IZDINXZHINX-NEXT: neg s1, a0
+; RV32IZDINXZHINX-NEXT: mv a0, s0
; RV32IZDINXZHINX-NEXT: call __fixunssfdi
; RV32IZDINXZHINX-NEXT: and a0, s1, a0
+; RV32IZDINXZHINX-NEXT: lui a2, 391168
; RV32IZDINXZHINX-NEXT: and a1, s1, a1
-; RV32IZDINXZHINX-NEXT: or a0, s0, a0
-; RV32IZDINXZHINX-NEXT: or a1, s0, a1
+; RV32IZDINXZHINX-NEXT: addi a2, a2, -1
+; RV32IZDINXZHINX-NEXT: flt.s a2, a2, s0
+; RV32IZDINXZHINX-NEXT: neg a2, a2
+; RV32IZDINXZHINX-NEXT: or a0, a2, a0
+; RV32IZDINXZHINX-NEXT: or a1, a2, a1
; RV32IZDINXZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZDINXZHINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IZDINXZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
@@ -3217,18 +3219,19 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind {
; CHECK32-IZHINXMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; CHECK32-IZHINXMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; CHECK32-IZHINXMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; CHECK32-IZHINXMIN-NEXT: fcvt.s.h a0, a0
-; CHECK32-IZHINXMIN-NEXT: lui a1, 391168
-; CHECK32-IZHINXMIN-NEXT: addi a1, a1, -1
-; CHECK32-IZHINXMIN-NEXT: fle.s a2, zero, a0
-; CHECK32-IZHINXMIN-NEXT: flt.s a1, a1, a0
-; CHECK32-IZHINXMIN-NEXT: neg s0, a1
-; CHECK32-IZHINXMIN-NEXT: neg s1, a2
+; CHECK32-IZHINXMIN-NEXT: fcvt.s.h s0, a0
+; CHECK32-IZHINXMIN-NEXT: fle.s a0, zero, s0
+; CHECK32-IZHINXMIN-NEXT: neg s1, a0
+; CHECK32-IZHINXMIN-NEXT: mv a0, s0
; CHECK32-IZHINXMIN-NEXT: call __fixunssfdi
; CHECK32-IZHINXMIN-NEXT: and a0, s1, a0
+; CHECK32-IZHINXMIN-NEXT: lui a2, 391168
; CHECK32-IZHINXMIN-NEXT: and a1, s1, a1
-; CHECK32-IZHINXMIN-NEXT: or a0, s0, a0
-; CHECK32-IZHINXMIN-NEXT: or a1, s0, a1
+; CHECK32-IZHINXMIN-NEXT: addi a2, a2, -1
+; CHECK32-IZHINXMIN-NEXT: flt.s a2, a2, s0
+; CHECK32-IZHINXMIN-NEXT: neg a2, a2
+; CHECK32-IZHINXMIN-NEXT: or a0, a2, a0
+; CHECK32-IZHINXMIN-NEXT: or a1, a2, a1
; CHECK32-IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; CHECK32-IZHINXMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; CHECK32-IZHINXMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
@@ -3251,18 +3254,19 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind {
; CHECK32-IZDINXZHINXMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; CHECK32-IZDINXZHINXMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; CHECK32-IZDINXZHINXMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; CHECK32-IZDINXZHINXMIN-NEXT: fcvt.s.h a0, a0
-; CHECK32-IZDINXZHINXMIN-NEXT: lui a1, 391168
-; CHECK32-IZDINXZHINXMIN-NEXT: addi a1, a1, -1
-; CHECK32-IZDINXZHINXMIN-NEXT: fle.s a2, zero, a0
-; CHECK32-IZDINXZHINXMIN-NEXT: flt.s a1, a1, a0
-; CHECK32-IZDINXZHINXMIN-NEXT: neg s0, a1
-; CHECK32-IZDINXZHINXMIN-NEXT: neg s1, a2
+; CHECK32-IZDINXZHINXMIN-NEXT: fcvt.s.h s0, a0
+; CHECK32-IZDINXZHINXMIN-NEXT: fle.s a0, zero, s0
+; CHECK32-IZDINXZHINXMIN-NEXT: neg s1, a0
+; CHECK32-IZDINXZHINXMIN-NEXT: mv a0, s0
; CHECK32-IZDINXZHINXMIN-NEXT: call __fixunssfdi
; CHECK32-IZDINXZHINXMIN-NEXT: and a0, s1, a0
+; CHECK32-IZDINXZHINXMIN-NEXT: lui a2, 391168
; CHECK32-IZDINXZHINXMIN-NEXT: and a1, s1, a1
-; CHECK32-IZDINXZHINXMIN-NEXT: or a0, s0, a0
-; CHECK32-IZDINXZHINXMIN-NEXT: or a1, s0, a1
+; CHECK32-IZDINXZHINXMIN-NEXT: addi a2, a2, -1
+; CHECK32-IZDINXZHINXMIN-NEXT: flt.s a2, a2, s0
+; CHECK32-IZDINXZHINXMIN-NEXT: neg a2, a2
+; CHECK32-IZDINXZHINXMIN-NEXT: or a0, a2, a0
+; CHECK32-IZDINXZHINXMIN-NEXT: or a1, a2, a1
; CHECK32-IZDINXZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; CHECK32-IZDINXZHINXMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; CHECK32-IZDINXZHINXMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/macro-fusions.mir b/llvm/test/CodeGen/RISCV/macro-fusions.mir
index 1346414..ae5b52d 100644
--- a/llvm/test/CodeGen/RISCV/macro-fusions.mir
+++ b/llvm/test/CodeGen/RISCV/macro-fusions.mir
@@ -2,7 +2,12 @@
# RUN: llc -mtriple=riscv64-linux-gnu -x=mir < %s \
# RUN: -debug-only=machine-scheduler -start-before=machine-scheduler 2>&1 \
# RUN: -mattr=+lui-addi-fusion,+auipc-addi-fusion,+zexth-fusion,+zextw-fusion,+shifted-zextw-fusion,+ld-add-fusion \
+# RUN: -mattr=+add-load-fusion,+auipc-load-fusion,+lui-load-fusion,+addi-load-fusion \
+# RUN: -mattr=+zba,+shxadd-load-fusion \
# RUN: | FileCheck %s
+# RUN: llc -mtriple=riscv64-linux-gnu -x=mir < %s \
+# RUN: -debug-only=machine-scheduler -start-before=machine-scheduler 2>&1 \
+# RUN: -mattr=+zba,+bfext-fusion | FileCheck --check-prefixes=CHECK-BFEXT %s
# CHECK: lui_addi:%bb.0
# CHECK: Macro fuse: {{.*}}LUI - ADDI
@@ -174,3 +179,1374 @@ body: |
$x11 = COPY %5
PseudoRET
...
+
+# CHECK: add_lb
+# CHECK: Macro fuse: {{.*}}ADD - LB
+---
+name: add_lb
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = ADD %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LB %3, 0
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: add_lh
+# CHECK: Macro fuse: {{.*}}ADD - LH
+---
+name: add_lh
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = ADD %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LH %3, 0
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: add_lw
+# CHECK: Macro fuse: {{.*}}ADD - LW
+---
+name: add_lw
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = ADD %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LW %3, 0
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: add_lbu
+# CHECK: Macro fuse: {{.*}}ADD - LBU
+---
+name: add_lbu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = ADD %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LBU %3, 0
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: add_lhu
+# CHECK: Macro fuse: {{.*}}ADD - LHU
+---
+name: add_lhu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = ADD %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LHU %3, 0
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: add_lwu
+# CHECK: Macro fuse: {{.*}}ADD - LWU
+---
+name: add_lwu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = ADD %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LWU %3, 0
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: auipc_lb
+# CHECK: Macro fuse: {{.*}}AUIPC - LB
+---
+name: auipc_lb
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10
+ %1:gpr = COPY $x10
+ %2:gpr = AUIPC 1
+ %3:gpr = XORI %1, 2
+ %4:gpr = LB %2, 4
+ $x10 = COPY %3
+ $x11 = COPY %4
+ PseudoRET
+...
+
+# CHECK: auipc_lh
+# CHECK: Macro fuse: {{.*}}AUIPC - LH
+---
+name: auipc_lh
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10
+ %1:gpr = COPY $x10
+ %2:gpr = AUIPC 1
+ %3:gpr = XORI %1, 2
+ %4:gpr = LH %2, 4
+ $x10 = COPY %3
+ $x11 = COPY %4
+ PseudoRET
+...
+
+# CHECK: auipc_lw
+# CHECK: Macro fuse: {{.*}}AUIPC - LW
+---
+name: auipc_lw
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10
+ %1:gpr = COPY $x10
+ %2:gpr = AUIPC 1
+ %3:gpr = XORI %1, 2
+ %4:gpr = LW %2, 4
+ $x10 = COPY %3
+ $x11 = COPY %4
+ PseudoRET
+...
+
+# CHECK: auipc_ld
+# CHECK: Macro fuse: {{.*}}AUIPC - LD
+---
+name: auipc_ld
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10
+ %1:gpr = COPY $x10
+ %2:gpr = AUIPC 1
+ %3:gpr = XORI %1, 2
+ %4:gpr = LD %2, 4
+ $x10 = COPY %3
+ $x11 = COPY %4
+ PseudoRET
+...
+
+# CHECK: auipc_lbu
+# CHECK: Macro fuse: {{.*}}AUIPC - LBU
+---
+name: auipc_lbu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10
+ %1:gpr = COPY $x10
+ %2:gpr = AUIPC 1
+ %3:gpr = XORI %1, 2
+ %4:gpr = LBU %2, 4
+ $x10 = COPY %3
+ $x11 = COPY %4
+ PseudoRET
+...
+
+# CHECK: auipc_lhu
+# CHECK: Macro fuse: {{.*}}AUIPC - LHU
+---
+name: auipc_lhu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10
+ %1:gpr = COPY $x10
+ %2:gpr = AUIPC 1
+ %3:gpr = XORI %1, 2
+ %4:gpr = LHU %2, 4
+ $x10 = COPY %3
+ $x11 = COPY %4
+ PseudoRET
+...
+
+# CHECK: auipc_lwu
+# CHECK: Macro fuse: {{.*}}AUIPC - LWU
+---
+name: auipc_lwu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10
+ %1:gpr = COPY $x10
+ %2:gpr = AUIPC 1
+ %3:gpr = XORI %1, 2
+ %4:gpr = LWU %2, 4
+ $x10 = COPY %3
+ $x11 = COPY %4
+ PseudoRET
+...
+
+# CHECK: lui_lb
+# CHECK: Macro fuse: {{.*}}LUI - LB
+---
+name: lui_lb
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10
+ %1:gpr = COPY $x10
+ %2:gpr = LUI 1
+ %3:gpr = XORI %1, 2
+ %4:gpr = LB %2, 4
+ $x10 = COPY %3
+ $x11 = COPY %4
+ PseudoRET
+...
+
+# CHECK: lui_lh
+# CHECK: Macro fuse: {{.*}}LUI - LH
+---
+name: lui_lh
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10
+ %1:gpr = COPY $x10
+ %2:gpr = LUI 1
+ %3:gpr = XORI %1, 2
+ %4:gpr = LH %2, 4
+ $x10 = COPY %3
+ $x11 = COPY %4
+ PseudoRET
+...
+
+# CHECK: lui_lw
+# CHECK: Macro fuse: {{.*}}LUI - LW
+---
+name: lui_lw
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10
+ %1:gpr = COPY $x10
+ %2:gpr = LUI 1
+ %3:gpr = XORI %1, 2
+ %4:gpr = LW %2, 4
+ $x10 = COPY %3
+ $x11 = COPY %4
+ PseudoRET
+...
+
+# CHECK: lui_ld
+# CHECK: Macro fuse: {{.*}}LUI - LD
+---
+name: lui_ld
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10
+ %1:gpr = COPY $x10
+ %2:gpr = LUI 1
+ %3:gpr = XORI %1, 2
+ %4:gpr = LD %2, 4
+ $x10 = COPY %3
+ $x11 = COPY %4
+ PseudoRET
+...
+
+# CHECK: lui_lbu
+# CHECK: Macro fuse: {{.*}}LUI - LBU
+---
+name: lui_lbu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10
+ %1:gpr = COPY $x10
+ %2:gpr = LUI 1
+ %3:gpr = XORI %1, 2
+ %4:gpr = LBU %2, 4
+ $x10 = COPY %3
+ $x11 = COPY %4
+ PseudoRET
+...
+
+# CHECK: lui_lhu
+# CHECK: Macro fuse: {{.*}}LUI - LHU
+---
+name: lui_lhu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10
+ %1:gpr = COPY $x10
+ %2:gpr = LUI 1
+ %3:gpr = XORI %1, 2
+ %4:gpr = LHU %2, 4
+ $x10 = COPY %3
+ $x11 = COPY %4
+ PseudoRET
+...
+
+# CHECK: lui_lwu
+# CHECK: Macro fuse: {{.*}}LUI - LWU
+---
+name: lui_lwu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10
+ %1:gpr = COPY $x10
+ %2:gpr = LUI 1
+ %3:gpr = XORI %1, 2
+ %4:gpr = LWU %2, 4
+ $x10 = COPY %3
+ $x11 = COPY %4
+ PseudoRET
+...
+
+# CHECK-BFEXT: bitfield_extract
+# CHECK-BFEXT: Macro fuse: {{.*}}SLLI - SRLI
+---
+name: bitfield_extract
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10
+ %1:gpr = COPY $x10
+ %2:gpr = SLLI %1, 31
+ %3:gpr = XORI %1, 3
+ %4:gpr = SRLI %2, 48
+ $x10 = COPY %3
+ $x11 = COPY %4
+ PseudoRET
+...
+
+# CHECK: addi_lb
+# CHECK: Macro fuse: {{.*}}ADDI - LB
+---
+name: addi_lb
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = ADDI %1, 8
+ %4:gpr = XORI %2, 3
+ %5:gpr = LB %3, 0
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: addi_lh
+# CHECK: Macro fuse: {{.*}}ADDI - LH
+---
+name: addi_lh
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = ADDI %1, 8
+ %4:gpr = XORI %2, 3
+ %5:gpr = LH %3, 0
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: addi_lw
+# CHECK: Macro fuse: {{.*}}ADDI - LW
+---
+name: addi_lw
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = ADDI %1, 8
+ %4:gpr = XORI %2, 3
+ %5:gpr = LW %3, 0
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: addi_ld
+# CHECK: Macro fuse: {{.*}}ADDI - LD
+---
+name: addi_ld
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = ADDI %1, 8
+ %4:gpr = XORI %2, 3
+ %5:gpr = LD %3, 0
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: addi_lbu
+# CHECK: Macro fuse: {{.*}}ADDI - LBU
+---
+name: addi_lbu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = ADDI %1, 8
+ %4:gpr = XORI %2, 3
+ %5:gpr = LBU %3, 0
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: addi_lhu
+# CHECK: Macro fuse: {{.*}}ADDI - LHU
+---
+name: addi_lhu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = ADDI %1, 8
+ %4:gpr = XORI %2, 3
+ %5:gpr = LHU %3, 0
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: addi_lwu
+# CHECK: Macro fuse: {{.*}}ADDI - LWU
+---
+name: addi_lwu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = ADDI %1, 8
+ %4:gpr = XORI %2, 3
+ %5:gpr = LWU %3, 0
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: adduw_lb
+# CHECK: Macro fuse: {{.*}}ADD_UW - LB
+---
+name: adduw_lb
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = ADD_UW %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LB %3, 0
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: adduw_lh
+# CHECK: Macro fuse: {{.*}}ADD_UW - LH
+---
+name: adduw_lh
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = ADD_UW %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LH %3, 0
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: adduw_lw
+# CHECK: Macro fuse: {{.*}}ADD_UW - LW
+---
+name: adduw_lw
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = ADD_UW %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LW %3, 0
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: adduw_ld
+# CHECK: Macro fuse: {{.*}}ADD_UW - LD
+---
+name: adduw_ld
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = ADD_UW %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LD %3, 0
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: adduw_lbu
+# CHECK: Macro fuse: {{.*}}ADD_UW - LBU
+---
+name: adduw_lbu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = ADD_UW %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LBU %3, 0
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: adduw_lhu
+# CHECK: Macro fuse: {{.*}}ADD_UW - LHU
+---
+name: adduw_lhu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = ADD_UW %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LHU %3, 0
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: adduw_lwu
+# CHECK: Macro fuse: {{.*}}ADD_UW - LWU
+---
+name: adduw_lwu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = ADD_UW %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LWU %3, 0
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh1add_lb
+# CHECK: Macro fuse: {{.*}}SH1ADD - LB
+---
+name: sh1add_lb
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH1ADD %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LB %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh2add_lb
+# CHECK: Macro fuse: {{.*}}SH2ADD - LB
+---
+name: sh2add_lb
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH2ADD %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LB %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh3add_lb
+# CHECK: Macro fuse: {{.*}}SH3ADD - LB
+---
+name: sh3add_lb
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH3ADD %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LB %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh1add_lh
+# CHECK: Macro fuse: {{.*}}SH1ADD - LH
+---
+name: sh1add_lh
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH1ADD %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LH %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh2add_lh
+# CHECK: Macro fuse: {{.*}}SH2ADD - LH
+---
+name: sh2add_lh
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH2ADD %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LH %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh3add_lh
+# CHECK: Macro fuse: {{.*}}SH3ADD - LH
+---
+name: sh3add_lh
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH3ADD %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LH %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh1add_lw
+# CHECK: Macro fuse: {{.*}}SH1ADD - LW
+---
+name: sh1add_lw
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH1ADD %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LW %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh2add_lw
+# CHECK: Macro fuse: {{.*}}SH2ADD - LW
+---
+name: sh2add_lw
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH2ADD %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LW %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh3add_lw
+# CHECK: Macro fuse: {{.*}}SH3ADD - LW
+---
+name: sh3add_lw
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH3ADD %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LW %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh1add_ld
+# CHECK: Macro fuse: {{.*}}SH1ADD - LD
+---
+name: sh1add_ld
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH1ADD %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LD %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh2add_ld
+# CHECK: Macro fuse: {{.*}}SH2ADD - LD
+---
+name: sh2add_ld
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH2ADD %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LD %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh3add_ld
+# CHECK: Macro fuse: {{.*}}SH3ADD - LD
+---
+name: sh3add_ld
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH3ADD %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LD %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh1add_lbu
+# CHECK: Macro fuse: {{.*}}SH1ADD - LBU
+---
+name: sh1add_lbu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH1ADD %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LBU %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh2add_lbu
+# CHECK: Macro fuse: {{.*}}SH2ADD - LBU
+---
+name: sh2add_lbu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH2ADD %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LBU %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh3add_lbu
+# CHECK: Macro fuse: {{.*}}SH3ADD - LBU
+---
+name: sh3add_lbu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH3ADD %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LBU %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh1add_lhu
+# CHECK: Macro fuse: {{.*}}SH1ADD - LHU
+---
+name: sh1add_lhu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH1ADD %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LHU %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh2add_lhu
+# CHECK: Macro fuse: {{.*}}SH2ADD - LHU
+---
+name: sh2add_lhu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH2ADD %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LHU %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh3add_lhu
+# CHECK: Macro fuse: {{.*}}SH3ADD - LHU
+---
+name: sh3add_lhu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH3ADD %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LHU %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh1add_lwu
+# CHECK: Macro fuse: {{.*}}SH1ADD - LWU
+---
+name: sh1add_lwu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH1ADD %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LWU %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh2add_lwu
+# CHECK: Macro fuse: {{.*}}SH2ADD - LWU
+---
+name: sh2add_lwu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH2ADD %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LWU %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh3add_lwu
+# CHECK: Macro fuse: {{.*}}SH3ADD - LWU
+---
+name: sh3add_lwu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH3ADD %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LWU %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh1adduw_lb
+# CHECK: Macro fuse: {{.*}}SH1ADD_UW - LB
+---
+name: sh1adduw_lb
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH1ADD_UW %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LB %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh2adduw_lb
+# CHECK: Macro fuse: {{.*}}SH2ADD_UW - LB
+---
+name: sh2adduw_lb
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH2ADD_UW %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LB %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh3adduw_lb
+# CHECK: Macro fuse: {{.*}}SH3ADD_UW - LB
+---
+name: sh3adduw_lb
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH3ADD_UW %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LB %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh1adduw_lh
+# CHECK: Macro fuse: {{.*}}SH1ADD_UW - LH
+---
+name: sh1adduw_lh
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH1ADD_UW %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LH %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh2adduw_lh
+# CHECK: Macro fuse: {{.*}}SH2ADD_UW - LH
+---
+name: sh2adduw_lh
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH2ADD_UW %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LH %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh3adduw_lh
+# CHECK: Macro fuse: {{.*}}SH3ADD_UW - LH
+---
+name: sh3adduw_lh
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH3ADD_UW %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LH %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh1adduw_lw
+# CHECK: Macro fuse: {{.*}}SH1ADD_UW - LW
+---
+name: sh1adduw_lw
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH1ADD_UW %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LW %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh2adduw_lw
+# CHECK: Macro fuse: {{.*}}SH2ADD_UW - LW
+---
+name: sh2adduw_lw
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH2ADD_UW %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LW %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh3adduw_lw
+# CHECK: Macro fuse: {{.*}}SH3ADD_UW - LW
+---
+name: sh3adduw_lw
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH3ADD_UW %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LW %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh1adduw_ld
+# CHECK: Macro fuse: {{.*}}SH1ADD_UW - LD
+---
+name: sh1adduw_ld
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH1ADD_UW %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LD %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh2adduw_ld
+# CHECK: Macro fuse: {{.*}}SH2ADD_UW - LD
+---
+name: sh2adduw_ld
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH2ADD_UW %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LD %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh3adduw_ld
+# CHECK: Macro fuse: {{.*}}SH3ADD_UW - LD
+---
+name: sh3adduw_ld
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH3ADD_UW %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LD %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh1adduw_lbu
+# CHECK: Macro fuse: {{.*}}SH1ADD_UW - LBU
+---
+name: sh1adduw_lbu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH1ADD_UW %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LBU %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh2adduw_lbu
+# CHECK: Macro fuse: {{.*}}SH2ADD_UW - LBU
+---
+name: sh2adduw_lbu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH2ADD_UW %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LBU %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh3adduw_lbu
+# CHECK: Macro fuse: {{.*}}SH3ADD_UW - LBU
+---
+name: sh3adduw_lbu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH3ADD_UW %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LBU %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh1adduw_lhu
+# CHECK: Macro fuse: {{.*}}SH1ADD_UW - LHU
+---
+name: sh1adduw_lhu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH1ADD_UW %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LHU %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh2adduw_lhu
+# CHECK: Macro fuse: {{.*}}SH2ADD_UW - LHU
+---
+name: sh2adduw_lhu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH2ADD_UW %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LHU %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh3adduw_lhu
+# CHECK: Macro fuse: {{.*}}SH3ADD_UW - LHU
+---
+name: sh3adduw_lhu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH3ADD_UW %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LHU %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh1adduw_lwu
+# CHECK: Macro fuse: {{.*}}SH1ADD_UW - LWU
+---
+name: sh1adduw_lwu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH1ADD_UW %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LWU %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh2adduw_lwu
+# CHECK: Macro fuse: {{.*}}SH2ADD_UW - LWU
+---
+name: sh2adduw_lwu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH2ADD_UW %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LWU %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
+
+# CHECK: sh3adduw_lwu
+# CHECK: Macro fuse: {{.*}}SH3ADD_UW - LWU
+---
+name: sh3adduw_lwu
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ %1:gpr = COPY $x10
+ %2:gpr = COPY $x11
+ %3:gpr = SH3ADD_UW %1, %2
+ %4:gpr = XORI %2, 3
+ %5:gpr = LWU %3, 8
+ $x10 = COPY %4
+ $x11 = COPY %5
+ PseudoRET
+...
diff --git a/llvm/test/CodeGen/RISCV/misched-load-clustering.ll b/llvm/test/CodeGen/RISCV/misched-load-clustering.ll
index 160f0ae..abdc1ba 100644
--- a/llvm/test/CodeGen/RISCV/misched-load-clustering.ll
+++ b/llvm/test/CodeGen/RISCV/misched-load-clustering.ll
@@ -1,17 +1,42 @@
; REQUIRES: asserts
-; RUN: llc -mtriple=riscv32 -verify-misched -riscv-misched-load-store-clustering=false \
+;
+; Disable all misched clustering
+; RUN: llc -mtriple=riscv32 -verify-misched \
+; RUN: -mattr=+disable-misched-load-clustering,+disable-misched-store-clustering \
; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
; RUN: | FileCheck -check-prefix=NOCLUSTER %s
-; RUN: llc -mtriple=riscv64 -verify-misched -riscv-misched-load-store-clustering=false \
+; RUN: llc -mtriple=riscv64 -verify-misched \
+; RUN: -mattr=+disable-misched-load-clustering,+disable-misched-store-clustering \
; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
; RUN: | FileCheck -check-prefix=NOCLUSTER %s
+;
+; ST misched clustering only
+; RUN: llc -mtriple=riscv32 -verify-misched \
+; RUN: -mattr=+disable-misched-load-clustering \
+; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
+; RUN: | FileCheck -check-prefix=STCLUSTER %s
+; RUN: llc -mtriple=riscv64 -verify-misched \
+; RUN: -mattr=+disable-misched-load-clustering \
+; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
+; RUN: | FileCheck -check-prefix=STCLUSTER %s
+;
+; LD misched clustering only
; RUN: llc -mtriple=riscv32 -verify-misched \
+; RUN: -mattr=+disable-misched-store-clustering \
; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
; RUN: | FileCheck -check-prefix=LDCLUSTER %s
; RUN: llc -mtriple=riscv64 -verify-misched \
+; RUN: -mattr=+disable-misched-store-clustering \
; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
; RUN: | FileCheck -check-prefix=LDCLUSTER %s
-
+;
+; Default misched cluster settings (i.e. both LD and ST clustering)
+; RUN: llc -mtriple=riscv32 -verify-misched \
+; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
+; RUN: | FileCheck -check-prefix=DEFAULTCLUSTER %s
+; RUN: llc -mtriple=riscv64 -verify-misched \
+; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
+; RUN: | FileCheck -check-prefix=DEFAULTCLUSTER %s
define i32 @load_clustering_1(ptr nocapture %p) {
; NOCLUSTER: ********** MI Scheduling **********
@@ -22,6 +47,14 @@ define i32 @load_clustering_1(ptr nocapture %p) {
; NOCLUSTER: SU(4): %4:gpr = LW %0:gpr, 4
; NOCLUSTER: SU(5): %6:gpr = LW %0:gpr, 16
;
+; STCLUSTER: ********** MI Scheduling **********
+; STCLUSTER-LABEL: load_clustering_1:%bb.0
+; STCLUSTER: *** Final schedule for %bb.0 ***
+; STCLUSTER: SU(1): %1:gpr = LW %0:gpr, 12
+; STCLUSTER: SU(2): %2:gpr = LW %0:gpr, 8
+; STCLUSTER: SU(4): %4:gpr = LW %0:gpr, 4
+; STCLUSTER: SU(5): %6:gpr = LW %0:gpr, 16
+;
; LDCLUSTER: ********** MI Scheduling **********
; LDCLUSTER-LABEL: load_clustering_1:%bb.0
; LDCLUSTER: *** Final schedule for %bb.0 ***
@@ -29,6 +62,14 @@ define i32 @load_clustering_1(ptr nocapture %p) {
; LDCLUSTER: SU(2): %2:gpr = LW %0:gpr, 8
; LDCLUSTER: SU(1): %1:gpr = LW %0:gpr, 12
; LDCLUSTER: SU(5): %6:gpr = LW %0:gpr, 16
+;
+; DEFAULTCLUSTER: ********** MI Scheduling **********
+; DEFAULTCLUSTER-LABEL: load_clustering_1:%bb.0
+; DEFAULTCLUSTER: *** Final schedule for %bb.0 ***
+; DEFAULTCLUSTER: SU(4): %4:gpr = LW %0:gpr, 4
+; DEFAULTCLUSTER: SU(2): %2:gpr = LW %0:gpr, 8
+; DEFAULTCLUSTER: SU(1): %1:gpr = LW %0:gpr, 12
+; DEFAULTCLUSTER: SU(5): %6:gpr = LW %0:gpr, 16
entry:
%arrayidx0 = getelementptr inbounds i32, ptr %p, i32 3
%val0 = load i32, ptr %arrayidx0
diff --git a/llvm/test/CodeGen/RISCV/misched-mem-clustering.mir b/llvm/test/CodeGen/RISCV/misched-mem-clustering.mir
index 21398d3..01960f9 100644
--- a/llvm/test/CodeGen/RISCV/misched-mem-clustering.mir
+++ b/llvm/test/CodeGen/RISCV/misched-mem-clustering.mir
@@ -1,10 +1,12 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=riscv64 -x mir -mcpu=sifive-p470 -verify-misched -enable-post-misched=false \
-# RUN: -riscv-postmisched-load-store-clustering=false -debug-only=machine-scheduler \
+# RUN: -mattr=+disable-postmisched-load-clustering \
+# RUN: -mattr=+disable-postmisched-store-clustering -debug-only=machine-scheduler \
# RUN: -start-before=machine-scheduler -stop-after=postmisched -misched-regpressure=false -o - 2>&1 < %s \
# RUN: | FileCheck -check-prefix=NOPOSTMISCHED %s
# RUN: llc -mtriple=riscv64 -x mir -mcpu=sifive-p470 -mattr=+use-postra-scheduler -verify-misched -enable-post-misched=true \
-# RUN: -riscv-postmisched-load-store-clustering=false -debug-only=machine-scheduler \
+# RUN: -mattr=+disable-postmisched-load-clustering \
+# RUN: -mattr=+disable-postmisched-store-clustering -debug-only=machine-scheduler \
# RUN: -start-before=machine-scheduler -stop-after=postmisched -misched-regpressure=false -o - 2>&1 < %s \
# RUN: | FileCheck -check-prefix=NOCLUSTER %s
# RUN: llc -mtriple=riscv64 -x mir -mcpu=sifive-p470 -mattr=+use-postra-scheduler -verify-misched -enable-post-misched=true \
diff --git a/llvm/test/CodeGen/RISCV/misched-store-clustering.ll b/llvm/test/CodeGen/RISCV/misched-store-clustering.ll
new file mode 100644
index 0000000..02e853d
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/misched-store-clustering.ll
@@ -0,0 +1,83 @@
+; REQUIRES: asserts
+;
+; Disable all misched clustering
+; RUN: llc -mtriple=riscv32 -verify-misched \
+; RUN: -mattr=+disable-misched-load-clustering,+disable-misched-store-clustering \
+; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
+; RUN: | FileCheck -check-prefix=NOCLUSTER %s
+; RUN: llc -mtriple=riscv64 -verify-misched \
+; RUN: -mattr=+disable-misched-load-clustering,+disable-misched-store-clustering \
+; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
+; RUN: | FileCheck -check-prefix=NOCLUSTER %s
+;
+; ST misched clustering only
+; RUN: llc -mtriple=riscv32 -verify-misched \
+; RUN: -mattr=+disable-misched-load-clustering \
+; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
+; RUN: | FileCheck -check-prefix=STCLUSTER %s
+; RUN: llc -mtriple=riscv64 -verify-misched \
+; RUN: -mattr=+disable-misched-load-clustering \
+; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
+; RUN: | FileCheck -check-prefix=STCLUSTER %s
+;
+; LD misched clustering only
+; RUN: llc -mtriple=riscv32 -verify-misched \
+; RUN: -mattr=+disable-misched-store-clustering \
+; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
+; RUN: | FileCheck -check-prefix=LDCLUSTER %s
+; RUN: llc -mtriple=riscv64 -verify-misched \
+; RUN: -mattr=+disable-misched-store-clustering \
+; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
+; RUN: | FileCheck -check-prefix=LDCLUSTER %s
+;
+; Default misched cluster settings (i.e. both LD and ST clustering)
+; RUN: llc -mtriple=riscv32 -verify-misched \
+; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
+; RUN: | FileCheck -check-prefix=DEFAULTCLUSTER %s
+; RUN: llc -mtriple=riscv64 -verify-misched \
+; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
+; RUN: | FileCheck -check-prefix=DEFAULTCLUSTER %s
+
+define i32 @store_clustering_1(ptr nocapture %p, i32 %v) {
+; NOCLUSTER: ********** MI Scheduling **********
+; NOCLUSTER-LABEL: store_clustering_1:%bb.0
+; NOCLUSTER: *** Final schedule for %bb.0 ***
+; NOCLUSTER: SU(2): SW %1:gpr, %0:gpr, 12 :: (store (s32) into %ir.arrayidx0)
+; NOCLUSTER: SU(3): SW %1:gpr, %0:gpr, 8 :: (store (s32) into %ir.arrayidx1)
+; NOCLUSTER: SU(4): SW %1:gpr, %0:gpr, 4 :: (store (s32) into %ir.arrayidx2)
+; NOCLUSTER: SU(5): SW %1:gpr, %0:gpr, 16 :: (store (s32) into %ir.arrayidx3)
+;
+; STCLUSTER: ********** MI Scheduling **********
+; STCLUSTER-LABEL: store_clustering_1:%bb.0
+; STCLUSTER: *** Final schedule for %bb.0 ***
+; STCLUSTER: SU(4): SW %1:gpr, %0:gpr, 4 :: (store (s32) into %ir.arrayidx2)
+; STCLUSTER: SU(3): SW %1:gpr, %0:gpr, 8 :: (store (s32) into %ir.arrayidx1)
+; STCLUSTER: SU(2): SW %1:gpr, %0:gpr, 12 :: (store (s32) into %ir.arrayidx0)
+; STCLUSTER: SU(5): SW %1:gpr, %0:gpr, 16 :: (store (s32) into %ir.arrayidx3)
+;
+; LDCLUSTER: ********** MI Scheduling **********
+; LDCLUSTER-LABEL: store_clustering_1:%bb.0
+; LDCLUSTER: *** Final schedule for %bb.0 ***
+; LDCLUSTER: SU(2): SW %1:gpr, %0:gpr, 12 :: (store (s32) into %ir.arrayidx0)
+; LDCLUSTER: SU(3): SW %1:gpr, %0:gpr, 8 :: (store (s32) into %ir.arrayidx1)
+; LDCLUSTER: SU(4): SW %1:gpr, %0:gpr, 4 :: (store (s32) into %ir.arrayidx2)
+; LDCLUSTER: SU(5): SW %1:gpr, %0:gpr, 16 :: (store (s32) into %ir.arrayidx3)
+;
+; DEFAULTCLUSTER: ********** MI Scheduling **********
+; DEFAULTCLUSTER-LABEL: store_clustering_1:%bb.0
+; DEFAULTCLUSTER: *** Final schedule for %bb.0 ***
+; DEFAULTCLUSTER: SU(4): SW %1:gpr, %0:gpr, 4 :: (store (s32) into %ir.arrayidx2)
+; DEFAULTCLUSTER: SU(3): SW %1:gpr, %0:gpr, 8 :: (store (s32) into %ir.arrayidx1)
+; DEFAULTCLUSTER: SU(2): SW %1:gpr, %0:gpr, 12 :: (store (s32) into %ir.arrayidx0)
+; DEFAULTCLUSTER: SU(5): SW %1:gpr, %0:gpr, 16 :: (store (s32) into %ir.arrayidx3)
+entry:
+ %arrayidx0 = getelementptr inbounds i32, ptr %p, i32 3
+ store i32 %v, ptr %arrayidx0
+ %arrayidx1 = getelementptr inbounds i32, ptr %p, i32 2
+ store i32 %v, ptr %arrayidx1
+ %arrayidx2 = getelementptr inbounds i32, ptr %p, i32 1
+ store i32 %v, ptr %arrayidx2
+ %arrayidx3 = getelementptr inbounds i32, ptr %p, i32 4
+ store i32 %v, ptr %arrayidx3
+ ret i32 %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rv32zbkb.ll b/llvm/test/CodeGen/RISCV/rv32zbkb.ll
index 7ebbd78..42d326e 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbkb.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbkb.ll
@@ -350,10 +350,43 @@ define i32 @pack_lo_packh_hi_packh(i8 zeroext %0, i8 zeroext %1, i8 zeroext %2,
ret i32 %j
}
+define i32 @pack_lo_packh_hi_packh_2(i8 %0, i8 %1, i8 %2, i8 %3) nounwind {
+; RV32I-LABEL: pack_lo_packh_hi_packh_2:
+; RV32I: # %bb.0:
+; RV32I-NEXT: zext.b a0, a0
+; RV32I-NEXT: zext.b a1, a1
+; RV32I-NEXT: zext.b a2, a2
+; RV32I-NEXT: slli a3, a3, 24
+; RV32I-NEXT: slli a1, a1, 8
+; RV32I-NEXT: slli a2, a2, 16
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: or a2, a2, a3
+; RV32I-NEXT: or a0, a0, a2
+; RV32I-NEXT: ret
+;
+; RV32ZBKB-LABEL: pack_lo_packh_hi_packh_2:
+; RV32ZBKB: # %bb.0:
+; RV32ZBKB-NEXT: packh a0, a0, a1
+; RV32ZBKB-NEXT: packh a1, a2, a3
+; RV32ZBKB-NEXT: pack a0, a0, a1
+; RV32ZBKB-NEXT: ret
+ %a = zext i8 %0 to i32
+ %b = zext i8 %1 to i32
+ %c = zext i8 %2 to i32
+ %d = zext i8 %3 to i32
+ %e = shl i32 %b, 8
+ %f = shl i32 %c, 16
+ %g = shl i32 %d, 24
+ %h = or i32 %a, %e
+ %i = or i32 %h, %f
+ %j = or i32 %i, %g
+ ret i32 %j
+}
+
define i32 @pack_lo_zext_hi_packh(i16 zeroext %0, i8 zeroext %1, i8 zeroext %2) nounwind {
; RV32I-LABEL: pack_lo_zext_hi_packh:
; RV32I: # %bb.0:
-; RV32I-NEXT: slli a1, a2, 16
+; RV32I-NEXT: slli a1, a1, 16
; RV32I-NEXT: slli a2, a2, 24
; RV32I-NEXT: or a1, a2, a1
; RV32I-NEXT: or a0, a1, a0
@@ -361,14 +394,14 @@ define i32 @pack_lo_zext_hi_packh(i16 zeroext %0, i8 zeroext %1, i8 zeroext %2)
;
; RV32ZBKB-LABEL: pack_lo_zext_hi_packh:
; RV32ZBKB: # %bb.0:
-; RV32ZBKB-NEXT: packh a1, a2, a2
+; RV32ZBKB-NEXT: packh a1, a1, a2
; RV32ZBKB-NEXT: pack a0, a0, a1
; RV32ZBKB-NEXT: ret
%a = zext i16 %0 to i32
%b = zext i8 %1 to i32
%c = zext i8 %2 to i32
%d = shl i32 %c, 8
- %e = or i32 %c, %d
+ %e = or i32 %b, %d
%f = shl i32 %e, 16
%g = or i32 %f, %a
ret i32 %g
@@ -379,7 +412,7 @@ define i32 @pack_lo_zext_hi_packh(i16 zeroext %0, i8 zeroext %1, i8 zeroext %2)
define i32 @pack_lo_noext_hi_packh(i32 %a, i8 zeroext %1, i8 zeroext %2) nounwind {
; RV32I-LABEL: pack_lo_noext_hi_packh:
; RV32I: # %bb.0:
-; RV32I-NEXT: slli a1, a2, 16
+; RV32I-NEXT: slli a1, a1, 16
; RV32I-NEXT: slli a2, a2, 24
; RV32I-NEXT: or a1, a2, a1
; RV32I-NEXT: or a0, a1, a0
@@ -387,14 +420,40 @@ define i32 @pack_lo_noext_hi_packh(i32 %a, i8 zeroext %1, i8 zeroext %2) nounwin
;
; RV32ZBKB-LABEL: pack_lo_noext_hi_packh:
; RV32ZBKB: # %bb.0:
-; RV32ZBKB-NEXT: packh a1, a2, a2
+; RV32ZBKB-NEXT: packh a1, a1, a2
+; RV32ZBKB-NEXT: slli a1, a1, 16
+; RV32ZBKB-NEXT: or a0, a1, a0
+; RV32ZBKB-NEXT: ret
+ %b = zext i8 %1 to i32
+ %c = zext i8 %2 to i32
+ %d = shl i32 %c, 8
+ %e = or i32 %b, %d
+ %f = shl i32 %e, 16
+ %g = or i32 %f, %a
+ ret i32 %g
+}
+
+; Make sure we can match packh+slli without having the input bytes zero extended.
+define i32 @pack_lo_noext_hi_packh_nozeroext(i32 %a, i8 %1, i8 %2) nounwind {
+; RV32I-LABEL: pack_lo_noext_hi_packh_nozeroext:
+; RV32I: # %bb.0:
+; RV32I-NEXT: zext.b a1, a1
+; RV32I-NEXT: slli a2, a2, 24
+; RV32I-NEXT: slli a1, a1, 16
+; RV32I-NEXT: or a0, a2, a0
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: ret
+;
+; RV32ZBKB-LABEL: pack_lo_noext_hi_packh_nozeroext:
+; RV32ZBKB: # %bb.0:
+; RV32ZBKB-NEXT: packh a1, a1, a2
; RV32ZBKB-NEXT: slli a1, a1, 16
; RV32ZBKB-NEXT: or a0, a1, a0
; RV32ZBKB-NEXT: ret
%b = zext i8 %1 to i32
%c = zext i8 %2 to i32
%d = shl i32 %c, 8
- %e = or i32 %c, %d
+ %e = or i32 %b, %d
%f = shl i32 %e, 16
%g = or i32 %f, %a
ret i32 %g
diff --git a/llvm/test/CodeGen/RISCV/rv64-half-convert.ll b/llvm/test/CodeGen/RISCV/rv64-half-convert.ll
index 57061e1..f89d1abf 100644
--- a/llvm/test/CodeGen/RISCV/rv64-half-convert.ll
+++ b/llvm/test/CodeGen/RISCV/rv64-half-convert.ll
@@ -253,8 +253,8 @@ define i128 @fptosi_sat_f16_to_i128(half %a) nounwind {
; RV64IZHINX-NEXT: srli a1, a2, 1
; RV64IZHINX-NEXT: .LBB4_4:
; RV64IZHINX-NEXT: feq.s a2, s0, s0
-; RV64IZHINX-NEXT: neg a3, a3
; RV64IZHINX-NEXT: neg a4, s1
+; RV64IZHINX-NEXT: neg a3, a3
; RV64IZHINX-NEXT: neg a2, a2
; RV64IZHINX-NEXT: and a0, a4, a0
; RV64IZHINX-NEXT: and a1, a2, a1
@@ -334,18 +334,19 @@ define i128 @fptoui_sat_f16_to_i128(half %a) nounwind {
; RV64IZHINX-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; RV64IZHINX-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; RV64IZHINX-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64IZHINX-NEXT: fcvt.s.h a0, a0
-; RV64IZHINX-NEXT: lui a1, 522240
-; RV64IZHINX-NEXT: addi a1, a1, -1
-; RV64IZHINX-NEXT: fle.s a2, zero, a0
-; RV64IZHINX-NEXT: flt.s a1, a1, a0
-; RV64IZHINX-NEXT: neg s0, a1
-; RV64IZHINX-NEXT: neg s1, a2
+; RV64IZHINX-NEXT: fcvt.s.h s0, a0
+; RV64IZHINX-NEXT: fle.s a0, zero, s0
+; RV64IZHINX-NEXT: neg s1, a0
+; RV64IZHINX-NEXT: mv a0, s0
; RV64IZHINX-NEXT: call __fixunssfti
; RV64IZHINX-NEXT: and a0, s1, a0
+; RV64IZHINX-NEXT: lui a2, 522240
; RV64IZHINX-NEXT: and a1, s1, a1
-; RV64IZHINX-NEXT: or a0, s0, a0
-; RV64IZHINX-NEXT: or a1, s0, a1
+; RV64IZHINX-NEXT: addi a2, a2, -1
+; RV64IZHINX-NEXT: flt.s a2, a2, s0
+; RV64IZHINX-NEXT: neg a2, a2
+; RV64IZHINX-NEXT: or a0, a2, a0
+; RV64IZHINX-NEXT: or a1, a2, a1
; RV64IZHINX-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64IZHINX-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV64IZHINX-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/rv64zbkb.ll b/llvm/test/CodeGen/RISCV/rv64zbkb.ll
index 818ea72..4537d18 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbkb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbkb.ll
@@ -392,3 +392,247 @@ define i64 @zext_i16_to_i64(i16 %a) nounwind {
%1 = zext i16 %a to i64
ret i64 %1
}
+
+define void @pack_lo_packh_hi_packh(i8 zeroext %0, i8 zeroext %1, i8 zeroext %2, i8 zeroext %3, ptr %p) nounwind {
+; RV64I-LABEL: pack_lo_packh_hi_packh:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: slli a2, a2, 16
+; RV64I-NEXT: slli a3, a3, 24
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: or a2, a2, a3
+; RV64I-NEXT: or a0, a0, a2
+; RV64I-NEXT: sw a0, 0(a4)
+; RV64I-NEXT: ret
+;
+; RV64ZBKB-LABEL: pack_lo_packh_hi_packh:
+; RV64ZBKB: # %bb.0:
+; RV64ZBKB-NEXT: packh a0, a0, a1
+; RV64ZBKB-NEXT: packh a1, a2, a3
+; RV64ZBKB-NEXT: packw a0, a0, a1
+; RV64ZBKB-NEXT: sw a0, 0(a4)
+; RV64ZBKB-NEXT: ret
+ %a = zext i8 %0 to i32
+ %b = zext i8 %1 to i32
+ %c = zext i8 %2 to i32
+ %d = zext i8 %3 to i32
+ %e = shl i32 %b, 8
+ %f = shl i32 %c, 16
+ %g = shl i32 %d, 24
+ %h = or i32 %a, %e
+ %i = or i32 %h, %f
+ %j = or i32 %i, %g
+ store i32 %j, ptr %p
+ ret void
+}
+
+define void @pack_lo_packh_hi_packh_2(i8 zeroext %0, i8 zeroext %1, i8 zeroext %2, i8 zeroext %3, ptr %p) nounwind {
+; RV64I-LABEL: pack_lo_packh_hi_packh_2:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: slli a2, a2, 16
+; RV64I-NEXT: slli a3, a3, 24
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: or a2, a2, a3
+; RV64I-NEXT: or a0, a2, a0
+; RV64I-NEXT: sw a0, 0(a4)
+; RV64I-NEXT: ret
+;
+; RV64ZBKB-LABEL: pack_lo_packh_hi_packh_2:
+; RV64ZBKB: # %bb.0:
+; RV64ZBKB-NEXT: packh a0, a0, a1
+; RV64ZBKB-NEXT: packh a1, a3, a2
+; RV64ZBKB-NEXT: packw a0, a0, a1
+; RV64ZBKB-NEXT: sw a0, 0(a4)
+; RV64ZBKB-NEXT: ret
+ %a = zext i8 %0 to i32
+ %b = zext i8 %1 to i32
+ %c = zext i8 %2 to i32
+ %d = zext i8 %3 to i32
+ %e = shl i32 %b, 8
+ %f = shl i32 %c, 16
+ %g = shl i32 %d, 24
+ %h = or i32 %a, %e
+ %i = or i32 %g, %h
+ %j = or i32 %f, %i
+ store i32 %j, ptr %p
+ ret void
+}
+
+define void @pack_lo_packh_hi_packh_3(i8 %0, i8 %1, i8 %2, i8 %3, ptr %p) nounwind {
+; RV64I-LABEL: pack_lo_packh_hi_packh_3:
+; RV64I: # %bb.0:
+; RV64I-NEXT: zext.b a0, a0
+; RV64I-NEXT: zext.b a1, a1
+; RV64I-NEXT: zext.b a2, a2
+; RV64I-NEXT: slli a3, a3, 24
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: slli a2, a2, 16
+; RV64I-NEXT: or a0, a3, a0
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: or a0, a2, a0
+; RV64I-NEXT: sw a0, 0(a4)
+; RV64I-NEXT: ret
+;
+; RV64ZBKB-LABEL: pack_lo_packh_hi_packh_3:
+; RV64ZBKB: # %bb.0:
+; RV64ZBKB-NEXT: packh a0, a0, a1
+; RV64ZBKB-NEXT: packh a1, a3, a2
+; RV64ZBKB-NEXT: packw a0, a0, a1
+; RV64ZBKB-NEXT: sw a0, 0(a4)
+; RV64ZBKB-NEXT: ret
+ %a = zext i8 %0 to i32
+ %b = zext i8 %1 to i32
+ %c = zext i8 %2 to i32
+ %d = zext i8 %3 to i32
+ %e = shl i32 %b, 8
+ %f = shl i32 %c, 16
+ %g = shl i32 %d, 24
+ %h = or i32 %a, %e
+ %i = or i32 %g, %h
+ %j = or i32 %f, %i
+ store i32 %j, ptr %p
+ ret void
+}
+
+define i32 @pack_lo_packh_hi_packh_4(i8 zeroext %0, i8 zeroext %1, i8 zeroext %2, i8 zeroext %3, ptr %p) nounwind {
+; RV64I-LABEL: pack_lo_packh_hi_packh_4:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: slli a2, a2, 16
+; RV64I-NEXT: slliw a3, a3, 24
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: or a2, a2, a3
+; RV64I-NEXT: or a0, a0, a2
+; RV64I-NEXT: ret
+;
+; RV64ZBKB-LABEL: pack_lo_packh_hi_packh_4:
+; RV64ZBKB: # %bb.0:
+; RV64ZBKB-NEXT: packh a0, a0, a1
+; RV64ZBKB-NEXT: packh a1, a3, a2
+; RV64ZBKB-NEXT: packw a0, a0, a1
+; RV64ZBKB-NEXT: ret
+ %a = zext i8 %0 to i32
+ %b = zext i8 %1 to i32
+ %c = zext i8 %2 to i32
+ %d = zext i8 %3 to i32
+ %e = shl i32 %b, 8
+ %f = shl i32 %c, 16
+ %g = shl i32 %d, 24
+ %h = or i32 %a, %e
+ %i = or i32 %h, %f
+ %j = or i32 %i, %g
+ ret i32 %j
+}
+
+define void @pack_lo_zext_hi_packh(i16 zeroext %0, i8 zeroext %1, i8 zeroext %2, ptr %p) nounwind {
+; RV64I-LABEL: pack_lo_zext_hi_packh:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a1, a1, 16
+; RV64I-NEXT: slli a2, a2, 24
+; RV64I-NEXT: or a1, a2, a1
+; RV64I-NEXT: or a0, a1, a0
+; RV64I-NEXT: sw a0, 0(a3)
+; RV64I-NEXT: ret
+;
+; RV64ZBKB-LABEL: pack_lo_zext_hi_packh:
+; RV64ZBKB: # %bb.0:
+; RV64ZBKB-NEXT: packh a1, a1, a2
+; RV64ZBKB-NEXT: packw a0, a0, a1
+; RV64ZBKB-NEXT: sw a0, 0(a3)
+; RV64ZBKB-NEXT: ret
+ %a = zext i16 %0 to i32
+ %b = zext i8 %1 to i32
+ %c = zext i8 %2 to i32
+ %d = shl i32 %c, 8
+ %e = or i32 %b, %d
+ %f = shl i32 %e, 16
+ %g = or i32 %f, %a
+ store i32 %g, ptr %p
+ ret void
+}
+
+; Negative test, %a isn't extended so we can't use packw for the outer or, but
+; we can use packh for the high half.
+define void @pack_lo_noext_hi_packh(i32 %a, i8 zeroext %1, i8 zeroext %2, ptr %p) nounwind {
+; RV64I-LABEL: pack_lo_noext_hi_packh:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a1, a1, 16
+; RV64I-NEXT: slli a2, a2, 24
+; RV64I-NEXT: or a1, a2, a1
+; RV64I-NEXT: or a0, a1, a0
+; RV64I-NEXT: sw a0, 0(a3)
+; RV64I-NEXT: ret
+;
+; RV64ZBKB-LABEL: pack_lo_noext_hi_packh:
+; RV64ZBKB: # %bb.0:
+; RV64ZBKB-NEXT: packh a1, a1, a2
+; RV64ZBKB-NEXT: slli a1, a1, 16
+; RV64ZBKB-NEXT: or a0, a1, a0
+; RV64ZBKB-NEXT: sw a0, 0(a3)
+; RV64ZBKB-NEXT: ret
+ %b = zext i8 %1 to i32
+ %c = zext i8 %2 to i32
+ %d = shl i32 %c, 8
+ %e = or i32 %b, %d
+ %f = shl i32 %e, 16
+ %g = or i32 %f, %a
+ store i32 %g, ptr %p
+ ret void
+}
+
+; Make sure we can match packh+slli even when the input bytes aren't zero extended.
+define void @pack_i32_lo_noext_hi_packh_nozeroext(i32 %a, i8 %1, i8 %2, ptr %p) nounwind {
+; RV64I-LABEL: pack_i32_lo_noext_hi_packh_nozeroext:
+; RV64I: # %bb.0:
+; RV64I-NEXT: zext.b a1, a1
+; RV64I-NEXT: slli a2, a2, 24
+; RV64I-NEXT: slli a1, a1, 16
+; RV64I-NEXT: or a0, a2, a0
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: sw a0, 0(a3)
+; RV64I-NEXT: ret
+;
+; RV64ZBKB-LABEL: pack_i32_lo_noext_hi_packh_nozeroext:
+; RV64ZBKB: # %bb.0:
+; RV64ZBKB-NEXT: packh a1, a1, a2
+; RV64ZBKB-NEXT: slli a1, a1, 16
+; RV64ZBKB-NEXT: or a0, a1, a0
+; RV64ZBKB-NEXT: sw a0, 0(a3)
+; RV64ZBKB-NEXT: ret
+ %b = zext i8 %1 to i32
+ %c = zext i8 %2 to i32
+ %d = shl i32 %c, 8
+ %e = or i32 %b, %d
+ %f = shl i32 %e, 16
+ %g = or i32 %f, %a
+ store i32 %g, ptr %p
+ ret void
+}
+
+; Make sure we can match packh+slli even when the input bytes aren't zero extended.
+define i64 @pack_i64_lo_noext_hi_packh_nozeroext(i64 %a, i8 %1, i8 %2, ptr %p) nounwind {
+; RV64I-LABEL: pack_i64_lo_noext_hi_packh_nozeroext:
+; RV64I: # %bb.0:
+; RV64I-NEXT: zext.b a1, a1
+; RV64I-NEXT: zext.b a2, a2
+; RV64I-NEXT: slli a1, a1, 16
+; RV64I-NEXT: slli a2, a2, 24
+; RV64I-NEXT: or a1, a2, a1
+; RV64I-NEXT: or a0, a1, a0
+; RV64I-NEXT: ret
+;
+; RV64ZBKB-LABEL: pack_i64_lo_noext_hi_packh_nozeroext:
+; RV64ZBKB: # %bb.0:
+; RV64ZBKB-NEXT: packh a1, a1, a2
+; RV64ZBKB-NEXT: slli a1, a1, 16
+; RV64ZBKB-NEXT: or a0, a1, a0
+; RV64ZBKB-NEXT: ret
+ %b = zext i8 %1 to i64
+ %c = zext i8 %2 to i64
+ %d = shl i64 %c, 8
+ %e = or i64 %b, %d
+ %f = shl i64 %e, 16
+ %g = or i64 %f, %a
+ ret i64 %g
+}
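
As a reading aid for the packh/packw/pack checks above, here is a rough C model of the RV64 Zbkb pack semantics (a sketch based on the Zbkb specification; the rv_* helper names are illustrative and are not part of this patch). In all three instructions rs1 supplies the low part of the result.

    #include <stdint.h>

    /* Illustrative models of the RV64 Zbkb pack instructions (sketch, not from this patch). */
    static inline uint64_t rv_packh(uint64_t rs1, uint64_t rs2) {
      /* Low bytes of rs1 and rs2 packed into bits 0-15, zero extended. */
      return (rs1 & 0xff) | ((rs2 & 0xff) << 8);
    }
    static inline uint64_t rv_packw(uint64_t rs1, uint64_t rs2) {
      /* Low halfwords packed into bits 0-31, then sign extended to 64 bits. */
      uint32_t lo32 = (uint32_t)(rs1 & 0xffff) | ((uint32_t)(rs2 & 0xffff) << 16);
      return (uint64_t)(int64_t)(int32_t)lo32;
    }
    static inline uint64_t rv_pack(uint64_t rs1, uint64_t rs2) {
      /* Low 32-bit halves packed into a full 64-bit result. */
      return (rs1 & 0xffffffffu) | (rs2 << 32);
    }
    /* e.g. pack_lo_packh_hi_packh_4 above computes
       rv_packw(rv_packh(a0, a1), rv_packh(a2, a3)) == a0 | a1<<8 | a2<<16 | a3<<24. */

With these definitions, the RV64IZBKB load_i64 sequence further down (four packh, two packw, one pack) reassembles eight loaded bytes into a little-endian i64.
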
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ssegN-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ssegN-store.ll
new file mode 100644
index 0000000..abf2894
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ssegN-store.ll
@@ -0,0 +1,72 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+
+define void @store_factor2(<8 x i8> %v0, <8 x i8> %v1, ptr %ptr, i64 %stride) {
+; CHECK-LABEL: store_factor2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vssseg2e8.v v8, (a0), a1
+; CHECK-NEXT: ret
+ call void @llvm.riscv.sseg2.store.mask.v8i8.i64.i64(<8 x i8> %v0, <8 x i8> %v1, ptr %ptr, i64 %stride, <8 x i1> splat (i1 true), i64 8)
+ ret void
+}
+
+define void @store_factor3(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, ptr %ptr, i64 %stride) {
+; CHECK-LABEL: store_factor3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vssseg3e8.v v8, (a0), a1
+; CHECK-NEXT: ret
+ call void @llvm.riscv.sseg3.store.mask.v8i8.i64.i64(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, ptr %ptr, i64 %stride, <8 x i1> splat (i1 true), i64 8)
+ ret void
+}
+
+define void @store_factor4(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, ptr %ptr, i64 %stride) {
+; CHECK-LABEL: store_factor4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vssseg4e8.v v8, (a0), a1
+; CHECK-NEXT: ret
+ call void @llvm.riscv.sseg4.store.mask.v8i8.i64.i64(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, ptr %ptr, i64 %stride, <8 x i1> splat (i1 true), i64 8)
+ ret void
+}
+
+define void @store_factor5(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, <8 x i8> %v4, ptr %ptr, i64 %stride) {
+; CHECK-LABEL: store_factor5:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vssseg5e8.v v8, (a0), a1
+; CHECK-NEXT: ret
+ call void @llvm.riscv.sseg5.store.mask.v8i8.i64.i64(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, <8 x i8> %v4, ptr %ptr, i64 %stride, <8 x i1> splat (i1 true), i64 8)
+ ret void
+}
+
+define void @store_factor6(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, <8 x i8> %v4, <8 x i8> %v5, ptr %ptr, i64 %stride) {
+; CHECK-LABEL: store_factor6:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vssseg6e8.v v8, (a0), a1
+; CHECK-NEXT: ret
+ call void @llvm.riscv.sseg6.store.mask.v8i8.i64.i64(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, <8 x i8> %v4, <8 x i8> %v5, ptr %ptr, i64 %stride, <8 x i1> splat (i1 true), i64 8)
+ ret void
+}
+
+define void @store_factor7(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, <8 x i8> %v4, <8 x i8> %v5, <8 x i8> %v6, ptr %ptr, i64 %stride) {
+; CHECK-LABEL: store_factor7:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vssseg7e8.v v8, (a0), a1
+; CHECK-NEXT: ret
+ call void @llvm.riscv.sseg7.store.mask.v8i8.i64.i64(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, <8 x i8> %v4, <8 x i8> %v5, <8 x i8> %v6, ptr %ptr, i64 %stride, <8 x i1> splat (i1 true), i64 8)
+ ret void
+}
+
+define void @store_factor8(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, <8 x i8> %v4, <8 x i8> %v5, <8 x i8> %v6, <8 x i8> %v7, ptr %ptr, i64 %stride) {
+; CHECK-LABEL: store_factor8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vssseg8e8.v v8, (a0), a1
+; CHECK-NEXT: ret
+ call void @llvm.riscv.sseg8.store.mask.v8i8.i64.i64(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, <8 x i8> %v4, <8 x i8> %v5, <8 x i8> %v6, <8 x i8> %v7, ptr %ptr, i64 %stride, <8 x i1> splat (i1 true), i64 8)
+ ret void
+}
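
All of these factor-N tests follow the same pattern: the masked strided-segment-store intrinsic is called with an all-true mask and VL 8, so it lowers to a single unmasked vsssegNe8.v. As a rough mental model of the factor-2 case (a sketch of strided segment-store semantics; the helper is illustrative, not code from this patch):

    #include <stdint.h>
    #include <stddef.h>

    /* Sketch of what "vssseg2e8.v v8, (ptr), stride" does with VL=8:
       element i of each source vector forms segment i, stored contiguously
       at ptr + i*stride (field 0 first, then field 1). */
    static void sseg2_store_e8(const uint8_t v0[8], const uint8_t v1[8],
                               uint8_t *ptr, ptrdiff_t stride) {
      for (ptrdiff_t i = 0; i < 8; ++i) {
        ptr[i * stride + 0] = v0[i];
        ptr[i * stride + 1] = v1[i];
      }
    }

The higher factors only change how many source registers contribute a byte to each segment, which is why every check body is a single vsetivli plus one vsssegNe8.v.
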
diff --git a/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll b/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll
index 7990dfc..4c84304 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll
@@ -366,8 +366,8 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV64X60-NEXT: # => This Inner Loop Header: Depth=2
; RV64X60-NEXT: vl2r.v v8, (s2)
; RV64X60-NEXT: vl2r.v v10, (s3)
-; RV64X60-NEXT: sub s1, s1, t3
; RV64X60-NEXT: vaaddu.vv v8, v8, v10
+; RV64X60-NEXT: sub s1, s1, t3
; RV64X60-NEXT: vs2r.v v8, (s4)
; RV64X60-NEXT: add s4, s4, t3
; RV64X60-NEXT: add s3, s3, t3
diff --git a/llvm/test/CodeGen/RISCV/unaligned-load-store.ll b/llvm/test/CodeGen/RISCV/unaligned-load-store.ll
index c9c49e8..cb046cd 100644
--- a/llvm/test/CodeGen/RISCV/unaligned-load-store.ll
+++ b/llvm/test/CodeGen/RISCV/unaligned-load-store.ll
@@ -204,18 +204,16 @@ define i64 @load_i64(ptr %p) {
; RV64IZBKB-NEXT: lbu a2, 5(a0)
; RV64IZBKB-NEXT: lbu a3, 6(a0)
; RV64IZBKB-NEXT: lbu a4, 7(a0)
-; RV64IZBKB-NEXT: lbu a5, 0(a0)
-; RV64IZBKB-NEXT: lbu a6, 1(a0)
-; RV64IZBKB-NEXT: lbu a7, 2(a0)
-; RV64IZBKB-NEXT: lbu a0, 3(a0)
+; RV64IZBKB-NEXT: lbu a5, 1(a0)
+; RV64IZBKB-NEXT: lbu a6, 2(a0)
+; RV64IZBKB-NEXT: lbu a7, 3(a0)
+; RV64IZBKB-NEXT: lbu a0, 0(a0)
+; RV64IZBKB-NEXT: packh a3, a3, a4
; RV64IZBKB-NEXT: packh a1, a1, a2
-; RV64IZBKB-NEXT: packh a2, a3, a4
-; RV64IZBKB-NEXT: packh a3, a5, a6
-; RV64IZBKB-NEXT: packh a0, a7, a0
-; RV64IZBKB-NEXT: slli a2, a2, 16
-; RV64IZBKB-NEXT: slli a0, a0, 16
-; RV64IZBKB-NEXT: or a1, a2, a1
-; RV64IZBKB-NEXT: or a0, a0, a3
+; RV64IZBKB-NEXT: packh a2, a6, a7
+; RV64IZBKB-NEXT: packh a0, a0, a5
+; RV64IZBKB-NEXT: packw a1, a1, a3
+; RV64IZBKB-NEXT: packw a0, a0, a2
; RV64IZBKB-NEXT: pack a0, a0, a1
; RV64IZBKB-NEXT: ret
;