Diffstat (limited to 'llvm/test/CodeGen/RISCV')
-rw-r--r-- | llvm/test/CodeGen/RISCV/features-info.ll | 10
-rw-r--r-- | llvm/test/CodeGen/RISCV/half-convert.ll | 108
-rw-r--r-- | llvm/test/CodeGen/RISCV/macro-fusions.mir | 1376
-rw-r--r-- | llvm/test/CodeGen/RISCV/misched-load-clustering.ll | 47
-rw-r--r-- | llvm/test/CodeGen/RISCV/misched-mem-clustering.mir | 6
-rw-r--r-- | llvm/test/CodeGen/RISCV/misched-store-clustering.ll | 83
-rw-r--r-- | llvm/test/CodeGen/RISCV/rv32zbkb.ll | 71
-rw-r--r-- | llvm/test/CodeGen/RISCV/rv64-half-convert.ll | 21
-rw-r--r-- | llvm/test/CodeGen/RISCV/rv64zbkb.ll | 244
-rw-r--r-- | llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ssegN-store.ll | 72
-rw-r--r-- | llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll | 2
-rw-r--r-- | llvm/test/CodeGen/RISCV/unaligned-load-store.ll | 20 |
12 files changed, 1975 insertions, 85 deletions
diff --git a/llvm/test/CodeGen/RISCV/features-info.ll b/llvm/test/CodeGen/RISCV/features-info.ll index b94665b..fb53921 100644 --- a/llvm/test/CodeGen/RISCV/features-info.ll +++ b/llvm/test/CodeGen/RISCV/features-info.ll @@ -6,13 +6,21 @@ ; CHECK-NEXT: 32bit - Implements RV32. ; CHECK-NEXT: 64bit - Implements RV64. ; CHECK-NEXT: a - 'A' (Atomic Instructions). +; CHECK-NEXT: add-load-fusion - Enable ADD(.UW) + load macrofusion. +; CHECK-NEXT: addi-load-fusion - Enable ADDI + load macrofusion. ; CHECK-NEXT: andes45 - Andes 45-Series processors. ; CHECK-NEXT: auipc-addi-fusion - Enable AUIPC+ADDI macrofusion. +; CHECK-NEXT: auipc-load-fusion - Enable AUIPC + load macrofusion. ; CHECK-NEXT: b - 'B' (the collection of the Zba, Zbb, Zbs extensions). +; CHECK-NEXT: bfext-fusion - Enable SLLI+SRLI (bitfield extract) macrofusion. ; CHECK-NEXT: c - 'C' (Compressed Instructions). ; CHECK-NEXT: conditional-cmv-fusion - Enable branch+c.mv fusion. ; CHECK-NEXT: d - 'D' (Double-Precision Floating-Point). ; CHECK-NEXT: disable-latency-sched-heuristic - Disable latency scheduling heuristic. +; CHECK-NEXT: disable-misched-load-clustering - Disable load clustering in the machine scheduler. +; CHECK-NEXT: disable-misched-store-clustering - Disable store clustering in the machine scheduler. +; CHECK-NEXT: disable-postmisched-load-clustering - Disable PostRA load clustering in the machine scheduler. +; CHECK-NEXT: disable-postmisched-store-clustering - Disable PostRA store clustering in the machine scheduler. ; CHECK-NEXT: dlen-factor-2 - Vector unit DLEN(data path width) is half of VLEN. ; CHECK-NEXT: e - 'E' (Embedded Instruction Set with 16 GPRs). ; CHECK-NEXT: exact-asm - Enable Exact Assembly (Disables Compression and Relaxation). @@ -58,6 +66,7 @@ ; CHECK-NEXT: ld-add-fusion - Enable LD+ADD macrofusion. ; CHECK-NEXT: log-vrgather - Has vrgather.vv with LMUL*log2(LMUL) latency ; CHECK-NEXT: lui-addi-fusion - Enable LUI+ADDI macro fusion. +; CHECK-NEXT: lui-load-fusion - Enable LUI + load macrofusion. ; CHECK-NEXT: m - 'M' (Integer Multiplication and Division). ; CHECK-NEXT: mips-p8700 - MIPS p8700 processor. ; CHECK-NEXT: no-default-unroll - Disable default unroll preference.. @@ -130,6 +139,7 @@ ; CHECK-NEXT: shvsatpa - 'Shvsatpa' (vsatp supports all modes supported by satp). ; CHECK-NEXT: shvstvala - 'Shvstvala' (vstval provides all needed values). ; CHECK-NEXT: shvstvecd - 'Shvstvecd' (vstvec supports Direct mode). +; CHECK-NEXT: shxadd-load-fusion - Enable SH(1|2|3)ADD(.UW) + load macrofusion. ; CHECK-NEXT: sifive7 - SiFive 7-Series processors. ; CHECK-NEXT: smaia - 'Smaia' (Advanced Interrupt Architecture Machine Level). ; CHECK-NEXT: smcdeleg - 'Smcdeleg' (Counter Delegation Machine Level). 
diff --git a/llvm/test/CodeGen/RISCV/half-convert.ll b/llvm/test/CodeGen/RISCV/half-convert.ll index facb544..0c152e6 100644 --- a/llvm/test/CodeGen/RISCV/half-convert.ll +++ b/llvm/test/CodeGen/RISCV/half-convert.ll @@ -2262,12 +2262,12 @@ define i64 @fcvt_l_h_sat(half %a) nounwind { ; RV32IZHINX-NEXT: addi a2, a3, -1 ; RV32IZHINX-NEXT: .LBB10_4: # %start ; RV32IZHINX-NEXT: feq.s a3, s0, s0 -; RV32IZHINX-NEXT: neg a4, a1 -; RV32IZHINX-NEXT: neg a1, s1 +; RV32IZHINX-NEXT: neg a4, s1 +; RV32IZHINX-NEXT: neg a5, a1 ; RV32IZHINX-NEXT: neg a3, a3 -; RV32IZHINX-NEXT: and a0, a1, a0 +; RV32IZHINX-NEXT: and a0, a4, a0 ; RV32IZHINX-NEXT: and a1, a3, a2 -; RV32IZHINX-NEXT: or a0, a4, a0 +; RV32IZHINX-NEXT: or a0, a5, a0 ; RV32IZHINX-NEXT: and a0, a3, a0 ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload @@ -2309,12 +2309,12 @@ define i64 @fcvt_l_h_sat(half %a) nounwind { ; RV32IZDINXZHINX-NEXT: addi a2, a3, -1 ; RV32IZDINXZHINX-NEXT: .LBB10_4: # %start ; RV32IZDINXZHINX-NEXT: feq.s a3, s0, s0 -; RV32IZDINXZHINX-NEXT: neg a4, a1 -; RV32IZDINXZHINX-NEXT: neg a1, s1 +; RV32IZDINXZHINX-NEXT: neg a4, s1 +; RV32IZDINXZHINX-NEXT: neg a5, a1 ; RV32IZDINXZHINX-NEXT: neg a3, a3 -; RV32IZDINXZHINX-NEXT: and a0, a1, a0 +; RV32IZDINXZHINX-NEXT: and a0, a4, a0 ; RV32IZDINXZHINX-NEXT: and a1, a3, a2 -; RV32IZDINXZHINX-NEXT: or a0, a4, a0 +; RV32IZDINXZHINX-NEXT: or a0, a5, a0 ; RV32IZDINXZHINX-NEXT: and a0, a3, a0 ; RV32IZDINXZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZDINXZHINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload @@ -2653,12 +2653,12 @@ define i64 @fcvt_l_h_sat(half %a) nounwind { ; CHECK32-IZHINXMIN-NEXT: addi a2, a3, -1 ; CHECK32-IZHINXMIN-NEXT: .LBB10_4: # %start ; CHECK32-IZHINXMIN-NEXT: feq.s a3, s0, s0 -; CHECK32-IZHINXMIN-NEXT: neg a4, a1 -; CHECK32-IZHINXMIN-NEXT: neg a1, s1 +; CHECK32-IZHINXMIN-NEXT: neg a4, s1 +; CHECK32-IZHINXMIN-NEXT: neg a5, a1 ; CHECK32-IZHINXMIN-NEXT: neg a3, a3 -; CHECK32-IZHINXMIN-NEXT: and a0, a1, a0 +; CHECK32-IZHINXMIN-NEXT: and a0, a4, a0 ; CHECK32-IZHINXMIN-NEXT: and a1, a3, a2 -; CHECK32-IZHINXMIN-NEXT: or a0, a4, a0 +; CHECK32-IZHINXMIN-NEXT: or a0, a5, a0 ; CHECK32-IZHINXMIN-NEXT: and a0, a3, a0 ; CHECK32-IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; CHECK32-IZHINXMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload @@ -2701,12 +2701,12 @@ define i64 @fcvt_l_h_sat(half %a) nounwind { ; CHECK32-IZDINXZHINXMIN-NEXT: addi a2, a3, -1 ; CHECK32-IZDINXZHINXMIN-NEXT: .LBB10_4: # %start ; CHECK32-IZDINXZHINXMIN-NEXT: feq.s a3, s0, s0 -; CHECK32-IZDINXZHINXMIN-NEXT: neg a4, a1 -; CHECK32-IZDINXZHINXMIN-NEXT: neg a1, s1 +; CHECK32-IZDINXZHINXMIN-NEXT: neg a4, s1 +; CHECK32-IZDINXZHINXMIN-NEXT: neg a5, a1 ; CHECK32-IZDINXZHINXMIN-NEXT: neg a3, a3 -; CHECK32-IZDINXZHINXMIN-NEXT: and a0, a1, a0 +; CHECK32-IZDINXZHINXMIN-NEXT: and a0, a4, a0 ; CHECK32-IZDINXZHINXMIN-NEXT: and a1, a3, a2 -; CHECK32-IZDINXZHINXMIN-NEXT: or a0, a4, a0 +; CHECK32-IZDINXZHINXMIN-NEXT: or a0, a5, a0 ; CHECK32-IZDINXZHINXMIN-NEXT: and a0, a3, a0 ; CHECK32-IZDINXZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; CHECK32-IZDINXZHINXMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload @@ -2972,18 +2972,19 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind { ; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZHINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32IZHINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32IZHINX-NEXT: fcvt.s.h a0, a0 -; RV32IZHINX-NEXT: lui a1, 391168 -; RV32IZHINX-NEXT: addi a1, a1, -1 -; 
RV32IZHINX-NEXT: fle.s a2, zero, a0 -; RV32IZHINX-NEXT: flt.s a1, a1, a0 -; RV32IZHINX-NEXT: neg s0, a1 -; RV32IZHINX-NEXT: neg s1, a2 +; RV32IZHINX-NEXT: fcvt.s.h s0, a0 +; RV32IZHINX-NEXT: fle.s a0, zero, s0 +; RV32IZHINX-NEXT: neg s1, a0 +; RV32IZHINX-NEXT: mv a0, s0 ; RV32IZHINX-NEXT: call __fixunssfdi ; RV32IZHINX-NEXT: and a0, s1, a0 +; RV32IZHINX-NEXT: lui a2, 391168 ; RV32IZHINX-NEXT: and a1, s1, a1 -; RV32IZHINX-NEXT: or a0, s0, a0 -; RV32IZHINX-NEXT: or a1, s0, a1 +; RV32IZHINX-NEXT: addi a2, a2, -1 +; RV32IZHINX-NEXT: flt.s a2, a2, s0 +; RV32IZHINX-NEXT: neg a2, a2 +; RV32IZHINX-NEXT: or a0, a2, a0 +; RV32IZHINX-NEXT: or a1, a2, a1 ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -3005,18 +3006,19 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind { ; RV32IZDINXZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZDINXZHINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32IZDINXZHINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32IZDINXZHINX-NEXT: fcvt.s.h a0, a0 -; RV32IZDINXZHINX-NEXT: lui a1, 391168 -; RV32IZDINXZHINX-NEXT: addi a1, a1, -1 -; RV32IZDINXZHINX-NEXT: fle.s a2, zero, a0 -; RV32IZDINXZHINX-NEXT: flt.s a1, a1, a0 -; RV32IZDINXZHINX-NEXT: neg s0, a1 -; RV32IZDINXZHINX-NEXT: neg s1, a2 +; RV32IZDINXZHINX-NEXT: fcvt.s.h s0, a0 +; RV32IZDINXZHINX-NEXT: fle.s a0, zero, s0 +; RV32IZDINXZHINX-NEXT: neg s1, a0 +; RV32IZDINXZHINX-NEXT: mv a0, s0 ; RV32IZDINXZHINX-NEXT: call __fixunssfdi ; RV32IZDINXZHINX-NEXT: and a0, s1, a0 +; RV32IZDINXZHINX-NEXT: lui a2, 391168 ; RV32IZDINXZHINX-NEXT: and a1, s1, a1 -; RV32IZDINXZHINX-NEXT: or a0, s0, a0 -; RV32IZDINXZHINX-NEXT: or a1, s0, a1 +; RV32IZDINXZHINX-NEXT: addi a2, a2, -1 +; RV32IZDINXZHINX-NEXT: flt.s a2, a2, s0 +; RV32IZDINXZHINX-NEXT: neg a2, a2 +; RV32IZDINXZHINX-NEXT: or a0, a2, a0 +; RV32IZDINXZHINX-NEXT: or a1, a2, a1 ; RV32IZDINXZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZDINXZHINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IZDINXZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -3217,18 +3219,19 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind { ; CHECK32-IZHINXMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; CHECK32-IZHINXMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; CHECK32-IZHINXMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; CHECK32-IZHINXMIN-NEXT: fcvt.s.h a0, a0 -; CHECK32-IZHINXMIN-NEXT: lui a1, 391168 -; CHECK32-IZHINXMIN-NEXT: addi a1, a1, -1 -; CHECK32-IZHINXMIN-NEXT: fle.s a2, zero, a0 -; CHECK32-IZHINXMIN-NEXT: flt.s a1, a1, a0 -; CHECK32-IZHINXMIN-NEXT: neg s0, a1 -; CHECK32-IZHINXMIN-NEXT: neg s1, a2 +; CHECK32-IZHINXMIN-NEXT: fcvt.s.h s0, a0 +; CHECK32-IZHINXMIN-NEXT: fle.s a0, zero, s0 +; CHECK32-IZHINXMIN-NEXT: neg s1, a0 +; CHECK32-IZHINXMIN-NEXT: mv a0, s0 ; CHECK32-IZHINXMIN-NEXT: call __fixunssfdi ; CHECK32-IZHINXMIN-NEXT: and a0, s1, a0 +; CHECK32-IZHINXMIN-NEXT: lui a2, 391168 ; CHECK32-IZHINXMIN-NEXT: and a1, s1, a1 -; CHECK32-IZHINXMIN-NEXT: or a0, s0, a0 -; CHECK32-IZHINXMIN-NEXT: or a1, s0, a1 +; CHECK32-IZHINXMIN-NEXT: addi a2, a2, -1 +; CHECK32-IZHINXMIN-NEXT: flt.s a2, a2, s0 +; CHECK32-IZHINXMIN-NEXT: neg a2, a2 +; CHECK32-IZHINXMIN-NEXT: or a0, a2, a0 +; CHECK32-IZHINXMIN-NEXT: or a1, a2, a1 ; CHECK32-IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; CHECK32-IZHINXMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; CHECK32-IZHINXMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -3251,18 +3254,19 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind { ; 
CHECK32-IZDINXZHINXMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; CHECK32-IZDINXZHINXMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; CHECK32-IZDINXZHINXMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; CHECK32-IZDINXZHINXMIN-NEXT: fcvt.s.h a0, a0 -; CHECK32-IZDINXZHINXMIN-NEXT: lui a1, 391168 -; CHECK32-IZDINXZHINXMIN-NEXT: addi a1, a1, -1 -; CHECK32-IZDINXZHINXMIN-NEXT: fle.s a2, zero, a0 -; CHECK32-IZDINXZHINXMIN-NEXT: flt.s a1, a1, a0 -; CHECK32-IZDINXZHINXMIN-NEXT: neg s0, a1 -; CHECK32-IZDINXZHINXMIN-NEXT: neg s1, a2 +; CHECK32-IZDINXZHINXMIN-NEXT: fcvt.s.h s0, a0 +; CHECK32-IZDINXZHINXMIN-NEXT: fle.s a0, zero, s0 +; CHECK32-IZDINXZHINXMIN-NEXT: neg s1, a0 +; CHECK32-IZDINXZHINXMIN-NEXT: mv a0, s0 ; CHECK32-IZDINXZHINXMIN-NEXT: call __fixunssfdi ; CHECK32-IZDINXZHINXMIN-NEXT: and a0, s1, a0 +; CHECK32-IZDINXZHINXMIN-NEXT: lui a2, 391168 ; CHECK32-IZDINXZHINXMIN-NEXT: and a1, s1, a1 -; CHECK32-IZDINXZHINXMIN-NEXT: or a0, s0, a0 -; CHECK32-IZDINXZHINXMIN-NEXT: or a1, s0, a1 +; CHECK32-IZDINXZHINXMIN-NEXT: addi a2, a2, -1 +; CHECK32-IZDINXZHINXMIN-NEXT: flt.s a2, a2, s0 +; CHECK32-IZDINXZHINXMIN-NEXT: neg a2, a2 +; CHECK32-IZDINXZHINXMIN-NEXT: or a0, a2, a0 +; CHECK32-IZDINXZHINXMIN-NEXT: or a1, a2, a1 ; CHECK32-IZDINXZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; CHECK32-IZDINXZHINXMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; CHECK32-IZDINXZHINXMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/macro-fusions.mir b/llvm/test/CodeGen/RISCV/macro-fusions.mir index 1346414..ae5b52d 100644 --- a/llvm/test/CodeGen/RISCV/macro-fusions.mir +++ b/llvm/test/CodeGen/RISCV/macro-fusions.mir @@ -2,7 +2,12 @@ # RUN: llc -mtriple=riscv64-linux-gnu -x=mir < %s \ # RUN: -debug-only=machine-scheduler -start-before=machine-scheduler 2>&1 \ # RUN: -mattr=+lui-addi-fusion,+auipc-addi-fusion,+zexth-fusion,+zextw-fusion,+shifted-zextw-fusion,+ld-add-fusion \ +# RUN: -mattr=+add-load-fusion,+auipc-load-fusion,+lui-load-fusion,+addi-load-fusion \ +# RUN: -mattr=+zba,+shxadd-load-fusion \ # RUN: | FileCheck %s +# RUN: llc -mtriple=riscv64-linux-gnu -x=mir < %s \ +# RUN: -debug-only=machine-scheduler -start-before=machine-scheduler 2>&1 \ +# RUN: -mattr=+zba,+bfext-fusion | FileCheck --check-prefixes=CHECK-BFEXT %s # CHECK: lui_addi:%bb.0 # CHECK: Macro fuse: {{.*}}LUI - ADDI @@ -174,3 +179,1374 @@ body: | $x11 = COPY %5 PseudoRET ... + +# CHECK: add_lb +# CHECK: Macro fuse: {{.*}}ADD - LB +--- +name: add_lb +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = ADD %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LB %3, 0 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: add_lh +# CHECK: Macro fuse: {{.*}}ADD - LH +--- +name: add_lh +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = ADD %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LH %3, 0 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: add_lw +# CHECK: Macro fuse: {{.*}}ADD - LW +--- +name: add_lw +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = ADD %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LW %3, 0 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... 
+ +# CHECK: add_lbu +# CHECK: Macro fuse: {{.*}}ADD - LBU +--- +name: add_lbu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = ADD %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LBU %3, 0 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: add_lhu +# CHECK: Macro fuse: {{.*}}ADD - LHU +--- +name: add_lhu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = ADD %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LHU %3, 0 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: add_lwu +# CHECK: Macro fuse: {{.*}}ADD - LWU +--- +name: add_lwu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = ADD %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LWU %3, 0 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: auipc_lb +# CHECK: Macro fuse: {{.*}}AUIPC - LB +--- +name: auipc_lb +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + %1:gpr = COPY $x10 + %2:gpr = AUIPC 1 + %3:gpr = XORI %1, 2 + %4:gpr = LB %2, 4 + $x10 = COPY %3 + $x11 = COPY %4 + PseudoRET +... + +# CHECK: auipc_lh +# CHECK: Macro fuse: {{.*}}AUIPC - LH +--- +name: auipc_lh +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + %1:gpr = COPY $x10 + %2:gpr = AUIPC 1 + %3:gpr = XORI %1, 2 + %4:gpr = LH %2, 4 + $x10 = COPY %3 + $x11 = COPY %4 + PseudoRET +... + +# CHECK: auipc_lw +# CHECK: Macro fuse: {{.*}}AUIPC - LW +--- +name: auipc_lw +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + %1:gpr = COPY $x10 + %2:gpr = AUIPC 1 + %3:gpr = XORI %1, 2 + %4:gpr = LW %2, 4 + $x10 = COPY %3 + $x11 = COPY %4 + PseudoRET +... + +# CHECK: auipc_ld +# CHECK: Macro fuse: {{.*}}AUIPC - LD +--- +name: auipc_ld +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + %1:gpr = COPY $x10 + %2:gpr = AUIPC 1 + %3:gpr = XORI %1, 2 + %4:gpr = LD %2, 4 + $x10 = COPY %3 + $x11 = COPY %4 + PseudoRET +... + +# CHECK: auipc_lbu +# CHECK: Macro fuse: {{.*}}AUIPC - LBU +--- +name: auipc_lbu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + %1:gpr = COPY $x10 + %2:gpr = AUIPC 1 + %3:gpr = XORI %1, 2 + %4:gpr = LBU %2, 4 + $x10 = COPY %3 + $x11 = COPY %4 + PseudoRET +... + +# CHECK: auipc_lhu +# CHECK: Macro fuse: {{.*}}AUIPC - LHU +--- +name: auipc_lhu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + %1:gpr = COPY $x10 + %2:gpr = AUIPC 1 + %3:gpr = XORI %1, 2 + %4:gpr = LHU %2, 4 + $x10 = COPY %3 + $x11 = COPY %4 + PseudoRET +... + +# CHECK: auipc_lwu +# CHECK: Macro fuse: {{.*}}AUIPC - LWU +--- +name: auipc_lwu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + %1:gpr = COPY $x10 + %2:gpr = AUIPC 1 + %3:gpr = XORI %1, 2 + %4:gpr = LWU %2, 4 + $x10 = COPY %3 + $x11 = COPY %4 + PseudoRET +... + +# CHECK: lui_lb +# CHECK: Macro fuse: {{.*}}LUI - LB +--- +name: lui_lb +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + %1:gpr = COPY $x10 + %2:gpr = LUI 1 + %3:gpr = XORI %1, 2 + %4:gpr = LB %2, 4 + $x10 = COPY %3 + $x11 = COPY %4 + PseudoRET +... + +# CHECK: lui_lh +# CHECK: Macro fuse: {{.*}}LUI - LH +--- +name: lui_lh +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + %1:gpr = COPY $x10 + %2:gpr = LUI 1 + %3:gpr = XORI %1, 2 + %4:gpr = LH %2, 4 + $x10 = COPY %3 + $x11 = COPY %4 + PseudoRET +... 
+ +# CHECK: lui_lw +# CHECK: Macro fuse: {{.*}}LUI - LW +--- +name: lui_lw +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + %1:gpr = COPY $x10 + %2:gpr = LUI 1 + %3:gpr = XORI %1, 2 + %4:gpr = LW %2, 4 + $x10 = COPY %3 + $x11 = COPY %4 + PseudoRET +... + +# CHECK: lui_ld +# CHECK: Macro fuse: {{.*}}LUI - LD +--- +name: lui_ld +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + %1:gpr = COPY $x10 + %2:gpr = LUI 1 + %3:gpr = XORI %1, 2 + %4:gpr = LD %2, 4 + $x10 = COPY %3 + $x11 = COPY %4 + PseudoRET +... + +# CHECK: lui_lbu +# CHECK: Macro fuse: {{.*}}LUI - LBU +--- +name: lui_lbu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + %1:gpr = COPY $x10 + %2:gpr = LUI 1 + %3:gpr = XORI %1, 2 + %4:gpr = LBU %2, 4 + $x10 = COPY %3 + $x11 = COPY %4 + PseudoRET +... + +# CHECK: lui_lhu +# CHECK: Macro fuse: {{.*}}LUI - LHU +--- +name: lui_lhu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + %1:gpr = COPY $x10 + %2:gpr = LUI 1 + %3:gpr = XORI %1, 2 + %4:gpr = LHU %2, 4 + $x10 = COPY %3 + $x11 = COPY %4 + PseudoRET +... + +# CHECK: lui_lwu +# CHECK: Macro fuse: {{.*}}LUI - LWU +--- +name: lui_lwu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + %1:gpr = COPY $x10 + %2:gpr = LUI 1 + %3:gpr = XORI %1, 2 + %4:gpr = LWU %2, 4 + $x10 = COPY %3 + $x11 = COPY %4 + PseudoRET +... + +# CHECK-BFEXT: bitfield_extract +# CHECK-BFEXT: Macro fuse: {{.*}}SLLI - SRLI +--- +name: bitfield_extract +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + %1:gpr = COPY $x10 + %2:gpr = SLLI %1, 31 + %3:gpr = XORI %1, 3 + %4:gpr = SRLI %2, 48 + $x10 = COPY %3 + $x11 = COPY %4 + PseudoRET +... + +# CHECK: addi_lb +# CHECK: Macro fuse: {{.*}}ADDI - LB +--- +name: addi_lb +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = ADDI %1, 8 + %4:gpr = XORI %2, 3 + %5:gpr = LB %3, 0 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: addi_lh +# CHECK: Macro fuse: {{.*}}ADDI - LH +--- +name: addi_lh +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = ADDI %1, 8 + %4:gpr = XORI %2, 3 + %5:gpr = LH %3, 0 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: addi_lw +# CHECK: Macro fuse: {{.*}}ADDI - LW +--- +name: addi_lw +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = ADDI %1, 8 + %4:gpr = XORI %2, 3 + %5:gpr = LW %3, 0 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: addi_ld +# CHECK: Macro fuse: {{.*}}ADDI - LD +--- +name: addi_ld +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = ADDI %1, 8 + %4:gpr = XORI %2, 3 + %5:gpr = LD %3, 0 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: addi_lbu +# CHECK: Macro fuse: {{.*}}ADDI - LBU +--- +name: addi_lbu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = ADDI %1, 8 + %4:gpr = XORI %2, 3 + %5:gpr = LBU %3, 0 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: addi_lhu +# CHECK: Macro fuse: {{.*}}ADDI - LHU +--- +name: addi_lhu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = ADDI %1, 8 + %4:gpr = XORI %2, 3 + %5:gpr = LHU %3, 0 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... 
+ +# CHECK: addi_lwu +# CHECK: Macro fuse: {{.*}}ADDI - LWU +--- +name: addi_lwu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = ADDI %1, 8 + %4:gpr = XORI %2, 3 + %5:gpr = LWU %3, 0 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: adduw_lb +# CHECK: Macro fuse: {{.*}}ADD_UW - LB +--- +name: adduw_lb +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = ADD_UW %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LB %3, 0 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: adduw_lh +# CHECK: Macro fuse: {{.*}}ADD_UW - LH +--- +name: adduw_lh +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = ADD_UW %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LH %3, 0 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: adduw_lw +# CHECK: Macro fuse: {{.*}}ADD_UW - LW +--- +name: adduw_lw +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = ADD_UW %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LW %3, 0 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: adduw_ld +# CHECK: Macro fuse: {{.*}}ADD_UW - LD +--- +name: adduw_ld +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = ADD_UW %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LD %3, 0 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: adduw_lbu +# CHECK: Macro fuse: {{.*}}ADD_UW - LBU +--- +name: adduw_lbu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = ADD_UW %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LBU %3, 0 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: adduw_lhu +# CHECK: Macro fuse: {{.*}}ADD_UW - LHU +--- +name: adduw_lhu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = ADD_UW %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LHU %3, 0 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: adduw_lwu +# CHECK: Macro fuse: {{.*}}ADD_UW - LWU +--- +name: adduw_lwu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = ADD_UW %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LWU %3, 0 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh1add_lb +# CHECK: Macro fuse: {{.*}}SH1ADD - LB +--- +name: sh1add_lb +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH1ADD %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LB %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh2add_lb +# CHECK: Macro fuse: {{.*}}SH2ADD - LB +--- +name: sh2add_lb +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH2ADD %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LB %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh3add_lb +# CHECK: Macro fuse: {{.*}}SH3ADD - LB +--- +name: sh3add_lb +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH3ADD %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LB %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... 
+ +# CHECK: sh1add_lh +# CHECK: Macro fuse: {{.*}}SH1ADD - LH +--- +name: sh1add_lh +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH1ADD %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LH %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh2add_lh +# CHECK: Macro fuse: {{.*}}SH2ADD - LH +--- +name: sh2add_lh +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH2ADD %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LH %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh3add_lh +# CHECK: Macro fuse: {{.*}}SH3ADD - LH +--- +name: sh3add_lh +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH3ADD %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LH %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh1add_lw +# CHECK: Macro fuse: {{.*}}SH1ADD - LW +--- +name: sh1add_lw +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH1ADD %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LW %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh2add_lw +# CHECK: Macro fuse: {{.*}}SH2ADD - LW +--- +name: sh2add_lw +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH2ADD %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LW %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh3add_lw +# CHECK: Macro fuse: {{.*}}SH3ADD - LW +--- +name: sh3add_lw +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH3ADD %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LW %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh1add_ld +# CHECK: Macro fuse: {{.*}}SH1ADD - LD +--- +name: sh1add_ld +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH1ADD %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LD %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh2add_ld +# CHECK: Macro fuse: {{.*}}SH2ADD - LD +--- +name: sh2add_ld +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH2ADD %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LD %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh3add_ld +# CHECK: Macro fuse: {{.*}}SH3ADD - LD +--- +name: sh3add_ld +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH3ADD %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LD %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh1add_lbu +# CHECK: Macro fuse: {{.*}}SH1ADD - LBU +--- +name: sh1add_lbu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH1ADD %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LBU %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh2add_lbu +# CHECK: Macro fuse: {{.*}}SH2ADD - LBU +--- +name: sh2add_lbu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH2ADD %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LBU %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... 
+ +# CHECK: sh3add_lbu +# CHECK: Macro fuse: {{.*}}SH3ADD - LBU +--- +name: sh3add_lbu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH3ADD %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LBU %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh1add_lhu +# CHECK: Macro fuse: {{.*}}SH1ADD - LHU +--- +name: sh1add_lhu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH1ADD %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LHU %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh2add_lhu +# CHECK: Macro fuse: {{.*}}SH2ADD - LHU +--- +name: sh2add_lhu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH2ADD %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LHU %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh3add_lhu +# CHECK: Macro fuse: {{.*}}SH3ADD - LHU +--- +name: sh3add_lhu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH3ADD %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LHU %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh1add_lwu +# CHECK: Macro fuse: {{.*}}SH1ADD - LWU +--- +name: sh1add_lwu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH1ADD %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LWU %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh2add_lwu +# CHECK: Macro fuse: {{.*}}SH2ADD - LWU +--- +name: sh2add_lwu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH2ADD %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LWU %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh3add_lwu +# CHECK: Macro fuse: {{.*}}SH3ADD - LWU +--- +name: sh3add_lwu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH3ADD %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LWU %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh1adduw_lb +# CHECK: Macro fuse: {{.*}}SH1ADD_UW - LB +--- +name: sh1adduw_lb +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH1ADD_UW %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LB %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh2adduw_lb +# CHECK: Macro fuse: {{.*}}SH2ADD_UW - LB +--- +name: sh2adduw_lb +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH2ADD_UW %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LB %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh3adduw_lb +# CHECK: Macro fuse: {{.*}}SH3ADD_UW - LB +--- +name: sh3adduw_lb +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH3ADD_UW %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LB %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh1adduw_lh +# CHECK: Macro fuse: {{.*}}SH1ADD_UW - LH +--- +name: sh1adduw_lh +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH1ADD_UW %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LH %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... 
+ +# CHECK: sh2adduw_lh +# CHECK: Macro fuse: {{.*}}SH2ADD_UW - LH +--- +name: sh2adduw_lh +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH2ADD_UW %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LH %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh3adduw_lh +# CHECK: Macro fuse: {{.*}}SH3ADD_UW - LH +--- +name: sh3adduw_lh +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH3ADD_UW %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LH %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh1adduw_lw +# CHECK: Macro fuse: {{.*}}SH1ADD_UW - LW +--- +name: sh1adduw_lw +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH1ADD_UW %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LW %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh2adduw_lw +# CHECK: Macro fuse: {{.*}}SH2ADD_UW - LW +--- +name: sh2adduw_lw +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH2ADD_UW %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LW %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh3adduw_lw +# CHECK: Macro fuse: {{.*}}SH3ADD_UW - LW +--- +name: sh3adduw_lw +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH3ADD_UW %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LW %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh1adduw_ld +# CHECK: Macro fuse: {{.*}}SH1ADD_UW - LD +--- +name: sh1adduw_ld +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH1ADD_UW %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LD %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh2adduw_ld +# CHECK: Macro fuse: {{.*}}SH2ADD_UW - LD +--- +name: sh2adduw_ld +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH2ADD_UW %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LD %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh3adduw_ld +# CHECK: Macro fuse: {{.*}}SH3ADD_UW - LD +--- +name: sh3adduw_ld +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH3ADD_UW %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LD %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh1adduw_lbu +# CHECK: Macro fuse: {{.*}}SH1ADD_UW - LBU +--- +name: sh1adduw_lbu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH1ADD_UW %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LBU %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh2adduw_lbu +# CHECK: Macro fuse: {{.*}}SH2ADD_UW - LBU +--- +name: sh2adduw_lbu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH2ADD_UW %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LBU %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... 
+ +# CHECK: sh3adduw_lbu +# CHECK: Macro fuse: {{.*}}SH3ADD_UW - LBU +--- +name: sh3adduw_lbu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH3ADD_UW %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LBU %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh1adduw_lhu +# CHECK: Macro fuse: {{.*}}SH1ADD_UW - LHU +--- +name: sh1adduw_lhu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH1ADD_UW %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LHU %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh2adduw_lhu +# CHECK: Macro fuse: {{.*}}SH2ADD_UW - LHU +--- +name: sh2adduw_lhu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH2ADD_UW %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LHU %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh3adduw_lhu +# CHECK: Macro fuse: {{.*}}SH3ADD_UW - LHU +--- +name: sh3adduw_lhu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH3ADD_UW %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LHU %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh1adduw_lwu +# CHECK: Macro fuse: {{.*}}SH1ADD_UW - LWU +--- +name: sh1adduw_lwu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH1ADD_UW %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LWU %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh2adduw_lwu +# CHECK: Macro fuse: {{.*}}SH2ADD_UW - LWU +--- +name: sh2adduw_lwu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH2ADD_UW %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LWU %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh3adduw_lwu +# CHECK: Macro fuse: {{.*}}SH3ADD_UW - LWU +--- +name: sh3adduw_lwu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH3ADD_UW %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LWU %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... 
diff --git a/llvm/test/CodeGen/RISCV/misched-load-clustering.ll b/llvm/test/CodeGen/RISCV/misched-load-clustering.ll index 160f0ae..abdc1ba 100644 --- a/llvm/test/CodeGen/RISCV/misched-load-clustering.ll +++ b/llvm/test/CodeGen/RISCV/misched-load-clustering.ll @@ -1,17 +1,42 @@ ; REQUIRES: asserts -; RUN: llc -mtriple=riscv32 -verify-misched -riscv-misched-load-store-clustering=false \ +; +; Disable all misched clustering +; RUN: llc -mtriple=riscv32 -verify-misched \ +; RUN: -mattr=+disable-misched-load-clustering,+disable-misched-store-clustering \ ; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \ ; RUN: | FileCheck -check-prefix=NOCLUSTER %s -; RUN: llc -mtriple=riscv64 -verify-misched -riscv-misched-load-store-clustering=false \ +; RUN: llc -mtriple=riscv64 -verify-misched \ +; RUN: -mattr=+disable-misched-load-clustering,+disable-misched-store-clustering \ ; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \ ; RUN: | FileCheck -check-prefix=NOCLUSTER %s +; +; ST misched clustering only +; RUN: llc -mtriple=riscv32 -verify-misched \ +; RUN: -mattr=+disable-misched-load-clustering \ +; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \ +; RUN: | FileCheck -check-prefix=STCLUSTER %s +; RUN: llc -mtriple=riscv64 -verify-misched \ +; RUN: -mattr=+disable-misched-load-clustering \ +; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \ +; RUN: | FileCheck -check-prefix=STCLUSTER %s +; +; LD misched clustering only ; RUN: llc -mtriple=riscv32 -verify-misched \ +; RUN: -mattr=+disable-misched-store-clustering \ ; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \ ; RUN: | FileCheck -check-prefix=LDCLUSTER %s ; RUN: llc -mtriple=riscv64 -verify-misched \ +; RUN: -mattr=+disable-misched-store-clustering \ ; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \ ; RUN: | FileCheck -check-prefix=LDCLUSTER %s - +; +; Default misched cluster settings (i.e. 
both LD and ST clustering) +; RUN: llc -mtriple=riscv32 -verify-misched \ +; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \ +; RUN: | FileCheck -check-prefix=DEFAULTCLUSTER %s +; RUN: llc -mtriple=riscv64 -verify-misched \ +; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \ +; RUN: | FileCheck -check-prefix=DEFAULTCLUSTER %s define i32 @load_clustering_1(ptr nocapture %p) { ; NOCLUSTER: ********** MI Scheduling ********** @@ -22,6 +47,14 @@ define i32 @load_clustering_1(ptr nocapture %p) { ; NOCLUSTER: SU(4): %4:gpr = LW %0:gpr, 4 ; NOCLUSTER: SU(5): %6:gpr = LW %0:gpr, 16 ; +; STCLUSTER: ********** MI Scheduling ********** +; STCLUSTER-LABEL: load_clustering_1:%bb.0 +; STCLUSTER: *** Final schedule for %bb.0 *** +; STCLUSTER: SU(1): %1:gpr = LW %0:gpr, 12 +; STCLUSTER: SU(2): %2:gpr = LW %0:gpr, 8 +; STCLUSTER: SU(4): %4:gpr = LW %0:gpr, 4 +; STCLUSTER: SU(5): %6:gpr = LW %0:gpr, 16 +; ; LDCLUSTER: ********** MI Scheduling ********** ; LDCLUSTER-LABEL: load_clustering_1:%bb.0 ; LDCLUSTER: *** Final schedule for %bb.0 *** @@ -29,6 +62,14 @@ define i32 @load_clustering_1(ptr nocapture %p) { ; LDCLUSTER: SU(2): %2:gpr = LW %0:gpr, 8 ; LDCLUSTER: SU(1): %1:gpr = LW %0:gpr, 12 ; LDCLUSTER: SU(5): %6:gpr = LW %0:gpr, 16 +; +; DEFAULTCLUSTER: ********** MI Scheduling ********** +; DEFAULTCLUSTER-LABEL: load_clustering_1:%bb.0 +; DEFAULTCLUSTER: *** Final schedule for %bb.0 *** +; DEFAULTCLUSTER: SU(4): %4:gpr = LW %0:gpr, 4 +; DEFAULTCLUSTER: SU(2): %2:gpr = LW %0:gpr, 8 +; DEFAULTCLUSTER: SU(1): %1:gpr = LW %0:gpr, 12 +; DEFAULTCLUSTER: SU(5): %6:gpr = LW %0:gpr, 16 entry: %arrayidx0 = getelementptr inbounds i32, ptr %p, i32 3 %val0 = load i32, ptr %arrayidx0 diff --git a/llvm/test/CodeGen/RISCV/misched-mem-clustering.mir b/llvm/test/CodeGen/RISCV/misched-mem-clustering.mir index 21398d3..01960f9 100644 --- a/llvm/test/CodeGen/RISCV/misched-mem-clustering.mir +++ b/llvm/test/CodeGen/RISCV/misched-mem-clustering.mir @@ -1,10 +1,12 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 # RUN: llc -mtriple=riscv64 -x mir -mcpu=sifive-p470 -verify-misched -enable-post-misched=false \ -# RUN: -riscv-postmisched-load-store-clustering=false -debug-only=machine-scheduler \ +# RUN: -mattr=+disable-postmisched-load-clustering \ +# RUN: -mattr=+disable-postmisched-store-clustering -debug-only=machine-scheduler \ # RUN: -start-before=machine-scheduler -stop-after=postmisched -misched-regpressure=false -o - 2>&1 < %s \ # RUN: | FileCheck -check-prefix=NOPOSTMISCHED %s # RUN: llc -mtriple=riscv64 -x mir -mcpu=sifive-p470 -mattr=+use-postra-scheduler -verify-misched -enable-post-misched=true \ -# RUN: -riscv-postmisched-load-store-clustering=false -debug-only=machine-scheduler \ +# RUN: -mattr=+disable-postmisched-load-clustering \ +# RUN: -mattr=+disable-postmisched-store-clustering -debug-only=machine-scheduler \ # RUN: -start-before=machine-scheduler -stop-after=postmisched -misched-regpressure=false -o - 2>&1 < %s \ # RUN: | FileCheck -check-prefix=NOCLUSTER %s # RUN: llc -mtriple=riscv64 -x mir -mcpu=sifive-p470 -mattr=+use-postra-scheduler -verify-misched -enable-post-misched=true \ diff --git a/llvm/test/CodeGen/RISCV/misched-store-clustering.ll b/llvm/test/CodeGen/RISCV/misched-store-clustering.ll new file mode 100644 index 0000000..02e853d --- /dev/null +++ b/llvm/test/CodeGen/RISCV/misched-store-clustering.ll @@ -0,0 +1,83 @@ +; REQUIRES: asserts +; +; Disable all misched clustering +; RUN: llc -mtriple=riscv32 -verify-misched \ +; RUN: 
-mattr=+disable-misched-load-clustering,+disable-misched-store-clustering \ +; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \ +; RUN: | FileCheck -check-prefix=NOCLUSTER %s +; RUN: llc -mtriple=riscv64 -verify-misched \ +; RUN: -mattr=+disable-misched-load-clustering,+disable-misched-store-clustering \ +; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \ +; RUN: | FileCheck -check-prefix=NOCLUSTER %s +; +; ST misched clustering only +; RUN: llc -mtriple=riscv32 -verify-misched \ +; RUN: -mattr=+disable-misched-load-clustering \ +; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \ +; RUN: | FileCheck -check-prefix=STCLUSTER %s +; RUN: llc -mtriple=riscv64 -verify-misched \ +; RUN: -mattr=+disable-misched-load-clustering \ +; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \ +; RUN: | FileCheck -check-prefix=STCLUSTER %s +; +; LD misched clustering only +; RUN: llc -mtriple=riscv32 -verify-misched \ +; RUN: -mattr=+disable-misched-store-clustering \ +; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \ +; RUN: | FileCheck -check-prefix=LDCLUSTER %s +; RUN: llc -mtriple=riscv64 -verify-misched \ +; RUN: -mattr=+disable-misched-store-clustering \ +; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \ +; RUN: | FileCheck -check-prefix=LDCLUSTER %s +; +; Default misched cluster settings (i.e. both LD and ST clustering) +; RUN: llc -mtriple=riscv32 -verify-misched \ +; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \ +; RUN: | FileCheck -check-prefix=DEFAULTCLUSTER %s +; RUN: llc -mtriple=riscv64 -verify-misched \ +; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \ +; RUN: | FileCheck -check-prefix=DEFAULTCLUSTER %s + +define i32 @store_clustering_1(ptr nocapture %p, i32 %v) { +; NOCLUSTER: ********** MI Scheduling ********** +; NOCLUSTER-LABEL: store_clustering_1:%bb.0 +; NOCLUSTER: *** Final schedule for %bb.0 *** +; NOCLUSTER: SU(2): SW %1:gpr, %0:gpr, 12 :: (store (s32) into %ir.arrayidx0) +; NOCLUSTER: SU(3): SW %1:gpr, %0:gpr, 8 :: (store (s32) into %ir.arrayidx1) +; NOCLUSTER: SU(4): SW %1:gpr, %0:gpr, 4 :: (store (s32) into %ir.arrayidx2) +; NOCLUSTER: SU(5): SW %1:gpr, %0:gpr, 16 :: (store (s32) into %ir.arrayidx3) +; +; STCLUSTER: ********** MI Scheduling ********** +; STCLUSTER-LABEL: store_clustering_1:%bb.0 +; STCLUSTER: *** Final schedule for %bb.0 *** +; STCLUSTER: SU(4): SW %1:gpr, %0:gpr, 4 :: (store (s32) into %ir.arrayidx2) +; STCLUSTER: SU(3): SW %1:gpr, %0:gpr, 8 :: (store (s32) into %ir.arrayidx1) +; STCLUSTER: SU(2): SW %1:gpr, %0:gpr, 12 :: (store (s32) into %ir.arrayidx0) +; STCLUSTER: SU(5): SW %1:gpr, %0:gpr, 16 :: (store (s32) into %ir.arrayidx3) +; +; LDCLUSTER: ********** MI Scheduling ********** +; LDCLUSTER-LABEL: store_clustering_1:%bb.0 +; LDCLUSTER: *** Final schedule for %bb.0 *** +; LDCLUSTER: SU(2): SW %1:gpr, %0:gpr, 12 :: (store (s32) into %ir.arrayidx0) +; LDCLUSTER: SU(3): SW %1:gpr, %0:gpr, 8 :: (store (s32) into %ir.arrayidx1) +; LDCLUSTER: SU(4): SW %1:gpr, %0:gpr, 4 :: (store (s32) into %ir.arrayidx2) +; LDCLUSTER: SU(5): SW %1:gpr, %0:gpr, 16 :: (store (s32) into %ir.arrayidx3) +; +; DEFAULTCLUSTER: ********** MI Scheduling ********** +; DEFAULTCLUSTER-LABEL: store_clustering_1:%bb.0 +; DEFAULTCLUSTER: *** Final schedule for %bb.0 *** +; DEFAULTCLUSTER: SU(4): SW %1:gpr, %0:gpr, 4 :: (store (s32) into %ir.arrayidx2) +; DEFAULTCLUSTER: SU(3): SW %1:gpr, %0:gpr, 8 :: (store (s32) into %ir.arrayidx1) +; DEFAULTCLUSTER: SU(2): SW %1:gpr, %0:gpr, 12 :: (store (s32) into %ir.arrayidx0) +; DEFAULTCLUSTER: SU(5): SW %1:gpr, %0:gpr, 16 :: 
(store (s32) into %ir.arrayidx3) +entry: + %arrayidx0 = getelementptr inbounds i32, ptr %p, i32 3 + store i32 %v, ptr %arrayidx0 + %arrayidx1 = getelementptr inbounds i32, ptr %p, i32 2 + store i32 %v, ptr %arrayidx1 + %arrayidx2 = getelementptr inbounds i32, ptr %p, i32 1 + store i32 %v, ptr %arrayidx2 + %arrayidx3 = getelementptr inbounds i32, ptr %p, i32 4 + store i32 %v, ptr %arrayidx3 + ret i32 %v +} diff --git a/llvm/test/CodeGen/RISCV/rv32zbkb.ll b/llvm/test/CodeGen/RISCV/rv32zbkb.ll index 7ebbd78..42d326e 100644 --- a/llvm/test/CodeGen/RISCV/rv32zbkb.ll +++ b/llvm/test/CodeGen/RISCV/rv32zbkb.ll @@ -350,10 +350,43 @@ define i32 @pack_lo_packh_hi_packh(i8 zeroext %0, i8 zeroext %1, i8 zeroext %2, ret i32 %j } +define i32 @pack_lo_packh_hi_packh_2(i8 %0, i8 %1, i8 %2, i8 %3) nounwind { +; RV32I-LABEL: pack_lo_packh_hi_packh_2: +; RV32I: # %bb.0: +; RV32I-NEXT: zext.b a0, a0 +; RV32I-NEXT: zext.b a1, a1 +; RV32I-NEXT: zext.b a2, a2 +; RV32I-NEXT: slli a3, a3, 24 +; RV32I-NEXT: slli a1, a1, 8 +; RV32I-NEXT: slli a2, a2, 16 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: or a2, a2, a3 +; RV32I-NEXT: or a0, a0, a2 +; RV32I-NEXT: ret +; +; RV32ZBKB-LABEL: pack_lo_packh_hi_packh_2: +; RV32ZBKB: # %bb.0: +; RV32ZBKB-NEXT: packh a0, a0, a1 +; RV32ZBKB-NEXT: packh a1, a2, a3 +; RV32ZBKB-NEXT: pack a0, a0, a1 +; RV32ZBKB-NEXT: ret + %a = zext i8 %0 to i32 + %b = zext i8 %1 to i32 + %c = zext i8 %2 to i32 + %d = zext i8 %3 to i32 + %e = shl i32 %b, 8 + %f = shl i32 %c, 16 + %g = shl i32 %d, 24 + %h = or i32 %a, %e + %i = or i32 %h, %f + %j = or i32 %i, %g + ret i32 %j +} + define i32 @pack_lo_zext_hi_packh(i16 zeroext %0, i8 zeroext %1, i8 zeroext %2) nounwind { ; RV32I-LABEL: pack_lo_zext_hi_packh: ; RV32I: # %bb.0: -; RV32I-NEXT: slli a1, a2, 16 +; RV32I-NEXT: slli a1, a1, 16 ; RV32I-NEXT: slli a2, a2, 24 ; RV32I-NEXT: or a1, a2, a1 ; RV32I-NEXT: or a0, a1, a0 @@ -361,14 +394,14 @@ define i32 @pack_lo_zext_hi_packh(i16 zeroext %0, i8 zeroext %1, i8 zeroext %2) ; ; RV32ZBKB-LABEL: pack_lo_zext_hi_packh: ; RV32ZBKB: # %bb.0: -; RV32ZBKB-NEXT: packh a1, a2, a2 +; RV32ZBKB-NEXT: packh a1, a1, a2 ; RV32ZBKB-NEXT: pack a0, a0, a1 ; RV32ZBKB-NEXT: ret %a = zext i16 %0 to i32 %b = zext i8 %1 to i32 %c = zext i8 %2 to i32 %d = shl i32 %c, 8 - %e = or i32 %c, %d + %e = or i32 %b, %d %f = shl i32 %e, 16 %g = or i32 %f, %a ret i32 %g @@ -379,7 +412,7 @@ define i32 @pack_lo_zext_hi_packh(i16 zeroext %0, i8 zeroext %1, i8 zeroext %2) define i32 @pack_lo_noext_hi_packh(i32 %a, i8 zeroext %1, i8 zeroext %2) nounwind { ; RV32I-LABEL: pack_lo_noext_hi_packh: ; RV32I: # %bb.0: -; RV32I-NEXT: slli a1, a2, 16 +; RV32I-NEXT: slli a1, a1, 16 ; RV32I-NEXT: slli a2, a2, 24 ; RV32I-NEXT: or a1, a2, a1 ; RV32I-NEXT: or a0, a1, a0 @@ -387,14 +420,40 @@ define i32 @pack_lo_noext_hi_packh(i32 %a, i8 zeroext %1, i8 zeroext %2) nounwin ; ; RV32ZBKB-LABEL: pack_lo_noext_hi_packh: ; RV32ZBKB: # %bb.0: -; RV32ZBKB-NEXT: packh a1, a2, a2 +; RV32ZBKB-NEXT: packh a1, a1, a2 +; RV32ZBKB-NEXT: slli a1, a1, 16 +; RV32ZBKB-NEXT: or a0, a1, a0 +; RV32ZBKB-NEXT: ret + %b = zext i8 %1 to i32 + %c = zext i8 %2 to i32 + %d = shl i32 %c, 8 + %e = or i32 %b, %d + %f = shl i32 %e, 16 + %g = or i32 %f, %a + ret i32 %g +} + +; Make sure we can match packh+slli without having the input bytes zero extended. 
+define i32 @pack_lo_noext_hi_packh_nozeroext(i32 %a, i8 %1, i8 %2) nounwind { +; RV32I-LABEL: pack_lo_noext_hi_packh_nozeroext: +; RV32I: # %bb.0: +; RV32I-NEXT: zext.b a1, a1 +; RV32I-NEXT: slli a2, a2, 24 +; RV32I-NEXT: slli a1, a1, 16 +; RV32I-NEXT: or a0, a2, a0 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: ret +; +; RV32ZBKB-LABEL: pack_lo_noext_hi_packh_nozeroext: +; RV32ZBKB: # %bb.0: +; RV32ZBKB-NEXT: packh a1, a1, a2 ; RV32ZBKB-NEXT: slli a1, a1, 16 ; RV32ZBKB-NEXT: or a0, a1, a0 ; RV32ZBKB-NEXT: ret %b = zext i8 %1 to i32 %c = zext i8 %2 to i32 %d = shl i32 %c, 8 - %e = or i32 %c, %d + %e = or i32 %b, %d %f = shl i32 %e, 16 %g = or i32 %f, %a ret i32 %g diff --git a/llvm/test/CodeGen/RISCV/rv64-half-convert.ll b/llvm/test/CodeGen/RISCV/rv64-half-convert.ll index 57061e1..f89d1abf 100644 --- a/llvm/test/CodeGen/RISCV/rv64-half-convert.ll +++ b/llvm/test/CodeGen/RISCV/rv64-half-convert.ll @@ -253,8 +253,8 @@ define i128 @fptosi_sat_f16_to_i128(half %a) nounwind { ; RV64IZHINX-NEXT: srli a1, a2, 1 ; RV64IZHINX-NEXT: .LBB4_4: ; RV64IZHINX-NEXT: feq.s a2, s0, s0 -; RV64IZHINX-NEXT: neg a3, a3 ; RV64IZHINX-NEXT: neg a4, s1 +; RV64IZHINX-NEXT: neg a3, a3 ; RV64IZHINX-NEXT: neg a2, a2 ; RV64IZHINX-NEXT: and a0, a4, a0 ; RV64IZHINX-NEXT: and a1, a2, a1 @@ -334,18 +334,19 @@ define i128 @fptoui_sat_f16_to_i128(half %a) nounwind { ; RV64IZHINX-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; RV64IZHINX-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64IZHINX-NEXT: sd s1, 8(sp) # 8-byte Folded Spill -; RV64IZHINX-NEXT: fcvt.s.h a0, a0 -; RV64IZHINX-NEXT: lui a1, 522240 -; RV64IZHINX-NEXT: addi a1, a1, -1 -; RV64IZHINX-NEXT: fle.s a2, zero, a0 -; RV64IZHINX-NEXT: flt.s a1, a1, a0 -; RV64IZHINX-NEXT: neg s0, a1 -; RV64IZHINX-NEXT: neg s1, a2 +; RV64IZHINX-NEXT: fcvt.s.h s0, a0 +; RV64IZHINX-NEXT: fle.s a0, zero, s0 +; RV64IZHINX-NEXT: neg s1, a0 +; RV64IZHINX-NEXT: mv a0, s0 ; RV64IZHINX-NEXT: call __fixunssfti ; RV64IZHINX-NEXT: and a0, s1, a0 +; RV64IZHINX-NEXT: lui a2, 522240 ; RV64IZHINX-NEXT: and a1, s1, a1 -; RV64IZHINX-NEXT: or a0, s0, a0 -; RV64IZHINX-NEXT: or a1, s0, a1 +; RV64IZHINX-NEXT: addi a2, a2, -1 +; RV64IZHINX-NEXT: flt.s a2, a2, s0 +; RV64IZHINX-NEXT: neg a2, a2 +; RV64IZHINX-NEXT: or a0, a2, a0 +; RV64IZHINX-NEXT: or a1, a2, a1 ; RV64IZHINX-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64IZHINX-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64IZHINX-NEXT: ld s1, 8(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/rv64zbkb.ll b/llvm/test/CodeGen/RISCV/rv64zbkb.ll index 818ea72..4537d18 100644 --- a/llvm/test/CodeGen/RISCV/rv64zbkb.ll +++ b/llvm/test/CodeGen/RISCV/rv64zbkb.ll @@ -392,3 +392,247 @@ define i64 @zext_i16_to_i64(i16 %a) nounwind { %1 = zext i16 %a to i64 ret i64 %1 } + +define void @pack_lo_packh_hi_packh(i8 zeroext %0, i8 zeroext %1, i8 zeroext %2, i8 zeroext %3, ptr %p) nounwind { +; RV64I-LABEL: pack_lo_packh_hi_packh: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a1, a1, 8 +; RV64I-NEXT: slli a2, a2, 16 +; RV64I-NEXT: slli a3, a3, 24 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: or a2, a2, a3 +; RV64I-NEXT: or a0, a0, a2 +; RV64I-NEXT: sw a0, 0(a4) +; RV64I-NEXT: ret +; +; RV64ZBKB-LABEL: pack_lo_packh_hi_packh: +; RV64ZBKB: # %bb.0: +; RV64ZBKB-NEXT: packh a0, a0, a1 +; RV64ZBKB-NEXT: packh a1, a2, a3 +; RV64ZBKB-NEXT: packw a0, a0, a1 +; RV64ZBKB-NEXT: sw a0, 0(a4) +; RV64ZBKB-NEXT: ret + %a = zext i8 %0 to i32 + %b = zext i8 %1 to i32 + %c = zext i8 %2 to i32 + %d = zext i8 %3 to i32 + %e = shl i32 %b, 8 + %f = shl i32 %c, 16 + %g = shl i32 %d, 24 + %h = or 
i32 %a, %e + %i = or i32 %h, %f + %j = or i32 %i, %g + store i32 %j, ptr %p + ret void +} + +define void @pack_lo_packh_hi_packh_2(i8 zeroext %0, i8 zeroext %1, i8 zeroext %2, i8 zeroext %3, ptr %p) nounwind { +; RV64I-LABEL: pack_lo_packh_hi_packh_2: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a1, a1, 8 +; RV64I-NEXT: slli a2, a2, 16 +; RV64I-NEXT: slli a3, a3, 24 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: or a2, a2, a3 +; RV64I-NEXT: or a0, a2, a0 +; RV64I-NEXT: sw a0, 0(a4) +; RV64I-NEXT: ret +; +; RV64ZBKB-LABEL: pack_lo_packh_hi_packh_2: +; RV64ZBKB: # %bb.0: +; RV64ZBKB-NEXT: packh a0, a0, a1 +; RV64ZBKB-NEXT: packh a1, a3, a2 +; RV64ZBKB-NEXT: packw a0, a0, a1 +; RV64ZBKB-NEXT: sw a0, 0(a4) +; RV64ZBKB-NEXT: ret + %a = zext i8 %0 to i32 + %b = zext i8 %1 to i32 + %c = zext i8 %2 to i32 + %d = zext i8 %3 to i32 + %e = shl i32 %b, 8 + %f = shl i32 %c, 16 + %g = shl i32 %d, 24 + %h = or i32 %a, %e + %i = or i32 %g, %h + %j = or i32 %f, %i + store i32 %j, ptr %p + ret void +} + +define void @pack_lo_packh_hi_packh_3(i8 %0, i8 %1, i8 %2, i8 %3, ptr %p) nounwind { +; RV64I-LABEL: pack_lo_packh_hi_packh_3: +; RV64I: # %bb.0: +; RV64I-NEXT: zext.b a0, a0 +; RV64I-NEXT: zext.b a1, a1 +; RV64I-NEXT: zext.b a2, a2 +; RV64I-NEXT: slli a3, a3, 24 +; RV64I-NEXT: slli a1, a1, 8 +; RV64I-NEXT: slli a2, a2, 16 +; RV64I-NEXT: or a0, a3, a0 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: or a0, a2, a0 +; RV64I-NEXT: sw a0, 0(a4) +; RV64I-NEXT: ret +; +; RV64ZBKB-LABEL: pack_lo_packh_hi_packh_3: +; RV64ZBKB: # %bb.0: +; RV64ZBKB-NEXT: packh a0, a0, a1 +; RV64ZBKB-NEXT: packh a1, a3, a2 +; RV64ZBKB-NEXT: packw a0, a0, a1 +; RV64ZBKB-NEXT: sw a0, 0(a4) +; RV64ZBKB-NEXT: ret + %a = zext i8 %0 to i32 + %b = zext i8 %1 to i32 + %c = zext i8 %2 to i32 + %d = zext i8 %3 to i32 + %e = shl i32 %b, 8 + %f = shl i32 %c, 16 + %g = shl i32 %d, 24 + %h = or i32 %a, %e + %i = or i32 %g, %h + %j = or i32 %f, %i + store i32 %j, ptr %p + ret void +} + +define i32 @pack_lo_packh_hi_packh_4(i8 zeroext %0, i8 zeroext %1, i8 zeroext %2, i8 zeroext %3, ptr %p) nounwind { +; RV64I-LABEL: pack_lo_packh_hi_packh_4: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a1, a1, 8 +; RV64I-NEXT: slli a2, a2, 16 +; RV64I-NEXT: slliw a3, a3, 24 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: or a2, a2, a3 +; RV64I-NEXT: or a0, a0, a2 +; RV64I-NEXT: ret +; +; RV64ZBKB-LABEL: pack_lo_packh_hi_packh_4: +; RV64ZBKB: # %bb.0: +; RV64ZBKB-NEXT: packh a0, a0, a1 +; RV64ZBKB-NEXT: packh a1, a3, a2 +; RV64ZBKB-NEXT: packw a0, a0, a1 +; RV64ZBKB-NEXT: ret + %a = zext i8 %0 to i32 + %b = zext i8 %1 to i32 + %c = zext i8 %2 to i32 + %d = zext i8 %3 to i32 + %e = shl i32 %b, 8 + %f = shl i32 %c, 16 + %g = shl i32 %d, 24 + %h = or i32 %a, %e + %i = or i32 %h, %f + %j = or i32 %i, %g + ret i32 %j +} + +define void @pack_lo_zext_hi_packh(i16 zeroext %0, i8 zeroext %1, i8 zeroext %2, ptr %p) nounwind { +; RV64I-LABEL: pack_lo_zext_hi_packh: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a1, a1, 16 +; RV64I-NEXT: slli a2, a2, 24 +; RV64I-NEXT: or a1, a2, a1 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: sw a0, 0(a3) +; RV64I-NEXT: ret +; +; RV64ZBKB-LABEL: pack_lo_zext_hi_packh: +; RV64ZBKB: # %bb.0: +; RV64ZBKB-NEXT: packh a1, a1, a2 +; RV64ZBKB-NEXT: packw a0, a0, a1 +; RV64ZBKB-NEXT: sw a0, 0(a3) +; RV64ZBKB-NEXT: ret + %a = zext i16 %0 to i32 + %b = zext i8 %1 to i32 + %c = zext i8 %2 to i32 + %d = shl i32 %c, 8 + %e = or i32 %b, %d + %f = shl i32 %e, 16 + %g = or i32 %f, %a + store i32 %g, ptr %p + ret void +} + +; Negative test, %a isn't extended so we can't use packw for the outer 
or, but +; we can use packh for the high half. +define void @pack_lo_noext_hi_packh(i32 %a, i8 zeroext %1, i8 zeroext %2, ptr %p) nounwind { +; RV64I-LABEL: pack_lo_noext_hi_packh: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a1, a1, 16 +; RV64I-NEXT: slli a2, a2, 24 +; RV64I-NEXT: or a1, a2, a1 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: sw a0, 0(a3) +; RV64I-NEXT: ret +; +; RV64ZBKB-LABEL: pack_lo_noext_hi_packh: +; RV64ZBKB: # %bb.0: +; RV64ZBKB-NEXT: packh a1, a1, a2 +; RV64ZBKB-NEXT: slli a1, a1, 16 +; RV64ZBKB-NEXT: or a0, a1, a0 +; RV64ZBKB-NEXT: sw a0, 0(a3) +; RV64ZBKB-NEXT: ret + %b = zext i8 %1 to i32 + %c = zext i8 %2 to i32 + %d = shl i32 %c, 8 + %e = or i32 %b, %d + %f = shl i32 %e, 16 + %g = or i32 %f, %a + store i32 %g, ptr %p + ret void +} + +; Make sure we can match packh+slli without having the input bytes zero extended. +define void @pack_i32_lo_noext_hi_packh_nozeroext(i32 %a, i8 %1, i8 %2, ptr %p) nounwind { +; RV64I-LABEL: pack_i32_lo_noext_hi_packh_nozeroext: +; RV64I: # %bb.0: +; RV64I-NEXT: zext.b a1, a1 +; RV64I-NEXT: slli a2, a2, 24 +; RV64I-NEXT: slli a1, a1, 16 +; RV64I-NEXT: or a0, a2, a0 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: sw a0, 0(a3) +; RV64I-NEXT: ret +; +; RV64ZBKB-LABEL: pack_i32_lo_noext_hi_packh_nozeroext: +; RV64ZBKB: # %bb.0: +; RV64ZBKB-NEXT: packh a1, a1, a2 +; RV64ZBKB-NEXT: slli a1, a1, 16 +; RV64ZBKB-NEXT: or a0, a1, a0 +; RV64ZBKB-NEXT: sw a0, 0(a3) +; RV64ZBKB-NEXT: ret + %b = zext i8 %1 to i32 + %c = zext i8 %2 to i32 + %d = shl i32 %c, 8 + %e = or i32 %b, %d + %f = shl i32 %e, 16 + %g = or i32 %f, %a + store i32 %g, ptr %p + ret void +} + +; Make sure we can match packh+slli without having the input bytes zero extended. +define i64 @pack_i64_lo_noext_hi_packh_nozeroext(i64 %a, i8 %1, i8 %2, ptr %p) nounwind { +; RV64I-LABEL: pack_i64_lo_noext_hi_packh_nozeroext: +; RV64I: # %bb.0: +; RV64I-NEXT: zext.b a1, a1 +; RV64I-NEXT: zext.b a2, a2 +; RV64I-NEXT: slli a1, a1, 16 +; RV64I-NEXT: slli a2, a2, 24 +; RV64I-NEXT: or a1, a2, a1 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: ret +; +; RV64ZBKB-LABEL: pack_i64_lo_noext_hi_packh_nozeroext: +; RV64ZBKB: # %bb.0: +; RV64ZBKB-NEXT: packh a1, a1, a2 +; RV64ZBKB-NEXT: slli a1, a1, 16 +; RV64ZBKB-NEXT: or a0, a1, a0 +; RV64ZBKB-NEXT: ret + %b = zext i8 %1 to i64 + %c = zext i8 %2 to i64 + %d = shl i64 %c, 8 + %e = or i64 %b, %d + %f = shl i64 %e, 16 + %g = or i64 %f, %a + ret i64 %g +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ssegN-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ssegN-store.ll new file mode 100644 index 0000000..abf2894 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ssegN-store.ll @@ -0,0 +1,72 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s + +define void @store_factor2(<8 x i8> %v0, <8 x i8> %v1, ptr %ptr, i64 %stride) { +; CHECK-LABEL: store_factor2: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vssseg2e8.v v8, (a0), a1 +; CHECK-NEXT: ret + call void @llvm.riscv.sseg2.store.mask.v8i8.i64.i64(<8 x i8> %v0, <8 x i8> %v1, ptr %ptr, i64 %stride, <8 x i1> splat (i1 true), i64 8) + ret void +} + +define void @store_factor3(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, ptr %ptr, i64 %stride) { +; CHECK-LABEL: store_factor3: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vssseg3e8.v v8, (a0), a1 +; CHECK-NEXT: ret + call void 
@llvm.riscv.sseg3.store.mask.v8i8.i64.i64(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, ptr %ptr, i64 %stride, <8 x i1> splat (i1 true), i64 8) + ret void +} + +define void @store_factor4(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, ptr %ptr, i64 %stride) { +; CHECK-LABEL: store_factor4: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vssseg4e8.v v8, (a0), a1 +; CHECK-NEXT: ret + call void @llvm.riscv.sseg4.store.mask.v8i8.i64.i64(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, ptr %ptr, i64 %stride, <8 x i1> splat (i1 true), i64 8) + ret void +} + +define void @store_factor5(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, <8 x i8> %v4, ptr %ptr, i64 %stride) { +; CHECK-LABEL: store_factor5: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vssseg5e8.v v8, (a0), a1 +; CHECK-NEXT: ret + call void @llvm.riscv.sseg5.store.mask.v8i8.i64.i64(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, <8 x i8> %v4, ptr %ptr, i64 %stride, <8 x i1> splat (i1 true), i64 8) + ret void +} + +define void @store_factor6(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, <8 x i8> %v4, <8 x i8> %v5, ptr %ptr, i64 %stride) { +; CHECK-LABEL: store_factor6: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vssseg6e8.v v8, (a0), a1 +; CHECK-NEXT: ret + call void @llvm.riscv.sseg6.store.mask.v8i8.i64.i64(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, <8 x i8> %v4, <8 x i8> %v5, ptr %ptr, i64 %stride, <8 x i1> splat (i1 true), i64 8) + ret void +} + +define void @store_factor7(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, <8 x i8> %v4, <8 x i8> %v5, <8 x i8> %v6, ptr %ptr, i64 %stride) { +; CHECK-LABEL: store_factor7: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vssseg7e8.v v8, (a0), a1 +; CHECK-NEXT: ret + call void @llvm.riscv.sseg7.store.mask.v8i8.i64.i64(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, <8 x i8> %v4, <8 x i8> %v5, <8 x i8> %v6, ptr %ptr, i64 %stride, <8 x i1> splat (i1 true), i64 8) + ret void +} + +define void @store_factor8(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, <8 x i8> %v4, <8 x i8> %v5, <8 x i8> %v6, <8 x i8> %v7, ptr %ptr, i64 %stride) { +; CHECK-LABEL: store_factor8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vssseg8e8.v v8, (a0), a1 +; CHECK-NEXT: ret + call void @llvm.riscv.sseg8.store.mask.v8i8.i64.i64(<8 x i8> %v0, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3, <8 x i8> %v4, <8 x i8> %v5, <8 x i8> %v6, <8 x i8> %v7, ptr %ptr, i64 %stride, <8 x i1> splat (i1 true), i64 8) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll b/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll index 7990dfc..4c84304 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll @@ -366,8 +366,8 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_ ; RV64X60-NEXT: # => This Inner Loop Header: Depth=2 ; RV64X60-NEXT: vl2r.v v8, (s2) ; RV64X60-NEXT: vl2r.v v10, (s3) -; RV64X60-NEXT: sub s1, s1, t3 ; RV64X60-NEXT: vaaddu.vv v8, v8, v10 +; RV64X60-NEXT: sub s1, s1, t3 ; RV64X60-NEXT: vs2r.v v8, (s4) ; RV64X60-NEXT: add s4, s4, t3 ; RV64X60-NEXT: add s3, s3, t3 diff --git a/llvm/test/CodeGen/RISCV/unaligned-load-store.ll b/llvm/test/CodeGen/RISCV/unaligned-load-store.ll index c9c49e8..cb046cd 100644 --- 
a/llvm/test/CodeGen/RISCV/unaligned-load-store.ll +++ b/llvm/test/CodeGen/RISCV/unaligned-load-store.ll @@ -204,18 +204,16 @@ define i64 @load_i64(ptr %p) { ; RV64IZBKB-NEXT: lbu a2, 5(a0) ; RV64IZBKB-NEXT: lbu a3, 6(a0) ; RV64IZBKB-NEXT: lbu a4, 7(a0) -; RV64IZBKB-NEXT: lbu a5, 0(a0) -; RV64IZBKB-NEXT: lbu a6, 1(a0) -; RV64IZBKB-NEXT: lbu a7, 2(a0) -; RV64IZBKB-NEXT: lbu a0, 3(a0) +; RV64IZBKB-NEXT: lbu a5, 1(a0) +; RV64IZBKB-NEXT: lbu a6, 2(a0) +; RV64IZBKB-NEXT: lbu a7, 3(a0) +; RV64IZBKB-NEXT: lbu a0, 0(a0) +; RV64IZBKB-NEXT: packh a3, a3, a4 ; RV64IZBKB-NEXT: packh a1, a1, a2 -; RV64IZBKB-NEXT: packh a2, a3, a4 -; RV64IZBKB-NEXT: packh a3, a5, a6 -; RV64IZBKB-NEXT: packh a0, a7, a0 -; RV64IZBKB-NEXT: slli a2, a2, 16 -; RV64IZBKB-NEXT: slli a0, a0, 16 -; RV64IZBKB-NEXT: or a1, a2, a1 -; RV64IZBKB-NEXT: or a0, a0, a3 +; RV64IZBKB-NEXT: packh a2, a6, a7 +; RV64IZBKB-NEXT: packh a0, a0, a5 +; RV64IZBKB-NEXT: packw a1, a1, a3 +; RV64IZBKB-NEXT: packw a0, a0, a2 ; RV64IZBKB-NEXT: pack a0, a0, a1 ; RV64IZBKB-NEXT: ret ; |
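
The sketch below is an illustrative standalone example, not one of the test files in this change; the file and function names are made up for demonstration. It shows the byte-packing idiom that the rv64zbkb.ll and unaligned-load-store.ll checks above exercise: with the Zbkb extension enabled, a zero-extend/shift/or merge of two bytes is expected to select to a single packh instead of separate slli and or instructions.

; packh_demo.ll -- illustrative sketch only (assumed names, not part of this patch).
; Try: llc -mtriple=riscv64 -mattr=+zbkb packh_demo.ll -o -
; Expected selection (roughly): packh a0, a0, a1 followed by ret.
define i32 @packh_demo(i8 zeroext %lo, i8 zeroext %hi) nounwind {
  %lo32 = zext i8 %lo to i32
  %hi32 = zext i8 %hi to i32
  %hi.sh = shl i32 %hi32, 8       ; place %hi in bits [15:8]
  %res = or i32 %lo32, %hi.sh     ; matches packh: zext.b(lo) | (zext.b(hi) << 8)
  ret i32 %res
}

On RV64, packw then combines the low 16 bits of two such results into a sign-extended 32-bit value, and pack combines two 32-bit halves into a 64-bit value, which is why the RV64IZBKB output above assembles an unaligned i64 load from four packh, two packw, and one pack rather than a longer chain of shifts and ors.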