author     lewis-revill <lewis.revill@embecosm.com>    2020-07-15 11:50:03 +0100
committer  Hans Wennborg <hans@chromium.org>           2020-07-27 13:07:37 +0200
commit     f749d92f7a32f71598e8c1e1f37d7eb261a40ec5 (patch)
tree       37dbaa4097ecb1810aa2ef52b3147ae1f5a35709
parent     3c1fca803bc14617b67ba2125e1b4b77190e9f86 (diff)
[RISCV] Add matching of codegen patterns to RISCV Bit Manipulation Zbb asm instructions
This patch provides optimization of bit manipulation operations by enabling the +experimental-b target feature.

It adds matching of single-block patterns of instructions to specific bit-manipulation instructions from the base subset (Zbb subextension) of the experimental B extension of RISC-V, and it also adds the corresponding codegen tests.

This patch is based on Claire Wolf's proposal for the bit manipulation extension of RISCV: https://github.com/riscv/riscv-bitmanip/blob/master/bitmanip-0.92.pdf

Differential Revision: https://reviews.llvm.org/D79870

(cherry picked from commit e2692f0ee7f338fea4fc918669643315cefc7678)
-rw-r--r--  llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp   190
-rw-r--r--  llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h       6
-rw-r--r--  llvm/lib/Target/RISCV/RISCVISelLowering.cpp     9
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoB.td       76
-rw-r--r--  llvm/test/CodeGen/RISCV/rv32Zbb.ll           1218
-rw-r--r--  llvm/test/CodeGen/RISCV/rv64Zbb.ll           1149
6 files changed, 2645 insertions, 3 deletions
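As a rough illustration of what the new patterns cover (a sketch only, assuming a Clang/GCC-style front end lowers these idioms to the same IR the new tests exercise; the function names below are made up for this example), the single-block patterns now selected to Zbb instructions correspond to source code such as:

// Illustrative only: the patch matches the equivalent LLVM IR
// (xor/shl/lshr with -1, llvm.ctpop, select-based min/max), not C++ source.
#include <cstdint>

// ~(~a << b)  ->  slo   (shift left, shifting ones in; b assumed < 32)
uint32_t slo32(uint32_t a, uint32_t b) { return ~(~a << b); }

// ~(~a >> b)  ->  sro   (shift right, shifting ones in; b assumed < 32)
uint32_t sro32(uint32_t a, uint32_t b) { return ~(~a >> b); }

// population count  ->  pcnt
int popcount32(uint32_t a) { return __builtin_popcount(a); }

// signed minimum via compare+select  ->  min (minu/max/maxu analogously)
int32_t min32(int32_t a, int32_t b) { return a < b ? a : b; }

In the tests, the RV32I/RV64I check lines show the multi-instruction sequences emitted without the extension, while the RV32IB/RV32IBB and RV64IB/RV64IBB check lines reduce to a single Zbb instruction.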
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index a0ae0508..99e5135 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -184,6 +184,196 @@ bool RISCVDAGToDAGISel::SelectAddrFI(SDValue Addr, SDValue &Base) {
return false;
}
+// Check that it is a SLOI (Shift Left Ones Immediate). We first check that
+// it is the right node tree:
+//
+// (OR (SHL RS1, VC2), VC1)
+//
+// and then we check that VC1, the mask used to fill with ones, is compatible
+// with VC2, the shamt:
+//
+// VC1 == maskTrailingOnes<uint64_t>(VC2)
+
+bool RISCVDAGToDAGISel::SelectSLOI(SDValue N, SDValue &RS1, SDValue &Shamt) {
+ MVT XLenVT = Subtarget->getXLenVT();
+ if (N.getOpcode() == ISD::OR) {
+ SDValue Or = N;
+ if (Or.getOperand(0).getOpcode() == ISD::SHL) {
+ SDValue Shl = Or.getOperand(0);
+ if (isa<ConstantSDNode>(Shl.getOperand(1)) &&
+ isa<ConstantSDNode>(Or.getOperand(1))) {
+ if (XLenVT == MVT::i64) {
+ uint64_t VC1 = Or.getConstantOperandVal(1);
+ uint64_t VC2 = Shl.getConstantOperandVal(1);
+ if (VC1 == maskTrailingOnes<uint64_t>(VC2)) {
+ RS1 = Shl.getOperand(0);
+ Shamt = CurDAG->getTargetConstant(VC2, SDLoc(N),
+ Shl.getOperand(1).getValueType());
+ return true;
+ }
+ }
+ if (XLenVT == MVT::i32) {
+ uint32_t VC1 = Or.getConstantOperandVal(1);
+ uint32_t VC2 = Shl.getConstantOperandVal(1);
+ if (VC1 == maskTrailingOnes<uint32_t>(VC2)) {
+ RS1 = Shl.getOperand(0);
+ Shamt = CurDAG->getTargetConstant(VC2, SDLoc(N),
+ Shl.getOperand(1).getValueType());
+ return true;
+ }
+ }
+ }
+ }
+ }
+ return false;
+}
+
+// Check that it is a SROI (Shift Right Ones Immediate). We first check that
+// it is the right node tree:
+//
+// (OR (SRL RS1, VC2), VC1)
+//
+// and then we check that VC1, the mask used to fill with ones, is compatible
+// with VC2, the shamt:
+//
+// VC1 == maskLeadingOnes<uint64_t>(VC2)
+
+bool RISCVDAGToDAGISel::SelectSROI(SDValue N, SDValue &RS1, SDValue &Shamt) {
+ MVT XLenVT = Subtarget->getXLenVT();
+ if (N.getOpcode() == ISD::OR) {
+ SDValue Or = N;
+ if (Or.getOperand(0).getOpcode() == ISD::SRL) {
+ SDValue Srl = Or.getOperand(0);
+ if (isa<ConstantSDNode>(Srl.getOperand(1)) &&
+ isa<ConstantSDNode>(Or.getOperand(1))) {
+ if (XLenVT == MVT::i64) {
+ uint64_t VC1 = Or.getConstantOperandVal(1);
+ uint64_t VC2 = Srl.getConstantOperandVal(1);
+ if (VC1 == maskLeadingOnes<uint64_t>(VC2)) {
+ RS1 = Srl.getOperand(0);
+ Shamt = CurDAG->getTargetConstant(VC2, SDLoc(N),
+ Srl.getOperand(1).getValueType());
+ return true;
+ }
+ }
+ if (XLenVT == MVT::i32) {
+ uint32_t VC1 = Or.getConstantOperandVal(1);
+ uint32_t VC2 = Srl.getConstantOperandVal(1);
+ if (VC1 == maskLeadingOnes<uint32_t>(VC2)) {
+ RS1 = Srl.getOperand(0);
+ Shamt = CurDAG->getTargetConstant(VC2, SDLoc(N),
+ Srl.getOperand(1).getValueType());
+ return true;
+ }
+ }
+ }
+ }
+ }
+ return false;
+}
+
+// Check that it is a SLLIUW (Shift Logical Left Immediate Unsigned i32
+// on RV64).
+// SLLIUW is the same as SLLI except that it clears bits XLEN-1:32 of the
+// input RS1 before shifting.
+// We first check that it is the right node tree:
+//
+// (AND (SHL RS1, VC2), VC1)
+//
+// We check that VC2, the shamt, is less than 32; otherwise the pattern is
+// exactly the same as SLLI and we give priority to that.
+// Finally we check that VC1, the mask used to clear the upper 32 bits
+// of RS1, is correct:
+//
+// VC1 == (0xFFFFFFFF << VC2)
+
+bool RISCVDAGToDAGISel::SelectSLLIUW(SDValue N, SDValue &RS1, SDValue &Shamt) {
+ if (N.getOpcode() == ISD::AND && Subtarget->getXLenVT() == MVT::i64) {
+ SDValue And = N;
+ if (And.getOperand(0).getOpcode() == ISD::SHL) {
+ SDValue Shl = And.getOperand(0);
+ if (isa<ConstantSDNode>(Shl.getOperand(1)) &&
+ isa<ConstantSDNode>(And.getOperand(1))) {
+ uint64_t VC1 = And.getConstantOperandVal(1);
+ uint64_t VC2 = Shl.getConstantOperandVal(1);
+ if (VC2 < 32 && VC1 == ((uint64_t)0xFFFFFFFF << VC2)) {
+ RS1 = Shl.getOperand(0);
+ Shamt = CurDAG->getTargetConstant(VC2, SDLoc(N),
+ Shl.getOperand(1).getValueType());
+ return true;
+ }
+ }
+ }
+ }
+ return false;
+}
+
+// Check that it is a SLOIW (Shift Left Ones Immediate i32 on RV64).
+// We first check that it is the right node tree:
+//
+// (SIGN_EXTEND_INREG (OR (SHL RS1, VC2), VC1))
+//
+// and then we check that VC1, the mask used to fill with ones, is compatible
+// with VC2, the shamt:
+//
+// VC1 == maskTrailingOnes<uint32_t>(VC2)
+
+bool RISCVDAGToDAGISel::SelectSLOIW(SDValue N, SDValue &RS1, SDValue &Shamt) {
+ if (Subtarget->getXLenVT() == MVT::i64 &&
+ N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+ cast<VTSDNode>(N.getOperand(1))->getVT() == MVT::i32) {
+ if (N.getOperand(0).getOpcode() == ISD::OR) {
+ SDValue Or = N.getOperand(0);
+ if (Or.getOperand(0).getOpcode() == ISD::SHL) {
+ SDValue Shl = Or.getOperand(0);
+ if (isa<ConstantSDNode>(Shl.getOperand(1)) &&
+ isa<ConstantSDNode>(Or.getOperand(1))) {
+ uint32_t VC1 = Or.getConstantOperandVal(1);
+ uint32_t VC2 = Shl.getConstantOperandVal(1);
+ if (VC1 == maskTrailingOnes<uint32_t>(VC2)) {
+ RS1 = Shl.getOperand(0);
+ Shamt = CurDAG->getTargetConstant(VC2, SDLoc(N),
+ Shl.getOperand(1).getValueType());
+ return true;
+ }
+ }
+ }
+ }
+ }
+ return false;
+}
+
+// Check that it is a SROIW (Shift Right Ones Immediate i32 on RV64).
+// We first check that it is the right node tree:
+//
+// (OR (SRL RS1, VC2), VC1)
+//
+// and then we check that VC1, the mask used to fill with ones, is compatible
+// with VC2, the shamt:
+//
+// VC1 == maskLeadingOnes<uint32_t>(VC2)
+
+bool RISCVDAGToDAGISel::SelectSROIW(SDValue N, SDValue &RS1, SDValue &Shamt) {
+ if (N.getOpcode() == ISD::OR && Subtarget->getXLenVT() == MVT::i64) {
+ SDValue Or = N;
+ if (Or.getOperand(0).getOpcode() == ISD::SRL) {
+ SDValue Srl = Or.getOperand(0);
+ if (isa<ConstantSDNode>(Srl.getOperand(1)) &&
+ isa<ConstantSDNode>(Or.getOperand(1))) {
+ uint32_t VC1 = Or.getConstantOperandVal(1);
+ uint32_t VC2 = Srl.getConstantOperandVal(1);
+ if (VC1 == maskLeadingOnes<uint32_t>(VC2)) {
+ RS1 = Srl.getOperand(0);
+ Shamt = CurDAG->getTargetConstant(VC2, SDLoc(N),
+ Srl.getOperand(1).getValueType());
+ return true;
+ }
+ }
+ }
+ }
+ return false;
+}
+
// Merge an ADDI into the offset of a load/store instruction where possible.
// (load (addi base, off1), off2) -> (load base, off1+off2)
// (store val, (addi base, off1), off2) -> (store val, base, off1+off2)
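To make the mask-compatibility conditions in the selectors added above (SelectSLOI, SelectSROI, SelectSLLIUW) concrete, here is a small standalone sketch, not LLVM code: maskTrailingOnes/maskLeadingOnes below merely mirror the semantics of the LLVM helpers of the same name, with one worked constant per selector.

#include <cassert>
#include <cstdint>

// N right-most bits set / N left-most bits set, for a 64-bit value.
static uint64_t maskTrailingOnes(unsigned n) { return n ? ~0ULL >> (64 - n) : 0; }
static uint64_t maskLeadingOnes(unsigned n)  { return n ? ~0ULL << (64 - n) : 0; }

int main() {
  // SLOI: (or (shl rs1, 3), 7) -> (SLOI rs1, 3), because 7 == 0b111 fills
  // exactly the three bit positions vacated by the shift with ones.
  assert(maskTrailingOnes(3) == 0x7);

  // SROI: (or (srl rs1, 8), 0xFF00000000000000) -> (SROI rs1, 8).
  assert(maskLeadingOnes(8) == 0xFF00000000000000ULL);

  // SLLIUW: (and (shl rs1, 4), VC1) -> (SLLIUW rs1, 4) only when
  // VC1 == 0xFFFFFFFF << 4, i.e. when the AND is equivalent to clearing
  // bits 63:32 of rs1 before shifting.
  assert((0xFFFFFFFFULL << 4) == 0xFFFFFFFF0ULL);
  return 0;
}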
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
index dcf733e..4e382ee 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -45,6 +45,12 @@ public:
bool SelectAddrFI(SDValue Addr, SDValue &Base);
+ bool SelectSLOI(SDValue N, SDValue &RS1, SDValue &Shamt);
+ bool SelectSROI(SDValue N, SDValue &RS1, SDValue &Shamt);
+ bool SelectSLLIUW(SDValue N, SDValue &RS1, SDValue &Shamt);
+ bool SelectSLOIW(SDValue N, SDValue &RS1, SDValue &Shamt);
+ bool SelectSROIW(SDValue N, SDValue &RS1, SDValue &Shamt);
+
// Include the pieces autogenerated from the target description.
#include "RISCVGenDAGISel.inc"
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 91fc69b..fb44f82 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -152,9 +152,12 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::ROTL, XLenVT, Expand);
setOperationAction(ISD::ROTR, XLenVT, Expand);
setOperationAction(ISD::BSWAP, XLenVT, Expand);
- setOperationAction(ISD::CTTZ, XLenVT, Expand);
- setOperationAction(ISD::CTLZ, XLenVT, Expand);
- setOperationAction(ISD::CTPOP, XLenVT, Expand);
+
+ if (!Subtarget.hasStdExtZbb()) {
+ setOperationAction(ISD::CTTZ, XLenVT, Expand);
+ setOperationAction(ISD::CTLZ, XLenVT, Expand);
+ setOperationAction(ISD::CTPOP, XLenVT, Expand);
+ }
ISD::CondCode FPCCToExtend[] = {
ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
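For comparison, when the Zbb subextension is not present these three nodes remain marked Expand, and legalization produces the bit-twiddling sequence visible in the RV32I/RV64I check lines of the tests below. A C++ sketch of that expansion for a 32-bit ctpop (illustrative only; the constants are the same ones the test output materializes with lui/addi, e.g. lui 349525 + addi 1365 = 0x55555555):

#include <cstdint>

// Classic SWAR population count: roughly what ISD::CTPOP expands to without Zbb.
uint32_t expanded_ctpop(uint32_t x) {
  x = x - ((x >> 1) & 0x55555555u);                  // 2-bit partial sums
  x = (x & 0x33333333u) + ((x >> 2) & 0x33333333u);  // 4-bit partial sums
  x = (x + (x >> 4)) & 0x0F0F0F0Fu;                  // 8-bit partial sums
  return (x * 0x01010101u) >> 24;                    // add the four bytes
}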
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoB.td b/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
index 34a4636..dc3d6cb 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
@@ -632,3 +632,79 @@ let Predicates = [HasStdExtZbproposedc, HasStdExtZbbOrZbp, HasStdExtC, IsRV64] i
def : CompressPat<(PACK GPRC:$rs1, GPRC:$rs1, X0),
(C_ZEXTW GPRC:$rs1)>;
} // Predicates = [HasStdExtZbproposedc, HasStdExtC, IsRV64]
+
+//===----------------------------------------------------------------------===//
+// Codegen patterns
+//===----------------------------------------------------------------------===//
+def SLOIPat : ComplexPattern<XLenVT, 2, "SelectSLOI", [or]>;
+def SROIPat : ComplexPattern<XLenVT, 2, "SelectSROI", [or]>;
+def SLLIUWPat : ComplexPattern<i64, 2, "SelectSLLIUW", [and]>;
+def SLOIWPat : ComplexPattern<i64, 2, "SelectSLOIW", [sext_inreg]>;
+def SROIWPat : ComplexPattern<i64, 2, "SelectSROIW", [or]>;
+
+let Predicates = [HasStdExtZbb] in {
+def : Pat<(xor (shl (xor GPR:$rs1, -1), GPR:$rs2), -1),
+ (SLO GPR:$rs1, GPR:$rs2)>;
+def : Pat<(xor (srl (xor GPR:$rs1, -1), GPR:$rs2), -1),
+ (SRO GPR:$rs1, GPR:$rs2)>;
+def : Pat<(SLOIPat GPR:$rs1, uimmlog2xlen:$shamt),
+ (SLOI GPR:$rs1, uimmlog2xlen:$shamt)>;
+def : Pat<(SROIPat GPR:$rs1, uimmlog2xlen:$shamt),
+ (SROI GPR:$rs1, uimmlog2xlen:$shamt)>;
+def : Pat<(ctlz GPR:$rs1), (CLZ GPR:$rs1)>;
+def : Pat<(cttz GPR:$rs1), (CTZ GPR:$rs1)>;
+def : Pat<(ctpop GPR:$rs1), (PCNT GPR:$rs1)>;
+} // Predicates = [HasStdExtZbb]
+
+let Predicates = [HasStdExtZbb, IsRV32] in
+def : Pat<(sra (shl GPR:$rs1, (i32 24)), (i32 24)), (SEXTB GPR:$rs1)>;
+let Predicates = [HasStdExtZbb, IsRV64] in
+def : Pat<(sra (shl GPR:$rs1, (i64 56)), (i64 56)), (SEXTB GPR:$rs1)>;
+
+let Predicates = [HasStdExtZbb, IsRV32] in
+def : Pat<(sra (shl GPR:$rs1, (i32 16)), (i32 16)), (SEXTH GPR:$rs1)>;
+let Predicates = [HasStdExtZbb, IsRV64] in
+def : Pat<(sra (shl GPR:$rs1, (i64 48)), (i64 48)), (SEXTH GPR:$rs1)>;
+
+let Predicates = [HasStdExtZbb] in {
+def : Pat<(smin GPR:$rs1, GPR:$rs2), (MIN GPR:$rs1, GPR:$rs2)>;
+def : Pat<(riscv_selectcc GPR:$rs1, GPR:$rs2, (XLenVT 20), GPR:$rs1, GPR:$rs2),
+ (MIN GPR:$rs1, GPR:$rs2)>;
+def : Pat<(smax GPR:$rs1, GPR:$rs2), (MAX GPR:$rs1, GPR:$rs2)>;
+def : Pat<(riscv_selectcc GPR:$rs2, GPR:$rs1, (XLenVT 20), GPR:$rs1, GPR:$rs2),
+ (MAX GPR:$rs1, GPR:$rs2)>;
+def : Pat<(umin GPR:$rs1, GPR:$rs2), (MINU GPR:$rs1, GPR:$rs2)>;
+def : Pat<(riscv_selectcc GPR:$rs1, GPR:$rs2, (XLenVT 12), GPR:$rs1, GPR:$rs2),
+ (MINU GPR:$rs1, GPR:$rs2)>;
+def : Pat<(umax GPR:$rs1, GPR:$rs2), (MAXU GPR:$rs1, GPR:$rs2)>;
+def : Pat<(riscv_selectcc GPR:$rs2, GPR:$rs1, (XLenVT 12), GPR:$rs1, GPR:$rs2),
+ (MAXU GPR:$rs1, GPR:$rs2)>;
+} // Predicates = [HasStdExtZbb]
+
+let Predicates = [HasStdExtZbb, IsRV64] in {
+def : Pat<(and (add GPR:$rs, simm12:$simm12), (i64 0xFFFFFFFF)),
+ (ADDIWU GPR:$rs, simm12:$simm12)>;
+def : Pat<(SLLIUWPat GPR:$rs1, uimmlog2xlen:$shamt),
+ (SLLIUW GPR:$rs1, uimmlog2xlen:$shamt)>;
+def : Pat<(and (add GPR:$rs1, GPR:$rs2), (i64 0xFFFFFFFF)),
+ (ADDWU GPR:$rs1, GPR:$rs2)>;
+def : Pat<(and (sub GPR:$rs1, GPR:$rs2), (i64 0xFFFFFFFF)),
+ (SUBWU GPR:$rs1, GPR:$rs2)>;
+def : Pat<(add GPR:$rs1, (and GPR:$rs2, (i64 0xFFFFFFFF))),
+ (ADDUW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(sub GPR:$rs1, (and GPR:$rs2, (i64 0xFFFFFFFF))),
+ (SUBUW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(xor (riscv_sllw (xor GPR:$rs1, -1), GPR:$rs2), -1),
+ (SLOW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(xor (riscv_srlw (xor GPR:$rs1, -1), GPR:$rs2), -1),
+ (SROW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(SLOIWPat GPR:$rs1, uimmlog2xlen:$shamt),
+ (SLOIW GPR:$rs1, uimmlog2xlen:$shamt)>;
+def : Pat<(SROIWPat GPR:$rs1, uimmlog2xlen:$shamt),
+ (SROIW GPR:$rs1, uimmlog2xlen:$shamt)>;
+def : Pat<(add (ctlz (and GPR:$rs1, (i64 0xFFFFFFFF))), (i64 -32)),
+ (CLZW GPR:$rs1)>;
+// We don't pattern-match CTZW here as it has the same pattern and result as
+// RV64 CTZ.
+def : Pat<(ctpop (and GPR:$rs1, (i64 0xFFFFFFFF))), (PCNTW GPR:$rs1)>;
+} // Predicates = [HasStdExtZbb, IsRV64]
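The SEXTB/SEXTH patterns above match the usual shift-pair idiom for sign extension, which is why the immediates are 24/56 and 16/48 on RV32/RV64. (The riscv_selectcc patterns with condition-code immediates 20 and 12 presumably correspond to ISD::SETLT and ISD::SETULT, so compare+select forms of min/max are matched alongside smin/smax/umin/umax.) A sketch of the idiom, assuming two's-complement arithmetic shifts as in the shl/ashr IR the tests use:

#include <cstdint>

// (sra (shl x, XLEN-8), XLEN-8) sign-extends the low byte, and likewise
// XLEN-16 for the low halfword; these map to sext.b / sext.h.
int32_t sextb_rv32(int32_t x) { return (x << 24) >> 24; }
int32_t sexth_rv32(int32_t x) { return (x << 16) >> 16; }
int64_t sextb_rv64(int64_t x) { return (x << 56) >> 56; }
int64_t sexth_rv64(int64_t x) { return (x << 48) >> 48; }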
diff --git a/llvm/test/CodeGen/RISCV/rv32Zbb.ll b/llvm/test/CodeGen/RISCV/rv32Zbb.ll
new file mode 100644
index 0000000..6933bad
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv32Zbb.ll
@@ -0,0 +1,1218 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV32I
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-b -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV32IB
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-zbb -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV32IBB
+
+define i32 @slo_i32(i32 %a, i32 %b) nounwind {
+; RV32I-LABEL: slo_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: not a0, a0
+; RV32I-NEXT: sll a0, a0, a1
+; RV32I-NEXT: not a0, a0
+; RV32I-NEXT: ret
+;
+; RV32IB-LABEL: slo_i32:
+; RV32IB: # %bb.0:
+; RV32IB-NEXT: slo a0, a0, a1
+; RV32IB-NEXT: ret
+;
+; RV32IBB-LABEL: slo_i32:
+; RV32IBB: # %bb.0:
+; RV32IBB-NEXT: slo a0, a0, a1
+; RV32IBB-NEXT: ret
+ %neg = xor i32 %a, -1
+ %shl = shl i32 %neg, %b
+ %neg1 = xor i32 %shl, -1
+ ret i32 %neg1
+}
+
+; Because we do not directly match i64 code patterns on RV32, some i64
+; patterns do not yet have any matching bit manipulation instructions on RV32.
+; This test is kept here in case future expansions of the experimental-b
+; extension introduce instructions suitable for this pattern.
+
+define i64 @slo_i64(i64 %a, i64 %b) nounwind {
+; RV32I-LABEL: slo_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi a3, a2, -32
+; RV32I-NEXT: not a0, a0
+; RV32I-NEXT: bltz a3, .LBB1_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: mv a2, zero
+; RV32I-NEXT: sll a1, a0, a3
+; RV32I-NEXT: j .LBB1_3
+; RV32I-NEXT: .LBB1_2:
+; RV32I-NEXT: not a1, a1
+; RV32I-NEXT: sll a1, a1, a2
+; RV32I-NEXT: addi a3, zero, 31
+; RV32I-NEXT: sub a3, a3, a2
+; RV32I-NEXT: srli a4, a0, 1
+; RV32I-NEXT: srl a3, a4, a3
+; RV32I-NEXT: or a1, a1, a3
+; RV32I-NEXT: sll a2, a0, a2
+; RV32I-NEXT: .LBB1_3:
+; RV32I-NEXT: not a1, a1
+; RV32I-NEXT: not a0, a2
+; RV32I-NEXT: ret
+;
+; RV32IB-LABEL: slo_i64:
+; RV32IB: # %bb.0:
+; RV32IB-NEXT: addi a3, a2, -32
+; RV32IB-NEXT: not a0, a0
+; RV32IB-NEXT: bltz a3, .LBB1_2
+; RV32IB-NEXT: # %bb.1:
+; RV32IB-NEXT: mv a2, zero
+; RV32IB-NEXT: sll a1, a0, a3
+; RV32IB-NEXT: j .LBB1_3
+; RV32IB-NEXT: .LBB1_2:
+; RV32IB-NEXT: not a1, a1
+; RV32IB-NEXT: sll a1, a1, a2
+; RV32IB-NEXT: addi a3, zero, 31
+; RV32IB-NEXT: sub a3, a3, a2
+; RV32IB-NEXT: srli a4, a0, 1
+; RV32IB-NEXT: srl a3, a4, a3
+; RV32IB-NEXT: or a1, a1, a3
+; RV32IB-NEXT: sll a2, a0, a2
+; RV32IB-NEXT: .LBB1_3:
+; RV32IB-NEXT: not a1, a1
+; RV32IB-NEXT: not a0, a2
+; RV32IB-NEXT: ret
+;
+; RV32IBB-LABEL: slo_i64:
+; RV32IBB: # %bb.0:
+; RV32IBB-NEXT: addi a3, a2, -32
+; RV32IBB-NEXT: not a0, a0
+; RV32IBB-NEXT: bltz a3, .LBB1_2
+; RV32IBB-NEXT: # %bb.1:
+; RV32IBB-NEXT: mv a2, zero
+; RV32IBB-NEXT: sll a1, a0, a3
+; RV32IBB-NEXT: j .LBB1_3
+; RV32IBB-NEXT: .LBB1_2:
+; RV32IBB-NEXT: not a1, a1
+; RV32IBB-NEXT: sll a1, a1, a2
+; RV32IBB-NEXT: addi a3, zero, 31
+; RV32IBB-NEXT: sub a3, a3, a2
+; RV32IBB-NEXT: srli a4, a0, 1
+; RV32IBB-NEXT: srl a3, a4, a3
+; RV32IBB-NEXT: or a1, a1, a3
+; RV32IBB-NEXT: sll a2, a0, a2
+; RV32IBB-NEXT: .LBB1_3:
+; RV32IBB-NEXT: not a1, a1
+; RV32IBB-NEXT: not a0, a2
+; RV32IBB-NEXT: ret
+ %neg = xor i64 %a, -1
+ %shl = shl i64 %neg, %b
+ %neg1 = xor i64 %shl, -1
+ ret i64 %neg1
+}
+
+define i32 @sro_i32(i32 %a, i32 %b) nounwind {
+; RV32I-LABEL: sro_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: not a0, a0
+; RV32I-NEXT: srl a0, a0, a1
+; RV32I-NEXT: not a0, a0
+; RV32I-NEXT: ret
+;
+; RV32IB-LABEL: sro_i32:
+; RV32IB: # %bb.0:
+; RV32IB-NEXT: sro a0, a0, a1
+; RV32IB-NEXT: ret
+;
+; RV32IBB-LABEL: sro_i32:
+; RV32IBB: # %bb.0:
+; RV32IBB-NEXT: sro a0, a0, a1
+; RV32IBB-NEXT: ret
+ %neg = xor i32 %a, -1
+ %shr = lshr i32 %neg, %b
+ %neg1 = xor i32 %shr, -1
+ ret i32 %neg1
+}
+
+; Because we do not directly match i64 code patterns on RV32, some i64
+; patterns do not yet have any matching bit manipulation instructions on RV32.
+; This test is kept here in case future expansions of the experimental-b
+; extension introduce instructions suitable for this pattern.
+
+define i64 @sro_i64(i64 %a, i64 %b) nounwind {
+; RV32I-LABEL: sro_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi a3, a2, -32
+; RV32I-NEXT: not a1, a1
+; RV32I-NEXT: bltz a3, .LBB3_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: mv a2, zero
+; RV32I-NEXT: srl a0, a1, a3
+; RV32I-NEXT: j .LBB3_3
+; RV32I-NEXT: .LBB3_2:
+; RV32I-NEXT: not a0, a0
+; RV32I-NEXT: srl a0, a0, a2
+; RV32I-NEXT: addi a3, zero, 31
+; RV32I-NEXT: sub a3, a3, a2
+; RV32I-NEXT: slli a4, a1, 1
+; RV32I-NEXT: sll a3, a4, a3
+; RV32I-NEXT: or a0, a0, a3
+; RV32I-NEXT: srl a2, a1, a2
+; RV32I-NEXT: .LBB3_3:
+; RV32I-NEXT: not a0, a0
+; RV32I-NEXT: not a1, a2
+; RV32I-NEXT: ret
+;
+; RV32IB-LABEL: sro_i64:
+; RV32IB: # %bb.0:
+; RV32IB-NEXT: addi a3, a2, -32
+; RV32IB-NEXT: not a1, a1
+; RV32IB-NEXT: bltz a3, .LBB3_2
+; RV32IB-NEXT: # %bb.1:
+; RV32IB-NEXT: mv a2, zero
+; RV32IB-NEXT: srl a0, a1, a3
+; RV32IB-NEXT: j .LBB3_3
+; RV32IB-NEXT: .LBB3_2:
+; RV32IB-NEXT: not a0, a0
+; RV32IB-NEXT: srl a0, a0, a2
+; RV32IB-NEXT: addi a3, zero, 31
+; RV32IB-NEXT: sub a3, a3, a2
+; RV32IB-NEXT: slli a4, a1, 1
+; RV32IB-NEXT: sll a3, a4, a3
+; RV32IB-NEXT: or a0, a0, a3
+; RV32IB-NEXT: srl a2, a1, a2
+; RV32IB-NEXT: .LBB3_3:
+; RV32IB-NEXT: not a0, a0
+; RV32IB-NEXT: not a1, a2
+; RV32IB-NEXT: ret
+;
+; RV32IBB-LABEL: sro_i64:
+; RV32IBB: # %bb.0:
+; RV32IBB-NEXT: addi a3, a2, -32
+; RV32IBB-NEXT: not a1, a1
+; RV32IBB-NEXT: bltz a3, .LBB3_2
+; RV32IBB-NEXT: # %bb.1:
+; RV32IBB-NEXT: mv a2, zero
+; RV32IBB-NEXT: srl a0, a1, a3
+; RV32IBB-NEXT: j .LBB3_3
+; RV32IBB-NEXT: .LBB3_2:
+; RV32IBB-NEXT: not a0, a0
+; RV32IBB-NEXT: srl a0, a0, a2
+; RV32IBB-NEXT: addi a3, zero, 31
+; RV32IBB-NEXT: sub a3, a3, a2
+; RV32IBB-NEXT: slli a4, a1, 1
+; RV32IBB-NEXT: sll a3, a4, a3
+; RV32IBB-NEXT: or a0, a0, a3
+; RV32IBB-NEXT: srl a2, a1, a2
+; RV32IBB-NEXT: .LBB3_3:
+; RV32IBB-NEXT: not a0, a0
+; RV32IBB-NEXT: not a1, a2
+; RV32IBB-NEXT: ret
+ %neg = xor i64 %a, -1
+ %shr = lshr i64 %neg, %b
+ %neg1 = xor i64 %shr, -1
+ ret i64 %neg1
+}
+
+define i32 @sloi_i32(i32 %a) nounwind {
+; RV32I-LABEL: sloi_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: slli a0, a0, 1
+; RV32I-NEXT: ori a0, a0, 1
+; RV32I-NEXT: ret
+;
+; RV32IB-LABEL: sloi_i32:
+; RV32IB: # %bb.0:
+; RV32IB-NEXT: sloi a0, a0, 1
+; RV32IB-NEXT: ret
+;
+; RV32IBB-LABEL: sloi_i32:
+; RV32IBB: # %bb.0:
+; RV32IBB-NEXT: sloi a0, a0, 1
+; RV32IBB-NEXT: ret
+ %neg = shl i32 %a, 1
+ %neg12 = or i32 %neg, 1
+ ret i32 %neg12
+}
+
+define i64 @sloi_i64(i64 %a) nounwind {
+; RV32I-LABEL: sloi_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: srli a2, a0, 31
+; RV32I-NEXT: slli a1, a1, 1
+; RV32I-NEXT: or a1, a1, a2
+; RV32I-NEXT: slli a0, a0, 1
+; RV32I-NEXT: ori a0, a0, 1
+; RV32I-NEXT: ret
+;
+; RV32IB-LABEL: sloi_i64:
+; RV32IB: # %bb.0:
+; RV32IB-NEXT: addi a2, zero, 1
+; RV32IB-NEXT: fsl a1, a1, a2, a0
+; RV32IB-NEXT: sloi a0, a0, 1
+; RV32IB-NEXT: ret
+;
+; RV32IBB-LABEL: sloi_i64:
+; RV32IBB: # %bb.0:
+; RV32IBB-NEXT: srli a2, a0, 31
+; RV32IBB-NEXT: slli a1, a1, 1
+; RV32IBB-NEXT: or a1, a1, a2
+; RV32IBB-NEXT: sloi a0, a0, 1
+; RV32IBB-NEXT: ret
+ %neg = shl i64 %a, 1
+ %neg12 = or i64 %neg, 1
+ ret i64 %neg12
+}
+
+define i32 @sroi_i32(i32 %a) nounwind {
+; RV32I-LABEL: sroi_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: srli a0, a0, 1
+; RV32I-NEXT: lui a1, 524288
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: ret
+;
+; RV32IB-LABEL: sroi_i32:
+; RV32IB: # %bb.0:
+; RV32IB-NEXT: sroi a0, a0, 1
+; RV32IB-NEXT: ret
+;
+; RV32IBB-LABEL: sroi_i32:
+; RV32IBB: # %bb.0:
+; RV32IBB-NEXT: sroi a0, a0, 1
+; RV32IBB-NEXT: ret
+ %neg = lshr i32 %a, 1
+ %neg12 = or i32 %neg, -2147483648
+ ret i32 %neg12
+}
+
+define i64 @sroi_i64(i64 %a) nounwind {
+; RV32I-LABEL: sroi_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: slli a2, a1, 31
+; RV32I-NEXT: srli a0, a0, 1
+; RV32I-NEXT: or a0, a0, a2
+; RV32I-NEXT: srli a1, a1, 1
+; RV32I-NEXT: lui a2, 524288
+; RV32I-NEXT: or a1, a1, a2
+; RV32I-NEXT: ret
+;
+; RV32IB-LABEL: sroi_i64:
+; RV32IB: # %bb.0:
+; RV32IB-NEXT: addi a2, zero, 31
+; RV32IB-NEXT: fsl a0, a1, a2, a0
+; RV32IB-NEXT: sroi a1, a1, 1
+; RV32IB-NEXT: ret
+;
+; RV32IBB-LABEL: sroi_i64:
+; RV32IBB: # %bb.0:
+; RV32IBB-NEXT: slli a2, a1, 31
+; RV32IBB-NEXT: srli a0, a0, 1
+; RV32IBB-NEXT: or a0, a0, a2
+; RV32IBB-NEXT: sroi a1, a1, 1
+; RV32IBB-NEXT: ret
+ %neg = lshr i64 %a, 1
+ %neg12 = or i64 %neg, -9223372036854775808
+ ret i64 %neg12
+}
+
+declare i32 @llvm.ctlz.i32(i32, i1)
+
+define i32 @ctlz_i32(i32 %a) nounwind {
+; RV32I-LABEL: ctlz_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp)
+; RV32I-NEXT: beqz a0, .LBB8_2
+; RV32I-NEXT: # %bb.1: # %cond.false
+; RV32I-NEXT: srli a1, a0, 1
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: srli a1, a0, 2
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: srli a1, a0, 4
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: srli a1, a0, 8
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: srli a1, a0, 16
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: not a0, a0
+; RV32I-NEXT: srli a1, a0, 1
+; RV32I-NEXT: lui a2, 349525
+; RV32I-NEXT: addi a2, a2, 1365
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sub a0, a0, a1
+; RV32I-NEXT: lui a1, 209715
+; RV32I-NEXT: addi a1, a1, 819
+; RV32I-NEXT: and a2, a0, a1
+; RV32I-NEXT: srli a0, a0, 2
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: add a0, a2, a0
+; RV32I-NEXT: srli a1, a0, 4
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: lui a1, 61681
+; RV32I-NEXT: addi a1, a1, -241
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: lui a1, 4112
+; RV32I-NEXT: addi a1, a1, 257
+; RV32I-NEXT: call __mulsi3
+; RV32I-NEXT: srli a0, a0, 24
+; RV32I-NEXT: j .LBB8_3
+; RV32I-NEXT: .LBB8_2:
+; RV32I-NEXT: addi a0, zero, 32
+; RV32I-NEXT: .LBB8_3: # %cond.end
+; RV32I-NEXT: lw ra, 12(sp)
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IB-LABEL: ctlz_i32:
+; RV32IB: # %bb.0:
+; RV32IB-NEXT: beqz a0, .LBB8_2
+; RV32IB-NEXT: # %bb.1: # %cond.false
+; RV32IB-NEXT: clz a0, a0
+; RV32IB-NEXT: ret
+; RV32IB-NEXT: .LBB8_2:
+; RV32IB-NEXT: addi a0, zero, 32
+; RV32IB-NEXT: ret
+;
+; RV32IBB-LABEL: ctlz_i32:
+; RV32IBB: # %bb.0:
+; RV32IBB-NEXT: beqz a0, .LBB8_2
+; RV32IBB-NEXT: # %bb.1: # %cond.false
+; RV32IBB-NEXT: clz a0, a0
+; RV32IBB-NEXT: ret
+; RV32IBB-NEXT: .LBB8_2:
+; RV32IBB-NEXT: addi a0, zero, 32
+; RV32IBB-NEXT: ret
+ %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
+ ret i32 %1
+}
+
+declare i64 @llvm.ctlz.i64(i64, i1)
+
+define i64 @ctlz_i64(i64 %a) nounwind {
+; RV32I-LABEL: ctlz_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp)
+; RV32I-NEXT: sw s0, 24(sp)
+; RV32I-NEXT: sw s1, 20(sp)
+; RV32I-NEXT: sw s2, 16(sp)
+; RV32I-NEXT: sw s3, 12(sp)
+; RV32I-NEXT: sw s4, 8(sp)
+; RV32I-NEXT: sw s5, 4(sp)
+; RV32I-NEXT: sw s6, 0(sp)
+; RV32I-NEXT: mv s3, a1
+; RV32I-NEXT: mv s4, a0
+; RV32I-NEXT: srli a0, a1, 1
+; RV32I-NEXT: or a0, a1, a0
+; RV32I-NEXT: srli a1, a0, 2
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: srli a1, a0, 4
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: srli a1, a0, 8
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: srli a1, a0, 16
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: not a0, a0
+; RV32I-NEXT: srli a1, a0, 1
+; RV32I-NEXT: lui a2, 349525
+; RV32I-NEXT: addi s5, a2, 1365
+; RV32I-NEXT: and a1, a1, s5
+; RV32I-NEXT: sub a0, a0, a1
+; RV32I-NEXT: lui a1, 209715
+; RV32I-NEXT: addi s1, a1, 819
+; RV32I-NEXT: and a1, a0, s1
+; RV32I-NEXT: srli a0, a0, 2
+; RV32I-NEXT: and a0, a0, s1
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: srli a1, a0, 4
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: lui a1, 61681
+; RV32I-NEXT: addi s6, a1, -241
+; RV32I-NEXT: and a0, a0, s6
+; RV32I-NEXT: lui a1, 4112
+; RV32I-NEXT: addi s0, a1, 257
+; RV32I-NEXT: mv a1, s0
+; RV32I-NEXT: call __mulsi3
+; RV32I-NEXT: mv s2, a0
+; RV32I-NEXT: srli a0, s4, 1
+; RV32I-NEXT: or a0, s4, a0
+; RV32I-NEXT: srli a1, a0, 2
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: srli a1, a0, 4
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: srli a1, a0, 8
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: srli a1, a0, 16
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: not a0, a0
+; RV32I-NEXT: srli a1, a0, 1
+; RV32I-NEXT: and a1, a1, s5
+; RV32I-NEXT: sub a0, a0, a1
+; RV32I-NEXT: and a1, a0, s1
+; RV32I-NEXT: srli a0, a0, 2
+; RV32I-NEXT: and a0, a0, s1
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: srli a1, a0, 4
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: and a0, a0, s6
+; RV32I-NEXT: mv a1, s0
+; RV32I-NEXT: call __mulsi3
+; RV32I-NEXT: bnez s3, .LBB9_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: srli a0, a0, 24
+; RV32I-NEXT: addi a0, a0, 32
+; RV32I-NEXT: j .LBB9_3
+; RV32I-NEXT: .LBB9_2:
+; RV32I-NEXT: srli a0, s2, 24
+; RV32I-NEXT: .LBB9_3:
+; RV32I-NEXT: mv a1, zero
+; RV32I-NEXT: lw s6, 0(sp)
+; RV32I-NEXT: lw s5, 4(sp)
+; RV32I-NEXT: lw s4, 8(sp)
+; RV32I-NEXT: lw s3, 12(sp)
+; RV32I-NEXT: lw s2, 16(sp)
+; RV32I-NEXT: lw s1, 20(sp)
+; RV32I-NEXT: lw s0, 24(sp)
+; RV32I-NEXT: lw ra, 28(sp)
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: ret
+;
+; RV32IB-LABEL: ctlz_i64:
+; RV32IB: # %bb.0:
+; RV32IB-NEXT: bnez a1, .LBB9_2
+; RV32IB-NEXT: # %bb.1:
+; RV32IB-NEXT: clz a0, a0
+; RV32IB-NEXT: addi a0, a0, 32
+; RV32IB-NEXT: mv a1, zero
+; RV32IB-NEXT: ret
+; RV32IB-NEXT: .LBB9_2:
+; RV32IB-NEXT: clz a0, a1
+; RV32IB-NEXT: mv a1, zero
+; RV32IB-NEXT: ret
+;
+; RV32IBB-LABEL: ctlz_i64:
+; RV32IBB: # %bb.0:
+; RV32IBB-NEXT: bnez a1, .LBB9_2
+; RV32IBB-NEXT: # %bb.1:
+; RV32IBB-NEXT: clz a0, a0
+; RV32IBB-NEXT: addi a0, a0, 32
+; RV32IBB-NEXT: mv a1, zero
+; RV32IBB-NEXT: ret
+; RV32IBB-NEXT: .LBB9_2:
+; RV32IBB-NEXT: clz a0, a1
+; RV32IBB-NEXT: mv a1, zero
+; RV32IBB-NEXT: ret
+ %1 = call i64 @llvm.ctlz.i64(i64 %a, i1 false)
+ ret i64 %1
+}
+
+declare i32 @llvm.cttz.i32(i32, i1)
+
+define i32 @cttz_i32(i32 %a) nounwind {
+; RV32I-LABEL: cttz_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp)
+; RV32I-NEXT: beqz a0, .LBB10_2
+; RV32I-NEXT: # %bb.1: # %cond.false
+; RV32I-NEXT: addi a1, a0, -1
+; RV32I-NEXT: not a0, a0
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: srli a1, a0, 1
+; RV32I-NEXT: lui a2, 349525
+; RV32I-NEXT: addi a2, a2, 1365
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sub a0, a0, a1
+; RV32I-NEXT: lui a1, 209715
+; RV32I-NEXT: addi a1, a1, 819
+; RV32I-NEXT: and a2, a0, a1
+; RV32I-NEXT: srli a0, a0, 2
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: add a0, a2, a0
+; RV32I-NEXT: srli a1, a0, 4
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: lui a1, 61681
+; RV32I-NEXT: addi a1, a1, -241
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: lui a1, 4112
+; RV32I-NEXT: addi a1, a1, 257
+; RV32I-NEXT: call __mulsi3
+; RV32I-NEXT: srli a0, a0, 24
+; RV32I-NEXT: j .LBB10_3
+; RV32I-NEXT: .LBB10_2:
+; RV32I-NEXT: addi a0, zero, 32
+; RV32I-NEXT: .LBB10_3: # %cond.end
+; RV32I-NEXT: lw ra, 12(sp)
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IB-LABEL: cttz_i32:
+; RV32IB: # %bb.0:
+; RV32IB-NEXT: beqz a0, .LBB10_2
+; RV32IB-NEXT: # %bb.1: # %cond.false
+; RV32IB-NEXT: ctz a0, a0
+; RV32IB-NEXT: ret
+; RV32IB-NEXT: .LBB10_2:
+; RV32IB-NEXT: addi a0, zero, 32
+; RV32IB-NEXT: ret
+;
+; RV32IBB-LABEL: cttz_i32:
+; RV32IBB: # %bb.0:
+; RV32IBB-NEXT: beqz a0, .LBB10_2
+; RV32IBB-NEXT: # %bb.1: # %cond.false
+; RV32IBB-NEXT: ctz a0, a0
+; RV32IBB-NEXT: ret
+; RV32IBB-NEXT: .LBB10_2:
+; RV32IBB-NEXT: addi a0, zero, 32
+; RV32IBB-NEXT: ret
+ %1 = call i32 @llvm.cttz.i32(i32 %a, i1 false)
+ ret i32 %1
+}
+
+declare i64 @llvm.cttz.i64(i64, i1)
+
+define i64 @cttz_i64(i64 %a) nounwind {
+; RV32I-LABEL: cttz_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp)
+; RV32I-NEXT: sw s0, 24(sp)
+; RV32I-NEXT: sw s1, 20(sp)
+; RV32I-NEXT: sw s2, 16(sp)
+; RV32I-NEXT: sw s3, 12(sp)
+; RV32I-NEXT: sw s4, 8(sp)
+; RV32I-NEXT: sw s5, 4(sp)
+; RV32I-NEXT: sw s6, 0(sp)
+; RV32I-NEXT: mv s3, a1
+; RV32I-NEXT: mv s4, a0
+; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: not a1, s4
+; RV32I-NEXT: and a0, a1, a0
+; RV32I-NEXT: srli a1, a0, 1
+; RV32I-NEXT: lui a2, 349525
+; RV32I-NEXT: addi s5, a2, 1365
+; RV32I-NEXT: and a1, a1, s5
+; RV32I-NEXT: sub a0, a0, a1
+; RV32I-NEXT: lui a1, 209715
+; RV32I-NEXT: addi s0, a1, 819
+; RV32I-NEXT: and a1, a0, s0
+; RV32I-NEXT: srli a0, a0, 2
+; RV32I-NEXT: and a0, a0, s0
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: srli a1, a0, 4
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: lui a1, 61681
+; RV32I-NEXT: addi s6, a1, -241
+; RV32I-NEXT: and a0, a0, s6
+; RV32I-NEXT: lui a1, 4112
+; RV32I-NEXT: addi s1, a1, 257
+; RV32I-NEXT: mv a1, s1
+; RV32I-NEXT: call __mulsi3
+; RV32I-NEXT: mv s2, a0
+; RV32I-NEXT: addi a0, s3, -1
+; RV32I-NEXT: not a1, s3
+; RV32I-NEXT: and a0, a1, a0
+; RV32I-NEXT: srli a1, a0, 1
+; RV32I-NEXT: and a1, a1, s5
+; RV32I-NEXT: sub a0, a0, a1
+; RV32I-NEXT: and a1, a0, s0
+; RV32I-NEXT: srli a0, a0, 2
+; RV32I-NEXT: and a0, a0, s0
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: srli a1, a0, 4
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: and a0, a0, s6
+; RV32I-NEXT: mv a1, s1
+; RV32I-NEXT: call __mulsi3
+; RV32I-NEXT: bnez s4, .LBB11_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: srli a0, a0, 24
+; RV32I-NEXT: addi a0, a0, 32
+; RV32I-NEXT: j .LBB11_3
+; RV32I-NEXT: .LBB11_2:
+; RV32I-NEXT: srli a0, s2, 24
+; RV32I-NEXT: .LBB11_3:
+; RV32I-NEXT: mv a1, zero
+; RV32I-NEXT: lw s6, 0(sp)
+; RV32I-NEXT: lw s5, 4(sp)
+; RV32I-NEXT: lw s4, 8(sp)
+; RV32I-NEXT: lw s3, 12(sp)
+; RV32I-NEXT: lw s2, 16(sp)
+; RV32I-NEXT: lw s1, 20(sp)
+; RV32I-NEXT: lw s0, 24(sp)
+; RV32I-NEXT: lw ra, 28(sp)
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: ret
+;
+; RV32IB-LABEL: cttz_i64:
+; RV32IB: # %bb.0:
+; RV32IB-NEXT: bnez a0, .LBB11_2
+; RV32IB-NEXT: # %bb.1:
+; RV32IB-NEXT: ctz a0, a1
+; RV32IB-NEXT: addi a0, a0, 32
+; RV32IB-NEXT: mv a1, zero
+; RV32IB-NEXT: ret
+; RV32IB-NEXT: .LBB11_2:
+; RV32IB-NEXT: ctz a0, a0
+; RV32IB-NEXT: mv a1, zero
+; RV32IB-NEXT: ret
+;
+; RV32IBB-LABEL: cttz_i64:
+; RV32IBB: # %bb.0:
+; RV32IBB-NEXT: bnez a0, .LBB11_2
+; RV32IBB-NEXT: # %bb.1:
+; RV32IBB-NEXT: ctz a0, a1
+; RV32IBB-NEXT: addi a0, a0, 32
+; RV32IBB-NEXT: mv a1, zero
+; RV32IBB-NEXT: ret
+; RV32IBB-NEXT: .LBB11_2:
+; RV32IBB-NEXT: ctz a0, a0
+; RV32IBB-NEXT: mv a1, zero
+; RV32IBB-NEXT: ret
+ %1 = call i64 @llvm.cttz.i64(i64 %a, i1 false)
+ ret i64 %1
+}
+
+declare i32 @llvm.ctpop.i32(i32)
+
+define i32 @ctpop_i32(i32 %a) nounwind {
+; RV32I-LABEL: ctpop_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp)
+; RV32I-NEXT: srli a1, a0, 1
+; RV32I-NEXT: lui a2, 349525
+; RV32I-NEXT: addi a2, a2, 1365
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: sub a0, a0, a1
+; RV32I-NEXT: lui a1, 209715
+; RV32I-NEXT: addi a1, a1, 819
+; RV32I-NEXT: and a2, a0, a1
+; RV32I-NEXT: srli a0, a0, 2
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: add a0, a2, a0
+; RV32I-NEXT: srli a1, a0, 4
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: lui a1, 61681
+; RV32I-NEXT: addi a1, a1, -241
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: lui a1, 4112
+; RV32I-NEXT: addi a1, a1, 257
+; RV32I-NEXT: call __mulsi3
+; RV32I-NEXT: srli a0, a0, 24
+; RV32I-NEXT: lw ra, 12(sp)
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IB-LABEL: ctpop_i32:
+; RV32IB: # %bb.0:
+; RV32IB-NEXT: pcnt a0, a0
+; RV32IB-NEXT: ret
+;
+; RV32IBB-LABEL: ctpop_i32:
+; RV32IBB: # %bb.0:
+; RV32IBB-NEXT: pcnt a0, a0
+; RV32IBB-NEXT: ret
+ %1 = call i32 @llvm.ctpop.i32(i32 %a)
+ ret i32 %1
+}
+
+declare i64 @llvm.ctpop.i64(i64)
+
+define i64 @ctpop_i64(i64 %a) nounwind {
+; RV32I-LABEL: ctpop_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp)
+; RV32I-NEXT: sw s0, 24(sp)
+; RV32I-NEXT: sw s1, 20(sp)
+; RV32I-NEXT: sw s2, 16(sp)
+; RV32I-NEXT: sw s3, 12(sp)
+; RV32I-NEXT: sw s4, 8(sp)
+; RV32I-NEXT: sw s5, 4(sp)
+; RV32I-NEXT: mv s2, a0
+; RV32I-NEXT: srli a0, a1, 1
+; RV32I-NEXT: lui a2, 349525
+; RV32I-NEXT: addi s3, a2, 1365
+; RV32I-NEXT: and a0, a0, s3
+; RV32I-NEXT: sub a0, a1, a0
+; RV32I-NEXT: lui a1, 209715
+; RV32I-NEXT: addi s0, a1, 819
+; RV32I-NEXT: and a1, a0, s0
+; RV32I-NEXT: srli a0, a0, 2
+; RV32I-NEXT: and a0, a0, s0
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: srli a1, a0, 4
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: lui a1, 61681
+; RV32I-NEXT: addi s4, a1, -241
+; RV32I-NEXT: and a0, a0, s4
+; RV32I-NEXT: lui a1, 4112
+; RV32I-NEXT: addi s1, a1, 257
+; RV32I-NEXT: mv a1, s1
+; RV32I-NEXT: call __mulsi3
+; RV32I-NEXT: srli s5, a0, 24
+; RV32I-NEXT: srli a0, s2, 1
+; RV32I-NEXT: and a0, a0, s3
+; RV32I-NEXT: sub a0, s2, a0
+; RV32I-NEXT: and a1, a0, s0
+; RV32I-NEXT: srli a0, a0, 2
+; RV32I-NEXT: and a0, a0, s0
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: srli a1, a0, 4
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: and a0, a0, s4
+; RV32I-NEXT: mv a1, s1
+; RV32I-NEXT: call __mulsi3
+; RV32I-NEXT: srli a0, a0, 24
+; RV32I-NEXT: add a0, a0, s5
+; RV32I-NEXT: mv a1, zero
+; RV32I-NEXT: lw s5, 4(sp)
+; RV32I-NEXT: lw s4, 8(sp)
+; RV32I-NEXT: lw s3, 12(sp)
+; RV32I-NEXT: lw s2, 16(sp)
+; RV32I-NEXT: lw s1, 20(sp)
+; RV32I-NEXT: lw s0, 24(sp)
+; RV32I-NEXT: lw ra, 28(sp)
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: ret
+;
+; RV32IB-LABEL: ctpop_i64:
+; RV32IB: # %bb.0:
+; RV32IB-NEXT: pcnt a1, a1
+; RV32IB-NEXT: pcnt a0, a0
+; RV32IB-NEXT: add a0, a0, a1
+; RV32IB-NEXT: mv a1, zero
+; RV32IB-NEXT: ret
+;
+; RV32IBB-LABEL: ctpop_i64:
+; RV32IBB: # %bb.0:
+; RV32IBB-NEXT: pcnt a1, a1
+; RV32IBB-NEXT: pcnt a0, a0
+; RV32IBB-NEXT: add a0, a0, a1
+; RV32IBB-NEXT: mv a1, zero
+; RV32IBB-NEXT: ret
+ %1 = call i64 @llvm.ctpop.i64(i64 %a)
+ ret i64 %1
+}
+
+define i32 @sextb_i32(i32 %a) nounwind {
+; RV32I-LABEL: sextb_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: slli a0, a0, 24
+; RV32I-NEXT: srai a0, a0, 24
+; RV32I-NEXT: ret
+;
+; RV32IB-LABEL: sextb_i32:
+; RV32IB: # %bb.0:
+; RV32IB-NEXT: sext.b a0, a0
+; RV32IB-NEXT: ret
+;
+; RV32IBB-LABEL: sextb_i32:
+; RV32IBB: # %bb.0:
+; RV32IBB-NEXT: sext.b a0, a0
+; RV32IBB-NEXT: ret
+ %shl = shl i32 %a, 24
+ %shr = ashr exact i32 %shl, 24
+ ret i32 %shr
+}
+
+define i64 @sextb_i64(i64 %a) nounwind {
+; RV32I-LABEL: sextb_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: slli a1, a0, 24
+; RV32I-NEXT: srai a0, a1, 24
+; RV32I-NEXT: srai a1, a1, 31
+; RV32I-NEXT: ret
+;
+; RV32IB-LABEL: sextb_i64:
+; RV32IB: # %bb.0:
+; RV32IB-NEXT: sext.b a2, a0
+; RV32IB-NEXT: slli a0, a0, 24
+; RV32IB-NEXT: srai a1, a0, 31
+; RV32IB-NEXT: mv a0, a2
+; RV32IB-NEXT: ret
+;
+; RV32IBB-LABEL: sextb_i64:
+; RV32IBB: # %bb.0:
+; RV32IBB-NEXT: sext.b a2, a0
+; RV32IBB-NEXT: slli a0, a0, 24
+; RV32IBB-NEXT: srai a1, a0, 31
+; RV32IBB-NEXT: mv a0, a2
+; RV32IBB-NEXT: ret
+ %shl = shl i64 %a, 56
+ %shr = ashr exact i64 %shl, 56
+ ret i64 %shr
+}
+
+define i32 @sexth_i32(i32 %a) nounwind {
+; RV32I-LABEL: sexth_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: srai a0, a0, 16
+; RV32I-NEXT: ret
+;
+; RV32IB-LABEL: sexth_i32:
+; RV32IB: # %bb.0:
+; RV32IB-NEXT: sext.h a0, a0
+; RV32IB-NEXT: ret
+;
+; RV32IBB-LABEL: sexth_i32:
+; RV32IBB: # %bb.0:
+; RV32IBB-NEXT: sext.h a0, a0
+; RV32IBB-NEXT: ret
+ %shl = shl i32 %a, 16
+ %shr = ashr exact i32 %shl, 16
+ ret i32 %shr
+}
+
+define i64 @sexth_i64(i64 %a) nounwind {
+; RV32I-LABEL: sexth_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: slli a1, a0, 16
+; RV32I-NEXT: srai a0, a1, 16
+; RV32I-NEXT: srai a1, a1, 31
+; RV32I-NEXT: ret
+;
+; RV32IB-LABEL: sexth_i64:
+; RV32IB: # %bb.0:
+; RV32IB-NEXT: sext.h a2, a0
+; RV32IB-NEXT: slli a0, a0, 16
+; RV32IB-NEXT: srai a1, a0, 31
+; RV32IB-NEXT: mv a0, a2
+; RV32IB-NEXT: ret
+;
+; RV32IBB-LABEL: sexth_i64:
+; RV32IBB: # %bb.0:
+; RV32IBB-NEXT: sext.h a2, a0
+; RV32IBB-NEXT: slli a0, a0, 16
+; RV32IBB-NEXT: srai a1, a0, 31
+; RV32IBB-NEXT: mv a0, a2
+; RV32IBB-NEXT: ret
+ %shl = shl i64 %a, 48
+ %shr = ashr exact i64 %shl, 48
+ ret i64 %shr
+}
+
+define i32 @min_i32(i32 %a, i32 %b) nounwind {
+; RV32I-LABEL: min_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: blt a0, a1, .LBB18_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: mv a0, a1
+; RV32I-NEXT: .LBB18_2:
+; RV32I-NEXT: ret
+;
+; RV32IB-LABEL: min_i32:
+; RV32IB: # %bb.0:
+; RV32IB-NEXT: min a0, a0, a1
+; RV32IB-NEXT: ret
+;
+; RV32IBB-LABEL: min_i32:
+; RV32IBB: # %bb.0:
+; RV32IBB-NEXT: min a0, a0, a1
+; RV32IBB-NEXT: ret
+ %cmp = icmp slt i32 %a, %b
+ %cond = select i1 %cmp, i32 %a, i32 %b
+ ret i32 %cond
+}
+
+; Because we do not directly match i64 code patterns on RV32, some i64
+; patterns do not yet have any matching bit manipulation instructions on RV32.
+; This test is kept here in case future expansions of the experimental-b
+; extension introduce instructions suitable for this pattern.
+
+define i64 @min_i64(i64 %a, i64 %b) nounwind {
+; RV32I-LABEL: min_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: beq a1, a3, .LBB19_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: slt a4, a1, a3
+; RV32I-NEXT: beqz a4, .LBB19_3
+; RV32I-NEXT: j .LBB19_4
+; RV32I-NEXT: .LBB19_2:
+; RV32I-NEXT: sltu a4, a0, a2
+; RV32I-NEXT: bnez a4, .LBB19_4
+; RV32I-NEXT: .LBB19_3:
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: .LBB19_4:
+; RV32I-NEXT: ret
+;
+; RV32IB-LABEL: min_i64:
+; RV32IB: # %bb.0:
+; RV32IB-NEXT: beq a1, a3, .LBB19_2
+; RV32IB-NEXT: # %bb.1:
+; RV32IB-NEXT: slt a4, a1, a3
+; RV32IB-NEXT: beqz a4, .LBB19_3
+; RV32IB-NEXT: j .LBB19_4
+; RV32IB-NEXT: .LBB19_2:
+; RV32IB-NEXT: sltu a4, a0, a2
+; RV32IB-NEXT: bnez a4, .LBB19_4
+; RV32IB-NEXT: .LBB19_3:
+; RV32IB-NEXT: mv a0, a2
+; RV32IB-NEXT: mv a1, a3
+; RV32IB-NEXT: .LBB19_4:
+; RV32IB-NEXT: ret
+;
+; RV32IBB-LABEL: min_i64:
+; RV32IBB: # %bb.0:
+; RV32IBB-NEXT: beq a1, a3, .LBB19_2
+; RV32IBB-NEXT: # %bb.1:
+; RV32IBB-NEXT: slt a4, a1, a3
+; RV32IBB-NEXT: beqz a4, .LBB19_3
+; RV32IBB-NEXT: j .LBB19_4
+; RV32IBB-NEXT: .LBB19_2:
+; RV32IBB-NEXT: sltu a4, a0, a2
+; RV32IBB-NEXT: bnez a4, .LBB19_4
+; RV32IBB-NEXT: .LBB19_3:
+; RV32IBB-NEXT: mv a0, a2
+; RV32IBB-NEXT: mv a1, a3
+; RV32IBB-NEXT: .LBB19_4:
+; RV32IBB-NEXT: ret
+ %cmp = icmp slt i64 %a, %b
+ %cond = select i1 %cmp, i64 %a, i64 %b
+ ret i64 %cond
+}
+
+define i32 @max_i32(i32 %a, i32 %b) nounwind {
+; RV32I-LABEL: max_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: blt a1, a0, .LBB20_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: mv a0, a1
+; RV32I-NEXT: .LBB20_2:
+; RV32I-NEXT: ret
+;
+; RV32IB-LABEL: max_i32:
+; RV32IB: # %bb.0:
+; RV32IB-NEXT: max a0, a0, a1
+; RV32IB-NEXT: ret
+;
+; RV32IBB-LABEL: max_i32:
+; RV32IBB: # %bb.0:
+; RV32IBB-NEXT: max a0, a0, a1
+; RV32IBB-NEXT: ret
+ %cmp = icmp sgt i32 %a, %b
+ %cond = select i1 %cmp, i32 %a, i32 %b
+ ret i32 %cond
+}
+
+; Because we do not directly match i64 code patterns on RV32, some i64
+; patterns do not yet have any matching bit manipulation instructions on RV32.
+; This test is kept here in case future expansions of the experimental-b
+; extension introduce instructions suitable for this pattern.
+
+define i64 @max_i64(i64 %a, i64 %b) nounwind {
+; RV32I-LABEL: max_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: beq a1, a3, .LBB21_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: slt a4, a3, a1
+; RV32I-NEXT: beqz a4, .LBB21_3
+; RV32I-NEXT: j .LBB21_4
+; RV32I-NEXT: .LBB21_2:
+; RV32I-NEXT: sltu a4, a2, a0
+; RV32I-NEXT: bnez a4, .LBB21_4
+; RV32I-NEXT: .LBB21_3:
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: .LBB21_4:
+; RV32I-NEXT: ret
+;
+; RV32IB-LABEL: max_i64:
+; RV32IB: # %bb.0:
+; RV32IB-NEXT: beq a1, a3, .LBB21_2
+; RV32IB-NEXT: # %bb.1:
+; RV32IB-NEXT: slt a4, a3, a1
+; RV32IB-NEXT: beqz a4, .LBB21_3
+; RV32IB-NEXT: j .LBB21_4
+; RV32IB-NEXT: .LBB21_2:
+; RV32IB-NEXT: sltu a4, a2, a0
+; RV32IB-NEXT: bnez a4, .LBB21_4
+; RV32IB-NEXT: .LBB21_3:
+; RV32IB-NEXT: mv a0, a2
+; RV32IB-NEXT: mv a1, a3
+; RV32IB-NEXT: .LBB21_4:
+; RV32IB-NEXT: ret
+;
+; RV32IBB-LABEL: max_i64:
+; RV32IBB: # %bb.0:
+; RV32IBB-NEXT: beq a1, a3, .LBB21_2
+; RV32IBB-NEXT: # %bb.1:
+; RV32IBB-NEXT: slt a4, a3, a1
+; RV32IBB-NEXT: beqz a4, .LBB21_3
+; RV32IBB-NEXT: j .LBB21_4
+; RV32IBB-NEXT: .LBB21_2:
+; RV32IBB-NEXT: sltu a4, a2, a0
+; RV32IBB-NEXT: bnez a4, .LBB21_4
+; RV32IBB-NEXT: .LBB21_3:
+; RV32IBB-NEXT: mv a0, a2
+; RV32IBB-NEXT: mv a1, a3
+; RV32IBB-NEXT: .LBB21_4:
+; RV32IBB-NEXT: ret
+ %cmp = icmp sgt i64 %a, %b
+ %cond = select i1 %cmp, i64 %a, i64 %b
+ ret i64 %cond
+}
+
+define i32 @minu_i32(i32 %a, i32 %b) nounwind {
+; RV32I-LABEL: minu_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: bltu a0, a1, .LBB22_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: mv a0, a1
+; RV32I-NEXT: .LBB22_2:
+; RV32I-NEXT: ret
+;
+; RV32IB-LABEL: minu_i32:
+; RV32IB: # %bb.0:
+; RV32IB-NEXT: minu a0, a0, a1
+; RV32IB-NEXT: ret
+;
+; RV32IBB-LABEL: minu_i32:
+; RV32IBB: # %bb.0:
+; RV32IBB-NEXT: minu a0, a0, a1
+; RV32IBB-NEXT: ret
+ %cmp = icmp ult i32 %a, %b
+ %cond = select i1 %cmp, i32 %a, i32 %b
+ ret i32 %cond
+}
+
+; Because we do not directly match i64 code patterns on RV32, some i64
+; patterns do not yet have any matching bit manipulation instructions on RV32.
+; This test is kept here in case future expansions of the experimental-b
+; extension introduce instructions suitable for this pattern.
+
+define i64 @minu_i64(i64 %a, i64 %b) nounwind {
+; RV32I-LABEL: minu_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: beq a1, a3, .LBB23_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: sltu a4, a1, a3
+; RV32I-NEXT: beqz a4, .LBB23_3
+; RV32I-NEXT: j .LBB23_4
+; RV32I-NEXT: .LBB23_2:
+; RV32I-NEXT: sltu a4, a0, a2
+; RV32I-NEXT: bnez a4, .LBB23_4
+; RV32I-NEXT: .LBB23_3:
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: .LBB23_4:
+; RV32I-NEXT: ret
+;
+; RV32IB-LABEL: minu_i64:
+; RV32IB: # %bb.0:
+; RV32IB-NEXT: beq a1, a3, .LBB23_2
+; RV32IB-NEXT: # %bb.1:
+; RV32IB-NEXT: sltu a4, a1, a3
+; RV32IB-NEXT: beqz a4, .LBB23_3
+; RV32IB-NEXT: j .LBB23_4
+; RV32IB-NEXT: .LBB23_2:
+; RV32IB-NEXT: sltu a4, a0, a2
+; RV32IB-NEXT: bnez a4, .LBB23_4
+; RV32IB-NEXT: .LBB23_3:
+; RV32IB-NEXT: mv a0, a2
+; RV32IB-NEXT: mv a1, a3
+; RV32IB-NEXT: .LBB23_4:
+; RV32IB-NEXT: ret
+;
+; RV32IBB-LABEL: minu_i64:
+; RV32IBB: # %bb.0:
+; RV32IBB-NEXT: beq a1, a3, .LBB23_2
+; RV32IBB-NEXT: # %bb.1:
+; RV32IBB-NEXT: sltu a4, a1, a3
+; RV32IBB-NEXT: beqz a4, .LBB23_3
+; RV32IBB-NEXT: j .LBB23_4
+; RV32IBB-NEXT: .LBB23_2:
+; RV32IBB-NEXT: sltu a4, a0, a2
+; RV32IBB-NEXT: bnez a4, .LBB23_4
+; RV32IBB-NEXT: .LBB23_3:
+; RV32IBB-NEXT: mv a0, a2
+; RV32IBB-NEXT: mv a1, a3
+; RV32IBB-NEXT: .LBB23_4:
+; RV32IBB-NEXT: ret
+ %cmp = icmp ult i64 %a, %b
+ %cond = select i1 %cmp, i64 %a, i64 %b
+ ret i64 %cond
+}
+
+define i32 @maxu_i32(i32 %a, i32 %b) nounwind {
+; RV32I-LABEL: maxu_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: bltu a1, a0, .LBB24_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: mv a0, a1
+; RV32I-NEXT: .LBB24_2:
+; RV32I-NEXT: ret
+;
+; RV32IB-LABEL: maxu_i32:
+; RV32IB: # %bb.0:
+; RV32IB-NEXT: maxu a0, a0, a1
+; RV32IB-NEXT: ret
+;
+; RV32IBB-LABEL: maxu_i32:
+; RV32IBB: # %bb.0:
+; RV32IBB-NEXT: maxu a0, a0, a1
+; RV32IBB-NEXT: ret
+ %cmp = icmp ugt i32 %a, %b
+ %cond = select i1 %cmp, i32 %a, i32 %b
+ ret i32 %cond
+}
+
+; Because we do not directly match i64 code patterns on RV32, some i64
+; patterns do not yet have any matching bit manipulation instructions on RV32.
+; This test is kept here in case future expansions of the experimental-b
+; extension introduce instructions suitable for this pattern.
+
+define i64 @maxu_i64(i64 %a, i64 %b) nounwind {
+; RV32I-LABEL: maxu_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: beq a1, a3, .LBB25_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: sltu a4, a3, a1
+; RV32I-NEXT: beqz a4, .LBB25_3
+; RV32I-NEXT: j .LBB25_4
+; RV32I-NEXT: .LBB25_2:
+; RV32I-NEXT: sltu a4, a2, a0
+; RV32I-NEXT: bnez a4, .LBB25_4
+; RV32I-NEXT: .LBB25_3:
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: .LBB25_4:
+; RV32I-NEXT: ret
+;
+; RV32IB-LABEL: maxu_i64:
+; RV32IB: # %bb.0:
+; RV32IB-NEXT: beq a1, a3, .LBB25_2
+; RV32IB-NEXT: # %bb.1:
+; RV32IB-NEXT: sltu a4, a3, a1
+; RV32IB-NEXT: beqz a4, .LBB25_3
+; RV32IB-NEXT: j .LBB25_4
+; RV32IB-NEXT: .LBB25_2:
+; RV32IB-NEXT: sltu a4, a2, a0
+; RV32IB-NEXT: bnez a4, .LBB25_4
+; RV32IB-NEXT: .LBB25_3:
+; RV32IB-NEXT: mv a0, a2
+; RV32IB-NEXT: mv a1, a3
+; RV32IB-NEXT: .LBB25_4:
+; RV32IB-NEXT: ret
+;
+; RV32IBB-LABEL: maxu_i64:
+; RV32IBB: # %bb.0:
+; RV32IBB-NEXT: beq a1, a3, .LBB25_2
+; RV32IBB-NEXT: # %bb.1:
+; RV32IBB-NEXT: sltu a4, a3, a1
+; RV32IBB-NEXT: beqz a4, .LBB25_3
+; RV32IBB-NEXT: j .LBB25_4
+; RV32IBB-NEXT: .LBB25_2:
+; RV32IBB-NEXT: sltu a4, a2, a0
+; RV32IBB-NEXT: bnez a4, .LBB25_4
+; RV32IBB-NEXT: .LBB25_3:
+; RV32IBB-NEXT: mv a0, a2
+; RV32IBB-NEXT: mv a1, a3
+; RV32IBB-NEXT: .LBB25_4:
+; RV32IBB-NEXT: ret
+ %cmp = icmp ugt i64 %a, %b
+ %cond = select i1 %cmp, i64 %a, i64 %b
+ ret i64 %cond
+}
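Although none of the new instructions operate on i64 directly on RV32, the counting intrinsics above still improve: the RV32IB/RV32IBB output composes each 64-bit count from two 32-bit Zbb operations. A standalone sketch of those compositions (the helper functions are illustrative stand-ins for the clz/ctz/pcnt instructions, which return 32 for a zero input):

#include <cstdint>

static unsigned clz32(uint32_t x)  { return x ? __builtin_clz(x) : 32; }
static unsigned ctz32(uint32_t x)  { return x ? __builtin_ctz(x) : 32; }
static unsigned pcnt32(uint32_t x) { return __builtin_popcount(x); }

// ctlz_i64: leading zeros come from the high half unless it is zero.
unsigned ctlz_i64(uint32_t lo, uint32_t hi) {
  return hi ? clz32(hi) : clz32(lo) + 32;
}
// cttz_i64: trailing zeros come from the low half unless it is zero.
unsigned cttz_i64(uint32_t lo, uint32_t hi) {
  return lo ? ctz32(lo) : ctz32(hi) + 32;
}
// ctpop_i64: population counts of the two halves simply add.
unsigned ctpop_i64(uint32_t lo, uint32_t hi) {
  return pcnt32(lo) + pcnt32(hi);
}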
diff --git a/llvm/test/CodeGen/RISCV/rv64Zbb.ll b/llvm/test/CodeGen/RISCV/rv64Zbb.ll
new file mode 100644
index 0000000..2e4b69e
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv64Zbb.ll
@@ -0,0 +1,1149 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV64I
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-b -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV64IB
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-zbb -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV64IBB
+
+define signext i32 @slo_i32(i32 signext %a, i32 signext %b) nounwind {
+; RV64I-LABEL: slo_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: not a0, a0
+; RV64I-NEXT: sllw a0, a0, a1
+; RV64I-NEXT: not a0, a0
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: slo_i32:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: slow a0, a0, a1
+; RV64IB-NEXT: ret
+;
+; RV64IBB-LABEL: slo_i32:
+; RV64IBB: # %bb.0:
+; RV64IBB-NEXT: slow a0, a0, a1
+; RV64IBB-NEXT: ret
+ %neg = xor i32 %a, -1
+ %shl = shl i32 %neg, %b
+ %neg1 = xor i32 %shl, -1
+ ret i32 %neg1
+}
+
+define i64 @slo_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: slo_i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: not a0, a0
+; RV64I-NEXT: sll a0, a0, a1
+; RV64I-NEXT: not a0, a0
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: slo_i64:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: slo a0, a0, a1
+; RV64IB-NEXT: ret
+;
+; RV64IBB-LABEL: slo_i64:
+; RV64IBB: # %bb.0:
+; RV64IBB-NEXT: slo a0, a0, a1
+; RV64IBB-NEXT: ret
+ %neg = xor i64 %a, -1
+ %shl = shl i64 %neg, %b
+ %neg1 = xor i64 %shl, -1
+ ret i64 %neg1
+}
+
+define signext i32 @sro_i32(i32 signext %a, i32 signext %b) nounwind {
+; RV64I-LABEL: sro_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: not a0, a0
+; RV64I-NEXT: srlw a0, a0, a1
+; RV64I-NEXT: not a0, a0
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: sro_i32:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: srow a0, a0, a1
+; RV64IB-NEXT: ret
+;
+; RV64IBB-LABEL: sro_i32:
+; RV64IBB: # %bb.0:
+; RV64IBB-NEXT: srow a0, a0, a1
+; RV64IBB-NEXT: ret
+ %neg = xor i32 %a, -1
+ %shr = lshr i32 %neg, %b
+ %neg1 = xor i32 %shr, -1
+ ret i32 %neg1
+}
+
+define i64 @sro_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: sro_i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: not a0, a0
+; RV64I-NEXT: srl a0, a0, a1
+; RV64I-NEXT: not a0, a0
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: sro_i64:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: sro a0, a0, a1
+; RV64IB-NEXT: ret
+;
+; RV64IBB-LABEL: sro_i64:
+; RV64IBB: # %bb.0:
+; RV64IBB-NEXT: sro a0, a0, a1
+; RV64IBB-NEXT: ret
+ %neg = xor i64 %a, -1
+ %shr = lshr i64 %neg, %b
+ %neg1 = xor i64 %shr, -1
+ ret i64 %neg1
+}
+
+define signext i32 @sloi_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: sloi_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a0, a0, 1
+; RV64I-NEXT: ori a0, a0, 1
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: sloi_i32:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: sloiw a0, a0, 1
+; RV64IB-NEXT: ret
+;
+; RV64IBB-LABEL: sloi_i32:
+; RV64IBB: # %bb.0:
+; RV64IBB-NEXT: sloiw a0, a0, 1
+; RV64IBB-NEXT: ret
+ %neg = shl i32 %a, 1
+ %neg12 = or i32 %neg, 1
+ ret i32 %neg12
+}
+
+define i64 @sloi_i64(i64 %a) nounwind {
+; RV64I-LABEL: sloi_i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a0, a0, 1
+; RV64I-NEXT: ori a0, a0, 1
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: sloi_i64:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: sloi a0, a0, 1
+; RV64IB-NEXT: ret
+;
+; RV64IBB-LABEL: sloi_i64:
+; RV64IBB: # %bb.0:
+; RV64IBB-NEXT: sloi a0, a0, 1
+; RV64IBB-NEXT: ret
+ %neg = shl i64 %a, 1
+ %neg12 = or i64 %neg, 1
+ ret i64 %neg12
+}
+
+define signext i32 @sroi_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: sroi_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: srli a0, a0, 1
+; RV64I-NEXT: lui a1, 524288
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: sroi_i32:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: sroiw a0, a0, 1
+; RV64IB-NEXT: ret
+;
+; RV64IBB-LABEL: sroi_i32:
+; RV64IBB: # %bb.0:
+; RV64IBB-NEXT: sroiw a0, a0, 1
+; RV64IBB-NEXT: ret
+ %neg = lshr i32 %a, 1
+ %neg12 = or i32 %neg, -2147483648
+ ret i32 %neg12
+}
+
+define i64 @sroi_i64(i64 %a) nounwind {
+; RV64I-LABEL: sroi_i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: srli a0, a0, 1
+; RV64I-NEXT: addi a1, zero, -1
+; RV64I-NEXT: slli a1, a1, 63
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: sroi_i64:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: sroi a0, a0, 1
+; RV64IB-NEXT: ret
+;
+; RV64IBB-LABEL: sroi_i64:
+; RV64IBB: # %bb.0:
+; RV64IBB-NEXT: sroi a0, a0, 1
+; RV64IBB-NEXT: ret
+ %neg = lshr i64 %a, 1
+ %neg12 = or i64 %neg, -9223372036854775808
+ ret i64 %neg12
+}
+
+declare i32 @llvm.ctlz.i32(i32, i1)
+
+define signext i32 @ctlz_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: ctlz_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp)
+; RV64I-NEXT: beqz a0, .LBB8_2
+; RV64I-NEXT: # %bb.1: # %cond.false
+; RV64I-NEXT: srliw a1, a0, 1
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: srli a1, a0, 2
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: srli a1, a0, 4
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: srli a1, a0, 8
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: srli a1, a0, 16
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: srli a1, a0, 32
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: not a0, a0
+; RV64I-NEXT: srli a1, a0, 1
+; RV64I-NEXT: lui a2, 21845
+; RV64I-NEXT: addiw a2, a2, 1365
+; RV64I-NEXT: slli a2, a2, 12
+; RV64I-NEXT: addi a2, a2, 1365
+; RV64I-NEXT: slli a2, a2, 12
+; RV64I-NEXT: addi a2, a2, 1365
+; RV64I-NEXT: slli a2, a2, 12
+; RV64I-NEXT: addi a2, a2, 1365
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: lui a1, 13107
+; RV64I-NEXT: addiw a1, a1, 819
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 819
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 819
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 819
+; RV64I-NEXT: and a2, a0, a1
+; RV64I-NEXT: srli a0, a0, 2
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: add a0, a2, a0
+; RV64I-NEXT: srli a1, a0, 4
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: lui a1, 3855
+; RV64I-NEXT: addiw a1, a1, 241
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, -241
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 241
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, -241
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: lui a1, 4112
+; RV64I-NEXT: addiw a1, a1, 257
+; RV64I-NEXT: slli a1, a1, 16
+; RV64I-NEXT: addi a1, a1, 257
+; RV64I-NEXT: slli a1, a1, 16
+; RV64I-NEXT: addi a1, a1, 257
+; RV64I-NEXT: call __muldi3
+; RV64I-NEXT: srli a0, a0, 56
+; RV64I-NEXT: addi a0, a0, -32
+; RV64I-NEXT: j .LBB8_3
+; RV64I-NEXT: .LBB8_2:
+; RV64I-NEXT: addi a0, zero, 32
+; RV64I-NEXT: .LBB8_3: # %cond.end
+; RV64I-NEXT: ld ra, 8(sp)
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: ctlz_i32:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: beqz a0, .LBB8_2
+; RV64IB-NEXT: # %bb.1: # %cond.false
+; RV64IB-NEXT: clzw a0, a0
+; RV64IB-NEXT: ret
+; RV64IB-NEXT: .LBB8_2:
+; RV64IB-NEXT: addi a0, zero, 32
+; RV64IB-NEXT: ret
+;
+; RV64IBB-LABEL: ctlz_i32:
+; RV64IBB: # %bb.0:
+; RV64IBB-NEXT: beqz a0, .LBB8_2
+; RV64IBB-NEXT: # %bb.1: # %cond.false
+; RV64IBB-NEXT: clzw a0, a0
+; RV64IBB-NEXT: ret
+; RV64IBB-NEXT: .LBB8_2:
+; RV64IBB-NEXT: addi a0, zero, 32
+; RV64IBB-NEXT: ret
+ %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
+ ret i32 %1
+}
+
+declare i64 @llvm.ctlz.i64(i64, i1)
+
+define i64 @ctlz_i64(i64 %a) nounwind {
+; RV64I-LABEL: ctlz_i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp)
+; RV64I-NEXT: beqz a0, .LBB9_2
+; RV64I-NEXT: # %bb.1: # %cond.false
+; RV64I-NEXT: srli a1, a0, 1
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: srli a1, a0, 2
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: srli a1, a0, 4
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: srli a1, a0, 8
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: srli a1, a0, 16
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: srli a1, a0, 32
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: not a0, a0
+; RV64I-NEXT: srli a1, a0, 1
+; RV64I-NEXT: lui a2, 21845
+; RV64I-NEXT: addiw a2, a2, 1365
+; RV64I-NEXT: slli a2, a2, 12
+; RV64I-NEXT: addi a2, a2, 1365
+; RV64I-NEXT: slli a2, a2, 12
+; RV64I-NEXT: addi a2, a2, 1365
+; RV64I-NEXT: slli a2, a2, 12
+; RV64I-NEXT: addi a2, a2, 1365
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: lui a1, 13107
+; RV64I-NEXT: addiw a1, a1, 819
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 819
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 819
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 819
+; RV64I-NEXT: and a2, a0, a1
+; RV64I-NEXT: srli a0, a0, 2
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: add a0, a2, a0
+; RV64I-NEXT: srli a1, a0, 4
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: lui a1, 3855
+; RV64I-NEXT: addiw a1, a1, 241
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, -241
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 241
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, -241
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: lui a1, 4112
+; RV64I-NEXT: addiw a1, a1, 257
+; RV64I-NEXT: slli a1, a1, 16
+; RV64I-NEXT: addi a1, a1, 257
+; RV64I-NEXT: slli a1, a1, 16
+; RV64I-NEXT: addi a1, a1, 257
+; RV64I-NEXT: call __muldi3
+; RV64I-NEXT: srli a0, a0, 56
+; RV64I-NEXT: j .LBB9_3
+; RV64I-NEXT: .LBB9_2:
+; RV64I-NEXT: addi a0, zero, 64
+; RV64I-NEXT: .LBB9_3: # %cond.end
+; RV64I-NEXT: ld ra, 8(sp)
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: ctlz_i64:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: beqz a0, .LBB9_2
+; RV64IB-NEXT: # %bb.1: # %cond.false
+; RV64IB-NEXT: clz a0, a0
+; RV64IB-NEXT: ret
+; RV64IB-NEXT: .LBB9_2:
+; RV64IB-NEXT: addi a0, zero, 64
+; RV64IB-NEXT: ret
+;
+; RV64IBB-LABEL: ctlz_i64:
+; RV64IBB: # %bb.0:
+; RV64IBB-NEXT: beqz a0, .LBB9_2
+; RV64IBB-NEXT: # %bb.1: # %cond.false
+; RV64IBB-NEXT: clz a0, a0
+; RV64IBB-NEXT: ret
+; RV64IBB-NEXT: .LBB9_2:
+; RV64IBB-NEXT: addi a0, zero, 64
+; RV64IBB-NEXT: ret
+ %1 = call i64 @llvm.ctlz.i64(i64 %a, i1 false)
+ ret i64 %1
+}
+
+declare i32 @llvm.cttz.i32(i32, i1)
+
+define signext i32 @cttz_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: cttz_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp)
+; RV64I-NEXT: beqz a0, .LBB10_2
+; RV64I-NEXT: # %bb.1: # %cond.false
+; RV64I-NEXT: addi a1, a0, -1
+; RV64I-NEXT: not a0, a0
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: srli a1, a0, 1
+; RV64I-NEXT: lui a2, 21845
+; RV64I-NEXT: addiw a2, a2, 1365
+; RV64I-NEXT: slli a2, a2, 12
+; RV64I-NEXT: addi a2, a2, 1365
+; RV64I-NEXT: slli a2, a2, 12
+; RV64I-NEXT: addi a2, a2, 1365
+; RV64I-NEXT: slli a2, a2, 12
+; RV64I-NEXT: addi a2, a2, 1365
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: lui a1, 13107
+; RV64I-NEXT: addiw a1, a1, 819
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 819
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 819
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 819
+; RV64I-NEXT: and a2, a0, a1
+; RV64I-NEXT: srli a0, a0, 2
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: add a0, a2, a0
+; RV64I-NEXT: srli a1, a0, 4
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: lui a1, 3855
+; RV64I-NEXT: addiw a1, a1, 241
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, -241
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 241
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, -241
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: lui a1, 4112
+; RV64I-NEXT: addiw a1, a1, 257
+; RV64I-NEXT: slli a1, a1, 16
+; RV64I-NEXT: addi a1, a1, 257
+; RV64I-NEXT: slli a1, a1, 16
+; RV64I-NEXT: addi a1, a1, 257
+; RV64I-NEXT: call __muldi3
+; RV64I-NEXT: srli a0, a0, 56
+; RV64I-NEXT: j .LBB10_3
+; RV64I-NEXT: .LBB10_2:
+; RV64I-NEXT: addi a0, zero, 32
+; RV64I-NEXT: .LBB10_3: # %cond.end
+; RV64I-NEXT: ld ra, 8(sp)
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: cttz_i32:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: beqz a0, .LBB10_2
+; RV64IB-NEXT: # %bb.1: # %cond.false
+; RV64IB-NEXT: ctz a0, a0
+; RV64IB-NEXT: ret
+; RV64IB-NEXT: .LBB10_2:
+; RV64IB-NEXT: addi a0, zero, 32
+; RV64IB-NEXT: ret
+;
+; RV64IBB-LABEL: cttz_i32:
+; RV64IBB: # %bb.0:
+; RV64IBB-NEXT: beqz a0, .LBB10_2
+; RV64IBB-NEXT: # %bb.1: # %cond.false
+; RV64IBB-NEXT: ctz a0, a0
+; RV64IBB-NEXT: ret
+; RV64IBB-NEXT: .LBB10_2:
+; RV64IBB-NEXT: addi a0, zero, 32
+; RV64IBB-NEXT: ret
+ %1 = call i32 @llvm.cttz.i32(i32 %a, i1 false)
+ ret i32 %1
+}
+
+declare i64 @llvm.cttz.i64(i64, i1)
+
+define i64 @cttz_i64(i64 %a) nounwind {
+; RV64I-LABEL: cttz_i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp)
+; RV64I-NEXT: beqz a0, .LBB11_2
+; RV64I-NEXT: # %bb.1: # %cond.false
+; RV64I-NEXT: addi a1, a0, -1
+; RV64I-NEXT: not a0, a0
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: srli a1, a0, 1
+; RV64I-NEXT: lui a2, 21845
+; RV64I-NEXT: addiw a2, a2, 1365
+; RV64I-NEXT: slli a2, a2, 12
+; RV64I-NEXT: addi a2, a2, 1365
+; RV64I-NEXT: slli a2, a2, 12
+; RV64I-NEXT: addi a2, a2, 1365
+; RV64I-NEXT: slli a2, a2, 12
+; RV64I-NEXT: addi a2, a2, 1365
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: lui a1, 13107
+; RV64I-NEXT: addiw a1, a1, 819
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 819
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 819
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 819
+; RV64I-NEXT: and a2, a0, a1
+; RV64I-NEXT: srli a0, a0, 2
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: add a0, a2, a0
+; RV64I-NEXT: srli a1, a0, 4
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: lui a1, 3855
+; RV64I-NEXT: addiw a1, a1, 241
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, -241
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 241
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, -241
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: lui a1, 4112
+; RV64I-NEXT: addiw a1, a1, 257
+; RV64I-NEXT: slli a1, a1, 16
+; RV64I-NEXT: addi a1, a1, 257
+; RV64I-NEXT: slli a1, a1, 16
+; RV64I-NEXT: addi a1, a1, 257
+; RV64I-NEXT: call __muldi3
+; RV64I-NEXT: srli a0, a0, 56
+; RV64I-NEXT: j .LBB11_3
+; RV64I-NEXT: .LBB11_2:
+; RV64I-NEXT: addi a0, zero, 64
+; RV64I-NEXT: .LBB11_3: # %cond.end
+; RV64I-NEXT: ld ra, 8(sp)
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: cttz_i64:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: beqz a0, .LBB11_2
+; RV64IB-NEXT: # %bb.1: # %cond.false
+; RV64IB-NEXT: ctz a0, a0
+; RV64IB-NEXT: ret
+; RV64IB-NEXT: .LBB11_2:
+; RV64IB-NEXT: addi a0, zero, 64
+; RV64IB-NEXT: ret
+;
+; RV64IBB-LABEL: cttz_i64:
+; RV64IBB: # %bb.0:
+; RV64IBB-NEXT: beqz a0, .LBB11_2
+; RV64IBB-NEXT: # %bb.1: # %cond.false
+; RV64IBB-NEXT: ctz a0, a0
+; RV64IBB-NEXT: ret
+; RV64IBB-NEXT: .LBB11_2:
+; RV64IBB-NEXT: addi a0, zero, 64
+; RV64IBB-NEXT: ret
+ %1 = call i64 @llvm.cttz.i64(i64 %a, i1 false)
+ ret i64 %1
+}
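
Editor's note: the RV64I cttz fallback isolates the trailing zeros instead of smearing: ~x & (x - 1) has a one in exactly the trailing-zero positions of x, so a population count of that mask is the trailing-zero count. A sketch under the same caveats as above (illustrative helper name, builtin standing in for the open-coded popcount):

#include <cstdint>

// What the RV64I cttz fallback computes for a non-zero input.
uint64_t cttz64(uint64_t x) {
  return static_cast<uint64_t>(__builtin_popcountll(~x & (x - 1)));
}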
+
+declare i32 @llvm.ctpop.i32(i32)
+
+define signext i32 @ctpop_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: ctpop_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp)
+; RV64I-NEXT: slli a1, a0, 32
+; RV64I-NEXT: srli a1, a1, 32
+; RV64I-NEXT: srliw a0, a0, 1
+; RV64I-NEXT: lui a2, 349525
+; RV64I-NEXT: addiw a2, a2, 1365
+; RV64I-NEXT: and a0, a0, a2
+; RV64I-NEXT: sub a0, a1, a0
+; RV64I-NEXT: srli a1, a0, 2
+; RV64I-NEXT: lui a2, 13107
+; RV64I-NEXT: addiw a2, a2, 819
+; RV64I-NEXT: slli a2, a2, 12
+; RV64I-NEXT: addi a2, a2, 819
+; RV64I-NEXT: slli a2, a2, 12
+; RV64I-NEXT: addi a2, a2, 819
+; RV64I-NEXT: slli a2, a2, 12
+; RV64I-NEXT: addi a2, a2, 819
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: and a0, a0, a2
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: srli a1, a0, 4
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: lui a1, 3855
+; RV64I-NEXT: addiw a1, a1, 241
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, -241
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 241
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, -241
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: lui a1, 4112
+; RV64I-NEXT: addiw a1, a1, 257
+; RV64I-NEXT: slli a1, a1, 16
+; RV64I-NEXT: addi a1, a1, 257
+; RV64I-NEXT: slli a1, a1, 16
+; RV64I-NEXT: addi a1, a1, 257
+; RV64I-NEXT: call __muldi3
+; RV64I-NEXT: srli a0, a0, 56
+; RV64I-NEXT: ld ra, 8(sp)
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: ctpop_i32:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: pcntw a0, a0
+; RV64IB-NEXT: ret
+;
+; RV64IBB-LABEL: ctpop_i32:
+; RV64IBB: # %bb.0:
+; RV64IBB-NEXT: pcntw a0, a0
+; RV64IBB-NEXT: ret
+ %1 = call i32 @llvm.ctpop.i32(i32 %a)
+ ret i32 %1
+}
+
+declare i64 @llvm.ctpop.i64(i64)
+
+define i64 @ctpop_i64(i64 %a) nounwind {
+; RV64I-LABEL: ctpop_i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp)
+; RV64I-NEXT: srli a1, a0, 1
+; RV64I-NEXT: lui a2, 21845
+; RV64I-NEXT: addiw a2, a2, 1365
+; RV64I-NEXT: slli a2, a2, 12
+; RV64I-NEXT: addi a2, a2, 1365
+; RV64I-NEXT: slli a2, a2, 12
+; RV64I-NEXT: addi a2, a2, 1365
+; RV64I-NEXT: slli a2, a2, 12
+; RV64I-NEXT: addi a2, a2, 1365
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: lui a1, 13107
+; RV64I-NEXT: addiw a1, a1, 819
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 819
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 819
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 819
+; RV64I-NEXT: and a2, a0, a1
+; RV64I-NEXT: srli a0, a0, 2
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: add a0, a2, a0
+; RV64I-NEXT: srli a1, a0, 4
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: lui a1, 3855
+; RV64I-NEXT: addiw a1, a1, 241
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, -241
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, 241
+; RV64I-NEXT: slli a1, a1, 12
+; RV64I-NEXT: addi a1, a1, -241
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: lui a1, 4112
+; RV64I-NEXT: addiw a1, a1, 257
+; RV64I-NEXT: slli a1, a1, 16
+; RV64I-NEXT: addi a1, a1, 257
+; RV64I-NEXT: slli a1, a1, 16
+; RV64I-NEXT: addi a1, a1, 257
+; RV64I-NEXT: call __muldi3
+; RV64I-NEXT: srli a0, a0, 56
+; RV64I-NEXT: ld ra, 8(sp)
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: ctpop_i64:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: pcnt a0, a0
+; RV64IB-NEXT: ret
+;
+; RV64IBB-LABEL: ctpop_i64:
+; RV64IBB: # %bb.0:
+; RV64IBB-NEXT: pcnt a0, a0
+; RV64IBB-NEXT: ret
+ %1 = call i64 @llvm.ctpop.i64(i64 %a)
+ ret i64 %1
+}
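
Editor's note: the long RV64I sequences for ctpop (and inside the ctlz/cttz fallbacks) are the classic SWAR population count, with each 64-bit mask materialized by a lui/addiw/slli/addi chain and the final multiply issued as a __muldi3 libcall, presumably because the plain RV64I run line does not enable the M extension. A rough C++ rendering of the algorithm (popcount64 is an illustrative name, not part of the patch):

#include <cstdint>

// SWAR popcount, the algorithm the RV64I expansion materializes
// constant by constant.
uint64_t popcount64(uint64_t x) {
  x -= (x >> 1) & 0x5555555555555555ULL;                                // 2-bit sums
  x = (x & 0x3333333333333333ULL) + ((x >> 2) & 0x3333333333333333ULL); // 4-bit sums
  x = (x + (x >> 4)) & 0x0F0F0F0F0F0F0F0FULL;                           // 8-bit sums
  return (x * 0x0101010101010101ULL) >> 56;                             // gather bytes into the top byte
}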
+
+define signext i32 @sextb_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: sextb_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: srai a0, a0, 56
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: sextb_i32:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: sext.b a0, a0
+; RV64IB-NEXT: ret
+;
+; RV64IBB-LABEL: sextb_i32:
+; RV64IBB: # %bb.0:
+; RV64IBB-NEXT: sext.b a0, a0
+; RV64IBB-NEXT: ret
+ %shl = shl i32 %a, 24
+ %shr = ashr exact i32 %shl, 24
+ ret i32 %shr
+}
+
+define i64 @sextb_i64(i64 %a) nounwind {
+; RV64I-LABEL: sextb_i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: srai a0, a0, 56
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: sextb_i64:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: sext.b a0, a0
+; RV64IB-NEXT: ret
+;
+; RV64IBB-LABEL: sextb_i64:
+; RV64IBB: # %bb.0:
+; RV64IBB-NEXT: sext.b a0, a0
+; RV64IBB-NEXT: ret
+ %shl = shl i64 %a, 56
+ %shr = ashr exact i64 %shl, 56
+ ret i64 %shr
+}
+
+define signext i32 @sexth_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: sexth_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srai a0, a0, 48
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: sexth_i32:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: sext.h a0, a0
+; RV64IB-NEXT: ret
+;
+; RV64IBB-LABEL: sexth_i32:
+; RV64IBB: # %bb.0:
+; RV64IBB-NEXT: sext.h a0, a0
+; RV64IBB-NEXT: ret
+ %shl = shl i32 %a, 16
+ %shr = ashr exact i32 %shl, 16
+ ret i32 %shr
+}
+
+define i64 @sexth_i64(i64 %a) nounwind {
+; RV64I-LABEL: sexth_i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srai a0, a0, 48
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: sexth_i64:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: sext.h a0, a0
+; RV64IB-NEXT: ret
+;
+; RV64IBB-LABEL: sexth_i64:
+; RV64IBB: # %bb.0:
+; RV64IBB-NEXT: sext.h a0, a0
+; RV64IBB-NEXT: ret
+ %shl = shl i64 %a, 48
+ %shr = ashr exact i64 %shl, 48
+ ret i64 %shr
+}
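
Editor's note: the baseline lowering of all four sign-extension tests is a left shift that moves the byte or halfword to the top of the register followed by an arithmetic right shift back down; Zbb folds that shift pair into sext.b/sext.h. A minimal sketch, assuming an arithmetic right shift on signed 64-bit values (true on the targets of interest; function names are illustrative):

#include <cstdint>

// Shift-pair sign extension, the pattern replaced by Zbb's sext.b/sext.h.
int64_t sext_b(uint64_t x) { return static_cast<int64_t>(x << 56) >> 56; }
int64_t sext_h(uint64_t x) { return static_cast<int64_t>(x << 48) >> 48; }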
+
+define signext i32 @min_i32(i32 signext %a, i32 signext %b) nounwind {
+; RV64I-LABEL: min_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: blt a0, a1, .LBB18_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: mv a0, a1
+; RV64I-NEXT: .LBB18_2:
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: min_i32:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: min a0, a0, a1
+; RV64IB-NEXT: ret
+;
+; RV64IBB-LABEL: min_i32:
+; RV64IBB: # %bb.0:
+; RV64IBB-NEXT: min a0, a0, a1
+; RV64IBB-NEXT: ret
+ %cmp = icmp slt i32 %a, %b
+ %cond = select i1 %cmp, i32 %a, i32 %b
+ ret i32 %cond
+}
+
+define i64 @min_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: min_i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: blt a0, a1, .LBB19_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: mv a0, a1
+; RV64I-NEXT: .LBB19_2:
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: min_i64:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: min a0, a0, a1
+; RV64IB-NEXT: ret
+;
+; RV64IBB-LABEL: min_i64:
+; RV64IBB: # %bb.0:
+; RV64IBB-NEXT: min a0, a0, a1
+; RV64IBB-NEXT: ret
+ %cmp = icmp slt i64 %a, %b
+ %cond = select i1 %cmp, i64 %a, i64 %b
+ ret i64 %cond
+}
+
+define signext i32 @max_i32(i32 signext %a, i32 signext %b) nounwind {
+; RV64I-LABEL: max_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: blt a1, a0, .LBB20_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: mv a0, a1
+; RV64I-NEXT: .LBB20_2:
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: max_i32:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: max a0, a0, a1
+; RV64IB-NEXT: ret
+;
+; RV64IBB-LABEL: max_i32:
+; RV64IBB: # %bb.0:
+; RV64IBB-NEXT: max a0, a0, a1
+; RV64IBB-NEXT: ret
+ %cmp = icmp sgt i32 %a, %b
+ %cond = select i1 %cmp, i32 %a, i32 %b
+ ret i32 %cond
+}
+
+define i64 @max_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: max_i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: blt a1, a0, .LBB21_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: mv a0, a1
+; RV64I-NEXT: .LBB21_2:
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: max_i64:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: max a0, a0, a1
+; RV64IB-NEXT: ret
+;
+; RV64IBB-LABEL: max_i64:
+; RV64IBB: # %bb.0:
+; RV64IBB-NEXT: max a0, a0, a1
+; RV64IBB-NEXT: ret
+ %cmp = icmp sgt i64 %a, %b
+ %cond = select i1 %cmp, i64 %a, i64 %b
+ ret i64 %cond
+}
+
+define signext i32 @minu_i32(i32 signext %a, i32 signext %b) nounwind {
+; RV64I-LABEL: minu_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: bltu a0, a1, .LBB22_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: mv a0, a1
+; RV64I-NEXT: .LBB22_2:
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: minu_i32:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: minu a0, a0, a1
+; RV64IB-NEXT: ret
+;
+; RV64IBB-LABEL: minu_i32:
+; RV64IBB: # %bb.0:
+; RV64IBB-NEXT: minu a0, a0, a1
+; RV64IBB-NEXT: ret
+ %cmp = icmp ult i32 %a, %b
+ %cond = select i1 %cmp, i32 %a, i32 %b
+ ret i32 %cond
+}
+
+define i64 @minu_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: minu_i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: bltu a0, a1, .LBB23_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: mv a0, a1
+; RV64I-NEXT: .LBB23_2:
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: minu_i64:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: minu a0, a0, a1
+; RV64IB-NEXT: ret
+;
+; RV64IBB-LABEL: minu_i64:
+; RV64IBB: # %bb.0:
+; RV64IBB-NEXT: minu a0, a0, a1
+; RV64IBB-NEXT: ret
+ %cmp = icmp ult i64 %a, %b
+ %cond = select i1 %cmp, i64 %a, i64 %b
+ ret i64 %cond
+}
+
+define signext i32 @maxu_i32(i32 signext %a, i32 signext %b) nounwind {
+; RV64I-LABEL: maxu_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: bltu a1, a0, .LBB24_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: mv a0, a1
+; RV64I-NEXT: .LBB24_2:
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: maxu_i32:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: maxu a0, a0, a1
+; RV64IB-NEXT: ret
+;
+; RV64IBB-LABEL: maxu_i32:
+; RV64IBB: # %bb.0:
+; RV64IBB-NEXT: maxu a0, a0, a1
+; RV64IBB-NEXT: ret
+ %cmp = icmp ugt i32 %a, %b
+ %cond = select i1 %cmp, i32 %a, i32 %b
+ ret i32 %cond
+}
+
+define i64 @maxu_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: maxu_i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: bltu a1, a0, .LBB25_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: mv a0, a1
+; RV64I-NEXT: .LBB25_2:
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: maxu_i64:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: maxu a0, a0, a1
+; RV64IB-NEXT: ret
+;
+; RV64IBB-LABEL: maxu_i64:
+; RV64IBB: # %bb.0:
+; RV64IBB-NEXT: maxu a0, a0, a1
+; RV64IBB-NEXT: ret
+ %cmp = icmp ugt i64 %a, %b
+ %cond = select i1 %cmp, i64 %a, i64 %b
+ ret i64 %cond
+}
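
Editor's note: the eight min/max tests above all match the same shape, an icmp feeding a two-operand select; without Zbb this lowers to a conditional branch over a register move, with Zbb it becomes a single min/max/minu/maxu. The source-level idiom being exercised is simply (names illustrative):

#include <cstdint>

// The compare-and-select shape these tests exercise; Zbb turns each
// of these into one instruction (min/max signed, minu/maxu unsigned).
int64_t  smin(int64_t a, int64_t b)   { return a < b ? a : b; }
int64_t  smax(int64_t a, int64_t b)   { return a > b ? a : b; }
uint64_t umin(uint64_t a, uint64_t b) { return a < b ? a : b; }
uint64_t umax(uint64_t a, uint64_t b) { return a > b ? a : b; }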
+
+; We select an i32 addi that zero-extends the result on RV64 as addiwu
+
+define zeroext i32 @zext_add_to_addiwu(i32 signext %a) nounwind {
+; RV64I-LABEL: zext_add_to_addiwu:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi a0, a0, 1
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: zext_add_to_addiwu:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: addiwu a0, a0, 1
+; RV64IB-NEXT: ret
+;
+; RV64IBB-LABEL: zext_add_to_addiwu:
+; RV64IBB: # %bb.0:
+; RV64IBB-NEXT: addiwu a0, a0, 1
+; RV64IBB-NEXT: ret
+ %add = add i32 %a, 1
+ ret i32 %add
+}
+
+define i64 @addiwu(i64 %a) nounwind {
+; RV64I-LABEL: addiwu:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi a0, a0, 1
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: addiwu:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: addiwu a0, a0, 1
+; RV64IB-NEXT: ret
+;
+; RV64IBB-LABEL: addiwu:
+; RV64IBB: # %bb.0:
+; RV64IBB-NEXT: addiwu a0, a0, 1
+; RV64IBB-NEXT: ret
+ %conv = add i64 %a, 1
+ %conv1 = and i64 %conv, 4294967295
+ ret i64 %conv1
+}
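
Editor's note: both tests above reduce to the same node, an add of a small immediate whose result is kept only in the low 32 bits and zero-extended, which is the addiwu semantics in the 0.92 draft. A sketch of the computation being matched (function name illustrative):

#include <cstdint>

// Semantics matched as addiwu: 32-bit add of an immediate,
// zero-extended back to 64 bits.
uint64_t addiwu1(uint64_t a) {
  return static_cast<uint64_t>(static_cast<uint32_t>(a + 1));
}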
+
+define i64 @slliuw(i64 %a) nounwind {
+; RV64I-LABEL: slliuw:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a0, a0, 1
+; RV64I-NEXT: addi a1, zero, 1
+; RV64I-NEXT: slli a1, a1, 33
+; RV64I-NEXT: addi a1, a1, -2
+; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: slliuw:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: slliu.w a0, a0, 1
+; RV64IB-NEXT: ret
+;
+; RV64IBB-LABEL: slliuw:
+; RV64IBB: # %bb.0:
+; RV64IBB-NEXT: slliu.w a0, a0, 1
+; RV64IBB-NEXT: ret
+ %conv1 = shl i64 %a, 1
+ %shl = and i64 %conv1, 8589934590
+ ret i64 %shl
+}
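
Editor's note: the mask 8589934590 is 0x1FFFFFFFE, i.e. 0xFFFFFFFF shifted left by one, so the IR above is a left shift of the zero-extended low word, which is the slliu.w semantics in the draft. Roughly (illustrative name, shamt fixed at 1 as in the test):

#include <cstdint>

// Semantics matched as slliu.w with shamt = 1: zero-extend the low
// 32 bits of the source, then shift left. Equivalent to
// (a << 1) & 0x1FFFFFFFE, the form the IR uses.
uint64_t slliuw1(uint64_t a) {
  return static_cast<uint64_t>(static_cast<uint32_t>(a)) << 1;
}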
+
+; We select an i32 add that zero-extends the result on RV64 as addwu
+
+define zeroext i32 @zext_add_to_addwu(i32 signext %a, i32 signext %b) nounwind {
+; RV64I-LABEL: zext_add_to_addwu:
+; RV64I: # %bb.0:
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: zext_add_to_addwu:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: addwu a0, a0, a1
+; RV64IB-NEXT: ret
+;
+; RV64IBB-LABEL: zext_add_to_addwu:
+; RV64IBB: # %bb.0:
+; RV64IBB-NEXT: addwu a0, a0, a1
+; RV64IBB-NEXT: ret
+ %add = add i32 %a, %b
+ ret i32 %add
+}
+
+define i64 @addwu(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: addwu:
+; RV64I: # %bb.0:
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: addwu:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: addwu a0, a1, a0
+; RV64IB-NEXT: ret
+;
+; RV64IBB-LABEL: addwu:
+; RV64IBB: # %bb.0:
+; RV64IBB-NEXT: addwu a0, a1, a0
+; RV64IBB-NEXT: ret
+ %add = add i64 %b, %a
+ %conv1 = and i64 %add, 4294967295
+ ret i64 %conv1
+}
+
+; We select an i32 sub that zero-extends the result on RV64 as subwu
+
+define zeroext i32 @zext_sub_to_subwu(i32 signext %a, i32 signext %b) nounwind {
+; RV64I-LABEL: zext_sub_to_subwu:
+; RV64I: # %bb.0:
+; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: zext_sub_to_subwu:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: subwu a0, a0, a1
+; RV64IB-NEXT: ret
+;
+; RV64IBB-LABEL: zext_sub_to_subwu:
+; RV64IBB: # %bb.0:
+; RV64IBB-NEXT: subwu a0, a0, a1
+; RV64IBB-NEXT: ret
+ %sub = sub i32 %a, %b
+ ret i32 %sub
+}
+
+define i64 @subwu(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: subwu:
+; RV64I: # %bb.0:
+; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: subwu:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: subwu a0, a0, a1
+; RV64IB-NEXT: ret
+;
+; RV64IBB-LABEL: subwu:
+; RV64IBB: # %bb.0:
+; RV64IBB-NEXT: subwu a0, a0, a1
+; RV64IBB-NEXT: ret
+ %sub = sub i64 %a, %b
+ %conv1 = and i64 %sub, 4294967295
+ ret i64 %conv1
+}
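
Editor's note: as with addiwu, the four addwu/subwu tests all boil down to a 32-bit add or subtract whose result is zero-extended to 64 bits. A sketch of the matched semantics (names illustrative):

#include <cstdint>

// Semantics matched as addwu/subwu: 32-bit add/sub, result zero-extended.
uint64_t addwu_ref(uint64_t a, uint64_t b) {
  return static_cast<uint64_t>(static_cast<uint32_t>(a + b));
}
uint64_t subwu_ref(uint64_t a, uint64_t b) {
  return static_cast<uint64_t>(static_cast<uint32_t>(a - b));
}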
+
+define i64 @adduw(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: adduw:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a1, a1, 32
+; RV64I-NEXT: srli a1, a1, 32
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: adduw:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: addu.w a0, a0, a1
+; RV64IB-NEXT: ret
+;
+; RV64IBB-LABEL: adduw:
+; RV64IBB: # %bb.0:
+; RV64IBB-NEXT: addu.w a0, a0, a1
+; RV64IBB-NEXT: ret
+ %and = and i64 %b, 4294967295
+ %add = add i64 %and, %a
+ ret i64 %add
+}
+
+define i64 @subuw(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: subuw:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a1, a1, 32
+; RV64I-NEXT: srli a1, a1, 32
+; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: subuw:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: subu.w a0, a0, a1
+; RV64IB-NEXT: ret
+;
+; RV64IBB-LABEL: subuw:
+; RV64IBB: # %bb.0:
+; RV64IBB-NEXT: subu.w a0, a0, a1
+; RV64IBB-NEXT: ret
+ %and = and i64 %b, 4294967295
+ %sub = sub i64 %a, %and
+ ret i64 %sub
+}
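
Editor's note: addu.w/subu.w differ from the *wu forms in that only the second operand is truncated; the and with 4294967295 in the IR zero-extends the low word of %b before a full 64-bit add or subtract. A sketch of that semantics (names illustrative):

#include <cstdint>

// Semantics matched as addu.w/subu.w: zero-extend the low 32 bits of the
// second operand only, then do a full 64-bit add or subtract.
uint64_t adduw_ref(uint64_t a, uint64_t b) {
  return a + static_cast<uint32_t>(b);  // uint32_t widens back to 64 bits, zero-extended
}
uint64_t subuw_ref(uint64_t a, uint64_t b) {
  return a - static_cast<uint32_t>(b);
}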