author     lewis-revill <lewis.revill@embecosm.com>    2020-07-15 11:55:44 +0100
committer  Hans Wennborg <hans@chromium.org>           2020-07-27 13:07:37 +0200
commit     7776c991d06e1d84ffb5e709024bfff5e51f7e8e (patch)
tree       5cbce2dd4f6f3a55c03ed42bfc27ac8e8fbdd3ee
parent     ffe85d6c03b91cf9294c7ec1d8192d4cc337cdfd (diff)
[RISCV] Add matching of codegen patterns to RISCV Bit Manipulation Zbbp asm instructions
This patch provides optimization of bit manipulation operations by enabling the +experimental-b target feature.

It adds matching of single-block patterns of instructions to specific bit-manip instructions belonging to both the permutation and the base subsets of the experimental B extension of RISC-V. It also adds the corresponding codegen tests.

This patch is based on Claire Wolf's proposal for the bit manipulation extension of RISCV: https://github.com/riscv/riscv-bitmanip/blob/master/bitmanip-0.92.pdf

Differential Revision: https://reviews.llvm.org/D79873

(cherry picked from commit 6144f0a1e52e7f5439a67267ca65f2d72c21aaa6)
-rw-r--r--  llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp |  85
-rw-r--r--  llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h   |   2
-rw-r--r--  llvm/lib/Target/RISCV/RISCVISelLowering.cpp |   6
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoB.td    |  71
-rw-r--r--  llvm/test/CodeGen/RISCV/rv32Zbbp.ll         | 892
-rw-r--r--  llvm/test/CodeGen/RISCV/rv64Zbbp.ll         | 517
6 files changed, 1571 insertions(+), 2 deletions(-)
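As a rough sketch of how the new lowering can be exercised from C++ source (the function names below are illustrative and not taken from the patch, and the exact middle-end canonicalization is assumed), rotate idioms like the following are turned into the rotate/funnel-shift nodes that the patterns added here select as single ROR/RORI instructions when the Zbb or Zbp subset is enabled:

#include <cstdint>

// Rotate right by a variable amount; with Zbb/Zbp this is expected to
// select a single ROR instead of an srl/sll/or sequence.
uint32_t rotr32(uint32_t x, uint32_t n) {
  return (x >> (n & 31)) | (x << ((32 - n) & 31));
}

// Rotate right by a constant amount; expected to select RORI with shamt = 7.
uint32_t rotr32_by_7(uint32_t x) {
  return (x >> 7) | (x << 25);
}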
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 99e5135..fd1a91f 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -272,6 +272,44 @@ bool RISCVDAGToDAGISel::SelectSROI(SDValue N, SDValue &RS1, SDValue &Shamt) {
return false;
}
+// Check that it is a RORI (Rotate Right Immediate). We first check that
+// the node tree has the expected shape:
+//
+// (ROTL RS1, VC)
+//
+// The compiler translates immediate right rotations, such as those produced
+// by the rotateright32/rotateright64 intrinsics, into left rotations.
+// Since a left rotation can easily be emulated as a right rotation by
+// negating the constant, there is no encoding for ROLI.
+// We therefore select an immediate left rotation as RORI with the
+// complementary constant:
+//
+// Shamt == XLen - VC
+
+bool RISCVDAGToDAGISel::SelectRORI(SDValue N, SDValue &RS1, SDValue &Shamt) {
+ MVT XLenVT = Subtarget->getXLenVT();
+ if (N.getOpcode() == ISD::ROTL) {
+ if (isa<ConstantSDNode>(N.getOperand(1))) {
+ if (XLenVT == MVT::i64) {
+ uint64_t VC = N.getConstantOperandVal(1);
+ Shamt = CurDAG->getTargetConstant((64 - VC), SDLoc(N),
+ N.getOperand(1).getValueType());
+ RS1 = N.getOperand(0);
+ return true;
+ }
+ if (XLenVT == MVT::i32) {
+ uint32_t VC = N.getConstantOperandVal(1);
+ Shamt = CurDAG->getTargetConstant((32 - VC), SDLoc(N),
+ N.getOperand(1).getValueType());
+ RS1 = N.getOperand(0);
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+
// Check that it is a SLLIUW (Shift Logical Left Immediate Unsigned i32
// on RV64).
// SLLIUW is the same as SLLI except for the fact that it clears the bits
@@ -374,6 +412,53 @@ bool RISCVDAGToDAGISel::SelectSROIW(SDValue N, SDValue &RS1, SDValue &Shamt) {
return false;
}
+// Check that it is a RORIW (i32 Right Rotate Immediate on RV64).
+// We first check that it is the right node tree:
+//
+// (SIGN_EXTEND_INREG (OR (SHL (AssertSext RS1, i32), VC2),
+// (SRL (AND (AssertSext RS2, i32), VC3), VC1)))
+//
+// Then we check that the constant operands respect these constraints:
+//
+// VC2 == 32 - VC1
+// VC3 == maskLeadingOnes<uint32_t>(VC2)
+//
+// where VC1 is the Shamt we need, VC2 is the complement of Shamt with
+// respect to 32, and VC3 is a 32-bit mask of (32 - VC1) leading ones.
+
+bool RISCVDAGToDAGISel::SelectRORIW(SDValue N, SDValue &RS1, SDValue &Shamt) {
+ if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+ Subtarget->getXLenVT() == MVT::i64 &&
+ cast<VTSDNode>(N.getOperand(1))->getVT() == MVT::i32) {
+ if (N.getOperand(0).getOpcode() == ISD::OR) {
+ SDValue Or = N.getOperand(0);
+ if (Or.getOperand(0).getOpcode() == ISD::SHL &&
+ Or.getOperand(1).getOpcode() == ISD::SRL) {
+ SDValue Shl = Or.getOperand(0);
+ SDValue Srl = Or.getOperand(1);
+ if (Srl.getOperand(0).getOpcode() == ISD::AND) {
+ SDValue And = Srl.getOperand(0);
+ if (isa<ConstantSDNode>(Srl.getOperand(1)) &&
+ isa<ConstantSDNode>(Shl.getOperand(1)) &&
+ isa<ConstantSDNode>(And.getOperand(1))) {
+ uint32_t VC1 = Srl.getConstantOperandVal(1);
+ uint32_t VC2 = Shl.getConstantOperandVal(1);
+ uint32_t VC3 = And.getConstantOperandVal(1);
+ if (VC2 == (32 - VC1) &&
+ VC3 == maskLeadingOnes<uint32_t>(VC2)) {
+ RS1 = Shl.getOperand(0);
+ Shamt = CurDAG->getTargetConstant(VC1, SDLoc(N),
+ Srl.getOperand(1).getValueType());
+ return true;
+ }
+ }
+ }
+ }
+ }
+ }
+ return false;
+}
+
// Merge an ADDI into the offset of a load/store instruction where possible.
// (load (addi base, off1), off2) -> (load base, off1+off2)
// (store val, (addi base, off1), off2) -> (store val, base, off1+off2)
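A small standalone sketch (illustrative only, not LLVM code) of the constant arithmetic performed by SelectRORI and SelectRORIW above; the local Mask computation stands in for maskLeadingOnes<uint32_t>:

#include <cassert>
#include <cstdint>

// SelectRORI: for (rotl x, VC) the emitted RORI uses shamt = XLen - VC,
// e.g. a rotate-left by 5 on RV64 becomes a rori by 59.
uint64_t roriShamt(uint64_t VC, unsigned XLen) {
  assert(VC > 0 && VC < XLen);
  return XLen - VC;
}

// SelectRORIW: the three constants in the matched tree must satisfy
//   VC2 == 32 - VC1   and   VC3 == maskLeadingOnes<uint32_t>(VC2)
bool roriwConstantsMatch(uint32_t VC1, uint32_t VC2, uint32_t VC3) {
  // Mask of VC2 leading one bits (stand-in for maskLeadingOnes).
  uint32_t Mask = VC2 == 0 ? 0 : 0xFFFFFFFFu << (32 - VC2);
  return VC2 == 32 - VC1 && VC3 == Mask;
}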
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
index 4e382ee..bc1655b6 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -47,9 +47,11 @@ public:
bool SelectSLOI(SDValue N, SDValue &RS1, SDValue &Shamt);
bool SelectSROI(SDValue N, SDValue &RS1, SDValue &Shamt);
+ bool SelectRORI(SDValue N, SDValue &RS1, SDValue &Shamt);
bool SelectSLLIUW(SDValue N, SDValue &RS1, SDValue &Shamt);
bool SelectSLOIW(SDValue N, SDValue &RS1, SDValue &Shamt);
bool SelectSROIW(SDValue N, SDValue &RS1, SDValue &Shamt);
+ bool SelectRORIW(SDValue N, SDValue &RS1, SDValue &Shamt);
// Include the pieces autogenerated from the target description.
#include "RISCVGenDAGISel.inc"
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index c89bb21..7cad9f9 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -149,8 +149,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SRL_PARTS, XLenVT, Custom);
setOperationAction(ISD::SRA_PARTS, XLenVT, Custom);
- setOperationAction(ISD::ROTL, XLenVT, Expand);
- setOperationAction(ISD::ROTR, XLenVT, Expand);
+ if (!(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp())) {
+ setOperationAction(ISD::ROTL, XLenVT, Expand);
+ setOperationAction(ISD::ROTR, XLenVT, Expand);
+ }
if (!Subtarget.hasStdExtZbp())
setOperationAction(ISD::BSWAP, XLenVT, Expand);
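Without Zbb or Zbp, keeping ISD::ROTL/ISD::ROTR marked as Expand makes legalization rewrite rotates in terms of shifts and an OR, roughly as in the sketch below (XLen = 32 assumed; the exact expansion legalization picks may differ). With either subset enabled the nodes stay legal, so the ROL/ROR/RORI patterns added in RISCVInstrInfoB.td can match them directly.

#include <cstdint>

// Approximate shape of the expanded rotate-left when no rotate
// instructions are available; masking keeps the shift amounts in range.
uint32_t expandedRotl32(uint32_t x, uint32_t n) {
  return (x << (n & 31)) | (x >> ((32 - n) & 31));
}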
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoB.td b/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
index 09d5f1e..45eb41f 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
@@ -638,21 +638,46 @@ def : CompressPat<(PACK GPRC:$rs1, GPRC:$rs1, X0),
//===----------------------------------------------------------------------===//
def SLOIPat : ComplexPattern<XLenVT, 2, "SelectSLOI", [or]>;
def SROIPat : ComplexPattern<XLenVT, 2, "SelectSROI", [or]>;
+def RORIPat : ComplexPattern<XLenVT, 2, "SelectRORI", [rotl]>;
def SLLIUWPat : ComplexPattern<i64, 2, "SelectSLLIUW", [and]>;
def SLOIWPat : ComplexPattern<i64, 2, "SelectSLOIW", [sext_inreg]>;
def SROIWPat : ComplexPattern<i64, 2, "SelectSROIW", [or]>;
+def RORIWPat : ComplexPattern<i64, 2, "SelectRORIW", [sext_inreg]>;
+
+let Predicates = [HasStdExtZbbOrZbp] in {
+def : Pat<(and GPR:$rs1, (not GPR:$rs2)), (ANDN GPR:$rs1, GPR:$rs2)>;
+def : Pat<(or GPR:$rs1, (not GPR:$rs2)), (ORN GPR:$rs1, GPR:$rs2)>;
+def : Pat<(xor GPR:$rs1, (not GPR:$rs2)), (XNOR GPR:$rs1, GPR:$rs2)>;
+} // Predicates = [HasStdExtZbbOrZbp]
let Predicates = [HasStdExtZbb] in {
def : Pat<(xor (shl (xor GPR:$rs1, -1), GPR:$rs2), -1),
(SLO GPR:$rs1, GPR:$rs2)>;
def : Pat<(xor (srl (xor GPR:$rs1, -1), GPR:$rs2), -1),
(SRO GPR:$rs1, GPR:$rs2)>;
+} // Predicates = [HasStdExtZbb]
+
+let Predicates = [HasStdExtZbbOrZbp] in {
+def : Pat<(rotl GPR:$rs1, GPR:$rs2), (ROL GPR:$rs1, GPR:$rs2)>;
+def : Pat<(fshl GPR:$rs1, GPR:$rs1, GPR:$rs2), (ROL GPR:$rs1, GPR:$rs2)>;
+def : Pat<(rotr GPR:$rs1, GPR:$rs2), (ROR GPR:$rs1, GPR:$rs2)>;
+def : Pat<(fshr GPR:$rs1, GPR:$rs1, GPR:$rs2), (ROR GPR:$rs1, GPR:$rs2)>;
+} // Predicates = [HasStdExtZbbOrZbp]
+
+let Predicates = [HasStdExtZbb] in {
def : Pat<(SLOIPat GPR:$rs1, uimmlog2xlen:$shamt),
(SLOI GPR:$rs1, uimmlog2xlen:$shamt)>;
def : Pat<(SROIPat GPR:$rs1, uimmlog2xlen:$shamt),
(SROI GPR:$rs1, uimmlog2xlen:$shamt)>;
} // Predicates = [HasStdExtZbb]
+// There's no encoding for roli in the current version of the 'B' extension
+// (v0.92) as it can be implemented with rori by negating the immediate.
+// For this reason we pattern-match only against rori[w].
+let Predicates = [HasStdExtZbbOrZbp] in
+def : Pat<(RORIPat GPR:$rs1, uimmlog2xlen:$shamt),
+ (RORI GPR:$rs1, uimmlog2xlen:$shamt)>;
+
let Predicates = [HasStdExtZbp, IsRV32] in {
def : Pat<(or (or (and (srl GPR:$rs1, (i32 1)), (i32 0x55555555)), GPR:$rs1),
(and (shl GPR:$rs1, (i32 1)), (i32 0xAAAAAAAA))),
@@ -772,6 +797,23 @@ def : Pat<(riscv_selectcc GPR:$rs2, GPR:$rs1, (XLenVT 12), GPR:$rs1, GPR:$rs2),
(MAXU GPR:$rs1, GPR:$rs2)>;
} // Predicates = [HasStdExtZbb]
+let Predicates = [HasStdExtZbbOrZbp, IsRV32] in
+def : Pat<(or (and GPR:$rs1, 0x0000FFFF), (shl GPR:$rs2, (i32 16))),
+ (PACK GPR:$rs1, GPR:$rs2)>;
+let Predicates = [HasStdExtZbbOrZbp, IsRV64] in
+def : Pat<(or (and GPR:$rs1, 0x00000000FFFFFFFF), (shl GPR:$rs2, (i64 32))),
+ (PACK GPR:$rs1, GPR:$rs2)>;
+let Predicates = [HasStdExtZbbOrZbp, IsRV32] in
+def : Pat<(or (and GPR:$rs2, 0xFFFF0000), (srl GPR:$rs1, (i32 16))),
+ (PACKU GPR:$rs1, GPR:$rs2)>;
+let Predicates = [HasStdExtZbbOrZbp, IsRV64] in
+def : Pat<(or (and GPR:$rs2, 0xFFFFFFFF00000000), (srl GPR:$rs1, (i64 32))),
+ (PACKU GPR:$rs1, GPR:$rs2)>;
+let Predicates = [HasStdExtZbbOrZbp] in
+def : Pat<(or (and (shl GPR:$rs2, (XLenVT 8)), 0xFF00),
+ (and GPR:$rs1, 0x00FF)),
+ (PACKH GPR:$rs1, GPR:$rs2)>;
+
let Predicates = [HasStdExtZbp, IsRV32] in {
def : Pat<(or (or (and (shl GPR:$rs1, (i32 8)), (i32 0x00FF0000)),
(and GPR:$rs1, (i32 0xFF0000FF))),
@@ -831,12 +873,30 @@ def : Pat<(xor (riscv_sllw (xor GPR:$rs1, -1), GPR:$rs2), -1),
(SLOW GPR:$rs1, GPR:$rs2)>;
def : Pat<(xor (riscv_srlw (xor GPR:$rs1, -1), GPR:$rs2), -1),
(SROW GPR:$rs1, GPR:$rs2)>;
+} // Predicates = [HasStdExtZbb, IsRV64]
+
+let Predicates = [HasStdExtZbbOrZbp, IsRV64] in {
+def : Pat<(or (riscv_sllw (assertsexti32 GPR:$rs1), (assertsexti32 GPR:$rs2)),
+ (riscv_srlw (assertsexti32 GPR:$rs1),
+ (sub (i64 0), (assertsexti32 GPR:$rs2)))),
+ (ROLW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(or (riscv_sllw (assertsexti32 GPR:$rs1),
+ (sub (i64 0), (assertsexti32 GPR:$rs2))),
+ (riscv_srlw (assertsexti32 GPR:$rs1), (assertsexti32 GPR:$rs2))),
+ (RORW GPR:$rs1, GPR:$rs2)>;
+} // Predicates = [HasStdExtZbbOrZbp, IsRV64]
+
+let Predicates = [HasStdExtZbb, IsRV64] in {
def : Pat<(SLOIWPat GPR:$rs1, uimmlog2xlen:$shamt),
(SLOIW GPR:$rs1, uimmlog2xlen:$shamt)>;
def : Pat<(SROIWPat GPR:$rs1, uimmlog2xlen:$shamt),
(SROIW GPR:$rs1, uimmlog2xlen:$shamt)>;
} // Predicates = [HasStdExtZbb, IsRV64]
+let Predicates = [HasStdExtZbbOrZbp, IsRV64] in
+def : Pat<(RORIWPat GPR:$rs1, uimmlog2xlen:$shamt),
+ (RORIW GPR:$rs1, uimmlog2xlen:$shamt)>;
+
let Predicates = [HasStdExtZbp, IsRV64] in {
def : Pat<(sext_inreg (or (or (and (srl GPR:$rs1, (i64 1)), (i64 0x55555555)),
GPR:$rs1),
@@ -898,3 +958,14 @@ def : Pat<(add (ctlz (and GPR:$rs1, (i64 0xFFFFFFFF))), (i64 -32)),
// RV64 CTZ
def : Pat<(ctpop (and GPR:$rs1, (i64 0xFFFFFFFF))), (PCNTW GPR:$rs1)>;
} // Predicates = [HasStdExtZbb, IsRV64]
+
+let Predicates = [HasStdExtZbbOrZbp, IsRV64] in {
+def : Pat<(sext_inreg (or (shl (assertsexti32 GPR:$rs2), (i64 16)),
+ (and (assertsexti32 GPR:$rs1), 0x000000000000FFFF)),
+ i32),
+ (PACKW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(or (and (assertsexti32 GPR:$rs2), 0xFFFFFFFFFFFF0000),
+ (srl (and (assertsexti32 GPR:$rs1), 0x00000000FFFF0000),
+ (i64 16))),
+ (PACKUW GPR:$rs1, GPR:$rs2)>;
+} // Predicates = [HasStdExtZbbOrZbp, IsRV64]
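The PACK/PACKU/PACKH patterns above encode, in DAG form, the word-level semantics sketched below for RV32 (derived from the masks and shift amounts in the patterns; function names are illustrative):

#include <cstdint>

// pack:  low half of rs1 in bits 15:0, low half of rs2 in bits 31:16.
uint32_t pack32(uint32_t rs1, uint32_t rs2) {
  return (rs1 & 0x0000FFFFu) | (rs2 << 16);
}

// packu: high half of rs1 in bits 15:0, high half of rs2 in bits 31:16.
uint32_t packu32(uint32_t rs1, uint32_t rs2) {
  return (rs1 >> 16) | (rs2 & 0xFFFF0000u);
}

// packh: low byte of rs1 in bits 7:0, low byte of rs2 in bits 15:8.
uint32_t packh32(uint32_t rs1, uint32_t rs2) {
  return (rs1 & 0x00FFu) | ((rs2 << 8) & 0xFF00u);
}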
diff --git a/llvm/test/CodeGen/RISCV/rv32Zbbp.ll b/llvm/test/CodeGen/RISCV/rv32Zbbp.ll
new file mode 100644
index 0000000..0e62889
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv32Zbbp.ll
@@ -0,0 +1,892 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV32I
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-b -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV32IB
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-zbb -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV32IBB
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-zbp -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV32IBP
+
+define i32 @andn_i32(i32 %a, i32 %b) nounwind {
+; RV32I-LABEL: andn_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: not a1, a1
+; RV32I-NEXT: and a0, a1, a0
+; RV32I-NEXT: ret
+;
+; RV32IB-LABEL: andn_i32:
+; RV32IB: # %bb.0:
+; RV32IB-NEXT: andn a0, a0, a1
+; RV32IB-NEXT: ret
+;
+; RV32IBB-LABEL: andn_i32:
+; RV32IBB: # %bb.0:
+; RV32IBB-NEXT: andn a0, a0, a1
+; RV32IBB-NEXT: ret
+;
+; RV32IBP-LABEL: andn_i32:
+; RV32IBP: # %bb.0:
+; RV32IBP-NEXT: andn a0, a0, a1
+; RV32IBP-NEXT: ret
+ %neg = xor i32 %b, -1
+ %and = and i32 %neg, %a
+ ret i32 %and
+}
+
+define i64 @andn_i64(i64 %a, i64 %b) nounwind {
+; RV32I-LABEL: andn_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: not a3, a3
+; RV32I-NEXT: not a2, a2
+; RV32I-NEXT: and a0, a2, a0
+; RV32I-NEXT: and a1, a3, a1
+; RV32I-NEXT: ret
+;
+; RV32IB-LABEL: andn_i64:
+; RV32IB: # %bb.0:
+; RV32IB-NEXT: andn a0, a0, a2
+; RV32IB-NEXT: andn a1, a1, a3
+; RV32IB-NEXT: ret
+;
+; RV32IBB-LABEL: andn_i64:
+; RV32IBB: # %bb.0:
+; RV32IBB-NEXT: andn a0, a0, a2
+; RV32IBB-NEXT: andn a1, a1, a3
+; RV32IBB-NEXT: ret
+;
+; RV32IBP-LABEL: andn_i64:
+; RV32IBP: # %bb.0:
+; RV32IBP-NEXT: andn a0, a0, a2
+; RV32IBP-NEXT: andn a1, a1, a3
+; RV32IBP-NEXT: ret
+ %neg = xor i64 %b, -1
+ %and = and i64 %neg, %a
+ ret i64 %and
+}
+
+define i32 @orn_i32(i32 %a, i32 %b) nounwind {
+; RV32I-LABEL: orn_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: not a1, a1
+; RV32I-NEXT: or a0, a1, a0
+; RV32I-NEXT: ret
+;
+; RV32IB-LABEL: orn_i32:
+; RV32IB: # %bb.0:
+; RV32IB-NEXT: orn a0, a0, a1
+; RV32IB-NEXT: ret
+;
+; RV32IBB-LABEL: orn_i32:
+; RV32IBB: # %bb.0:
+; RV32IBB-NEXT: orn a0, a0, a1
+; RV32IBB-NEXT: ret
+;
+; RV32IBP-LABEL: orn_i32:
+; RV32IBP: # %bb.0:
+; RV32IBP-NEXT: orn a0, a0, a1
+; RV32IBP-NEXT: ret
+ %neg = xor i32 %b, -1
+ %or = or i32 %neg, %a
+ ret i32 %or
+}
+
+define i64 @orn_i64(i64 %a, i64 %b) nounwind {
+; RV32I-LABEL: orn_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: not a3, a3
+; RV32I-NEXT: not a2, a2
+; RV32I-NEXT: or a0, a2, a0
+; RV32I-NEXT: or a1, a3, a1
+; RV32I-NEXT: ret
+;
+; RV32IB-LABEL: orn_i64:
+; RV32IB: # %bb.0:
+; RV32IB-NEXT: orn a0, a0, a2
+; RV32IB-NEXT: orn a1, a1, a3
+; RV32IB-NEXT: ret
+;
+; RV32IBB-LABEL: orn_i64:
+; RV32IBB: # %bb.0:
+; RV32IBB-NEXT: orn a0, a0, a2
+; RV32IBB-NEXT: orn a1, a1, a3
+; RV32IBB-NEXT: ret
+;
+; RV32IBP-LABEL: orn_i64:
+; RV32IBP: # %bb.0:
+; RV32IBP-NEXT: orn a0, a0, a2
+; RV32IBP-NEXT: orn a1, a1, a3
+; RV32IBP-NEXT: ret
+ %neg = xor i64 %b, -1
+ %or = or i64 %neg, %a
+ ret i64 %or
+}
+
+define i32 @xnor_i32(i32 %a, i32 %b) nounwind {
+; RV32I-LABEL: xnor_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: not a0, a0
+; RV32I-NEXT: ret
+;
+; RV32IB-LABEL: xnor_i32:
+; RV32IB: # %bb.0:
+; RV32IB-NEXT: xnor a0, a0, a1
+; RV32IB-NEXT: ret
+;
+; RV32IBB-LABEL: xnor_i32:
+; RV32IBB: # %bb.0:
+; RV32IBB-NEXT: xnor a0, a0, a1
+; RV32IBB-NEXT: ret
+;
+; RV32IBP-LABEL: xnor_i32:
+; RV32IBP: # %bb.0:
+; RV32IBP-NEXT: xnor a0, a0, a1
+; RV32IBP-NEXT: ret
+ %neg = xor i32 %a, -1
+ %xor = xor i32 %neg, %b
+ ret i32 %xor
+}
+
+define i64 @xnor_i64(i64 %a, i64 %b) nounwind {
+; RV32I-LABEL: xnor_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: xor a1, a1, a3
+; RV32I-NEXT: xor a0, a0, a2
+; RV32I-NEXT: not a0, a0
+; RV32I-NEXT: not a1, a1
+; RV32I-NEXT: ret
+;
+; RV32IB-LABEL: xnor_i64:
+; RV32IB: # %bb.0:
+; RV32IB-NEXT: xnor a0, a0, a2
+; RV32IB-NEXT: xnor a1, a1, a3
+; RV32IB-NEXT: ret
+;
+; RV32IBB-LABEL: xnor_i64:
+; RV32IBB: # %bb.0:
+; RV32IBB-NEXT: xnor a0, a0, a2
+; RV32IBB-NEXT: xnor a1, a1, a3
+; RV32IBB-NEXT: ret
+;
+; RV32IBP-LABEL: xnor_i64:
+; RV32IBP: # %bb.0:
+; RV32IBP-NEXT: xnor a0, a0, a2
+; RV32IBP-NEXT: xnor a1, a1, a3
+; RV32IBP-NEXT: ret
+ %neg = xor i64 %a, -1
+ %xor = xor i64 %neg, %b
+ ret i64 %xor
+}
+
+declare i32 @llvm.fshl.i32(i32, i32, i32)
+
+define i32 @rol_i32(i32 %a, i32 %b) nounwind {
+; RV32I-LABEL: rol_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: sll a2, a0, a1
+; RV32I-NEXT: neg a1, a1
+; RV32I-NEXT: srl a0, a0, a1
+; RV32I-NEXT: or a0, a2, a0
+; RV32I-NEXT: ret
+;
+; RV32IB-LABEL: rol_i32:
+; RV32IB: # %bb.0:
+; RV32IB-NEXT: rol a0, a0, a1
+; RV32IB-NEXT: ret
+;
+; RV32IBB-LABEL: rol_i32:
+; RV32IBB: # %bb.0:
+; RV32IBB-NEXT: rol a0, a0, a1
+; RV32IBB-NEXT: ret
+;
+; RV32IBP-LABEL: rol_i32:
+; RV32IBP: # %bb.0:
+; RV32IBP-NEXT: rol a0, a0, a1
+; RV32IBP-NEXT: ret
+ %or = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 %b)
+ ret i32 %or
+}
+
+; Because we do not directly match i64 code patterns on RV32, some i64
+; patterns do not yet map to any bit manipulation instructions on RV32.
+; This test is included here in case future expansions of the experimental-b
+; extension introduce instructions suitable for this pattern.
+
+declare i64 @llvm.fshl.i64(i64, i64, i64)
+
+define i64 @rol_i64(i64 %a, i64 %b) nounwind {
+; RV32I-LABEL: rol_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: andi a3, a2, 63
+; RV32I-NEXT: addi t1, a3, -32
+; RV32I-NEXT: addi a6, zero, 31
+; RV32I-NEXT: bltz t1, .LBB7_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: sll a7, a0, t1
+; RV32I-NEXT: j .LBB7_3
+; RV32I-NEXT: .LBB7_2:
+; RV32I-NEXT: sll a4, a1, a2
+; RV32I-NEXT: sub a3, a6, a3
+; RV32I-NEXT: srli a5, a0, 1
+; RV32I-NEXT: srl a3, a5, a3
+; RV32I-NEXT: or a7, a4, a3
+; RV32I-NEXT: .LBB7_3:
+; RV32I-NEXT: neg a4, a2
+; RV32I-NEXT: andi a5, a4, 63
+; RV32I-NEXT: addi a3, a5, -32
+; RV32I-NEXT: bltz a3, .LBB7_7
+; RV32I-NEXT: # %bb.4:
+; RV32I-NEXT: mv t0, zero
+; RV32I-NEXT: bgez a3, .LBB7_8
+; RV32I-NEXT: .LBB7_5:
+; RV32I-NEXT: srl a3, a0, a4
+; RV32I-NEXT: sub a4, a6, a5
+; RV32I-NEXT: slli a1, a1, 1
+; RV32I-NEXT: sll a1, a1, a4
+; RV32I-NEXT: or a4, a3, a1
+; RV32I-NEXT: or a1, a7, t0
+; RV32I-NEXT: bgez t1, .LBB7_9
+; RV32I-NEXT: .LBB7_6:
+; RV32I-NEXT: sll a0, a0, a2
+; RV32I-NEXT: or a0, a0, a4
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB7_7:
+; RV32I-NEXT: srl t0, a1, a4
+; RV32I-NEXT: bltz a3, .LBB7_5
+; RV32I-NEXT: .LBB7_8:
+; RV32I-NEXT: srl a4, a1, a3
+; RV32I-NEXT: or a1, a7, t0
+; RV32I-NEXT: bltz t1, .LBB7_6
+; RV32I-NEXT: .LBB7_9:
+; RV32I-NEXT: or a0, zero, a4
+; RV32I-NEXT: ret
+;
+; RV32IB-LABEL: rol_i64:
+; RV32IB: # %bb.0:
+; RV32IB-NEXT: andi a3, a2, 63
+; RV32IB-NEXT: addi t1, a3, -32
+; RV32IB-NEXT: addi a6, zero, 31
+; RV32IB-NEXT: bltz t1, .LBB7_2
+; RV32IB-NEXT: # %bb.1:
+; RV32IB-NEXT: sll a7, a0, t1
+; RV32IB-NEXT: j .LBB7_3
+; RV32IB-NEXT: .LBB7_2:
+; RV32IB-NEXT: sll a4, a1, a2
+; RV32IB-NEXT: sub a3, a6, a3
+; RV32IB-NEXT: srli a5, a0, 1
+; RV32IB-NEXT: srl a3, a5, a3
+; RV32IB-NEXT: or a7, a4, a3
+; RV32IB-NEXT: .LBB7_3:
+; RV32IB-NEXT: neg a4, a2
+; RV32IB-NEXT: andi a5, a4, 63
+; RV32IB-NEXT: addi a3, a5, -32
+; RV32IB-NEXT: bltz a3, .LBB7_7
+; RV32IB-NEXT: # %bb.4:
+; RV32IB-NEXT: mv t0, zero
+; RV32IB-NEXT: bgez a3, .LBB7_8
+; RV32IB-NEXT: .LBB7_5:
+; RV32IB-NEXT: srl a3, a0, a4
+; RV32IB-NEXT: sub a4, a6, a5
+; RV32IB-NEXT: slli a1, a1, 1
+; RV32IB-NEXT: sll a1, a1, a4
+; RV32IB-NEXT: or a4, a3, a1
+; RV32IB-NEXT: or a1, a7, t0
+; RV32IB-NEXT: bgez t1, .LBB7_9
+; RV32IB-NEXT: .LBB7_6:
+; RV32IB-NEXT: sll a0, a0, a2
+; RV32IB-NEXT: or a0, a0, a4
+; RV32IB-NEXT: ret
+; RV32IB-NEXT: .LBB7_7:
+; RV32IB-NEXT: srl t0, a1, a4
+; RV32IB-NEXT: bltz a3, .LBB7_5
+; RV32IB-NEXT: .LBB7_8:
+; RV32IB-NEXT: srl a4, a1, a3
+; RV32IB-NEXT: or a1, a7, t0
+; RV32IB-NEXT: bltz t1, .LBB7_6
+; RV32IB-NEXT: .LBB7_9:
+; RV32IB-NEXT: or a0, zero, a4
+; RV32IB-NEXT: ret
+;
+; RV32IBB-LABEL: rol_i64:
+; RV32IBB: # %bb.0:
+; RV32IBB-NEXT: andi a3, a2, 63
+; RV32IBB-NEXT: addi t1, a3, -32
+; RV32IBB-NEXT: addi a6, zero, 31
+; RV32IBB-NEXT: bltz t1, .LBB7_2
+; RV32IBB-NEXT: # %bb.1:
+; RV32IBB-NEXT: sll a7, a0, t1
+; RV32IBB-NEXT: j .LBB7_3
+; RV32IBB-NEXT: .LBB7_2:
+; RV32IBB-NEXT: sll a4, a1, a2
+; RV32IBB-NEXT: sub a3, a6, a3
+; RV32IBB-NEXT: srli a5, a0, 1
+; RV32IBB-NEXT: srl a3, a5, a3
+; RV32IBB-NEXT: or a7, a4, a3
+; RV32IBB-NEXT: .LBB7_3:
+; RV32IBB-NEXT: neg a4, a2
+; RV32IBB-NEXT: andi a5, a4, 63
+; RV32IBB-NEXT: addi a3, a5, -32
+; RV32IBB-NEXT: bltz a3, .LBB7_7
+; RV32IBB-NEXT: # %bb.4:
+; RV32IBB-NEXT: mv t0, zero
+; RV32IBB-NEXT: bgez a3, .LBB7_8
+; RV32IBB-NEXT: .LBB7_5:
+; RV32IBB-NEXT: srl a3, a0, a4
+; RV32IBB-NEXT: sub a4, a6, a5
+; RV32IBB-NEXT: slli a1, a1, 1
+; RV32IBB-NEXT: sll a1, a1, a4
+; RV32IBB-NEXT: or a4, a3, a1
+; RV32IBB-NEXT: or a1, a7, t0
+; RV32IBB-NEXT: bgez t1, .LBB7_9
+; RV32IBB-NEXT: .LBB7_6:
+; RV32IBB-NEXT: sll a0, a0, a2
+; RV32IBB-NEXT: or a0, a0, a4
+; RV32IBB-NEXT: ret
+; RV32IBB-NEXT: .LBB7_7:
+; RV32IBB-NEXT: srl t0, a1, a4
+; RV32IBB-NEXT: bltz a3, .LBB7_5
+; RV32IBB-NEXT: .LBB7_8:
+; RV32IBB-NEXT: srl a4, a1, a3
+; RV32IBB-NEXT: or a1, a7, t0
+; RV32IBB-NEXT: bltz t1, .LBB7_6
+; RV32IBB-NEXT: .LBB7_9:
+; RV32IBB-NEXT: or a0, zero, a4
+; RV32IBB-NEXT: ret
+;
+; RV32IBP-LABEL: rol_i64:
+; RV32IBP: # %bb.0:
+; RV32IBP-NEXT: andi a3, a2, 63
+; RV32IBP-NEXT: addi t1, a3, -32
+; RV32IBP-NEXT: addi a6, zero, 31
+; RV32IBP-NEXT: bltz t1, .LBB7_2
+; RV32IBP-NEXT: # %bb.1:
+; RV32IBP-NEXT: sll a7, a0, t1
+; RV32IBP-NEXT: j .LBB7_3
+; RV32IBP-NEXT: .LBB7_2:
+; RV32IBP-NEXT: sll a4, a1, a2
+; RV32IBP-NEXT: sub a3, a6, a3
+; RV32IBP-NEXT: srli a5, a0, 1
+; RV32IBP-NEXT: srl a3, a5, a3
+; RV32IBP-NEXT: or a7, a4, a3
+; RV32IBP-NEXT: .LBB7_3:
+; RV32IBP-NEXT: neg a4, a2
+; RV32IBP-NEXT: andi a5, a4, 63
+; RV32IBP-NEXT: addi a3, a5, -32
+; RV32IBP-NEXT: bltz a3, .LBB7_7
+; RV32IBP-NEXT: # %bb.4:
+; RV32IBP-NEXT: mv t0, zero
+; RV32IBP-NEXT: bgez a3, .LBB7_8
+; RV32IBP-NEXT: .LBB7_5:
+; RV32IBP-NEXT: srl a3, a0, a4
+; RV32IBP-NEXT: sub a4, a6, a5
+; RV32IBP-NEXT: slli a1, a1, 1
+; RV32IBP-NEXT: sll a1, a1, a4
+; RV32IBP-NEXT: or a4, a3, a1
+; RV32IBP-NEXT: or a1, a7, t0
+; RV32IBP-NEXT: bgez t1, .LBB7_9
+; RV32IBP-NEXT: .LBB7_6:
+; RV32IBP-NEXT: sll a0, a0, a2
+; RV32IBP-NEXT: or a0, a0, a4
+; RV32IBP-NEXT: ret
+; RV32IBP-NEXT: .LBB7_7:
+; RV32IBP-NEXT: srl t0, a1, a4
+; RV32IBP-NEXT: bltz a3, .LBB7_5
+; RV32IBP-NEXT: .LBB7_8:
+; RV32IBP-NEXT: srl a4, a1, a3
+; RV32IBP-NEXT: or a1, a7, t0
+; RV32IBP-NEXT: bltz t1, .LBB7_6
+; RV32IBP-NEXT: .LBB7_9:
+; RV32IBP-NEXT: or a0, zero, a4
+; RV32IBP-NEXT: ret
+ %or = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %b)
+ ret i64 %or
+}
+
+declare i32 @llvm.fshr.i32(i32, i32, i32)
+
+define i32 @ror_i32(i32 %a, i32 %b) nounwind {
+; RV32I-LABEL: ror_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: srl a2, a0, a1
+; RV32I-NEXT: neg a1, a1
+; RV32I-NEXT: sll a0, a0, a1
+; RV32I-NEXT: or a0, a0, a2
+; RV32I-NEXT: ret
+;
+; RV32IB-LABEL: ror_i32:
+; RV32IB: # %bb.0:
+; RV32IB-NEXT: ror a0, a0, a1
+; RV32IB-NEXT: ret
+;
+; RV32IBB-LABEL: ror_i32:
+; RV32IBB: # %bb.0:
+; RV32IBB-NEXT: ror a0, a0, a1
+; RV32IBB-NEXT: ret
+;
+; RV32IBP-LABEL: ror_i32:
+; RV32IBP: # %bb.0:
+; RV32IBP-NEXT: ror a0, a0, a1
+; RV32IBP-NEXT: ret
+ %or = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 %b)
+ ret i32 %or
+}
+
+; Because we do not directly match i64 code patterns on RV32, some i64
+; patterns do not yet map to any bit manipulation instructions on RV32.
+; This test is included here in case future expansions of the experimental-b
+; extension introduce instructions suitable for this pattern.
+
+declare i64 @llvm.fshr.i64(i64, i64, i64)
+
+define i64 @ror_i64(i64 %a, i64 %b) nounwind {
+; RV32I-LABEL: ror_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: andi a3, a2, 63
+; RV32I-NEXT: addi t1, a3, -32
+; RV32I-NEXT: addi a6, zero, 31
+; RV32I-NEXT: bltz t1, .LBB9_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: srl a7, a1, t1
+; RV32I-NEXT: j .LBB9_3
+; RV32I-NEXT: .LBB9_2:
+; RV32I-NEXT: srl a4, a0, a2
+; RV32I-NEXT: sub a3, a6, a3
+; RV32I-NEXT: slli a5, a1, 1
+; RV32I-NEXT: sll a3, a5, a3
+; RV32I-NEXT: or a7, a4, a3
+; RV32I-NEXT: .LBB9_3:
+; RV32I-NEXT: neg a4, a2
+; RV32I-NEXT: andi a5, a4, 63
+; RV32I-NEXT: addi a3, a5, -32
+; RV32I-NEXT: bltz a3, .LBB9_7
+; RV32I-NEXT: # %bb.4:
+; RV32I-NEXT: mv t0, zero
+; RV32I-NEXT: bgez a3, .LBB9_8
+; RV32I-NEXT: .LBB9_5:
+; RV32I-NEXT: sll a3, a1, a4
+; RV32I-NEXT: sub a4, a6, a5
+; RV32I-NEXT: srli a0, a0, 1
+; RV32I-NEXT: srl a0, a0, a4
+; RV32I-NEXT: or a4, a3, a0
+; RV32I-NEXT: or a0, t0, a7
+; RV32I-NEXT: bgez t1, .LBB9_9
+; RV32I-NEXT: .LBB9_6:
+; RV32I-NEXT: srl a1, a1, a2
+; RV32I-NEXT: or a1, a4, a1
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB9_7:
+; RV32I-NEXT: sll t0, a0, a4
+; RV32I-NEXT: bltz a3, .LBB9_5
+; RV32I-NEXT: .LBB9_8:
+; RV32I-NEXT: sll a4, a0, a3
+; RV32I-NEXT: or a0, t0, a7
+; RV32I-NEXT: bltz t1, .LBB9_6
+; RV32I-NEXT: .LBB9_9:
+; RV32I-NEXT: or a1, a4, zero
+; RV32I-NEXT: ret
+;
+; RV32IB-LABEL: ror_i64:
+; RV32IB: # %bb.0:
+; RV32IB-NEXT: andi a3, a2, 63
+; RV32IB-NEXT: addi t1, a3, -32
+; RV32IB-NEXT: addi a6, zero, 31
+; RV32IB-NEXT: bltz t1, .LBB9_2
+; RV32IB-NEXT: # %bb.1:
+; RV32IB-NEXT: srl a7, a1, t1
+; RV32IB-NEXT: j .LBB9_3
+; RV32IB-NEXT: .LBB9_2:
+; RV32IB-NEXT: srl a4, a0, a2
+; RV32IB-NEXT: sub a3, a6, a3
+; RV32IB-NEXT: slli a5, a1, 1
+; RV32IB-NEXT: sll a3, a5, a3
+; RV32IB-NEXT: or a7, a4, a3
+; RV32IB-NEXT: .LBB9_3:
+; RV32IB-NEXT: neg a4, a2
+; RV32IB-NEXT: andi a5, a4, 63
+; RV32IB-NEXT: addi a3, a5, -32
+; RV32IB-NEXT: bltz a3, .LBB9_7
+; RV32IB-NEXT: # %bb.4:
+; RV32IB-NEXT: mv t0, zero
+; RV32IB-NEXT: bgez a3, .LBB9_8
+; RV32IB-NEXT: .LBB9_5:
+; RV32IB-NEXT: sll a3, a1, a4
+; RV32IB-NEXT: sub a4, a6, a5
+; RV32IB-NEXT: srli a0, a0, 1
+; RV32IB-NEXT: srl a0, a0, a4
+; RV32IB-NEXT: or a4, a3, a0
+; RV32IB-NEXT: or a0, t0, a7
+; RV32IB-NEXT: bgez t1, .LBB9_9
+; RV32IB-NEXT: .LBB9_6:
+; RV32IB-NEXT: srl a1, a1, a2
+; RV32IB-NEXT: or a1, a4, a1
+; RV32IB-NEXT: ret
+; RV32IB-NEXT: .LBB9_7:
+; RV32IB-NEXT: sll t0, a0, a4
+; RV32IB-NEXT: bltz a3, .LBB9_5
+; RV32IB-NEXT: .LBB9_8:
+; RV32IB-NEXT: sll a4, a0, a3
+; RV32IB-NEXT: or a0, t0, a7
+; RV32IB-NEXT: bltz t1, .LBB9_6
+; RV32IB-NEXT: .LBB9_9:
+; RV32IB-NEXT: or a1, a4, zero
+; RV32IB-NEXT: ret
+;
+; RV32IBB-LABEL: ror_i64:
+; RV32IBB: # %bb.0:
+; RV32IBB-NEXT: andi a3, a2, 63
+; RV32IBB-NEXT: addi t1, a3, -32
+; RV32IBB-NEXT: addi a6, zero, 31
+; RV32IBB-NEXT: bltz t1, .LBB9_2
+; RV32IBB-NEXT: # %bb.1:
+; RV32IBB-NEXT: srl a7, a1, t1
+; RV32IBB-NEXT: j .LBB9_3
+; RV32IBB-NEXT: .LBB9_2:
+; RV32IBB-NEXT: srl a4, a0, a2
+; RV32IBB-NEXT: sub a3, a6, a3
+; RV32IBB-NEXT: slli a5, a1, 1
+; RV32IBB-NEXT: sll a3, a5, a3
+; RV32IBB-NEXT: or a7, a4, a3
+; RV32IBB-NEXT: .LBB9_3:
+; RV32IBB-NEXT: neg a4, a2
+; RV32IBB-NEXT: andi a5, a4, 63
+; RV32IBB-NEXT: addi a3, a5, -32
+; RV32IBB-NEXT: bltz a3, .LBB9_7
+; RV32IBB-NEXT: # %bb.4:
+; RV32IBB-NEXT: mv t0, zero
+; RV32IBB-NEXT: bgez a3, .LBB9_8
+; RV32IBB-NEXT: .LBB9_5:
+; RV32IBB-NEXT: sll a3, a1, a4
+; RV32IBB-NEXT: sub a4, a6, a5
+; RV32IBB-NEXT: srli a0, a0, 1
+; RV32IBB-NEXT: srl a0, a0, a4
+; RV32IBB-NEXT: or a4, a3, a0
+; RV32IBB-NEXT: or a0, t0, a7
+; RV32IBB-NEXT: bgez t1, .LBB9_9
+; RV32IBB-NEXT: .LBB9_6:
+; RV32IBB-NEXT: srl a1, a1, a2
+; RV32IBB-NEXT: or a1, a4, a1
+; RV32IBB-NEXT: ret
+; RV32IBB-NEXT: .LBB9_7:
+; RV32IBB-NEXT: sll t0, a0, a4
+; RV32IBB-NEXT: bltz a3, .LBB9_5
+; RV32IBB-NEXT: .LBB9_8:
+; RV32IBB-NEXT: sll a4, a0, a3
+; RV32IBB-NEXT: or a0, t0, a7
+; RV32IBB-NEXT: bltz t1, .LBB9_6
+; RV32IBB-NEXT: .LBB9_9:
+; RV32IBB-NEXT: or a1, a4, zero
+; RV32IBB-NEXT: ret
+;
+; RV32IBP-LABEL: ror_i64:
+; RV32IBP: # %bb.0:
+; RV32IBP-NEXT: andi a3, a2, 63
+; RV32IBP-NEXT: addi t1, a3, -32
+; RV32IBP-NEXT: addi a6, zero, 31
+; RV32IBP-NEXT: bltz t1, .LBB9_2
+; RV32IBP-NEXT: # %bb.1:
+; RV32IBP-NEXT: srl a7, a1, t1
+; RV32IBP-NEXT: j .LBB9_3
+; RV32IBP-NEXT: .LBB9_2:
+; RV32IBP-NEXT: srl a4, a0, a2
+; RV32IBP-NEXT: sub a3, a6, a3
+; RV32IBP-NEXT: slli a5, a1, 1
+; RV32IBP-NEXT: sll a3, a5, a3
+; RV32IBP-NEXT: or a7, a4, a3
+; RV32IBP-NEXT: .LBB9_3:
+; RV32IBP-NEXT: neg a4, a2
+; RV32IBP-NEXT: andi a5, a4, 63
+; RV32IBP-NEXT: addi a3, a5, -32
+; RV32IBP-NEXT: bltz a3, .LBB9_7
+; RV32IBP-NEXT: # %bb.4:
+; RV32IBP-NEXT: mv t0, zero
+; RV32IBP-NEXT: bgez a3, .LBB9_8
+; RV32IBP-NEXT: .LBB9_5:
+; RV32IBP-NEXT: sll a3, a1, a4
+; RV32IBP-NEXT: sub a4, a6, a5
+; RV32IBP-NEXT: srli a0, a0, 1
+; RV32IBP-NEXT: srl a0, a0, a4
+; RV32IBP-NEXT: or a4, a3, a0
+; RV32IBP-NEXT: or a0, t0, a7
+; RV32IBP-NEXT: bgez t1, .LBB9_9
+; RV32IBP-NEXT: .LBB9_6:
+; RV32IBP-NEXT: srl a1, a1, a2
+; RV32IBP-NEXT: or a1, a4, a1
+; RV32IBP-NEXT: ret
+; RV32IBP-NEXT: .LBB9_7:
+; RV32IBP-NEXT: sll t0, a0, a4
+; RV32IBP-NEXT: bltz a3, .LBB9_5
+; RV32IBP-NEXT: .LBB9_8:
+; RV32IBP-NEXT: sll a4, a0, a3
+; RV32IBP-NEXT: or a0, t0, a7
+; RV32IBP-NEXT: bltz t1, .LBB9_6
+; RV32IBP-NEXT: .LBB9_9:
+; RV32IBP-NEXT: or a1, a4, zero
+; RV32IBP-NEXT: ret
+ %or = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %b)
+ ret i64 %or
+}
+
+define i32 @rori_i32(i32 %a) nounwind {
+; RV32I-LABEL: rori_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: srli a1, a0, 1
+; RV32I-NEXT: slli a0, a0, 31
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: ret
+;
+; RV32IB-LABEL: rori_i32:
+; RV32IB: # %bb.0:
+; RV32IB-NEXT: rori a0, a0, 1
+; RV32IB-NEXT: ret
+;
+; RV32IBB-LABEL: rori_i32:
+; RV32IBB: # %bb.0:
+; RV32IBB-NEXT: rori a0, a0, 1
+; RV32IBB-NEXT: ret
+;
+; RV32IBP-LABEL: rori_i32:
+; RV32IBP: # %bb.0:
+; RV32IBP-NEXT: rori a0, a0, 1
+; RV32IBP-NEXT: ret
+ %1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 31)
+ ret i32 %1
+}
+
+define i64 @rori_i64(i64 %a) nounwind {
+; RV32I-LABEL: rori_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: slli a2, a1, 31
+; RV32I-NEXT: srli a3, a0, 1
+; RV32I-NEXT: or a2, a3, a2
+; RV32I-NEXT: srli a1, a1, 1
+; RV32I-NEXT: slli a0, a0, 31
+; RV32I-NEXT: or a1, a0, a1
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: ret
+;
+; RV32IB-LABEL: rori_i64:
+; RV32IB: # %bb.0:
+; RV32IB-NEXT: addi a3, zero, 31
+; RV32IB-NEXT: fsl a2, a1, a3, a0
+; RV32IB-NEXT: fsl a1, a0, a3, a1
+; RV32IB-NEXT: mv a0, a2
+; RV32IB-NEXT: ret
+;
+; RV32IBB-LABEL: rori_i64:
+; RV32IBB: # %bb.0:
+; RV32IBB-NEXT: slli a2, a1, 31
+; RV32IBB-NEXT: srli a3, a0, 1
+; RV32IBB-NEXT: or a2, a3, a2
+; RV32IBB-NEXT: srli a1, a1, 1
+; RV32IBB-NEXT: slli a0, a0, 31
+; RV32IBB-NEXT: or a1, a0, a1
+; RV32IBB-NEXT: mv a0, a2
+; RV32IBB-NEXT: ret
+;
+; RV32IBP-LABEL: rori_i64:
+; RV32IBP: # %bb.0:
+; RV32IBP-NEXT: slli a2, a1, 31
+; RV32IBP-NEXT: srli a3, a0, 1
+; RV32IBP-NEXT: or a2, a3, a2
+; RV32IBP-NEXT: srli a1, a1, 1
+; RV32IBP-NEXT: slli a0, a0, 31
+; RV32IBP-NEXT: or a1, a0, a1
+; RV32IBP-NEXT: mv a0, a2
+; RV32IBP-NEXT: ret
+ %1 = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 63)
+ ret i64 %1
+}
+
+define i32 @pack_i32(i32 %a, i32 %b) nounwind {
+; RV32I-LABEL: pack_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: lui a2, 16
+; RV32I-NEXT: addi a2, a2, -1
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: slli a1, a1, 16
+; RV32I-NEXT: or a0, a1, a0
+; RV32I-NEXT: ret
+;
+; RV32IB-LABEL: pack_i32:
+; RV32IB: # %bb.0:
+; RV32IB-NEXT: pack a0, a0, a1
+; RV32IB-NEXT: ret
+;
+; RV32IBB-LABEL: pack_i32:
+; RV32IBB: # %bb.0:
+; RV32IBB-NEXT: pack a0, a0, a1
+; RV32IBB-NEXT: ret
+;
+; RV32IBP-LABEL: pack_i32:
+; RV32IBP: # %bb.0:
+; RV32IBP-NEXT: pack a0, a0, a1
+; RV32IBP-NEXT: ret
+ %shl = and i32 %a, 65535
+ %shl1 = shl i32 %b, 16
+ %or = or i32 %shl1, %shl
+ ret i32 %or
+}
+
+; Because we do not directly match i64 code patterns on RV32, some i64
+; patterns do not yet map to any bit manipulation instructions on RV32.
+; This test is included here in case future expansions of the experimental-b
+; extension introduce instructions suitable for this pattern.
+
+define i64 @pack_i64(i64 %a, i64 %b) nounwind {
+; RV32I-LABEL: pack_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: mv a1, a2
+; RV32I-NEXT: ret
+;
+; RV32IB-LABEL: pack_i64:
+; RV32IB: # %bb.0:
+; RV32IB-NEXT: mv a1, a2
+; RV32IB-NEXT: ret
+;
+; RV32IBB-LABEL: pack_i64:
+; RV32IBB: # %bb.0:
+; RV32IBB-NEXT: mv a1, a2
+; RV32IBB-NEXT: ret
+;
+; RV32IBP-LABEL: pack_i64:
+; RV32IBP: # %bb.0:
+; RV32IBP-NEXT: mv a1, a2
+; RV32IBP-NEXT: ret
+ %shl = and i64 %a, 4294967295
+ %shl1 = shl i64 %b, 32
+ %or = or i64 %shl1, %shl
+ ret i64 %or
+}
+
+define i32 @packu_i32(i32 %a, i32 %b) nounwind {
+; RV32I-LABEL: packu_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: srli a0, a0, 16
+; RV32I-NEXT: lui a2, 1048560
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: or a0, a1, a0
+; RV32I-NEXT: ret
+;
+; RV32IB-LABEL: packu_i32:
+; RV32IB: # %bb.0:
+; RV32IB-NEXT: packu a0, a0, a1
+; RV32IB-NEXT: ret
+;
+; RV32IBB-LABEL: packu_i32:
+; RV32IBB: # %bb.0:
+; RV32IBB-NEXT: packu a0, a0, a1
+; RV32IBB-NEXT: ret
+;
+; RV32IBP-LABEL: packu_i32:
+; RV32IBP: # %bb.0:
+; RV32IBP-NEXT: packu a0, a0, a1
+; RV32IBP-NEXT: ret
+ %shr = lshr i32 %a, 16
+ %shr1 = and i32 %b, -65536
+ %or = or i32 %shr1, %shr
+ ret i32 %or
+}
+
+; Because we do not directly match i64 code patterns on RV32, some i64
+; patterns do not yet map to any bit manipulation instructions on RV32.
+; This test is included here in case future expansions of the experimental-b
+; extension introduce instructions suitable for this pattern.
+
+define i64 @packu_i64(i64 %a, i64 %b) nounwind {
+; RV32I-LABEL: packu_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: mv a0, a1
+; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: ret
+;
+; RV32IB-LABEL: packu_i64:
+; RV32IB: # %bb.0:
+; RV32IB-NEXT: mv a0, a1
+; RV32IB-NEXT: mv a1, a3
+; RV32IB-NEXT: ret
+;
+; RV32IBB-LABEL: packu_i64:
+; RV32IBB: # %bb.0:
+; RV32IBB-NEXT: mv a0, a1
+; RV32IBB-NEXT: mv a1, a3
+; RV32IBB-NEXT: ret
+;
+; RV32IBP-LABEL: packu_i64:
+; RV32IBP: # %bb.0:
+; RV32IBP-NEXT: mv a0, a1
+; RV32IBP-NEXT: mv a1, a3
+; RV32IBP-NEXT: ret
+ %shr = lshr i64 %a, 32
+ %shr1 = and i64 %b, -4294967296
+ %or = or i64 %shr1, %shr
+ ret i64 %or
+}
+
+define i32 @packh_i32(i32 %a, i32 %b) nounwind {
+; RV32I-LABEL: packh_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: slli a1, a1, 8
+; RV32I-NEXT: lui a2, 16
+; RV32I-NEXT: addi a2, a2, -256
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: or a0, a1, a0
+; RV32I-NEXT: ret
+;
+; RV32IB-LABEL: packh_i32:
+; RV32IB: # %bb.0:
+; RV32IB-NEXT: packh a0, a0, a1
+; RV32IB-NEXT: ret
+;
+; RV32IBB-LABEL: packh_i32:
+; RV32IBB: # %bb.0:
+; RV32IBB-NEXT: packh a0, a0, a1
+; RV32IBB-NEXT: ret
+;
+; RV32IBP-LABEL: packh_i32:
+; RV32IBP: # %bb.0:
+; RV32IBP-NEXT: packh a0, a0, a1
+; RV32IBP-NEXT: ret
+ %and = and i32 %a, 255
+ %and1 = shl i32 %b, 8
+ %shl = and i32 %and1, 65280
+ %or = or i32 %shl, %and
+ ret i32 %or
+}
+
+define i64 @packh_i64(i64 %a, i64 %b) nounwind {
+; RV32I-LABEL: packh_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: andi a0, a0, 255
+; RV32I-NEXT: slli a1, a2, 8
+; RV32I-NEXT: lui a2, 16
+; RV32I-NEXT: addi a2, a2, -256
+; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: or a0, a1, a0
+; RV32I-NEXT: mv a1, zero
+; RV32I-NEXT: ret
+;
+; RV32IB-LABEL: packh_i64:
+; RV32IB: # %bb.0:
+; RV32IB-NEXT: packh a0, a0, a2
+; RV32IB-NEXT: mv a1, zero
+; RV32IB-NEXT: ret
+;
+; RV32IBB-LABEL: packh_i64:
+; RV32IBB: # %bb.0:
+; RV32IBB-NEXT: packh a0, a0, a2
+; RV32IBB-NEXT: mv a1, zero
+; RV32IBB-NEXT: ret
+;
+; RV32IBP-LABEL: packh_i64:
+; RV32IBP: # %bb.0:
+; RV32IBP-NEXT: packh a0, a0, a2
+; RV32IBP-NEXT: mv a1, zero
+; RV32IBP-NEXT: ret
+ %and = and i64 %a, 255
+ %and1 = shl i64 %b, 8
+ %shl = and i64 %and1, 65280
+ %or = or i64 %shl, %and
+ ret i64 %or
+}
diff --git a/llvm/test/CodeGen/RISCV/rv64Zbbp.ll b/llvm/test/CodeGen/RISCV/rv64Zbbp.ll
new file mode 100644
index 0000000..c3a6799
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv64Zbbp.ll
@@ -0,0 +1,517 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV64I
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-b -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV64IB
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-zbb -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV64IBB
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-zbp -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV64IBP
+
+define signext i32 @andn_i32(i32 signext %a, i32 signext %b) nounwind {
+; RV64I-LABEL: andn_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: not a1, a1
+; RV64I-NEXT: and a0, a1, a0
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: andn_i32:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: andn a0, a0, a1
+; RV64IB-NEXT: ret
+;
+; RV64IBB-LABEL: andn_i32:
+; RV64IBB: # %bb.0:
+; RV64IBB-NEXT: andn a0, a0, a1
+; RV64IBB-NEXT: ret
+;
+; RV64IBP-LABEL: andn_i32:
+; RV64IBP: # %bb.0:
+; RV64IBP-NEXT: andn a0, a0, a1
+; RV64IBP-NEXT: ret
+ %neg = xor i32 %b, -1
+ %and = and i32 %neg, %a
+ ret i32 %and
+}
+
+define i64 @andn_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: andn_i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: not a1, a1
+; RV64I-NEXT: and a0, a1, a0
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: andn_i64:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: andn a0, a0, a1
+; RV64IB-NEXT: ret
+;
+; RV64IBB-LABEL: andn_i64:
+; RV64IBB: # %bb.0:
+; RV64IBB-NEXT: andn a0, a0, a1
+; RV64IBB-NEXT: ret
+;
+; RV64IBP-LABEL: andn_i64:
+; RV64IBP: # %bb.0:
+; RV64IBP-NEXT: andn a0, a0, a1
+; RV64IBP-NEXT: ret
+ %neg = xor i64 %b, -1
+ %and = and i64 %neg, %a
+ ret i64 %and
+}
+
+define signext i32 @orn_i32(i32 signext %a, i32 signext %b) nounwind {
+; RV64I-LABEL: orn_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: not a1, a1
+; RV64I-NEXT: or a0, a1, a0
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: orn_i32:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: orn a0, a0, a1
+; RV64IB-NEXT: ret
+;
+; RV64IBB-LABEL: orn_i32:
+; RV64IBB: # %bb.0:
+; RV64IBB-NEXT: orn a0, a0, a1
+; RV64IBB-NEXT: ret
+;
+; RV64IBP-LABEL: orn_i32:
+; RV64IBP: # %bb.0:
+; RV64IBP-NEXT: orn a0, a0, a1
+; RV64IBP-NEXT: ret
+ %neg = xor i32 %b, -1
+ %or = or i32 %neg, %a
+ ret i32 %or
+}
+
+define i64 @orn_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: orn_i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: not a1, a1
+; RV64I-NEXT: or a0, a1, a0
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: orn_i64:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: orn a0, a0, a1
+; RV64IB-NEXT: ret
+;
+; RV64IBB-LABEL: orn_i64:
+; RV64IBB: # %bb.0:
+; RV64IBB-NEXT: orn a0, a0, a1
+; RV64IBB-NEXT: ret
+;
+; RV64IBP-LABEL: orn_i64:
+; RV64IBP: # %bb.0:
+; RV64IBP-NEXT: orn a0, a0, a1
+; RV64IBP-NEXT: ret
+ %neg = xor i64 %b, -1
+ %or = or i64 %neg, %a
+ ret i64 %or
+}
+
+define signext i32 @xnor_i32(i32 signext %a, i32 signext %b) nounwind {
+; RV64I-LABEL: xnor_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: not a0, a0
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: xnor_i32:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: xnor a0, a0, a1
+; RV64IB-NEXT: ret
+;
+; RV64IBB-LABEL: xnor_i32:
+; RV64IBB: # %bb.0:
+; RV64IBB-NEXT: xnor a0, a0, a1
+; RV64IBB-NEXT: ret
+;
+; RV64IBP-LABEL: xnor_i32:
+; RV64IBP: # %bb.0:
+; RV64IBP-NEXT: xnor a0, a0, a1
+; RV64IBP-NEXT: ret
+ %neg = xor i32 %a, -1
+ %xor = xor i32 %neg, %b
+ ret i32 %xor
+}
+
+define i64 @xnor_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: xnor_i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: not a0, a0
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: xnor_i64:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: xnor a0, a0, a1
+; RV64IB-NEXT: ret
+;
+; RV64IBB-LABEL: xnor_i64:
+; RV64IBB: # %bb.0:
+; RV64IBB-NEXT: xnor a0, a0, a1
+; RV64IBB-NEXT: ret
+;
+; RV64IBP-LABEL: xnor_i64:
+; RV64IBP: # %bb.0:
+; RV64IBP-NEXT: xnor a0, a0, a1
+; RV64IBP-NEXT: ret
+ %neg = xor i64 %a, -1
+ %xor = xor i64 %neg, %b
+ ret i64 %xor
+}
+
+declare i32 @llvm.fshl.i32(i32, i32, i32)
+
+define signext i32 @rol_i32(i32 signext %a, i32 signext %b) nounwind {
+; RV64I-LABEL: rol_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: sllw a2, a0, a1
+; RV64I-NEXT: neg a1, a1
+; RV64I-NEXT: srlw a0, a0, a1
+; RV64I-NEXT: or a0, a2, a0
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: rol_i32:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: rolw a0, a0, a1
+; RV64IB-NEXT: ret
+;
+; RV64IBB-LABEL: rol_i32:
+; RV64IBB: # %bb.0:
+; RV64IBB-NEXT: rolw a0, a0, a1
+; RV64IBB-NEXT: ret
+;
+; RV64IBP-LABEL: rol_i32:
+; RV64IBP: # %bb.0:
+; RV64IBP-NEXT: rolw a0, a0, a1
+; RV64IBP-NEXT: ret
+ %1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 %b)
+ ret i32 %1
+}
+
+declare i64 @llvm.fshl.i64(i64, i64, i64)
+
+define i64 @rol_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: rol_i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: sll a2, a0, a1
+; RV64I-NEXT: neg a1, a1
+; RV64I-NEXT: srl a0, a0, a1
+; RV64I-NEXT: or a0, a2, a0
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: rol_i64:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: rol a0, a0, a1
+; RV64IB-NEXT: ret
+;
+; RV64IBB-LABEL: rol_i64:
+; RV64IBB: # %bb.0:
+; RV64IBB-NEXT: rol a0, a0, a1
+; RV64IBB-NEXT: ret
+;
+; RV64IBP-LABEL: rol_i64:
+; RV64IBP: # %bb.0:
+; RV64IBP-NEXT: rol a0, a0, a1
+; RV64IBP-NEXT: ret
+ %or = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %b)
+ ret i64 %or
+}
+
+declare i32 @llvm.fshr.i32(i32, i32, i32)
+
+define signext i32 @ror_i32(i32 signext %a, i32 signext %b) nounwind {
+; RV64I-LABEL: ror_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: srlw a2, a0, a1
+; RV64I-NEXT: neg a1, a1
+; RV64I-NEXT: sllw a0, a0, a1
+; RV64I-NEXT: or a0, a0, a2
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: ror_i32:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: rorw a0, a0, a1
+; RV64IB-NEXT: ret
+;
+; RV64IBB-LABEL: ror_i32:
+; RV64IBB: # %bb.0:
+; RV64IBB-NEXT: rorw a0, a0, a1
+; RV64IBB-NEXT: ret
+;
+; RV64IBP-LABEL: ror_i32:
+; RV64IBP: # %bb.0:
+; RV64IBP-NEXT: rorw a0, a0, a1
+; RV64IBP-NEXT: ret
+ %1 = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 %b)
+ ret i32 %1
+}
+
+declare i64 @llvm.fshr.i64(i64, i64, i64)
+
+define i64 @ror_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: ror_i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: srl a2, a0, a1
+; RV64I-NEXT: neg a1, a1
+; RV64I-NEXT: sll a0, a0, a1
+; RV64I-NEXT: or a0, a0, a2
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: ror_i64:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: ror a0, a0, a1
+; RV64IB-NEXT: ret
+;
+; RV64IBB-LABEL: ror_i64:
+; RV64IBB: # %bb.0:
+; RV64IBB-NEXT: ror a0, a0, a1
+; RV64IBB-NEXT: ret
+;
+; RV64IBP-LABEL: ror_i64:
+; RV64IBP: # %bb.0:
+; RV64IBP-NEXT: ror a0, a0, a1
+; RV64IBP-NEXT: ret
+ %or = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %b)
+ ret i64 %or
+}
+
+define signext i32 @rori_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: rori_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: srliw a1, a0, 1
+; RV64I-NEXT: slli a0, a0, 31
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: rori_i32:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: fsriw a0, a0, a0, 1
+; RV64IB-NEXT: ret
+;
+; RV64IBB-LABEL: rori_i32:
+; RV64IBB: # %bb.0:
+; RV64IBB-NEXT: roriw a0, a0, 1
+; RV64IBB-NEXT: ret
+;
+; RV64IBP-LABEL: rori_i32:
+; RV64IBP: # %bb.0:
+; RV64IBP-NEXT: roriw a0, a0, 1
+; RV64IBP-NEXT: ret
+ %1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 31)
+ ret i32 %1
+}
+
+define i64 @rori_i64(i64 %a) nounwind {
+; RV64I-LABEL: rori_i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: srli a1, a0, 1
+; RV64I-NEXT: slli a0, a0, 63
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: rori_i64:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: rori a0, a0, 1
+; RV64IB-NEXT: ret
+;
+; RV64IBB-LABEL: rori_i64:
+; RV64IBB: # %bb.0:
+; RV64IBB-NEXT: rori a0, a0, 1
+; RV64IBB-NEXT: ret
+;
+; RV64IBP-LABEL: rori_i64:
+; RV64IBP: # %bb.0:
+; RV64IBP-NEXT: rori a0, a0, 1
+; RV64IBP-NEXT: ret
+ %1 = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 63)
+ ret i64 %1
+}
+
+define signext i32 @pack_i32(i32 signext %a, i32 signext %b) nounwind {
+; RV64I-LABEL: pack_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: lui a2, 16
+; RV64I-NEXT: addiw a2, a2, -1
+; RV64I-NEXT: and a0, a0, a2
+; RV64I-NEXT: slli a1, a1, 16
+; RV64I-NEXT: or a0, a1, a0
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: pack_i32:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: packw a0, a0, a1
+; RV64IB-NEXT: ret
+;
+; RV64IBB-LABEL: pack_i32:
+; RV64IBB: # %bb.0:
+; RV64IBB-NEXT: packw a0, a0, a1
+; RV64IBB-NEXT: ret
+;
+; RV64IBP-LABEL: pack_i32:
+; RV64IBP: # %bb.0:
+; RV64IBP-NEXT: packw a0, a0, a1
+; RV64IBP-NEXT: ret
+ %shl = and i32 %a, 65535
+ %shl1 = shl i32 %b, 16
+ %or = or i32 %shl1, %shl
+ ret i32 %or
+}
+
+define i64 @pack_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: pack_i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: slli a1, a1, 32
+; RV64I-NEXT: or a0, a1, a0
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: pack_i64:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: pack a0, a0, a1
+; RV64IB-NEXT: ret
+;
+; RV64IBB-LABEL: pack_i64:
+; RV64IBB: # %bb.0:
+; RV64IBB-NEXT: pack a0, a0, a1
+; RV64IBB-NEXT: ret
+;
+; RV64IBP-LABEL: pack_i64:
+; RV64IBP: # %bb.0:
+; RV64IBP-NEXT: pack a0, a0, a1
+; RV64IBP-NEXT: ret
+ %shl = and i64 %a, 4294967295
+ %shl1 = shl i64 %b, 32
+ %or = or i64 %shl1, %shl
+ ret i64 %or
+}
+
+define signext i32 @packu_i32(i32 signext %a, i32 signext %b) nounwind {
+; RV64I-LABEL: packu_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: srliw a0, a0, 16
+; RV64I-NEXT: lui a2, 1048560
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: or a0, a1, a0
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: packu_i32:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: packuw a0, a0, a1
+; RV64IB-NEXT: ret
+;
+; RV64IBB-LABEL: packu_i32:
+; RV64IBB: # %bb.0:
+; RV64IBB-NEXT: packuw a0, a0, a1
+; RV64IBB-NEXT: ret
+;
+; RV64IBP-LABEL: packu_i32:
+; RV64IBP: # %bb.0:
+; RV64IBP-NEXT: packuw a0, a0, a1
+; RV64IBP-NEXT: ret
+ %shr = lshr i32 %a, 16
+ %shr1 = and i32 %b, -65536
+ %or = or i32 %shr1, %shr
+ ret i32 %or
+}
+
+define i64 @packu_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: packu_i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: addi a2, zero, -1
+; RV64I-NEXT: slli a2, a2, 32
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: or a0, a1, a0
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: packu_i64:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: packu a0, a0, a1
+; RV64IB-NEXT: ret
+;
+; RV64IBB-LABEL: packu_i64:
+; RV64IBB: # %bb.0:
+; RV64IBB-NEXT: packu a0, a0, a1
+; RV64IBB-NEXT: ret
+;
+; RV64IBP-LABEL: packu_i64:
+; RV64IBP: # %bb.0:
+; RV64IBP-NEXT: packu a0, a0, a1
+; RV64IBP-NEXT: ret
+ %shr = lshr i64 %a, 32
+ %shr1 = and i64 %b, -4294967296
+ %or = or i64 %shr1, %shr
+ ret i64 %or
+}
+
+define signext i32 @packh_i32(i32 signext %a, i32 signext %b) nounwind {
+; RV64I-LABEL: packh_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: andi a0, a0, 255
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: lui a2, 16
+; RV64I-NEXT: addiw a2, a2, -256
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: or a0, a1, a0
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: packh_i32:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: packh a0, a0, a1
+; RV64IB-NEXT: ret
+;
+; RV64IBB-LABEL: packh_i32:
+; RV64IBB: # %bb.0:
+; RV64IBB-NEXT: packh a0, a0, a1
+; RV64IBB-NEXT: ret
+;
+; RV64IBP-LABEL: packh_i32:
+; RV64IBP: # %bb.0:
+; RV64IBP-NEXT: packh a0, a0, a1
+; RV64IBP-NEXT: ret
+ %and = and i32 %a, 255
+ %and1 = shl i32 %b, 8
+ %shl = and i32 %and1, 65280
+ %or = or i32 %shl, %and
+ ret i32 %or
+}
+
+define i64 @packh_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: packh_i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: andi a0, a0, 255
+; RV64I-NEXT: slli a1, a1, 8
+; RV64I-NEXT: lui a2, 16
+; RV64I-NEXT: addiw a2, a2, -256
+; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: or a0, a1, a0
+; RV64I-NEXT: ret
+;
+; RV64IB-LABEL: packh_i64:
+; RV64IB: # %bb.0:
+; RV64IB-NEXT: packh a0, a0, a1
+; RV64IB-NEXT: ret
+;
+; RV64IBB-LABEL: packh_i64:
+; RV64IBB: # %bb.0:
+; RV64IBB-NEXT: packh a0, a0, a1
+; RV64IBB-NEXT: ret
+;
+; RV64IBP-LABEL: packh_i64:
+; RV64IBP: # %bb.0:
+; RV64IBP-NEXT: packh a0, a0, a1
+; RV64IBP-NEXT: ret
+ %and = and i64 %a, 255
+ %and1 = shl i64 %b, 8
+ %shl = and i64 %and1, 65280
+ %or = or i64 %shl, %and
+ ret i64 %or
+}