diff options
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 20 |
| -rw-r--r-- | llvm/test/CodeGen/RISCV/rv64-legal-i32/xaluo.ll | 29 |
2 files changed, 32 insertions, 17 deletions
Patch summary (reviewer note; text ahead of the first "diff --git" header is not part of any hunk):
  * In the branch that already marks i128 MUL as Custom, i32 ISD::SMULO is now marked Custom
    when RV64LegalI32 is set (the pre-existing i32 MUL Custom action stays on the
    !RV64LegalI32 path).
  * lowerSMULO sign-extends both operands to i64 and multiplies them. The value result is the
    product truncated to i32; the overflow result is set when the i64 product differs from
    its own sign-extension from 32 bits.
  * LowerOperation dispatches ISD::SMULO to lowerSMULO.
  * The xaluo.ll CHECK lines change from mul + srai/sraiw sequences to a mulw/mul pair that
    is compared directly.
NOTE(review): line breaks and C++ indentation below were reconstructed from a flattened page
extraction, and the CHECK lines keep the collapsed single spacing of that extraction. Verify
whitespace against the upstream commit before applying this patch.

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 37d8ada..212ea73 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -297,6 +297,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::MUL, MVT::i128, Custom);
     if (!RV64LegalI32)
       setOperationAction(ISD::MUL, MVT::i32, Custom);
+    else
+      setOperationAction(ISD::SMULO, MVT::i32, Custom);
   } else {
     setOperationAction(ISD::MUL, MVT::i64, Custom);
   }
@@ -5352,6 +5354,22 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
   return Op;
 }
 
+// Custom lower i32 SMULO with RV64LegalI32 so we take advantage of mulw.
+static SDValue lowerSMULO(SDValue Op, SelectionDAG &DAG) {
+  assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&
+         "Unexpected custom legalisation");
+  SDLoc DL(Op);
+  SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0));
+  SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1));
+  SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
+  SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
+  SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Mul,
+                             DAG.getValueType(MVT::i32));
+  SDValue Ovf = DAG.getSetCC(DL, Op.getValue(1).getValueType(), Mul, SExt,
+                             ISD::SETNE);
+  return DAG.getMergeValues({Res, Ovf}, DL);
+}
+
 SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
                                              SelectionDAG &DAG) const {
   SDLoc DL(Op);
@@ -5855,6 +5873,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
     return lowerFRAMEADDR(Op, DAG);
   case ISD::RETURNADDR:
     return lowerRETURNADDR(Op, DAG);
+  case ISD::SMULO:
+    return lowerSMULO(Op, DAG);
   case ISD::SHL_PARTS:
     return lowerShiftLeftParts(Op, DAG);
   case ISD::SRA_PARTS:
diff --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/xaluo.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/xaluo.ll
index 62ea2521..aa3d9bf 100644
--- a/llvm/test/CodeGen/RISCV/rv64-legal-i32/xaluo.ll
+++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/xaluo.ll
@@ -310,11 +310,10 @@ entry:
 define zeroext i1 @smulo.i32(i32 signext %v1, i32 signext %v2, ptr %res) {
 ; RV64-LABEL: smulo.i32:
 ; RV64: # %bb.0: # %entry
+; RV64-NEXT: mulw a3, a0, a1
 ; RV64-NEXT: mul a1, a0, a1
-; RV64-NEXT: srai a0, a1, 32
-; RV64-NEXT: sraiw a3, a1, 31
-; RV64-NEXT: xor a0, a0, a3
-; RV64-NEXT: snez a0, a0
+; RV64-NEXT: xor a3, a1, a3
+; RV64-NEXT: snez a0, a3
 ; RV64-NEXT: sw a1, 0(a2)
 ; RV64-NEXT: ret
 entry:
@@ -329,11 +328,10 @@ define zeroext i1 @smulo2.i32(i32 signext %v1, ptr %res) {
 ; RV64-LABEL: smulo2.i32:
 ; RV64: # %bb.0: # %entry
 ; RV64-NEXT: li a2, 13
+; RV64-NEXT: mulw a3, a0, a2
 ; RV64-NEXT: mul a2, a0, a2
-; RV64-NEXT: srai a0, a2, 32
-; RV64-NEXT: sraiw a3, a2, 31
-; RV64-NEXT: xor a0, a0, a3
-; RV64-NEXT: snez a0, a0
+; RV64-NEXT: xor a3, a2, a3
+; RV64-NEXT: snez a0, a3
 ; RV64-NEXT: sw a2, 0(a1)
 ; RV64-NEXT: ret
 entry:
@@ -736,9 +734,8 @@ entry:
 define i32 @smulo.select.i32(i32 signext %v1, i32 signext %v2) {
 ; RV64-LABEL: smulo.select.i32:
 ; RV64: # %bb.0: # %entry
-; RV64-NEXT: mul a2, a0, a1
-; RV64-NEXT: srai a3, a2, 32
-; RV64-NEXT: sraiw a2, a2, 31
+; RV64-NEXT: mulw a2, a0, a1
+; RV64-NEXT: mul a3, a0, a1
 ; RV64-NEXT: bne a3, a2, .LBB44_2
 ; RV64-NEXT: # %bb.1: # %entry
 ; RV64-NEXT: mv a0, a1
@@ -754,10 +751,9 @@ entry:
 define i1 @smulo.not.i32(i32 signext %v1, i32 signext %v2) {
 ; RV64-LABEL: smulo.not.i32:
 ; RV64: # %bb.0: # %entry
+; RV64-NEXT: mulw a2, a0, a1
 ; RV64-NEXT: mul a0, a0, a1
-; RV64-NEXT: srai a1, a0, 32
-; RV64-NEXT: sraiw a0, a0, 31
-; RV64-NEXT: xor a0, a1, a0
+; RV64-NEXT: xor a0, a0, a2
 ; RV64-NEXT: snez a0, a0
 ; RV64-NEXT: xori a0, a0, 1
 ; RV64-NEXT: ret
@@ -1077,10 +1073,9 @@ continue:
 define zeroext i1 @smulo.br.i32(i32 signext %v1, i32 signext %v2) {
 ; RV64-LABEL: smulo.br.i32:
 ; RV64: # %bb.0: # %entry
+; RV64-NEXT: mulw a2, a0, a1
 ; RV64-NEXT: mul a0, a0, a1
-; RV64-NEXT: srai a1, a0, 32
-; RV64-NEXT: sraiw a0, a0, 31
-; RV64-NEXT: beq a1, a0, .LBB60_2
+; RV64-NEXT: beq a0, a2, .LBB60_2
 ; RV64-NEXT: # %bb.1: # %overflow
 ; RV64-NEXT: li a0, 0
 ; RV64-NEXT: ret
