diff options
-rw-r--r-- | llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp | 6 | ||||
-rw-r--r-- | llvm/lib/Target/ARM/ARMISelLowering.cpp | 17 | ||||
-rw-r--r-- | llvm/lib/Target/ARM/ARMISelLowering.h | 8 | ||||
-rw-r--r-- | llvm/lib/Target/ARM/ARMInstrInfo.td | 42 | ||||
-rw-r--r-- | llvm/lib/Target/ARM/ARMInstrThumb2.td | 25 | ||||
-rw-r--r-- | llvm/lib/Target/ARM/ARMScheduleM7.td | 4 | ||||
-rw-r--r-- | llvm/lib/Target/ARM/ARMScheduleM85.td | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/ARM/urem-seteq-illegal-types.ll | 48 |
8 files changed, 84 insertions, 68 deletions
diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 8e79a0a..3fda15a 100644 --- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -2590,14 +2590,14 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, return true; } - case ARM::MOVsrl_glue: - case ARM::MOVsra_glue: { + case ARM::LSRs1: + case ARM::ASRs1: { // These are just fancy MOVs instructions. BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi), MI.getOperand(0).getReg()) .add(MI.getOperand(1)) .addImm(ARM_AM::getSORegOpc( - (Opcode == ARM::MOVsrl_glue ? ARM_AM::lsr : ARM_AM::asr), 1)) + (Opcode == ARM::LSRs1 ? ARM_AM::lsr : ARM_AM::asr), 1)) .add(predOps(ARMCC::AL)) .addReg(ARM::CPSR, RegState::Define); MI.eraseFromParent(); diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 554f733..84b37ae 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -149,6 +149,9 @@ MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden, cl::desc("Maximum interleave factor for MVE VLDn to generate."), cl::init(2)); +/// Value type used for "flags" operands / results (either CPSR or FPSCR_NZCV). +constexpr MVT FlagsVT = MVT::i32; + // The APCS parameter registers. 
static const MCPhysReg GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 @@ -1730,14 +1733,14 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { MAKE_CASE(ARMISD::ASRL) MAKE_CASE(ARMISD::LSRL) MAKE_CASE(ARMISD::LSLL) - MAKE_CASE(ARMISD::SRL_GLUE) - MAKE_CASE(ARMISD::SRA_GLUE) + MAKE_CASE(ARMISD::LSLS) + MAKE_CASE(ARMISD::LSRS1) + MAKE_CASE(ARMISD::ASRS1) MAKE_CASE(ARMISD::RRX) MAKE_CASE(ARMISD::ADDC) MAKE_CASE(ARMISD::ADDE) MAKE_CASE(ARMISD::SUBC) MAKE_CASE(ARMISD::SUBE) - MAKE_CASE(ARMISD::LSLS) MAKE_CASE(ARMISD::VMOVRRD) MAKE_CASE(ARMISD::VMOVDRR) MAKE_CASE(ARMISD::VMOVhr) @@ -6846,10 +6849,10 @@ static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG, SDValue Lo, Hi; std::tie(Lo, Hi) = DAG.SplitScalar(N->getOperand(0), dl, MVT::i32, MVT::i32); - // First, build a SRA_GLUE/SRL_GLUE op, which shifts the top part by one and - // captures the result into a carry flag. - unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_GLUE:ARMISD::SRA_GLUE; - Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), Hi); + // First, build a LSRS1/ASRS1 op, which shifts the top part by one and + // captures the shifted out bit into a carry flag. + unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::LSRS1 : ARMISD::ASRS1; + Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, FlagsVT), Hi); // The low part is an ARMISD::RRX operand, which shifts the carry in. Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1)); diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h index 0e086f3..344a0ad 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -101,15 +101,15 @@ class VectorType; BCC_i64, - SRL_GLUE, // V,Flag = srl_flag X -> srl X, 1 + save carry out. - SRA_GLUE, // V,Flag = sra_flag X -> sra X, 1 + save carry out. - RRX, // V = RRX X, Flag -> srl X, 1 + shift in carry flag. + LSLS, // Flag-setting shift left. 
+ LSRS1, // Flag-setting logical shift right by one bit. + ASRS1, // Flag-setting arithmetic shift right by one bit. + RRX, // Shift right one bit with carry in. ADDC, // Add with carry ADDE, // Add using carry SUBC, // Sub with carry SUBE, // Sub using carry - LSLS, // Shift left producing carry VMOVRRD, // double to two gprs. VMOVDRR, // Two gprs to double. diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td index 72146f2..db38b43 100644 --- a/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -14,6 +14,9 @@ // ARM specific DAG Nodes. // +/// Value type used for "flags" operands / results (either CPSR or FPSCR_NZCV). +defvar FlagsVT = i32; + // Type profiles. def SDT_ARMCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>; @@ -77,6 +80,18 @@ def SDT_ARMMEMCPY : SDTypeProfile<2, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>, SDTCisVT<3, i32>, SDTCisVT<4, i32>]>; +def SDTIntUnaryOpWithFlagsOut : SDTypeProfile<2, 1, [ + SDTCisInt<0>, // result + SDTCisVT<1, FlagsVT>, // out flags + SDTCisSameAs<2, 0> // operand +]>; + +def SDTIntUnaryOpWithFlagsIn : SDTypeProfile<1, 2, [ + SDTCisInt<0>, // result + SDTCisSameAs<1, 0>, // operand + SDTCisVT<2, FlagsVT> // in flags +]>; + def SDTBinaryArithWithFlags : SDTypeProfile<2, 2, [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, @@ -191,9 +206,9 @@ def ARMasrl : SDNode<"ARMISD::ASRL", SDT_ARMIntShiftParts, []>; def ARMlsrl : SDNode<"ARMISD::LSRL", SDT_ARMIntShiftParts, []>; def ARMlsll : SDNode<"ARMISD::LSLL", SDT_ARMIntShiftParts, []>; -def ARMsrl_glue : SDNode<"ARMISD::SRL_GLUE", SDTIntUnaryOp, [SDNPOutGlue]>; -def ARMsra_glue : SDNode<"ARMISD::SRA_GLUE", SDTIntUnaryOp, [SDNPOutGlue]>; -def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOp, [SDNPInGlue ]>; +def ARMlsrs1 : SDNode<"ARMISD::LSRS1", SDTIntUnaryOpWithFlagsOut>; +def ARMasrs1 : SDNode<"ARMISD::ASRS1", SDTIntUnaryOpWithFlagsOut>; +def ARMrrx : SDNode<"ARMISD::RRX" , 
SDTIntUnaryOpWithFlagsIn>; def ARMaddc : SDNode<"ARMISD::ADDC", SDTBinaryArithWithFlags, [SDNPCommutative]>; @@ -3730,20 +3745,17 @@ def : ARMPat<(or GPR:$src, 0xffff0000), (MOVTi16 GPR:$src, 0xffff)>, Requires<[IsARM, HasV6T2]>; let Uses = [CPSR] in -def RRX: PseudoInst<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVsi, - [(set GPR:$Rd, (ARMrrx GPR:$Rm))]>, UnaryDP, - Requires<[IsARM]>, Sched<[WriteALU]>; - -// These aren't really mov instructions, but we have to define them this way -// due to glue operands. +def RRX : PseudoInst<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVsi, + [(set GPR:$Rd, (ARMrrx GPR:$Rm, CPSR))]>, + UnaryDP, Requires<[IsARM]>, Sched<[WriteALU]>; let Defs = [CPSR] in { -def MOVsrl_glue : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, - [(set GPR:$dst, (ARMsrl_glue GPR:$src))]>, UnaryDP, - Sched<[WriteALU]>, Requires<[IsARM]>; -def MOVsra_glue : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, - [(set GPR:$dst, (ARMsra_glue GPR:$src))]>, UnaryDP, - Sched<[WriteALU]>, Requires<[IsARM]>; + def LSRs1 : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, + [(set GPR:$dst, CPSR, (ARMlsrs1 GPR:$src))]>, + UnaryDP, Sched<[WriteALU]>, Requires<[IsARM]>; + def ASRs1 : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, + [(set GPR:$dst, CPSR, (ARMasrs1 GPR:$src))]>, + UnaryDP, Sched<[WriteALU]>, Requires<[IsARM]>; } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td index 4e9160b..aa5c0a5 100644 --- a/llvm/lib/Target/ARM/ARMInstrThumb2.td +++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -2787,8 +2787,9 @@ def : T2Pat<(rotr rGPR:$lhs, (and rGPR:$rhs, lo5AllOne)), let Uses = [CPSR] in { def t2RRX : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi, - "rrx", "\t$Rd, $Rm", - [(set rGPR:$Rd, (ARMrrx rGPR:$Rm))]>, Sched<[WriteALU]> { + "rrx", "\t$Rd, $Rm", + [(set rGPR:$Rd, (ARMrrx rGPR:$Rm, CPSR))]>, + Sched<[WriteALU]> { 
let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = 0b0010; @@ -2800,12 +2801,13 @@ def t2RRX : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi, } } +// These differ from t2LSRri / t2ASRri in that they are flag-setting +// and have a hardcoded shift amount = 1. let isCodeGenOnly = 1, Defs = [CPSR] in { -def t2MOVsrl_glue : T2TwoRegShiftImm< - (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi, - "lsrs", ".w\t$Rd, $Rm, #1", - [(set rGPR:$Rd, (ARMsrl_glue rGPR:$Rm))]>, - Sched<[WriteALU]> { +def t2LSRs1 : T2TwoRegShiftImm<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi, + "lsrs", ".w\t$Rd, $Rm, #1", + [(set rGPR:$Rd, CPSR, (ARMlsrs1 rGPR:$Rm))]>, + Sched<[WriteALU]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = 0b0010; @@ -2816,11 +2818,10 @@ def t2MOVsrl_glue : T2TwoRegShiftImm< let Inst{14-12} = 0b000; let Inst{7-6} = 0b01; } -def t2MOVsra_glue : T2TwoRegShiftImm< - (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi, - "asrs", ".w\t$Rd, $Rm, #1", - [(set rGPR:$Rd, (ARMsra_glue rGPR:$Rm))]>, - Sched<[WriteALU]> { +def t2ASRs1 : T2TwoRegShiftImm<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi, + "asrs", ".w\t$Rd, $Rm, #1", + [(set rGPR:$Rd, CPSR, (ARMasrs1 rGPR:$Rm))]>, + Sched<[WriteALU]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = 0b0010; diff --git a/llvm/lib/Target/ARM/ARMScheduleM7.td b/llvm/lib/Target/ARM/ARMScheduleM7.td index 25bc840..99d2e4a 100644 --- a/llvm/lib/Target/ARM/ARMScheduleM7.td +++ b/llvm/lib/Target/ARM/ARMScheduleM7.td @@ -325,7 +325,7 @@ def M7Ex1ReadNoFastBypass : SchedReadAdvance<-1, [WriteLd, M7LoadLatency1]>; def : InstRW<[WriteALUsi, M7Ex1ReadNoFastBypass, M7Read_ISS], (instregex "t2(ADC|ADDS|ADD|BIC|EOR|ORN|ORR|RSBS|RSB|SBC|SUBS)rs$", "t2(SUB|CMP|CMNz|TEQ|TST)rs$", - "t2MOVsr(a|l)")>; + "t2(A|L)SRs1$")>; def : InstRW<[WriteALUsi, M7Read_ISS], (instregex "t2MVNs")>; @@ -335,7 +335,7 @@ def : InstRW<[WriteALUsi, M7Read_ISS], // but the results prove to be better than trying to get them 
exact. def : InstRW<[M7WriteShift2, M7Read_ISS], (instregex "t2RRX$")>; -def : InstRW<[WriteALUsi], (instregex "(t|t2)(LSL|LSR|ASR|ROR)")>; +def : InstRW<[WriteALUsi], (instregex "(t|t2)(LSL|LSR|ASR|ROR)r", "tROR")>; // Instructions that use the shifter, but have normal timing. diff --git a/llvm/lib/Target/ARM/ARMScheduleM85.td b/llvm/lib/Target/ARM/ARMScheduleM85.td index cd375a1..e9938d8 100644 --- a/llvm/lib/Target/ARM/ARMScheduleM85.td +++ b/llvm/lib/Target/ARM/ARMScheduleM85.td @@ -436,7 +436,7 @@ def : InstRW<[M85WriteALUsi, M85ReadALUsi], def : InstRW<[M85WriteShift2], (instregex "t2RRX$")>; def : InstRW<[WriteALU], - (instregex "(t|t2)(LSL|LSR|ASR|ROR|SBFX|UBFX)", "t2MOVsr(a|l)")>; + (instregex "(t|t2)(LSL|LSR|ASR|ROR|SBFX|UBFX)")>; // Instructions that use the shifter, but have normal timing diff --git a/llvm/test/CodeGen/ARM/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/ARM/urem-seteq-illegal-types.ll index 8900d5f..b85cb3a 100644 --- a/llvm/test/CodeGen/ARM/urem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/ARM/urem-seteq-illegal-types.ll @@ -628,13 +628,13 @@ define i1 @test_urem_larger(i63 %X) nounwind { ; ARM5-NEXT: mla r0, r1, r12, r4 ; ARM5-NEXT: bic r0, r0, #-2147483648 ; ARM5-NEXT: lsrs r0, r0, #1 -; ARM5-NEXT: rrx r1, r3 +; ARM5-NEXT: rrx r2, r3 ; ARM5-NEXT: orr r0, r0, r3, lsl #30 ; ARM5-NEXT: ldr r3, .LCPI5_2 -; ARM5-NEXT: bic r2, r0, #-2147483648 +; ARM5-NEXT: bic r1, r0, #-2147483648 ; ARM5-NEXT: mov r0, #0 -; ARM5-NEXT: subs r1, r1, r3 -; ARM5-NEXT: sbcs r1, r2, #1 +; ARM5-NEXT: subs r2, r2, r3 +; ARM5-NEXT: sbcs r1, r1, #1 ; ARM5-NEXT: movlo r0, #1 ; ARM5-NEXT: pop {r4, pc} ; ARM5-NEXT: .p2align 2 @@ -656,13 +656,13 @@ define i1 @test_urem_larger(i63 %X) nounwind { ; ARM6-NEXT: mla r0, r1, r12, r0 ; ARM6-NEXT: bic r0, r0, #-2147483648 ; ARM6-NEXT: lsrs r0, r0, #1 -; ARM6-NEXT: rrx r1, r3 +; ARM6-NEXT: rrx r2, r3 ; ARM6-NEXT: orr r0, r0, r3, lsl #30 ; ARM6-NEXT: ldr r3, .LCPI5_2 -; ARM6-NEXT: bic r2, r0, #-2147483648 +; ARM6-NEXT: bic r1, 
r0, #-2147483648 ; ARM6-NEXT: mov r0, #0 -; ARM6-NEXT: subs r1, r1, r3 -; ARM6-NEXT: sbcs r1, r2, #1 +; ARM6-NEXT: subs r2, r2, r3 +; ARM6-NEXT: sbcs r1, r1, #1 ; ARM6-NEXT: movlo r0, #1 ; ARM6-NEXT: pop {r11, pc} ; ARM6-NEXT: .p2align 2 @@ -686,14 +686,14 @@ define i1 @test_urem_larger(i63 %X) nounwind { ; ARM7-NEXT: mla r0, r1, r12, r0 ; ARM7-NEXT: bic r0, r0, #-2147483648 ; ARM7-NEXT: lsrs r0, r0, #1 -; ARM7-NEXT: rrx r1, r3 +; ARM7-NEXT: rrx r2, r3 ; ARM7-NEXT: orr r0, r0, r3, lsl #30 ; ARM7-NEXT: movw r3, #24026 -; ARM7-NEXT: bic r2, r0, #-2147483648 +; ARM7-NEXT: bic r1, r0, #-2147483648 ; ARM7-NEXT: movt r3, #48461 -; ARM7-NEXT: subs r1, r1, r3 +; ARM7-NEXT: subs r2, r2, r3 ; ARM7-NEXT: mov r0, #0 -; ARM7-NEXT: sbcs r1, r2, #1 +; ARM7-NEXT: sbcs r1, r1, #1 ; ARM7-NEXT: movwlo r0, #1 ; ARM7-NEXT: pop {r11, pc} ; @@ -709,14 +709,14 @@ define i1 @test_urem_larger(i63 %X) nounwind { ; ARM8-NEXT: mla r0, r1, r12, r0 ; ARM8-NEXT: bic r0, r0, #-2147483648 ; ARM8-NEXT: lsrs r0, r0, #1 -; ARM8-NEXT: rrx r1, r3 +; ARM8-NEXT: rrx r2, r3 ; ARM8-NEXT: orr r0, r0, r3, lsl #30 ; ARM8-NEXT: movw r3, #24026 -; ARM8-NEXT: bic r2, r0, #-2147483648 +; ARM8-NEXT: bic r1, r0, #-2147483648 ; ARM8-NEXT: movt r3, #48461 -; ARM8-NEXT: subs r1, r1, r3 +; ARM8-NEXT: subs r2, r2, r3 ; ARM8-NEXT: mov r0, #0 -; ARM8-NEXT: sbcs r1, r2, #1 +; ARM8-NEXT: sbcs r1, r1, #1 ; ARM8-NEXT: movwlo r0, #1 ; ARM8-NEXT: pop {r11, pc} ; @@ -732,14 +732,14 @@ define i1 @test_urem_larger(i63 %X) nounwind { ; NEON7-NEXT: mla r0, r1, r12, r0 ; NEON7-NEXT: bic r0, r0, #-2147483648 ; NEON7-NEXT: lsrs r0, r0, #1 -; NEON7-NEXT: rrx r1, r3 +; NEON7-NEXT: rrx r2, r3 ; NEON7-NEXT: orr r0, r0, r3, lsl #30 ; NEON7-NEXT: movw r3, #24026 -; NEON7-NEXT: bic r2, r0, #-2147483648 +; NEON7-NEXT: bic r1, r0, #-2147483648 ; NEON7-NEXT: movt r3, #48461 -; NEON7-NEXT: subs r1, r1, r3 +; NEON7-NEXT: subs r2, r2, r3 ; NEON7-NEXT: mov r0, #0 -; NEON7-NEXT: sbcs r1, r2, #1 +; NEON7-NEXT: sbcs r1, r1, #1 ; NEON7-NEXT: movwlo r0, 
#1 ; NEON7-NEXT: pop {r11, pc} ; @@ -755,14 +755,14 @@ define i1 @test_urem_larger(i63 %X) nounwind { ; NEON8-NEXT: mla r0, r1, r12, r0 ; NEON8-NEXT: bic r0, r0, #-2147483648 ; NEON8-NEXT: lsrs r0, r0, #1 -; NEON8-NEXT: rrx r1, r3 +; NEON8-NEXT: rrx r2, r3 ; NEON8-NEXT: orr r0, r0, r3, lsl #30 ; NEON8-NEXT: movw r3, #24026 -; NEON8-NEXT: bic r2, r0, #-2147483648 +; NEON8-NEXT: bic r1, r0, #-2147483648 ; NEON8-NEXT: movt r3, #48461 -; NEON8-NEXT: subs r1, r1, r3 +; NEON8-NEXT: subs r2, r2, r3 ; NEON8-NEXT: mov r0, #0 -; NEON8-NEXT: sbcs r1, r2, #1 +; NEON8-NEXT: sbcs r1, r1, #1 ; NEON8-NEXT: movwlo r0, #1 ; NEON8-NEXT: pop {r11, pc} %urem = urem i63 %X, 1234567890 |