Diffstat (limited to 'llvm/lib/Target')
21 files changed, 383 insertions, 292 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td index 076a623..639ddcb 100644 --- a/llvm/lib/Target/AArch64/AArch64Combine.td +++ b/llvm/lib/Target/AArch64/AArch64Combine.td @@ -69,7 +69,6 @@ def push_mul_through_sext : push_opcode_through_ext<G_MUL, G_SEXT>; def AArch64PreLegalizerCombiner: GICombiner< "AArch64PreLegalizerCombinerImpl", [all_combines, - fconstant_to_constant, icmp_redundant_trunc, fold_global_offset, shuffle_to_extract, @@ -341,7 +340,7 @@ def AArch64PostLegalizerLowering : GICombiner<"AArch64PostLegalizerLoweringImpl", [shuffle_vector_lowering, vashr_vlshr_imm, icmp_lowering, build_vector_lowering, - lower_vector_fcmp, form_truncstore, + lower_vector_fcmp, form_truncstore, fconstant_to_constant, vector_sext_inreg_to_shift, unmerge_ext_to_unmerge, lower_mulv2s64, vector_unmerge_lowering, insertelt_nonconst, diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp index 0f4bbfc3..1e607f4 100644 --- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp +++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp @@ -92,9 +92,18 @@ private: bool expandCALL_BTI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI); bool expandStoreSwiftAsyncContext(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI); - MachineBasicBlock * - expandCommitOrRestoreZASave(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI); + struct ConditionalBlocks { + MachineBasicBlock &CondBB; + MachineBasicBlock &EndBB; + }; + ConditionalBlocks expandConditionalPseudo(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + DebugLoc DL, + MachineInstrBuilder &Branch); + MachineBasicBlock *expandRestoreZASave(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI); + MachineBasicBlock *expandCommitZASave(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI); MachineBasicBlock *expandCondSMToggle(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI); }; @@ -991,72 +1000,97 @@ bool AArch64ExpandPseudo::expandStoreSwiftAsyncContext( return true; } -static constexpr unsigned ZERO_ALL_ZA_MASK = 0b11111111; - -MachineBasicBlock *AArch64ExpandPseudo::expandCommitOrRestoreZASave( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) { - MachineInstr &MI = *MBBI; - bool IsRestoreZA = MI.getOpcode() == AArch64::RestoreZAPseudo; - assert((MI.getOpcode() == AArch64::RestoreZAPseudo || - MI.getOpcode() == AArch64::CommitZASavePseudo) && - "Expected ZA commit or restore"); +AArch64ExpandPseudo::ConditionalBlocks +AArch64ExpandPseudo::expandConditionalPseudo(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + DebugLoc DL, + MachineInstrBuilder &Branch) { assert((std::next(MBBI) != MBB.end() || - MI.getParent()->successors().begin() != - MI.getParent()->successors().end()) && - "Unexpected unreachable in block that restores ZA"); - - // Compare TPIDR2_EL0 value against 0. - DebugLoc DL = MI.getDebugLoc(); - MachineInstrBuilder Branch = - BuildMI(MBB, MBBI, DL, - TII->get(IsRestoreZA ? AArch64::CBZX : AArch64::CBNZX)) - .add(MI.getOperand(0)); + MBB.successors().begin() != MBB.successors().end()) && + "Unexpected unreachable in block"); // Split MBB and create two new blocks: - // - MBB now contains all instructions before RestoreZAPseudo. - // - SMBB contains the [Commit|RestoreZA]Pseudo instruction only. - // - EndBB contains all instructions after [Commit|RestoreZA]Pseudo. + // - MBB now contains all instructions before the conditional pseudo. 
+ // - CondBB contains the conditional pseudo instruction only. + // - EndBB contains all instructions after the conditional pseudo. MachineInstr &PrevMI = *std::prev(MBBI); - MachineBasicBlock *SMBB = MBB.splitAt(PrevMI, /*UpdateLiveIns*/ true); - MachineBasicBlock *EndBB = std::next(MI.getIterator()) == SMBB->end() - ? *SMBB->successors().begin() - : SMBB->splitAt(MI, /*UpdateLiveIns*/ true); - - // Add the SMBB label to the CB[N]Z instruction & create a branch to EndBB. - Branch.addMBB(SMBB); + MachineBasicBlock *CondBB = MBB.splitAt(PrevMI, /*UpdateLiveIns*/ true); + MachineBasicBlock *EndBB = + std::next(MBBI) == CondBB->end() + ? *CondBB->successors().begin() + : CondBB->splitAt(*MBBI, /*UpdateLiveIns*/ true); + + // Add the SMBB label to the branch instruction & create a branch to EndBB. + Branch.addMBB(CondBB); BuildMI(&MBB, DL, TII->get(AArch64::B)) .addMBB(EndBB); MBB.addSuccessor(EndBB); + // Create branch from CondBB to EndBB. Users of this helper should insert new + // instructions at CondBB.back() -- i.e. before the branch. + BuildMI(CondBB, DL, TII->get(AArch64::B)).addMBB(EndBB); + return {*CondBB, *EndBB}; +} + +MachineBasicBlock * +AArch64ExpandPseudo::expandRestoreZASave(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI) { + MachineInstr &MI = *MBBI; + DebugLoc DL = MI.getDebugLoc(); + + // Compare TPIDR2_EL0 against 0. Restore ZA if TPIDR2_EL0 is zero. + MachineInstrBuilder Branch = + BuildMI(MBB, MBBI, DL, TII->get(AArch64::CBZX)).add(MI.getOperand(0)); + + auto [CondBB, EndBB] = expandConditionalPseudo(MBB, MBBI, DL, Branch); // Replace the pseudo with a call (BL). MachineInstrBuilder MIB = - BuildMI(*SMBB, SMBB->end(), DL, TII->get(AArch64::BL)); + BuildMI(CondBB, CondBB.back(), DL, TII->get(AArch64::BL)); // Copy operands (mainly the regmask) from the pseudo. for (unsigned I = 2; I < MI.getNumOperands(); ++I) MIB.add(MI.getOperand(I)); + // Mark the TPIDR2 block pointer (X0) as an implicit use. + MIB.addReg(MI.getOperand(1).getReg(), RegState::Implicit); - if (IsRestoreZA) { - // Mark the TPIDR2 block pointer (X0) as an implicit use. - MIB.addReg(MI.getOperand(1).getReg(), RegState::Implicit); - } else /*CommitZA*/ { + MI.eraseFromParent(); + return &EndBB; +} + +static constexpr unsigned ZERO_ALL_ZA_MASK = 0b11111111; + +MachineBasicBlock * +AArch64ExpandPseudo::expandCommitZASave(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI) { + MachineInstr &MI = *MBBI; + DebugLoc DL = MI.getDebugLoc(); + + // Compare TPIDR2_EL0 against 0. Commit ZA if TPIDR2_EL0 is non-zero. + MachineInstrBuilder Branch = + BuildMI(MBB, MBBI, DL, TII->get(AArch64::CBNZX)).add(MI.getOperand(0)); + + auto [CondBB, EndBB] = expandConditionalPseudo(MBB, MBBI, DL, Branch); + // Replace the pseudo with a call (BL). + MachineInstrBuilder MIB = + BuildMI(CondBB, CondBB.back(), DL, TII->get(AArch64::BL)); + // Copy operands (mainly the regmask) from the pseudo. + for (unsigned I = 2; I < MI.getNumOperands(); ++I) + MIB.add(MI.getOperand(I)); + // Clear TPIDR2_EL0. + BuildMI(CondBB, CondBB.back(), DL, TII->get(AArch64::MSR)) + .addImm(AArch64SysReg::TPIDR2_EL0) + .addReg(AArch64::XZR); + bool ZeroZA = MI.getOperand(1).getImm() != 0; + if (ZeroZA) { [[maybe_unused]] auto *TRI = MBB.getParent()->getSubtarget().getRegisterInfo(); - // Clear TPIDR2_EL0. 
- BuildMI(*SMBB, SMBB->end(), DL, TII->get(AArch64::MSR)) - .addImm(AArch64SysReg::TPIDR2_EL0) - .addReg(AArch64::XZR); - bool ZeroZA = MI.getOperand(1).getImm() != 0; - if (ZeroZA) { - assert(MI.definesRegister(AArch64::ZAB0, TRI) && "should define ZA!"); - BuildMI(*SMBB, SMBB->end(), DL, TII->get(AArch64::ZERO_M)) - .addImm(ZERO_ALL_ZA_MASK) - .addDef(AArch64::ZAB0, RegState::ImplicitDefine); - } + assert(MI.definesRegister(AArch64::ZAB0, TRI) && "should define ZA!"); + BuildMI(CondBB, CondBB.back(), DL, TII->get(AArch64::ZERO_M)) + .addImm(ZERO_ALL_ZA_MASK) + .addDef(AArch64::ZAB0, RegState::ImplicitDefine); } - BuildMI(SMBB, DL, TII->get(AArch64::B)).addMBB(EndBB); MI.eraseFromParent(); - return EndBB; + return &EndBB; } MachineBasicBlock * @@ -1130,24 +1164,9 @@ AArch64ExpandPseudo::expandCondSMToggle(MachineBasicBlock &MBB, MachineInstrBuilder Tbx = BuildMI(MBB, MBBI, DL, TII->get(Opc)).addReg(SMReg32).addImm(0); - // Split MBB and create two new blocks: - // - MBB now contains all instructions before MSRcond_pstatesvcrImm1. - // - SMBB contains the MSRcond_pstatesvcrImm1 instruction only. - // - EndBB contains all instructions after MSRcond_pstatesvcrImm1. - MachineInstr &PrevMI = *std::prev(MBBI); - MachineBasicBlock *SMBB = MBB.splitAt(PrevMI, /*UpdateLiveIns*/ true); - MachineBasicBlock *EndBB = std::next(MI.getIterator()) == SMBB->end() - ? *SMBB->successors().begin() - : SMBB->splitAt(MI, /*UpdateLiveIns*/ true); - - // Add the SMBB label to the TB[N]Z instruction & create a branch to EndBB. - Tbx.addMBB(SMBB); - BuildMI(&MBB, DL, TII->get(AArch64::B)) - .addMBB(EndBB); - MBB.addSuccessor(EndBB); - + auto [CondBB, EndBB] = expandConditionalPseudo(MBB, MBBI, DL, Tbx); // Create the SMSTART/SMSTOP (MSRpstatesvcrImm1) instruction in SMBB. - MachineInstrBuilder MIB = BuildMI(*SMBB, SMBB->begin(), MI.getDebugLoc(), + MachineInstrBuilder MIB = BuildMI(CondBB, CondBB.back(), MI.getDebugLoc(), TII->get(AArch64::MSRpstatesvcrImm1)); // Copy all but the second and third operands of MSRcond_pstatesvcrImm1 (as // these contain the CopyFromReg for the first argument and the flag to @@ -1157,10 +1176,8 @@ AArch64ExpandPseudo::expandCondSMToggle(MachineBasicBlock &MBB, for (unsigned i = 4; i < MI.getNumOperands(); ++i) MIB.add(MI.getOperand(i)); - BuildMI(SMBB, DL, TII->get(AArch64::B)).addMBB(EndBB); - MI.eraseFromParent(); - return EndBB; + return &EndBB; } bool AArch64ExpandPseudo::expandMultiVecPseudo( @@ -1674,15 +1691,21 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB, return expandCALL_BTI(MBB, MBBI); case AArch64::StoreSwiftAsyncContext: return expandStoreSwiftAsyncContext(MBB, MBBI); + case AArch64::RestoreZAPseudo: case AArch64::CommitZASavePseudo: - case AArch64::RestoreZAPseudo: { - auto *NewMBB = expandCommitOrRestoreZASave(MBB, MBBI); - if (NewMBB != &MBB) - NextMBBI = MBB.end(); // The NextMBBI iterator is invalidated. - return true; - } case AArch64::MSRpstatePseudo: { - auto *NewMBB = expandCondSMToggle(MBB, MBBI); + auto *NewMBB = [&] { + switch (Opcode) { + case AArch64::RestoreZAPseudo: + return expandRestoreZASave(MBB, MBBI); + case AArch64::CommitZASavePseudo: + return expandCommitZASave(MBB, MBBI); + case AArch64::MSRpstatePseudo: + return expandCondSMToggle(MBB, MBBI); + default: + llvm_unreachable("Unexpected conditional pseudo!"); + } + }(); if (NewMBB != &MBB) NextMBBI = MBB.end(); // The NextMBBI iterator is invalidated. 
return true; diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index c197550e..9e2d698 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -678,8 +678,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .widenScalarToNextPow2(0) .clampScalar(0, s8, s64); getActionDefinitionsBuilder(G_FCONSTANT) - .legalFor({s32, s64, s128}) - .legalFor(HasFP16, {s16}) + // Always legalize s16 to prevent G_FCONSTANT being widened to G_CONSTANT + .legalFor({s16, s32, s64, s128}) .clampScalar(0, MinFPScalar, s128); // FIXME: fix moreElementsToNextPow2 diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp index 63313da..23dcaea 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp @@ -75,6 +75,31 @@ struct ShuffleVectorPseudo { ShuffleVectorPseudo() = default; }; +/// Return true if a G_FCONSTANT instruction is known to be better-represented +/// as a G_CONSTANT. +bool matchFConstantToConstant(MachineInstr &MI, MachineRegisterInfo &MRI) { + assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT); + Register DstReg = MI.getOperand(0).getReg(); + const unsigned DstSize = MRI.getType(DstReg).getSizeInBits(); + if (DstSize != 16 && DstSize != 32 && DstSize != 64) + return false; + + // When we're storing a value, it doesn't matter what register bank it's on. + // Since not all floating point constants can be materialized using a fmov, + // it makes more sense to just use a GPR. + return all_of(MRI.use_nodbg_instructions(DstReg), + [](const MachineInstr &Use) { return Use.mayStore(); }); +} + +/// Change a G_FCONSTANT into a G_CONSTANT. +void applyFConstantToConstant(MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT); + MachineIRBuilder MIB(MI); + const APFloat &ImmValAPF = MI.getOperand(1).getFPImm()->getValueAPF(); + MIB.buildConstant(MI.getOperand(0).getReg(), ImmValAPF.bitcastToAPInt()); + MI.eraseFromParent(); +} + /// Check if a G_EXT instruction can handle a shuffle mask \p M when the vector /// sources of the shuffle are different. std::optional<std::pair<bool, uint64_t>> getExtMask(ArrayRef<int> M, diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp index 8c10673..896eab5 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp @@ -44,31 +44,6 @@ namespace { #include "AArch64GenPreLegalizeGICombiner.inc" #undef GET_GICOMBINER_TYPES -/// Return true if a G_FCONSTANT instruction is known to be better-represented -/// as a G_CONSTANT. -bool matchFConstantToConstant(MachineInstr &MI, MachineRegisterInfo &MRI) { - assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT); - Register DstReg = MI.getOperand(0).getReg(); - const unsigned DstSize = MRI.getType(DstReg).getSizeInBits(); - if (DstSize != 32 && DstSize != 64) - return false; - - // When we're storing a value, it doesn't matter what register bank it's on. - // Since not all floating point constants can be materialized using a fmov, - // it makes more sense to just use a GPR. 
- return all_of(MRI.use_nodbg_instructions(DstReg), - [](const MachineInstr &Use) { return Use.mayStore(); }); -} - -/// Change a G_FCONSTANT into a G_CONSTANT. -void applyFConstantToConstant(MachineInstr &MI) { - assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT); - MachineIRBuilder MIB(MI); - const APFloat &ImmValAPF = MI.getOperand(1).getFPImm()->getValueAPF(); - MIB.buildConstant(MI.getOperand(0).getReg(), ImmValAPF.bitcastToAPInt()); - MI.eraseFromParent(); -} - /// Try to match a G_ICMP of a G_TRUNC with zero, in which the truncated bits /// are sign bits. In this case, we can transform the G_ICMP to directly compare /// the wide value with a zero. diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 723d07e..c7a91f4c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -929,7 +929,7 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) { ThinOrFullLTOPhase Phase) { if (Level != OptimizationLevel::O0) { if (!isLTOPreLink(Phase)) { - if (getTargetTriple().isAMDGCN()) { + if (EnableAMDGPUAttributor && getTargetTriple().isAMDGCN()) { AMDGPUAttributorOptions Opts; MPM.addPass(AMDGPUAttributorPass(*this, Opts, Phase)); } @@ -966,7 +966,7 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) { PM.addPass(InternalizePass(mustPreserveGV)); PM.addPass(GlobalDCEPass()); } - if (EnableAMDGPUAttributor) { + if (EnableAMDGPUAttributor && getTargetTriple().isAMDGCN()) { AMDGPUAttributorOptions Opt; if (HasClosedWorldAssumption) Opt.IsClosedWorld = true; diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index a8140c3..99ba043 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -2105,6 +2105,10 @@ bool AMDGPUOperand::isInlinableImm(MVT type) const { // Only plain immediates are inlinable (e.g. "clamp" attribute is not) return false; } + + if (getModifiers().Lit != LitModifier::None) + return false; + // TODO: We should avoid using host float here. It would be better to // check the float bit values which is what a few other places do. // We've had bot failures before due to weird NaN support on mips hosts. 
@@ -2339,6 +2343,7 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo bool CanUse64BitLiterals = AsmParser->has64BitLiterals() && !(InstDesc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)); + LitModifier Lit = getModifiers().Lit; MCContext &Ctx = AsmParser->getContext(); if (Imm.IsFPImm) { // We got fp literal token @@ -2348,7 +2353,8 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo case AMDGPU::OPERAND_REG_INLINE_C_INT64: case AMDGPU::OPERAND_REG_INLINE_C_FP64: case AMDGPU::OPERAND_REG_INLINE_AC_FP64: - if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), + if (Lit == LitModifier::None && + AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), AsmParser->hasInv2PiInlineImm())) { Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); return; @@ -2372,14 +2378,20 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo if ((OpTy == AMDGPU::OPERAND_REG_IMM_FP64 || OpTy == AMDGPU::OPERAND_REG_INLINE_C_FP64 || - OpTy == AMDGPU::OPERAND_REG_INLINE_AC_FP64) && - CanUse64BitLiterals && Lo_32(Val) != 0) { - Inst.addOperand(MCOperand::createExpr( - AMDGPUMCExpr::createLit(LitModifier::Lit64, Val, Ctx))); - } else { - Inst.addOperand(MCOperand::createImm(Val)); + OpTy == AMDGPU::OPERAND_REG_INLINE_AC_FP64)) { + if (CanUse64BitLiterals && Lit == LitModifier::None && + (isInt<32>(Val) || isUInt<32>(Val))) { + // The floating-point operand will be verbalized as an + // integer one. If that integer happens to fit 32 bits, on + // re-assembling it will be intepreted as the high half of + // the actual value, so we have to wrap it into lit64(). + Lit = LitModifier::Lit64; + } else if (Lit == LitModifier::Lit) { + // For FP64 operands lit() specifies the high half of the value. + Val = Hi_32(Val); + } } - return; + break; } // We don't allow fp literals in 64-bit integer instructions. It is @@ -2388,19 +2400,17 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo llvm_unreachable("fp literal in 64-bit integer instruction."); case AMDGPU::OPERAND_KIMM64: - if (CanUse64BitLiterals && Lo_32(Val) != 0) { - Inst.addOperand(MCOperand::createExpr( - AMDGPUMCExpr::createLit(LitModifier::Lit64, Val, Ctx))); - } else { - Inst.addOperand(MCOperand::createImm(Val)); - } - return; + if (CanUse64BitLiterals && Lit == LitModifier::None && + (isInt<32>(Val) || isUInt<32>(Val))) + Lit = LitModifier::Lit64; + break; case AMDGPU::OPERAND_REG_IMM_BF16: case AMDGPU::OPERAND_REG_INLINE_C_BF16: case AMDGPU::OPERAND_REG_INLINE_C_V2BF16: case AMDGPU::OPERAND_REG_IMM_V2BF16: - if (AsmParser->hasInv2PiInlineImm() && Literal == 0x3fc45f306725feed) { + if (Lit == LitModifier::None && AsmParser->hasInv2PiInlineImm() && + Literal == 0x3fc45f306725feed) { // This is the 1/(2*pi) which is going to be truncated to bf16 with the // loss of precision. The constant represents ideomatic fp32 value of // 1/(2*pi) = 0.15915494 since bf16 is in fact fp32 with cleared low 16 @@ -2438,14 +2448,19 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo // We allow precision lost but not overflow or underflow. 
This should be // checked earlier in isLiteralImm() - uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); - Inst.addOperand(MCOperand::createImm(ImmVal)); - return; + Val = FPLiteral.bitcastToAPInt().getZExtValue(); + break; } default: llvm_unreachable("invalid operand size"); } + if (Lit != LitModifier::None) { + Inst.addOperand( + MCOperand::createExpr(AMDGPUMCExpr::createLit(Lit, Val, Ctx))); + } else { + Inst.addOperand(MCOperand::createImm(Val)); + } return; } @@ -2465,12 +2480,12 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo case AMDGPU::OPERAND_REG_IMM_V2INT32: case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16: - Inst.addOperand(MCOperand::createImm(Val)); - return; + break; case AMDGPU::OPERAND_REG_IMM_INT64: case AMDGPU::OPERAND_REG_INLINE_C_INT64: - if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { + if (Lit == LitModifier::None && + AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { Inst.addOperand(MCOperand::createImm(Val)); return; } @@ -2479,22 +2494,15 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo // truncated to uint32_t), if the target doesn't support 64-bit literals, or // the lit modifier is explicitly used, we need to truncate it to the 32 // LSBs. - if (!AsmParser->has64BitLiterals() || - getModifiers().Lit == LitModifier::Lit) + if (!AsmParser->has64BitLiterals() || Lit == LitModifier::Lit) Val = Lo_32(Val); - - if (CanUse64BitLiterals && (!isInt<32>(Val) || !isUInt<32>(Val))) { - Inst.addOperand(MCOperand::createExpr( - AMDGPUMCExpr::createLit(LitModifier::Lit64, Val, Ctx))); - } else { - Inst.addOperand(MCOperand::createImm(Val)); - } - return; + break; case AMDGPU::OPERAND_REG_IMM_FP64: case AMDGPU::OPERAND_REG_INLINE_C_FP64: case AMDGPU::OPERAND_REG_INLINE_AC_FP64: - if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { + if (Lit == LitModifier::None && + AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { Inst.addOperand(MCOperand::createImm(Val)); return; } @@ -2509,19 +2517,15 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo // 1) explicitly forced by using lit modifier; // 2) the value is a valid 32-bit representation (signed or unsigned), // meanwhile not forced by lit64 modifier. - if (getModifiers().Lit == LitModifier::Lit || - (getModifiers().Lit != LitModifier::Lit64 && - (isInt<32>(Val) || isUInt<32>(Val)))) + if (Lit == LitModifier::Lit || + (Lit != LitModifier::Lit64 && (isInt<32>(Val) || isUInt<32>(Val)))) Val = static_cast<uint64_t>(Val) << 32; } - if (CanUse64BitLiterals && Lo_32(Val) != 0) { - Inst.addOperand(MCOperand::createExpr( - AMDGPUMCExpr::createLit(LitModifier::Lit64, Val, Ctx))); - } else { - Inst.addOperand(MCOperand::createImm(Val)); - } - return; + // For FP64 operands lit() specifies the high half of the value. 
+ if (Lit == LitModifier::Lit) + Val = Hi_32(Val); + break; case AMDGPU::OPERAND_REG_IMM_INT16: case AMDGPU::OPERAND_REG_INLINE_C_INT16: @@ -2534,25 +2538,23 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo case AMDGPU::OPERAND_REG_INLINE_C_V2BF16: case AMDGPU::OPERAND_KIMM32: case AMDGPU::OPERAND_KIMM16: - Inst.addOperand(MCOperand::createImm(Val)); - return; + break; case AMDGPU::OPERAND_KIMM64: - if ((isInt<32>(Val) || isUInt<32>(Val)) && - getModifiers().Lit != LitModifier::Lit64) + if ((isInt<32>(Val) || isUInt<32>(Val)) && Lit != LitModifier::Lit64) Val <<= 32; - - if (CanUse64BitLiterals && Lo_32(Val) != 0) { - Inst.addOperand(MCOperand::createExpr( - AMDGPUMCExpr::createLit(LitModifier::Lit64, Val, Ctx))); - } else { - Inst.addOperand(MCOperand::createImm(Val)); - } - return; + break; default: llvm_unreachable("invalid operand type"); } + + if (Lit != LitModifier::None) { + Inst.addOperand( + MCOperand::createExpr(AMDGPUMCExpr::createLit(Lit, Val, Ctx))); + } else { + Inst.addOperand(MCOperand::createImm(Val)); + } } void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { @@ -4821,12 +4823,15 @@ bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst, const MCOperand &MO = Inst.getOperand(OpIdx); // Exclude special imm operands (like that used by s_set_gpr_idx_on) if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { + bool IsLit = false; std::optional<int64_t> Imm; if (MO.isImm()) { Imm = MO.getImm(); } else if (MO.isExpr()) { - if (isLitExpr(MO.getExpr())) + if (isLitExpr(MO.getExpr())) { + IsLit = true; Imm = getLitValue(MO.getExpr()); + } } else { continue; } @@ -4836,7 +4841,7 @@ bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst, } else if (!isInlineConstant(Inst, OpIdx)) { auto OpType = static_cast<AMDGPU::OperandType>( Desc.operands()[OpIdx].OperandType); - int64_t Value = encode32BitLiteral(*Imm, OpType); + int64_t Value = encode32BitLiteral(*Imm, OpType, IsLit); if (NumLiterals == 0 || LiteralValue != Value) { LiteralValue = Value; ++NumLiterals; diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index f11b373..be62395 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -1551,7 +1551,7 @@ AMDGPUDisassembler::decodeMandatoryLiteral64Constant(uint64_t Val) const { HasLiteral = true; Literal = Literal64 = Val; - bool UseLit64 = Lo_32(Literal64) != 0; + bool UseLit64 = Hi_32(Literal64) == 0; return UseLit64 ? MCOperand::createExpr(AMDGPUMCExpr::createLit( LitModifier::Lit64, Literal64, getContext())) : MCOperand::createImm(Literal64); @@ -1584,11 +1584,11 @@ MCOperand AMDGPUDisassembler::decodeLiteralConstant(const MCInstrDesc &Desc, if (CanUse64BitLiterals) { if (OpDesc.OperandType == AMDGPU::OPERAND_REG_IMM_INT64 || OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT64) - UseLit64 = !isInt<32>(Val) || !isUInt<32>(Val); + UseLit64 = false; else if (OpDesc.OperandType == AMDGPU::OPERAND_REG_IMM_FP64 || OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_C_FP64 || OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_AC_FP64) - UseLit64 = Lo_32(Val) != 0; + UseLit64 = Hi_32(Literal64) == 0; } return UseLit64 ? 
MCOperand::createExpr(AMDGPUMCExpr::createLit( @@ -1614,12 +1614,12 @@ AMDGPUDisassembler::decodeLiteral64Constant(const MCInst &Inst) const { const MCOperandInfo &OpDesc = Desc.operands()[Inst.getNumOperands()]; if (OpDesc.OperandType == AMDGPU::OPERAND_REG_IMM_INT64 || OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT64) { - UseLit64 = !isInt<32>(Literal64) || !isUInt<32>(Literal64); + UseLit64 = false; } else { assert(OpDesc.OperandType == AMDGPU::OPERAND_REG_IMM_FP64 || OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_C_FP64 || OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_AC_FP64); - UseLit64 = Lo_32(Literal64) != 0; + UseLit64 = Hi_32(Literal64) == 0; } return UseLit64 ? MCOperand::createExpr(AMDGPUMCExpr::createLit( diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp index e82f998..703ec0a 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp @@ -73,7 +73,13 @@ void AMDGPUInstPrinter::printU16ImmDecOperand(const MCInst *MI, unsigned OpNo, void AMDGPUInstPrinter::printU32ImmOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { - O << formatHex(MI->getOperand(OpNo).getImm() & 0xffffffff); + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isExpr()) { + MAI.printExpr(O, *Op.getExpr()); + return; + } + + O << formatHex(Op.getImm() & 0xffffffff); } void AMDGPUInstPrinter::printFP64ImmOperand(const MCInst *MI, unsigned OpNo, diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp index f2879116..ea758bb 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp @@ -270,10 +270,19 @@ std::optional<uint64_t> AMDGPUMCCodeEmitter::getLitEncoding( const MCInstrDesc &Desc, const MCOperand &MO, unsigned OpNo, const MCSubtargetInfo &STI, bool HasMandatoryLiteral) const { const MCOperandInfo &OpInfo = Desc.operands()[OpNo]; - int64_t Imm; + int64_t Imm = 0; if (MO.isExpr()) { - if (!MO.getExpr()->evaluateAsAbsolute(Imm)) - return AMDGPU::getOperandSize(OpInfo) == 8 ? 254 : 255; + if (!MO.getExpr()->evaluateAsAbsolute(Imm) || + AMDGPU::isLitExpr(MO.getExpr())) { + if (OpInfo.OperandType == AMDGPU::OPERAND_KIMM16 || + OpInfo.OperandType == AMDGPU::OPERAND_KIMM32 || + OpInfo.OperandType == AMDGPU::OPERAND_KIMM64) + return Imm; + if (STI.hasFeature(AMDGPU::Feature64BitLiterals) && + AMDGPU::getOperandSize(OpInfo) == 8) + return 254; + return 255; + } } else { assert(!MO.isDFPImm()); @@ -452,13 +461,16 @@ void AMDGPUMCCodeEmitter::encodeInstruction(const MCInst &MI, // Yes! Encode it int64_t Imm = 0; + bool IsLit = false; if (Op.isImm()) Imm = Op.getImm(); else if (Op.isExpr()) { - if (const auto *C = dyn_cast<MCConstantExpr>(Op.getExpr())) + if (const auto *C = dyn_cast<MCConstantExpr>(Op.getExpr())) { Imm = C->getValue(); - else if (AMDGPU::isLitExpr(Op.getExpr())) + } else if (AMDGPU::isLitExpr(Op.getExpr())) { + IsLit = true; Imm = AMDGPU::getLitValue(Op.getExpr()); + } } else // Exprs will be replaced with a fixup value. 
llvm_unreachable("Must be immediate or expr"); @@ -468,7 +480,7 @@ void AMDGPUMCCodeEmitter::encodeInstruction(const MCInst &MI, } else { auto OpType = static_cast<AMDGPU::OperandType>(Desc.operands()[i].OperandType); - Imm = AMDGPU::encode32BitLiteral(Imm, OpType); + Imm = AMDGPU::encode32BitLiteral(Imm, OpType, IsLit); support::endian::write<uint32_t>(CB, Imm, llvm::endianness::little); } diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 76023d2..3e1b058 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -3145,7 +3145,7 @@ bool isValid32BitLiteral(uint64_t Val, bool IsFP64) { return isUInt<32>(Val) || isInt<32>(Val); } -int64_t encode32BitLiteral(int64_t Imm, OperandType Type) { +int64_t encode32BitLiteral(int64_t Imm, OperandType Type, bool IsLit) { switch (Type) { default: break; @@ -3168,7 +3168,7 @@ int64_t encode32BitLiteral(int64_t Imm, OperandType Type) { case OPERAND_REG_INLINE_C_INT32: return Lo_32(Imm); case OPERAND_REG_IMM_FP64: - return Hi_32(Imm); + return IsLit ? Imm : Hi_32(Imm); } return Imm; } diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 49b4d02..a01a5fd 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -1727,7 +1727,7 @@ LLVM_READNONE bool isValid32BitLiteral(uint64_t Val, bool IsFP64); LLVM_READNONE -int64_t encode32BitLiteral(int64_t Imm, OperandType Type); +int64_t encode32BitLiteral(int64_t Imm, OperandType Type, bool IsLit); bool isArgPassedInSGPR(const Argument *Arg); diff --git a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp index 186fdd1..53633ea 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp @@ -675,6 +675,45 @@ static void getOperandsForBranch(Register CondReg, RISCVCC::CondCode &CC, CC = getRISCVCCFromICmp(Pred); } +/// Select the RISC-V Zalasr opcode for the G_LOAD or G_STORE operation +/// \p GenericOpc, appropriate for the GPR register bank and of memory access +/// size \p OpSize. +static unsigned selectZalasrLoadStoreOp(unsigned GenericOpc, unsigned OpSize) { + const bool IsStore = GenericOpc == TargetOpcode::G_STORE; + switch (OpSize) { + default: + llvm_unreachable("Unexpected memory size"); + case 8: + return IsStore ? RISCV::SB_RL : RISCV::LB_AQ; + case 16: + return IsStore ? RISCV::SH_RL : RISCV::LH_AQ; + case 32: + return IsStore ? RISCV::SW_RL : RISCV::LW_AQ; + case 64: + return IsStore ? RISCV::SD_RL : RISCV::LD_AQ; + } +} + +/// Select the RISC-V regimm opcode for the G_LOAD or G_STORE operation +/// \p GenericOpc, appropriate for the GPR register bank and of memory access +/// size \p OpSize. \returns \p GenericOpc if the combination is unsupported. +static unsigned selectRegImmLoadStoreOp(unsigned GenericOpc, unsigned OpSize) { + const bool IsStore = GenericOpc == TargetOpcode::G_STORE; + switch (OpSize) { + case 8: + // Prefer unsigned due to no c.lb in Zcb. + return IsStore ? RISCV::SB : RISCV::LBU; + case 16: + return IsStore ? RISCV::SH : RISCV::LH; + case 32: + return IsStore ? RISCV::SW : RISCV::LW; + case 64: + return IsStore ? 
RISCV::SD : RISCV::LD; + } + + return GenericOpc; +} + bool RISCVInstructionSelector::select(MachineInstr &MI) { MachineIRBuilder MIB(MI); @@ -892,6 +931,59 @@ bool RISCVInstructionSelector::select(MachineInstr &MI) { return selectImplicitDef(MI, MIB); case TargetOpcode::G_UNMERGE_VALUES: return selectUnmergeValues(MI, MIB); + case TargetOpcode::G_LOAD: + case TargetOpcode::G_STORE: { + GLoadStore &LdSt = cast<GLoadStore>(MI); + const Register ValReg = LdSt.getReg(0); + const Register PtrReg = LdSt.getPointerReg(); + LLT PtrTy = MRI->getType(PtrReg); + + const RegisterBank &RB = *RBI.getRegBank(ValReg, *MRI, TRI); + if (RB.getID() != RISCV::GPRBRegBankID) + return false; + +#ifndef NDEBUG + const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, *MRI, TRI); + // Check that the pointer register is valid. + assert(PtrRB.getID() == RISCV::GPRBRegBankID && + "Load/Store pointer operand isn't a GPR"); + assert(PtrTy.isPointer() && "Load/Store pointer operand isn't a pointer"); +#endif + + // Can only handle AddressSpace 0. + if (PtrTy.getAddressSpace() != 0) + return false; + + unsigned MemSize = LdSt.getMemSizeInBits().getValue(); + AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering(); + + if (isStrongerThanMonotonic(Order)) { + MI.setDesc(TII.get(selectZalasrLoadStoreOp(Opc, MemSize))); + return constrainSelectedInstRegOperands(MI, TII, TRI, RBI); + } + + const unsigned NewOpc = selectRegImmLoadStoreOp(MI.getOpcode(), MemSize); + if (NewOpc == MI.getOpcode()) + return false; + + // Check if we can fold anything into the addressing mode. + auto AddrModeFns = selectAddrRegImm(MI.getOperand(1)); + if (!AddrModeFns) + return false; + + // Folded something. Create a new instruction and return it. + auto NewInst = MIB.buildInstr(NewOpc, {}, {}, MI.getFlags()); + if (isa<GStore>(MI)) + NewInst.addUse(ValReg); + else + NewInst.addDef(ValReg); + NewInst.cloneMemRefs(MI); + for (auto &Fn : *AddrModeFns) + Fn(NewInst); + MI.eraseFromParent(); + + return constrainSelectedInstRegOperands(*NewInst, TII, TRI, RBI); + } default: return false; } diff --git a/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp index 1c7cbb9..5dd4bf4 100644 --- a/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp +++ b/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp @@ -287,8 +287,8 @@ static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI, break; } BuildMI(LoopMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)), ScratchReg) - .addReg(AddrReg) - .addReg(ScratchReg); + .addReg(ScratchReg) + .addReg(AddrReg); BuildMI(LoopMBB, DL, TII->get(RISCV::BNE)) .addReg(ScratchReg) .addReg(RISCV::X0) @@ -375,8 +375,8 @@ static void doMaskedAtomicBinOpExpansion(const RISCVInstrInfo *TII, ScratchReg); BuildMI(LoopMBB, DL, TII->get(getSCForRMW32(Ordering, STI)), ScratchReg) - .addReg(AddrReg) - .addReg(ScratchReg); + .addReg(ScratchReg) + .addReg(AddrReg); BuildMI(LoopMBB, DL, TII->get(RISCV::BNE)) .addReg(ScratchReg) .addReg(RISCV::X0) @@ -535,8 +535,8 @@ bool RISCVExpandAtomicPseudo::expandAtomicMinMaxOp( // sc.w scratch1, scratch1, (addr) // bnez scratch1, loop BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW32(Ordering, STI)), Scratch1Reg) - .addReg(AddrReg) - .addReg(Scratch1Reg); + .addReg(Scratch1Reg) + .addReg(AddrReg); BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE)) .addReg(Scratch1Reg) .addReg(RISCV::X0) @@ -674,8 +674,8 @@ bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg( // bnez scratch, loophead BuildMI(LoopTailMBB, DL, 
TII->get(getSCForRMW(Ordering, Width, STI)), ScratchReg) - .addReg(AddrReg) - .addReg(NewValReg); + .addReg(NewValReg) + .addReg(AddrReg); BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE)) .addReg(ScratchReg) .addReg(RISCV::X0) @@ -707,8 +707,8 @@ bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg( MaskReg, ScratchReg); BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)), ScratchReg) - .addReg(AddrReg) - .addReg(ScratchReg); + .addReg(ScratchReg) + .addReg(AddrReg); BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE)) .addReg(ScratchReg) .addReg(RISCV::X0) diff --git a/llvm/lib/Target/RISCV/RISCVGISel.td b/llvm/lib/Target/RISCV/RISCVGISel.td index 7dd3385..eba35ef 100644 --- a/llvm/lib/Target/RISCV/RISCVGISel.td +++ b/llvm/lib/Target/RISCV/RISCVGISel.td @@ -100,65 +100,11 @@ def : LdPat<load, LD, PtrVT>; def : StPat<store, SD, GPR, PtrVT>; } -// Load and store patterns for i16, needed because Zfh makes s16 load/store -// legal and regbank select may not constrain registers to FP. -def : LdPat<load, LH, i16>; -def : StPat<store, SH, GPR, i16>; - -def : LdPat<extloadi8, LBU, i16>; // Prefer unsigned due to no c.lb in Zcb. -def : StPat<truncstorei8, SB, GPR, i16>; - -let Predicates = [HasAtomicLdSt] in { - // Prefer unsigned due to no c.lb in Zcb. - def : LdPat<relaxed_load<atomic_load_aext_8>, LBU, i16>; - def : LdPat<relaxed_load<atomic_load_nonext_16>, LH, i16>; - - def : StPat<relaxed_store<atomic_store_8>, SB, GPR, i16>; - def : StPat<relaxed_store<atomic_store_16>, SH, GPR, i16>; -} - -let Predicates = [HasAtomicLdSt, IsRV64] in { - // Load pattern is in RISCVInstrInfoA.td and shared with RV32. - def : StPat<relaxed_store<atomic_store_32>, SW, GPR, i32>; -} - //===----------------------------------------------------------------------===// // RV64 i32 patterns not used by SelectionDAG //===----------------------------------------------------------------------===// let Predicates = [IsRV64] in { -def : LdPat<extloadi8, LBU, i32>; // Prefer unsigned due to no c.lb in Zcb. -def : LdPat<extloadi16, LH, i32>; - -def : StPat<truncstorei8, SB, GPR, i32>; -def : StPat<truncstorei16, SH, GPR, i32>; - def : Pat<(sext_inreg (i64 (add GPR:$rs1, simm12_lo:$imm)), i32), (ADDIW GPR:$rs1, simm12_lo:$imm)>; } - -//===----------------------------------------------------------------------===// -// Zalasr patterns not used by SelectionDAG -//===----------------------------------------------------------------------===// - -let Predicates = [HasStdExtZalasr] in { - // the sequentially consistent loads use - // .aq instead of .aqrl to match the psABI/A.7 - def : PatLAQ<acquiring_load<atomic_load_aext_8>, LB_AQ, i16>; - def : PatLAQ<seq_cst_load<atomic_load_aext_8>, LB_AQ, i16>; - - def : PatLAQ<acquiring_load<atomic_load_nonext_16>, LH_AQ, i16>; - def : PatLAQ<seq_cst_load<atomic_load_nonext_16>, LH_AQ, i16>; - - def : PatSRL<releasing_store<atomic_store_8>, SB_RL, i16>; - def : PatSRL<seq_cst_store<atomic_store_8>, SB_RL, i16>; - - def : PatSRL<releasing_store<atomic_store_16>, SH_RL, i16>; - def : PatSRL<seq_cst_store<atomic_store_16>, SH_RL, i16>; -} - -let Predicates = [HasStdExtZalasr, IsRV64] in { - // Load pattern is in RISCVInstrInfoZalasr.td and shared with RV32. 
- def : PatSRL<releasing_store<atomic_store_32>, SW_RL, i32>; - def : PatSRL<seq_cst_store<atomic_store_32>, SW_RL, i32>; -} diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td index 9855c47..7a14929 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -1980,7 +1980,7 @@ def : LdPat<sextloadi8, LB>; def : LdPat<extloadi8, LBU>; // Prefer unsigned due to no c.lb in Zcb. def : LdPat<sextloadi16, LH>; def : LdPat<extloadi16, LH>; -def : LdPat<load, LW, i32>; +def : LdPat<load, LW, i32>, Requires<[IsRV32]>; def : LdPat<zextloadi8, LBU>; def : LdPat<zextloadi16, LHU>; @@ -1994,7 +1994,7 @@ class StPat<PatFrag StoreOp, RVInst Inst, RegisterClass StTy, def : StPat<truncstorei8, SB, GPR, XLenVT>; def : StPat<truncstorei16, SH, GPR, XLenVT>; -def : StPat<store, SW, GPR, i32>; +def : StPat<store, SW, GPR, i32>, Requires<[IsRV32]>; /// Fences diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td index 2e4326f..571d72f 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td @@ -33,7 +33,7 @@ multiclass LR_r_aq_rl<bits<3> funct3, string opcodestr> { let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in class SC_r<bit aq, bit rl, bits<3> funct3, string opcodestr> : RVInstRAtomic<0b00011, aq, rl, funct3, OPC_AMO, - (outs GPR:$rd), (ins GPRMemZeroOffset:$rs1, GPR:$rs2), + (outs GPR:$rd), (ins GPR:$rs2, GPRMemZeroOffset:$rs1), opcodestr, "$rd, $rs2, $rs1">; multiclass SC_r_aq_rl<bits<3> funct3, string opcodestr> { @@ -46,7 +46,7 @@ multiclass SC_r_aq_rl<bits<3> funct3, string opcodestr> { let hasSideEffects = 0, mayLoad = 1, mayStore = 1 in class AMO_rr<bits<5> funct5, bit aq, bit rl, bits<3> funct3, string opcodestr> : RVInstRAtomic<funct5, aq, rl, funct3, OPC_AMO, - (outs GPR:$rd), (ins GPRMemZeroOffset:$rs1, GPR:$rs2), + (outs GPR:$rd), (ins GPR:$rs2, GPRMemZeroOffset:$rs1), opcodestr, "$rd, $rs2, $rs1">; multiclass AMO_rr_aq_rl<bits<5> funct5, bits<3> funct3, string opcodestr> { @@ -174,8 +174,9 @@ let Predicates = [HasAtomicLdSt] in { def : StPat<relaxed_store<atomic_store_8>, SB, GPR, XLenVT>; def : StPat<relaxed_store<atomic_store_16>, SH, GPR, XLenVT>; def : StPat<relaxed_store<atomic_store_32>, SW, GPR, XLenVT>; +} - // Used by GISel for RV32 and RV64. 
+let Predicates = [HasAtomicLdSt, IsRV32] in { def : LdPat<relaxed_load<atomic_load_nonext_32>, LW, i32>; } @@ -188,31 +189,34 @@ let Predicates = [HasAtomicLdSt, IsRV64] in { /// AMOs +class PatAMO<SDPatternOperator OpNode, RVInst Inst, ValueType vt = XLenVT> + : Pat<(vt (OpNode (XLenVT GPR:$rs1), (vt GPR:$rs2))), (Inst GPR:$rs2, GPR:$rs1)>; + multiclass AMOPat<string AtomicOp, string BaseInst, ValueType vt = XLenVT, list<Predicate> ExtraPreds = []> { let Predicates = !listconcat([HasStdExtA, NoStdExtZtso], ExtraPreds) in { - def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_monotonic"), - !cast<RVInst>(BaseInst), vt>; - def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_acquire"), - !cast<RVInst>(BaseInst#"_AQ"), vt>; - def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_release"), - !cast<RVInst>(BaseInst#"_RL"), vt>; - def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_acq_rel"), - !cast<RVInst>(BaseInst#"_AQRL"), vt>; - def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_seq_cst"), - !cast<RVInst>(BaseInst#"_AQRL"), vt>; + def : PatAMO<!cast<PatFrag>(AtomicOp#"_monotonic"), + !cast<RVInst>(BaseInst), vt>; + def : PatAMO<!cast<PatFrag>(AtomicOp#"_acquire"), + !cast<RVInst>(BaseInst#"_AQ"), vt>; + def : PatAMO<!cast<PatFrag>(AtomicOp#"_release"), + !cast<RVInst>(BaseInst#"_RL"), vt>; + def : PatAMO<!cast<PatFrag>(AtomicOp#"_acq_rel"), + !cast<RVInst>(BaseInst#"_AQRL"), vt>; + def : PatAMO<!cast<PatFrag>(AtomicOp#"_seq_cst"), + !cast<RVInst>(BaseInst#"_AQRL"), vt>; } let Predicates = !listconcat([HasStdExtA, HasStdExtZtso], ExtraPreds) in { - def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_monotonic"), - !cast<RVInst>(BaseInst), vt>; - def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_acquire"), - !cast<RVInst>(BaseInst), vt>; - def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_release"), - !cast<RVInst>(BaseInst), vt>; - def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_acq_rel"), - !cast<RVInst>(BaseInst), vt>; - def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_seq_cst"), - !cast<RVInst>(BaseInst), vt>; + def : PatAMO<!cast<PatFrag>(AtomicOp#"_monotonic"), + !cast<RVInst>(BaseInst), vt>; + def : PatAMO<!cast<PatFrag>(AtomicOp#"_acquire"), + !cast<RVInst>(BaseInst), vt>; + def : PatAMO<!cast<PatFrag>(AtomicOp#"_release"), + !cast<RVInst>(BaseInst), vt>; + def : PatAMO<!cast<PatFrag>(AtomicOp#"_acq_rel"), + !cast<RVInst>(BaseInst), vt>; + def : PatAMO<!cast<PatFrag>(AtomicOp#"_seq_cst"), + !cast<RVInst>(BaseInst), vt>; } } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td index c691aa6..20e2142 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td @@ -44,7 +44,7 @@ let hasSideEffects = 0, mayLoad = 1, mayStore = 1, Constraints = "$rd = $rd_wb" class AMO_cas<bits<5> funct5, bit aq, bit rl, bits<3> funct3, string opcodestr, DAGOperand RC> : RVInstRAtomic<funct5, aq, rl, funct3, OPC_AMO, - (outs RC:$rd_wb), (ins RC:$rd, GPRMemZeroOffset:$rs1, RC:$rs2), + (outs RC:$rd_wb), (ins RC:$rd, RC:$rs2, GPRMemZeroOffset:$rs1), opcodestr, "$rd, $rs2, $rs1">; multiclass AMO_cas_aq_rl<bits<5> funct5, bits<3> funct3, string opcodestr, @@ -71,48 +71,48 @@ defm AMOCAS_Q : AMO_cas_aq_rl<0b00101, 0b100, "amocas.q", GPRPairRV64>; multiclass AMOCASPat<string AtomicOp, string BaseInst, ValueType vt = XLenVT, list<Predicate> ExtraPreds = []> { let Predicates = !listconcat([HasStdExtZacas, NoStdExtZtso], ExtraPreds) in { - def : Pat<(!cast<PatFrag>(AtomicOp#"_monotonic") (vt GPR:$addr), + def : Pat<(!cast<PatFrag>(AtomicOp#"_monotonic") (XLenVT GPR:$addr), (vt GPR:$cmp), (vt GPR:$new)), - 
(!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$addr, GPR:$new)>; - def : Pat<(!cast<PatFrag>(AtomicOp#"_acquire") (vt GPR:$addr), + (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$new, GPR:$addr)>; + def : Pat<(!cast<PatFrag>(AtomicOp#"_acquire") (XLenVT GPR:$addr), (vt GPR:$cmp), (vt GPR:$new)), - (!cast<RVInst>(BaseInst#"_AQ") GPR:$cmp, GPR:$addr, GPR:$new)>; - def : Pat<(!cast<PatFrag>(AtomicOp#"_release") (vt GPR:$addr), + (!cast<RVInst>(BaseInst#"_AQ") GPR:$cmp, GPR:$new, GPR:$addr)>; + def : Pat<(!cast<PatFrag>(AtomicOp#"_release") (XLenVT GPR:$addr), (vt GPR:$cmp), (vt GPR:$new)), - (!cast<RVInst>(BaseInst#"_RL") GPR:$cmp, GPR:$addr, GPR:$new)>; - def : Pat<(!cast<PatFrag>(AtomicOp#"_acq_rel") (vt GPR:$addr), + (!cast<RVInst>(BaseInst#"_RL") GPR:$cmp, GPR:$new, GPR:$addr)>; + def : Pat<(!cast<PatFrag>(AtomicOp#"_acq_rel") (XLenVT GPR:$addr), (vt GPR:$cmp), (vt GPR:$new)), - (!cast<RVInst>(BaseInst#"_AQRL") GPR:$cmp, GPR:$addr, GPR:$new)>; + (!cast<RVInst>(BaseInst#"_AQRL") GPR:$cmp, GPR:$new, GPR:$addr)>; def : Pat<(!cast<PatFrag>(AtomicOp#"_seq_cst") (vt GPR:$addr), (vt GPR:$cmp), (vt GPR:$new)), - (!cast<RVInst>(BaseInst#"_AQRL") GPR:$cmp, GPR:$addr, GPR:$new)>; + (!cast<RVInst>(BaseInst#"_AQRL") GPR:$cmp, GPR:$new, GPR:$addr)>; } // Predicates = !listconcat([HasStdExtZacas, NoStdExtZtso], ExtraPreds) let Predicates = !listconcat([HasStdExtZacas, HasStdExtZtso], ExtraPreds) in { - def : Pat<(!cast<PatFrag>(AtomicOp#"_monotonic") (vt GPR:$addr), + def : Pat<(!cast<PatFrag>(AtomicOp#"_monotonic") (XLenVT GPR:$addr), (vt GPR:$cmp), (vt GPR:$new)), - (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$addr, GPR:$new)>; - def : Pat<(!cast<PatFrag>(AtomicOp#"_acquire") (vt GPR:$addr), + (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$new, GPR:$addr)>; + def : Pat<(!cast<PatFrag>(AtomicOp#"_acquire") (XLenVT GPR:$addr), (vt GPR:$cmp), (vt GPR:$new)), - (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$addr, GPR:$new)>; - def : Pat<(!cast<PatFrag>(AtomicOp#"_release") (vt GPR:$addr), + (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$new, GPR:$addr)>; + def : Pat<(!cast<PatFrag>(AtomicOp#"_release") (XLenVT GPR:$addr), (vt GPR:$cmp), (vt GPR:$new)), - (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$addr, GPR:$new)>; - def : Pat<(!cast<PatFrag>(AtomicOp#"_acq_rel") (vt GPR:$addr), + (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$new, GPR:$addr)>; + def : Pat<(!cast<PatFrag>(AtomicOp#"_acq_rel") (XLenVT GPR:$addr), (vt GPR:$cmp), (vt GPR:$new)), - (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$addr, GPR:$new)>; - def : Pat<(!cast<PatFrag>(AtomicOp#"_seq_cst") (vt GPR:$addr), + (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$new, GPR:$addr)>; + def : Pat<(!cast<PatFrag>(AtomicOp#"_seq_cst") (XLenVT GPR:$addr), (vt GPR:$cmp), (vt GPR:$new)), - (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$addr, GPR:$new)>; + (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$new, GPR:$addr)>; } // Predicates = !listconcat([HasStdExtZacas, HasStdExtZtso], ExtraPreds) } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td index f7ceb0d..5f944034 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td @@ -94,11 +94,12 @@ let Predicates = [HasStdExtZalasr] in { def : PatSRL<releasing_store<atomic_store_32>, SW_RL>; def : PatSRL<seq_cst_store<atomic_store_32>, SW_RL>; +} - // Used by GISel for RV32 and RV64. 
+let Predicates = [HasStdExtZalasr, IsRV32] in {
   def : PatLAQ<acquiring_load<atomic_load_nonext_32>, LW_AQ, i32>;
   def : PatLAQ<seq_cst_load<atomic_load_nonext_32>, LW_AQ, i32>;
-} // Predicates = [HasStdExtZalasr]
+} // Predicates = [HasStdExtZalasr, IsRV32]
 
 let Predicates = [HasStdExtZalasr, IsRV64] in {
   def : PatLAQ<acquiring_load<atomic_load_asext_32>, LW_AQ, i64>;
diff --git a/llvm/lib/Target/SPIRV/SPIRVLegalizeImplicitBinding.cpp b/llvm/lib/Target/SPIRV/SPIRVLegalizeImplicitBinding.cpp
index fc14a03..f7be2a1 100644
--- a/llvm/lib/Target/SPIRV/SPIRVLegalizeImplicitBinding.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVLegalizeImplicitBinding.cpp
@@ -32,7 +32,9 @@ class SPIRVLegalizeImplicitBinding : public ModulePass {
 public:
   static char ID;
   SPIRVLegalizeImplicitBinding() : ModulePass(ID) {}
-
+  StringRef getPassName() const override {
+    return "SPIRV Legalize Implicit Binding";
+  }
   bool runOnModule(Module &M) override;
 
 private:
diff --git a/llvm/lib/Target/Sparc/SparcInstrInfo.cpp b/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
index c0cd0176e..f66eb9d 100644
--- a/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -668,6 +668,7 @@ bool SparcInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
         .addImm(-1);
     MIBundleBuilder(MBB, InstSTBAR, InstLDSTUB);
     MBB.erase(MI);
+    return true;
   }
   }
   return false;
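
Note (not part of the patch): a minimal sketch of how a new caller would be expected to use the expandConditionalPseudo helper added to AArch64ExpandPseudoInsts.cpp above. The pseudo being expanded, its operands, and the CBZX condition below are hypothetical placeholders; only the helper's contract is taken from the patch, namely that it splits the block, wires up the branch, and leaves an unconditional B to EndBB at CondBB.back(), so the conditional body must be inserted before that terminator.

// Hypothetical member of AArch64ExpandPseudo; "MyCondPseudo" is a placeholder
// opcode used only to illustrate the helper's intended usage.
MachineBasicBlock *
AArch64ExpandPseudo::expandMyCondPseudo(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();

  // Build the compare-and-branch that decides whether the conditional body
  // in CondBB runs (condition and operand are illustrative).
  MachineInstrBuilder Branch =
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::CBZX)).add(MI.getOperand(0));

  // The helper splits MBB: CondBB holds the conditional body, EndBB the
  // fall-through code, and CondBB already ends in a branch to EndBB.
  auto [CondBB, EndBB] = expandConditionalPseudo(MBB, MBBI, DL, Branch);

  // Insert the conditional body before CondBB.back(), i.e. ahead of the
  // branch to EndBB (here just a call, copied from the pseudo's operands).
  BuildMI(CondBB, CondBB.back(), DL, TII->get(AArch64::BL))
      .add(MI.getOperand(1));

  MI.eraseFromParent();
  return &EndBB;
}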