diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 149 |
1 files changed, 81 insertions, 68 deletions
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index d0c0822..99ba043 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -1347,6 +1347,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser { bool ForcedDPP = false; bool ForcedSDWA = false; KernelScopeInfo KernelScope; + const unsigned HwMode; /// @name Auto-generated Match Functions /// { @@ -1356,6 +1357,13 @@ class AMDGPUAsmParser : public MCTargetAsmParser { /// } + /// Get size of register operand + unsigned getRegOperandSize(const MCInstrDesc &Desc, unsigned OpNo) const { + assert(OpNo < Desc.NumOperands); + int16_t RCID = MII.getOpRegClassID(Desc.operands()[OpNo], HwMode); + return getRegBitWidth(RCID) / 8; + } + private: void createConstantSymbol(StringRef Id, int64_t Val); @@ -1442,9 +1450,9 @@ public: using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>; AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser, - const MCInstrInfo &MII, - const MCTargetOptions &Options) - : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) { + const MCInstrInfo &MII, const MCTargetOptions &Options) + : MCTargetAsmParser(Options, STI, MII), Parser(_Parser), + HwMode(STI.getHwMode(MCSubtargetInfo::HwMode_RegInfo)) { MCAsmParserExtension::Initialize(Parser); setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits())); @@ -2097,6 +2105,10 @@ bool AMDGPUOperand::isInlinableImm(MVT type) const { // Only plain immediates are inlinable (e.g. "clamp" attribute is not) return false; } + + if (getModifiers().Lit != LitModifier::None) + return false; + // TODO: We should avoid using host float here. It would be better to // check the float bit values which is what a few other places do. // We've had bot failures before due to weird NaN support on mips hosts. 
@@ -2331,6 +2343,7 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo bool CanUse64BitLiterals = AsmParser->has64BitLiterals() && !(InstDesc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)); + LitModifier Lit = getModifiers().Lit; MCContext &Ctx = AsmParser->getContext(); if (Imm.IsFPImm) { // We got fp literal token @@ -2340,7 +2353,8 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo case AMDGPU::OPERAND_REG_INLINE_C_INT64: case AMDGPU::OPERAND_REG_INLINE_C_FP64: case AMDGPU::OPERAND_REG_INLINE_AC_FP64: - if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), + if (Lit == LitModifier::None && + AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), AsmParser->hasInv2PiInlineImm())) { Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); return; @@ -2364,14 +2378,20 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo if ((OpTy == AMDGPU::OPERAND_REG_IMM_FP64 || OpTy == AMDGPU::OPERAND_REG_INLINE_C_FP64 || - OpTy == AMDGPU::OPERAND_REG_INLINE_AC_FP64) && - CanUse64BitLiterals && Lo_32(Val) != 0) { - Inst.addOperand(MCOperand::createExpr( - AMDGPUMCExpr::createLit(LitModifier::Lit64, Val, Ctx))); - } else { - Inst.addOperand(MCOperand::createImm(Val)); + OpTy == AMDGPU::OPERAND_REG_INLINE_AC_FP64)) { + if (CanUse64BitLiterals && Lit == LitModifier::None && + (isInt<32>(Val) || isUInt<32>(Val))) { + // The floating-point operand will be verbalized as an + // integer one. If that integer happens to fit 32 bits, on + // re-assembling it will be interpreted as the high half of + // the actual value, so we have to wrap it into lit64(). + Lit = LitModifier::Lit64; + } else if (Lit == LitModifier::Lit) { + // For FP64 operands lit() specifies the high half of the value. + Val = Hi_32(Val); + } } - return; + break; } // We don't allow fp literals in 64-bit integer instructions. 
It is @@ -2380,19 +2400,17 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo llvm_unreachable("fp literal in 64-bit integer instruction."); case AMDGPU::OPERAND_KIMM64: - if (CanUse64BitLiterals && Lo_32(Val) != 0) { - Inst.addOperand(MCOperand::createExpr( - AMDGPUMCExpr::createLit(LitModifier::Lit64, Val, Ctx))); - } else { - Inst.addOperand(MCOperand::createImm(Val)); - } - return; + if (CanUse64BitLiterals && Lit == LitModifier::None && + (isInt<32>(Val) || isUInt<32>(Val))) + Lit = LitModifier::Lit64; + break; case AMDGPU::OPERAND_REG_IMM_BF16: case AMDGPU::OPERAND_REG_INLINE_C_BF16: case AMDGPU::OPERAND_REG_INLINE_C_V2BF16: case AMDGPU::OPERAND_REG_IMM_V2BF16: - if (AsmParser->hasInv2PiInlineImm() && Literal == 0x3fc45f306725feed) { + if (Lit == LitModifier::None && AsmParser->hasInv2PiInlineImm() && + Literal == 0x3fc45f306725feed) { // This is the 1/(2*pi) which is going to be truncated to bf16 with the // loss of precision. The constant represents ideomatic fp32 value of // 1/(2*pi) = 0.15915494 since bf16 is in fact fp32 with cleared low 16 @@ -2430,14 +2448,19 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo // We allow precision lost but not overflow or underflow. 
This should be // checked earlier in isLiteralImm() - uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); - Inst.addOperand(MCOperand::createImm(ImmVal)); - return; + Val = FPLiteral.bitcastToAPInt().getZExtValue(); + break; } default: llvm_unreachable("invalid operand size"); } + if (Lit != LitModifier::None) { + Inst.addOperand( + MCOperand::createExpr(AMDGPUMCExpr::createLit(Lit, Val, Ctx))); + } else { + Inst.addOperand(MCOperand::createImm(Val)); + } return; } @@ -2457,12 +2480,12 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo case AMDGPU::OPERAND_REG_IMM_V2INT32: case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16: - Inst.addOperand(MCOperand::createImm(Val)); - return; + break; case AMDGPU::OPERAND_REG_IMM_INT64: case AMDGPU::OPERAND_REG_INLINE_C_INT64: - if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { + if (Lit == LitModifier::None && + AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { Inst.addOperand(MCOperand::createImm(Val)); return; } @@ -2471,22 +2494,15 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo // truncated to uint32_t), if the target doesn't support 64-bit literals, or // the lit modifier is explicitly used, we need to truncate it to the 32 // LSBs. 
- if (!AsmParser->has64BitLiterals() || - getModifiers().Lit == LitModifier::Lit) + if (!AsmParser->has64BitLiterals() || Lit == LitModifier::Lit) Val = Lo_32(Val); - - if (CanUse64BitLiterals && (!isInt<32>(Val) || !isUInt<32>(Val))) { - Inst.addOperand(MCOperand::createExpr( - AMDGPUMCExpr::createLit(LitModifier::Lit64, Val, Ctx))); - } else { - Inst.addOperand(MCOperand::createImm(Val)); - } - return; + break; case AMDGPU::OPERAND_REG_IMM_FP64: case AMDGPU::OPERAND_REG_INLINE_C_FP64: case AMDGPU::OPERAND_REG_INLINE_AC_FP64: - if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { + if (Lit == LitModifier::None && + AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { Inst.addOperand(MCOperand::createImm(Val)); return; } @@ -2501,19 +2517,15 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo // 1) explicitly forced by using lit modifier; // 2) the value is a valid 32-bit representation (signed or unsigned), // meanwhile not forced by lit64 modifier. - if (getModifiers().Lit == LitModifier::Lit || - (getModifiers().Lit != LitModifier::Lit64 && - (isInt<32>(Val) || isUInt<32>(Val)))) + if (Lit == LitModifier::Lit || + (Lit != LitModifier::Lit64 && (isInt<32>(Val) || isUInt<32>(Val)))) Val = static_cast<uint64_t>(Val) << 32; } - if (CanUse64BitLiterals && Lo_32(Val) != 0) { - Inst.addOperand(MCOperand::createExpr( - AMDGPUMCExpr::createLit(LitModifier::Lit64, Val, Ctx))); - } else { - Inst.addOperand(MCOperand::createImm(Val)); - } - return; + // For FP64 operands lit() specifies the high half of the value. 
+ if (Lit == LitModifier::Lit) + Val = Hi_32(Val); + break; case AMDGPU::OPERAND_REG_IMM_INT16: case AMDGPU::OPERAND_REG_INLINE_C_INT16: @@ -2526,25 +2538,23 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo case AMDGPU::OPERAND_REG_INLINE_C_V2BF16: case AMDGPU::OPERAND_KIMM32: case AMDGPU::OPERAND_KIMM16: - Inst.addOperand(MCOperand::createImm(Val)); - return; + break; case AMDGPU::OPERAND_KIMM64: - if ((isInt<32>(Val) || isUInt<32>(Val)) && - getModifiers().Lit != LitModifier::Lit64) + if ((isInt<32>(Val) || isUInt<32>(Val)) && Lit != LitModifier::Lit64) Val <<= 32; - - if (CanUse64BitLiterals && Lo_32(Val) != 0) { - Inst.addOperand(MCOperand::createExpr( - AMDGPUMCExpr::createLit(LitModifier::Lit64, Val, Ctx))); - } else { - Inst.addOperand(MCOperand::createImm(Val)); - } - return; + break; default: llvm_unreachable("invalid operand type"); } + + if (Lit != LitModifier::None) { + Inst.addOperand( + MCOperand::createExpr(AMDGPUMCExpr::createLit(Lit, Val, Ctx))); + } else { + Inst.addOperand(MCOperand::createImm(Val)); + } } void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { @@ -4107,7 +4117,7 @@ bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst, SMLoc IDLoc) { if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray return true; - unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); + unsigned VDataSize = getRegOperandSize(Desc, VDataIdx); unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0; unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; if (DMask == 0) @@ -4171,8 +4181,7 @@ bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst, SMLoc IDLoc) { const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); bool IsNSA = SrsrcIdx - VAddr0Idx > 1; unsigned ActualAddrSize = - IsNSA ? SrsrcIdx - VAddr0Idx - : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; + IsNSA ? 
SrsrcIdx - VAddr0Idx : getRegOperandSize(Desc, VAddr0Idx) / 4; unsigned ExpectedAddrSize = AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16()); @@ -4182,8 +4191,7 @@ bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst, SMLoc IDLoc) { ExpectedAddrSize > getNSAMaxSize(Desc.TSFlags & SIInstrFlags::VSAMPLE)) { int VAddrLastIdx = SrsrcIdx - 1; - unsigned VAddrLastSize = - AMDGPU::getRegOperandSize(getMRI(), Desc, VAddrLastIdx) / 4; + unsigned VAddrLastSize = getRegOperandSize(Desc, VAddrLastIdx) / 4; ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize; } @@ -4429,7 +4437,8 @@ bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst, return true; const MCRegisterInfo *TRI = getContext().getRegisterInfo(); - if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128) + if (TRI->getRegClass(MII.getOpRegClassID(Desc.operands()[0], HwMode)) + .getSizeInBits() <= 128) return true; if (TRI->regsOverlap(Src2Reg, DstReg)) { @@ -4814,12 +4823,15 @@ bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst, const MCOperand &MO = Inst.getOperand(OpIdx); // Exclude special imm operands (like that used by s_set_gpr_idx_on) if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { + bool IsLit = false; std::optional<int64_t> Imm; if (MO.isImm()) { Imm = MO.getImm(); } else if (MO.isExpr()) { - if (isLitExpr(MO.getExpr())) + if (isLitExpr(MO.getExpr())) { + IsLit = true; Imm = getLitValue(MO.getExpr()); + } } else { continue; } @@ -4829,7 +4841,7 @@ bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst, } else if (!isInlineConstant(Inst, OpIdx)) { auto OpType = static_cast<AMDGPU::OperandType>( Desc.operands()[OpIdx].OperandType); - int64_t Value = encode32BitLiteral(*Imm, OpType); + int64_t Value = encode32BitLiteral(*Imm, OpType, IsLit); if (NumLiterals == 0 || LiteralValue != Value) { LiteralValue = Value; ++NumLiterals; @@ -5000,7 +5012,7 @@ bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm(); 
if (!AMDGPU::isLegalDPALU_DPPControl(getSTI(), DppCtrl) && - AMDGPU::isDPALU_DPP(MII.get(Opc), getSTI())) { + AMDGPU::isDPALU_DPP(MII.get(Opc), MII, getSTI())) { // DP ALU DPP is supported for row_newbcast only on GFX9* and row_share // only on GFX12. SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands); @@ -5523,7 +5535,8 @@ bool AMDGPUAsmParser::validateWMMA(const MCInst &Inst, unsigned Fmt = Inst.getOperand(FmtIdx).getImm(); int SrcIdx = AMDGPU::getNamedOperandIdx(Opc, SrcOp); unsigned RegSize = - TRI->getRegClass(Desc.operands()[SrcIdx].RegClass).getSizeInBits(); + TRI->getRegClass(MII.getOpRegClassID(Desc.operands()[SrcIdx], HwMode)) + .getSizeInBits(); if (RegSize == AMDGPU::wmmaScaleF8F6F4FormatToNumRegs(Fmt) * 32) return true; |