Diffstat (limited to 'llvm/lib/Target/AMDGPU')
7 files changed, 97 insertions, 74 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 723d07e..c7a91f4c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -929,7 +929,7 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
                                       ThinOrFullLTOPhase Phase) {
         if (Level != OptimizationLevel::O0) {
           if (!isLTOPreLink(Phase)) {
-            if (getTargetTriple().isAMDGCN()) {
+            if (EnableAMDGPUAttributor && getTargetTriple().isAMDGCN()) {
               AMDGPUAttributorOptions Opts;
               MPM.addPass(AMDGPUAttributorPass(*this, Opts, Phase));
             }
@@ -966,7 +966,7 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
           PM.addPass(InternalizePass(mustPreserveGV));
           PM.addPass(GlobalDCEPass());
         }
-        if (EnableAMDGPUAttributor) {
+        if (EnableAMDGPUAttributor && getTargetTriple().isAMDGCN()) {
           AMDGPUAttributorOptions Opt;
           if (HasClosedWorldAssumption)
             Opt.IsClosedWorld = true;
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index a8140c3..99ba043 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -2105,6 +2105,10 @@ bool AMDGPUOperand::isInlinableImm(MVT type) const {
     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
     return false;
   }
+
+  if (getModifiers().Lit != LitModifier::None)
+    return false;
+
   // TODO: We should avoid using host float here. It would be better to
   // check the float bit values which is what a few other places do.
   // We've had bot failures before due to weird NaN support on mips hosts.
@@ -2339,6 +2343,7 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
   bool CanUse64BitLiterals =
       AsmParser->has64BitLiterals() &&
       !(InstDesc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P));
+  LitModifier Lit = getModifiers().Lit;
   MCContext &Ctx = AsmParser->getContext();

   if (Imm.IsFPImm) { // We got fp literal token
@@ -2348,7 +2353,8 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
-      if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
+      if (Lit == LitModifier::None &&
+          AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
                                        AsmParser->hasInv2PiInlineImm())) {
         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
         return;
@@ -2372,14 +2378,20 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo

        if ((OpTy == AMDGPU::OPERAND_REG_IMM_FP64 ||
             OpTy == AMDGPU::OPERAND_REG_INLINE_C_FP64 ||
-            OpTy == AMDGPU::OPERAND_REG_INLINE_AC_FP64) &&
-           CanUse64BitLiterals && Lo_32(Val) != 0) {
-          Inst.addOperand(MCOperand::createExpr(
-              AMDGPUMCExpr::createLit(LitModifier::Lit64, Val, Ctx)));
-        } else {
-          Inst.addOperand(MCOperand::createImm(Val));
+            OpTy == AMDGPU::OPERAND_REG_INLINE_AC_FP64)) {
+          if (CanUse64BitLiterals && Lit == LitModifier::None &&
+              (isInt<32>(Val) || isUInt<32>(Val))) {
+            // The floating-point operand will be verbalized as an
+            // integer one. If that integer happens to fit 32 bits, on
+            // re-assembling it will be intepreted as the high half of
+            // the actual value, so we have to wrap it into lit64().
+            Lit = LitModifier::Lit64;
+          } else if (Lit == LitModifier::Lit) {
+            // For FP64 operands lit() specifies the high half of the value.
+            Val = Hi_32(Val);
+          }
         }
-        return;
+        break;
       }

       // We don't allow fp literals in 64-bit integer instructions. It is
@@ -2388,19 +2400,17 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
       llvm_unreachable("fp literal in 64-bit integer instruction.");

     case AMDGPU::OPERAND_KIMM64:
-      if (CanUse64BitLiterals && Lo_32(Val) != 0) {
-        Inst.addOperand(MCOperand::createExpr(
-            AMDGPUMCExpr::createLit(LitModifier::Lit64, Val, Ctx)));
-      } else {
-        Inst.addOperand(MCOperand::createImm(Val));
-      }
-      return;
+      if (CanUse64BitLiterals && Lit == LitModifier::None &&
+          (isInt<32>(Val) || isUInt<32>(Val)))
+        Lit = LitModifier::Lit64;
+      break;

     case AMDGPU::OPERAND_REG_IMM_BF16:
     case AMDGPU::OPERAND_REG_INLINE_C_BF16:
     case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
     case AMDGPU::OPERAND_REG_IMM_V2BF16:
-      if (AsmParser->hasInv2PiInlineImm() && Literal == 0x3fc45f306725feed) {
+      if (Lit == LitModifier::None && AsmParser->hasInv2PiInlineImm() &&
+          Literal == 0x3fc45f306725feed) {
         // This is the 1/(2*pi) which is going to be truncated to bf16 with the
         // loss of precision. The constant represents ideomatic fp32 value of
         // 1/(2*pi) = 0.15915494 since bf16 is in fact fp32 with cleared low 16
@@ -2438,14 +2448,19 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo

       // We allow precision lost but not overflow or underflow. This should be
       // checked earlier in isLiteralImm()
-      uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
-      Inst.addOperand(MCOperand::createImm(ImmVal));
-      return;
+      Val = FPLiteral.bitcastToAPInt().getZExtValue();
+      break;
     }
     default:
       llvm_unreachable("invalid operand size");
     }

+    if (Lit != LitModifier::None) {
+      Inst.addOperand(
+          MCOperand::createExpr(AMDGPUMCExpr::createLit(Lit, Val, Ctx)));
+    } else {
+      Inst.addOperand(MCOperand::createImm(Val));
+    }
     return;
   }

@@ -2465,12 +2480,12 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
   case AMDGPU::OPERAND_REG_IMM_V2INT32:
   case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
   case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
-    Inst.addOperand(MCOperand::createImm(Val));
-    return;
+    break;

   case AMDGPU::OPERAND_REG_IMM_INT64:
   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
-    if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
+    if (Lit == LitModifier::None &&
+        AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
       Inst.addOperand(MCOperand::createImm(Val));
       return;
     }
@@ -2479,22 +2494,15 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
     // truncated to uint32_t), if the target doesn't support 64-bit literals, or
     // the lit modifier is explicitly used, we need to truncate it to the 32
     // LSBs.
-    if (!AsmParser->has64BitLiterals() ||
-        getModifiers().Lit == LitModifier::Lit)
+    if (!AsmParser->has64BitLiterals() || Lit == LitModifier::Lit)
       Val = Lo_32(Val);
-
-    if (CanUse64BitLiterals && (!isInt<32>(Val) || !isUInt<32>(Val))) {
-      Inst.addOperand(MCOperand::createExpr(
-          AMDGPUMCExpr::createLit(LitModifier::Lit64, Val, Ctx)));
-    } else {
-      Inst.addOperand(MCOperand::createImm(Val));
-    }
-    return;
+    break;

   case AMDGPU::OPERAND_REG_IMM_FP64:
   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
-    if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
+    if (Lit == LitModifier::None &&
+        AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
       Inst.addOperand(MCOperand::createImm(Val));
       return;
     }
@@ -2509,19 +2517,15 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
       // 1) explicitly forced by using lit modifier;
      // 2) the value is a valid 32-bit representation (signed or unsigned),
      // meanwhile not forced by lit64 modifier.
-      if (getModifiers().Lit == LitModifier::Lit ||
-          (getModifiers().Lit != LitModifier::Lit64 &&
-           (isInt<32>(Val) || isUInt<32>(Val))))
+      if (Lit == LitModifier::Lit ||
+          (Lit != LitModifier::Lit64 && (isInt<32>(Val) || isUInt<32>(Val))))
        Val = static_cast<uint64_t>(Val) << 32;
     }

-    if (CanUse64BitLiterals && Lo_32(Val) != 0) {
-      Inst.addOperand(MCOperand::createExpr(
-          AMDGPUMCExpr::createLit(LitModifier::Lit64, Val, Ctx)));
-    } else {
-      Inst.addOperand(MCOperand::createImm(Val));
-    }
-    return;
+    // For FP64 operands lit() specifies the high half of the value.
+    if (Lit == LitModifier::Lit)
+      Val = Hi_32(Val);
+    break;

   case AMDGPU::OPERAND_REG_IMM_INT16:
   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
@@ -2534,25 +2538,23 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
   case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
   case AMDGPU::OPERAND_KIMM32:
   case AMDGPU::OPERAND_KIMM16:
-    Inst.addOperand(MCOperand::createImm(Val));
-    return;
+    break;

   case AMDGPU::OPERAND_KIMM64:
-    if ((isInt<32>(Val) || isUInt<32>(Val)) &&
-        getModifiers().Lit != LitModifier::Lit64)
+    if ((isInt<32>(Val) || isUInt<32>(Val)) && Lit != LitModifier::Lit64)
       Val <<= 32;
-
-    if (CanUse64BitLiterals && Lo_32(Val) != 0) {
-      Inst.addOperand(MCOperand::createExpr(
-          AMDGPUMCExpr::createLit(LitModifier::Lit64, Val, Ctx)));
-    } else {
-      Inst.addOperand(MCOperand::createImm(Val));
-    }
-    return;
+    break;

   default:
     llvm_unreachable("invalid operand type");
   }
+
+  if (Lit != LitModifier::None) {
+    Inst.addOperand(
+        MCOperand::createExpr(AMDGPUMCExpr::createLit(Lit, Val, Ctx)));
+  } else {
+    Inst.addOperand(MCOperand::createImm(Val));
+  }
 }

 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
@@ -4821,12 +4823,15 @@ bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst,
     const MCOperand &MO = Inst.getOperand(OpIdx);
     // Exclude special imm operands (like that used by s_set_gpr_idx_on)
     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
+      bool IsLit = false;
       std::optional<int64_t> Imm;
       if (MO.isImm()) {
         Imm = MO.getImm();
       } else if (MO.isExpr()) {
-        if (isLitExpr(MO.getExpr()))
+        if (isLitExpr(MO.getExpr())) {
+          IsLit = true;
           Imm = getLitValue(MO.getExpr());
+        }
       } else {
         continue;
       }
@@ -4836,7 +4841,7 @@ bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst,
       } else if (!isInlineConstant(Inst, OpIdx)) {
         auto OpType = static_cast<AMDGPU::OperandType>(
             Desc.operands()[OpIdx].OperandType);
-        int64_t Value = encode32BitLiteral(*Imm, OpType);
+        int64_t Value = encode32BitLiteral(*Imm, OpType, IsLit);
         if (NumLiterals == 0 || LiteralValue != Value) {
           LiteralValue = Value;
           ++NumLiterals;
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index f11b373..be62395 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -1551,7 +1551,7 @@ AMDGPUDisassembler::decodeMandatoryLiteral64Constant(uint64_t Val) const {
   HasLiteral = true;
   Literal = Literal64 = Val;

-  bool UseLit64 = Lo_32(Literal64) != 0;
+  bool UseLit64 = Hi_32(Literal64) == 0;
   return UseLit64 ? MCOperand::createExpr(AMDGPUMCExpr::createLit(
                         LitModifier::Lit64, Literal64, getContext()))
                   : MCOperand::createImm(Literal64);
@@ -1584,11 +1584,11 @@ MCOperand AMDGPUDisassembler::decodeLiteralConstant(const MCInstrDesc &Desc,
   if (CanUse64BitLiterals) {
     if (OpDesc.OperandType == AMDGPU::OPERAND_REG_IMM_INT64 ||
         OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT64)
-      UseLit64 = !isInt<32>(Val) || !isUInt<32>(Val);
+      UseLit64 = false;
     else if (OpDesc.OperandType == AMDGPU::OPERAND_REG_IMM_FP64 ||
              OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_C_FP64 ||
              OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_AC_FP64)
-      UseLit64 = Lo_32(Val) != 0;
+      UseLit64 = Hi_32(Literal64) == 0;
   }

   return UseLit64 ? MCOperand::createExpr(AMDGPUMCExpr::createLit(
@@ -1614,12 +1614,12 @@ AMDGPUDisassembler::decodeLiteral64Constant(const MCInst &Inst) const {
   const MCOperandInfo &OpDesc = Desc.operands()[Inst.getNumOperands()];
   if (OpDesc.OperandType == AMDGPU::OPERAND_REG_IMM_INT64 ||
       OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT64) {
-    UseLit64 = !isInt<32>(Literal64) || !isUInt<32>(Literal64);
+    UseLit64 = false;
   } else {
     assert(OpDesc.OperandType == AMDGPU::OPERAND_REG_IMM_FP64 ||
            OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_C_FP64 ||
            OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_AC_FP64);
-    UseLit64 = Lo_32(Literal64) != 0;
+    UseLit64 = Hi_32(Literal64) == 0;
   }

   return UseLit64 ? MCOperand::createExpr(AMDGPUMCExpr::createLit(
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index e82f998..703ec0a 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -73,7 +73,13 @@ void AMDGPUInstPrinter::printU16ImmDecOperand(const MCInst *MI, unsigned OpNo,
 void AMDGPUInstPrinter::printU32ImmOperand(const MCInst *MI, unsigned OpNo,
                                            const MCSubtargetInfo &STI,
                                            raw_ostream &O) {
-  O << formatHex(MI->getOperand(OpNo).getImm() & 0xffffffff);
+  const MCOperand &Op = MI->getOperand(OpNo);
+  if (Op.isExpr()) {
+    MAI.printExpr(O, *Op.getExpr());
+    return;
+  }
+
+  O << formatHex(Op.getImm() & 0xffffffff);
 }

 void AMDGPUInstPrinter::printFP64ImmOperand(const MCInst *MI, unsigned OpNo,
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
index f2879116..ea758bb 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
@@ -270,10 +270,19 @@ std::optional<uint64_t> AMDGPUMCCodeEmitter::getLitEncoding(
     const MCInstrDesc &Desc, const MCOperand &MO, unsigned OpNo,
     const MCSubtargetInfo &STI, bool HasMandatoryLiteral) const {
   const MCOperandInfo &OpInfo = Desc.operands()[OpNo];
-  int64_t Imm;
+  int64_t Imm = 0;
   if (MO.isExpr()) {
-    if (!MO.getExpr()->evaluateAsAbsolute(Imm))
-      return AMDGPU::getOperandSize(OpInfo) == 8 ? 254 : 255;
+    if (!MO.getExpr()->evaluateAsAbsolute(Imm) ||
+        AMDGPU::isLitExpr(MO.getExpr())) {
+      if (OpInfo.OperandType == AMDGPU::OPERAND_KIMM16 ||
+          OpInfo.OperandType == AMDGPU::OPERAND_KIMM32 ||
+          OpInfo.OperandType == AMDGPU::OPERAND_KIMM64)
+        return Imm;
+      if (STI.hasFeature(AMDGPU::Feature64BitLiterals) &&
+          AMDGPU::getOperandSize(OpInfo) == 8)
+        return 254;
+      return 255;
+    }
   } else {
     assert(!MO.isDFPImm());
@@ -452,13 +461,16 @@ void AMDGPUMCCodeEmitter::encodeInstruction(const MCInst &MI,

     // Yes! Encode it
     int64_t Imm = 0;
+    bool IsLit = false;
     if (Op.isImm())
       Imm = Op.getImm();
     else if (Op.isExpr()) {
-      if (const auto *C = dyn_cast<MCConstantExpr>(Op.getExpr()))
+      if (const auto *C = dyn_cast<MCConstantExpr>(Op.getExpr())) {
         Imm = C->getValue();
-      else if (AMDGPU::isLitExpr(Op.getExpr()))
+      } else if (AMDGPU::isLitExpr(Op.getExpr())) {
+        IsLit = true;
         Imm = AMDGPU::getLitValue(Op.getExpr());
+      }
     } else // Exprs will be replaced with a fixup value.
       llvm_unreachable("Must be immediate or expr");

@@ -468,7 +480,7 @@ void AMDGPUMCCodeEmitter::encodeInstruction(const MCInst &MI,
     } else {
       auto OpType =
           static_cast<AMDGPU::OperandType>(Desc.operands()[i].OperandType);
-      Imm = AMDGPU::encode32BitLiteral(Imm, OpType);
+      Imm = AMDGPU::encode32BitLiteral(Imm, OpType, IsLit);
       support::endian::write<uint32_t>(CB, Imm, llvm::endianness::little);
     }
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 76023d2..3e1b058 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -3145,7 +3145,7 @@ bool isValid32BitLiteral(uint64_t Val, bool IsFP64) {
   return isUInt<32>(Val) || isInt<32>(Val);
 }

-int64_t encode32BitLiteral(int64_t Imm, OperandType Type) {
+int64_t encode32BitLiteral(int64_t Imm, OperandType Type, bool IsLit) {
   switch (Type) {
   default:
     break;
@@ -3168,7 +3168,7 @@ int64_t encode32BitLiteral(int64_t Imm, OperandType Type) {
   case OPERAND_REG_INLINE_C_INT32:
     return Lo_32(Imm);
   case OPERAND_REG_IMM_FP64:
-    return Hi_32(Imm);
+    return IsLit ? Imm : Hi_32(Imm);
   }
   return Imm;
 }
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 49b4d02..a01a5fd 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1727,7 +1727,7 @@ LLVM_READNONE
 bool isValid32BitLiteral(uint64_t Val, bool IsFP64);

 LLVM_READNONE
-int64_t encode32BitLiteral(int64_t Imm, OperandType Type);
+int64_t encode32BitLiteral(int64_t Imm, OperandType Type, bool IsLit);

 bool isArgPassedInSGPR(const Argument *Arg);