diff options
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 32 | ||||
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64ISelLowering.h | 1 | ||||
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstrGISel.td | 7 | ||||
-rw-r--r-- | llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp | 129 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h | 8 | ||||
-rw-r--r-- | llvm/lib/Target/DirectX/DXIL.td | 8 | ||||
-rw-r--r-- | llvm/lib/Target/DirectX/DXILOpBuilder.cpp | 8 | ||||
-rw-r--r-- | llvm/lib/Target/DirectX/DXILOpLowering.cpp | 25 | ||||
-rw-r--r-- | llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp | 15 | ||||
-rw-r--r-- | llvm/lib/Target/DirectX/DXILWriter/DXILWriterPass.cpp | 8 | ||||
-rw-r--r-- | llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp | 16 | ||||
-rw-r--r-- | llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp | 12 | ||||
-rw-r--r-- | llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td | 4 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrCompiler.td | 13 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86MCInstLower.cpp | 36 |
16 files changed, 234 insertions, 90 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index be2f2e4..91c1f59 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1561,6 +1561,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::VECREDUCE_ADD, VT, Custom); setOperationAction(ISD::VECREDUCE_AND, VT, Custom); setOperationAction(ISD::VECREDUCE_OR, VT, Custom); + setOperationAction(ISD::VECREDUCE_MUL, VT, Custom); setOperationAction(ISD::VECREDUCE_XOR, VT, Custom); setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom); setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom); @@ -1717,6 +1718,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom); setOperationAction(ISD::VECREDUCE_FMAXIMUM, VT, Custom); setOperationAction(ISD::VECREDUCE_FMINIMUM, VT, Custom); + setOperationAction(ISD::VECREDUCE_FMUL, VT, Custom); setOperationAction(ISD::VECTOR_SPLICE, VT, Custom); setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom); setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom); @@ -7775,6 +7777,9 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op, case ISD::VECREDUCE_FMAXIMUM: case ISD::VECREDUCE_FMINIMUM: return LowerVECREDUCE(Op, DAG); + case ISD::VECREDUCE_MUL: + case ISD::VECREDUCE_FMUL: + return LowerVECREDUCE_MUL(Op, DAG); case ISD::ATOMIC_LOAD_AND: return LowerATOMIC_LOAD_AND(Op, DAG); case ISD::DYNAMIC_STACKALLOC: @@ -16794,6 +16799,33 @@ SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op, } } +SDValue AArch64TargetLowering::LowerVECREDUCE_MUL(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + SDValue Src = Op.getOperand(0); + EVT SrcVT = Src.getValueType(); + assert(SrcVT.isScalableVector() && "Unexpected operand type!"); + + SDVTList SrcVTs = DAG.getVTList(SrcVT, SrcVT); + unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode()); + 
SDValue Identity = DAG.getNeutralElement(BaseOpc, DL, SrcVT, Op->getFlags()); + + // Whilst we don't know the size of the vector we do know the maximum size so + // can perform a tree reduction with an identity vector, which means once we + // arrive at the result the remaining stages (when the vector is smaller than + // the maximum) have no effect. + + unsigned Segments = AArch64::SVEMaxBitsPerVector / AArch64::SVEBitsPerBlock; + unsigned Stages = llvm::Log2_32(Segments * SrcVT.getVectorMinNumElements()); + + for (unsigned I = 0; I < Stages; ++I) { + Src = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, SrcVTs, Src, Identity); + Src = DAG.getNode(BaseOpc, DL, SrcVT, Src.getValue(0), Src.getValue(1)); + } + + return DAG.getExtractVectorElt(DL, Op.getValueType(), Src, 0); +} + SDValue AArch64TargetLowering::LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const { auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>(); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 00956fd..9495c9f 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -752,6 +752,7 @@ private: SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVECREDUCE_MUL(SDValue Op, SelectionDAG &DAG) const; SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerInlineDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td index 7322212..fe84193 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td +++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td @@ -233,6 +233,12 @@ def G_SDOT : AArch64GenericInstruction { let hasSideEffects = 0; } +def 
G_USDOT : AArch64GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src1, type0:$src2, type0:$src3); + let hasSideEffects = 0; +} + // Generic instruction for the BSP pseudo. It is expanded into BSP, which // expands into BSL/BIT/BIF after register allocation. def G_BSP : AArch64GenericInstruction { @@ -278,6 +284,7 @@ def : GINodeEquiv<G_UADDLV, AArch64uaddlv>; def : GINodeEquiv<G_UDOT, AArch64udot>; def : GINodeEquiv<G_SDOT, AArch64sdot>; +def : GINodeEquiv<G_USDOT, AArch64usdot>; def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>; diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 9e2d698..05a4313 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -1855,6 +1855,8 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, return LowerTriOp(AArch64::G_UDOT); case Intrinsic::aarch64_neon_sdot: return LowerTriOp(AArch64::G_SDOT); + case Intrinsic::aarch64_neon_usdot: + return LowerTriOp(AArch64::G_USDOT); case Intrinsic::aarch64_neon_sqxtn: return LowerUnaryOp(TargetOpcode::G_TRUNC_SSAT_S); case Intrinsic::aarch64_neon_sqxtun: diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index be62395..e0375ea 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -513,8 +513,7 @@ void AMDGPUDisassembler::decodeImmOperands(MCInst &MI, } if (Imm == AMDGPU::EncValues::LITERAL_CONST) { - Op = decodeLiteralConstant( - Desc, OpDesc, OpDesc.OperandType == AMDGPU::OPERAND_REG_IMM_FP64); + Op = decodeLiteralConstant(Desc, OpDesc); continue; } @@ -1545,21 +1544,21 @@ AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const { MCOperand 
AMDGPUDisassembler::decodeMandatoryLiteral64Constant(uint64_t Val) const { if (HasLiteral) { - if (Literal64 != Val) + if (Literal != Val) return errOperand(Val, "More than one unique literal is illegal"); } HasLiteral = true; - Literal = Literal64 = Val; + Literal = Val; - bool UseLit64 = Hi_32(Literal64) == 0; + bool UseLit64 = Hi_32(Literal) == 0; return UseLit64 ? MCOperand::createExpr(AMDGPUMCExpr::createLit( - LitModifier::Lit64, Literal64, getContext())) - : MCOperand::createImm(Literal64); + LitModifier::Lit64, Literal, getContext())) + : MCOperand::createImm(Literal); } -MCOperand AMDGPUDisassembler::decodeLiteralConstant(const MCInstrDesc &Desc, - const MCOperandInfo &OpDesc, - bool ExtendFP64) const { +MCOperand +AMDGPUDisassembler::decodeLiteralConstant(const MCInstrDesc &Desc, + const MCOperandInfo &OpDesc) const { // For now all literal constants are supposed to be unsigned integer // ToDo: deal with signed/unsigned 64-bit integer constants // ToDo: deal with float/double constants @@ -1569,35 +1568,79 @@ MCOperand AMDGPUDisassembler::decodeLiteralConstant(const MCInstrDesc &Desc, Twine(Bytes.size())); } HasLiteral = true; - Literal = Literal64 = eatBytes<uint32_t>(Bytes); - if (ExtendFP64) - Literal64 <<= 32; + Literal = eatBytes<uint32_t>(Bytes); } - int64_t Val = ExtendFP64 ? Literal64 : Literal; + // For disassembling always assume all inline constants are available. 
+ bool HasInv2Pi = true; - bool CanUse64BitLiterals = - STI.hasFeature(AMDGPU::Feature64BitLiterals) && - !(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)); - - bool UseLit64 = false; - if (CanUse64BitLiterals) { - if (OpDesc.OperandType == AMDGPU::OPERAND_REG_IMM_INT64 || - OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT64) - UseLit64 = false; - else if (OpDesc.OperandType == AMDGPU::OPERAND_REG_IMM_FP64 || - OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_C_FP64 || - OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_AC_FP64) - UseLit64 = Hi_32(Literal64) == 0; + // Invalid instruction codes may contain literals for inline-only + // operands, so we support them here as well. + int64_t Val = Literal; + bool UseLit = false; + switch (OpDesc.OperandType) { + default: + llvm_unreachable("Unexpected operand type!"); + case AMDGPU::OPERAND_REG_IMM_BF16: + case AMDGPU::OPERAND_REG_INLINE_C_BF16: + case AMDGPU::OPERAND_REG_INLINE_C_V2BF16: + UseLit = AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi); + break; + case AMDGPU::OPERAND_REG_IMM_V2BF16: + UseLit = AMDGPU::isInlinableLiteralV2BF16(Val); + break; + case AMDGPU::OPERAND_REG_IMM_FP16: + case AMDGPU::OPERAND_REG_INLINE_C_FP16: + case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: + UseLit = AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi); + break; + case AMDGPU::OPERAND_REG_IMM_V2FP16: + UseLit = AMDGPU::isInlinableLiteralV2F16(Val); + break; + case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16: + break; + case AMDGPU::OPERAND_REG_IMM_INT16: + case AMDGPU::OPERAND_REG_INLINE_C_INT16: + case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: + UseLit = AMDGPU::isInlinableLiteralI16(Val, HasInv2Pi); + break; + case AMDGPU::OPERAND_REG_IMM_V2INT16: + UseLit = AMDGPU::isInlinableLiteralV2I16(Val); + break; + case AMDGPU::OPERAND_REG_IMM_FP32: + case AMDGPU::OPERAND_REG_INLINE_C_FP32: + case AMDGPU::OPERAND_REG_INLINE_AC_FP32: + case AMDGPU::OPERAND_REG_IMM_INT32: + case AMDGPU::OPERAND_REG_INLINE_C_INT32: + case 
AMDGPU::OPERAND_REG_INLINE_AC_INT32: + case AMDGPU::OPERAND_REG_IMM_V2FP32: + case AMDGPU::OPERAND_REG_IMM_V2INT32: + case AMDGPU::OPERAND_KIMM32: + UseLit = AMDGPU::isInlinableLiteral32(Val, HasInv2Pi); + break; + case AMDGPU::OPERAND_REG_IMM_FP64: + case AMDGPU::OPERAND_REG_INLINE_C_FP64: + case AMDGPU::OPERAND_REG_INLINE_AC_FP64: + Val <<= 32; + break; + case AMDGPU::OPERAND_REG_IMM_INT64: + case AMDGPU::OPERAND_REG_INLINE_C_INT64: + UseLit = AMDGPU::isInlinableLiteral64(Val, HasInv2Pi); + break; + case MCOI::OPERAND_REGISTER: + // TODO: Disassembling V_DUAL_FMAMK_F32_X_FMAMK_F32_gfx11 hits + // decoding a literal in a position of a register operand. Give + // it special handling in the caller, decodeImmOperands(), instead + // of quietly allowing it here. + break; } - return UseLit64 ? MCOperand::createExpr(AMDGPUMCExpr::createLit( - LitModifier::Lit64, Val, getContext())) - : MCOperand::createImm(Val); + return UseLit ? MCOperand::createExpr(AMDGPUMCExpr::createLit( + LitModifier::Lit, Val, getContext())) + : MCOperand::createImm(Val); } -MCOperand -AMDGPUDisassembler::decodeLiteral64Constant(const MCInst &Inst) const { +MCOperand AMDGPUDisassembler::decodeLiteral64Constant() const { assert(STI.hasFeature(AMDGPU::Feature64BitLiterals)); if (!HasLiteral) { @@ -1606,25 +1649,13 @@ AMDGPUDisassembler::decodeLiteral64Constant(const MCInst &Inst) const { Twine(Bytes.size())); } HasLiteral = true; - Literal64 = eatBytes<uint64_t>(Bytes); - } - - bool UseLit64 = false; - const MCInstrDesc &Desc = MCII->get(Inst.getOpcode()); - const MCOperandInfo &OpDesc = Desc.operands()[Inst.getNumOperands()]; - if (OpDesc.OperandType == AMDGPU::OPERAND_REG_IMM_INT64 || - OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT64) { - UseLit64 = false; - } else { - assert(OpDesc.OperandType == AMDGPU::OPERAND_REG_IMM_FP64 || - OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_C_FP64 || - OpDesc.OperandType == AMDGPU::OPERAND_REG_INLINE_AC_FP64); - UseLit64 = Hi_32(Literal64) == 0; + 
Literal = eatBytes<uint64_t>(Bytes); } + bool UseLit64 = Hi_32(Literal) == 0; return UseLit64 ? MCOperand::createExpr(AMDGPUMCExpr::createLit( - LitModifier::Lit64, Literal64, getContext())) - : MCOperand::createImm(Literal64); + LitModifier::Lit64, Literal, getContext())) + : MCOperand::createImm(Literal); } MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) { @@ -1913,7 +1944,7 @@ MCOperand AMDGPUDisassembler::decodeNonVGPRSrcOp(const MCInst &Inst, return MCOperand::createImm(Val); if (Val == LITERAL64_CONST && STI.hasFeature(AMDGPU::Feature64BitLiterals)) { - return decodeLiteral64Constant(Inst); + return decodeLiteral64Constant(); } switch (Width) { diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h index 2751857..d103d79 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h @@ -44,8 +44,7 @@ private: const unsigned HwModeRegClass; const unsigned TargetMaxInstBytes; mutable ArrayRef<uint8_t> Bytes; - mutable uint32_t Literal; - mutable uint64_t Literal64; + mutable uint64_t Literal; mutable bool HasLiteral; mutable std::optional<bool> EnableWavefrontSize32; unsigned CodeObjectVersion; @@ -144,9 +143,8 @@ public: MCOperand decodeMandatoryLiteralConstant(unsigned Imm) const; MCOperand decodeMandatoryLiteral64Constant(uint64_t Imm) const; MCOperand decodeLiteralConstant(const MCInstrDesc &Desc, - const MCOperandInfo &OpDesc, - bool ExtendFP64) const; - MCOperand decodeLiteral64Constant(const MCInst &Inst) const; + const MCOperandInfo &OpDesc) const; + MCOperand decodeLiteral64Constant() const; MCOperand decodeSrcOp(const MCInst &Inst, unsigned Width, unsigned Val) const; diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td index 228114c..44c4830 100644 --- a/llvm/lib/Target/DirectX/DXIL.td +++ b/llvm/lib/Target/DirectX/DXIL.td @@ -57,6 +57,7 @@ def ResBindTy : DXILOpParamType; 
def ResPropsTy : DXILOpParamType; def SplitDoubleTy : DXILOpParamType; def BinaryWithCarryTy : DXILOpParamType; +def DimensionsTy : DXILOpParamType; class DXILOpClass; @@ -901,6 +902,13 @@ def CheckAccessFullyMapped : DXILOp<71, checkAccessFullyMapped> { let attributes = [Attributes<DXIL1_0, [ReadOnly]>]; } +def GetDimensions : DXILOp<72, getDimensions> { + let Doc = "gets the dimensions of a buffer or texture"; + let arguments = [HandleTy, Int32Ty]; + let result = DimensionsTy; + let stages = [Stages<DXIL1_0, [all_stages]>]; +} + def Barrier : DXILOp<80, barrier> { let Doc = "inserts a memory barrier in the shader"; let intrinsics = [ diff --git a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp index 1aed8f9..944b2e6 100644 --- a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp +++ b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp @@ -261,6 +261,12 @@ static StructType *getBinaryWithCarryType(LLVMContext &Context) { return StructType::create({Int32Ty, Int1Ty}, "dx.types.i32c"); } +static StructType *getDimensionsType(LLVMContext &Ctx) { + Type *Int32Ty = Type::getInt32Ty(Ctx); + return getOrCreateStructType("dx.types.Dimensions", + {Int32Ty, Int32Ty, Int32Ty, Int32Ty}, Ctx); +} + static Type *getTypeFromOpParamType(OpParamType Kind, LLVMContext &Ctx, Type *OverloadTy) { switch (Kind) { @@ -318,6 +324,8 @@ static Type *getTypeFromOpParamType(OpParamType Kind, LLVMContext &Ctx, return getSplitDoubleType(Ctx); case OpParamType::BinaryWithCarryTy: return getBinaryWithCarryType(Ctx); + case OpParamType::DimensionsTy: + return getDimensionsType(Ctx); } llvm_unreachable("Invalid parameter kind"); return nullptr; diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp index 610d8b6..e46a393 100644 --- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp +++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp @@ -627,6 +627,28 @@ public: }); } + [[nodiscard]] bool lowerGetDimensionsX(Function &F) { + IRBuilder<> &IRB = 
OpBuilder.getIRB(); + Type *Int32Ty = IRB.getInt32Ty(); + + return replaceFunction(F, [&](CallInst *CI) -> Error { + IRB.SetInsertPoint(CI); + Value *Handle = + createTmpHandleCast(CI->getArgOperand(0), OpBuilder.getHandleType()); + Value *Undef = UndefValue::get(Int32Ty); + + Expected<CallInst *> OpCall = OpBuilder.tryCreateOp( + OpCode::GetDimensions, {Handle, Undef}, CI->getName(), Int32Ty); + if (Error E = OpCall.takeError()) + return E; + Value *Dim = IRB.CreateExtractValue(*OpCall, 0); + + CI->replaceAllUsesWith(Dim); + CI->eraseFromParent(); + return Error::success(); + }); + } + [[nodiscard]] bool lowerGetPointer(Function &F) { // These should have already been handled in DXILResourceAccess, so we can // just clean up the dead prototype. @@ -934,6 +956,9 @@ public: case Intrinsic::dx_resource_updatecounter: HasErrors |= lowerUpdateCounter(F); break; + case Intrinsic::dx_resource_getdimensions_x: + HasErrors |= lowerGetDimensionsX(F); + break; case Intrinsic::ctpop: HasErrors |= lowerCtpopToCountBits(F); break; diff --git a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp index 82c43ff..26a8728 100644 --- a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp +++ b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp @@ -1165,12 +1165,15 @@ void DXILBitcodeWriter::writeValueSymbolTableForwardDecl() {} /// Returns the bit offset to backpatch with the location of the real VST. void DXILBitcodeWriter::writeModuleInfo() { // Emit various pieces of data attached to a module. - if (!M.getTargetTriple().empty()) - writeStringRecord(Stream, bitc::MODULE_CODE_TRIPLE, - M.getTargetTriple().str(), 0 /*TODO*/); - const std::string &DL = M.getDataLayoutStr(); - if (!DL.empty()) - writeStringRecord(Stream, bitc::MODULE_CODE_DATALAYOUT, DL, 0 /*TODO*/); + + // We need to hardcode a triple and datalayout that's compatible with the + // historical DXIL triple and datalayout from DXC. 
+ StringRef Triple = "dxil-ms-dx"; + StringRef DL = "e-m:e-p:32:32-i1:8-i8:8-i16:32-i32:32-i64:64-" + "f16:32-f32:32-f64:64-n8:16:32:64"; + writeStringRecord(Stream, bitc::MODULE_CODE_TRIPLE, Triple, 0 /*TODO*/); + writeStringRecord(Stream, bitc::MODULE_CODE_DATALAYOUT, DL, 0 /*TODO*/); + if (!M.getModuleInlineAsm().empty()) writeStringRecord(Stream, bitc::MODULE_CODE_ASM, M.getModuleInlineAsm(), 0 /*TODO*/); diff --git a/llvm/lib/Target/DirectX/DXILWriter/DXILWriterPass.cpp b/llvm/lib/Target/DirectX/DXILWriter/DXILWriterPass.cpp index 1eb03bf..725f2b1 100644 --- a/llvm/lib/Target/DirectX/DXILWriter/DXILWriterPass.cpp +++ b/llvm/lib/Target/DirectX/DXILWriter/DXILWriterPass.cpp @@ -149,11 +149,6 @@ public: std::string Data; llvm::raw_string_ostream OS(Data); - Triple OriginalTriple = M.getTargetTriple(); - // Set to DXIL triple when write to bitcode. - // Only the output bitcode need to be DXIL triple. - M.setTargetTriple(Triple("dxil-ms-dx")); - // Perform late legalization of lifetime intrinsics that would otherwise // fail the Module Verifier if performed in an earlier pass legalizeLifetimeIntrinsics(M); @@ -165,9 +160,6 @@ public: // not-so-legal legalizations removeLifetimeIntrinsics(M); - // Recover triple. 
- M.setTargetTriple(OriginalTriple); - Constant *ModuleConstant = ConstantDataArray::get(M.getContext(), arrayRefFromStringRef(Data)); auto *GV = new llvm::GlobalVariable(M, ModuleConstant->getType(), true, diff --git a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp index 5f180d6..3bd6ed4 100644 --- a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp +++ b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp @@ -66,6 +66,10 @@ public: void remapInstruction(MCInst &Instr) const; + Expected<bool> onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, + ArrayRef<uint8_t> Bytes, + uint64_t Address) const override; + private: bool makeBundle(ArrayRef<uint8_t> Bytes, uint64_t Address, uint64_t &BytesToSkip, raw_ostream &CS) const; @@ -567,6 +571,18 @@ DecodeStatus HexagonDisassembler::getSingleInstruction(MCInst &MI, MCInst &MCB, return Result; } +Expected<bool> HexagonDisassembler::onSymbolStart(SymbolInfoTy &Symbol, + uint64_t &Size, + ArrayRef<uint8_t> Bytes, + uint64_t Address) const { + // At the start of a symbol, force a fresh packet by resetting any + // in-progress bundle state. This prevents packets from straddling label + // boundaries when data (e.g. jump tables) appears in between. 
+ Size = 0; + resetBundle(); + return true; +} + static DecodeStatus DecodeRegisterClass(MCInst &Inst, unsigned RegNo, ArrayRef<MCPhysReg> Table) { if (RegNo < Table.size()) { diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp index 662d3f6..b1794b7 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp @@ -717,6 +717,18 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) .clampScalar(0, sXLen, sXLen) .lower(); + LegalityPredicate InsertVectorEltPred = [=](const LegalityQuery &Query) { + LLT VecTy = Query.Types[0]; + LLT EltTy = Query.Types[1]; + return VecTy.getElementType() == EltTy; + }; + + getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT) + .legalIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST), + InsertVectorEltPred, typeIs(2, sXLen))) + .legalIf(all(typeIsLegalBoolVec(0, BoolVecTys, ST), InsertVectorEltPred, + typeIs(2, sXLen))); + getLegacyLegalizerInfo().computeTables(); verify(*ST.getInstrInfo()); } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td index 0f6e1ca..eedfdb3 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -1771,6 +1771,10 @@ defm RELAXED_DOT_ADD : "i32x4.relaxed_dot_i8x16_i7x16_add_s\t$dst, $lhs, $rhs, $acc", "i32x4.relaxed_dot_i8x16_i7x16_add_s", 0x113>; +def : Pat<(v4i32 (partial_reduce_smla (v4i32 V128:$acc), (v16i8 V128:$lhs), + (v16i8 V128:$rhs))), + (RELAXED_DOT_ADD $lhs, $rhs, $acc)>, Requires<[HasRelaxedSIMD]>; + //===----------------------------------------------------------------------===// // Relaxed BFloat16 dot product //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td index 0fd44b7..ec31675 100644 --- 
a/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/llvm/lib/Target/X86/X86InstrCompiler.td @@ -1256,8 +1256,17 @@ def : Pat<(i64 (X86Wrapper tconstpool :$dst)), (MOV64ri32 tconstpool :$dst)>, Requires<[KernelCode]>; def : Pat<(i64 (X86Wrapper tjumptable :$dst)), (MOV64ri32 tjumptable :$dst)>, Requires<[KernelCode]>; -def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)), - (MOV64ri32 tglobaladdr :$dst)>, Requires<[KernelCode]>; + +// If the globaladdr is an absolute_symbol, don't bother using the sign extending +// instruction since there's no benefit to using it with absolute symbols. +def globalAddrNoAbsSym : PatLeaf<(tglobaladdr:$dst), [{ + auto *GA = cast<GlobalAddressSDNode>(N); + return !GA->getGlobal()->getAbsoluteSymbolRange(); +}]>; +def : Pat<(i64 (X86Wrapper globalAddrNoAbsSym:$dst)), + (MOV64ri32 tglobaladdr:$dst)>, + Requires<[KernelCode]>; + def : Pat<(i64 (X86Wrapper texternalsym:$dst)), (MOV64ri32 texternalsym:$dst)>, Requires<[KernelCode]>; def : Pat<(i64 (X86Wrapper mcsym:$dst)), diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp index 481a9be..1fca466f 100644 --- a/llvm/lib/Target/X86/X86MCInstLower.cpp +++ b/llvm/lib/Target/X86/X86MCInstLower.cpp @@ -1946,16 +1946,14 @@ static void addConstantComments(const MachineInstr *MI, CASE_ARITH_RM(PMADDUBSW) { unsigned SrcIdx = getSrcIdx(MI, 1); if (auto *C = X86::getConstantFromPool(*MI, SrcIdx + 1)) { - if (C->getType()->getScalarSizeInBits() == 8) { - std::string Comment; - raw_string_ostream CS(Comment); - unsigned VectorWidth = - X86::getVectorRegisterWidth(MI->getDesc().operands()[0]); - CS << "["; - printConstant(C, VectorWidth, CS); - CS << "]"; - OutStreamer.AddComment(CS.str()); - } + std::string Comment; + raw_string_ostream CS(Comment); + unsigned VectorWidth = + X86::getVectorRegisterWidth(MI->getDesc().operands()[0]); + CS << "["; + printConstant(C, VectorWidth, CS); + CS << "]"; + OutStreamer.AddComment(CS.str()); } break; } @@ -1967,16 +1965,14 @@ static 
void addConstantComments(const MachineInstr *MI, CASE_ARITH_RM(PMULHRSW) { unsigned SrcIdx = getSrcIdx(MI, 1); if (auto *C = X86::getConstantFromPool(*MI, SrcIdx + 1)) { - if (C->getType()->getScalarSizeInBits() == 16) { - std::string Comment; - raw_string_ostream CS(Comment); - unsigned VectorWidth = - X86::getVectorRegisterWidth(MI->getDesc().operands()[0]); - CS << "["; - printConstant(C, VectorWidth, CS); - CS << "]"; - OutStreamer.AddComment(CS.str()); - } + std::string Comment; + raw_string_ostream CS(Comment); + unsigned VectorWidth = + X86::getVectorRegisterWidth(MI->getDesc().operands()[0]); + CS << "["; + printConstant(C, VectorWidth, CS); + CS << "]"; + OutStreamer.AddComment(CS.str()); } break; } |