diff options
author | James Y Knight <jyknight@google.com> | 2024-07-25 09:19:22 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-07-25 09:19:22 -0400 |
commit | dfeb3991fb489a703f631ab0c34b58f80568038d (patch) | |
tree | 7117ce620e5bf49aef8810d5651c4aba2b31499e /llvm/lib | |
parent | 0fedfd83d75415837eb91f56ec24f4b392bf6c57 (diff) | |
download | llvm-dfeb3991fb489a703f631ab0c34b58f80568038d.zip llvm-dfeb3991fb489a703f631ab0c34b58f80568038d.tar.gz llvm-dfeb3991fb489a703f631ab0c34b58f80568038d.tar.bz2 |
Remove the `x86_mmx` IR type. (#98505)
It is now translated to `<1 x i64>`, which allows the removal of a bunch
of special casing.
This _incompatibly_ changes the ABI of any LLVM IR function with
`x86_mmx` arguments or returns: instead of passing in mmx registers,
they will now be passed via integer registers. However, the real-world
incompatibility caused by this is expected to be minimal, because Clang
never uses the x86_mmx type -- it lowers `__m64` to either `<1 x i64>`
or `double`, depending on ABI.
This change does _not_ eliminate the SelectionDAG `MVT::x86mmx` type.
That type simply no longer corresponds to an IR type, and is used only
by MMX intrinsics and inline-asm operands.
Because SelectionDAGBuilder only knows how to generate the
operands/results of intrinsics based on the IR type, it thus now
generates the intrinsics with the type MVT::v1i64, instead of
MVT::x86mmx. We need to fix this up before DAG type legalization runs, and
thus have the X86 backend fix them up in DAGCombine. (This may be a
short-lived hack, if all the MMX intrinsics can be removed in upcoming
changes.)
Works towards issue #98272.
Diffstat (limited to 'llvm/lib')
23 files changed, 245 insertions, 130 deletions
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp index df75745..85ee231 100644 --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -564,16 +564,14 @@ Constant *FoldReinterpretLoadFromConst(Constant *C, Type *LoadTy, Type *MapTy = Type::getIntNTy(C->getContext(), DL.getTypeSizeInBits(LoadTy).getFixedValue()); if (Constant *Res = FoldReinterpretLoadFromConst(C, MapTy, Offset, DL)) { - if (Res->isNullValue() && !LoadTy->isX86_MMXTy() && - !LoadTy->isX86_AMXTy()) + if (Res->isNullValue() && !LoadTy->isX86_AMXTy()) // Materializing a zero can be done trivially without a bitcast return Constant::getNullValue(LoadTy); Type *CastTy = LoadTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(LoadTy) : LoadTy; Res = FoldBitCast(Res, CastTy, DL); if (LoadTy->isPtrOrPtrVectorTy()) { // For vector of pointer, we needed to first convert to a vector of integer, then do vector inttoptr - if (Res->isNullValue() && !LoadTy->isX86_MMXTy() && - !LoadTy->isX86_AMXTy()) + if (Res->isNullValue() && !LoadTy->isX86_AMXTy()) return Constant::getNullValue(LoadTy); if (DL.isNonIntegralPointerType(LoadTy->getScalarType())) // Be careful not to replace a load of an addrspace value with an inttoptr here @@ -764,7 +762,7 @@ Constant *llvm::ConstantFoldLoadFromUniformValue(Constant *C, Type *Ty, // uniform. 
if (!DL.typeSizeEqualsStoreSize(C->getType())) return nullptr; - if (C->isNullValue() && !Ty->isX86_MMXTy() && !Ty->isX86_AMXTy()) + if (C->isNullValue() && !Ty->isX86_AMXTy()) return Constant::getNullValue(Ty); if (C->isAllOnesValue() && (Ty->isIntOrIntVectorTy() || Ty->isFPOrFPVectorTy())) diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp index 7d7fe19..c82e749 100644 --- a/llvm/lib/AsmParser/LLLexer.cpp +++ b/llvm/lib/AsmParser/LLLexer.cpp @@ -838,7 +838,8 @@ lltok::Kind LLLexer::LexIdentifier() { TYPEKEYWORD("ppc_fp128", Type::getPPC_FP128Ty(Context)); TYPEKEYWORD("label", Type::getLabelTy(Context)); TYPEKEYWORD("metadata", Type::getMetadataTy(Context)); - TYPEKEYWORD("x86_mmx", Type::getX86_MMXTy(Context)); + TYPEKEYWORD("x86_mmx", llvm::FixedVectorType::get( + llvm::IntegerType::get(Context, 64), 1)); TYPEKEYWORD("x86_amx", Type::getX86_AMXTy(Context)); TYPEKEYWORD("token", Type::getTokenTy(Context)); TYPEKEYWORD("ptr", PointerType::getUnqual(Context)); diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index 84d624f..fd4ae109 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -2496,7 +2496,9 @@ Error BitcodeReader::parseTypeTableBody() { ResultTy = Type::getMetadataTy(Context); break; case bitc::TYPE_CODE_X86_MMX: // X86_MMX - ResultTy = Type::getX86_MMXTy(Context); + // Deprecated: decodes as <1 x i64> + ResultTy = + llvm::FixedVectorType::get(llvm::IntegerType::get(Context, 64), 1); break; case bitc::TYPE_CODE_X86_AMX: // X86_AMX ResultTy = Type::getX86_AMXTy(Context); diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 324dcbc..52e15e6 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -1088,8 +1088,9 @@ void ModuleBitcodeWriter::writeTypeTable() { case Type::FP128TyID: Code = bitc::TYPE_CODE_FP128; break; case 
Type::PPC_FP128TyID: Code = bitc::TYPE_CODE_PPC_FP128; break; case Type::LabelTyID: Code = bitc::TYPE_CODE_LABEL; break; - case Type::MetadataTyID: Code = bitc::TYPE_CODE_METADATA; break; - case Type::X86_MMXTyID: Code = bitc::TYPE_CODE_X86_MMX; break; + case Type::MetadataTyID: + Code = bitc::TYPE_CODE_METADATA; + break; case Type::X86_AMXTyID: Code = bitc::TYPE_CODE_X86_AMX; break; case Type::TokenTyID: Code = bitc::TYPE_CODE_TOKEN; break; case Type::IntegerTyID: diff --git a/llvm/lib/CodeGen/ValueTypes.cpp b/llvm/lib/CodeGen/ValueTypes.cpp index b0f736a..0c6b726 100644 --- a/llvm/lib/CodeGen/ValueTypes.cpp +++ b/llvm/lib/CodeGen/ValueTypes.cpp @@ -207,7 +207,7 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const { assert(isExtended() && "Type is not extended!"); return LLVMTy; case MVT::isVoid: return Type::getVoidTy(Context); - case MVT::x86mmx: return Type::getX86_MMXTy(Context); + case MVT::x86mmx: return llvm::FixedVectorType::get(llvm::IntegerType::get(Context, 64), 1); case MVT::aarch64svcount: return TargetExtType::get(Context, "aarch64.svcount"); case MVT::x86amx: return Type::getX86_AMXTy(Context); @@ -241,8 +241,8 @@ MVT MVT::getVT(Type *Ty, bool HandleUnknown){ case Type::BFloatTyID: return MVT(MVT::bf16); case Type::FloatTyID: return MVT(MVT::f32); case Type::DoubleTyID: return MVT(MVT::f64); - case Type::X86_FP80TyID: return MVT(MVT::f80); - case Type::X86_MMXTyID: return MVT(MVT::x86mmx); + case Type::X86_FP80TyID: + return MVT(MVT::f80); case Type::TargetExtTyID: { TargetExtType *TargetExtTy = cast<TargetExtType>(Ty); if (TargetExtTy->getName() == "aarch64.svcount") @@ -302,4 +302,3 @@ void MVT::print(raw_ostream &OS) const { else OS << EVT(*this).getEVTString(); } - diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp index 6599730..01a16cc 100644 --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -573,8 +573,9 @@ void TypePrinting::print(Type *Ty, raw_ostream &OS) { case Type::FP128TyID: OS << "fp128"; return; 
case Type::PPC_FP128TyID: OS << "ppc_fp128"; return; case Type::LabelTyID: OS << "label"; return; - case Type::MetadataTyID: OS << "metadata"; return; - case Type::X86_MMXTyID: OS << "x86_mmx"; return; + case Type::MetadataTyID: + OS << "metadata"; + return; case Type::X86_AMXTyID: OS << "x86_amx"; return; case Type::TokenTyID: OS << "token"; return; case Type::IntegerTyID: diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp index 693674a..05ab096 100644 --- a/llvm/lib/IR/ConstantFold.cpp +++ b/llvm/lib/IR/ConstantFold.cpp @@ -142,7 +142,7 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V, return UndefValue::get(DestTy); } - if (V->isNullValue() && !DestTy->isX86_MMXTy() && !DestTy->isX86_AMXTy() && + if (V->isNullValue() && !DestTy->isX86_AMXTy() && opc != Instruction::AddrSpaceCast) return Constant::getNullValue(DestTy); diff --git a/llvm/lib/IR/Core.cpp b/llvm/lib/IR/Core.cpp index cf7bbf6..17c0bf7 100644 --- a/llvm/lib/IR/Core.cpp +++ b/llvm/lib/IR/Core.cpp @@ -609,8 +609,6 @@ LLVMTypeKind LLVMGetTypeKind(LLVMTypeRef Ty) { return LLVMPointerTypeKind; case Type::FixedVectorTyID: return LLVMVectorTypeKind; - case Type::X86_MMXTyID: - return LLVMX86_MMXTypeKind; case Type::X86_AMXTyID: return LLVMX86_AMXTypeKind; case Type::TokenTyID: @@ -725,9 +723,6 @@ LLVMTypeRef LLVMFP128TypeInContext(LLVMContextRef C) { LLVMTypeRef LLVMPPCFP128TypeInContext(LLVMContextRef C) { return (LLVMTypeRef) Type::getPPC_FP128Ty(*unwrap(C)); } -LLVMTypeRef LLVMX86MMXTypeInContext(LLVMContextRef C) { - return (LLVMTypeRef) Type::getX86_MMXTy(*unwrap(C)); -} LLVMTypeRef LLVMX86AMXTypeInContext(LLVMContextRef C) { return (LLVMTypeRef) Type::getX86_AMXTy(*unwrap(C)); } @@ -753,9 +748,6 @@ LLVMTypeRef LLVMFP128Type(void) { LLVMTypeRef LLVMPPCFP128Type(void) { return LLVMPPCFP128TypeInContext(LLVMGetGlobalContext()); } -LLVMTypeRef LLVMX86MMXType(void) { - return LLVMX86MMXTypeInContext(LLVMGetGlobalContext()); -} LLVMTypeRef LLVMX86AMXType(void) { 
return LLVMX86AMXTypeInContext(LLVMGetGlobalContext()); } diff --git a/llvm/lib/IR/DataLayout.cpp b/llvm/lib/IR/DataLayout.cpp index 27411653..17897f7 100644 --- a/llvm/lib/IR/DataLayout.cpp +++ b/llvm/lib/IR/DataLayout.cpp @@ -835,7 +835,6 @@ Align DataLayout::getAlignment(Type *Ty, bool abi_or_pref) const { // layout. return Align(PowerOf2Ceil(BitWidth / 8)); } - case Type::X86_MMXTyID: case Type::FixedVectorTyID: case Type::ScalableVectorTyID: { unsigned BitWidth = getTypeSizeInBits(Ty).getKnownMinValue(); diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp index 2087198..9b0dd5f 100644 --- a/llvm/lib/IR/Function.cpp +++ b/llvm/lib/IR/Function.cpp @@ -1052,8 +1052,9 @@ static std::string getMangledTypeStr(Type *Ty, bool &HasUnnamedType) { case Type::DoubleTyID: Result += "f64"; break; case Type::X86_FP80TyID: Result += "f80"; break; case Type::FP128TyID: Result += "f128"; break; - case Type::PPC_FP128TyID: Result += "ppcf128"; break; - case Type::X86_MMXTyID: Result += "x86mmx"; break; + case Type::PPC_FP128TyID: + Result += "ppcf128"; + break; case Type::X86_AMXTyID: Result += "x86amx"; break; case Type::IntegerTyID: Result += "i" + utostr(cast<IntegerType>(Ty)->getBitWidth()); @@ -1397,7 +1398,8 @@ static Type *DecodeFixedType(ArrayRef<Intrinsic::IITDescriptor> &Infos, switch (D.Kind) { case IITDescriptor::Void: return Type::getVoidTy(Context); case IITDescriptor::VarArg: return Type::getVoidTy(Context); - case IITDescriptor::MMX: return Type::getX86_MMXTy(Context); + case IITDescriptor::MMX: + return llvm::FixedVectorType::get(llvm::IntegerType::get(Context, 64), 1); case IITDescriptor::AMX: return Type::getX86_AMXTy(Context); case IITDescriptor::Token: return Type::getTokenTy(Context); case IITDescriptor::Metadata: return Type::getMetadataTy(Context); @@ -1580,7 +1582,11 @@ static bool matchIntrinsicType( switch (D.Kind) { case IITDescriptor::Void: return !Ty->isVoidTy(); case IITDescriptor::VarArg: return true; - case IITDescriptor::MMX: return 
!Ty->isX86_MMXTy(); + case IITDescriptor::MMX: { + FixedVectorType *VT = dyn_cast<FixedVectorType>(Ty); + return !VT || VT->getNumElements() != 1 || + !VT->getElementType()->isIntegerTy(64); + } case IITDescriptor::AMX: return !Ty->isX86_AMXTy(); case IITDescriptor::Token: return !Ty->isTokenTy(); case IITDescriptor::Metadata: return !Ty->isMetadataTy(); diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp index 7a8cf8c..58ebe7e 100644 --- a/llvm/lib/IR/Instructions.cpp +++ b/llvm/lib/IR/Instructions.cpp @@ -3116,9 +3116,6 @@ bool CastInst::isBitCastable(Type *SrcTy, Type *DestTy) { if (SrcBits != DestBits) return false; - if (DestTy->isX86_MMXTy() || SrcTy->isX86_MMXTy()) - return false; - return true; } @@ -3228,12 +3225,6 @@ CastInst::getCastOpcode( return IntToPtr; // int -> ptr } llvm_unreachable("Casting pointer to other than pointer or int"); - } else if (DestTy->isX86_MMXTy()) { - if (SrcTy->isVectorTy()) { - assert(DestBits == SrcBits && "Casting vector of wrong width to X86_MMX"); - return BitCast; // 64-bit vector to MMX - } - llvm_unreachable("Illegal cast to X86_MMX"); } llvm_unreachable("Casting to type that is not first-class"); } diff --git a/llvm/lib/IR/LLVMContextImpl.cpp b/llvm/lib/IR/LLVMContextImpl.cpp index 0a37617..4f1ef8c 100644 --- a/llvm/lib/IR/LLVMContextImpl.cpp +++ b/llvm/lib/IR/LLVMContextImpl.cpp @@ -40,9 +40,9 @@ LLVMContextImpl::LLVMContextImpl(LLVMContext &C) FloatTy(C, Type::FloatTyID), DoubleTy(C, Type::DoubleTyID), MetadataTy(C, Type::MetadataTyID), TokenTy(C, Type::TokenTyID), X86_FP80Ty(C, Type::X86_FP80TyID), FP128Ty(C, Type::FP128TyID), - PPC_FP128Ty(C, Type::PPC_FP128TyID), X86_MMXTy(C, Type::X86_MMXTyID), - X86_AMXTy(C, Type::X86_AMXTyID), Int1Ty(C, 1), Int8Ty(C, 8), - Int16Ty(C, 16), Int32Ty(C, 32), Int64Ty(C, 64), Int128Ty(C, 128) {} + PPC_FP128Ty(C, Type::PPC_FP128TyID), X86_AMXTy(C, Type::X86_AMXTyID), + Int1Ty(C, 1), Int8Ty(C, 8), Int16Ty(C, 16), Int32Ty(C, 32), + Int64Ty(C, 64), Int128Ty(C, 128) 
{} LLVMContextImpl::~LLVMContextImpl() { #ifndef NDEBUG diff --git a/llvm/lib/IR/LLVMContextImpl.h b/llvm/lib/IR/LLVMContextImpl.h index 937a87d..8e9ca21 100644 --- a/llvm/lib/IR/LLVMContextImpl.h +++ b/llvm/lib/IR/LLVMContextImpl.h @@ -1582,7 +1582,7 @@ public: // Basic type instances. Type VoidTy, LabelTy, HalfTy, BFloatTy, FloatTy, DoubleTy, MetadataTy, TokenTy; - Type X86_FP80Ty, FP128Ty, PPC_FP128Ty, X86_MMXTy, X86_AMXTy; + Type X86_FP80Ty, FP128Ty, PPC_FP128Ty, X86_AMXTy; IntegerType Int1Ty, Int8Ty, Int16Ty, Int32Ty, Int64Ty, Int128Ty; std::unique_ptr<ConstantTokenNone> TheNoneToken; diff --git a/llvm/lib/IR/Type.cpp b/llvm/lib/IR/Type.cpp index 5c61ad9..9ddccce 100644 --- a/llvm/lib/IR/Type.cpp +++ b/llvm/lib/IR/Type.cpp @@ -45,7 +45,6 @@ Type *Type::getPrimitiveType(LLVMContext &C, TypeID IDNumber) { case PPC_FP128TyID : return getPPC_FP128Ty(C); case LabelTyID : return getLabelTy(C); case MetadataTyID : return getMetadataTy(C); - case X86_MMXTyID : return getX86_MMXTy(C); case X86_AMXTyID : return getX86_AMXTy(C); case TokenTyID : return getTokenTy(C); default: @@ -125,14 +124,6 @@ bool Type::canLosslesslyBitCastTo(Type *Ty) const { if (isa<VectorType>(this) && isa<VectorType>(Ty)) return getPrimitiveSizeInBits() == Ty->getPrimitiveSizeInBits(); - // 64-bit fixed width vector types can be losslessly converted to x86mmx. - if (((isa<FixedVectorType>(this)) && Ty->isX86_MMXTy()) && - getPrimitiveSizeInBits().getFixedValue() == 64) - return true; - if ((isX86_MMXTy() && isa<FixedVectorType>(Ty)) && - Ty->getPrimitiveSizeInBits().getFixedValue() == 64) - return true; - // 8192-bit fixed width vector types can be losslessly converted to x86amx. 
if (((isa<FixedVectorType>(this)) && Ty->isX86_AMXTy()) && getPrimitiveSizeInBits().getFixedValue() == 8192) @@ -179,8 +170,6 @@ TypeSize Type::getPrimitiveSizeInBits() const { return TypeSize::getFixed(128); case Type::PPC_FP128TyID: return TypeSize::getFixed(128); - case Type::X86_MMXTyID: - return TypeSize::getFixed(64); case Type::X86_AMXTyID: return TypeSize::getFixed(8192); case Type::IntegerTyID: @@ -245,7 +234,6 @@ Type *Type::getTokenTy(LLVMContext &C) { return &C.pImpl->TokenTy; } Type *Type::getX86_FP80Ty(LLVMContext &C) { return &C.pImpl->X86_FP80Ty; } Type *Type::getFP128Ty(LLVMContext &C) { return &C.pImpl->FP128Ty; } Type *Type::getPPC_FP128Ty(LLVMContext &C) { return &C.pImpl->PPC_FP128Ty; } -Type *Type::getX86_MMXTy(LLVMContext &C) { return &C.pImpl->X86_MMXTy; } Type *Type::getX86_AMXTy(LLVMContext &C) { return &C.pImpl->X86_AMXTy; } IntegerType *Type::getInt1Ty(LLVMContext &C) { return &C.pImpl->Int1Ty; } diff --git a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp index 3433408..cd0d6d3 100644 --- a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp +++ b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp @@ -1048,9 +1048,6 @@ void DXILBitcodeWriter::writeTypeTable() { case Type::MetadataTyID: Code = bitc::TYPE_CODE_METADATA; break; - case Type::X86_MMXTyID: - Code = bitc::TYPE_CODE_X86_MMX; - break; case Type::IntegerTyID: // INTEGER: [width] Code = bitc::TYPE_CODE_INTEGER; diff --git a/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp index e5d10a7..0c1b0ae 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp +++ b/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp @@ -329,7 +329,6 @@ unsigned HexagonTargetObjectFile::getSmallestAddressableSize(const Type *Ty, case Type::PPC_FP128TyID: case Type::LabelTyID: case Type::MetadataTyID: - case Type::X86_MMXTyID: case Type::X86_AMXTyID: case 
Type::TokenTyID: case Type::TypedPointerTyID: diff --git a/llvm/lib/Target/X86/X86CallingConv.td b/llvm/lib/Target/X86/X86CallingConv.td index 9ec68bf..c55ff3d 100644 --- a/llvm/lib/Target/X86/X86CallingConv.td +++ b/llvm/lib/Target/X86/X86CallingConv.td @@ -168,10 +168,6 @@ def CC_#NAME : CallingConv<[ CCIfType<[i32, f32], CCAssignToStack<4, 4>>, CCIfType<[i64, f64], CCAssignToStack<8, 4>>, - // MMX type gets 8 byte slot in stack , while alignment depends on target - CCIfSubtarget<"is64Bit()", CCIfType<[x86mmx], CCAssignToStack<8, 8>>>, - CCIfType<[x86mmx], CCAssignToStack<8, 4>>, - // float 128 get stack slots whose size and alignment depends // on the subtarget. CCIfType<[f80, f128], CCAssignToStack<0, 0>>, @@ -286,10 +282,6 @@ def RetCC_X86Common : CallingConv<[ CCIfType<[v64i8, v32i16, v16i32, v8i64, v32f16, v16f32, v8f64], CCAssignToReg<[ZMM0,ZMM1,ZMM2,ZMM3]>>, - // MMX vector types are always returned in MM0. If the target doesn't have - // MM0, it doesn't support these vector types. - CCIfType<[x86mmx], CCAssignToReg<[MM0]>>, - // Long double types are always returned in FP0 (even with SSE), // except on Win64. CCIfNotSubtarget<"isTargetWin64()", CCIfType<[f80], CCAssignToReg<[FP0, FP1]>>> @@ -376,9 +368,6 @@ def RetCC_X86_64_C : CallingConv<[ CCIfType<[f64], CCAssignToReg<[XMM0, XMM1]>>, CCIfType<[f128], CCAssignToReg<[XMM0, XMM1]>>, - // MMX vector types are always returned in XMM0. - CCIfType<[x86mmx], CCAssignToReg<[XMM0, XMM1]>>, - // Pointers are always returned in full 64-bit registers. CCIfPtr<CCCustom<"CC_X86_64_Pointer">>, @@ -389,9 +378,6 @@ def RetCC_X86_64_C : CallingConv<[ // X86-Win64 C return-value convention. def RetCC_X86_Win64_C : CallingConv<[ - // The X86-Win64 calling convention always returns __m64 values in RAX. - CCIfType<[x86mmx], CCBitConvertToType<i64>>, - // GCC returns FP values in RAX on Win64. 
CCIfType<[f32], CCIfNotSubtarget<"hasSSE1()", CCBitConvertToType<i32>>>, CCIfType<[f64], CCIfNotSubtarget<"hasSSE1()", CCBitConvertToType<i64>>>, @@ -436,8 +422,6 @@ def RetCC_X86_64_Swift : CallingConv<[ CCIfType<[f64], CCAssignToReg<[XMM0, XMM1, XMM2, XMM3]>>, CCIfType<[f128], CCAssignToReg<[XMM0, XMM1, XMM2, XMM3]>>, - // MMX vector types are returned in XMM0, XMM1, XMM2 and XMM3. - CCIfType<[x86mmx], CCAssignToReg<[XMM0, XMM1, XMM2, XMM3]>>, CCDelegateTo<RetCC_X86Common> ]>; @@ -572,12 +556,6 @@ def CC_X86_64_C : CallingConv<[ CCIfType<[i64], CCAssignToReg<[RDI, RSI, RDX, RCX, R8 , R9 ]>>, - // The first 8 MMX vector arguments are passed in XMM registers on Darwin. - CCIfType<[x86mmx], - CCIfSubtarget<"isTargetDarwin()", - CCIfSubtarget<"hasSSE2()", - CCPromoteToType<v2i64>>>>, - // Boolean vectors of AVX-512 are passed in SIMD registers. // The call from AVX to AVX-512 function should work, // since the boolean types in AVX/AVX2 are promoted by default. @@ -666,9 +644,6 @@ def CC_X86_Win64_C : CallingConv<[ // Long doubles are passed by pointer CCIfType<[f80], CCPassIndirect<i64>>, - // The first 4 MMX vector arguments are passed in GPRs. - CCIfType<[x86mmx], CCBitConvertToType<i64>>, - // If SSE was disabled, pass FP values smaller than 64-bits as integers in // GPRs or on the stack. CCIfType<[f32], CCIfNotSubtarget<"hasSSE1()", CCBitConvertToType<i32>>>, @@ -843,11 +818,6 @@ def CC_X86_32_Common : CallingConv<[ CCIfNotVarArg<CCIfInReg<CCIfType<[f16], CCAssignToReg<[XMM0,XMM1,XMM2]>>>>, - // The first 3 __m64 vector arguments are passed in mmx registers if the - // call is not a vararg call. 
- CCIfNotVarArg<CCIfType<[x86mmx], - CCAssignToReg<[MM0, MM1, MM2]>>>, - CCIfType<[f16], CCAssignToStack<4, 4>>, // Integer/Float values get stored in stack slots that are 4 bytes in @@ -870,10 +840,6 @@ def CC_X86_32_Common : CallingConv<[ CCIfType<[v32i1], CCPromoteToType<v32i8>>, CCIfType<[v64i1], CCPromoteToType<v64i8>>, - // __m64 vectors get 8-byte stack slots that are 4-byte aligned. They are - // passed in the parameter area. - CCIfType<[x86mmx], CCAssignToStack<8, 4>>, - // Darwin passes vectors in a form that differs from the i386 psABI CCIfSubtarget<"isTargetDarwin()", CCDelegateTo<CC_X86_32_Vector_Darwin>>, diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 7f2c98d..ad59b13 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -2554,7 +2554,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, ISD::FP_EXTEND, ISD::STRICT_FP_EXTEND, ISD::FP_ROUND, - ISD::STRICT_FP_ROUND}); + ISD::STRICT_FP_ROUND, + ISD::INTRINSIC_VOID, + ISD::INTRINSIC_WO_CHAIN, + ISD::INTRINSIC_W_CHAIN}); computeRegisterProperties(Subtarget.getRegisterInfo()); @@ -27269,6 +27272,8 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget, llvm_unreachable("Unsupported truncstore intrinsic"); } } + case INTR_TYPE_CAST_MMX: + return SDValue(); // handled in combineINTRINSIC_* } } @@ -57761,6 +57766,86 @@ static SDValue combinePDEP(SDNode *N, SelectionDAG &DAG, return SDValue(); } +// Fixup the MMX intrinsics' types: in IR they are expressed with <1 x i64>, +// and so SelectionDAGBuilder creates them with v1i64 types, but they need to +// use x86mmx instead. 
+static SDValue FixupMMXIntrinsicTypes(SDNode *N, SelectionDAG &DAG) { + SDLoc dl(N); + + bool MadeChange = false, CastReturnVal = false; + SmallVector<SDValue, 8> Args; + for (const SDValue &Arg : N->op_values()) { + if (Arg.getValueType() == MVT::v1i64) { + MadeChange = true; + Args.push_back(DAG.getBitcast(MVT::x86mmx, Arg)); + } else + Args.push_back(Arg); + } + SDVTList VTs = N->getVTList(); + SDVTList NewVTs = VTs; + if (VTs.NumVTs > 0 && VTs.VTs[0] == MVT::v1i64) { + SmallVector<EVT> NewVTArr(ArrayRef<EVT>(VTs.VTs, VTs.NumVTs)); + NewVTArr[0] = MVT::x86mmx; + NewVTs = DAG.getVTList(NewVTArr); + MadeChange = true; + CastReturnVal = true; + } + + if (MadeChange) { + SDValue Result = DAG.getNode(N->getOpcode(), dl, NewVTs, Args); + if (CastReturnVal) { + SmallVector<SDValue, 2> Returns; + for (unsigned i = 0, e = Result->getNumValues(); i != e; ++i) + Returns.push_back(Result.getValue(i)); + Returns[0] = DAG.getBitcast(MVT::v1i64, Returns[0]); + return DAG.getMergeValues(Returns, dl); + } + return Result; + } + return SDValue(); +} +static SDValue combineINTRINSIC_WO_CHAIN(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI) { + if (!DCI.isBeforeLegalize()) + return SDValue(); + + unsigned IntNo = N->getConstantOperandVal(0); + const IntrinsicData *IntrData = getIntrinsicWithoutChain(IntNo); + + if (IntrData && IntrData->Type == INTR_TYPE_CAST_MMX) + return FixupMMXIntrinsicTypes(N, DAG); + + return SDValue(); +} + +static SDValue combineINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI) { + if (!DCI.isBeforeLegalize()) + return SDValue(); + + unsigned IntNo = N->getConstantOperandVal(1); + const IntrinsicData *IntrData = getIntrinsicWithChain(IntNo); + + if (IntrData && IntrData->Type == INTR_TYPE_CAST_MMX) + return FixupMMXIntrinsicTypes(N, DAG); + + return SDValue(); +} + +static SDValue combineINTRINSIC_VOID(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI) { + if 
(!DCI.isBeforeLegalize()) + return SDValue(); + + unsigned IntNo = N->getConstantOperandVal(1); + const IntrinsicData *IntrData = getIntrinsicWithChain(IntNo); + + if (IntrData && IntrData->Type == INTR_TYPE_CAST_MMX) + return FixupMMXIntrinsicTypes(N, DAG); + + return SDValue(); +} + SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -57951,7 +58036,10 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::SUBV_BROADCAST_LOAD: return combineBROADCAST_LOAD(N, DAG, DCI); case X86ISD::MOVDQ2Q: return combineMOVDQ2Q(N, DAG); case X86ISD::PDEP: return combinePDEP(N, DAG, DCI); - // clang-format on + case ISD::INTRINSIC_WO_CHAIN: return combineINTRINSIC_WO_CHAIN(N, DAG, DCI); + case ISD::INTRINSIC_W_CHAIN: return combineINTRINSIC_W_CHAIN(N, DAG, DCI); + case ISD::INTRINSIC_VOID: return combineINTRINSIC_VOID(N, DAG, DCI); + // clang-format on } return SDValue(); diff --git a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp index 322cb6f..793d62b 100644 --- a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp +++ b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp @@ -623,11 +623,13 @@ static Value *simplifyX86movmsk(const IntrinsicInst &II, if (isa<UndefValue>(Arg)) return Constant::getNullValue(ResTy); - auto *ArgTy = dyn_cast<FixedVectorType>(Arg->getType()); - // We can't easily peek through x86_mmx types. - if (!ArgTy) + // Preserve previous behavior and give up. + // TODO: treat as <8 x i8>. + if (II.getIntrinsicID() == Intrinsic::x86_mmx_pmovmskb) return nullptr; + auto *ArgTy = cast<FixedVectorType>(Arg->getType()); + // Expand MOVMSK to compare/bitcast/zext: // e.g. 
PMOVMSKB(v16i8 x): // %cmp = icmp slt <16 x i8> %x, zeroinitializer diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index 717541c..685daca3 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -73,7 +73,8 @@ enum IntrinsicType : uint16_t { GATHER_AVX2, ROUNDP, ROUNDS, - RDPRU + RDPRU, + INTR_TYPE_CAST_MMX }; struct IntrinsicData { @@ -323,6 +324,8 @@ static const IntrinsicData IntrinsicsWithChain[] = { X86_INTRINSIC_DATA(avx512_scattersiv4_si, SCATTER, 0, 0), X86_INTRINSIC_DATA(avx512_scattersiv8_sf, SCATTER, 0, 0), X86_INTRINSIC_DATA(avx512_scattersiv8_si, SCATTER, 0, 0), + X86_INTRINSIC_DATA(mmx_maskmovq, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_movnt_dq, INTR_TYPE_CAST_MMX, 0, 0), X86_INTRINSIC_DATA(rdpmc, RDPMC, X86::RDPMC, 0), X86_INTRINSIC_DATA(rdpru, RDPRU, X86::RDPRU, 0), X86_INTRINSIC_DATA(rdrand_16, RDRAND, X86ISD::RDRAND, 0), @@ -1495,6 +1498,75 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(fma_vfmaddsub_ps, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0), X86_INTRINSIC_DATA(fma_vfmaddsub_ps_256, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0), + + X86_INTRINSIC_DATA(mmx_packssdw, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_packsswb, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_packuswb, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_padd_b, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_padd_d, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_padd_q, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_padd_w, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_padds_b, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_padds_w, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_paddus_b, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_paddus_w, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_palignr_b, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_pand, INTR_TYPE_CAST_MMX, 0, 0), + 
X86_INTRINSIC_DATA(mmx_pandn, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_pavg_b, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_pavg_w, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_pcmpeq_b, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_pcmpeq_d, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_pcmpeq_w, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_pcmpgt_b, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_pcmpgt_d, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_pcmpgt_w, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_pextr_w, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_pinsr_w, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_pmadd_wd, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_pmaxs_w, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_pmaxu_b, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_pmins_w, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_pminu_b, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_pmovmskb, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_pmulh_w, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_pmulhu_w, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_pmull_w, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_pmulu_dq, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_por, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_psad_bw, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_psll_d, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_psll_q, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_psll_w, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_pslli_d, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_pslli_q, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_pslli_w, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_psra_d, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_psra_w, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_psrai_d, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_psrai_w, 
INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_psrl_d, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_psrl_q, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_psrl_w, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_psrli_d, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_psrli_q, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_psrli_w, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_psub_b, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_psub_d, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_psub_q, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_psub_w, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_psubs_b, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_psubs_w, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_psubus_b, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_psubus_w, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_punpckhbw, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_punpckhdq, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_punpckhwd, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_punpcklbw, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_punpckldq, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_punpcklwd, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(mmx_pxor, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(sse_cmp_ps, INTR_TYPE_3OP, X86ISD::CMPP, 0), X86_INTRINSIC_DATA(sse_cmp_ss, INTR_TYPE_3OP, X86ISD::FSETCC, 0), X86_INTRINSIC_DATA(sse_comieq_ss, COMI, X86ISD::COMI, ISD::SETEQ), @@ -1503,8 +1575,14 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(sse_comile_ss, COMI, X86ISD::COMI, ISD::SETLE), X86_INTRINSIC_DATA(sse_comilt_ss, COMI, X86ISD::COMI, ISD::SETLT), X86_INTRINSIC_DATA(sse_comineq_ss, COMI, X86ISD::COMI, ISD::SETNE), + X86_INTRINSIC_DATA(sse_cvtpd2pi, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(sse_cvtpi2pd, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(sse_cvtpi2ps, INTR_TYPE_CAST_MMX, 0, 0), + 
X86_INTRINSIC_DATA(sse_cvtps2pi, INTR_TYPE_CAST_MMX, 0, 0), X86_INTRINSIC_DATA(sse_cvtss2si, INTR_TYPE_1OP, X86ISD::CVTS2SI, 0), X86_INTRINSIC_DATA(sse_cvtss2si64, INTR_TYPE_1OP, X86ISD::CVTS2SI, 0), + X86_INTRINSIC_DATA(sse_cvttpd2pi, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(sse_cvttps2pi, INTR_TYPE_CAST_MMX, 0, 0), X86_INTRINSIC_DATA(sse_cvttss2si, INTR_TYPE_1OP, X86ISD::CVTTS2SI, 0), X86_INTRINSIC_DATA(sse_cvttss2si64, INTR_TYPE_1OP, X86ISD::CVTTS2SI, 0), X86_INTRINSIC_DATA(sse_max_ps, INTR_TYPE_2OP, X86ISD::FMAX, 0), @@ -1512,6 +1590,7 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(sse_min_ps, INTR_TYPE_2OP, X86ISD::FMIN, 0), X86_INTRINSIC_DATA(sse_min_ss, INTR_TYPE_2OP, X86ISD::FMINS, 0), X86_INTRINSIC_DATA(sse_movmsk_ps, INTR_TYPE_1OP, X86ISD::MOVMSK, 0), + X86_INTRINSIC_DATA(sse_pshuf_w, INTR_TYPE_CAST_MMX, 0, 0), X86_INTRINSIC_DATA(sse_rcp_ps, INTR_TYPE_1OP, X86ISD::FRCP, 0), X86_INTRINSIC_DATA(sse_rsqrt_ps, INTR_TYPE_1OP, X86ISD::FRSQRT, 0), X86_INTRINSIC_DATA(sse_ucomieq_ss, COMI, X86ISD::UCOMI, ISD::SETEQ), @@ -1593,14 +1672,29 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(sse41_round_ss, ROUNDS, X86ISD::VRNDSCALES, 0), X86_INTRINSIC_DATA(sse4a_extrqi, INTR_TYPE_3OP, X86ISD::EXTRQI, 0), X86_INTRINSIC_DATA(sse4a_insertqi, INTR_TYPE_4OP_IMM8, X86ISD::INSERTQI, 0), + X86_INTRINSIC_DATA(ssse3_pabs_b, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(ssse3_pabs_d, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(ssse3_pabs_w, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(ssse3_phadd_d, INTR_TYPE_CAST_MMX, 0, 0), X86_INTRINSIC_DATA(ssse3_phadd_d_128, INTR_TYPE_2OP, X86ISD::HADD, 0), + X86_INTRINSIC_DATA(ssse3_phadd_sw, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(ssse3_phadd_w, INTR_TYPE_CAST_MMX, 0, 0), X86_INTRINSIC_DATA(ssse3_phadd_w_128, INTR_TYPE_2OP, X86ISD::HADD, 0), + X86_INTRINSIC_DATA(ssse3_phsub_d, INTR_TYPE_CAST_MMX, 0, 0), X86_INTRINSIC_DATA(ssse3_phsub_d_128, INTR_TYPE_2OP, 
X86ISD::HSUB, 0), + X86_INTRINSIC_DATA(ssse3_phsub_sw, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(ssse3_phsub_w, INTR_TYPE_CAST_MMX, 0, 0), X86_INTRINSIC_DATA(ssse3_phsub_w_128, INTR_TYPE_2OP, X86ISD::HSUB, 0), + X86_INTRINSIC_DATA(ssse3_pmadd_ub_sw, INTR_TYPE_CAST_MMX, 0, 0), X86_INTRINSIC_DATA(ssse3_pmadd_ub_sw_128, INTR_TYPE_2OP, X86ISD::VPMADDUBSW, 0), + X86_INTRINSIC_DATA(ssse3_pmul_hr_sw, INTR_TYPE_CAST_MMX, 0, 0), X86_INTRINSIC_DATA(ssse3_pmul_hr_sw_128, INTR_TYPE_2OP, X86ISD::MULHRS, 0), + X86_INTRINSIC_DATA(ssse3_pshuf_b, INTR_TYPE_CAST_MMX, 0, 0), X86_INTRINSIC_DATA(ssse3_pshuf_b_128, INTR_TYPE_2OP, X86ISD::PSHUFB, 0), + X86_INTRINSIC_DATA(ssse3_psign_b, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(ssse3_psign_d, INTR_TYPE_CAST_MMX, 0, 0), + X86_INTRINSIC_DATA(ssse3_psign_w, INTR_TYPE_CAST_MMX, 0, 0), X86_INTRINSIC_DATA(subborrow_32, ADX, X86ISD::SBB, X86ISD::SUB), X86_INTRINSIC_DATA(subborrow_64, ADX, X86ISD::SBB, X86ISD::SUB), X86_INTRINSIC_DATA(tbm_bextri_u32, BEXTRI, X86ISD::BEXTRI, 0), diff --git a/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp b/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp index a164c82..f5a7ab2 100644 --- a/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -962,8 +962,7 @@ bool DeadArgumentEliminationPass::removeDeadStuffFromFunction(Function *F) { } else if (NewCB->getType()->isVoidTy()) { // If the return value is dead, replace any uses of it with poison // (any non-debug value uses will get removed later on). - if (!CB.getType()->isX86_MMXTy()) - CB.replaceAllUsesWith(PoisonValue::get(CB.getType())); + CB.replaceAllUsesWith(PoisonValue::get(CB.getType())); } else { assert((RetTy->isStructTy() || RetTy->isArrayTy()) && "Return type changed, but not into a void. 
The old return type" @@ -1027,8 +1026,7 @@ bool DeadArgumentEliminationPass::removeDeadStuffFromFunction(Function *F) { } else { // If this argument is dead, replace any uses of it with poison // (any non-debug value uses will get removed later on). - if (!I->getType()->isX86_MMXTy()) - I->replaceAllUsesWith(PoisonValue::get(I->getType())); + I->replaceAllUsesWith(PoisonValue::get(I->getType())); } // If we change the return value of the function we must rewrite any return diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp index 8f83047..ed78201 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -2679,13 +2679,6 @@ Instruction *InstCombinerImpl::visitBitCast(BitCastInst &CI) { return replaceInstUsesWith(CI, Src); if (FixedVectorType *DestVTy = dyn_cast<FixedVectorType>(DestTy)) { - // Beware: messing with this target-specific oddity may cause trouble. - if (DestVTy->getNumElements() == 1 && SrcTy->isX86_MMXTy()) { - Value *Elem = Builder.CreateBitCast(Src, DestVTy->getElementType()); - return InsertElementInst::Create(PoisonValue::get(DestTy), Elem, - Constant::getNullValue(Type::getInt32Ty(CI.getContext()))); - } - if (isa<IntegerType>(SrcTy)) { // If this is a cast from an integer to vector, check to see if the input // is a trunc or zext of a bitcast from vector. If so, we can replace all diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index c979e81..910c36f 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -2987,8 +2987,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { /// Caller guarantees that this intrinsic does not access memory. 
bool maybeHandleSimpleNomemIntrinsic(IntrinsicInst &I) { Type *RetTy = I.getType(); - if (!(RetTy->isIntOrIntVectorTy() || RetTy->isFPOrFPVectorTy() || - RetTy->isX86_MMXTy())) + if (!(RetTy->isIntOrIntVectorTy() || RetTy->isFPOrFPVectorTy())) return false; unsigned NumArgOperands = I.arg_size(); @@ -3218,7 +3217,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { setOriginForNaryOp(I); } - // Get an X86_MMX-sized vector type. + // Get an MMX-sized vector type. Type *getMMXVectorTy(unsigned EltSizeInBits) { const unsigned X86_MMXSizeInBits = 64; assert(EltSizeInBits != 0 && (X86_MMXSizeInBits % EltSizeInBits) == 0 && @@ -3264,20 +3263,21 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { // packs elements of 2 input vectors into half as many bits with saturation. // Shadow is propagated with the signed variant of the same intrinsic applied // to sext(Sa != zeroinitializer), sext(Sb != zeroinitializer). - // EltSizeInBits is used only for x86mmx arguments. - void handleVectorPackIntrinsic(IntrinsicInst &I, unsigned EltSizeInBits = 0) { + // MMXEltSizeInBits is used only for x86mmx arguments. + void handleVectorPackIntrinsic(IntrinsicInst &I, + unsigned MMXEltSizeInBits = 0) { assert(I.arg_size() == 2); - bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy(); IRBuilder<> IRB(&I); Value *S1 = getShadow(&I, 0); Value *S2 = getShadow(&I, 1); - assert(isX86_MMX || S1->getType()->isVectorTy()); + assert(S1->getType()->isVectorTy()); // SExt and ICmpNE below must apply to individual elements of input vectors. // In case of x86mmx arguments, cast them to appropriate vector types and // back. - Type *T = isX86_MMX ? getMMXVectorTy(EltSizeInBits) : S1->getType(); - if (isX86_MMX) { + Type *T = + MMXEltSizeInBits ? 
getMMXVectorTy(MMXEltSizeInBits) : S1->getType(); + if (MMXEltSizeInBits) { S1 = IRB.CreateBitCast(S1, T); S2 = IRB.CreateBitCast(S2, T); } @@ -3285,10 +3285,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { IRB.CreateSExt(IRB.CreateICmpNE(S1, Constant::getNullValue(T)), T); Value *S2_ext = IRB.CreateSExt(IRB.CreateICmpNE(S2, Constant::getNullValue(T)), T); - if (isX86_MMX) { - Type *X86_MMXTy = Type::getX86_MMXTy(*MS.C); - S1_ext = IRB.CreateBitCast(S1_ext, X86_MMXTy); - S2_ext = IRB.CreateBitCast(S2_ext, X86_MMXTy); + if (MMXEltSizeInBits) { + S1_ext = IRB.CreateBitCast(S1_ext, getMMXVectorTy(64)); + S2_ext = IRB.CreateBitCast(S2_ext, getMMXVectorTy(64)); } Function *ShadowFn = Intrinsic::getDeclaration( @@ -3296,7 +3295,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { Value *S = IRB.CreateCall(ShadowFn, {S1_ext, S2_ext}, "_msprop_vector_pack"); - if (isX86_MMX) + if (MMXEltSizeInBits) S = IRB.CreateBitCast(S, getShadowTy(&I)); setShadow(&I, S); setOriginForNaryOp(I); @@ -3403,10 +3402,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { } // Instrument sum-of-absolute-differences intrinsic. - void handleVectorSadIntrinsic(IntrinsicInst &I) { + void handleVectorSadIntrinsic(IntrinsicInst &I, bool IsMMX = false) { const unsigned SignificantBitsPerResultElement = 16; - bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy(); - Type *ResTy = isX86_MMX ? IntegerType::get(*MS.C, 64) : I.getType(); + Type *ResTy = IsMMX ? IntegerType::get(*MS.C, 64) : I.getType(); unsigned ZeroBitsPerResultElement = ResTy->getScalarSizeInBits() - SignificantBitsPerResultElement; @@ -3425,9 +3423,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { // Instrument multiply-add intrinsic. void handleVectorPmaddIntrinsic(IntrinsicInst &I, - unsigned EltSizeInBits = 0) { - bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy(); - Type *ResTy = isX86_MMX ? 
getMMXVectorTy(EltSizeInBits * 2) : I.getType(); + unsigned MMXEltSizeInBits = 0) { + Type *ResTy = + MMXEltSizeInBits ? getMMXVectorTy(MMXEltSizeInBits * 2) : I.getType(); IRBuilder<> IRB(&I); auto *Shadow0 = getShadow(&I, 0); auto *Shadow1 = getShadow(&I, 1); @@ -4161,6 +4159,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { break; case Intrinsic::x86_mmx_psad_bw: + handleVectorSadIntrinsic(I, true); + break; case Intrinsic::x86_sse2_psad_bw: case Intrinsic::x86_avx2_psad_bw: handleVectorSadIntrinsic(I); @@ -5048,7 +5048,7 @@ struct VarArgAMD64Helper : public VarArgHelperBase { Type *T = arg->getType(); if (T->isX86_FP80Ty()) return AK_Memory; - if (T->isFPOrFPVectorTy() || T->isX86_MMXTy()) + if (T->isFPOrFPVectorTy()) return AK_FloatingPoint; if (T->isIntegerTy() && T->getPrimitiveSizeInBits() <= 64) return AK_GeneralPurpose; |