path: root/clang/lib/AST/ByteCode/InterpBuiltin.cpp
Diffstat (limited to 'clang/lib/AST/ByteCode/InterpBuiltin.cpp')
-rw-r--r-- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 343
1 file changed, 306 insertions, 37 deletions
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 922d679..5838cf8 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -23,7 +23,7 @@
namespace clang {
namespace interp {
-LLVM_ATTRIBUTE_UNUSED static bool isNoopBuiltin(unsigned ID) {
+[[maybe_unused]] static bool isNoopBuiltin(unsigned ID) {
switch (ID) {
case Builtin::BIas_const:
case Builtin::BIforward:
@@ -1633,8 +1633,8 @@ static bool interp__builtin_elementwise_countzeroes(InterpState &S,
const InterpFrame *Frame,
const CallExpr *Call,
unsigned BuiltinID) {
- const bool HasZeroArg = Call->getNumArgs() == 2;
- const bool IsCTTZ = BuiltinID == Builtin::BI__builtin_elementwise_ctzg;
+ bool HasZeroArg = Call->getNumArgs() == 2;
+ bool IsCTTZ = BuiltinID == Builtin::BI__builtin_elementwise_ctzg;
assert(Call->getNumArgs() == 1 || HasZeroArg);
if (Call->getArg(0)->getType()->isIntegerType()) {
PrimType ArgT = *S.getContext().classify(Call->getArg(0)->getType());
@@ -2447,18 +2447,18 @@ interp__builtin_x86_pack(InterpState &S, CodePtr, const CallExpr *E,
const Pointer &Dst = S.Stk.peek<Pointer>();
const ASTContext &ASTCtx = S.getASTContext();
- const unsigned SrcBits = ASTCtx.getIntWidth(VT0->getElementType());
- const unsigned LHSVecLen = VT0->getNumElements();
- const unsigned SrcPerLane = 128 / SrcBits;
- const unsigned Lanes = LHSVecLen * SrcBits / 128;
+ unsigned SrcBits = ASTCtx.getIntWidth(VT0->getElementType());
+ unsigned LHSVecLen = VT0->getNumElements();
+ unsigned SrcPerLane = 128 / SrcBits;
+ unsigned Lanes = LHSVecLen * SrcBits / 128;
PrimType SrcT = *S.getContext().classify(VT0->getElementType());
PrimType DstT = *S.getContext().classify(getElemType(Dst));
- const bool IsUnsigend = getElemType(Dst)->isUnsignedIntegerType();
+ bool IsUnsigend = getElemType(Dst)->isUnsignedIntegerType();
for (unsigned Lane = 0; Lane != Lanes; ++Lane) {
- const unsigned BaseSrc = Lane * SrcPerLane;
- const unsigned BaseDst = Lane * (2 * SrcPerLane);
+ unsigned BaseSrc = Lane * SrcPerLane;
+ unsigned BaseDst = Lane * (2 * SrcPerLane);
for (unsigned I = 0; I != SrcPerLane; ++I) {
INT_TYPE_SWITCH_NO_BOOL(SrcT, {
@@ -2587,6 +2587,82 @@ static bool interp__builtin_ia32_pmul(
return true;
}
+static bool interp_builtin_horizontal_int_binop(
+ InterpState &S, CodePtr OpPC, const CallExpr *Call,
+ llvm::function_ref<APInt(const APSInt &, const APSInt &)> Fn) {
+ const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>();
+ PrimType ElemT = *S.getContext().classify(VT->getElementType());
+ bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType();
+
+ const Pointer &RHS = S.Stk.pop<Pointer>();
+ const Pointer &LHS = S.Stk.pop<Pointer>();
+ const Pointer &Dst = S.Stk.peek<Pointer>();
+ unsigned NumElts = VT->getNumElements();
+ unsigned EltBits = S.getASTContext().getIntWidth(VT->getElementType());
+ unsigned EltsPerLane = 128 / EltBits;
+ unsigned Lanes = NumElts * EltBits / 128;
+ unsigned DestIndex = 0;
+
+ for (unsigned Lane = 0; Lane < Lanes; ++Lane) {
+ unsigned LaneStart = Lane * EltsPerLane;
+ for (unsigned I = 0; I < EltsPerLane; I += 2) {
+ INT_TYPE_SWITCH_NO_BOOL(ElemT, {
+ APSInt Elem1 = LHS.elem<T>(LaneStart + I).toAPSInt();
+ APSInt Elem2 = LHS.elem<T>(LaneStart + I + 1).toAPSInt();
+ APSInt ResL = APSInt(Fn(Elem1, Elem2), DestUnsigned);
+ Dst.elem<T>(DestIndex++) = static_cast<T>(ResL);
+ });
+ }
+
+ for (unsigned I = 0; I < EltsPerLane; I += 2) {
+ INT_TYPE_SWITCH_NO_BOOL(ElemT, {
+ APSInt Elem1 = RHS.elem<T>(LaneStart + I).toAPSInt();
+ APSInt Elem2 = RHS.elem<T>(LaneStart + I + 1).toAPSInt();
+ APSInt ResR = APSInt(Fn(Elem1, Elem2), DestUnsigned);
+ Dst.elem<T>(DestIndex++) = static_cast<T>(ResR);
+ });
+ }
+ }
+ Dst.initializeAllElements();
+ return true;
+}
+
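A minimal scalar model (illustrative only, not part of this patch; the name is hypothetical) of the pairing order the helper above implements for a single 128-bit lane of phaddw: the low half of each lane takes adjacent pairs from the LHS, the high half takes adjacent pairs from the RHS. The interpreter works on APSInt lane values, not plain shorts.

    // Hypothetical scalar sketch of 128-bit phaddw (8 x i16 per operand).
    constexpr void phaddw128_model(const short a[8], const short b[8], short out[8]) {
      for (int i = 0; i < 4; ++i)
        out[i] = static_cast<short>(a[2 * i] + a[2 * i + 1]);     // LHS pairs -> low half
      for (int i = 0; i < 4; ++i)
        out[4 + i] = static_cast<short>(b[2 * i] + b[2 * i + 1]); // RHS pairs -> high half
    }

For the saturating phaddsw/phsubsw forms the same index mapping applies, with sadd_sat/ssub_sat in place of plain +/-.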
+static bool interp_builtin_horizontal_fp_binop(
+ InterpState &S, CodePtr OpPC, const CallExpr *Call,
+ llvm::function_ref<APFloat(const APFloat &, const APFloat &,
+ llvm::RoundingMode)>
+ Fn) {
+ const Pointer &RHS = S.Stk.pop<Pointer>();
+ const Pointer &LHS = S.Stk.pop<Pointer>();
+ const Pointer &Dst = S.Stk.peek<Pointer>();
+ FPOptions FPO = Call->getFPFeaturesInEffect(S.Ctx.getLangOpts());
+ llvm::RoundingMode RM = getRoundingMode(FPO);
+ const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>();
+
+ unsigned NumElts = VT->getNumElements();
+ unsigned EltBits = S.getASTContext().getTypeSize(VT->getElementType());
+ unsigned NumLanes = NumElts * EltBits / 128;
+ unsigned NumElemsPerLane = NumElts / NumLanes;
+ unsigned HalfElemsPerLane = NumElemsPerLane / 2;
+
+ for (unsigned L = 0; L != NumElts; L += NumElemsPerLane) {
+ using T = PrimConv<PT_Float>::T;
+ for (unsigned E = 0; E != HalfElemsPerLane; ++E) {
+ APFloat Elem1 = LHS.elem<T>(L + (2 * E) + 0).getAPFloat();
+ APFloat Elem2 = LHS.elem<T>(L + (2 * E) + 1).getAPFloat();
+ Dst.elem<T>(L + E) = static_cast<T>(Fn(Elem1, Elem2, RM));
+ }
+ for (unsigned E = 0; E != HalfElemsPerLane; ++E) {
+ APFloat Elem1 = RHS.elem<T>(L + (2 * E) + 0).getAPFloat();
+ APFloat Elem2 = RHS.elem<T>(L + (2 * E) + 1).getAPFloat();
+ Dst.elem<T>(L + E + HalfElemsPerLane) =
+ static_cast<T>(Fn(Elem1, Elem2, RM));
+ }
+ }
+ Dst.initializeAllElements();
+ return true;
+}
+
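The floating-point variant follows the same lane layout; a scalar sketch (illustrative only, not part of this patch; the name is hypothetical) of 128-bit haddps, where the interpreter takes the rounding mode from the call's FP options:

    // Hypothetical scalar sketch of haddps on one 128-bit lane (4 x float).
    constexpr void haddps_model(const float a[4], const float b[4], float out[4]) {
      out[0] = a[0] + a[1]; // LHS pairs fill the low half of the lane
      out[1] = a[2] + a[3];
      out[2] = b[0] + b[1]; // RHS pairs fill the high half
      out[3] = b[2] + b[3];
    }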
static bool interp__builtin_elementwise_triop_fp(
InterpState &S, CodePtr OpPC, const CallExpr *Call,
llvm::function_ref<APFloat(const APFloat &, const APFloat &,
@@ -2596,9 +2672,9 @@ static bool interp__builtin_elementwise_triop_fp(
FPOptions FPO = Call->getFPFeaturesInEffect(S.Ctx.getLangOpts());
llvm::RoundingMode RM = getRoundingMode(FPO);
- const QualType Arg1Type = Call->getArg(0)->getType();
- const QualType Arg2Type = Call->getArg(1)->getType();
- const QualType Arg3Type = Call->getArg(2)->getType();
+ QualType Arg1Type = Call->getArg(0)->getType();
+ QualType Arg2Type = Call->getArg(1)->getType();
+ QualType Arg3Type = Call->getArg(2)->getType();
// Non-vector floating point types.
if (!Arg1Type->isVectorType()) {
@@ -2621,16 +2697,16 @@ static bool interp__builtin_elementwise_triop_fp(
assert(Arg1Type->isVectorType() && Arg2Type->isVectorType() &&
Arg3Type->isVectorType());
- const VectorType *VecT = Arg1Type->castAs<VectorType>();
- const QualType ElemT = VecT->getElementType();
- unsigned NumElems = VecT->getNumElements();
+ const VectorType *VecTy = Arg1Type->castAs<VectorType>();
+ QualType ElemQT = VecTy->getElementType();
+ unsigned NumElems = VecTy->getNumElements();
- assert(ElemT == Arg2Type->castAs<VectorType>()->getElementType() &&
- ElemT == Arg3Type->castAs<VectorType>()->getElementType());
+ assert(ElemQT == Arg2Type->castAs<VectorType>()->getElementType() &&
+ ElemQT == Arg3Type->castAs<VectorType>()->getElementType());
assert(NumElems == Arg2Type->castAs<VectorType>()->getNumElements() &&
NumElems == Arg3Type->castAs<VectorType>()->getNumElements());
- assert(ElemT->isRealFloatingType());
- (void)ElemT;
+ assert(ElemQT->isRealFloatingType());
+ (void)ElemQT;
const Pointer &VZ = S.Stk.pop<Pointer>();
const Pointer &VY = S.Stk.pop<Pointer>();
@@ -2714,6 +2790,34 @@ static bool interp__builtin_blend(InterpState &S, CodePtr OpPC,
return true;
}
+static bool interp__builtin_ia32_pshufb(InterpState &S, CodePtr OpPC,
+ const CallExpr *Call) {
+ assert(Call->getNumArgs() == 2 && "masked forms handled via select*");
+ const Pointer &Control = S.Stk.pop<Pointer>();
+ const Pointer &Src = S.Stk.pop<Pointer>();
+ const Pointer &Dst = S.Stk.peek<Pointer>();
+
+ unsigned NumElems = Dst.getNumElems();
+ assert(NumElems == Control.getNumElems());
+ assert(NumElems == Src.getNumElems());
+
+ for (unsigned Idx = 0; Idx != NumElems; ++Idx) {
+ uint8_t Ctlb = static_cast<uint8_t>(Control.elem<int8_t>(Idx));
+
+ if (Ctlb & 0x80) {
+ Dst.elem<int8_t>(Idx) = 0;
+ } else {
+ unsigned LaneBase = (Idx / 16) * 16;
+ unsigned SrcOffset = Ctlb & 0x0F;
+ unsigned SrcIdx = LaneBase + SrcOffset;
+
+ Dst.elem<int8_t>(Idx) = Src.elem<int8_t>(SrcIdx);
+ }
+ }
+ Dst.initializeAllElements();
+ return true;
+}
+
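A scalar sketch (illustrative only, not part of this patch; the name is hypothetical) of the byte-shuffle rule implemented above: a control byte with bit 7 set zeroes the destination byte, otherwise its low four bits index into the same 16-byte lane of the source.

    // Hypothetical scalar model of pshufb over numBytes bytes (16, 32 or 64).
    constexpr void pshufb_model(const unsigned char *src, const unsigned char *ctl,
                                unsigned char *dst, unsigned numBytes) {
      for (unsigned i = 0; i != numBytes; ++i) {
        if (ctl[i] & 0x80)
          dst[i] = 0;                                    // high bit set: zero the byte
        else
          dst[i] = src[(i / 16) * 16 + (ctl[i] & 0x0F)]; // index within the same lane
      }
    }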
static bool interp__builtin_ia32_pshuf(InterpState &S, CodePtr OpPC,
const CallExpr *Call, bool IsShufHW) {
assert(Call->getNumArgs() == 2 && "masked forms handled via select*");
@@ -2756,6 +2860,45 @@ static bool interp__builtin_ia32_pshuf(InterpState &S, CodePtr OpPC,
return true;
}
+static bool interp__builtin_ia32_test_op(
+ InterpState &S, CodePtr OpPC, const CallExpr *Call,
+ llvm::function_ref<bool(const APInt &A, const APInt &B)> Fn) {
+ const Pointer &RHS = S.Stk.pop<Pointer>();
+ const Pointer &LHS = S.Stk.pop<Pointer>();
+
+ assert(LHS.getNumElems() == RHS.getNumElems());
+
+ unsigned SourceLen = LHS.getNumElems();
+ QualType ElemQT = getElemType(LHS);
+ OptPrimType ElemPT = S.getContext().classify(ElemQT);
+ unsigned LaneWidth = S.getASTContext().getTypeSize(ElemQT);
+
+ APInt AWide(LaneWidth * SourceLen, 0);
+ APInt BWide(LaneWidth * SourceLen, 0);
+
+ for (unsigned I = 0; I != SourceLen; ++I) {
+ APInt ALane;
+ APInt BLane;
+
+ if (ElemQT->isIntegerType()) { // Get value.
+ INT_TYPE_SWITCH_NO_BOOL(*ElemPT, {
+ ALane = LHS.elem<T>(I).toAPSInt();
+ BLane = RHS.elem<T>(I).toAPSInt();
+ });
+ } else if (ElemQT->isFloatingType()) { // Get only sign bit.
+ using T = PrimConv<PT_Float>::T;
+ ALane = LHS.elem<T>(I).getAPFloat().bitcastToAPInt().isNegative();
+ BLane = RHS.elem<T>(I).getAPFloat().bitcastToAPInt().isNegative();
+ } else { // Must be integer or floating type.
+ return false;
+ }
+ AWide.insertBits(ALane, I * LaneWidth);
+ BWide.insertBits(BLane, I * LaneWidth);
+ }
+ pushInteger(S, Fn(AWide, BWide), Call->getType());
+ return true;
+}
+
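The helper concatenates the element bits of each operand into one wide APInt (for the vtest* float forms only the sign bit of each element is packed) and applies a caller-supplied predicate. A sketch of the three predicates used below, written over plain 64-bit masks (illustrative only, not part of this patch; names are hypothetical):

    // ZF-style: no bit is set in both operands.
    constexpr bool testz_model(unsigned long long a, unsigned long long b) { return (a & b) == 0; }
    // CF-style: every set bit of b is also set in a.
    constexpr bool testc_model(unsigned long long a, unsigned long long b) { return (~a & b) == 0; }
    // "not zero and not carry": both of the above fail.
    constexpr bool testnzc_model(unsigned long long a, unsigned long long b) {
      return (a & b) != 0 && (~a & b) != 0;
    }
    static_assert(testz_model(0b0011, 0b1100) && testc_model(0b1111, 0b0101) &&
                  testnzc_model(0b0110, 0b0111));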
static bool interp__builtin_elementwise_triop(
InterpState &S, CodePtr OpPC, const CallExpr *Call,
llvm::function_ref<APInt(const APSInt &, const APSInt &, const APSInt &)>
@@ -2775,7 +2918,7 @@ static bool interp__builtin_elementwise_triop(
}
const auto *VecT = Arg0Type->castAs<VectorType>();
- const PrimType &ElemT = *S.getContext().classify(VecT->getElementType());
+ PrimType ElemT = *S.getContext().classify(VecT->getElementType());
unsigned NumElems = VecT->getNumElements();
bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType();
@@ -2847,9 +2990,9 @@ static bool interp__builtin_x86_insert_subvector(InterpState &S, CodePtr OpPC,
unsigned Lane = static_cast<unsigned>(Index % NumLanes);
unsigned InsertPos = Lane * SubElements;
- PrimType ElemPT = BaseVec.getFieldDesc()->getPrimType();
+ PrimType ElemT = BaseVec.getFieldDesc()->getPrimType();
- TYPE_SWITCH(ElemPT, {
+ TYPE_SWITCH(ElemT, {
for (unsigned I = 0; I != BaseElements; ++I)
Dst.elem<T>(I) = BaseVec.elem<T>(I);
for (unsigned I = 0; I != SubElements; ++I)
@@ -2872,12 +3015,12 @@ static bool interp__builtin_ia32_pternlog(InterpState &S, CodePtr OpPC,
const Pointer &Dst = S.Stk.peek<Pointer>();
unsigned DstLen = A.getNumElems();
- const QualType ElemQT = getElemType(A);
- const OptPrimType ElemPT = S.getContext().classify(ElemQT);
+ QualType ElemQT = getElemType(A);
+ OptPrimType ElemT = S.getContext().classify(ElemQT);
unsigned LaneWidth = S.getASTContext().getTypeSize(ElemQT);
bool DstUnsigned = ElemQT->isUnsignedIntegerOrEnumerationType();
- INT_TYPE_SWITCH_NO_BOOL(*ElemPT, {
+ INT_TYPE_SWITCH_NO_BOOL(*ElemT, {
for (unsigned I = 0; I != DstLen; ++I) {
APInt ALane = A.elem<T>(I).toAPSInt();
APInt BLane = B.elem<T>(I).toAPSInt();
@@ -2916,13 +3059,13 @@ static bool interp__builtin_vec_ext(InterpState &S, CodePtr OpPC,
unsigned Index =
static_cast<unsigned>(ImmAPS.getZExtValue() & (NumElems - 1));
- PrimType ElemPT = Vec.getFieldDesc()->getPrimType();
+ PrimType ElemT = Vec.getFieldDesc()->getPrimType();
// FIXME(#161685): Replace float+int split with a numeric-only type switch
- if (ElemPT == PT_Float) {
+ if (ElemT == PT_Float) {
S.Stk.push<Floating>(Vec.elem<Floating>(Index));
return true;
}
- INT_TYPE_SWITCH_NO_BOOL(ElemPT, {
+ INT_TYPE_SWITCH_NO_BOOL(ElemT, {
APSInt V = Vec.elem<T>(Index).toAPSInt();
pushInteger(S, V, Call->getType());
});
@@ -2947,8 +3090,8 @@ static bool interp__builtin_vec_set(InterpState &S, CodePtr OpPC,
unsigned Index =
static_cast<unsigned>(ImmAPS.getZExtValue() & (NumElems - 1));
- PrimType ElemPT = Base.getFieldDesc()->getPrimType();
- INT_TYPE_SWITCH_NO_BOOL(ElemPT, {
+ PrimType ElemT = Base.getFieldDesc()->getPrimType();
+ INT_TYPE_SWITCH_NO_BOOL(ElemT, {
for (unsigned I = 0; I != NumElems; ++I)
Dst.elem<T>(I) = Base.elem<T>(I);
Dst.elem<T>(Index) = static_cast<T>(ValAPS);
@@ -2958,6 +3101,33 @@ static bool interp__builtin_vec_set(InterpState &S, CodePtr OpPC,
return true;
}
+static bool interp__builtin_ia32_vpconflict(InterpState &S, CodePtr OpPC,
+ const CallExpr *Call) {
+ assert(Call->getNumArgs() == 1);
+
+ QualType Arg0Type = Call->getArg(0)->getType();
+ const auto *VecT = Arg0Type->castAs<VectorType>();
+ PrimType ElemT = *S.getContext().classify(VecT->getElementType());
+ unsigned NumElems = VecT->getNumElements();
+ bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType();
+ const Pointer &Src = S.Stk.pop<Pointer>();
+ const Pointer &Dst = S.Stk.peek<Pointer>();
+
+ for (unsigned I = 0; I != NumElems; ++I) {
+ INT_TYPE_SWITCH_NO_BOOL(ElemT, {
+ APSInt ElemI = Src.elem<T>(I).toAPSInt();
+ APInt ConflictMask(ElemI.getBitWidth(), 0);
+ for (unsigned J = 0; J != I; ++J) {
+ APSInt ElemJ = Src.elem<T>(J).toAPSInt();
+ ConflictMask.setBitVal(J, ElemI == ElemJ);
+ }
+ Dst.elem<T>(I) = static_cast<T>(APSInt(ConflictMask, DestUnsigned));
+ });
+ }
+ Dst.initializeAllElements();
+ return true;
+}
+
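A scalar sketch (illustrative only, not part of this patch; the name is hypothetical) of the conflict-mask computation above: element I of the result gets bit J set for every earlier element J that holds the same value.

    // Hypothetical scalar model of vpconflict for n 32-bit elements.
    constexpr void vpconflict_model(const unsigned *src, unsigned *dst, unsigned n) {
      for (unsigned i = 0; i != n; ++i) {
        unsigned mask = 0;
        for (unsigned j = 0; j != i; ++j)
          if (src[i] == src[j])
            mask |= 1u << j; // record a conflict with an earlier equal element
        dst[i] = mask;
      }
    }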
bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
uint32_t BuiltinID) {
if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID))
@@ -3115,14 +3285,14 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case Builtin::BI__builtin_parityl:
case Builtin::BI__builtin_parityll:
return interp__builtin_elementwise_int_unaryop(
- S, OpPC, Call, [](const APSInt &Val) -> APInt {
+ S, OpPC, Call, [](const APSInt &Val) {
return APInt(Val.getBitWidth(), Val.popcount() % 2);
});
case Builtin::BI__builtin_clrsb:
case Builtin::BI__builtin_clrsbl:
case Builtin::BI__builtin_clrsbll:
return interp__builtin_elementwise_int_unaryop(
- S, OpPC, Call, [](const APSInt &Val) -> APInt {
+ S, OpPC, Call, [](const APSInt &Val) {
return APInt(Val.getBitWidth(),
Val.getBitWidth() - Val.getSignificantBits());
});
@@ -3131,8 +3301,7 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case Builtin::BI__builtin_bitreverse32:
case Builtin::BI__builtin_bitreverse64:
return interp__builtin_elementwise_int_unaryop(
- S, OpPC, Call,
- [](const APSInt &Val) -> APInt { return Val.reverseBits(); });
+ S, OpPC, Call, [](const APSInt &Val) { return Val.reverseBits(); });
case Builtin::BI__builtin_classify_type:
return interp__builtin_classify_type(S, OpPC, Frame, Call);
@@ -3626,6 +3795,53 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case Builtin::BI__builtin_elementwise_min:
return interp__builtin_elementwise_maxmin(S, OpPC, Call, BuiltinID);
+ case clang::X86::BI__builtin_ia32_phaddw128:
+ case clang::X86::BI__builtin_ia32_phaddw256:
+ case clang::X86::BI__builtin_ia32_phaddd128:
+ case clang::X86::BI__builtin_ia32_phaddd256:
+ return interp_builtin_horizontal_int_binop(
+ S, OpPC, Call,
+ [](const APSInt &LHS, const APSInt &RHS) { return LHS + RHS; });
+ case clang::X86::BI__builtin_ia32_phaddsw128:
+ case clang::X86::BI__builtin_ia32_phaddsw256:
+ return interp_builtin_horizontal_int_binop(
+ S, OpPC, Call,
+ [](const APSInt &LHS, const APSInt &RHS) { return LHS.sadd_sat(RHS); });
+ case clang::X86::BI__builtin_ia32_phsubw128:
+ case clang::X86::BI__builtin_ia32_phsubw256:
+ case clang::X86::BI__builtin_ia32_phsubd128:
+ case clang::X86::BI__builtin_ia32_phsubd256:
+ return interp_builtin_horizontal_int_binop(
+ S, OpPC, Call,
+ [](const APSInt &LHS, const APSInt &RHS) { return LHS - RHS; });
+ case clang::X86::BI__builtin_ia32_phsubsw128:
+ case clang::X86::BI__builtin_ia32_phsubsw256:
+ return interp_builtin_horizontal_int_binop(
+ S, OpPC, Call,
+ [](const APSInt &LHS, const APSInt &RHS) { return LHS.ssub_sat(RHS); });
+ case clang::X86::BI__builtin_ia32_haddpd:
+ case clang::X86::BI__builtin_ia32_haddps:
+ case clang::X86::BI__builtin_ia32_haddpd256:
+ case clang::X86::BI__builtin_ia32_haddps256:
+ return interp_builtin_horizontal_fp_binop(
+ S, OpPC, Call,
+ [](const APFloat &LHS, const APFloat &RHS, llvm::RoundingMode RM) {
+ APFloat F = LHS;
+ F.add(RHS, RM);
+ return F;
+ });
+ case clang::X86::BI__builtin_ia32_hsubpd:
+ case clang::X86::BI__builtin_ia32_hsubps:
+ case clang::X86::BI__builtin_ia32_hsubpd256:
+ case clang::X86::BI__builtin_ia32_hsubps256:
+ return interp_builtin_horizontal_fp_binop(
+ S, OpPC, Call,
+ [](const APFloat &LHS, const APFloat &RHS, llvm::RoundingMode RM) {
+ APFloat F = LHS;
+ F.subtract(RHS, RM);
+ return F;
+ });
+
case clang::X86::BI__builtin_ia32_pmuldq128:
case clang::X86::BI__builtin_ia32_pmuldq256:
case clang::X86::BI__builtin_ia32_pmuldq512:
@@ -3656,6 +3872,21 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
return F;
});
+ case X86::BI__builtin_ia32_vpmadd52luq128:
+ case X86::BI__builtin_ia32_vpmadd52luq256:
+ case X86::BI__builtin_ia32_vpmadd52luq512:
+ return interp__builtin_elementwise_triop(
+ S, OpPC, Call, [](const APSInt &A, const APSInt &B, const APSInt &C) {
+ return A + (B.trunc(52) * C.trunc(52)).zext(64);
+ });
+ case X86::BI__builtin_ia32_vpmadd52huq128:
+ case X86::BI__builtin_ia32_vpmadd52huq256:
+ case X86::BI__builtin_ia32_vpmadd52huq512:
+ return interp__builtin_elementwise_triop(
+ S, OpPC, Call, [](const APSInt &A, const APSInt &B, const APSInt &C) {
+ return A + llvm::APIntOps::mulhu(B.trunc(52), C.trunc(52)).zext(64);
+ });
+
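A per-element sketch (illustrative only, not part of this patch; helper names are hypothetical) of the 52-bit multiply-add these lambdas model: the low 52 bits of each source element are multiplied into a 104-bit product, and either its low or its high 52 bits are added to the 64-bit accumulator.

    // Hypothetical per-element models of vpmadd52luq / vpmadd52huq.
    constexpr unsigned long long kLow52 = (1ULL << 52) - 1;
    constexpr unsigned long long madd52lo(unsigned long long a, unsigned long long b,
                                          unsigned long long c) {
      unsigned __int128 p = (unsigned __int128)(b & kLow52) * (c & kLow52);
      return a + (unsigned long long)(p & kLow52); // product bits [51:0]
    }
    constexpr unsigned long long madd52hi(unsigned long long a, unsigned long long b,
                                          unsigned long long c) {
      unsigned __int128 p = (unsigned __int128)(b & kLow52) * (c & kLow52);
      return a + (unsigned long long)(p >> 52);    // product bits [103:52]
    }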
case X86::BI__builtin_ia32_vpshldd128:
case X86::BI__builtin_ia32_vpshldd256:
case X86::BI__builtin_ia32_vpshldd512:
@@ -3686,7 +3917,13 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
[](const APSInt &Lo, const APSInt &Hi, const APSInt &Amt) {
return llvm::APIntOps::fshr(Hi, Lo, Amt);
});
-
+ case X86::BI__builtin_ia32_vpconflictsi_128:
+ case X86::BI__builtin_ia32_vpconflictsi_256:
+ case X86::BI__builtin_ia32_vpconflictsi_512:
+ case X86::BI__builtin_ia32_vpconflictdi_128:
+ case X86::BI__builtin_ia32_vpconflictdi_256:
+ case X86::BI__builtin_ia32_vpconflictdi_512:
+ return interp__builtin_ia32_vpconflict(S, OpPC, Call);
case clang::X86::BI__builtin_ia32_blendpd:
case clang::X86::BI__builtin_ia32_blendpd256:
case clang::X86::BI__builtin_ia32_blendps:
@@ -3712,7 +3949,34 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
S, OpPC, Call, [](const APSInt &F, const APSInt &T, const APSInt &C) {
return ((APInt)C).isNegative() ? T : F;
});
-
+ case X86::BI__builtin_ia32_ptestz128:
+ case X86::BI__builtin_ia32_ptestz256:
+ case X86::BI__builtin_ia32_vtestzps:
+ case X86::BI__builtin_ia32_vtestzps256:
+ case X86::BI__builtin_ia32_vtestzpd:
+ case X86::BI__builtin_ia32_vtestzpd256:
+ return interp__builtin_ia32_test_op(
+ S, OpPC, Call,
+ [](const APInt &A, const APInt &B) { return (A & B) == 0; });
+ case X86::BI__builtin_ia32_ptestc128:
+ case X86::BI__builtin_ia32_ptestc256:
+ case X86::BI__builtin_ia32_vtestcps:
+ case X86::BI__builtin_ia32_vtestcps256:
+ case X86::BI__builtin_ia32_vtestcpd:
+ case X86::BI__builtin_ia32_vtestcpd256:
+ return interp__builtin_ia32_test_op(
+ S, OpPC, Call,
+ [](const APInt &A, const APInt &B) { return (~A & B) == 0; });
+ case X86::BI__builtin_ia32_ptestnzc128:
+ case X86::BI__builtin_ia32_ptestnzc256:
+ case X86::BI__builtin_ia32_vtestnzcps:
+ case X86::BI__builtin_ia32_vtestnzcps256:
+ case X86::BI__builtin_ia32_vtestnzcpd:
+ case X86::BI__builtin_ia32_vtestnzcpd256:
+ return interp__builtin_ia32_test_op(
+ S, OpPC, Call, [](const APInt &A, const APInt &B) {
+ return ((A & B) != 0) && ((~A & B) != 0);
+ });
case X86::BI__builtin_ia32_selectb_128:
case X86::BI__builtin_ia32_selectb_256:
case X86::BI__builtin_ia32_selectb_512:
@@ -3739,6 +4003,11 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case X86::BI__builtin_ia32_selectpd_512:
return interp__builtin_select(S, OpPC, Call);
+ case X86::BI__builtin_ia32_pshufb128:
+ case X86::BI__builtin_ia32_pshufb256:
+ case X86::BI__builtin_ia32_pshufb512:
+ return interp__builtin_ia32_pshufb(S, OpPC, Call);
+
case X86::BI__builtin_ia32_pshuflw:
case X86::BI__builtin_ia32_pshuflw256:
case X86::BI__builtin_ia32_pshuflw512: