aboutsummaryrefslogtreecommitdiff
path: root/clang/lib/AST/ByteCode/InterpBuiltin.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'clang/lib/AST/ByteCode/InterpBuiltin.cpp')
-rw-r--r--clang/lib/AST/ByteCode/InterpBuiltin.cpp138
1 files changed, 138 insertions, 0 deletions
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 84c5ecc..2d3cb6a 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2587,6 +2587,82 @@ static bool interp__builtin_ia32_pmul(
return true;
}
+static bool interp_builtin_horizontal_int_binop(
+ InterpState &S, CodePtr OpPC, const CallExpr *Call,
+ llvm::function_ref<APInt(const APSInt &, const APSInt &)> Fn) {
+ const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>();
+ PrimType ElemT = *S.getContext().classify(VT->getElementType());
+ bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType();
+
+ const Pointer &RHS = S.Stk.pop<Pointer>();
+ const Pointer &LHS = S.Stk.pop<Pointer>();
+ const Pointer &Dst = S.Stk.peek<Pointer>();
+ unsigned NumElts = VT->getNumElements();
+ unsigned EltBits = S.getASTContext().getIntWidth(VT->getElementType());
+ unsigned EltsPerLane = 128 / EltBits;
+ unsigned Lanes = NumElts * EltBits / 128;
+ unsigned DestIndex = 0;
+
+ for (unsigned Lane = 0; Lane < Lanes; ++Lane) {
+ unsigned LaneStart = Lane * EltsPerLane;
+ for (unsigned I = 0; I < EltsPerLane; I += 2) {
+ INT_TYPE_SWITCH_NO_BOOL(ElemT, {
+ APSInt Elem1 = LHS.elem<T>(LaneStart + I).toAPSInt();
+ APSInt Elem2 = LHS.elem<T>(LaneStart + I + 1).toAPSInt();
+ APSInt ResL = APSInt(Fn(Elem1, Elem2), DestUnsigned);
+ Dst.elem<T>(DestIndex++) = static_cast<T>(ResL);
+ });
+ }
+
+ for (unsigned I = 0; I < EltsPerLane; I += 2) {
+ INT_TYPE_SWITCH_NO_BOOL(ElemT, {
+ APSInt Elem1 = RHS.elem<T>(LaneStart + I).toAPSInt();
+ APSInt Elem2 = RHS.elem<T>(LaneStart + I + 1).toAPSInt();
+ APSInt ResR = APSInt(Fn(Elem1, Elem2), DestUnsigned);
+ Dst.elem<T>(DestIndex++) = static_cast<T>(ResR);
+ });
+ }
+ }
+ Dst.initializeAllElements();
+ return true;
+}
+
+static bool interp_builtin_horizontal_fp_binop(
+ InterpState &S, CodePtr OpPC, const CallExpr *Call,
+ llvm::function_ref<APFloat(const APFloat &, const APFloat &,
+ llvm::RoundingMode)>
+ Fn) {
+ const Pointer &RHS = S.Stk.pop<Pointer>();
+ const Pointer &LHS = S.Stk.pop<Pointer>();
+ const Pointer &Dst = S.Stk.peek<Pointer>();
+ FPOptions FPO = Call->getFPFeaturesInEffect(S.Ctx.getLangOpts());
+ llvm::RoundingMode RM = getRoundingMode(FPO);
+ const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>();
+
+ unsigned NumElts = VT->getNumElements();
+ unsigned EltBits = S.getASTContext().getTypeSize(VT->getElementType());
+ unsigned NumLanes = NumElts * EltBits / 128;
+ unsigned NumElemsPerLane = NumElts / NumLanes;
+ unsigned HalfElemsPerLane = NumElemsPerLane / 2;
+
+ for (unsigned L = 0; L != NumElts; L += NumElemsPerLane) {
+ using T = PrimConv<PT_Float>::T;
+ for (unsigned E = 0; E != HalfElemsPerLane; ++E) {
+ APFloat Elem1 = LHS.elem<T>(L + (2 * E) + 0).getAPFloat();
+ APFloat Elem2 = LHS.elem<T>(L + (2 * E) + 1).getAPFloat();
+ Dst.elem<T>(L + E) = static_cast<T>(Fn(Elem1, Elem2, RM));
+ }
+ for (unsigned E = 0; E != HalfElemsPerLane; ++E) {
+ APFloat Elem1 = RHS.elem<T>(L + (2 * E) + 0).getAPFloat();
+ APFloat Elem2 = RHS.elem<T>(L + (2 * E) + 1).getAPFloat();
+ Dst.elem<T>(L + E + HalfElemsPerLane) =
+ static_cast<T>(Fn(Elem1, Elem2, RM));
+ }
+ }
+ Dst.initializeAllElements();
+ return true;
+}
+
static bool interp__builtin_elementwise_triop_fp(
InterpState &S, CodePtr OpPC, const CallExpr *Call,
llvm::function_ref<APFloat(const APFloat &, const APFloat &,
@@ -3665,6 +3741,53 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case Builtin::BI__builtin_elementwise_min:
return interp__builtin_elementwise_maxmin(S, OpPC, Call, BuiltinID);
+ case clang::X86::BI__builtin_ia32_phaddw128:
+ case clang::X86::BI__builtin_ia32_phaddw256:
+ case clang::X86::BI__builtin_ia32_phaddd128:
+ case clang::X86::BI__builtin_ia32_phaddd256:
+ return interp_builtin_horizontal_int_binop(
+ S, OpPC, Call,
+ [](const APSInt &LHS, const APSInt &RHS) { return LHS + RHS; });
+ case clang::X86::BI__builtin_ia32_phaddsw128:
+ case clang::X86::BI__builtin_ia32_phaddsw256:
+ return interp_builtin_horizontal_int_binop(
+ S, OpPC, Call,
+ [](const APSInt &LHS, const APSInt &RHS) { return LHS.sadd_sat(RHS); });
+ case clang::X86::BI__builtin_ia32_phsubw128:
+ case clang::X86::BI__builtin_ia32_phsubw256:
+ case clang::X86::BI__builtin_ia32_phsubd128:
+ case clang::X86::BI__builtin_ia32_phsubd256:
+ return interp_builtin_horizontal_int_binop(
+ S, OpPC, Call,
+ [](const APSInt &LHS, const APSInt &RHS) { return LHS - RHS; });
+ case clang::X86::BI__builtin_ia32_phsubsw128:
+ case clang::X86::BI__builtin_ia32_phsubsw256:
+ return interp_builtin_horizontal_int_binop(
+ S, OpPC, Call,
+ [](const APSInt &LHS, const APSInt &RHS) { return LHS.ssub_sat(RHS); });
+ case clang::X86::BI__builtin_ia32_haddpd:
+ case clang::X86::BI__builtin_ia32_haddps:
+ case clang::X86::BI__builtin_ia32_haddpd256:
+ case clang::X86::BI__builtin_ia32_haddps256:
+ return interp_builtin_horizontal_fp_binop(
+ S, OpPC, Call,
+ [](const APFloat &LHS, const APFloat &RHS, llvm::RoundingMode RM) {
+ APFloat F = LHS;
+ F.add(RHS, RM);
+ return F;
+ });
+ case clang::X86::BI__builtin_ia32_hsubpd:
+ case clang::X86::BI__builtin_ia32_hsubps:
+ case clang::X86::BI__builtin_ia32_hsubpd256:
+ case clang::X86::BI__builtin_ia32_hsubps256:
+ return interp_builtin_horizontal_fp_binop(
+ S, OpPC, Call,
+ [](const APFloat &LHS, const APFloat &RHS, llvm::RoundingMode RM) {
+ APFloat F = LHS;
+ F.subtract(RHS, RM);
+ return F;
+ });
+
case clang::X86::BI__builtin_ia32_pmuldq128:
case clang::X86::BI__builtin_ia32_pmuldq256:
case clang::X86::BI__builtin_ia32_pmuldq512:
@@ -3695,6 +3818,21 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
return F;
});
+ case X86::BI__builtin_ia32_vpmadd52luq128:
+ case X86::BI__builtin_ia32_vpmadd52luq256:
+ case X86::BI__builtin_ia32_vpmadd52luq512:
+ return interp__builtin_elementwise_triop(
+ S, OpPC, Call, [](const APSInt &A, const APSInt &B, const APSInt &C) {
+ return A + (B.trunc(52) * C.trunc(52)).zext(64);
+ });
+ case X86::BI__builtin_ia32_vpmadd52huq128:
+ case X86::BI__builtin_ia32_vpmadd52huq256:
+ case X86::BI__builtin_ia32_vpmadd52huq512:
+ return interp__builtin_elementwise_triop(
+ S, OpPC, Call, [](const APSInt &A, const APSInt &B, const APSInt &C) {
+ return A + llvm::APIntOps::mulhu(B.trunc(52), C.trunc(52)).zext(64);
+ });
+
case X86::BI__builtin_ia32_vpshldd128:
case X86::BI__builtin_ia32_vpshldd256:
case X86::BI__builtin_ia32_vpshldd512: