about summary refs log tree commit diff
path: root/clang/lib/AST/ByteCode/InterpBuiltin.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'clang/lib/AST/ByteCode/InterpBuiltin.cpp')
-rw-r--r-- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 128
1 file changed, 128 insertions(+), 0 deletions(-)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index ff50e6d..d0b97a1 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3320,6 +3320,65 @@ static bool interp__builtin_ia32_vpconflict(InterpState &S, CodePtr OpPC,
return true;
}
+/// Constant-evaluates the x86 whole-register byte-shift builtins
+/// (PSLLDQ/PSRLDQ family) for the bytecode interpreter.
+///
+/// Pops the immediate shift amount and the source vector off the interp
+/// stack, then computes each destination byte through \p Fn, operating on
+/// one 16-byte (128-bit) lane at a time.
+///
+/// \param ID Builtin ID of the caller. NOTE(review): currently unused in
+///           this helper (as is \p OpPC); kept for uniformity with the
+///           other interp__builtin_* helpers — confirm before removing.
+/// \param Fn Produces the byte value for destination element
+///           Lane + I, given the source vector and the shift amount.
+/// \return true on success; false if the source is not a primitive array.
+static bool interp__builtin_x86_byteshift(
+ InterpState &S, CodePtr OpPC, const CallExpr *Call, unsigned ID,
+ llvm::function_ref<APInt(const Pointer &, unsigned Lane, unsigned I,
+ unsigned Shift)>
+ Fn) {
+ assert(Call->getNumArgs() == 2);
+
+ // Hardware ignores all but the low byte of the immediate; shifts >= 16
+ // are handled by Fn (every source index falls out of the lane -> zero).
+ APSInt ImmAPS = popToAPSInt(S, Call->getArg(1));
+ uint64_t Shift = ImmAPS.getZExtValue() & 0xff;
+
+ const Pointer &Src = S.Stk.pop<Pointer>();
+ if (!Src.getFieldDesc()->isPrimitiveArray())
+ return false;
+
+ unsigned NumElems = Src.getNumElems();
+ const Pointer &Dst = S.Stk.peek<Pointer>();
+ PrimType ElemT = Src.getFieldDesc()->getPrimType();
+
+ // Each 128-bit lane (16 byte elements) is shifted independently.
+ for (unsigned Lane = 0; Lane != NumElems; Lane += 16) {
+ for (unsigned I = 0; I != 16; ++I) {
+ unsigned Base = Lane + I;
+ APSInt Result = APSInt(Fn(Src, Lane, I, Shift));
+ INT_TYPE_SWITCH_NO_BOOL(ElemT,
+ { Dst.elem<T>(Base) = static_cast<T>(Result); });
+ }
+ }
+
+ // Mark every destination element initialized so later reads are valid.
+ Dst.initializeAllElements();
+
+ return true;
+}
+
+/// Constant-evaluates two-operand immediate-mask x86 shuffle builtins
+/// (e.g. SHUFPS/SHUFPD) for the bytecode interpreter.
+///
+/// Pops the 8-bit control mask and the two source vectors, then fills the
+/// destination element-by-element. All mask decoding lives in
+/// \p GetSourceIndex, which maps (DstIdx, ShuffleMask) to a pair of
+/// {source vector (0 = first operand, 1 = second), element index in it}.
+///
+/// \return true on success (this helper itself has no failure paths).
+static bool interp__builtin_ia32_shuffle_generic(
+ InterpState &S, CodePtr OpPC, const CallExpr *Call,
+ llvm::function_ref<std::pair<unsigned, unsigned>(unsigned, unsigned)>
+ GetSourceIndex) {
+
+ assert(Call->getNumArgs() == 3);
+ unsigned ShuffleMask = popToAPSInt(S, Call->getArg(2)).getZExtValue();
+
+ // Element type and count come from the first argument's vector type;
+ // both operands share that type for these builtins.
+ QualType Arg0Type = Call->getArg(0)->getType();
+ const auto *VecT = Arg0Type->castAs<VectorType>();
+ PrimType ElemT = *S.getContext().classify(VecT->getElementType());
+ unsigned NumElems = VecT->getNumElements();
+
+ // Operands were pushed in order, so the second operand pops first.
+ const Pointer &B = S.Stk.pop<Pointer>();
+ const Pointer &A = S.Stk.pop<Pointer>();
+ const Pointer &Dst = S.Stk.peek<Pointer>();
+
+ for (unsigned DstIdx = 0; DstIdx != NumElems; ++DstIdx) {
+ auto [SrcVecIdx, SrcIdx] = GetSourceIndex(DstIdx, ShuffleMask);
+ const Pointer &Src = (SrcVecIdx == 0) ? A : B;
+ TYPE_SWITCH(ElemT, { Dst.elem<T>(DstIdx) = Src.elem<T>(SrcIdx); });
+ }
+ // Mark every destination element initialized so later reads are valid.
+ Dst.initializeAllElements();
+
+ return true;
+}
+
bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
uint32_t BuiltinID) {
if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID))
@@ -4250,6 +4309,42 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case X86::BI__builtin_ia32_selectpd_512:
return interp__builtin_select(S, OpPC, Call);
+ case X86::BI__builtin_ia32_shufps:
+ case X86::BI__builtin_ia32_shufps256:
+ case X86::BI__builtin_ia32_shufps512:
+ return interp__builtin_ia32_shuffle_generic(
+ S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
+ unsigned NumElemPerLane = 4;
+ unsigned NumSelectableElems = NumElemPerLane / 2;
+ unsigned BitsPerElem = 2;
+ unsigned IndexMask = 0x3;
+ unsigned MaskBits = 8;
+ unsigned Lane = DstIdx / NumElemPerLane;
+ unsigned ElemInLane = DstIdx % NumElemPerLane;
+ unsigned LaneOffset = Lane * NumElemPerLane;
+ unsigned SrcIdx = ElemInLane >= NumSelectableElems ? 1 : 0;
+ unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits;
+ unsigned Index = (ShuffleMask >> BitIndex) & IndexMask;
+ return std::pair<unsigned, unsigned>{SrcIdx, LaneOffset + Index};
+ });
+ case X86::BI__builtin_ia32_shufpd:
+ case X86::BI__builtin_ia32_shufpd256:
+ case X86::BI__builtin_ia32_shufpd512:
+ return interp__builtin_ia32_shuffle_generic(
+ S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
+ unsigned NumElemPerLane = 2;
+ unsigned NumSelectableElems = NumElemPerLane / 2;
+ unsigned BitsPerElem = 1;
+ unsigned IndexMask = 0x1;
+ unsigned MaskBits = 8;
+ unsigned Lane = DstIdx / NumElemPerLane;
+ unsigned ElemInLane = DstIdx % NumElemPerLane;
+ unsigned LaneOffset = Lane * NumElemPerLane;
+ unsigned SrcIdx = ElemInLane >= NumSelectableElems ? 1 : 0;
+ unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits;
+ unsigned Index = (ShuffleMask >> BitIndex) & IndexMask;
+ return std::pair<unsigned, unsigned>{SrcIdx, LaneOffset + Index};
+ });
case X86::BI__builtin_ia32_pshufb128:
case X86::BI__builtin_ia32_pshufb256:
case X86::BI__builtin_ia32_pshufb512:
@@ -4390,6 +4485,39 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case X86::BI__builtin_ia32_vec_set_v4di:
return interp__builtin_vec_set(S, OpPC, Call, BuiltinID);
+ case X86::BI__builtin_ia32_pslldqi128_byteshift:
+ case X86::BI__builtin_ia32_pslldqi256_byteshift:
+ case X86::BI__builtin_ia32_pslldqi512_byteshift:
+ // These SLLDQ intrinsics always operate on byte elements (8 bits).
+ // The lane width is hardcoded to 16 to match the SIMD register size,
+ // but the algorithm processes one byte per iteration,
+ // so APInt(8, ...) is correct and intentional.
+ return interp__builtin_x86_byteshift(
+ S, OpPC, Call, BuiltinID,
+ [](const Pointer &Src, unsigned Lane, unsigned I, unsigned Shift) {
+ if (I < Shift) {
+ return APInt(8, 0);
+ }
+ return APInt(8, Src.elem<uint8_t>(Lane + I - Shift));
+ });
+
+ case X86::BI__builtin_ia32_psrldqi128_byteshift:
+ case X86::BI__builtin_ia32_psrldqi256_byteshift:
+ case X86::BI__builtin_ia32_psrldqi512_byteshift:
+ // These SRLDQ intrinsics always operate on byte elements (8 bits).
+ // The lane width is hardcoded to 16 to match the SIMD register size,
+ // but the algorithm processes one byte per iteration,
+ // so APInt(8, ...) is correct and intentional.
+ return interp__builtin_x86_byteshift(
+ S, OpPC, Call, BuiltinID,
+ [](const Pointer &Src, unsigned Lane, unsigned I, unsigned Shift) {
+ if (I + Shift < 16) {
+ return APInt(8, Src.elem<uint8_t>(Lane + I + Shift));
+ }
+
+ return APInt(8, 0);
+ });
+
default:
S.FFDiag(S.Current->getLocation(OpPC),
diag::note_invalid_subexpr_in_const_expr)