diff options
author | Aditya Chaudhari <98672108+AdityaC4@users.noreply.github.com> | 2025-09-18 11:41:49 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-09-18 16:41:49 +0000 |
commit | 0ade3461ffd78ba91e7abf69ee499d4befe009df (patch) | |
tree | 92fa242af4f408a875aa21b7021fab514bc31aad /clang/lib/AST/ByteCode/InterpBuiltin.cpp | |
parent | b8649098a7fcf598406d8d8b7d68891d1444e9c8 (diff) | |
download | llvm-0ade3461ffd78ba91e7abf69ee499d4befe009df.zip llvm-0ade3461ffd78ba91e7abf69ee499d4befe009df.tar.gz llvm-0ade3461ffd78ba91e7abf69ee499d4befe009df.tar.bz2 |
[Clang] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - allow AVX/AVX512 subvector insertion intrinsics to be used in constexpr #157709 (#158778)
AVX/AVX512 vector insert intrinsics now support constexpr evaluation in both the AST evaluator and bytecode interpreter paths.
FIXES: #157709
Diffstat (limited to 'clang/lib/AST/ByteCode/InterpBuiltin.cpp')
-rw-r--r-- | clang/lib/AST/ByteCode/InterpBuiltin.cpp | 60 |
1 files changed, 60 insertions, 0 deletions
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index b7b6d65..64962ee 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2914,6 +2914,48 @@ static bool interp__builtin_elementwise_triop(
   return true;
 }
 
+static bool interp__builtin_x86_insert_subvector(InterpState &S, CodePtr OpPC,
+                                                 const CallExpr *Call,
+                                                 unsigned ID) {
+  assert(Call->getNumArgs() == 3);
+
+  APSInt ImmAPS = popToAPSInt(S, Call->getArg(2));
+  uint64_t Index = ImmAPS.getZExtValue();
+
+  const Pointer &SubVec = S.Stk.pop<Pointer>();
+  if (!SubVec.getFieldDesc()->isPrimitiveArray())
+    return false;
+
+  const Pointer &BaseVec = S.Stk.pop<Pointer>();
+  if (!BaseVec.getFieldDesc()->isPrimitiveArray())
+    return false;
+
+  const Pointer &Dst = S.Stk.peek<Pointer>();
+
+  unsigned BaseElements = BaseVec.getNumElems();
+  unsigned SubElements = SubVec.getNumElems();
+
+  assert(SubElements != 0 && BaseElements != 0 &&
+         (BaseElements % SubElements) == 0);
+
+  unsigned NumLanes = BaseElements / SubElements;
+  unsigned Lane = static_cast<unsigned>(Index % NumLanes);
+  unsigned InsertPos = Lane * SubElements;
+
+  PrimType ElemPT = BaseVec.getFieldDesc()->getPrimType();
+
+  TYPE_SWITCH(ElemPT, {
+    for (unsigned I = 0; I != BaseElements; ++I)
+      Dst.elem<T>(I) = BaseVec.elem<T>(I);
+    for (unsigned I = 0; I != SubElements; ++I)
+      Dst.elem<T>(InsertPos + I) = SubVec.elem<T>(I);
+  });
+
+  Dst.initializeAllElements();
+
+  return true;
+}
+
 bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
                       uint32_t BuiltinID) {
   if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID))
@@ -3572,6 +3614,24 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
     return interp__builtin_elementwise_triop(S, OpPC, Call,
                                              llvm::APIntOps::fshr);
 
+  case X86::BI__builtin_ia32_insertf32x4_256:
+  case X86::BI__builtin_ia32_inserti32x4_256:
+  case X86::BI__builtin_ia32_insertf64x2_256:
+  case X86::BI__builtin_ia32_inserti64x2_256:
+  case X86::BI__builtin_ia32_insertf32x4:
+  case X86::BI__builtin_ia32_inserti32x4:
+  case X86::BI__builtin_ia32_insertf64x2_512:
+  case X86::BI__builtin_ia32_inserti64x2_512:
+  case X86::BI__builtin_ia32_insertf32x8:
+  case X86::BI__builtin_ia32_inserti32x8:
+  case X86::BI__builtin_ia32_insertf64x4:
+  case X86::BI__builtin_ia32_inserti64x4:
+  case X86::BI__builtin_ia32_vinsertf128_ps256:
+  case X86::BI__builtin_ia32_vinsertf128_pd256:
+  case X86::BI__builtin_ia32_vinsertf128_si256:
+  case X86::BI__builtin_ia32_insert128i256:
+    return interp__builtin_x86_insert_subvector(S, OpPC, Call, BuiltinID);
+
   default:
     S.FFDiag(S.Current->getLocation(OpPC),
              diag::note_invalid_subexpr_in_const_expr)