diff options
Diffstat (limited to 'clang/lib/AST')
| -rw-r--r-- | clang/lib/AST/ByteCode/InterpBuiltin.cpp | 128 | ||||
| -rw-r--r-- | clang/lib/AST/CommentSema.cpp | 4 | ||||
| -rw-r--r-- | clang/lib/AST/ExprConstant.cpp | 144 | ||||
| -rw-r--r-- | clang/lib/AST/StmtOpenACC.cpp | 44 | 
4 files changed, 309 insertions, 11 deletions
| diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index ff50e6d..d0b97a1 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3320,6 +3320,65 @@ static bool interp__builtin_ia32_vpconflict(InterpState &S, CodePtr OpPC,    return true;  } +static bool interp__builtin_x86_byteshift( +    InterpState &S, CodePtr OpPC, const CallExpr *Call, unsigned ID, +    llvm::function_ref<APInt(const Pointer &, unsigned Lane, unsigned I, +                             unsigned Shift)> +        Fn) { +  assert(Call->getNumArgs() == 2); + +  APSInt ImmAPS = popToAPSInt(S, Call->getArg(1)); +  uint64_t Shift = ImmAPS.getZExtValue() & 0xff; + +  const Pointer &Src = S.Stk.pop<Pointer>(); +  if (!Src.getFieldDesc()->isPrimitiveArray()) +    return false; + +  unsigned NumElems = Src.getNumElems(); +  const Pointer &Dst = S.Stk.peek<Pointer>(); +  PrimType ElemT = Src.getFieldDesc()->getPrimType(); + +  for (unsigned Lane = 0; Lane != NumElems; Lane += 16) { +    for (unsigned I = 0; I != 16; ++I) { +      unsigned Base = Lane + I; +      APSInt Result = APSInt(Fn(Src, Lane, I, Shift)); +      INT_TYPE_SWITCH_NO_BOOL(ElemT, +                              { Dst.elem<T>(Base) = static_cast<T>(Result); }); +    } +  } + +  Dst.initializeAllElements(); + +  return true; +} + +static bool interp__builtin_ia32_shuffle_generic( +    InterpState &S, CodePtr OpPC, const CallExpr *Call, +    llvm::function_ref<std::pair<unsigned, unsigned>(unsigned, unsigned)> +        GetSourceIndex) { + +  assert(Call->getNumArgs() == 3); +  unsigned ShuffleMask = popToAPSInt(S, Call->getArg(2)).getZExtValue(); + +  QualType Arg0Type = Call->getArg(0)->getType(); +  const auto *VecT = Arg0Type->castAs<VectorType>(); +  PrimType ElemT = *S.getContext().classify(VecT->getElementType()); +  unsigned NumElems = VecT->getNumElements(); + +  const Pointer &B = S.Stk.pop<Pointer>(); +  const Pointer &A = 
S.Stk.pop<Pointer>(); +  const Pointer &Dst = S.Stk.peek<Pointer>(); + +  for (unsigned DstIdx = 0; DstIdx != NumElems; ++DstIdx) { +    auto [SrcVecIdx, SrcIdx] = GetSourceIndex(DstIdx, ShuffleMask); +    const Pointer &Src = (SrcVecIdx == 0) ? A : B; +    TYPE_SWITCH(ElemT, { Dst.elem<T>(DstIdx) = Src.elem<T>(SrcIdx); }); +  } +  Dst.initializeAllElements(); + +  return true; +} +  bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,                        uint32_t BuiltinID) {    if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID)) @@ -4250,6 +4309,42 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,    case X86::BI__builtin_ia32_selectpd_512:      return interp__builtin_select(S, OpPC, Call); +  case X86::BI__builtin_ia32_shufps: +  case X86::BI__builtin_ia32_shufps256: +  case X86::BI__builtin_ia32_shufps512: +    return interp__builtin_ia32_shuffle_generic( +        S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) { +          unsigned NumElemPerLane = 4; +          unsigned NumSelectableElems = NumElemPerLane / 2; +          unsigned BitsPerElem = 2; +          unsigned IndexMask = 0x3; +          unsigned MaskBits = 8; +          unsigned Lane = DstIdx / NumElemPerLane; +          unsigned ElemInLane = DstIdx % NumElemPerLane; +          unsigned LaneOffset = Lane * NumElemPerLane; +          unsigned SrcIdx = ElemInLane >= NumSelectableElems ? 
1 : 0; +          unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits; +          unsigned Index = (ShuffleMask >> BitIndex) & IndexMask; +          return std::pair<unsigned, unsigned>{SrcIdx, LaneOffset + Index}; +        }); +  case X86::BI__builtin_ia32_shufpd: +  case X86::BI__builtin_ia32_shufpd256: +  case X86::BI__builtin_ia32_shufpd512: +    return interp__builtin_ia32_shuffle_generic( +        S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) { +          unsigned NumElemPerLane = 2; +          unsigned NumSelectableElems = NumElemPerLane / 2; +          unsigned BitsPerElem = 1; +          unsigned IndexMask = 0x1; +          unsigned MaskBits = 8; +          unsigned Lane = DstIdx / NumElemPerLane; +          unsigned ElemInLane = DstIdx % NumElemPerLane; +          unsigned LaneOffset = Lane * NumElemPerLane; +          unsigned SrcIdx = ElemInLane >= NumSelectableElems ? 1 : 0; +          unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits; +          unsigned Index = (ShuffleMask >> BitIndex) & IndexMask; +          return std::pair<unsigned, unsigned>{SrcIdx, LaneOffset + Index}; +        });    case X86::BI__builtin_ia32_pshufb128:    case X86::BI__builtin_ia32_pshufb256:    case X86::BI__builtin_ia32_pshufb512: @@ -4390,6 +4485,39 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,    case X86::BI__builtin_ia32_vec_set_v4di:      return interp__builtin_vec_set(S, OpPC, Call, BuiltinID); +  case X86::BI__builtin_ia32_pslldqi128_byteshift: +  case X86::BI__builtin_ia32_pslldqi256_byteshift: +  case X86::BI__builtin_ia32_pslldqi512_byteshift: +    // These SLLDQ intrinsics always operate on byte elements (8 bits). +    // The shift is applied independently within each 16-byte (128-bit) lane, +    // so the 256/512-bit variants simply iterate over several lanes. One byte +    // is produced per iteration, so APInt(8, ...) is correct and intentional. 
+    return interp__builtin_x86_byteshift( +        S, OpPC, Call, BuiltinID, +        [](const Pointer &Src, unsigned Lane, unsigned I, unsigned Shift) { +          if (I < Shift) { +            return APInt(8, 0); +          } +          return APInt(8, Src.elem<uint8_t>(Lane + I - Shift)); +        }); + +  case X86::BI__builtin_ia32_psrldqi128_byteshift: +  case X86::BI__builtin_ia32_psrldqi256_byteshift: +  case X86::BI__builtin_ia32_psrldqi512_byteshift: +    // These SRLDQ intrinsics always operate on byte elements (8 bits). +    // The shift is applied independently within each 16-byte (128-bit) lane, +    // so the 256/512-bit variants simply iterate over several lanes. One byte +    // is produced per iteration, so APInt(8, ...) is correct and intentional. +    return interp__builtin_x86_byteshift( +        S, OpPC, Call, BuiltinID, +        [](const Pointer &Src, unsigned Lane, unsigned I, unsigned Shift) { +          if (I + Shift < 16) { +            return APInt(8, Src.elem<uint8_t>(Lane + I + Shift)); +          } + +          return APInt(8, 0); +        }); +    default:      S.FFDiag(S.Current->getLocation(OpPC),               diag::note_invalid_subexpr_in_const_expr) diff --git a/clang/lib/AST/CommentSema.cpp b/clang/lib/AST/CommentSema.cpp index 649fba9..27ff5ab 100644 --- a/clang/lib/AST/CommentSema.cpp +++ b/clang/lib/AST/CommentSema.cpp @@ -1061,8 +1061,8 @@ InlineCommandRenderKind Sema::getInlineCommandRenderKind(StringRef Name) const {    return llvm::StringSwitch<InlineCommandRenderKind>(Name)        .Case("b", InlineCommandRenderKind::Bold) -      .Cases("c", "p", InlineCommandRenderKind::Monospaced) -      .Cases("a", "e", "em", InlineCommandRenderKind::Emphasized) +      .Cases({"c", "p"}, InlineCommandRenderKind::Monospaced) +      .Cases({"a", "e", "em"}, InlineCommandRenderKind::Emphasized)        .Case("anchor", InlineCommandRenderKind::Anchor)        .Default(InlineCommandRenderKind::Normal);  } diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 
2bd4476..29ee089 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -11619,6 +11619,39 @@ static bool evalPackBuiltin(const CallExpr *E, EvalInfo &Info, APValue &Result,    return true;  } +static bool evalShuffleGeneric( +    EvalInfo &Info, const CallExpr *Call, APValue &Out, +    llvm::function_ref<std::pair<unsigned, unsigned>(unsigned, unsigned)> +        GetSourceIndex) { + +  const auto *VT = Call->getType()->getAs<VectorType>(); +  if (!VT) +    return false; + +  APSInt MaskImm; +  if (!EvaluateInteger(Call->getArg(2), MaskImm, Info)) +    return false; +  unsigned ShuffleMask = static_cast<unsigned>(MaskImm.getZExtValue()); + +  APValue A, B; +  if (!EvaluateAsRValue(Info, Call->getArg(0), A) || +      !EvaluateAsRValue(Info, Call->getArg(1), B)) +    return false; + +  unsigned NumElts = VT->getNumElements(); +  SmallVector<APValue, 16> ResultElements; +  ResultElements.reserve(NumElts); + +  for (unsigned DstIdx = 0; DstIdx != NumElts; ++DstIdx) { +    auto [SrcVecIdx, SrcIdx] = GetSourceIndex(DstIdx, ShuffleMask); +    const APValue &Src = (SrcVecIdx == 0) ? 
A : B; +    ResultElements.push_back(Src.getVectorElt(SrcIdx)); +  } + +  Out = APValue(ResultElements.data(), ResultElements.size()); +  return true; +} +  static bool evalPshufbBuiltin(EvalInfo &Info, const CallExpr *Call,                                APValue &Out) {    APValue SrcVec, ControlVec; @@ -12398,7 +12431,56 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {      return Success(APValue(ResultElements.data(), ResultElements.size()), E);    } - +  case X86::BI__builtin_ia32_shufps: +  case X86::BI__builtin_ia32_shufps256: +  case X86::BI__builtin_ia32_shufps512: { +    APValue R; +    if (!evalShuffleGeneric( +            Info, E, R, +            [](unsigned DstIdx, +               unsigned ShuffleMask) -> std::pair<unsigned, unsigned> { +              constexpr unsigned LaneBits = 128u; +              unsigned NumElemPerLane = LaneBits / 32; +              unsigned NumSelectableElems = NumElemPerLane / 2; +              unsigned BitsPerElem = 2; +              unsigned IndexMask = (1u << BitsPerElem) - 1; +              unsigned MaskBits = 8; +              unsigned Lane = DstIdx / NumElemPerLane; +              unsigned ElemInLane = DstIdx % NumElemPerLane; +              unsigned LaneOffset = Lane * NumElemPerLane; +              unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits; +              unsigned SrcIdx = (ElemInLane < NumSelectableElems) ? 
0 : 1; +              unsigned Index = (ShuffleMask >> BitIndex) & IndexMask; +              return {SrcIdx, LaneOffset + Index}; +            })) +      return false; +    return Success(R, E); +  } +  case X86::BI__builtin_ia32_shufpd: +  case X86::BI__builtin_ia32_shufpd256: +  case X86::BI__builtin_ia32_shufpd512: { +    APValue R; +    if (!evalShuffleGeneric( +            Info, E, R, +            [](unsigned DstIdx, +               unsigned ShuffleMask) -> std::pair<unsigned, unsigned> { +              constexpr unsigned LaneBits = 128u; +              unsigned NumElemPerLane = LaneBits / 64; +              unsigned NumSelectableElems = NumElemPerLane / 2; +              unsigned BitsPerElem = 1; +              unsigned IndexMask = (1u << BitsPerElem) - 1; +              unsigned MaskBits = 8; +              unsigned Lane = DstIdx / NumElemPerLane; +              unsigned ElemInLane = DstIdx % NumElemPerLane; +              unsigned LaneOffset = Lane * NumElemPerLane; +              unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits; +              unsigned SrcIdx = (ElemInLane < NumSelectableElems) ? 
0 : 1; +              unsigned Index = (ShuffleMask >> BitIndex) & IndexMask; +              return {SrcIdx, LaneOffset + Index}; +            })) +      return false; +    return Success(R, E); +  }    case X86::BI__builtin_ia32_pshufb128:    case X86::BI__builtin_ia32_pshufb256:    case X86::BI__builtin_ia32_pshufb512: { @@ -12906,6 +12988,66 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {      return Success(APValue(Elems.data(), NumElems), E);    } + +  case X86::BI__builtin_ia32_pslldqi128_byteshift: +  case X86::BI__builtin_ia32_pslldqi256_byteshift: +  case X86::BI__builtin_ia32_pslldqi512_byteshift: { +    assert(E->getNumArgs() == 2); + +    APValue Src; +    APSInt Imm; +    if (!EvaluateAsRValue(Info, E->getArg(0), Src) || +        !EvaluateInteger(E->getArg(1), Imm, Info)) +      return false; + +    unsigned VecLen = Src.getVectorLength(); +    unsigned Shift = Imm.getZExtValue() & 0xff; + +    SmallVector<APValue> ResultElements; +    for (unsigned Lane = 0; Lane != VecLen; Lane += 16) { +      for (unsigned I = 0; I != 16; ++I) { +        if (I < Shift) { +          APSInt Zero(8, /*isUnsigned=*/true); +          Zero = 0; +          ResultElements.push_back(APValue(Zero)); +        } else { +          ResultElements.push_back(Src.getVectorElt(Lane + I - Shift)); +        } +      } +    } + +    return Success(APValue(ResultElements.data(), ResultElements.size()), E); +  } + +  case X86::BI__builtin_ia32_psrldqi128_byteshift: +  case X86::BI__builtin_ia32_psrldqi256_byteshift: +  case X86::BI__builtin_ia32_psrldqi512_byteshift: { +    assert(E->getNumArgs() == 2); + +    APValue Src; +    APSInt Imm; +    if (!EvaluateAsRValue(Info, E->getArg(0), Src) || +        !EvaluateInteger(E->getArg(1), Imm, Info)) +      return false; + +    unsigned VecLen = Src.getVectorLength(); +    unsigned Shift = Imm.getZExtValue() & 0xff; + +    SmallVector<APValue> ResultElements; +    for (unsigned Lane = 0; Lane != VecLen; Lane += 16) { +      for 
(unsigned I = 0; I != 16; ++I) { +        if (I + Shift < 16) { +          ResultElements.push_back(Src.getVectorElt(Lane + I + Shift)); +        } else { +          APSInt Zero(8, /*isUnsigned=*/true); +          Zero = 0; +          ResultElements.push_back(APValue(Zero)); +        } +      } +    } + +    return Success(APValue(ResultElements.data(), ResultElements.size()), E); +  }    }  } diff --git a/clang/lib/AST/StmtOpenACC.cpp b/clang/lib/AST/StmtOpenACC.cpp index 462a10d..39dfa19 100644 --- a/clang/lib/AST/StmtOpenACC.cpp +++ b/clang/lib/AST/StmtOpenACC.cpp @@ -326,16 +326,30 @@ OpenACCAtomicConstruct *OpenACCAtomicConstruct::Create(  static std::pair<const Expr *, const Expr *> getBinaryOpArgs(const Expr *Op) {    if (const auto *BO = dyn_cast<BinaryOperator>(Op)) { -    assert(BO->getOpcode() == BO_Assign); +    assert(BO->isAssignmentOp());      return {BO->getLHS(), BO->getRHS()};    }    const auto *OO = cast<CXXOperatorCallExpr>(Op); -  assert(OO->getOperator() == OO_Equal); - +  assert(OO->isAssignmentOp());    return {OO->getArg(0), OO->getArg(1)};  } +static std::pair<bool, const Expr *> getUnaryOpArgs(const Expr *Op) { +  if (const auto *UO = dyn_cast<UnaryOperator>(Op)) +    return {true, UO->getSubExpr()}; + +  if (const auto *OpCall = dyn_cast<CXXOperatorCallExpr>(Op)) { +    // Post-inc/dec carry a second, unused argument to distinguish them from +    // pre-inc/dec, so accept any ++ or -- call, or any call with only 1 arg. +    if (OpCall->getNumArgs() == 1 || OpCall->getOperator() == OO_PlusPlus || +        OpCall->getOperator() == OO_MinusMinus) +      return {true, OpCall->getArg(0)}; +  } + +  return {false, nullptr}; +} +  const OpenACCAtomicConstruct::StmtInfo  OpenACCAtomicConstruct::getAssociatedStmtInfo() const {    // This ends up being a vastly simplified version of SemaOpenACCAtomic, since @@ -343,18 +357,17 @@ OpenACCAtomicConstruct::getAssociatedStmtInfo() const {    // asserts to ensure we don't get off into the weeds.    
assert(getAssociatedStmt() && "invalid associated stmt?"); +  const Expr *AssocStmt = cast<const Expr>(getAssociatedStmt());    switch (AtomicKind) { -  case OpenACCAtomicKind::None: -  case OpenACCAtomicKind::Update:    case OpenACCAtomicKind::Capture: -    assert(false && "Only 'read'/'write' have been implemented here"); +    assert(false && "Only 'read'/'write'/'update' have been implemented here");      return {};    case OpenACCAtomicKind::Read: {      // Read only supports the format 'v = x'; where both sides are a scalar      // expression. This can come in 2 forms; BinaryOperator or      // CXXOperatorCallExpr (rarely).      std::pair<const Expr *, const Expr *> BinaryArgs = -        getBinaryOpArgs(cast<const Expr>(getAssociatedStmt())); +        getBinaryOpArgs(AssocStmt);      // We want the L-value for each side, so we ignore implicit casts.      return {BinaryArgs.first->IgnoreImpCasts(),              BinaryArgs.second->IgnoreImpCasts(), /*expr=*/nullptr}; @@ -364,13 +377,28 @@ OpenACCAtomicConstruct::getAssociatedStmtInfo() const {      // type, and 'x' is a scalar l value. As above, this can come in 2 forms;      // Binary Operator or CXXOperatorCallExpr.      std::pair<const Expr *, const Expr *> BinaryArgs = -        getBinaryOpArgs(cast<const Expr>(getAssociatedStmt())); +        getBinaryOpArgs(AssocStmt);      // We want the L-value for ONLY the X side, so we ignore implicit casts. For      // the right side (the expr), we emit it as an r-value so we need to      // maintain implicit casts.      
return {/*v=*/nullptr, BinaryArgs.first->IgnoreImpCasts(),              BinaryArgs.second};    } +  case OpenACCAtomicKind::None: +  case OpenACCAtomicKind::Update: { +    std::pair<bool, const Expr *> UnaryArgs = getUnaryOpArgs(AssocStmt); +    if (UnaryArgs.first) +      return {/*v=*/nullptr, UnaryArgs.second->IgnoreImpCasts(), +              /*expr=*/nullptr}; + +    std::pair<const Expr *, const Expr *> BinaryArgs = +        getBinaryOpArgs(AssocStmt); +    // For binary args, we just store the RHS as an expression (in the +    // expression slot), since the codegen just wants the whole thing for a +    // recipe. +    return {/*v=*/nullptr, BinaryArgs.first->IgnoreImpCasts(), +            BinaryArgs.second}; +  }    }    llvm_unreachable("unknown OpenACC atomic kind"); | 
