aboutsummaryrefslogtreecommitdiff
path: root/clang/lib
diff options
context:
space:
mode:
Diffstat (limited to 'clang/lib')
-rw-r--r--clang/lib/AST/ASTContext.cpp4
-rw-r--r--clang/lib/AST/ByteCode/InterpBuiltin.cpp35
-rw-r--r--clang/lib/AST/ExprConstant.cpp31
-rw-r--r--clang/lib/Basic/LangOptions.cpp8
-rw-r--r--clang/lib/Basic/Targets/X86.cpp2
-rw-r--r--clang/lib/CIR/Dialect/IR/CIRDialect.cpp124
-rw-r--r--clang/lib/CodeGen/CGBuiltin.cpp31
-rw-r--r--clang/lib/Headers/avx512cdintrin.h81
-rw-r--r--clang/lib/Headers/avx512dqintrin.h60
-rw-r--r--clang/lib/Headers/avx512fintrin.h126
-rw-r--r--clang/lib/Headers/avx512vlcdintrin.h149
-rw-r--r--clang/lib/Headers/avx512vldqintrin.h50
-rw-r--r--clang/lib/Headers/avx512vlintrin.h361
-rw-r--r--clang/lib/Sema/HLSLExternalSemaSource.cpp3
-rw-r--r--clang/lib/Sema/SemaCast.cpp3
-rw-r--r--clang/lib/Sema/SemaChecking.cpp4
-rw-r--r--clang/lib/Sema/SemaType.cpp10
17 files changed, 557 insertions, 525 deletions
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index e403b3e..32c8f62 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -4712,8 +4712,8 @@ QualType ASTContext::getConstantMatrixType(QualType ElementTy, unsigned NumRows,
assert(MatrixType::isValidElementType(ElementTy) &&
"need a valid element type");
- assert(ConstantMatrixType::isDimensionValid(NumRows) &&
- ConstantMatrixType::isDimensionValid(NumColumns) &&
+ assert(NumRows > 0 && NumRows <= LangOpts.MaxMatrixDimension &&
+ NumColumns > 0 && NumColumns <= LangOpts.MaxMatrixDimension &&
"need valid matrix dimensions");
void *InsertPos = nullptr;
if (ConstantMatrixType *MTP = MatrixTypes.FindNodeOrInsertPos(ID, InsertPos))
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index b69f360..a0d2c76 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3101,6 +3101,33 @@ static bool interp__builtin_vec_set(InterpState &S, CodePtr OpPC,
return true;
}
+static bool interp__builtin_ia32_vpconflict(InterpState &S, CodePtr OpPC,
+ const CallExpr *Call) {
+ assert(Call->getNumArgs() == 1);
+
+ QualType Arg0Type = Call->getArg(0)->getType();
+ const auto *VecT = Arg0Type->castAs<VectorType>();
+ PrimType ElemT = *S.getContext().classify(VecT->getElementType());
+ unsigned NumElems = VecT->getNumElements();
+ bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType();
+ const Pointer &Src = S.Stk.pop<Pointer>();
+ const Pointer &Dst = S.Stk.peek<Pointer>();
+
+ for (unsigned I = 0; I != NumElems; ++I) {
+ INT_TYPE_SWITCH_NO_BOOL(ElemT, {
+ APSInt ElemI = Src.elem<T>(I).toAPSInt();
+ APInt ConflictMask(ElemI.getBitWidth(), 0);
+ for (unsigned J = 0; J != I; ++J) {
+ APSInt ElemJ = Src.elem<T>(J).toAPSInt();
+ ConflictMask.setBitVal(J, ElemI == ElemJ);
+ }
+ Dst.elem<T>(I) = static_cast<T>(APSInt(ConflictMask, DestUnsigned));
+ });
+ }
+ Dst.initializeAllElements();
+ return true;
+}
+
bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
uint32_t BuiltinID) {
if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID))
@@ -3891,7 +3918,13 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
[](const APSInt &Lo, const APSInt &Hi, const APSInt &Amt) {
return llvm::APIntOps::fshr(Hi, Lo, Amt);
});
-
+ case X86::BI__builtin_ia32_vpconflictsi_128:
+ case X86::BI__builtin_ia32_vpconflictsi_256:
+ case X86::BI__builtin_ia32_vpconflictsi_512:
+ case X86::BI__builtin_ia32_vpconflictdi_128:
+ case X86::BI__builtin_ia32_vpconflictdi_256:
+ case X86::BI__builtin_ia32_vpconflictdi_512:
+ return interp__builtin_ia32_vpconflict(S, OpPC, Call);
case clang::X86::BI__builtin_ia32_blendpd:
case clang::X86::BI__builtin_ia32_blendpd256:
case clang::X86::BI__builtin_ia32_blendps:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index a07eb22..16141b2 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12179,6 +12179,37 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
+ case X86::BI__builtin_ia32_vpconflictsi_128:
+ case X86::BI__builtin_ia32_vpconflictsi_256:
+ case X86::BI__builtin_ia32_vpconflictsi_512:
+ case X86::BI__builtin_ia32_vpconflictdi_128:
+ case X86::BI__builtin_ia32_vpconflictdi_256:
+ case X86::BI__builtin_ia32_vpconflictdi_512: {
+ APValue Source;
+
+ if (!EvaluateAsRValue(Info, E->getArg(0), Source))
+ return false;
+
+ unsigned SourceLen = Source.getVectorLength();
+ SmallVector<APValue, 32> ResultElements;
+ ResultElements.reserve(SourceLen);
+
+ const auto *VecT = E->getType()->castAs<VectorType>();
+ bool DestUnsigned =
+ VecT->getElementType()->isUnsignedIntegerOrEnumerationType();
+
+ for (unsigned I = 0; I != SourceLen; ++I) {
+ const APValue &EltI = Source.getVectorElt(I);
+
+ APInt ConflictMask(EltI.getInt().getBitWidth(), 0);
+ for (unsigned J = 0; J != I; ++J) {
+ const APValue &EltJ = Source.getVectorElt(J);
+ ConflictMask.setBitVal(J, EltI.getInt() == EltJ.getInt());
+ }
+ ResultElements.push_back(APValue(APSInt(ConflictMask, DestUnsigned)));
+ }
+ return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+ }
case X86::BI__builtin_ia32_blendpd:
case X86::BI__builtin_ia32_blendpd256:
case X86::BI__builtin_ia32_blendps:
diff --git a/clang/lib/Basic/LangOptions.cpp b/clang/lib/Basic/LangOptions.cpp
index 641a3db..19b5576 100644
--- a/clang/lib/Basic/LangOptions.cpp
+++ b/clang/lib/Basic/LangOptions.cpp
@@ -132,8 +132,12 @@ void LangOptions::setLangDefaults(LangOptions &Opts, Language Lang,
Opts.NamedLoops = Std.isC2y();
Opts.HLSL = Lang == Language::HLSL;
- if (Opts.HLSL && Opts.IncludeDefaultHeader)
- Includes.push_back("hlsl.h");
+ if (Opts.HLSL) {
+ if (Opts.IncludeDefaultHeader)
+ Includes.push_back("hlsl.h");
+ // Set maximum matrix dimension to 4 for HLSL
+ Opts.MaxMatrixDimension = 4;
+ }
// Set OpenCL Version.
Opts.OpenCL = Std.isOpenCL();
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index ef4973c..e71f10c 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -626,6 +626,7 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
case CK_Lunarlake:
case CK_Pantherlake:
case CK_Wildcatlake:
+ case CK_Novalake:
case CK_Sierraforest:
case CK_Grandridge:
case CK_Graniterapids:
@@ -1615,6 +1616,7 @@ std::optional<unsigned> X86TargetInfo::getCPUCacheLineSize() const {
case CK_Lunarlake:
case CK_Pantherlake:
case CK_Wildcatlake:
+ case CK_Novalake:
case CK_Sierraforest:
case CK_Grandridge:
case CK_Graniterapids:
diff --git a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
index 7af3dc1..0712de2 100644
--- a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
+++ b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
@@ -2915,6 +2915,130 @@ LogicalResult cir::TypeInfoAttr::verify(
}
//===----------------------------------------------------------------------===//
+// TryOp
+//===----------------------------------------------------------------------===//
+
+void cir::TryOp::getSuccessorRegions(
+ mlir::RegionBranchPoint point,
+ llvm::SmallVectorImpl<mlir::RegionSuccessor> &regions) {
+ // The `try` and the `catchers` region branch back to the parent operation.
+ if (!point.isParent()) {
+ regions.push_back(mlir::RegionSuccessor());
+ return;
+ }
+
+ regions.push_back(mlir::RegionSuccessor(&getTryRegion()));
+
+ // TODO(CIR): If we know a target function never throws a specific type, we
+ // can remove the catch handler.
+ for (mlir::Region &handlerRegion : this->getHandlerRegions())
+ regions.push_back(mlir::RegionSuccessor(&handlerRegion));
+}
+
+static void
+printTryHandlerRegions(mlir::OpAsmPrinter &printer, cir::TryOp op,
+ mlir::MutableArrayRef<mlir::Region> handlerRegions,
+ mlir::ArrayAttr handlerTypes) {
+ if (!handlerTypes)
+ return;
+
+ for (const auto [typeIdx, typeAttr] : llvm::enumerate(handlerTypes)) {
+ if (typeIdx)
+ printer << " ";
+
+ if (mlir::isa<cir::CatchAllAttr>(typeAttr)) {
+ printer << "catch all ";
+ } else if (mlir::isa<cir::UnwindAttr>(typeAttr)) {
+ printer << "unwind ";
+ } else {
+ printer << "catch [type ";
+ printer.printAttribute(typeAttr);
+ printer << "] ";
+ }
+
+ printer.printRegion(handlerRegions[typeIdx],
+ /*printEntryBLockArgs=*/false,
+ /*printBlockTerminators=*/true);
+ }
+}
+
+static mlir::ParseResult parseTryHandlerRegions(
+ mlir::OpAsmParser &parser,
+ llvm::SmallVectorImpl<std::unique_ptr<mlir::Region>> &handlerRegions,
+ mlir::ArrayAttr &handlerTypes) {
+
+ auto parseCheckedCatcherRegion = [&]() -> mlir::ParseResult {
+ handlerRegions.emplace_back(new mlir::Region);
+
+ mlir::Region &currRegion = *handlerRegions.back();
+ mlir::SMLoc regionLoc = parser.getCurrentLocation();
+ if (parser.parseRegion(currRegion)) {
+ handlerRegions.clear();
+ return failure();
+ }
+
+ if (!currRegion.empty() && !(currRegion.back().mightHaveTerminator() &&
+ currRegion.back().getTerminator()))
+ return parser.emitError(
+ regionLoc, "blocks are expected to be explicitly terminated");
+
+ return success();
+ };
+
+ bool hasCatchAll = false;
+ llvm::SmallVector<mlir::Attribute, 4> catcherAttrs;
+ while (parser.parseOptionalKeyword("catch").succeeded()) {
+ bool hasLSquare = parser.parseOptionalLSquare().succeeded();
+
+ llvm::StringRef attrStr;
+ if (parser.parseOptionalKeyword(&attrStr, {"all", "type"}).failed())
+ return parser.emitError(parser.getCurrentLocation(),
+ "expected 'all' or 'type' keyword");
+
+ bool isCatchAll = attrStr == "all";
+ if (isCatchAll) {
+ if (hasCatchAll)
+ return parser.emitError(parser.getCurrentLocation(),
+ "can't have more than one catch all");
+ hasCatchAll = true;
+ }
+
+ mlir::Attribute exceptionRTTIAttr;
+ if (!isCatchAll && parser.parseAttribute(exceptionRTTIAttr).failed())
+ return parser.emitError(parser.getCurrentLocation(),
+ "expected valid RTTI info attribute");
+
+ catcherAttrs.push_back(isCatchAll
+ ? cir::CatchAllAttr::get(parser.getContext())
+ : exceptionRTTIAttr);
+
+ if (hasLSquare && isCatchAll)
+ return parser.emitError(parser.getCurrentLocation(),
+ "catch all dosen't need RTTI info attribute");
+
+ if (hasLSquare && parser.parseRSquare().failed())
+ return parser.emitError(parser.getCurrentLocation(),
+ "expected `]` after RTTI info attribute");
+
+ if (parseCheckedCatcherRegion().failed())
+ return mlir::failure();
+ }
+
+ if (parser.parseOptionalKeyword("unwind").succeeded()) {
+ if (hasCatchAll)
+ return parser.emitError(parser.getCurrentLocation(),
+ "unwind can't be used with catch all");
+
+ catcherAttrs.push_back(cir::UnwindAttr::get(parser.getContext()));
+ if (parseCheckedCatcherRegion().failed())
+ return mlir::failure();
+ }
+
+ handlerTypes = parser.getBuilder().getArrayAttr(catcherAttrs);
+ return mlir::success();
+}
+
+//===----------------------------------------------------------------------===//
// TableGen'd op method definitions
//===----------------------------------------------------------------------===//
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 9ee810c..92dba32 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -4283,15 +4283,11 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
CharUnits Align = CGM.getNaturalTypeAlignment(
E->getType()->getAs<VectorType>()->getElementType(), nullptr);
- llvm::Value *AlignVal =
- llvm::ConstantInt::get(Int32Ty, Align.getQuantity());
llvm::Value *Result;
if (BuiltinID == Builtin::BI__builtin_masked_load) {
- Function *F =
- CGM.getIntrinsic(Intrinsic::masked_load, {RetTy, Ptr->getType()});
- Result =
- Builder.CreateCall(F, {Ptr, AlignVal, Mask, PassThru}, "masked_load");
+ Result = Builder.CreateMaskedLoad(RetTy, Ptr, Align.getAsAlign(), Mask,
+ PassThru, "masked_load");
} else {
Function *F = CGM.getIntrinsic(Intrinsic::masked_expandload, {RetTy});
Result =
@@ -4307,8 +4303,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
llvm::Type *RetTy = CGM.getTypes().ConvertType(E->getType());
CharUnits Align = CGM.getNaturalTypeAlignment(
E->getType()->getAs<VectorType>()->getElementType(), nullptr);
- llvm::Value *AlignVal =
- llvm::ConstantInt::get(Int32Ty, Align.getQuantity());
llvm::Value *PassThru = llvm::PoisonValue::get(RetTy);
if (E->getNumArgs() > 3)
@@ -4318,12 +4312,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
E->getType()->getAs<VectorType>()->getElementType());
llvm::Value *PtrVec = Builder.CreateGEP(ElemTy, Ptr, Idx);
- llvm::Value *Result;
- Function *F =
- CGM.getIntrinsic(Intrinsic::masked_gather, {RetTy, PtrVec->getType()});
-
- Result = Builder.CreateCall(F, {PtrVec, AlignVal, Mask, PassThru},
- "masked_gather");
+ llvm::Value *Result = Builder.CreateMaskedGather(
+ RetTy, PtrVec, Align.getAsAlign(), Mask, PassThru, "masked_gather");
return RValue::get(Result);
}
case Builtin::BI__builtin_masked_store:
@@ -4338,13 +4328,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
CharUnits Align = CGM.getNaturalTypeAlignment(
E->getArg(1)->getType()->getAs<VectorType>()->getElementType(),
nullptr);
- llvm::Value *AlignVal =
- llvm::ConstantInt::get(Int32Ty, Align.getQuantity());
if (BuiltinID == Builtin::BI__builtin_masked_store) {
- llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::masked_store,
- {ValLLTy, Ptr->getType()});
- Builder.CreateCall(F, {Val, Ptr, AlignVal, Mask});
+ Builder.CreateMaskedStore(Val, Ptr, Align.getAsAlign(), Mask);
} else {
llvm::Function *F =
CGM.getIntrinsic(llvm::Intrinsic::masked_compressstore, {ValLLTy});
@@ -4361,17 +4347,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
CharUnits Align = CGM.getNaturalTypeAlignment(
E->getArg(2)->getType()->getAs<VectorType>()->getElementType(),
nullptr);
- llvm::Value *AlignVal =
- llvm::ConstantInt::get(Int32Ty, Align.getQuantity());
llvm::Type *ElemTy = CGM.getTypes().ConvertType(
E->getArg(1)->getType()->getAs<VectorType>()->getElementType());
llvm::Value *PtrVec = Builder.CreateGEP(ElemTy, Ptr, Idx);
- Function *F = CGM.getIntrinsic(Intrinsic::masked_scatter,
- {Val->getType(), PtrVec->getType()});
-
- Builder.CreateCall(F, {Val, PtrVec, AlignVal, Mask});
+ Builder.CreateMaskedScatter(Val, PtrVec, Align.getAsAlign(), Mask);
return RValue();
}
case Builtin::BI__builtin_isinf_sign: {
diff --git a/clang/lib/Headers/avx512cdintrin.h b/clang/lib/Headers/avx512cdintrin.h
index b161440..fb6dcb6 100644
--- a/clang/lib/Headers/avx512cdintrin.h
+++ b/clang/lib/Headers/avx512cdintrin.h
@@ -15,111 +15,98 @@
#define __AVX512CDINTRIN_H
/* Define the default attributes for the functions in this file. */
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS \
+ constexpr __attribute__((__always_inline__, __nodebug__, \
+ __target__("avx512cd"), __min_vector_width__(512)))
+#else
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("avx512cd"), \
__min_vector_width__(512)))
-
-#if defined(__cplusplus) && (__cplusplus >= 201103L)
-#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
-#else
-#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
#endif
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_conflict_epi64 (__m512i __A)
-{
- return (__m512i) __builtin_ia32_vpconflictdi_512 ((__v8di) __A);
+_mm512_conflict_epi64(__m512i __A) {
+ return (__m512i)__builtin_ia32_vpconflictdi_512((__v8di)__A);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_conflict_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
-{
- return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
- (__v8di)_mm512_conflict_epi64(__A),
- (__v8di)__W);
+_mm512_mask_conflict_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
+ return (__m512i)__builtin_ia32_selectq_512(
+ (__mmask8)__U, (__v8di)_mm512_conflict_epi64(__A), (__v8di)__W);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_conflict_epi64 (__mmask8 __U, __m512i __A)
-{
+_mm512_maskz_conflict_epi64(__mmask8 __U, __m512i __A) {
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
(__v8di)_mm512_conflict_epi64(__A),
- (__v8di)_mm512_setzero_si512 ());
+ (__v8di)_mm512_setzero_si512());
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_conflict_epi32 (__m512i __A)
-{
- return (__m512i) __builtin_ia32_vpconflictsi_512 ((__v16si) __A);
+_mm512_conflict_epi32(__m512i __A) {
+ return (__m512i)__builtin_ia32_vpconflictsi_512((__v16si)__A);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_conflict_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
-{
- return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
- (__v16si)_mm512_conflict_epi32(__A),
- (__v16si)__W);
+_mm512_mask_conflict_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
+ return (__m512i)__builtin_ia32_selectd_512(
+ (__mmask16)__U, (__v16si)_mm512_conflict_epi32(__A), (__v16si)__W);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_conflict_epi32 (__mmask16 __U, __m512i __A)
-{
- return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
- (__v16si)_mm512_conflict_epi32(__A),
- (__v16si)_mm512_setzero_si512());
+_mm512_maskz_conflict_epi32(__mmask16 __U, __m512i __A) {
+ return (__m512i)__builtin_ia32_selectd_512(
+ (__mmask16)__U, (__v16si)_mm512_conflict_epi32(__A),
+ (__v16si)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm512_lzcnt_epi32(__m512i __A) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_lzcnt_epi32(__m512i __A) {
return (__m512i)__builtin_elementwise_clzg((__v16si)__A,
(__v16si)_mm512_set1_epi32(32));
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_lzcnt_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
- return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
- (__v16si)_mm512_lzcnt_epi32(__A),
- (__v16si)__W);
+ return (__m512i)__builtin_ia32_selectd_512(
+ (__mmask16)__U, (__v16si)_mm512_lzcnt_epi32(__A), (__v16si)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_lzcnt_epi32(__mmask16 __U, __m512i __A) {
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
(__v16si)_mm512_lzcnt_epi32(__A),
(__v16si)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm512_lzcnt_epi64(__m512i __A) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_lzcnt_epi64(__m512i __A) {
return (__m512i)__builtin_elementwise_clzg(
(__v8di)__A, (__v8di)_mm512_set1_epi64((long long)64));
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_lzcnt_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
- return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
- (__v8di)_mm512_lzcnt_epi64(__A),
- (__v8di)__W);
+ return (__m512i)__builtin_ia32_selectq_512(
+ (__mmask8)__U, (__v8di)_mm512_lzcnt_epi64(__A), (__v8di)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_lzcnt_epi64(__mmask8 __U, __m512i __A) {
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
(__v8di)_mm512_lzcnt_epi64(__A),
(__v8di)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_broadcastmb_epi64(__mmask8 __A) {
- return (__m512i) _mm512_set1_epi64((long long) __A);
+ return (__m512i)_mm512_set1_epi64((long long)__A);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_broadcastmw_epi32(__mmask16 __A) {
return (__m512i)_mm512_set1_epi32((int)__A);
}
#undef __DEFAULT_FN_ATTRS
-#undef __DEFAULT_FN_ATTRS_CONSTEXPR
#endif
diff --git a/clang/lib/Headers/avx512dqintrin.h b/clang/lib/Headers/avx512dqintrin.h
index fb65bf9..3681cca 100644
--- a/clang/lib/Headers/avx512dqintrin.h
+++ b/clang/lib/Headers/avx512dqintrin.h
@@ -1083,17 +1083,15 @@ _mm512_broadcast_f32x2(__m128 __A) {
0, 1, 0, 1, 0, 1, 0, 1);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_broadcast_f32x2(__m512 __O, __mmask16 __M, __m128 __A) {
return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
(__v16sf)_mm512_broadcast_f32x2(__A),
(__v16sf)__O);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_broadcast_f32x2(__mmask16 __M, __m128 __A) {
return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
(__v16sf)_mm512_broadcast_f32x2(__A),
(__v16sf)_mm512_setzero_ps());
@@ -1106,17 +1104,15 @@ _mm512_broadcast_f32x8(__m256 __A) {
0, 1, 2, 3, 4, 5, 6, 7);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_mask_broadcast_f32x8(__m512 __O, __mmask16 __M, __m256 __A)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_broadcast_f32x8(__m512 __O, __mmask16 __M, __m256 __A) {
return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
(__v16sf)_mm512_broadcast_f32x8(__A),
(__v16sf)__O);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_maskz_broadcast_f32x8(__mmask16 __M, __m256 __A)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_broadcast_f32x8(__mmask16 __M, __m256 __A) {
return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
(__v16sf)_mm512_broadcast_f32x8(__A),
(__v16sf)_mm512_setzero_ps());
@@ -1128,17 +1124,15 @@ _mm512_broadcast_f64x2(__m128d __A) {
0, 1, 0, 1, 0, 1, 0, 1);
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_mask_broadcast_f64x2(__m512d __O, __mmask8 __M, __m128d __A)
-{
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_broadcast_f64x2(__m512d __O, __mmask8 __M, __m128d __A) {
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
(__v8df)_mm512_broadcast_f64x2(__A),
(__v8df)__O);
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A)
-{
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A) {
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
(__v8df)_mm512_broadcast_f64x2(__A),
(__v8df)_mm512_setzero_pd());
@@ -1151,17 +1145,15 @@ _mm512_broadcast_i32x2(__m128i __A) {
0, 1, 0, 1, 0, 1, 0, 1);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_broadcast_i32x2(__m512i __O, __mmask16 __M, __m128i __A) {
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
(__v16si)_mm512_broadcast_i32x2(__A),
(__v16si)__O);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_broadcast_i32x2(__mmask16 __M, __m128i __A) {
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
(__v16si)_mm512_broadcast_i32x2(__A),
(__v16si)_mm512_setzero_si512());
@@ -1174,17 +1166,15 @@ _mm512_broadcast_i32x8(__m256i __A) {
0, 1, 2, 3, 4, 5, 6, 7);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_broadcast_i32x8(__m512i __O, __mmask16 __M, __m256i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_broadcast_i32x8(__m512i __O, __mmask16 __M, __m256i __A) {
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
(__v16si)_mm512_broadcast_i32x8(__A),
(__v16si)__O);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_broadcast_i32x8(__mmask16 __M, __m256i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_broadcast_i32x8(__mmask16 __M, __m256i __A) {
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
(__v16si)_mm512_broadcast_i32x8(__A),
(__v16si)_mm512_setzero_si512());
@@ -1196,17 +1186,15 @@ _mm512_broadcast_i64x2(__m128i __A) {
0, 1, 0, 1, 0, 1, 0, 1);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_broadcast_i64x2(__m512i __O, __mmask8 __M, __m128i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_broadcast_i64x2(__m512i __O, __mmask8 __M, __m128i __A) {
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
(__v8di)_mm512_broadcast_i64x2(__A),
(__v8di)__O);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) {
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
(__v8di)_mm512_broadcast_i64x2(__A),
(__v8di)_mm512_setzero_si512());
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 80e5842..07de036 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -225,17 +225,15 @@ _mm512_broadcastd_epi32(__m128i __A) {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_broadcastd_epi32(__m512i __O, __mmask16 __M, __m128i __A) {
return (__m512i)__builtin_ia32_selectd_512(__M,
(__v16si) _mm512_broadcastd_epi32(__A),
(__v16si) __O);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_broadcastd_epi32(__mmask16 __M, __m128i __A) {
return (__m512i)__builtin_ia32_selectd_512(__M,
(__v16si) _mm512_broadcastd_epi32(__A),
(__v16si) _mm512_setzero_si512());
@@ -247,18 +245,14 @@ _mm512_broadcastq_epi64(__m128i __A) {
0, 0, 0, 0, 0, 0, 0, 0);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
-{
- return (__m512i)__builtin_ia32_selectq_512(__M,
- (__v8di) _mm512_broadcastq_epi64(__A),
- (__v8di) __O);
-
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_broadcastq_epi64(__m512i __O, __mmask8 __M, __m128i __A) {
+ return (__m512i)__builtin_ia32_selectq_512(
+ __M, (__v8di)_mm512_broadcastq_epi64(__A), (__v8di)__O);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A) {
return (__m512i)__builtin_ia32_selectq_512(__M,
(__v8di) _mm512_broadcastq_epi64(__A),
(__v8di) _mm512_setzero_si512());
@@ -321,9 +315,8 @@ _mm512_set1_epi32(int __s)
__s, __s, __s, __s, __s, __s, __s, __s };
}
-static __inline __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_set1_epi32(__mmask16 __M, int __A)
-{
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_set1_epi32(__mmask16 __M, int __A) {
return (__m512i)__builtin_ia32_selectd_512(__M,
(__v16si)_mm512_set1_epi32(__A),
(__v16si)_mm512_setzero_si512());
@@ -335,9 +328,8 @@ _mm512_set1_epi64(long long __d)
return __extension__(__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
}
-static __inline __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
-{
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_set1_epi64(__mmask8 __M, long long __A) {
return (__m512i)__builtin_ia32_selectq_512(__M,
(__v8di)_mm512_set1_epi64(__A),
(__v8di)_mm512_setzero_si512());
@@ -6552,17 +6544,15 @@ _mm512_broadcast_f32x4(__m128 __A) {
0, 1, 2, 3, 0, 1, 2, 3);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A) {
return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
(__v16sf)_mm512_broadcast_f32x4(__A),
(__v16sf)__O);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A) {
return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
(__v16sf)_mm512_broadcast_f32x4(__A),
(__v16sf)_mm512_setzero_ps());
@@ -6597,17 +6587,15 @@ _mm512_broadcast_i32x4(__m128i __A) {
0, 1, 2, 3, 0, 1, 2, 3);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A) {
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
(__v16si)_mm512_broadcast_i32x4(__A),
(__v16si)__O);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A) {
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
(__v16si)_mm512_broadcast_i32x4(__A),
(__v16si)_mm512_setzero_si512());
@@ -6635,33 +6623,29 @@ _mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A)
(__v8di)_mm512_setzero_si512());
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
-{
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_broadcastsd_pd(__m512d __O, __mmask8 __M, __m128d __A) {
return (__m512d)__builtin_ia32_selectpd_512(__M,
(__v8df) _mm512_broadcastsd_pd(__A),
(__v8df) __O);
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
-_mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
-{
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_broadcastsd_pd(__mmask8 __M, __m128d __A) {
return (__m512d)__builtin_ia32_selectpd_512(__M,
(__v8df) _mm512_broadcastsd_pd(__A),
(__v8df) _mm512_setzero_pd());
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_broadcastss_ps(__m512 __O, __mmask16 __M, __m128 __A) {
return (__m512)__builtin_ia32_selectps_512(__M,
(__v16sf) _mm512_broadcastss_ps(__A),
(__v16sf) __O);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_broadcastss_ps(__mmask16 __M, __m128 __A) {
return (__m512)__builtin_ia32_selectps_512(__M,
(__v16sf) _mm512_broadcastss_ps(__A),
(__v16sf) _mm512_setzero_ps());
@@ -8381,17 +8365,15 @@ _mm512_movehdup_ps (__m512 __A)
1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_movehdup_ps(__m512 __W, __mmask16 __U, __m512 __A) {
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
(__v16sf)_mm512_movehdup_ps(__A),
(__v16sf)__W);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_movehdup_ps(__mmask16 __U, __m512 __A) {
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
(__v16sf)_mm512_movehdup_ps(__A),
(__v16sf)_mm512_setzero_ps());
@@ -8404,44 +8386,38 @@ _mm512_moveldup_ps (__m512 __A)
0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_moveldup_ps(__m512 __W, __mmask16 __U, __m512 __A) {
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
(__v16sf)_mm512_moveldup_ps(__A),
(__v16sf)__W);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
-_mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
-{
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_moveldup_ps(__mmask16 __U, __m512 __A) {
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
(__v16sf)_mm512_moveldup_ps(__A),
(__v16sf)_mm512_setzero_ps());
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
-{
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_move_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B), __W);
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B)
-{
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_move_ss(__mmask8 __U, __m128 __A, __m128 __B) {
return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B),
_mm_setzero_ps());
}
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
-{
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_move_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B), __W);
}
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B)
-{
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_move_sd(__mmask8 __U, __m128d __A, __m128d __B) {
return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B),
_mm_setzero_pd());
}
@@ -8884,17 +8860,15 @@ _mm_cvtu64_ss (__m128 __A, unsigned long long __B)
}
#endif
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_set1_epi32(__m512i __O, __mmask16 __M, int __A) {
return (__m512i) __builtin_ia32_selectd_512(__M,
(__v16si) _mm512_set1_epi32(__A),
(__v16si) __O);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_set1_epi64(__m512i __O, __mmask8 __M, long long __A) {
return (__m512i) __builtin_ia32_selectq_512(__M,
(__v8di) _mm512_set1_epi64(__A),
(__v8di) __O);
diff --git a/clang/lib/Headers/avx512vlcdintrin.h b/clang/lib/Headers/avx512vlcdintrin.h
index cb98e7c..7719680f 100644
--- a/clang/lib/Headers/avx512vlcdintrin.h
+++ b/clang/lib/Headers/avx512vlcdintrin.h
@@ -14,203 +14,182 @@
#define __AVX512VLCDINTRIN_H
/* Define the default attributes for the functions in this file. */
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS128 \
+ constexpr __attribute__((__always_inline__, __nodebug__, \
+ __target__("avx512vl,avx512cd"), \
+ __min_vector_width__(128)))
+#define __DEFAULT_FN_ATTRS256 \
+ constexpr __attribute__((__always_inline__, __nodebug__, \
+ __target__("avx512vl,avx512cd"), \
+ __min_vector_width__(256)))
+#else
#define __DEFAULT_FN_ATTRS128 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vl,avx512cd"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vl,avx512cd"), __min_vector_width__(256)))
-
-#if defined(__cplusplus) && (__cplusplus >= 201103L)
-#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
-#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
-#else
-#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
-#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
#endif
-static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_broadcastmb_epi64(__mmask8 __A) {
return (__m128i) _mm_set1_epi64x((long long) __A);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_broadcastmb_epi64(__mmask8 __A) {
- return (__m256i) _mm256_set1_epi64x((long long)__A);
+ return (__m256i)_mm256_set1_epi64x((long long)__A);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_broadcastmw_epi32(__mmask16 __A) {
return (__m128i) _mm_set1_epi32((int)__A);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_broadcastmw_epi32(__mmask16 __A) {
return (__m256i) _mm256_set1_epi32((int)__A);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_conflict_epi64 (__m128i __A)
-{
- return (__m128i) __builtin_ia32_vpconflictdi_128 ((__v2di) __A);
+_mm_conflict_epi64(__m128i __A) {
+ return (__m128i)__builtin_ia32_vpconflictdi_128((__v2di)__A);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_conflict_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
-{
- return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
- (__v2di)_mm_conflict_epi64(__A),
- (__v2di)__W);
+_mm_mask_conflict_epi64(__m128i __W, __mmask8 __U, __m128i __A) {
+ return (__m128i)__builtin_ia32_selectq_128(
+ (__mmask8)__U, (__v2di)_mm_conflict_epi64(__A), (__v2di)__W);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_conflict_epi64 (__mmask8 __U, __m128i __A)
-{
+_mm_maskz_conflict_epi64(__mmask8 __U, __m128i __A) {
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
(__v2di)_mm_conflict_epi64(__A),
(__v2di)_mm_setzero_si128());
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_conflict_epi64 (__m256i __A)
-{
- return (__m256i) __builtin_ia32_vpconflictdi_256 ((__v4di) __A);
+_mm256_conflict_epi64(__m256i __A) {
+ return (__m256i)__builtin_ia32_vpconflictdi_256((__v4di)__A);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_conflict_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
-{
- return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
- (__v4di)_mm256_conflict_epi64(__A),
- (__v4di)__W);
+_mm256_mask_conflict_epi64(__m256i __W, __mmask8 __U, __m256i __A) {
+ return (__m256i)__builtin_ia32_selectq_256(
+ (__mmask8)__U, (__v4di)_mm256_conflict_epi64(__A), (__v4di)__W);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_conflict_epi64 (__mmask8 __U, __m256i __A)
-{
+_mm256_maskz_conflict_epi64(__mmask8 __U, __m256i __A) {
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
(__v4di)_mm256_conflict_epi64(__A),
(__v4di)_mm256_setzero_si256());
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_conflict_epi32 (__m128i __A)
-{
- return (__m128i) __builtin_ia32_vpconflictsi_128 ((__v4si) __A);
+_mm_conflict_epi32(__m128i __A) {
+ return (__m128i)__builtin_ia32_vpconflictsi_128((__v4si)__A);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_conflict_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
-{
- return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
- (__v4si)_mm_conflict_epi32(__A),
- (__v4si)__W);
+_mm_mask_conflict_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
+ return (__m128i)__builtin_ia32_selectd_128(
+ (__mmask8)__U, (__v4si)_mm_conflict_epi32(__A), (__v4si)__W);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_conflict_epi32 (__mmask8 __U, __m128i __A)
-{
+_mm_maskz_conflict_epi32(__mmask8 __U, __m128i __A) {
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
(__v4si)_mm_conflict_epi32(__A),
(__v4si)_mm_setzero_si128());
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_conflict_epi32 (__m256i __A)
-{
- return (__m256i) __builtin_ia32_vpconflictsi_256 ((__v8si) __A);
+_mm256_conflict_epi32(__m256i __A) {
+ return (__m256i)__builtin_ia32_vpconflictsi_256((__v8si)__A);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_conflict_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
-{
- return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
- (__v8si)_mm256_conflict_epi32(__A),
- (__v8si)__W);
+_mm256_mask_conflict_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
+ return (__m256i)__builtin_ia32_selectd_256(
+ (__mmask8)__U, (__v8si)_mm256_conflict_epi32(__A), (__v8si)__W);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_conflict_epi32 (__mmask8 __U, __m256i __A)
-{
+_mm256_maskz_conflict_epi32(__mmask8 __U, __m256i __A) {
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
(__v8si)_mm256_conflict_epi32(__A),
(__v8si)_mm256_setzero_si256());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_lzcnt_epi32(__m128i __A) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_lzcnt_epi32(__m128i __A) {
return (__m128i)__builtin_elementwise_clzg((__v4si)__A,
(__v4si)_mm_set1_epi32(32));
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_lzcnt_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
- return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
- (__v4si)_mm_lzcnt_epi32(__A),
- (__v4si)__W);
+ return (__m128i)__builtin_ia32_selectd_128(
+ (__mmask8)__U, (__v4si)_mm_lzcnt_epi32(__A), (__v4si)__W);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_lzcnt_epi32(__mmask8 __U, __m128i __A) {
- return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
- (__v4si)_mm_lzcnt_epi32(__A),
- (__v4si)_mm_setzero_si128());
+ return (__m128i)__builtin_ia32_selectd_128(
+ (__mmask8)__U, (__v4si)_mm_lzcnt_epi32(__A), (__v4si)_mm_setzero_si128());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_lzcnt_epi32(__m256i __A) {
return (__m256i)__builtin_elementwise_clzg((__v8si)__A,
(__v8si)_mm256_set1_epi32(32));
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_lzcnt_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
- return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
- (__v8si)_mm256_lzcnt_epi32(__A),
- (__v8si)__W);
+ return (__m256i)__builtin_ia32_selectd_256(
+ (__mmask8)__U, (__v8si)_mm256_lzcnt_epi32(__A), (__v8si)__W);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_lzcnt_epi32(__mmask8 __U, __m256i __A) {
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
(__v8si)_mm256_lzcnt_epi32(__A),
(__v8si)_mm256_setzero_si256());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_lzcnt_epi64(__m128i __A) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_lzcnt_epi64(__m128i __A) {
return (__m128i)__builtin_elementwise_clzg(
(__v2di)__A, (__v2di)_mm_set1_epi64x((long long)64));
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_lzcnt_epi64(__m128i __W, __mmask8 __U, __m128i __A) {
- return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
- (__v2di)_mm_lzcnt_epi64(__A),
- (__v2di)__W);
+ return (__m128i)__builtin_ia32_selectq_128(
+ (__mmask8)__U, (__v2di)_mm_lzcnt_epi64(__A), (__v2di)__W);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_lzcnt_epi64(__mmask8 __U, __m128i __A) {
- return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
- (__v2di)_mm_lzcnt_epi64(__A),
- (__v2di)_mm_setzero_si128());
+ return (__m128i)__builtin_ia32_selectq_128(
+ (__mmask8)__U, (__v2di)_mm_lzcnt_epi64(__A), (__v2di)_mm_setzero_si128());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_lzcnt_epi64(__m256i __A) {
return (__m256i)__builtin_elementwise_clzg(
(__v4di)__A, (__v4di)_mm256_set1_epi64x((long long)64));
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_lzcnt_epi64(__m256i __W, __mmask8 __U, __m256i __A) {
- return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
- (__v4di)_mm256_lzcnt_epi64(__A),
- (__v4di)__W);
+ return (__m256i)__builtin_ia32_selectq_256(
+ (__mmask8)__U, (__v4di)_mm256_lzcnt_epi64(__A), (__v4di)__W);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_lzcnt_epi64(__mmask8 __U, __m256i __A) {
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
(__v4di)_mm256_lzcnt_epi64(__A),
@@ -219,7 +198,5 @@ _mm256_maskz_lzcnt_epi64(__mmask8 __U, __m256i __A) {
#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256
-#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
-#undef __DEFAULT_FN_ATTRS256_CONSTEXPR
#endif /* __AVX512VLCDINTRIN_H */
diff --git a/clang/lib/Headers/avx512vldqintrin.h b/clang/lib/Headers/avx512vldqintrin.h
index 68bd52e..ee7974e 100644
--- a/clang/lib/Headers/avx512vldqintrin.h
+++ b/clang/lib/Headers/avx512vldqintrin.h
@@ -968,17 +968,15 @@ _mm256_broadcast_f32x2(__m128 __A) {
0, 1, 0, 1, 0, 1, 0, 1);
}
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A)
-{
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_broadcast_f32x2(__m256 __O, __mmask8 __M, __m128 __A) {
return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
(__v8sf)_mm256_broadcast_f32x2(__A),
(__v8sf)__O);
}
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A)
-{
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_broadcast_f32x2(__mmask8 __M, __m128 __A) {
return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
(__v8sf)_mm256_broadcast_f32x2(__A),
(__v8sf)_mm256_setzero_ps());
@@ -990,17 +988,15 @@ _mm256_broadcast_f64x2(__m128d __A) {
0, 1, 0, 1);
}
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
-_mm256_mask_broadcast_f64x2(__m256d __O, __mmask8 __M, __m128d __A)
-{
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_broadcast_f64x2(__m256d __O, __mmask8 __M, __m128d __A) {
return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M,
(__v4df)_mm256_broadcast_f64x2(__A),
(__v4df)__O);
}
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
-_mm256_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A)
-{
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A) {
return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M,
(__v4df)_mm256_broadcast_f64x2(__A),
(__v4df)_mm256_setzero_pd());
@@ -1012,17 +1008,15 @@ _mm_broadcast_i32x2(__m128i __A) {
0, 1, 0, 1);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_broadcast_i32x2(__m128i __O, __mmask8 __M, __m128i __A) {
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
(__v4si)_mm_broadcast_i32x2(__A),
(__v4si)__O);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_broadcast_i32x2(__mmask8 __M, __m128i __A) {
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
(__v4si)_mm_broadcast_i32x2(__A),
(__v4si)_mm_setzero_si128());
@@ -1034,17 +1028,15 @@ _mm256_broadcast_i32x2(__m128i __A) {
0, 1, 0, 1, 0, 1, 0, 1);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_broadcast_i32x2(__m256i __O, __mmask8 __M, __m128i __A) {
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
(__v8si)_mm256_broadcast_i32x2(__A),
(__v8si)__O);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_broadcast_i32x2(__mmask8 __M, __m128i __A) {
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
(__v8si)_mm256_broadcast_i32x2(__A),
(__v8si)_mm256_setzero_si256());
@@ -1056,17 +1048,15 @@ _mm256_broadcast_i64x2(__m128i __A) {
0, 1, 0, 1);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_broadcast_i64x2(__m256i __O, __mmask8 __M, __m128i __A)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_broadcast_i64x2(__m256i __O, __mmask8 __M, __m128i __A) {
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
(__v4di)_mm256_broadcast_i64x2(__A),
(__v4di)__O);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) {
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
(__v4di)_mm256_broadcast_i64x2(__A),
(__v4di)_mm256_setzero_si256());
diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h
index 965741f..676b5a0 100644
--- a/clang/lib/Headers/avx512vlintrin.h
+++ b/clang/lib/Headers/avx512vlintrin.h
@@ -5101,69 +5101,55 @@ _mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A)
(__v4df)_mm256_setzero_pd());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A)
-{
- return (__m128i)__builtin_ia32_selectd_128(__M,
- (__v4si) _mm_set1_epi32(__A),
- (__v4si)__O);
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A) {
+ return (__m128i)__builtin_ia32_selectd_128(__M, (__v4si)_mm_set1_epi32(__A),
+ (__v4si)__O);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_set1_epi32( __mmask8 __M, int __A)
-{
- return (__m128i)__builtin_ia32_selectd_128(__M,
- (__v4si) _mm_set1_epi32(__A),
- (__v4si)_mm_setzero_si128());
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_set1_epi32(__mmask8 __M, int __A) {
+ return (__m128i)__builtin_ia32_selectd_128(__M, (__v4si)_mm_set1_epi32(__A),
+ (__v4si)_mm_setzero_si128());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int __A)
-{
- return (__m256i)__builtin_ia32_selectd_256(__M,
- (__v8si) _mm256_set1_epi32(__A),
- (__v8si)__O);
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int __A) {
+ return (__m256i)__builtin_ia32_selectd_256(
+ __M, (__v8si)_mm256_set1_epi32(__A), (__v8si)__O);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_set1_epi32( __mmask8 __M, int __A)
-{
- return (__m256i)__builtin_ia32_selectd_256(__M,
- (__v8si) _mm256_set1_epi32(__A),
- (__v8si)_mm256_setzero_si256());
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_set1_epi32(__mmask8 __M, int __A) {
+ return (__m256i)__builtin_ia32_selectd_256(
+ __M, (__v8si)_mm256_set1_epi32(__A), (__v8si)_mm256_setzero_si256());
}
-
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_set1_epi64(__m128i __O, __mmask8 __M, long long __A) {
return (__m128i) __builtin_ia32_selectq_128(__M,
(__v2di) _mm_set1_epi64x(__A),
(__v2di) __O);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_set1_epi64 (__mmask8 __M, long long __A)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_set1_epi64(__mmask8 __M, long long __A) {
return (__m128i) __builtin_ia32_selectq_128(__M,
(__v2di) _mm_set1_epi64x(__A),
(__v2di) _mm_setzero_si128());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_set1_epi64(__m256i __O, __mmask8 __M, long long __A) {
return (__m256i) __builtin_ia32_selectq_256(__M,
(__v4di) _mm256_set1_epi64x(__A),
(__v4di) __O) ;
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_set1_epi64 (__mmask8 __M, long long __A)
-{
- return (__m256i) __builtin_ia32_selectq_256(__M,
- (__v4di) _mm256_set1_epi64x(__A),
- (__v4di) _mm256_setzero_si256());
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_set1_epi64(__mmask8 __M, long long __A) {
+ return (__m256i)__builtin_ia32_selectq_256(
+ __M, (__v4di)_mm256_set1_epi64x(__A), (__v4di)_mm256_setzero_si256());
}
#define _mm_fixupimm_pd(A, B, C, imm) \
@@ -5610,130 +5596,113 @@ _mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A)
(__mmask8) __U);
}
-
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
-{
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
(__v2df)_mm_unpackhi_pd(__A, __B),
(__v2df)__W);
}
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B)
-{
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B) {
return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
(__v2df)_mm_unpackhi_pd(__A, __B),
(__v2df)_mm_setzero_pd());
}
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
-_mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
-{
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
(__v4df)_mm256_unpackhi_pd(__A, __B),
(__v4df)__W);
}
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
-_mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B)
-{
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B) {
return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
(__v4df)_mm256_unpackhi_pd(__A, __B),
(__v4df)_mm256_setzero_pd());
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
-{
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
(__v4sf)_mm_unpackhi_ps(__A, __B),
(__v4sf)__W);
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B)
-{
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B) {
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
(__v4sf)_mm_unpackhi_ps(__A, __B),
(__v4sf)_mm_setzero_ps());
}
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
-{
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
(__v8sf)_mm256_unpackhi_ps(__A, __B),
(__v8sf)__W);
}
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B)
-{
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B) {
return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
(__v8sf)_mm256_unpackhi_ps(__A, __B),
(__v8sf)_mm256_setzero_ps());
}
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
-{
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
(__v2df)_mm_unpacklo_pd(__A, __B),
(__v2df)__W);
}
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B)
-{
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B) {
return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
(__v2df)_mm_unpacklo_pd(__A, __B),
(__v2df)_mm_setzero_pd());
}
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
-_mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
-{
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
(__v4df)_mm256_unpacklo_pd(__A, __B),
(__v4df)__W);
}
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
-_mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B)
-{
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B) {
return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
(__v4df)_mm256_unpacklo_pd(__A, __B),
(__v4df)_mm256_setzero_pd());
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
-{
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
(__v4sf)_mm_unpacklo_ps(__A, __B),
(__v4sf)__W);
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B)
-{
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B) {
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
(__v4sf)_mm_unpacklo_ps(__A, __B),
(__v4sf)_mm_setzero_ps());
}
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
-{
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
(__v8sf)_mm256_unpacklo_ps(__A, __B),
(__v8sf)__W);
}
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B)
-{
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B) {
return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
(__v8sf)_mm256_unpacklo_ps(__A, __B),
(__v8sf)_mm256_setzero_ps());
@@ -6055,129 +6024,117 @@ _mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
_mm256_setzero_si256());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
(__v4si)_mm_unpackhi_epi32(__A, __B),
(__v4si)__W);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
(__v4si)_mm_unpackhi_epi32(__A, __B),
(__v4si)_mm_setzero_si128());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A,
+ __m256i __B) {
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
(__v8si)_mm256_unpackhi_epi32(__A, __B),
(__v8si)__W);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B) {
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
(__v8si)_mm256_unpackhi_epi32(__A, __B),
(__v8si)_mm256_setzero_si256());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
(__v2di)_mm_unpackhi_epi64(__A, __B),
(__v2di)__W);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
(__v2di)_mm_unpackhi_epi64(__A, __B),
(__v2di)_mm_setzero_si128());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A,
+ __m256i __B) {
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
(__v4di)_mm256_unpackhi_epi64(__A, __B),
(__v4di)__W);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B) {
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
(__v4di)_mm256_unpackhi_epi64(__A, __B),
(__v4di)_mm256_setzero_si256());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
(__v4si)_mm_unpacklo_epi32(__A, __B),
(__v4si)__W);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
(__v4si)_mm_unpacklo_epi32(__A, __B),
(__v4si)_mm_setzero_si128());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A,
+ __m256i __B) {
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
(__v8si)_mm256_unpacklo_epi32(__A, __B),
(__v8si)__W);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B) {
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
(__v8si)_mm256_unpacklo_epi32(__A, __B),
(__v8si)_mm256_setzero_si256());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
(__v2di)_mm_unpacklo_epi64(__A, __B),
(__v2di)__W);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
(__v2di)_mm_unpacklo_epi64(__A, __B),
(__v2di)_mm_setzero_si128());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A,
+ __m256i __B) {
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
(__v4di)_mm256_unpacklo_epi64(__A, __B),
(__v4di)__W);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B) {
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
(__v4di)_mm256_unpacklo_epi64(__A, __B),
(__v4di)_mm256_setzero_si256());
@@ -6594,17 +6551,15 @@ _mm256_broadcast_f32x4(__m128 __A) {
0, 1, 2, 3, 0, 1, 2, 3);
}
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A)
-{
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A) {
return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
(__v8sf)_mm256_broadcast_f32x4(__A),
(__v8sf)__O);
}
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A)
-{
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_broadcast_f32x4(__mmask8 __M, __m128 __A) {
return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
(__v8sf)_mm256_broadcast_f32x4(__A),
(__v8sf)_mm256_setzero_ps());
@@ -6616,129 +6571,113 @@ _mm256_broadcast_i32x4(__m128i __A) {
0, 1, 2, 3, 0, 1, 2, 3);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A) {
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
(__v8si)_mm256_broadcast_i32x4(__A),
(__v8si)__O);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i __A)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i __A) {
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
(__v8si)_mm256_broadcast_i32x4(__A),
(__v8si)_mm256_setzero_si256());
}
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
-_mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A)
-{
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_broadcastsd_pd(__m256d __O, __mmask8 __M, __m128d __A) {
return (__m256d)__builtin_ia32_selectpd_256(__M,
(__v4df) _mm256_broadcastsd_pd(__A),
(__v4df) __O);
}
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
-_mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
-{
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_broadcastsd_pd(__mmask8 __M, __m128d __A) {
return (__m256d)__builtin_ia32_selectpd_256(__M,
(__v4df) _mm256_broadcastsd_pd(__A),
(__v4df) _mm256_setzero_pd());
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A)
-{
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_broadcastss_ps(__m128 __O, __mmask8 __M, __m128 __A) {
return (__m128)__builtin_ia32_selectps_128(__M,
(__v4sf) _mm_broadcastss_ps(__A),
(__v4sf) __O);
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
-{
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_broadcastss_ps(__mmask8 __M, __m128 __A) {
return (__m128)__builtin_ia32_selectps_128(__M,
(__v4sf) _mm_broadcastss_ps(__A),
(__v4sf) _mm_setzero_ps());
}
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A)
-{
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_broadcastss_ps(__m256 __O, __mmask8 __M, __m128 __A) {
return (__m256)__builtin_ia32_selectps_256(__M,
(__v8sf) _mm256_broadcastss_ps(__A),
(__v8sf) __O);
}
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
-{
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_broadcastss_ps(__mmask8 __M, __m128 __A) {
return (__m256)__builtin_ia32_selectps_256(__M,
(__v8sf) _mm256_broadcastss_ps(__A),
(__v8sf) _mm256_setzero_ps());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_broadcastd_epi32(__m128i __O, __mmask8 __M, __m128i __A) {
return (__m128i)__builtin_ia32_selectd_128(__M,
(__v4si) _mm_broadcastd_epi32(__A),
(__v4si) __O);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_broadcastd_epi32(__mmask8 __M, __m128i __A) {
return (__m128i)__builtin_ia32_selectd_128(__M,
(__v4si) _mm_broadcastd_epi32(__A),
(__v4si) _mm_setzero_si128());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_broadcastd_epi32(__m256i __O, __mmask8 __M, __m128i __A) {
return (__m256i)__builtin_ia32_selectd_256(__M,
(__v8si) _mm256_broadcastd_epi32(__A),
(__v8si) __O);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_broadcastd_epi32(__mmask8 __M, __m128i __A) {
return (__m256i)__builtin_ia32_selectd_256(__M,
(__v8si) _mm256_broadcastd_epi32(__A),
(__v8si) _mm256_setzero_si256());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_broadcastq_epi64(__m128i __O, __mmask8 __M, __m128i __A) {
return (__m128i)__builtin_ia32_selectq_128(__M,
(__v2di) _mm_broadcastq_epi64(__A),
(__v2di) __O);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A) {
return (__m128i)__builtin_ia32_selectq_128(__M,
(__v2di) _mm_broadcastq_epi64(__A),
(__v2di) _mm_setzero_si128());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_broadcastq_epi64(__m256i __O, __mmask8 __M, __m128i __A) {
return (__m256i)__builtin_ia32_selectq_256(__M,
(__v4di) _mm256_broadcastq_epi64(__A),
(__v4di) __O);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A) {
return (__m256i)__builtin_ia32_selectq_256(__M,
(__v4di) _mm256_broadcastq_epi64(__A),
(__v4di) _mm256_setzero_si256());
@@ -8003,65 +7942,57 @@ _mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
(__v4di)_mm256_alignr_epi64((A), (B), (imm)), \
(__v4di)_mm256_setzero_si256()))
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A)
-{
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_movehdup_ps(__m128 __W, __mmask8 __U, __m128 __A) {
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
(__v4sf)_mm_movehdup_ps(__A),
(__v4sf)__W);
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A)
-{
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_movehdup_ps(__mmask8 __U, __m128 __A) {
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
(__v4sf)_mm_movehdup_ps(__A),
(__v4sf)_mm_setzero_ps());
}
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A)
-{
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_movehdup_ps(__m256 __W, __mmask8 __U, __m256 __A) {
return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
(__v8sf)_mm256_movehdup_ps(__A),
(__v8sf)__W);
}
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A)
-{
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_movehdup_ps(__mmask8 __U, __m256 __A) {
return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
(__v8sf)_mm256_movehdup_ps(__A),
(__v8sf)_mm256_setzero_ps());
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A)
-{
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_moveldup_ps(__m128 __W, __mmask8 __U, __m128 __A) {
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
(__v4sf)_mm_moveldup_ps(__A),
(__v4sf)__W);
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A)
-{
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_moveldup_ps(__mmask8 __U, __m128 __A) {
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
(__v4sf)_mm_moveldup_ps(__A),
(__v4sf)_mm_setzero_ps());
}
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A)
-{
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_moveldup_ps(__m256 __W, __mmask8 __U, __m256 __A) {
return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
(__v8sf)_mm256_moveldup_ps(__A),
(__v8sf)__W);
}
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A)
-{
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_moveldup_ps(__mmask8 __U, __m256 __A) {
return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
(__v8sf)_mm256_moveldup_ps(__A),
(__v8sf)_mm256_setzero_ps());
diff --git a/clang/lib/Sema/HLSLExternalSemaSource.cpp b/clang/lib/Sema/HLSLExternalSemaSource.cpp
index e118dda..f28a037 100644
--- a/clang/lib/Sema/HLSLExternalSemaSource.cpp
+++ b/clang/lib/Sema/HLSLExternalSemaSource.cpp
@@ -159,7 +159,8 @@ void HLSLExternalSemaSource::defineHLSLMatrixAlias() {
SourceLocation(), ColsParam));
TemplateParams.emplace_back(ColsParam);
- const unsigned MaxMatDim = 4;
+ const unsigned MaxMatDim = SemaPtr->getLangOpts().MaxMatrixDimension;
+
auto *MaxRow = IntegerLiteral::Create(
AST, llvm::APInt(AST.getIntWidth(AST.IntTy), MaxMatDim), AST.IntTy,
SourceLocation());
diff --git a/clang/lib/Sema/SemaCast.cpp b/clang/lib/Sema/SemaCast.cpp
index ddf17d8..5360f8a 100644
--- a/clang/lib/Sema/SemaCast.cpp
+++ b/clang/lib/Sema/SemaCast.cpp
@@ -2927,6 +2927,8 @@ bool CastOperation::CheckHLSLCStyleCast(CheckedConversionKind CCK) {
SrcExpr = Self.ImpCastExprToType(
SrcExpr.get(), Self.Context.getArrayParameterType(SrcTy),
CK_HLSLArrayRValue, VK_PRValue, nullptr, CCK);
+ else
+ SrcExpr = Self.DefaultLvalueConversion(SrcExpr.get());
Kind = CK_HLSLElementwiseCast;
return true;
}
@@ -2935,6 +2937,7 @@ bool CastOperation::CheckHLSLCStyleCast(CheckedConversionKind CCK) {
// If the relative order of this and the HLSLElementWise cast checks
// are changed, it might change which cast handles what in a few cases
if (Self.HLSL().CanPerformAggregateSplatCast(SrcExpr.get(), DestType)) {
+ SrcExpr = Self.DefaultLvalueConversion(SrcExpr.get());
const VectorType *VT = SrcTy->getAs<VectorType>();
// change splat from vec1 case to splat from scalar
if (VT && VT->getNumElements() == 1)
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 4f409ca..652527a 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -16239,9 +16239,9 @@ getAndVerifyMatrixDimension(Expr *Expr, StringRef Name, Sema &S) {
return {};
}
uint64_t Dim = Value->getZExtValue();
- if (!ConstantMatrixType::isDimensionValid(Dim)) {
+ if (Dim == 0 || Dim > S.Context.getLangOpts().MaxMatrixDimension) {
S.Diag(Expr->getBeginLoc(), diag::err_builtin_matrix_invalid_dimension)
- << Name << ConstantMatrixType::getMaxElementsPerDimension();
+ << Name << S.Context.getLangOpts().MaxMatrixDimension;
return {};
}
return Dim;
diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
index 638904d..7c1fb12 100644
--- a/clang/lib/Sema/SemaType.cpp
+++ b/clang/lib/Sema/SemaType.cpp
@@ -2517,12 +2517,18 @@ QualType Sema::BuildMatrixType(QualType ElementTy, Expr *NumRows, Expr *NumCols,
Diag(AttrLoc, diag::err_attribute_zero_size) << "matrix" << ColRange;
return QualType();
}
- if (!ConstantMatrixType::isDimensionValid(MatrixRows)) {
+ if (MatrixRows > Context.getLangOpts().MaxMatrixDimension &&
+ MatrixColumns > Context.getLangOpts().MaxMatrixDimension) {
+ Diag(AttrLoc, diag::err_attribute_size_too_large)
+ << RowRange << ColRange << "matrix row and column";
+ return QualType();
+ }
+ if (MatrixRows > Context.getLangOpts().MaxMatrixDimension) {
Diag(AttrLoc, diag::err_attribute_size_too_large)
<< RowRange << "matrix row";
return QualType();
}
- if (!ConstantMatrixType::isDimensionValid(MatrixColumns)) {
+ if (MatrixColumns > Context.getLangOpts().MaxMatrixDimension) {
Diag(AttrLoc, diag::err_attribute_size_too_large)
<< ColRange << "matrix column";
return QualType();