Diffstat (limited to 'clang/lib/CodeGen')
-rw-r--r--  clang/lib/CodeGen/CGDebugInfo.cpp            |  27
-rw-r--r--  clang/lib/CodeGen/CGExpr.cpp                 |   8
-rw-r--r--  clang/lib/CodeGen/CGHLSLBuiltins.cpp         |  32
-rw-r--r--  clang/lib/CodeGen/CGOpenMPRuntime.cpp        |   6
-rw-r--r--  clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp  |  81
-rw-r--r--  clang/lib/CodeGen/TargetBuiltins/ARM.cpp     | 249
6 files changed, 178 insertions(+), 225 deletions(-)
diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp
index 6af8066..ca579c9 100644
--- a/clang/lib/CodeGen/CGDebugInfo.cpp
+++ b/clang/lib/CodeGen/CGDebugInfo.cpp
@@ -345,7 +345,7 @@ void CGDebugInfo::setLocation(SourceLocation Loc) {
if (Loc.isInvalid())
return;
- CurLoc = CGM.getContext().getSourceManager().getExpansionLoc(Loc);
+ CurLoc = CGM.getContext().getSourceManager().getFileLoc(Loc);
// If we've changed files in the middle of a lexical scope go ahead
// and create a new lexical scope with file node if it's different
@@ -572,7 +572,7 @@ llvm::DIFile *CGDebugInfo::getOrCreateFile(SourceLocation Loc) {
FileName = TheCU->getFile()->getFilename();
CSInfo = TheCU->getFile()->getChecksum();
} else {
- PresumedLoc PLoc = SM.getPresumedLoc(Loc);
+ PresumedLoc PLoc = SM.getPresumedLoc(SM.getFileLoc(Loc));
FileName = PLoc.getFilename();
if (FileName.empty()) {
@@ -599,7 +599,8 @@ llvm::DIFile *CGDebugInfo::getOrCreateFile(SourceLocation Loc) {
if (CSKind)
CSInfo.emplace(*CSKind, Checksum);
}
- return createFile(FileName, CSInfo, getSource(SM, SM.getFileID(Loc)));
+ return createFile(FileName, CSInfo,
+ getSource(SM, SM.getFileID(SM.getFileLoc(Loc))));
}
llvm::DIFile *CGDebugInfo::createFile(
@@ -654,7 +655,7 @@ unsigned CGDebugInfo::getLineNumber(SourceLocation Loc) {
if (Loc.isInvalid())
return 0;
SourceManager &SM = CGM.getContext().getSourceManager();
- return SM.getPresumedLoc(Loc).getLine();
+ return SM.getPresumedLoc(SM.getFileLoc(Loc)).getLine();
}
unsigned CGDebugInfo::getColumnNumber(SourceLocation Loc, bool Force) {
@@ -666,7 +667,8 @@ unsigned CGDebugInfo::getColumnNumber(SourceLocation Loc, bool Force) {
if (Loc.isInvalid() && CurLoc.isInvalid())
return 0;
SourceManager &SM = CGM.getContext().getSourceManager();
- PresumedLoc PLoc = SM.getPresumedLoc(Loc.isValid() ? Loc : CurLoc);
+ PresumedLoc PLoc =
+ SM.getPresumedLoc(Loc.isValid() ? SM.getFileLoc(Loc) : CurLoc);
return PLoc.isValid() ? PLoc.getColumn() : 0;
}
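
Note: the hunks above consistently route locations through SM.getFileLoc() before querying the SourceManager. A minimal sketch of the distinction, assuming clang's SourceManager API; the helper name is hypothetical:

#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"

// Hypothetical helper illustrating the change. getExpansionLoc() always
// maps a macro location to the expansion site; getFileLoc() does the same
// for macro-body tokens but maps tokens that come from a macro *argument*
// to the spelling site where the argument text was written. Either way
// the result is a real file location, never a macro ID.
static clang::SourceLocation debugLoc(clang::SourceManager &SM,
                                      clang::SourceLocation Loc) {
  return SM.getFileLoc(Loc);
}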
@@ -1174,14 +1176,16 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) {
}
llvm::DIType *CGDebugInfo::CreateType(const BitIntType *Ty) {
-
- StringRef Name = Ty->isUnsigned() ? "unsigned _BitInt" : "_BitInt";
+ SmallString<32> Name;
+ llvm::raw_svector_ostream OS(Name);
+ OS << (Ty->isUnsigned() ? "unsigned _BitInt(" : "_BitInt(")
+ << Ty->getNumBits() << ")";
llvm::dwarf::TypeKind Encoding = Ty->isUnsigned()
? llvm::dwarf::DW_ATE_unsigned
: llvm::dwarf::DW_ATE_signed;
-
return DBuilder.createBasicType(Name, CGM.getContext().getTypeSize(Ty),
- Encoding);
+ Encoding, llvm::DINode::FlagZero, 0,
+ Ty->getNumBits());
}
llvm::DIType *CGDebugInfo::CreateType(const ComplexType *Ty) {
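
The new _BitInt naming can be exercised in isolation; a minimal sketch under the assumption that only the string construction matters, with a hypothetical free function:

#include "llvm/ADT/SmallString.h"
#include "llvm/Support/raw_ostream.h"
#include <string>

// Hypothetical stand-alone version of the name construction above:
//   "_BitInt(7)"           for signed
//   "unsigned _BitInt(7)"  for unsigned
static std::string bitIntTypeName(bool IsUnsigned, unsigned NumBits) {
  llvm::SmallString<32> Name;
  llvm::raw_svector_ostream OS(Name);
  OS << (IsUnsigned ? "unsigned _BitInt(" : "_BitInt(") << NumBits << ")";
  return Name.str().str(); // the DWARF basic type also records NumBits
}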
@@ -5000,7 +5004,7 @@ void CGDebugInfo::EmitLocation(CGBuilderTy &Builder, SourceLocation Loc) {
// Update our current location
setLocation(Loc);
- if (CurLoc.isInvalid() || CurLoc.isMacroID() || LexicalBlockStack.empty())
+ if (CurLoc.isInvalid() || LexicalBlockStack.empty())
return;
llvm::MDNode *Scope = LexicalBlockStack.back();
@@ -6276,7 +6280,8 @@ void CGDebugInfo::EmitGlobalAlias(const llvm::GlobalValue *GV,
void CGDebugInfo::AddStringLiteralDebugInfo(llvm::GlobalVariable *GV,
const StringLiteral *S) {
SourceLocation Loc = S->getStrTokenLoc(0);
- PresumedLoc PLoc = CGM.getContext().getSourceManager().getPresumedLoc(Loc);
+ SourceManager &SM = CGM.getContext().getSourceManager();
+ PresumedLoc PLoc = SM.getPresumedLoc(SM.getFileLoc(Loc));
if (!PLoc.isValid())
return;
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 301d577..01f2161 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -2297,9 +2297,13 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr,
CGM.getABIInfo().getOptimalVectorMemoryType(VecTy, getLangOpts());
if (!ClangVecTy->isPackedVectorBoolType(getContext()) &&
VecTy != NewVecTy) {
- SmallVector<int, 16> Mask(NewVecTy->getNumElements(), -1);
+ SmallVector<int, 16> Mask(NewVecTy->getNumElements(),
+ VecTy->getNumElements());
std::iota(Mask.begin(), Mask.begin() + VecTy->getNumElements(), 0);
- Value = Builder.CreateShuffleVector(Value, Mask, "extractVec");
+ // Use undef instead of poison for the padding lanes, to make sure no
+ // padding bits are poisoned, which may break coercion.
+ Value = Builder.CreateShuffleVector(Value, llvm::UndefValue::get(VecTy),
+ Mask, "extractVec");
SrcTy = NewVecTy;
}
if (Addr.getElementType() != SrcTy)
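
A sketch of the new padding shuffle in isolation (hypothetical helper; assumes the IRBuilder API): mask index VecTy->getNumElements() selects the first lane of the second shuffle operand, so every padding lane reads from the explicit undef vector instead of being a poison lane from a -1 mask entry.

#include "llvm/IR/IRBuilder.h"
#include <numeric> // std::iota

// Hypothetical helper mirroring the logic above: widen V from VecTy
// (e.g. <3 x float>) to NewNumElts (e.g. 4) with undef padding lanes.
static llvm::Value *padVectorWithUndef(llvm::IRBuilderBase &B, llvm::Value *V,
                                       llvm::FixedVectorType *VecTy,
                                       unsigned NewNumElts) {
  // For <3 x float> -> <4 x float>: Mask = {0, 1, 2, 3}; lanes 0..2 come
  // from V, lane 3 is element 0 of the second operand, which is undef.
  llvm::SmallVector<int, 16> Mask(NewNumElts, VecTy->getNumElements());
  std::iota(Mask.begin(), Mask.begin() + VecTy->getNumElements(), 0);
  return B.CreateShuffleVector(V, llvm::UndefValue::get(VecTy), Mask,
                               "extractVec");
}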
diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
index 384bd59..fbf4a57 100644
--- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp
+++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
@@ -206,7 +206,7 @@ static Intrinsic::ID getWaveActiveSumIntrinsic(llvm::Triple::ArchType Arch,
}
}
-// Return wave active sum that corresponds to the QT scalar type
+// Return wave active max that corresponds to the QT scalar type
static Intrinsic::ID getWaveActiveMaxIntrinsic(llvm::Triple::ArchType Arch,
CGHLSLRuntime &RT, QualType QT) {
switch (Arch) {
@@ -225,6 +225,25 @@ static Intrinsic::ID getWaveActiveMaxIntrinsic(llvm::Triple::ArchType Arch,
}
}
+// Return wave active min that corresponds to the QT scalar type
+static Intrinsic::ID getWaveActiveMinIntrinsic(llvm::Triple::ArchType Arch,
+ CGHLSLRuntime &RT, QualType QT) {
+ switch (Arch) {
+ case llvm::Triple::spirv:
+ if (QT->isUnsignedIntegerType())
+ return Intrinsic::spv_wave_reduce_umin;
+ return Intrinsic::spv_wave_reduce_min;
+ case llvm::Triple::dxil: {
+ if (QT->isUnsignedIntegerType())
+ return Intrinsic::dx_wave_reduce_umin;
+ return Intrinsic::dx_wave_reduce_min;
+ }
+ default:
+ llvm_unreachable("Intrinsic WaveActiveMin"
+ " not supported by target architecture");
+ }
+}
+
// Returns the mangled name for a builtin function that the SPIR-V backend
// will expand into a spec Constant.
static std::string getSpecConstantFunctionName(clang::QualType SpecConstantType,
@@ -742,6 +761,17 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
&CGM.getModule(), IID, {OpExpr->getType()}),
ArrayRef{OpExpr}, "hlsl.wave.active.max");
}
+ case Builtin::BI__builtin_hlsl_wave_active_min: {
+ // Due to the use of variadic arguments, explicitly retrieve the argument
+ Value *OpExpr = EmitScalarExpr(E->getArg(0));
+ Intrinsic::ID IID = getWaveActiveMinIntrinsic(
+ getTarget().getTriple().getArch(), CGM.getHLSLRuntime(),
+ E->getArg(0)->getType());
+
+ return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration(
+ &CGM.getModule(), IID, {OpExpr->getType()}),
+ ArrayRef{OpExpr}, "hlsl.wave.active.min");
+ }
case Builtin::BI__builtin_hlsl_wave_get_lane_index: {
// We don't define a SPIR-V intrinsic, instead it is a SPIR-V built-in
// defined in SPIRVBuiltins.td. So instead we manually get the matching name
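
A condensed sketch of the dispatch just added, reduced to its two inputs (hypothetical helper name; intrinsic IDs taken from the patch): the target picks the namespace, the element signedness picks the umin/min flavor.

#include "llvm/IR/IntrinsicsDirectX.h"
#include "llvm/IR/IntrinsicsSPIRV.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/TargetParser/Triple.h"

// Hypothetical condensed form of getWaveActiveMinIntrinsic.
static llvm::Intrinsic::ID waveActiveMinFor(llvm::Triple::ArchType Arch,
                                            bool IsUnsigned) {
  switch (Arch) {
  case llvm::Triple::spirv:
    return IsUnsigned ? llvm::Intrinsic::spv_wave_reduce_umin
                      : llvm::Intrinsic::spv_wave_reduce_min;
  case llvm::Triple::dxil:
    return IsUnsigned ? llvm::Intrinsic::dx_wave_reduce_umin
                      : llvm::Intrinsic::dx_wave_reduce_min;
  default:
    llvm_unreachable("WaveActiveMin not supported by target architecture");
  }
}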
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 66fea92..121de42 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -3731,6 +3731,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
DestructorsFlag = 0x8,
PriorityFlag = 0x20,
DetachableFlag = 0x40,
+ FreeAgentFlag = 0x80,
};
unsigned Flags = Data.Tied ? TiedFlag : 0;
bool NeedsCleanup = false;
@@ -3740,6 +3741,11 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
if (NeedsCleanup)
Flags = Flags | DestructorsFlag;
}
+ if (const auto *Clause = D.getSingleClause<OMPThreadsetClause>()) {
+ OpenMPThreadsetKind Kind = Clause->getThreadsetKind();
+ if (Kind == OMPC_THREADSET_omp_pool)
+ Flags = Flags | FreeAgentFlag;
+ }
if (Data.Priority.getInt())
Flags = Flags | PriorityFlag;
if (D.hasClausesOfKind<OMPDetachClause>())
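
Source-level trigger for the new flag, as a sketch; the clause spelling follows the OMPThreadsetClause/OMPC_THREADSET_omp_pool names above, and the 0x80 bit matches the FreeAgentFlag enumerator.

// Sketch: a task whose threadset clause is omp_pool now gets FreeAgentFlag
// (0x80) ORed into the flags passed to the task-allocation runtime call.
void produce_work() {
  #pragma omp task threadset(omp_pool)
  {
    // body may execute on a free-agent thread rather than a team thread
  }
}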
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index f49a5af..9eab709 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -647,8 +647,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_ballot_w64: {
llvm::Type *ResultType = ConvertType(E->getType());
llvm::Value *Src = EmitScalarExpr(E->getArg(0));
- Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, { ResultType });
- return Builder.CreateCall(F, { Src });
+ Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {ResultType});
+ return Builder.CreateCall(F, {Src});
}
case AMDGPU::BI__builtin_amdgcn_inverse_ballot_w32:
case AMDGPU::BI__builtin_amdgcn_inverse_ballot_w64: {
@@ -1139,6 +1139,83 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f16_f32:
return emitAMDGCNImageOverloadedReturnType(
*this, E, Intrinsic::amdgcn_image_sample_cube, false);
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f32_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f16_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_lz_1d, false);
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f32_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f16_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_l_1d, false);
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f32_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f16_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_d_1d, false);
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f32_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f16_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_f32_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_lz_2d, false);
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f32_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f16_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_f32_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_l_2d, false);
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f32_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f16_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_f32_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_d_2d, false);
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f32_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f16_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_lz_3d, false);
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f32_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f16_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_l_3d, false);
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f32_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f16_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_d_3d, false);
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f32_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f16_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_lz_cube, false);
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f32_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f16_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_l_cube, false);
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f32_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f16_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_lz_1darray, false);
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f32_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f16_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_l_1darray, false);
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f32_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f16_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_d_1darray, false);
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f32_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f16_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_f32_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_lz_2darray, false);
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f32_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f16_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_f32_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_l_2darray, false);
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f32_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f16_f32:
+ case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_f32_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_d_2darray, false);
+ case clang::AMDGPU::BI__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_gather4_lz_2d, false);
case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
llvm::FixedVectorType *VT = FixedVectorType::get(Builder.getInt32Ty(), 8);
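
All of the new image-sample cases funnel into emitAMDGCNImageOverloadedReturnType with only the intrinsic ID varying; a condensed sketch of that pattern (hypothetical helper; assumes the builtin's result type is the sole intrinsic overload):

#include "CodeGenFunction.h"

// Hypothetical condensed form: the _v4f32/_v4f16/_f32 suffix of the
// builtin only changes the overload type, not the intrinsic chosen.
static llvm::CallInst *emitImageOp(clang::CodeGen::CodeGenFunction &CGF,
                                   llvm::Intrinsic::ID IID,
                                   llvm::Type *RetTy,
                                   llvm::ArrayRef<llvm::Value *> Args) {
  llvm::Function *F = CGF.CGM.getIntrinsic(IID, {RetTy});
  return CGF.Builder.CreateCall(F, Args);
}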
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index 60f9b86..15fa78d 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -1193,14 +1193,22 @@ static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
- NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
- NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
+ NEONMAP1(vaddv_s16, vector_reduce_add, Add1ArgType),
+ NEONMAP1(vaddv_s32, vector_reduce_add, Add1ArgType),
+ NEONMAP1(vaddv_s8, vector_reduce_add, Add1ArgType),
+ NEONMAP1(vaddv_u16, vector_reduce_add, Add1ArgType),
+ NEONMAP1(vaddv_u32, vector_reduce_add, Add1ArgType),
+ NEONMAP1(vaddv_u8, vector_reduce_add, Add1ArgType),
NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
- NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
- NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
- NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
- NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
+ NEONMAP1(vaddvq_s16, vector_reduce_add, Add1ArgType),
+ NEONMAP1(vaddvq_s32, vector_reduce_add, Add1ArgType),
+ NEONMAP1(vaddvq_s64, vector_reduce_add, Add1ArgType),
+ NEONMAP1(vaddvq_s8, vector_reduce_add, Add1ArgType),
+ NEONMAP1(vaddvq_u16, vector_reduce_add, Add1ArgType),
+ NEONMAP1(vaddvq_u32, vector_reduce_add, Add1ArgType),
+ NEONMAP1(vaddvq_u64, vector_reduce_add, Add1ArgType),
+ NEONMAP1(vaddvq_u8, vector_reduce_add, Add1ArgType),
NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
@@ -1243,27 +1251,43 @@ static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
- NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
- NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
+ NEONMAP1(vmaxv_s16, vector_reduce_smax, Add1ArgType),
+ NEONMAP1(vmaxv_s32, vector_reduce_smax, Add1ArgType),
+ NEONMAP1(vmaxv_s8, vector_reduce_smax, Add1ArgType),
+ NEONMAP1(vmaxv_u16, vector_reduce_umax, Add1ArgType),
+ NEONMAP1(vmaxv_u32, vector_reduce_umax, Add1ArgType),
+ NEONMAP1(vmaxv_u8, vector_reduce_umax, Add1ArgType),
NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
- NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
- NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
+ NEONMAP1(vmaxvq_s16, vector_reduce_smax, Add1ArgType),
+ NEONMAP1(vmaxvq_s32, vector_reduce_smax, Add1ArgType),
+ NEONMAP1(vmaxvq_s8, vector_reduce_smax, Add1ArgType),
+ NEONMAP1(vmaxvq_u16, vector_reduce_umax, Add1ArgType),
+ NEONMAP1(vmaxvq_u32, vector_reduce_umax, Add1ArgType),
+ NEONMAP1(vmaxvq_u8, vector_reduce_umax, Add1ArgType),
NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
- NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
- NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
+ NEONMAP1(vminv_s16, vector_reduce_smin, Add1ArgType),
+ NEONMAP1(vminv_s32, vector_reduce_smin, Add1ArgType),
+ NEONMAP1(vminv_s8, vector_reduce_smin, Add1ArgType),
+ NEONMAP1(vminv_u16, vector_reduce_umin, Add1ArgType),
+ NEONMAP1(vminv_u32, vector_reduce_umin, Add1ArgType),
+ NEONMAP1(vminv_u8, vector_reduce_umin, Add1ArgType),
NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
- NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
- NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
+ NEONMAP1(vminvq_s16, vector_reduce_smin, Add1ArgType),
+ NEONMAP1(vminvq_s32, vector_reduce_smin, Add1ArgType),
+ NEONMAP1(vminvq_s8, vector_reduce_smin, Add1ArgType),
+ NEONMAP1(vminvq_u16, vector_reduce_umin, Add1ArgType),
+ NEONMAP1(vminvq_u32, vector_reduce_umin, Add1ArgType),
+ NEONMAP1(vminvq_u8, vector_reduce_umin, Add1ArgType),
NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
- NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
- NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
+ NEONMAP1(vpaddd_s64, vector_reduce_add, Add1ArgType),
+ NEONMAP1(vpaddd_u64, vector_reduce_add, Add1ArgType),
NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
@@ -7067,127 +7091,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Int = Intrinsic::bitreverse;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
}
- case NEON::BI__builtin_neon_vaddv_u8:
- // FIXME: These are handled by the AArch64 scalar code.
- usgn = true;
- [[fallthrough]];
- case NEON::BI__builtin_neon_vaddv_s8: {
- Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int8Ty, 8);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
- return Builder.CreateTrunc(Ops[0], Int8Ty);
- }
- case NEON::BI__builtin_neon_vaddv_u16:
- usgn = true;
- [[fallthrough]];
- case NEON::BI__builtin_neon_vaddv_s16: {
- Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int16Ty, 4);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
- return Builder.CreateTrunc(Ops[0], Int16Ty);
- }
- case NEON::BI__builtin_neon_vaddvq_u8:
- usgn = true;
- [[fallthrough]];
- case NEON::BI__builtin_neon_vaddvq_s8: {
- Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int8Ty, 16);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
- return Builder.CreateTrunc(Ops[0], Int8Ty);
- }
- case NEON::BI__builtin_neon_vaddvq_u16:
- usgn = true;
- [[fallthrough]];
- case NEON::BI__builtin_neon_vaddvq_s16: {
- Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int16Ty, 8);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
- return Builder.CreateTrunc(Ops[0], Int16Ty);
- }
- case NEON::BI__builtin_neon_vmaxv_u8: {
- Int = Intrinsic::aarch64_neon_umaxv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int8Ty, 8);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
- return Builder.CreateTrunc(Ops[0], Int8Ty);
- }
- case NEON::BI__builtin_neon_vmaxv_u16: {
- Int = Intrinsic::aarch64_neon_umaxv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int16Ty, 4);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
- return Builder.CreateTrunc(Ops[0], Int16Ty);
- }
- case NEON::BI__builtin_neon_vmaxvq_u8: {
- Int = Intrinsic::aarch64_neon_umaxv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int8Ty, 16);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
- return Builder.CreateTrunc(Ops[0], Int8Ty);
- }
- case NEON::BI__builtin_neon_vmaxvq_u16: {
- Int = Intrinsic::aarch64_neon_umaxv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int16Ty, 8);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
- return Builder.CreateTrunc(Ops[0], Int16Ty);
- }
- case NEON::BI__builtin_neon_vmaxv_s8: {
- Int = Intrinsic::aarch64_neon_smaxv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int8Ty, 8);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
- return Builder.CreateTrunc(Ops[0], Int8Ty);
- }
- case NEON::BI__builtin_neon_vmaxv_s16: {
- Int = Intrinsic::aarch64_neon_smaxv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int16Ty, 4);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
- return Builder.CreateTrunc(Ops[0], Int16Ty);
- }
- case NEON::BI__builtin_neon_vmaxvq_s8: {
- Int = Intrinsic::aarch64_neon_smaxv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int8Ty, 16);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
- return Builder.CreateTrunc(Ops[0], Int8Ty);
- }
- case NEON::BI__builtin_neon_vmaxvq_s16: {
- Int = Intrinsic::aarch64_neon_smaxv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int16Ty, 8);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
- return Builder.CreateTrunc(Ops[0], Int16Ty);
- }
case NEON::BI__builtin_neon_vmaxv_f16: {
Int = Intrinsic::aarch64_neon_fmaxv;
Ty = HalfTy;
@@ -7206,78 +7109,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
return Builder.CreateTrunc(Ops[0], HalfTy);
}
- case NEON::BI__builtin_neon_vminv_u8: {
- Int = Intrinsic::aarch64_neon_uminv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int8Ty, 8);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
- return Builder.CreateTrunc(Ops[0], Int8Ty);
- }
- case NEON::BI__builtin_neon_vminv_u16: {
- Int = Intrinsic::aarch64_neon_uminv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int16Ty, 4);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
- return Builder.CreateTrunc(Ops[0], Int16Ty);
- }
- case NEON::BI__builtin_neon_vminvq_u8: {
- Int = Intrinsic::aarch64_neon_uminv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int8Ty, 16);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
- return Builder.CreateTrunc(Ops[0], Int8Ty);
- }
- case NEON::BI__builtin_neon_vminvq_u16: {
- Int = Intrinsic::aarch64_neon_uminv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int16Ty, 8);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
- return Builder.CreateTrunc(Ops[0], Int16Ty);
- }
- case NEON::BI__builtin_neon_vminv_s8: {
- Int = Intrinsic::aarch64_neon_sminv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int8Ty, 8);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
- return Builder.CreateTrunc(Ops[0], Int8Ty);
- }
- case NEON::BI__builtin_neon_vminv_s16: {
- Int = Intrinsic::aarch64_neon_sminv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int16Ty, 4);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
- return Builder.CreateTrunc(Ops[0], Int16Ty);
- }
- case NEON::BI__builtin_neon_vminvq_s8: {
- Int = Intrinsic::aarch64_neon_sminv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int8Ty, 16);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
- return Builder.CreateTrunc(Ops[0], Int8Ty);
- }
- case NEON::BI__builtin_neon_vminvq_s16: {
- Int = Intrinsic::aarch64_neon_sminv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int16Ty, 8);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
- return Builder.CreateTrunc(Ops[0], Int16Ty);
- }
case NEON::BI__builtin_neon_vminv_f16: {
Int = Intrinsic::aarch64_neon_fminv;
Ty = HalfTy;
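
Net effect of the NEONMAP1 rewrites and the deleted hand-rolled cases above, as a compile-check sketch (build with clang targeting AArch64; the IR named in the comments is the expected lowering, not verified output):

#include <arm_neon.h>

// Each across-vector builtin now emits one llvm.vector.reduce.* call on
// the element type, replacing the old aarch64.neon.*v call that returned
// i32 followed by a trunc back to the element width.
uint8_t  sum8 (uint8x8_t v)  { return vaddv_u8(v);   } // llvm.vector.reduce.add.v8i8
int16_t  max16(int16x8_t v)  { return vmaxvq_s16(v); } // llvm.vector.reduce.smax.v8i16
uint32_t min32(uint32x2_t v) { return vminv_u32(v);  } // llvm.vector.reduce.umin.v2i32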