aboutsummaryrefslogtreecommitdiff
path: root/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'clang/lib/CodeGen/TargetBuiltins/ARM.cpp')
-rw-r--r--clang/lib/CodeGen/TargetBuiltins/ARM.cpp249
1 files changed, 40 insertions, 209 deletions
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index 60f9b86..15fa78d 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -1193,14 +1193,22 @@ static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
- NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
- NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
+ NEONMAP1(vaddv_s16, vector_reduce_add, Add1ArgType),
+ NEONMAP1(vaddv_s32, vector_reduce_add, Add1ArgType),
+ NEONMAP1(vaddv_s8, vector_reduce_add, Add1ArgType),
+ NEONMAP1(vaddv_u16, vector_reduce_add, Add1ArgType),
+ NEONMAP1(vaddv_u32, vector_reduce_add, Add1ArgType),
+ NEONMAP1(vaddv_u8, vector_reduce_add, Add1ArgType),
NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
- NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
- NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
- NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
- NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
+ NEONMAP1(vaddvq_s16, vector_reduce_add, Add1ArgType),
+ NEONMAP1(vaddvq_s32, vector_reduce_add, Add1ArgType),
+ NEONMAP1(vaddvq_s64, vector_reduce_add, Add1ArgType),
+ NEONMAP1(vaddvq_s8, vector_reduce_add, Add1ArgType),
+ NEONMAP1(vaddvq_u16, vector_reduce_add, Add1ArgType),
+ NEONMAP1(vaddvq_u32, vector_reduce_add, Add1ArgType),
+ NEONMAP1(vaddvq_u64, vector_reduce_add, Add1ArgType),
+ NEONMAP1(vaddvq_u8, vector_reduce_add, Add1ArgType),
NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
@@ -1243,27 +1251,43 @@ static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
- NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
- NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
+ NEONMAP1(vmaxv_s16, vector_reduce_smax, Add1ArgType),
+ NEONMAP1(vmaxv_s32, vector_reduce_smax, Add1ArgType),
+ NEONMAP1(vmaxv_s8, vector_reduce_smax, Add1ArgType),
+ NEONMAP1(vmaxv_u16, vector_reduce_umax, Add1ArgType),
+ NEONMAP1(vmaxv_u32, vector_reduce_umax, Add1ArgType),
+ NEONMAP1(vmaxv_u8, vector_reduce_umax, Add1ArgType),
NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
- NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
- NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
+ NEONMAP1(vmaxvq_s16, vector_reduce_smax, Add1ArgType),
+ NEONMAP1(vmaxvq_s32, vector_reduce_smax, Add1ArgType),
+ NEONMAP1(vmaxvq_s8, vector_reduce_smax, Add1ArgType),
+ NEONMAP1(vmaxvq_u16, vector_reduce_umax, Add1ArgType),
+ NEONMAP1(vmaxvq_u32, vector_reduce_umax, Add1ArgType),
+ NEONMAP1(vmaxvq_u8, vector_reduce_umax, Add1ArgType),
NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
- NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
- NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
+ NEONMAP1(vminv_s16, vector_reduce_smin, Add1ArgType),
+ NEONMAP1(vminv_s32, vector_reduce_smin, Add1ArgType),
+ NEONMAP1(vminv_s8, vector_reduce_smin, Add1ArgType),
+ NEONMAP1(vminv_u16, vector_reduce_umin, Add1ArgType),
+ NEONMAP1(vminv_u32, vector_reduce_umin, Add1ArgType),
+ NEONMAP1(vminv_u8, vector_reduce_umin, Add1ArgType),
NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
- NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
- NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
+ NEONMAP1(vminvq_s16, vector_reduce_smin, Add1ArgType),
+ NEONMAP1(vminvq_s32, vector_reduce_smin, Add1ArgType),
+ NEONMAP1(vminvq_s8, vector_reduce_smin, Add1ArgType),
+ NEONMAP1(vminvq_u16, vector_reduce_umin, Add1ArgType),
+ NEONMAP1(vminvq_u32, vector_reduce_umin, Add1ArgType),
+ NEONMAP1(vminvq_u8, vector_reduce_umin, Add1ArgType),
NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
- NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
- NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
+ NEONMAP1(vpaddd_s64, vector_reduce_add, Add1ArgType),
+ NEONMAP1(vpaddd_u64, vector_reduce_add, Add1ArgType),
NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
@@ -7067,127 +7091,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Int = Intrinsic::bitreverse;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
}
- case NEON::BI__builtin_neon_vaddv_u8:
- // FIXME: These are handled by the AArch64 scalar code.
- usgn = true;
- [[fallthrough]];
- case NEON::BI__builtin_neon_vaddv_s8: {
- Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int8Ty, 8);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
- return Builder.CreateTrunc(Ops[0], Int8Ty);
- }
- case NEON::BI__builtin_neon_vaddv_u16:
- usgn = true;
- [[fallthrough]];
- case NEON::BI__builtin_neon_vaddv_s16: {
- Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int16Ty, 4);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
- return Builder.CreateTrunc(Ops[0], Int16Ty);
- }
- case NEON::BI__builtin_neon_vaddvq_u8:
- usgn = true;
- [[fallthrough]];
- case NEON::BI__builtin_neon_vaddvq_s8: {
- Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int8Ty, 16);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
- return Builder.CreateTrunc(Ops[0], Int8Ty);
- }
- case NEON::BI__builtin_neon_vaddvq_u16:
- usgn = true;
- [[fallthrough]];
- case NEON::BI__builtin_neon_vaddvq_s16: {
- Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int16Ty, 8);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
- return Builder.CreateTrunc(Ops[0], Int16Ty);
- }
- case NEON::BI__builtin_neon_vmaxv_u8: {
- Int = Intrinsic::aarch64_neon_umaxv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int8Ty, 8);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
- return Builder.CreateTrunc(Ops[0], Int8Ty);
- }
- case NEON::BI__builtin_neon_vmaxv_u16: {
- Int = Intrinsic::aarch64_neon_umaxv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int16Ty, 4);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
- return Builder.CreateTrunc(Ops[0], Int16Ty);
- }
- case NEON::BI__builtin_neon_vmaxvq_u8: {
- Int = Intrinsic::aarch64_neon_umaxv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int8Ty, 16);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
- return Builder.CreateTrunc(Ops[0], Int8Ty);
- }
- case NEON::BI__builtin_neon_vmaxvq_u16: {
- Int = Intrinsic::aarch64_neon_umaxv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int16Ty, 8);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
- return Builder.CreateTrunc(Ops[0], Int16Ty);
- }
- case NEON::BI__builtin_neon_vmaxv_s8: {
- Int = Intrinsic::aarch64_neon_smaxv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int8Ty, 8);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
- return Builder.CreateTrunc(Ops[0], Int8Ty);
- }
- case NEON::BI__builtin_neon_vmaxv_s16: {
- Int = Intrinsic::aarch64_neon_smaxv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int16Ty, 4);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
- return Builder.CreateTrunc(Ops[0], Int16Ty);
- }
- case NEON::BI__builtin_neon_vmaxvq_s8: {
- Int = Intrinsic::aarch64_neon_smaxv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int8Ty, 16);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
- return Builder.CreateTrunc(Ops[0], Int8Ty);
- }
- case NEON::BI__builtin_neon_vmaxvq_s16: {
- Int = Intrinsic::aarch64_neon_smaxv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int16Ty, 8);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
- return Builder.CreateTrunc(Ops[0], Int16Ty);
- }
case NEON::BI__builtin_neon_vmaxv_f16: {
Int = Intrinsic::aarch64_neon_fmaxv;
Ty = HalfTy;
@@ -7206,78 +7109,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
return Builder.CreateTrunc(Ops[0], HalfTy);
}
- case NEON::BI__builtin_neon_vminv_u8: {
- Int = Intrinsic::aarch64_neon_uminv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int8Ty, 8);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
- return Builder.CreateTrunc(Ops[0], Int8Ty);
- }
- case NEON::BI__builtin_neon_vminv_u16: {
- Int = Intrinsic::aarch64_neon_uminv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int16Ty, 4);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
- return Builder.CreateTrunc(Ops[0], Int16Ty);
- }
- case NEON::BI__builtin_neon_vminvq_u8: {
- Int = Intrinsic::aarch64_neon_uminv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int8Ty, 16);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
- return Builder.CreateTrunc(Ops[0], Int8Ty);
- }
- case NEON::BI__builtin_neon_vminvq_u16: {
- Int = Intrinsic::aarch64_neon_uminv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int16Ty, 8);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
- return Builder.CreateTrunc(Ops[0], Int16Ty);
- }
- case NEON::BI__builtin_neon_vminv_s8: {
- Int = Intrinsic::aarch64_neon_sminv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int8Ty, 8);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
- return Builder.CreateTrunc(Ops[0], Int8Ty);
- }
- case NEON::BI__builtin_neon_vminv_s16: {
- Int = Intrinsic::aarch64_neon_sminv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int16Ty, 4);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
- return Builder.CreateTrunc(Ops[0], Int16Ty);
- }
- case NEON::BI__builtin_neon_vminvq_s8: {
- Int = Intrinsic::aarch64_neon_sminv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int8Ty, 16);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
- return Builder.CreateTrunc(Ops[0], Int8Ty);
- }
- case NEON::BI__builtin_neon_vminvq_s16: {
- Int = Intrinsic::aarch64_neon_sminv;
- Ty = Int32Ty;
- VTy = llvm::FixedVectorType::get(Int16Ty, 8);
- llvm::Type *Tys[2] = { Ty, VTy };
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
- return Builder.CreateTrunc(Ops[0], Int16Ty);
- }
case NEON::BI__builtin_neon_vminv_f16: {
Int = Intrinsic::aarch64_neon_fminv;
Ty = HalfTy;