aboutsummaryrefslogtreecommitdiff
path: root/clang/lib/CodeGen
diff options
context:
space:
mode:
authorMomchil Velikov <momchil.velikov@arm.com>2023-11-13 16:01:07 +0000
committerGitHub <noreply@github.com>2023-11-13 16:01:07 +0000
commit96ef623a7525f71184b6111f8389e45260ef13ff (patch)
tree32a4250611d2c3445cb66f89de50610d6d51d42a /clang/lib/CodeGen
parentdc5bdcbcaaadf3c479a197f9a183c3744f02beb9 (diff)
downloadllvm-96ef623a7525f71184b6111f8389e45260ef13ff.zip
llvm-96ef623a7525f71184b6111f8389e45260ef13ff.tar.gz
llvm-96ef623a7525f71184b6111f8389e45260ef13ff.tar.bz2
[AArch64] Cast predicate operand of SVE gather loads/scatter stores to the parameter type of the intrinsic (NFC) (#71289)
When emitting LLVM IR for gather loads/scatter stores, the predicate parameter is cast to a type that depends on the loaded, resp. stored type. That's correct for operations where we have a predicate per lane; however, it is not correct for quadword loads and stores (`LD1Q`, `ST1Q`), where the predicate is per 128-bit chunk, independent of the ACLE intrinsic type. This can be universally handled by casting to the corresponding parameter type of the intrinsic. The intrinsic itself should be defined in a way that enforces relations between parameter types.
Diffstat (limited to 'clang/lib/CodeGen')
-rw-r--r--clang/lib/CodeGen/CGBuiltin.cpp24
1 files changed, 15 insertions, 9 deletions
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 24fcf23..09309a3 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -9482,13 +9482,6 @@ Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags,
auto *OverloadedTy =
llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), ResultTy);
- // At the ACLE level there's only one predicate type, svbool_t, which is
- // mapped to <n x 16 x i1>. However, this might be incompatible with the
- // actual type being loaded. For example, when loading doubles (i64) the
- // predicated should be <n x 2 x i1> instead. At the IR level the type of
- // the predicate and the data being loaded must match. Cast accordingly.
- Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy);
-
Function *F = nullptr;
if (Ops[1]->getType()->isVectorTy())
// This is the "vector base, scalar offset" case. In order to uniquely
@@ -9502,6 +9495,16 @@ Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags,
// intrinsic.
F = CGM.getIntrinsic(IntID, OverloadedTy);
+ // At the ACLE level there's only one predicate type, svbool_t, which is
+ // mapped to <n x 16 x i1>. However, this might be incompatible with the
+ // actual type being loaded. For example, when loading doubles (i64) the
+ // predicate should be <n x 2 x i1> instead. At the IR level the type of
+ // the predicate and the data being loaded must match. Cast to the type
+ // expected by the intrinsic. The intrinsic itself should be defined in
+ // a way that enforces relations between parameter types.
+ Ops[0] = EmitSVEPredicateCast(
+ Ops[0], cast<llvm::ScalableVectorType>(F->getArg(0)->getType()));
+
// Pass 0 when the offset is missing. This can only be applied when using
// the "vector base" addressing mode for which ACLE allows no offset. The
// corresponding LLVM IR always requires an offset.
@@ -9566,8 +9569,11 @@ Value *CodeGenFunction::EmitSVEScatterStore(const SVETypeFlags &TypeFlags,
// mapped to <n x 16 x i1>. However, this might be incompatible with the
// actual type being stored. For example, when storing doubles (i64) the
// predicated should be <n x 2 x i1> instead. At the IR level the type of
- // the predicate and the data being stored must match. Cast accordingly.
- Ops[1] = EmitSVEPredicateCast(Ops[1], OverloadedTy);
+ // the predicate and the data being stored must match. Cast to the type
+ // expected by the intrinsic. The intrinsic itself should be defined in
+ // a way that enforces relations between parameter types.
+ Ops[1] = EmitSVEPredicateCast(
+ Ops[1], cast<llvm::ScalableVectorType>(F->getArg(1)->getType()));
// For "vector base, scalar index" scale the index so that it becomes a
// scalar offset.