aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.github/workflows/build-ci-container-tooling.yml2
-rw-r--r--.github/workflows/containers/github-action-ci-tooling/Dockerfile7
-rw-r--r--clang-tools-extra/clang-tidy/readability/ImplicitBoolConversionCheck.cpp3
-rw-r--r--clang-tools-extra/docs/ReleaseNotes.rst5
-rw-r--r--clang-tools-extra/test/clang-tidy/checkers/readability/implicit-bool-conversion.cpp10
-rw-r--r--clang/include/clang/Driver/Distro.h3
-rw-r--r--clang/lib/AST/ByteCode/InterpBuiltin.cpp60
-rw-r--r--clang/lib/Driver/Distro.cpp1
-rw-r--r--clang/lib/Sema/SemaConcept.cpp3
-rw-r--r--clang/test/CXX/expr/expr.prim/expr.prim.req/compound-requirement.cpp4
-rw-r--r--clang/test/CXX/expr/expr.prim/expr.prim.req/nested-requirement.cpp2
-rw-r--r--clang/test/CXX/temp/temp.param/p10-2a.cpp4
-rw-r--r--clang/test/SemaHLSL/BuiltIns/Buffers.hlsl6
-rw-r--r--clang/test/SemaHLSL/BuiltIns/RWBuffers.hlsl6
-rw-r--r--clang/test/SemaTemplate/concepts-recursive-inst.cpp4
-rw-r--r--clang/test/SemaTemplate/concepts.cpp8
-rw-r--r--llvm/include/llvm/ADT/PointerUnion.h8
-rw-r--r--llvm/include/llvm/ADT/STLExtras.h8
-rw-r--r--llvm/include/llvm/ADT/Sequence.h4
-rw-r--r--llvm/include/llvm/IR/ConstantFPRange.h8
-rw-r--r--llvm/lib/IR/ConstantFPRange.cpp70
-rw-r--r--llvm/lib/Target/AArch64/AArch64ISelLowering.cpp3
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrInfo.cpp86
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h6
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorize.cpp16
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp40
-rw-r--r--llvm/test/CodeGen/AArch64/vecreduce-add.ll33
-rw-r--r--llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll78
-rw-r--r--llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar.ll182
-rw-r--r--llvm/unittests/IR/ConstantFPRangeTest.cpp186
-rw-r--r--llvm/unittests/Support/JobserverTest.cpp1
-rw-r--r--llvm/utils/lit/lit/llvm/config.py3
-rw-r--r--mlir/lib/Dialect/LLVMIR/IR/ROCDLDialect.cpp11
-rw-r--r--mlir/test/Dialect/LLVMIR/inlining-rocdl.mlir14
-rw-r--r--offload/libomptarget/omptarget.cpp2
-rw-r--r--utils/bazel/llvm-project-overlay/clang/BUILD.bazel2
36 files changed, 668 insertions, 221 deletions
diff --git a/.github/workflows/build-ci-container-tooling.yml b/.github/workflows/build-ci-container-tooling.yml
index 42db470..c77c7861 100644
--- a/.github/workflows/build-ci-container-tooling.yml
+++ b/.github/workflows/build-ci-container-tooling.yml
@@ -72,7 +72,7 @@ jobs:
- name: Test Container
run: |
# Use --pull=never to ensure we are testing the just built image.
- podman run --pull=never --rm -it ${{ steps.vars.outputs.container-name-format-tag }} /usr/bin/bash -x -c 'cd $HOME && clang-format --version | grep version && black --version | grep black'
+ podman run --pull=never --rm -it ${{ steps.vars.outputs.container-name-format-tag }} /usr/bin/bash -x -c 'cd $HOME && clang-format --version | grep version && git-clang-format -h | grep usage && black --version | grep black'
podman run --pull=never --rm -it ${{ steps.vars.outputs.container-name-lint-tag }} /usr/bin/bash -x -c 'cd $HOME && clang-tidy --version | grep version && clang-tidy-diff.py -h | grep usage'
push-ci-container:
diff --git a/.github/workflows/containers/github-action-ci-tooling/Dockerfile b/.github/workflows/containers/github-action-ci-tooling/Dockerfile
index 7a5d8a3..7d64562 100644
--- a/.github/workflows/containers/github-action-ci-tooling/Dockerfile
+++ b/.github/workflows/containers/github-action-ci-tooling/Dockerfile
@@ -10,7 +10,8 @@ RUN apt-get update && \
tar -xvJf llvm.tar.xz -C /llvm-extract \
# Only unpack these tools to save space on Github runner.
LLVM-${LLVM_VERSION}-Linux-X64/bin/clang-tidy \
- LLVM-${LLVM_VERSION}-Linux-X64/bin/clang-format && \
+ LLVM-${LLVM_VERSION}-Linux-X64/bin/clang-format \
+ LLVM-${LLVM_VERSION}-Linux-X64/bin/git-clang-format && \
rm llvm.tar.xz
@@ -35,7 +36,9 @@ RUN apt-get update && \
FROM base AS ci-container-code-format
ARG LLVM_VERSION
-COPY --from=llvm-downloader /llvm-extract/LLVM-${LLVM_VERSION}-Linux-X64/bin/clang-format ${LLVM_SYSROOT}/bin/clang-format
+COPY --from=llvm-downloader /llvm-extract/LLVM-${LLVM_VERSION}-Linux-X64/bin/clang-format \
+ /llvm-extract/LLVM-${LLVM_VERSION}-Linux-X64/bin/git-clang-format \
+ ${LLVM_SYSROOT}/bin/
ENV PATH=${LLVM_SYSROOT}/bin:${PATH}
diff --git a/clang-tools-extra/clang-tidy/readability/ImplicitBoolConversionCheck.cpp b/clang-tools-extra/clang-tidy/readability/ImplicitBoolConversionCheck.cpp
index 3fb8560..bfdf9cb 100644
--- a/clang-tools-extra/clang-tidy/readability/ImplicitBoolConversionCheck.cpp
+++ b/clang-tools-extra/clang-tidy/readability/ImplicitBoolConversionCheck.cpp
@@ -89,7 +89,8 @@ static void fixGenericExprCastToBool(DiagnosticBuilder &Diag,
const Expr *SubExpr = Cast->getSubExpr();
- bool NeedInnerParens = utils::fixit::areParensNeededForStatement(*SubExpr);
+ bool NeedInnerParens =
+ utils::fixit::areParensNeededForStatement(*SubExpr->IgnoreImpCasts());
bool NeedOuterParens =
Parent != nullptr && utils::fixit::areParensNeededForStatement(*Parent);
diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst
index 216d3f5..33cc401 100644
--- a/clang-tools-extra/docs/ReleaseNotes.rst
+++ b/clang-tools-extra/docs/ReleaseNotes.rst
@@ -402,6 +402,11 @@ Changes in existing checks
declarations and macros in system headers. The documentation is also improved
to differentiate the general options from the specific ones.
+- Improved :doc:`readability-implicit-bool-conversion
+ <clang-tidy/checks/readability/implicit-bool-conversion>` check by correctly
+ adding parentheses when the inner expression is implicitly converted
+ multiple times.
+
- Improved :doc:`readability-qualified-auto
<clang-tidy/checks/readability/qualified-auto>` check by adding the option
`IgnoreAliasing`, that allows not looking at underlying types of type aliases.
diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/implicit-bool-conversion.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/implicit-bool-conversion.cpp
index f3e8bf0..a0e1fd3 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/readability/implicit-bool-conversion.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/readability/implicit-bool-conversion.cpp
@@ -547,3 +547,13 @@ namespace PR71848 {
// CHECK-FIXES: return static_cast<int>( foo );
}
}
+
+namespace PR161318 {
+ int AddParenOutsideOfCompoundAssignOp() {
+ int val = -1;
+ while(val >>= 7) {
+ // CHECK-MESSAGES: :[[@LINE-1]]:11: warning: implicit conversion 'int' -> 'bool' [readability-implicit-bool-conversion]
+ // CHECK-FIXES: while((val >>= 7) != 0) {
+ }
+ }
+}
diff --git a/clang/include/clang/Driver/Distro.h b/clang/include/clang/Driver/Distro.h
index 008de0d..a515cbf 100644
--- a/clang/include/clang/Driver/Distro.h
+++ b/clang/include/clang/Driver/Distro.h
@@ -79,6 +79,7 @@ public:
UbuntuOracular,
UbuntuPlucky,
UbuntuQuesting,
+ UbuntuResolute,
UnknownDistro
};
@@ -130,7 +131,7 @@ public:
}
bool IsUbuntu() const {
- return DistroVal >= UbuntuMaverick && DistroVal <= UbuntuQuesting;
+ return DistroVal >= UbuntuMaverick && DistroVal <= UbuntuResolute;
}
bool IsAlpineLinux() const { return DistroVal == AlpineLinux; }
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 922d679..3811fb0 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -1633,8 +1633,8 @@ static bool interp__builtin_elementwise_countzeroes(InterpState &S,
const InterpFrame *Frame,
const CallExpr *Call,
unsigned BuiltinID) {
- const bool HasZeroArg = Call->getNumArgs() == 2;
- const bool IsCTTZ = BuiltinID == Builtin::BI__builtin_elementwise_ctzg;
+ bool HasZeroArg = Call->getNumArgs() == 2;
+ bool IsCTTZ = BuiltinID == Builtin::BI__builtin_elementwise_ctzg;
assert(Call->getNumArgs() == 1 || HasZeroArg);
if (Call->getArg(0)->getType()->isIntegerType()) {
PrimType ArgT = *S.getContext().classify(Call->getArg(0)->getType());
@@ -2447,18 +2447,18 @@ interp__builtin_x86_pack(InterpState &S, CodePtr, const CallExpr *E,
const Pointer &Dst = S.Stk.peek<Pointer>();
const ASTContext &ASTCtx = S.getASTContext();
- const unsigned SrcBits = ASTCtx.getIntWidth(VT0->getElementType());
- const unsigned LHSVecLen = VT0->getNumElements();
- const unsigned SrcPerLane = 128 / SrcBits;
- const unsigned Lanes = LHSVecLen * SrcBits / 128;
+ unsigned SrcBits = ASTCtx.getIntWidth(VT0->getElementType());
+ unsigned LHSVecLen = VT0->getNumElements();
+ unsigned SrcPerLane = 128 / SrcBits;
+ unsigned Lanes = LHSVecLen * SrcBits / 128;
PrimType SrcT = *S.getContext().classify(VT0->getElementType());
PrimType DstT = *S.getContext().classify(getElemType(Dst));
- const bool IsUnsigend = getElemType(Dst)->isUnsignedIntegerType();
+ bool IsUnsigend = getElemType(Dst)->isUnsignedIntegerType();
for (unsigned Lane = 0; Lane != Lanes; ++Lane) {
- const unsigned BaseSrc = Lane * SrcPerLane;
- const unsigned BaseDst = Lane * (2 * SrcPerLane);
+ unsigned BaseSrc = Lane * SrcPerLane;
+ unsigned BaseDst = Lane * (2 * SrcPerLane);
for (unsigned I = 0; I != SrcPerLane; ++I) {
INT_TYPE_SWITCH_NO_BOOL(SrcT, {
@@ -2596,9 +2596,9 @@ static bool interp__builtin_elementwise_triop_fp(
FPOptions FPO = Call->getFPFeaturesInEffect(S.Ctx.getLangOpts());
llvm::RoundingMode RM = getRoundingMode(FPO);
- const QualType Arg1Type = Call->getArg(0)->getType();
- const QualType Arg2Type = Call->getArg(1)->getType();
- const QualType Arg3Type = Call->getArg(2)->getType();
+ QualType Arg1Type = Call->getArg(0)->getType();
+ QualType Arg2Type = Call->getArg(1)->getType();
+ QualType Arg3Type = Call->getArg(2)->getType();
// Non-vector floating point types.
if (!Arg1Type->isVectorType()) {
@@ -2621,16 +2621,16 @@ static bool interp__builtin_elementwise_triop_fp(
assert(Arg1Type->isVectorType() && Arg2Type->isVectorType() &&
Arg3Type->isVectorType());
- const VectorType *VecT = Arg1Type->castAs<VectorType>();
- const QualType ElemT = VecT->getElementType();
- unsigned NumElems = VecT->getNumElements();
+ const VectorType *VecTy = Arg1Type->castAs<VectorType>();
+ QualType ElemQT = VecTy->getElementType();
+ unsigned NumElems = VecTy->getNumElements();
- assert(ElemT == Arg2Type->castAs<VectorType>()->getElementType() &&
- ElemT == Arg3Type->castAs<VectorType>()->getElementType());
+ assert(ElemQT == Arg2Type->castAs<VectorType>()->getElementType() &&
+ ElemQT == Arg3Type->castAs<VectorType>()->getElementType());
assert(NumElems == Arg2Type->castAs<VectorType>()->getNumElements() &&
NumElems == Arg3Type->castAs<VectorType>()->getNumElements());
- assert(ElemT->isRealFloatingType());
- (void)ElemT;
+ assert(ElemQT->isRealFloatingType());
+ (void)ElemQT;
const Pointer &VZ = S.Stk.pop<Pointer>();
const Pointer &VY = S.Stk.pop<Pointer>();
@@ -2775,7 +2775,7 @@ static bool interp__builtin_elementwise_triop(
}
const auto *VecT = Arg0Type->castAs<VectorType>();
- const PrimType &ElemT = *S.getContext().classify(VecT->getElementType());
+ PrimType ElemT = *S.getContext().classify(VecT->getElementType());
unsigned NumElems = VecT->getNumElements();
bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType();
@@ -2847,9 +2847,9 @@ static bool interp__builtin_x86_insert_subvector(InterpState &S, CodePtr OpPC,
unsigned Lane = static_cast<unsigned>(Index % NumLanes);
unsigned InsertPos = Lane * SubElements;
- PrimType ElemPT = BaseVec.getFieldDesc()->getPrimType();
+ PrimType ElemT = BaseVec.getFieldDesc()->getPrimType();
- TYPE_SWITCH(ElemPT, {
+ TYPE_SWITCH(ElemT, {
for (unsigned I = 0; I != BaseElements; ++I)
Dst.elem<T>(I) = BaseVec.elem<T>(I);
for (unsigned I = 0; I != SubElements; ++I)
@@ -2872,12 +2872,12 @@ static bool interp__builtin_ia32_pternlog(InterpState &S, CodePtr OpPC,
const Pointer &Dst = S.Stk.peek<Pointer>();
unsigned DstLen = A.getNumElems();
- const QualType ElemQT = getElemType(A);
- const OptPrimType ElemPT = S.getContext().classify(ElemQT);
+ QualType ElemQT = getElemType(A);
+ OptPrimType ElemT = S.getContext().classify(ElemQT);
unsigned LaneWidth = S.getASTContext().getTypeSize(ElemQT);
bool DstUnsigned = ElemQT->isUnsignedIntegerOrEnumerationType();
- INT_TYPE_SWITCH_NO_BOOL(*ElemPT, {
+ INT_TYPE_SWITCH_NO_BOOL(*ElemT, {
for (unsigned I = 0; I != DstLen; ++I) {
APInt ALane = A.elem<T>(I).toAPSInt();
APInt BLane = B.elem<T>(I).toAPSInt();
@@ -2916,13 +2916,13 @@ static bool interp__builtin_vec_ext(InterpState &S, CodePtr OpPC,
unsigned Index =
static_cast<unsigned>(ImmAPS.getZExtValue() & (NumElems - 1));
- PrimType ElemPT = Vec.getFieldDesc()->getPrimType();
+ PrimType ElemT = Vec.getFieldDesc()->getPrimType();
// FIXME(#161685): Replace float+int split with a numeric-only type switch
- if (ElemPT == PT_Float) {
+ if (ElemT == PT_Float) {
S.Stk.push<Floating>(Vec.elem<Floating>(Index));
return true;
}
- INT_TYPE_SWITCH_NO_BOOL(ElemPT, {
+ INT_TYPE_SWITCH_NO_BOOL(ElemT, {
APSInt V = Vec.elem<T>(Index).toAPSInt();
pushInteger(S, V, Call->getType());
});
@@ -2947,8 +2947,8 @@ static bool interp__builtin_vec_set(InterpState &S, CodePtr OpPC,
unsigned Index =
static_cast<unsigned>(ImmAPS.getZExtValue() & (NumElems - 1));
- PrimType ElemPT = Base.getFieldDesc()->getPrimType();
- INT_TYPE_SWITCH_NO_BOOL(ElemPT, {
+ PrimType ElemT = Base.getFieldDesc()->getPrimType();
+ INT_TYPE_SWITCH_NO_BOOL(ElemT, {
for (unsigned I = 0; I != NumElems; ++I)
Dst.elem<T>(I) = Base.elem<T>(I);
Dst.elem<T>(Index) = static_cast<T>(ValAPS);
diff --git a/clang/lib/Driver/Distro.cpp b/clang/lib/Driver/Distro.cpp
index 8a5a9fc..838e087 100644
--- a/clang/lib/Driver/Distro.cpp
+++ b/clang/lib/Driver/Distro.cpp
@@ -92,6 +92,7 @@ static Distro::DistroType DetectLsbRelease(llvm::vfs::FileSystem &VFS) {
.Case("oracular", Distro::UbuntuOracular)
.Case("plucky", Distro::UbuntuPlucky)
.Case("questing", Distro::UbuntuQuesting)
+ .Case("resolute", Distro::UbuntuResolute)
.Default(Distro::UnknownDistro);
return Version;
}
diff --git a/clang/lib/Sema/SemaConcept.cpp b/clang/lib/Sema/SemaConcept.cpp
index 9cbd1bd..7c44efd 100644
--- a/clang/lib/Sema/SemaConcept.cpp
+++ b/clang/lib/Sema/SemaConcept.cpp
@@ -614,8 +614,7 @@ ConstraintSatisfactionChecker::SubstitutionInTemplateArguments(
for (unsigned I = 0, MappedIndex = 0; I < Used.size(); I++) {
TemplateArgument Arg;
if (Used[I])
- Arg = S.Context.getCanonicalTemplateArgument(
- CTAI.SugaredConverted[MappedIndex++]);
+ Arg = CTAI.SugaredConverted[MappedIndex++];
if (I < SubstitutedOuterMost.size()) {
SubstitutedOuterMost[I] = Arg;
Offset = I + 1;
diff --git a/clang/test/CXX/expr/expr.prim/expr.prim.req/compound-requirement.cpp b/clang/test/CXX/expr/expr.prim/expr.prim.req/compound-requirement.cpp
index af2dce8..5f1243a 100644
--- a/clang/test/CXX/expr/expr.prim/expr.prim.req/compound-requirement.cpp
+++ b/clang/test/CXX/expr/expr.prim/expr.prim.req/compound-requirement.cpp
@@ -149,7 +149,7 @@ namespace std_example {
template<typename T> constexpr bool is_same_v<T, T> = true;
template<typename T, typename U> concept same_as = is_same_v<T, U>;
- // expected-note@-1 {{because 'is_same_v<int, typename std_example::T2::inner>' evaluated to false}}
+ // expected-note@-1 {{because 'is_same_v<int, typename T2::inner>' evaluated to false}}
static_assert(C1<int>);
static_assert(C1<int*>);
@@ -160,7 +160,7 @@ namespace std_example {
template<typename T> concept C2 =
requires(T x) {
{*x} -> same_as<typename T::inner>;
- // expected-note@-1{{because 'same_as<int, typename std_example::T2::inner>' evaluated to false}}
+ // expected-note@-1{{because 'same_as<int, typename T2::inner>' evaluated to false}}
// expected-note@-2{{because '*x' would be invalid: indirection requires pointer operand ('int' invalid)}}
};
diff --git a/clang/test/CXX/expr/expr.prim/expr.prim.req/nested-requirement.cpp b/clang/test/CXX/expr/expr.prim/expr.prim.req/nested-requirement.cpp
index 70a96be..9fc4906 100644
--- a/clang/test/CXX/expr/expr.prim/expr.prim.req/nested-requirement.cpp
+++ b/clang/test/CXX/expr/expr.prim/expr.prim.req/nested-requirement.cpp
@@ -27,7 +27,7 @@ using r4i = X<void>::r4<int>; // expected-error{{constraints not satisfied for c
// C++ [expr.prim.req.nested] Examples
namespace std_example {
- template<typename U> concept C1 = sizeof(U) == 1; // expected-note{{because 'sizeof(int) == 1' (4 == 1) evaluated to false}}
+ template<typename U> concept C1 = sizeof(U) == 1; // expected-note{{because 'sizeof(decltype(+t)) == 1' (4 == 1) evaluated to false}}
template<typename T> concept D =
requires (T t) {
requires C1<decltype (+t)>; // expected-note{{because 'decltype(+t)' (aka 'int') does not satisfy 'C1'}}
diff --git a/clang/test/CXX/temp/temp.param/p10-2a.cpp b/clang/test/CXX/temp/temp.param/p10-2a.cpp
index c0406f8..4f192d3 100644
--- a/clang/test/CXX/temp/temp.param/p10-2a.cpp
+++ b/clang/test/CXX/temp/temp.param/p10-2a.cpp
@@ -95,8 +95,8 @@ concept OneOf = (is_same_v<T, Ts> || ...); // #OneOf
// expected-note@#OneOf 3{{because 'is_same_v<int, char[1]>' evaluated to false}}
// expected-note@#OneOf 3{{and 'is_same_v<int, char[2]>' evaluated to false}}
// expected-note@#OneOf {{because 'is_same_v<decltype(nullptr), char>' evaluated to false}}
-// expected-note@#OneOf {{because 'is_same_v<std::nullptr_t, char>' evaluated to false}}
-// expected-note@#OneOf {{and 'is_same_v<std::nullptr_t, int>' evaluated to false}}
+// expected-note@#OneOf {{because 'is_same_v<decltype(nullptr), char>' evaluated to false}}
+// expected-note@#OneOf {{and 'is_same_v<decltype(nullptr), int>' evaluated to false}}
// expected-note@#OneOf {{and 'is_same_v<decltype(nullptr), int>' evaluated to false}}
template<OneOf<char[1], char[2]> T, OneOf<int, long, char> U>
diff --git a/clang/test/SemaHLSL/BuiltIns/Buffers.hlsl b/clang/test/SemaHLSL/BuiltIns/Buffers.hlsl
index 999372c..3f0a37d 100644
--- a/clang/test/SemaHLSL/BuiltIns/Buffers.hlsl
+++ b/clang/test/SemaHLSL/BuiltIns/Buffers.hlsl
@@ -20,7 +20,7 @@ Buffer<double2> r4;
// expected-error@+4 {{constraints not satisfied for class template 'Buffer'}}
// expected-note@*:* {{template declaration from hidden source: template <typename element_type> requires __is_typed_resource_element_compatible<element_type> class Buffer}}
// expected-note@*:* {{because 'Buffer<int>' does not satisfy '__is_typed_resource_element_compatible'}}
-// expected-note@*:* {{because '__builtin_hlsl_is_typed_resource_element_compatible(hlsl::Buffer<int>)' evaluated to false}}
+// expected-note@*:* {{because '__builtin_hlsl_is_typed_resource_element_compatible(Buffer<int>)' evaluated to false}}
Buffer<Buffer<int> > r5;
struct s {
@@ -66,7 +66,7 @@ Buffer<half[4]> r10;
typedef vector<int, 8> int8;
// expected-error@+3 {{constraints not satisfied for class template 'Buffer'}}
// expected-note@*:* {{because 'int8' (aka 'vector<int, 8>') does not satisfy '__is_typed_resource_element_compatible'}}
-// expected-note@*:* {{because '__builtin_hlsl_is_typed_resource_element_compatible(vector<int, 8>)' evaluated to false}}
+// expected-note@*:* {{because '__builtin_hlsl_is_typed_resource_element_compatible(int8)' evaluated to false}}
Buffer<int8> r11;
typedef int MyInt;
@@ -91,7 +91,7 @@ Buffer<numbers> r15;
// expected-error@+3 {{constraints not satisfied for class template 'Buffer'}}
// expected-note@*:* {{because 'double3' (aka 'vector<double, 3>') does not satisfy '__is_typed_resource_element_compatible'}}
-// expected-note@*:* {{because '__builtin_hlsl_is_typed_resource_element_compatible(vector<double, 3>)' evaluated to false}}
+// expected-note@*:* {{because '__builtin_hlsl_is_typed_resource_element_compatible(double3)' evaluated to false}}
Buffer<double3> r16;
diff --git a/clang/test/SemaHLSL/BuiltIns/RWBuffers.hlsl b/clang/test/SemaHLSL/BuiltIns/RWBuffers.hlsl
index b33f2af..aa36c48 100644
--- a/clang/test/SemaHLSL/BuiltIns/RWBuffers.hlsl
+++ b/clang/test/SemaHLSL/BuiltIns/RWBuffers.hlsl
@@ -20,7 +20,7 @@ RWBuffer<double2> r4;
// expected-error@+4 {{constraints not satisfied for class template 'RWBuffer'}}
// expected-note@*:* {{template declaration from hidden source: template <typename element_type> requires __is_typed_resource_element_compatible<element_type> class RWBuffer}}
// expected-note@*:* {{because 'RWBuffer<int>' does not satisfy '__is_typed_resource_element_compatible'}}
-// expected-note@*:* {{because '__builtin_hlsl_is_typed_resource_element_compatible(hlsl::RWBuffer<int>)' evaluated to false}}
+// expected-note@*:* {{because '__builtin_hlsl_is_typed_resource_element_compatible(RWBuffer<int>)' evaluated to false}}
RWBuffer<RWBuffer<int> > r5;
struct s {
@@ -66,7 +66,7 @@ RWBuffer<half[4]> r10;
typedef vector<int, 8> int8;
// expected-error@+3 {{constraints not satisfied for class template 'RWBuffer'}}
// expected-note@*:* {{because 'int8' (aka 'vector<int, 8>') does not satisfy '__is_typed_resource_element_compatible'}}
-// expected-note@*:* {{because '__builtin_hlsl_is_typed_resource_element_compatible(vector<int, 8>)' evaluated to false}}
+// expected-note@*:* {{because '__builtin_hlsl_is_typed_resource_element_compatible(int8)' evaluated to false}}
RWBuffer<int8> r11;
typedef int MyInt;
@@ -91,7 +91,7 @@ RWBuffer<numbers> r15;
// expected-error@+3 {{constraints not satisfied for class template 'RWBuffer'}}
// expected-note@*:* {{because 'double3' (aka 'vector<double, 3>') does not satisfy '__is_typed_resource_element_compatible'}}
-// expected-note@*:* {{because '__builtin_hlsl_is_typed_resource_element_compatible(vector<double, 3>)' evaluated to false}}
+// expected-note@*:* {{because '__builtin_hlsl_is_typed_resource_element_compatible(double3)' evaluated to false}}
RWBuffer<double3> r16;
diff --git a/clang/test/SemaTemplate/concepts-recursive-inst.cpp b/clang/test/SemaTemplate/concepts-recursive-inst.cpp
index 73dce93..d36c6a8 100644
--- a/clang/test/SemaTemplate/concepts-recursive-inst.cpp
+++ b/clang/test/SemaTemplate/concepts-recursive-inst.cpp
@@ -68,8 +68,8 @@ struct my_range{
void baz() {
auto it = begin(rng); // #BEGIN_CALL
// expected-error-re@#INF_REQ {{satisfaction of constraint {{.*}} depends on itself}}
-// expected-note@#INF_BEGIN {{while checking the satisfaction of concept 'Inf<DirectRecursiveCheck::my_range>' requested here}}
-// expected-note@#INF_BEGIN_EXPR {{while checking constraint satisfaction for template 'begin<DirectRecursiveCheck::my_range>' required here}}
+// expected-note@#INF_BEGIN {{while checking the satisfaction of concept 'Inf<struct my_range>' requested here}}
+// expected-note@#INF_BEGIN_EXPR {{while checking constraint satisfaction for template 'begin<struct my_range>' required here}}
// expected-note@#INF_BEGIN_EXPR {{while substituting deduced template arguments into function template 'begin'}}
// expected-note@#INF_BEGIN_EXPR {{in instantiation of requirement here}}
// expected-note@#INF_REQ {{while substituting template arguments into constraint expression here}}
diff --git a/clang/test/SemaTemplate/concepts.cpp b/clang/test/SemaTemplate/concepts.cpp
index 3fbe7c0..ee2bb8d 100644
--- a/clang/test/SemaTemplate/concepts.cpp
+++ b/clang/test/SemaTemplate/concepts.cpp
@@ -833,13 +833,13 @@ struct Parent {
static_assert(Parent<void>::TakesUnary<int, 0>::i == 0);
// expected-error@+3{{constraints not satisfied for class template 'TakesUnary'}}
// expected-note@#UNARY{{because 'decltype(0ULL)' (aka 'unsigned long long') does not satisfy 'C'}}
-// expected-note@#61777_C{{because 'sizeof(unsigned long long) == 4' (8 == 4) evaluated to false}}
+// expected-note@#61777_C{{because 'sizeof(decltype(0ULL)) == 4' (8 == 4) evaluated to false}}
static_assert(Parent<void>::TakesUnary<int, 0uLL>::i == 0);
static_assert(Parent<int>::TakesBinary<int, 0>::i == 0);
// expected-error@+3{{constraints not satisfied for class template 'TakesBinary'}}
// expected-note@#BINARY{{because 'C2<decltype(0ULL), int>' evaluated to false}}
-// expected-note@#61777_C2{{because 'sizeof(unsigned long long) == sizeof(int)' (8 == 4) evaluated to false}}
+// expected-note@#61777_C2{{because 'sizeof(decltype(0ULL)) == sizeof(int)' (8 == 4) evaluated to false}}
static_assert(Parent<int>::TakesBinary<int, 0ULL>::i == 0);
}
@@ -1329,8 +1329,8 @@ static_assert(__cpp17_iterator<not_move_constructible>); \
// expected-error {{static assertion failed}} \
// expected-note {{because 'not_move_constructible' does not satisfy '__cpp17_iterator'}} \
// expected-note@#__cpp17_copy_constructible {{because 'not_move_constructible' does not satisfy '__cpp17_copy_constructible'}} \
-// expected-note@#__cpp17_move_constructible {{because 'parameter_mapping_regressions::case3::not_move_constructible' does not satisfy '__cpp17_move_constructible'}} \
-// expected-note@#is_move_constructible_v {{because 'is_move_constructible_v<parameter_mapping_regressions::case3::not_move_constructible>' evaluated to false}}
+// expected-note@#__cpp17_move_constructible {{because 'not_move_constructible' does not satisfy '__cpp17_move_constructible'}} \
+// expected-note@#is_move_constructible_v {{because 'is_move_constructible_v<not_move_constructible>' evaluated to false}}
}
namespace case4 {
diff --git a/llvm/include/llvm/ADT/PointerUnion.h b/llvm/include/llvm/ADT/PointerUnion.h
index 7b66177..d9087dd1 100644
--- a/llvm/include/llvm/ADT/PointerUnion.h
+++ b/llvm/include/llvm/ADT/PointerUnion.h
@@ -38,11 +38,6 @@ namespace pointer_union_detail {
return std::min<int>({PointerLikeTypeTraits<Ts>::NumLowBitsAvailable...});
}
- /// Find the first type in a list of types.
- template <typename T, typename...> struct GetFirstType {
- using type = T;
- };
-
/// Provide PointerLikeTypeTraits for void* that is used by PointerUnion
/// for the template arguments.
template <typename ...PTs> class PointerUnionUIntTraits {
@@ -264,8 +259,7 @@ struct PointerLikeTypeTraits<PointerUnion<PTs...>> {
// Teach DenseMap how to use PointerUnions as keys.
template <typename ...PTs> struct DenseMapInfo<PointerUnion<PTs...>> {
using Union = PointerUnion<PTs...>;
- using FirstInfo =
- DenseMapInfo<typename pointer_union_detail::GetFirstType<PTs...>::type>;
+ using FirstInfo = DenseMapInfo<TypeAtIndex<0, PTs...>>;
static inline Union getEmptyKey() { return Union(FirstInfo::getEmptyKey()); }
diff --git a/llvm/include/llvm/ADT/STLExtras.h b/llvm/include/llvm/ADT/STLExtras.h
index 5b20d6bd..658f262 100644
--- a/llvm/include/llvm/ADT/STLExtras.h
+++ b/llvm/include/llvm/ADT/STLExtras.h
@@ -161,12 +161,10 @@ using TypeAtIndex = std::tuple_element_t<I, std::tuple<Ts...>>;
/// Helper which adds two underlying types of enumeration type.
/// Implicit conversion to a common type is accepted.
template <typename EnumTy1, typename EnumTy2,
- typename UT1 = std::enable_if_t<std::is_enum<EnumTy1>::value,
- std::underlying_type_t<EnumTy1>>,
- typename UT2 = std::enable_if_t<std::is_enum<EnumTy2>::value,
- std::underlying_type_t<EnumTy2>>>
+ typename = std::enable_if_t<std::is_enum_v<EnumTy1> &&
+ std::is_enum_v<EnumTy2>>>
constexpr auto addEnumValues(EnumTy1 LHS, EnumTy2 RHS) {
- return static_cast<UT1>(LHS) + static_cast<UT2>(RHS);
+ return llvm::to_underlying(LHS) + llvm::to_underlying(RHS);
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/include/llvm/ADT/Sequence.h b/llvm/include/llvm/ADT/Sequence.h
index ce969ef..ae446df 100644
--- a/llvm/include/llvm/ADT/Sequence.h
+++ b/llvm/include/llvm/ADT/Sequence.h
@@ -86,6 +86,7 @@
#include <type_traits> // std::is_integral, std::is_enum, std::underlying_type,
// std::enable_if
+#include "llvm/ADT/STLForwardCompat.h" // llvm::to_underlying
#include "llvm/Support/MathExtras.h" // AddOverflow / SubOverflow
namespace llvm {
@@ -139,8 +140,7 @@ struct CheckedInt {
template <typename Enum,
std::enable_if_t<std::is_enum<Enum>::value, bool> = 0>
static CheckedInt from(Enum FromValue) {
- using type = std::underlying_type_t<Enum>;
- return from<type>(static_cast<type>(FromValue));
+ return from(llvm::to_underlying(FromValue));
}
// Equality
diff --git a/llvm/include/llvm/IR/ConstantFPRange.h b/llvm/include/llvm/IR/ConstantFPRange.h
index d47f6c0..39dc7c1 100644
--- a/llvm/include/llvm/IR/ConstantFPRange.h
+++ b/llvm/include/llvm/IR/ConstantFPRange.h
@@ -222,6 +222,14 @@ public:
LLVM_ABI ConstantFPRange
cast(const fltSemantics &DstSem,
APFloat::roundingMode RM = APFloat::rmNearestTiesToEven) const;
+
+ /// Return a new range representing the possible values resulting
+ /// from an addition of a value in this range and a value in \p Other.
+ LLVM_ABI ConstantFPRange add(const ConstantFPRange &Other) const;
+
+ /// Return a new range representing the possible values resulting
+ /// from subtracting a value in \p Other from a value in this range.
+ LLVM_ABI ConstantFPRange sub(const ConstantFPRange &Other) const;
};
inline raw_ostream &operator<<(raw_ostream &OS, const ConstantFPRange &CR) {
diff --git a/llvm/lib/IR/ConstantFPRange.cpp b/llvm/lib/IR/ConstantFPRange.cpp
index 070e833..51d2e21 100644
--- a/llvm/lib/IR/ConstantFPRange.cpp
+++ b/llvm/lib/IR/ConstantFPRange.cpp
@@ -414,15 +414,31 @@ ConstantFPRange ConstantFPRange::negate() const {
return ConstantFPRange(-Upper, -Lower, MayBeQNaN, MayBeSNaN);
}
+/// Return true if the finite part is not empty after removing infinities.
+static bool removeInf(APFloat &Lower, APFloat &Upper, bool &HasPosInf,
+ bool &HasNegInf) {
+ assert(strictCompare(Lower, Upper) != APFloat::cmpGreaterThan &&
+ "Non-NaN part is empty.");
+ auto &Sem = Lower.getSemantics();
+ if (Lower.isNegInfinity()) {
+ Lower = APFloat::getLargest(Sem, /*Negative=*/true);
+ HasNegInf = true;
+ }
+ if (Upper.isPosInfinity()) {
+ Upper = APFloat::getLargest(Sem, /*Negative=*/false);
+ HasPosInf = true;
+ }
+ return strictCompare(Lower, Upper) != APFloat::cmpGreaterThan;
+}
+
ConstantFPRange ConstantFPRange::getWithoutInf() const {
if (isNaNOnly())
return *this;
APFloat NewLower = Lower;
APFloat NewUpper = Upper;
- if (Lower.isNegInfinity())
- NewLower = APFloat::getLargest(getSemantics(), /*Negative=*/true);
- if (Upper.isPosInfinity())
- NewUpper = APFloat::getLargest(getSemantics(), /*Negative=*/false);
+ bool UnusedFlag;
+ removeInf(NewLower, NewUpper, /*HasPosInf=*/UnusedFlag,
+ /*HasNegInf=*/UnusedFlag);
canonicalizeRange(NewLower, NewUpper);
return ConstantFPRange(std::move(NewLower), std::move(NewUpper), MayBeQNaN,
MayBeSNaN);
@@ -444,3 +460,49 @@ ConstantFPRange ConstantFPRange::cast(const fltSemantics &DstSem,
/*MayBeQNaNVal=*/MayBeQNaN || MayBeSNaN,
/*MayBeSNaNVal=*/false);
}
+
+ConstantFPRange ConstantFPRange::add(const ConstantFPRange &Other) const {
+ bool ResMayBeQNaN = ((MayBeQNaN || MayBeSNaN) && !Other.isEmptySet()) ||
+ ((Other.MayBeQNaN || Other.MayBeSNaN) && !isEmptySet());
+ if (isNaNOnly() || Other.isNaNOnly())
+ return getNaNOnly(getSemantics(), /*MayBeQNaN=*/ResMayBeQNaN,
+ /*MayBeSNaN=*/false);
+ bool LHSHasNegInf = false, LHSHasPosInf = false;
+ APFloat LHSLower = Lower, LHSUpper = Upper;
+ bool LHSFiniteIsNonEmpty =
+ removeInf(LHSLower, LHSUpper, LHSHasPosInf, LHSHasNegInf);
+ bool RHSHasNegInf = false, RHSHasPosInf = false;
+ APFloat RHSLower = Other.Lower, RHSUpper = Other.Upper;
+ bool RHSFiniteIsNonEmpty =
+ removeInf(RHSLower, RHSUpper, RHSHasPosInf, RHSHasNegInf);
+ // -inf + +inf = QNaN
+ ResMayBeQNaN |=
+ (LHSHasNegInf && RHSHasPosInf) || (LHSHasPosInf && RHSHasNegInf);
+ // +inf + finite/+inf = +inf, -inf + finite/-inf = -inf
+ bool HasNegInf = (LHSHasNegInf && (RHSFiniteIsNonEmpty || RHSHasNegInf)) ||
+ (RHSHasNegInf && (LHSFiniteIsNonEmpty || LHSHasNegInf));
+ bool HasPosInf = (LHSHasPosInf && (RHSFiniteIsNonEmpty || RHSHasPosInf)) ||
+ (RHSHasPosInf && (LHSFiniteIsNonEmpty || LHSHasPosInf));
+ if (LHSFiniteIsNonEmpty && RHSFiniteIsNonEmpty) {
+ APFloat NewLower =
+ HasNegInf ? APFloat::getInf(LHSLower.getSemantics(), /*Negative=*/true)
+ : LHSLower + RHSLower;
+ APFloat NewUpper =
+ HasPosInf ? APFloat::getInf(LHSUpper.getSemantics(), /*Negative=*/false)
+ : LHSUpper + RHSUpper;
+ return ConstantFPRange(NewLower, NewUpper, ResMayBeQNaN,
+ /*MayBeSNaN=*/false);
+ }
+ // If both HasNegInf and HasPosInf are false, the non-NaN part is empty.
+ // We just return the canonical form [+inf, -inf] for the empty non-NaN set.
+ return ConstantFPRange(
+ APFloat::getInf(Lower.getSemantics(), /*Negative=*/HasNegInf),
+ APFloat::getInf(Upper.getSemantics(), /*Negative=*/!HasPosInf),
+ ResMayBeQNaN,
+ /*MayBeSNaN=*/false);
+}
+
+ConstantFPRange ConstantFPRange::sub(const ConstantFPRange &Other) const {
+ // fsub X, Y = fadd X, (fneg Y)
+ return add(Other.negate());
+}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index fbce3b0..6965116 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -19093,7 +19093,8 @@ static SDValue performUADDVAddCombine(SDValue A, SelectionDAG &DAG) {
SDValue Ext1 = Op1.getOperand(0);
if (Ext0.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
Ext1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
- Ext0.getOperand(0) != Ext1.getOperand(0))
+ Ext0.getOperand(0) != Ext1.getOperand(0) ||
+ Ext0.getOperand(0).getValueType().isScalableVector())
return SDValue();
// Check that the type is twice the add types, and the extract are from
// upper/lower parts of the same source.
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index b8761d97..30dfcf2b 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -5064,17 +5064,15 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
bool RenamableSrc) const {
if (AArch64::GPR32spRegClass.contains(DestReg) &&
(AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
- const TargetRegisterInfo *TRI = &getRegisterInfo();
-
if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
// If either operand is WSP, expand to ADD #0.
if (Subtarget.hasZeroCycleRegMoveGPR64() &&
!Subtarget.hasZeroCycleRegMoveGPR32()) {
// Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
- MCRegister DestRegX = TRI->getMatchingSuperReg(
- DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
- MCRegister SrcRegX = TRI->getMatchingSuperReg(
- SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
+ MCRegister DestRegX = RI.getMatchingSuperReg(DestReg, AArch64::sub_32,
+ &AArch64::GPR64spRegClass);
+ MCRegister SrcRegX = RI.getMatchingSuperReg(SrcReg, AArch64::sub_32,
+ &AArch64::GPR64spRegClass);
// This instruction is reading and writing X registers. This may upset
// the register scavenger and machine verifier, so we need to indicate
// that we are reading an undefined value from SrcRegX, but a proper
@@ -5097,14 +5095,14 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
} else if (Subtarget.hasZeroCycleRegMoveGPR64() &&
!Subtarget.hasZeroCycleRegMoveGPR32()) {
// Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
- MCRegister DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
- &AArch64::GPR64spRegClass);
+ MCRegister DestRegX = RI.getMatchingSuperReg(DestReg, AArch64::sub_32,
+ &AArch64::GPR64spRegClass);
assert(DestRegX.isValid() && "Destination super-reg not valid");
MCRegister SrcRegX =
SrcReg == AArch64::WZR
? AArch64::XZR
- : TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
- &AArch64::GPR64spRegClass);
+ : RI.getMatchingSuperReg(SrcReg, AArch64::sub_32,
+ &AArch64::GPR64spRegClass);
assert(SrcRegX.isValid() && "Source super-reg not valid");
// This instruction is reading and writing X registers. This may upset
// the register scavenger and machine verifier, so we need to indicate
@@ -5334,11 +5332,10 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (Subtarget.hasZeroCycleRegMoveFPR128() &&
!Subtarget.hasZeroCycleRegMoveFPR64() &&
!Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) {
- const TargetRegisterInfo *TRI = &getRegisterInfo();
- MCRegister DestRegQ = TRI->getMatchingSuperReg(DestReg, AArch64::dsub,
- &AArch64::FPR128RegClass);
- MCRegister SrcRegQ = TRI->getMatchingSuperReg(SrcReg, AArch64::dsub,
- &AArch64::FPR128RegClass);
+ MCRegister DestRegQ = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
+ &AArch64::FPR128RegClass);
+ MCRegister SrcRegQ = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
+ &AArch64::FPR128RegClass);
// This instruction is reading and writing Q registers. This may upset
// the register scavenger and machine verifier, so we need to indicate
// that we are reading an undefined value from SrcRegQ, but a proper
@@ -5359,11 +5356,10 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (Subtarget.hasZeroCycleRegMoveFPR128() &&
!Subtarget.hasZeroCycleRegMoveFPR64() &&
!Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) {
- const TargetRegisterInfo *TRI = &getRegisterInfo();
- MCRegister DestRegQ = TRI->getMatchingSuperReg(DestReg, AArch64::ssub,
- &AArch64::FPR128RegClass);
- MCRegister SrcRegQ = TRI->getMatchingSuperReg(SrcReg, AArch64::ssub,
- &AArch64::FPR128RegClass);
+ MCRegister DestRegQ = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
+ &AArch64::FPR128RegClass);
+ MCRegister SrcRegQ = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
+ &AArch64::FPR128RegClass);
// This instruction is reading and writing Q registers. This may upset
// the register scavenger and machine verifier, so we need to indicate
// that we are reading an undefined value from SrcRegQ, but a proper
@@ -5374,11 +5370,10 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
.addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
} else if (Subtarget.hasZeroCycleRegMoveFPR64() &&
!Subtarget.hasZeroCycleRegMoveFPR32()) {
- const TargetRegisterInfo *TRI = &getRegisterInfo();
- MCRegister DestRegD = TRI->getMatchingSuperReg(DestReg, AArch64::ssub,
- &AArch64::FPR64RegClass);
- MCRegister SrcRegD = TRI->getMatchingSuperReg(SrcReg, AArch64::ssub,
- &AArch64::FPR64RegClass);
+ MCRegister DestRegD = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
+ &AArch64::FPR64RegClass);
+ MCRegister SrcRegD = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
+ &AArch64::FPR64RegClass);
// This instruction is reading and writing D registers. This may upset
// the register scavenger and machine verifier, so we need to indicate
// that we are reading an undefined value from SrcRegD, but a proper
@@ -5398,11 +5393,10 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (Subtarget.hasZeroCycleRegMoveFPR128() &&
!Subtarget.hasZeroCycleRegMoveFPR64() &&
!Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) {
- const TargetRegisterInfo *TRI = &getRegisterInfo();
- MCRegister DestRegQ = TRI->getMatchingSuperReg(DestReg, AArch64::hsub,
- &AArch64::FPR128RegClass);
- MCRegister SrcRegQ = TRI->getMatchingSuperReg(SrcReg, AArch64::hsub,
- &AArch64::FPR128RegClass);
+ MCRegister DestRegQ = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
+ &AArch64::FPR128RegClass);
+ MCRegister SrcRegQ = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
+ &AArch64::FPR128RegClass);
// This instruction is reading and writing Q registers. This may upset
// the register scavenger and machine verifier, so we need to indicate
// that we are reading an undefined value from SrcRegQ, but a proper
@@ -5413,11 +5407,10 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
.addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
} else if (Subtarget.hasZeroCycleRegMoveFPR64() &&
!Subtarget.hasZeroCycleRegMoveFPR32()) {
- const TargetRegisterInfo *TRI = &getRegisterInfo();
- MCRegister DestRegD = TRI->getMatchingSuperReg(DestReg, AArch64::hsub,
- &AArch64::FPR64RegClass);
- MCRegister SrcRegD = TRI->getMatchingSuperReg(SrcReg, AArch64::hsub,
- &AArch64::FPR64RegClass);
+ MCRegister DestRegD = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
+ &AArch64::FPR64RegClass);
+ MCRegister SrcRegD = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
+ &AArch64::FPR64RegClass);
// This instruction is reading and writing D registers. This may upset
// the register scavenger and machine verifier, so we need to indicate
// that we are reading an undefined value from SrcRegD, but a proper
@@ -5441,11 +5434,10 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (Subtarget.hasZeroCycleRegMoveFPR128() &&
!Subtarget.hasZeroCycleRegMoveFPR64() &&
- !Subtarget.hasZeroCycleRegMoveFPR64() && Subtarget.isNeonAvailable()) {
- const TargetRegisterInfo *TRI = &getRegisterInfo();
- MCRegister DestRegQ = TRI->getMatchingSuperReg(DestReg, AArch64::bsub,
- &AArch64::FPR128RegClass);
- MCRegister SrcRegQ = TRI->getMatchingSuperReg(SrcReg, AArch64::bsub,
- &AArch64::FPR128RegClass);
+ !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) {
+ MCRegister DestRegQ = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
+ &AArch64::FPR128RegClass);
+ MCRegister SrcRegQ = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
+ &AArch64::FPR128RegClass);
// This instruction is reading and writing Q registers. This may upset
// the register scavenger and machine verifier, so we need to indicate
// that we are reading an undefined value from SrcRegQ, but a proper
@@ -5456,11 +5448,10 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
.addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
} else if (Subtarget.hasZeroCycleRegMoveFPR64() &&
!Subtarget.hasZeroCycleRegMoveFPR32()) {
- const TargetRegisterInfo *TRI = &getRegisterInfo();
- MCRegister DestRegD = TRI->getMatchingSuperReg(DestReg, AArch64::bsub,
- &AArch64::FPR64RegClass);
- MCRegister SrcRegD = TRI->getMatchingSuperReg(SrcReg, AArch64::bsub,
- &AArch64::FPR64RegClass);
+ MCRegister DestRegD = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
+ &AArch64::FPR64RegClass);
+ MCRegister SrcRegD = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
+ &AArch64::FPR64RegClass);
// This instruction is reading and writing D registers. This may upset
// the register scavenger and machine verifier, so we need to indicate
// that we are reading an undefined value from SrcRegD, but a proper
@@ -5532,9 +5523,8 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
}
#ifndef NDEBUG
- const TargetRegisterInfo &TRI = getRegisterInfo();
- errs() << TRI.getRegAsmName(DestReg) << " = COPY "
- << TRI.getRegAsmName(SrcReg) << "\n";
+ errs() << RI.getRegAsmName(DestReg) << " = COPY " << RI.getRegAsmName(SrcReg)
+ << "\n";
#endif
llvm_unreachable("unimplemented reg-to-reg copy");
}
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index 456fa4c..7651ba1 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -322,7 +322,11 @@ public:
VPWidenCastRecipe *createWidenCast(Instruction::CastOps Opcode, VPValue *Op,
Type *ResultTy) {
- return tryInsertInstruction(new VPWidenCastRecipe(Opcode, Op, ResultTy));
+ VPIRFlags Flags;
+ if (Opcode == Instruction::Trunc)
+ Flags = VPIRFlags::TruncFlagsTy(false, false);
+ return tryInsertInstruction(
+ new VPWidenCastRecipe(Opcode, Op, ResultTy, Flags));
}
VPScalarIVStepsRecipe *
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 50136a8..b96d29e 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8774,13 +8774,19 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
assert(!RecurrenceDescriptor::isMinMaxRecurrenceKind(RecurrenceKind) &&
"Unexpected truncated min-max recurrence!");
Type *RdxTy = RdxDesc.getRecurrenceType();
- auto *Trunc =
- new VPWidenCastRecipe(Instruction::Trunc, NewExitingVPV, RdxTy);
+ VPWidenCastRecipe *Trunc;
Instruction::CastOps ExtendOpc =
RdxDesc.isSigned() ? Instruction::SExt : Instruction::ZExt;
- auto *Extnd = new VPWidenCastRecipe(ExtendOpc, Trunc, PhiTy);
- Trunc->insertAfter(NewExitingVPV->getDefiningRecipe());
- Extnd->insertAfter(Trunc);
+ VPWidenCastRecipe *Extnd;
+ {
+ VPBuilder::InsertPointGuard Guard(Builder);
+ Builder.setInsertPoint(
+ NewExitingVPV->getDefiningRecipe()->getParent(),
+ std::next(NewExitingVPV->getDefiningRecipe()->getIterator()));
+ Trunc =
+ Builder.createWidenCast(Instruction::Trunc, NewExitingVPV, RdxTy);
+ Extnd = Builder.createWidenCast(ExtendOpc, Trunc, PhiTy);
+ }
if (PhiR->getOperand(1) == NewExitingVPV)
PhiR->setOperand(1, Extnd->getVPSingleValue());
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 7563cd7..9bb8820 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1026,6 +1026,7 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
PredPHI->replaceAllUsesWith(Op);
}
+ VPBuilder Builder(Def);
VPValue *A;
if (match(Def, m_Trunc(m_ZExtOrSExt(m_VPValue(A))))) {
Type *TruncTy = TypeInfo.inferScalarType(Def);
@@ -1041,18 +1042,16 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
unsigned ExtOpcode = match(R.getOperand(0), m_SExt(m_VPValue()))
? Instruction::SExt
: Instruction::ZExt;
- auto *VPC =
- new VPWidenCastRecipe(Instruction::CastOps(ExtOpcode), A, TruncTy);
+ auto *Ext = Builder.createWidenCast(Instruction::CastOps(ExtOpcode), A,
+ TruncTy);
if (auto *UnderlyingExt = R.getOperand(0)->getUnderlyingValue()) {
// UnderlyingExt has distinct return type, used to retain legacy cost.
- VPC->setUnderlyingValue(UnderlyingExt);
+ Ext->setUnderlyingValue(UnderlyingExt);
}
- VPC->insertBefore(&R);
- Def->replaceAllUsesWith(VPC);
+ Def->replaceAllUsesWith(Ext);
} else if (ATy->getScalarSizeInBits() > TruncTy->getScalarSizeInBits()) {
- auto *VPC = new VPWidenCastRecipe(Instruction::Trunc, A, TruncTy);
- VPC->insertBefore(&R);
- Def->replaceAllUsesWith(VPC);
+ auto *Trunc = Builder.createWidenCast(Instruction::Trunc, A, TruncTy);
+ Def->replaceAllUsesWith(Trunc);
}
}
#ifndef NDEBUG
@@ -1098,7 +1097,6 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
return Def->replaceAllUsesWith(Def->getOperand(1));
// (x && y) || (x && z) -> x && (y || z)
- VPBuilder Builder(Def);
if (match(Def, m_c_BinaryOr(m_LogicalAnd(m_VPValue(X), m_VPValue(Y)),
m_LogicalAnd(m_Deferred(X), m_VPValue(Z)))) &&
// Simplify only if one of the operands has one use to avoid creating an
@@ -2206,20 +2204,20 @@ void VPlanTransforms::truncateToMinimalBitwidths(
continue;
assert(OpSizeInBits > NewResSizeInBits && "nothing to truncate");
auto [ProcessedIter, IterIsEmpty] = ProcessedTruncs.try_emplace(Op);
- VPWidenCastRecipe *NewOp =
- IterIsEmpty
- ? new VPWidenCastRecipe(Instruction::Trunc, Op, NewResTy,
- VPIRFlags::TruncFlagsTy(false, false))
- : ProcessedIter->second;
- R.setOperand(Idx, NewOp);
- if (!IterIsEmpty)
+ if (!IterIsEmpty) {
+ R.setOperand(Idx, ProcessedIter->second);
continue;
- ProcessedIter->second = NewOp;
- if (!Op->isLiveIn()) {
- NewOp->insertBefore(&R);
- } else {
- PH->appendRecipe(NewOp);
}
+
+ VPBuilder Builder;
+ if (Op->isLiveIn())
+ Builder.setInsertPoint(PH);
+ else
+ Builder.setInsertPoint(&R);
+ VPWidenCastRecipe *NewOp =
+ Builder.createWidenCast(Instruction::Trunc, Op, NewResTy);
+ ProcessedIter->second = NewOp;
+ R.setOperand(Idx, NewOp);
}
}
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-add.ll b/llvm/test/CodeGen/AArch64/vecreduce-add.ll
index ee04e41..2d0df56 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-add.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-add.ll
@@ -4775,6 +4775,39 @@ entry:
ret i32 %z5
}
+define i64 @extract_scalable(<2 x i32> %0) "target-features"="+sve2" {
+; CHECK-SD-LABEL: extract_scalable:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v1.2s, #1
+; CHECK-SD-NEXT: ptrue p0.s, vl2
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-SD-NEXT: sdivr z0.s, p0/m, z0.s, z1.s
+; CHECK-SD-NEXT: saddl v0.2d, v0.2s, v0.2s
+; CHECK-SD-NEXT: addp d0, v0.2d
+; CHECK-SD-NEXT: fmov x0, d0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: extract_scalable:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: fmov w9, s0
+; CHECK-GI-NEXT: mov w8, #1 // =0x1
+; CHECK-GI-NEXT: mov w10, v0.s[1]
+; CHECK-GI-NEXT: sdiv w9, w8, w9
+; CHECK-GI-NEXT: sdiv w8, w8, w10
+; CHECK-GI-NEXT: fmov s0, w9
+; CHECK-GI-NEXT: mov v0.s[1], w8
+; CHECK-GI-NEXT: saddl v0.2d, v0.2s, v0.2s
+; CHECK-GI-NEXT: addp d0, v0.2d
+; CHECK-GI-NEXT: fmov x0, d0
+; CHECK-GI-NEXT: ret
+ %2 = sdiv <2 x i32> splat (i32 1), %0
+ %3 = sext <2 x i32> %2 to <2 x i64>
+ %4 = add <2 x i64> %3, %3
+ %5 = tail call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %4)
+ ret i64 %5
+}
+
declare <8 x i32> @llvm.abs.v8i32(<8 x i32>, i1 immarg) #1
declare i16 @llvm.vector.reduce.add.v32i16(<32 x i16>)
declare i16 @llvm.vector.reduce.add.v24i16(<24 x i16>)
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll
index 000dc4a..232c354 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll
@@ -44,8 +44,86 @@ loop: ; preds = %loop, %entry
exit: ; preds = %loop
ret void
}
+
+; Test case for https://github.com/llvm/llvm-project/issues/162374.
+define void @truncate_i16_to_i8_cse(ptr noalias %src, ptr noalias %dst) {
+; CHECK-LABEL: define void @truncate_i16_to_i8_cse(
+; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 4294967296, [[TMP1]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 4294967296, [[TMP3]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 4294967296, [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[N_VEC]] to i32
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP5:%.*]] = load i16, ptr [[SRC]], align 2
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[TMP5]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = trunc <vscale x 8 x i16> [[BROADCAST_SPLAT]] to <vscale x 8 x i8>
+; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[TMP8:%.*]] = mul nuw i32 [[TMP7]], 8
+; CHECK-NEXT: [[TMP9:%.*]] = sub i32 [[TMP8]], 1
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <vscale x 8 x i8> [[TMP6]], i32 [[TMP9]]
+; CHECK-NEXT: store i8 [[TMP10]], ptr null, align 1
+; CHECK-NEXT: store i8 [[TMP10]], ptr [[DST]], align 1
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
+; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 4294967296, [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[TMP4]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[COUNT:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], %[[SCALAR_PH]] ], [ [[COUNT_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[VAL:%.*]] = load i16, ptr [[SRC]], align 2
+; CHECK-NEXT: [[VAL_ZEXT:%.*]] = zext i16 [[VAL]] to i64
+; CHECK-NEXT: [[VAL_TRUNC_ZEXT:%.*]] = trunc i64 [[VAL_ZEXT]] to i8
+; CHECK-NEXT: store i8 [[VAL_TRUNC_ZEXT]], ptr null, align 1
+; CHECK-NEXT: [[VAL_TRUNC:%.*]] = trunc i16 [[VAL]] to i8
+; CHECK-NEXT: store i8 [[VAL_TRUNC]], ptr [[DST]], align 1
+; CHECK-NEXT: [[COUNT_NEXT]] = add i32 [[COUNT]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[COUNT_NEXT]], 0
+; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT: br i1 [[EXITCOND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %count = phi i32 [ 0, %entry ], [ %count.next, %loop ]
+ %val = load i16, ptr %src, align 2
+ %val.zext = zext i16 %val to i64
+ %val.trunc.zext = trunc i64 %val.zext to i8
+ store i8 %val.trunc.zext, ptr null, align 1
+ %val.trunc = trunc i16 %val to i8
+ store i8 %val.trunc, ptr %dst, align 1
+ %count.next = add i32 %count, 1
+ %exitcond = icmp eq i32 %count.next, 0
+ %iv.next = add i64 %iv, 1
+ br i1 %exitcond, label %exit, label %loop
+
+exit: ; preds = %loop
+ ret void
+}
+
;.
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]}
+; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META2]], [[META1]]}
;.
diff --git a/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar.ll b/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar.ll
index cb16032..1533906 100644
--- a/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar.ll
+++ b/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar.ll
@@ -1,46 +1,59 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -p loop-vectorize -force-vector-width=4 -S %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6
+; RUN: opt -p loop-vectorize -force-vector-width=4 -S %s | FileCheck --check-prefix=VF4IC1 %s
+; RUN: opt -p loop-vectorize -force-vector-width=2 -force-vector-interleave=2 -S %s | FileCheck --check-prefix=VF2IC2 %s
define void @narrow_select_to_single_scalar(i1 %invar.cond, ptr noalias %A, ptr noalias %B, ptr noalias %C) {
-; CHECK-LABEL: define void @narrow_select_to_single_scalar(
-; CHECK-SAME: i1 [[INVAR_COND:%.*]], ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) {
-; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
-; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[TMP0:%.*]] = select i1 [[INVAR_COND]], i16 0, i16 1
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr [[C]], i16 [[TMP0]]
-; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
-; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16
-; CHECK-NEXT: [[TMP2:%.*]] = add i16 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = add i16 [[OFFSET_IDX]], 1
-; CHECK-NEXT: [[TMP4:%.*]] = add i16 [[OFFSET_IDX]], 2
-; CHECK-NEXT: [[TMP5:%.*]] = add i16 [[OFFSET_IDX]], 3
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[A]], i16 [[TMP5]]
-; CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP6]], align 1
-; CHECK-NEXT: store i16 [[TMP7]], ptr [[B]], align 1
-; CHECK-NEXT: store i16 0, ptr [[TMP1]], align 1
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
-; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
-; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: br label %[[SCALAR_PH:.*]]
-; CHECK: [[SCALAR_PH]]:
-; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
-; CHECK: [[LOOP_HEADER]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i16 [ 1024, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_HEADER]] ]
-; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i8, ptr [[A]], i16 [[IV]]
-; CHECK-NEXT: [[L_0:%.*]] = load i16, ptr [[GEP_A]], align 1
-; CHECK-NEXT: store i16 [[L_0]], ptr [[B]], align 1
-; CHECK-NEXT: [[INVAR_SEL:%.*]] = select i1 [[INVAR_COND]], i16 0, i16 1
-; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr i16, ptr [[C]], i16 [[INVAR_SEL]]
-; CHECK-NEXT: store i16 0, ptr [[GEP_C]], align 1
-; CHECK-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1
-; CHECK-NEXT: [[EC:%.*]] = icmp ne i16 [[IV]], 1024
-; CHECK-NEXT: br i1 [[EC]], label %[[LOOP_HEADER]], label %[[EXIT:.*]], !llvm.loop [[LOOP3:![0-9]+]]
-; CHECK: [[EXIT]]:
-; CHECK-NEXT: ret void
+; VF4IC1-LABEL: define void @narrow_select_to_single_scalar(
+; VF4IC1-SAME: i1 [[INVAR_COND:%.*]], ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) {
+; VF4IC1-NEXT: [[ENTRY:.*:]]
+; VF4IC1-NEXT: br label %[[VECTOR_PH:.*]]
+; VF4IC1: [[VECTOR_PH]]:
+; VF4IC1-NEXT: [[TMP0:%.*]] = select i1 [[INVAR_COND]], i16 0, i16 1
+; VF4IC1-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr [[C]], i16 [[TMP0]]
+; VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]]
+; VF4IC1: [[VECTOR_BODY]]:
+; VF4IC1-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; VF4IC1-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16
+; VF4IC1-NEXT: [[TMP2:%.*]] = add i16 [[OFFSET_IDX]], 0
+; VF4IC1-NEXT: [[TMP3:%.*]] = add i16 [[OFFSET_IDX]], 1
+; VF4IC1-NEXT: [[TMP4:%.*]] = add i16 [[OFFSET_IDX]], 2
+; VF4IC1-NEXT: [[TMP5:%.*]] = add i16 [[OFFSET_IDX]], 3
+; VF4IC1-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[A]], i16 [[TMP5]]
+; VF4IC1-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP6]], align 1
+; VF4IC1-NEXT: store i16 [[TMP7]], ptr [[B]], align 1
+; VF4IC1-NEXT: store i16 0, ptr [[TMP1]], align 1
+; VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
+; VF4IC1-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
+; VF4IC1-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; VF4IC1: [[MIDDLE_BLOCK]]:
+; VF4IC1-NEXT: br label %[[EXIT:.*]]
+; VF4IC1: [[EXIT]]:
+; VF4IC1-NEXT: ret void
+;
+; VF2IC2-LABEL: define void @narrow_select_to_single_scalar(
+; VF2IC2-SAME: i1 [[INVAR_COND:%.*]], ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) {
+; VF2IC2-NEXT: [[ENTRY:.*:]]
+; VF2IC2-NEXT: br label %[[VECTOR_PH:.*]]
+; VF2IC2: [[VECTOR_PH]]:
+; VF2IC2-NEXT: [[TMP0:%.*]] = select i1 [[INVAR_COND]], i16 0, i16 1
+; VF2IC2-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr [[C]], i16 [[TMP0]]
+; VF2IC2-NEXT: br label %[[VECTOR_BODY:.*]]
+; VF2IC2: [[VECTOR_BODY]]:
+; VF2IC2-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; VF2IC2-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16
+; VF2IC2-NEXT: [[TMP2:%.*]] = add i16 [[OFFSET_IDX]], 2
+; VF2IC2-NEXT: [[TMP3:%.*]] = add i16 [[OFFSET_IDX]], 3
+; VF2IC2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i16 [[TMP3]]
+; VF2IC2-NEXT: [[TMP5:%.*]] = load i16, ptr [[TMP4]], align 1
+; VF2IC2-NEXT: store i16 [[TMP5]], ptr [[B]], align 1
+; VF2IC2-NEXT: store i16 0, ptr [[TMP1]], align 1
+; VF2IC2-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
+; VF2IC2-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
+; VF2IC2-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; VF2IC2: [[MIDDLE_BLOCK]]:
+; VF2IC2-NEXT: br label %[[EXIT:.*]]
+; VF2IC2: [[EXIT]]:
+; VF2IC2-NEXT: ret void
;
entry:
br label %loop.header
@@ -54,15 +67,88 @@ loop.header:
%gep.C = getelementptr i16, ptr %C, i16 %invar.sel
store i16 0, ptr %gep.C, align 1
%iv.next = add i16 %iv, 1
- %ec = icmp ne i16 %iv, 1024
+ %ec = icmp ne i16 %iv.next, 1024
br i1 %ec, label %loop.header, label %exit
exit:
ret void
}
-;.
-; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
-; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
-; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
-; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
-;.
+
+; FIXME: Currently this is miscompiled when interleaving; all stores store the
+; last lane of the last part, instead of the last lane per part.
+; Test case for https://github.com/llvm/llvm-project/issues/162498.
+define void @narrow_to_single_scalar_store_address_not_uniform_across_all_parts(ptr %dst) {
+; VF4IC1-LABEL: define void @narrow_to_single_scalar_store_address_not_uniform_across_all_parts(
+; VF4IC1-SAME: ptr [[DST:%.*]]) {
+; VF4IC1-NEXT: [[ENTRY:.*:]]
+; VF4IC1-NEXT: br label %[[VECTOR_PH:.*]]
+; VF4IC1: [[VECTOR_PH]]:
+; VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]]
+; VF4IC1: [[VECTOR_BODY]]:
+; VF4IC1-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; VF4IC1-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; VF4IC1-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
+; VF4IC1-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1
+; VF4IC1-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2
+; VF4IC1-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 3
+; VF4IC1-NEXT: [[TMP4:%.*]] = lshr <4 x i32> [[VEC_IND]], splat (i32 1)
+; VF4IC1-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP4]], i32 0
+; VF4IC1-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[DST]], i32 [[TMP5]]
+; VF4IC1-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP4]], i32 1
+; VF4IC1-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[DST]], i32 [[TMP7]]
+; VF4IC1-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP4]], i32 2
+; VF4IC1-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[DST]], i32 [[TMP9]]
+; VF4IC1-NEXT: [[TMP11:%.*]] = extractelement <4 x i32> [[TMP4]], i32 3
+; VF4IC1-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[DST]], i32 [[TMP11]]
+; VF4IC1-NEXT: store i32 [[TMP0]], ptr [[TMP6]], align 4
+; VF4IC1-NEXT: store i32 [[TMP1]], ptr [[TMP8]], align 4
+; VF4IC1-NEXT: store i32 [[TMP2]], ptr [[TMP10]], align 4
+; VF4IC1-NEXT: store i32 [[TMP3]], ptr [[TMP12]], align 4
+; VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
+; VF4IC1-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; VF4IC1-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100
+; VF4IC1-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; VF4IC1: [[MIDDLE_BLOCK]]:
+; VF4IC1-NEXT: br label %[[EXIT:.*]]
+; VF4IC1: [[EXIT]]:
+; VF4IC1-NEXT: ret void
+;
+; VF2IC2-LABEL: define void @narrow_to_single_scalar_store_address_not_uniform_across_all_parts(
+; VF2IC2-SAME: ptr [[DST:%.*]]) {
+; VF2IC2-NEXT: [[ENTRY:.*:]]
+; VF2IC2-NEXT: br label %[[VECTOR_PH:.*]]
+; VF2IC2: [[VECTOR_PH]]:
+; VF2IC2-NEXT: br label %[[VECTOR_BODY:.*]]
+; VF2IC2: [[VECTOR_BODY]]:
+; VF2IC2-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; VF2IC2-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 2
+; VF2IC2-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 3
+; VF2IC2-NEXT: [[TMP2:%.*]] = lshr i32 [[INDEX]], 1
+; VF2IC2-NEXT: [[TMP3:%.*]] = lshr i32 [[TMP0]], 1
+; VF2IC2-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[DST]], i32 [[TMP2]]
+; VF2IC2-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[DST]], i32 [[TMP3]]
+; VF2IC2-NEXT: store i32 [[TMP1]], ptr [[TMP4]], align 4
+; VF2IC2-NEXT: store i32 [[TMP1]], ptr [[TMP5]], align 4
+; VF2IC2-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
+; VF2IC2-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100
+; VF2IC2-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; VF2IC2: [[MIDDLE_BLOCK]]:
+; VF2IC2-NEXT: br label %[[EXIT:.*]]
+; VF2IC2: [[EXIT]]:
+; VF2IC2-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+ %iv.shift = lshr i32 %iv, 1
+ %gep.dst = getelementptr i32, ptr %dst, i32 %iv.shift
+ store i32 %iv, ptr %gep.dst, align 4
+ %iv.next = add i32 %iv, 1
+ %ec = icmp eq i32 %iv.next, 100
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret void
+}
diff --git a/llvm/unittests/IR/ConstantFPRangeTest.cpp b/llvm/unittests/IR/ConstantFPRangeTest.cpp
index 58a65b9..cf9b31c 100644
--- a/llvm/unittests/IR/ConstantFPRangeTest.cpp
+++ b/llvm/unittests/IR/ConstantFPRangeTest.cpp
@@ -22,6 +22,7 @@ protected:
static ConstantFPRange Full;
static ConstantFPRange Empty;
static ConstantFPRange Finite;
+ static ConstantFPRange NonNaN;
static ConstantFPRange One;
static ConstantFPRange PosZero;
static ConstantFPRange NegZero;
@@ -44,6 +45,8 @@ ConstantFPRange ConstantFPRangeTest::Empty =
ConstantFPRange::getEmpty(APFloat::IEEEdouble());
ConstantFPRange ConstantFPRangeTest::Finite =
ConstantFPRange::getFinite(APFloat::IEEEdouble());
+ConstantFPRange ConstantFPRangeTest::NonNaN =
+ ConstantFPRange::getNonNaN(APFloat::IEEEdouble());
ConstantFPRange ConstantFPRangeTest::One = ConstantFPRange(APFloat(1.0));
ConstantFPRange ConstantFPRangeTest::PosZero = ConstantFPRange(
APFloat::getZero(APFloat::IEEEdouble(), /*Negative=*/false));
@@ -79,15 +82,21 @@ static void strictNext(APFloat &V) {
V.next(/*nextDown=*/false);
}
+enum class SparseLevel { // Selects how many boundary values the range enumeration helpers use.
+ Dense, // Skips the sparse value list in EnumerateConstantFPRangesImpl (formerly Exhaustive=true).
+ SpecialValuesWithAllPowerOfTwos, // Sparse list: special values plus the negative powers of two.
+ SpecialValuesOnly, // Sparse list: special values only; the power-of-two loop is skipped.
+};
+
template <typename Fn>
-static void EnumerateConstantFPRangesImpl(Fn TestFn, bool Exhaustive,
+static void EnumerateConstantFPRangesImpl(Fn TestFn, SparseLevel Level,
bool MayBeQNaN, bool MayBeSNaN) {
const fltSemantics &Sem = APFloat::Float8E4M3();
APFloat PosInf = APFloat::getInf(Sem, /*Negative=*/false);
APFloat NegInf = APFloat::getInf(Sem, /*Negative=*/true);
TestFn(ConstantFPRange(PosInf, NegInf, MayBeQNaN, MayBeSNaN));
- if (!Exhaustive) {
+ if (Level != SparseLevel::Dense) {
SmallVector<APFloat, 36> Values;
Values.push_back(APFloat::getInf(Sem, /*Negative=*/true));
Values.push_back(APFloat::getLargest(Sem, /*Negative=*/true));
@@ -95,10 +104,13 @@ static void EnumerateConstantFPRangesImpl(Fn TestFn, bool Exhaustive,
unsigned Exponents = APFloat::semanticsMaxExponent(Sem) -
APFloat::semanticsMinExponent(Sem) + 3;
unsigned MantissaBits = APFloat::semanticsPrecision(Sem) - 1;
- // Add -2^(max exponent), -2^(max exponent-1), ..., -2^(min exponent)
- for (unsigned M = Exponents - 2; M != 0; --M)
- Values.push_back(
- APFloat(Sem, APInt(BitWidth, (M + Exponents) << MantissaBits)));
+ if (Level == SparseLevel::SpecialValuesWithAllPowerOfTwos) {
+ // Add -2^(max exponent), -2^(max exponent-1), ..., -2^(min exponent)
+ for (unsigned M = Exponents - 2; M != 0; --M)
+ Values.push_back(
+ APFloat(Sem, APInt(BitWidth, (M + Exponents) << MantissaBits)));
+ }
+ Values.push_back(APFloat::getSmallestNormalized(Sem, /*Negative=*/true));
Values.push_back(APFloat::getSmallest(Sem, /*Negative=*/true));
Values.push_back(APFloat::getZero(Sem, /*Negative=*/true));
size_t E = Values.size();
@@ -127,26 +139,30 @@ static void EnumerateConstantFPRangesImpl(Fn TestFn, bool Exhaustive,
}
template <typename Fn>
-static void EnumerateConstantFPRanges(Fn TestFn, bool Exhaustive) {
- EnumerateConstantFPRangesImpl(TestFn, Exhaustive, /*MayBeQNaN=*/false,
+static void EnumerateConstantFPRanges(Fn TestFn, SparseLevel Level,
+ bool IgnoreSNaNs = false) {
+ EnumerateConstantFPRangesImpl(TestFn, Level, /*MayBeQNaN=*/false,
/*MayBeSNaN=*/false);
- EnumerateConstantFPRangesImpl(TestFn, Exhaustive, /*MayBeQNaN=*/false,
- /*MayBeSNaN=*/true);
- EnumerateConstantFPRangesImpl(TestFn, Exhaustive, /*MayBeQNaN=*/true,
+ EnumerateConstantFPRangesImpl(TestFn, Level, /*MayBeQNaN=*/true,
/*MayBeSNaN=*/false);
- EnumerateConstantFPRangesImpl(TestFn, Exhaustive, /*MayBeQNaN=*/true,
+ if (IgnoreSNaNs)
+ return;
+ EnumerateConstantFPRangesImpl(TestFn, Level, /*MayBeQNaN=*/false,
+ /*MayBeSNaN=*/true);
+ EnumerateConstantFPRangesImpl(TestFn, Level, /*MayBeQNaN=*/true,
/*MayBeSNaN=*/true);
}
template <typename Fn>
static void EnumerateTwoInterestingConstantFPRanges(Fn TestFn,
- bool Exhaustive) {
+ SparseLevel Level) {
EnumerateConstantFPRanges(
[&](const ConstantFPRange &CR1) {
EnumerateConstantFPRanges(
- [&](const ConstantFPRange &CR2) { TestFn(CR1, CR2); }, Exhaustive);
+ [&](const ConstantFPRange &CR2) { TestFn(CR1, CR2); }, Level,
+ /*IgnoreSNaNs=*/true);
},
- Exhaustive);
+ Level, /*IgnoreSNaNs=*/true);
}
template <typename Fn>
@@ -348,16 +364,25 @@ TEST_F(ConstantFPRangeTest, ExhaustivelyEnumerate) {
constexpr unsigned Expected = 4 * ((NNaNValues + 1) * NNaNValues / 2 + 1);
unsigned Count = 0;
EnumerateConstantFPRanges([&](const ConstantFPRange &) { ++Count; },
- /*Exhaustive=*/true);
+ SparseLevel::Dense);
EXPECT_EQ(Expected, Count);
}
TEST_F(ConstantFPRangeTest, Enumerate) {
- constexpr unsigned NNaNValues = 2 * ((1 << 4) - 2 + 4);
+ constexpr unsigned NNaNValues = 2 * ((1 << 4) - 2 + 5);
constexpr unsigned Expected = 4 * ((NNaNValues + 1) * NNaNValues / 2 + 1);
unsigned Count = 0;
EnumerateConstantFPRanges([&](const ConstantFPRange &) { ++Count; },
- /*Exhaustive=*/false);
+ SparseLevel::SpecialValuesWithAllPowerOfTwos);
+ EXPECT_EQ(Expected, Count);
+}
+
+TEST_F(ConstantFPRangeTest, EnumerateWithSpecialValuesOnly) { // Counts the ranges visited at the sparsest level.
+ constexpr unsigned NNaNValues = 2 * 5; // 5 negative special values; the factor 2 presumably covers their positive mirrors (loop not visible here).
+ constexpr unsigned Expected = 4 * ((NNaNValues + 1) * NNaNValues / 2 + 1); // 4 = the QNaN/SNaN flag combinations enumerated.
+ unsigned Count = 0;
+ EnumerateConstantFPRanges([&](const ConstantFPRange &) { ++Count; },
+ SparseLevel::SpecialValuesOnly);
EXPECT_EQ(Expected, Count);
}
@@ -459,7 +484,7 @@ TEST_F(ConstantFPRangeTest, FPClassify) {
EXPECT_EQ(SignBit, CR.getSignBit()) << CR;
EXPECT_EQ(Mask, CR.classify()) << CR;
},
- /*Exhaustive=*/true);
+ SparseLevel::Dense);
#endif
}
@@ -560,7 +585,7 @@ TEST_F(ConstantFPRangeTest, makeAllowedFCmpRegion) {
<< "Suboptimal result for makeAllowedFCmpRegion(" << Pred << ", "
<< CR << ")";
},
- /*Exhaustive=*/false);
+ SparseLevel::SpecialValuesWithAllPowerOfTwos);
}
#endif
}
@@ -671,7 +696,7 @@ TEST_F(ConstantFPRangeTest, makeSatisfyingFCmpRegion) {
<< ", " << CR << ")";
}
},
- /*Exhaustive=*/false);
+ SparseLevel::SpecialValuesWithAllPowerOfTwos);
}
#endif
}
@@ -804,13 +829,13 @@ TEST_F(ConstantFPRangeTest, negate) {
}
TEST_F(ConstantFPRangeTest, getWithout) {
- EXPECT_EQ(Full.getWithoutNaN(), ConstantFPRange::getNonNaN(Sem));
+ EXPECT_EQ(Full.getWithoutNaN(), NonNaN);
EXPECT_EQ(NaN.getWithoutNaN(), Empty);
EXPECT_EQ(NaN.getWithoutInf(), NaN);
EXPECT_EQ(PosInf.getWithoutInf(), Empty);
EXPECT_EQ(NegInf.getWithoutInf(), Empty);
- EXPECT_EQ(ConstantFPRange::getNonNaN(Sem).getWithoutInf(), Finite);
+ EXPECT_EQ(NonNaN.getWithoutInf(), Finite);
EXPECT_EQ(Zero.getWithoutInf(), Zero);
EXPECT_EQ(ConstantFPRange::getNonNaN(APFloat::getInf(Sem, /*Negative=*/true),
APFloat(3.0))
@@ -925,4 +950,119 @@ TEST_F(ConstantFPRangeTest, cast) {
/*IgnoreNaNPayload=*/true);
}
+TEST_F(ConstantFPRangeTest, add) { // Spot checks for add(), plus a brute-force comparison under EXPENSIVE_CHECKS.
+ EXPECT_EQ(Full.add(Full), NonNaN.unionWith(QNaN));
+ EXPECT_EQ(Full.add(Empty), Empty); // an empty operand is expected to give an empty result
+ EXPECT_EQ(Empty.add(Full), Empty);
+ EXPECT_EQ(Empty.add(Empty), Empty);
+ EXPECT_EQ(One.add(One), ConstantFPRange(APFloat(2.0)));
+ EXPECT_EQ(Some.add(Some),
+ ConstantFPRange::getNonNaN(APFloat(-6.0), APFloat(6.0)));
+ EXPECT_EQ(SomePos.add(SomeNeg),
+ ConstantFPRange::getNonNaN(APFloat(-3.0), APFloat(3.0)));
+ EXPECT_EQ(PosInf.add(PosInf), PosInf);
+ EXPECT_EQ(NegInf.add(NegInf), NegInf);
+ EXPECT_EQ(PosInf.add(Finite.unionWith(PosInf)), PosInf);
+ EXPECT_EQ(NegInf.add(Finite.unionWith(NegInf)), NegInf);
+ EXPECT_EQ(PosInf.add(Finite.unionWith(NegInf)), PosInf.unionWith(QNaN));
+ EXPECT_EQ(NegInf.add(Finite.unionWith(PosInf)), NegInf.unionWith(QNaN));
+ EXPECT_EQ(PosInf.add(NegInf), QNaN); // opposite infinities are expected to give only a quiet NaN
+ EXPECT_EQ(NegInf.add(PosInf), QNaN);
+ EXPECT_EQ(PosZero.add(NegZero), PosZero); // signed-zero cases
+ EXPECT_EQ(PosZero.add(Zero), PosZero);
+ EXPECT_EQ(NegZero.add(NegZero), NegZero);
+ EXPECT_EQ(NegZero.add(Zero), Zero);
+ EXPECT_EQ(NaN.add(NaN), QNaN); // NaN operands are expected to come out as quiet NaN only
+ EXPECT_EQ(NaN.add(Finite), QNaN);
+ EXPECT_EQ(NonNaN.unionWith(NaN).add(NonNaN), NonNaN.unionWith(QNaN));
+ EXPECT_EQ(PosInf.unionWith(QNaN).add(PosInf), PosInf.unionWith(QNaN));
+ EXPECT_EQ(PosInf.unionWith(NaN).add(ConstantFPRange(APFloat(24.0))),
+ PosInf.unionWith(QNaN));
+
+#if defined(EXPENSIVE_CHECKS) // brute-force cross-check, compiled only when EXPENSIVE_CHECKS is defined
+ EnumerateTwoInterestingConstantFPRanges(
+ [](const ConstantFPRange &LHS, const ConstantFPRange &RHS) {
+ ConstantFPRange Res = LHS.add(RHS);
+ ConstantFPRange Expected =
+ ConstantFPRange::getEmpty(LHS.getSemantics());
+ EnumerateValuesInConstantFPRange(
+ LHS,
+ [&](const APFloat &LHSC) {
+ EnumerateValuesInConstantFPRange(
+ RHS,
+ [&](const APFloat &RHSC) {
+ APFloat Sum = LHSC + RHSC;
+ EXPECT_TRUE(Res.contains(Sum)) // soundness: every concrete sum must lie in Res
+ << "Wrong result for " << LHS << " + " << RHS
+ << ". The result " << Res << " should contain " << Sum;
+ if (!Expected.contains(Sum)) // grow Expected to the union of all concrete sums
+ Expected = Expected.unionWith(ConstantFPRange(Sum));
+ },
+ /*IgnoreNaNPayload=*/true);
+ },
+ /*IgnoreNaNPayload=*/true);
+ EXPECT_EQ(Res, Expected) // precision: Res must equal exactly that union
+ << "Suboptimal result for " << LHS << " + " << RHS << ". Expected "
+ << Expected << ", but got " << Res;
+ },
+ SparseLevel::SpecialValuesOnly);
+#endif
+}
+
+TEST_F(ConstantFPRangeTest, sub) { // Spot checks for sub(), plus a brute-force comparison under EXPENSIVE_CHECKS.
+ EXPECT_EQ(Full.sub(Full), NonNaN.unionWith(QNaN));
+ EXPECT_EQ(Full.sub(Empty), Empty); // an empty operand is expected to give an empty result
+ EXPECT_EQ(Empty.sub(Full), Empty);
+ EXPECT_EQ(Empty.sub(Empty), Empty);
+ EXPECT_EQ(One.sub(One), ConstantFPRange(APFloat(0.0)));
+ EXPECT_EQ(Some.sub(Some),
+ ConstantFPRange::getNonNaN(APFloat(-6.0), APFloat(6.0)));
+ EXPECT_EQ(SomePos.sub(SomeNeg),
+ ConstantFPRange::getNonNaN(APFloat(0.0), APFloat(6.0)));
+ EXPECT_EQ(PosInf.sub(NegInf), PosInf);
+ EXPECT_EQ(NegInf.sub(PosInf), NegInf);
+ EXPECT_EQ(PosInf.sub(Finite.unionWith(NegInf)), PosInf);
+ EXPECT_EQ(NegInf.sub(Finite.unionWith(PosInf)), NegInf);
+ EXPECT_EQ(PosInf.sub(Finite.unionWith(PosInf)), PosInf.unionWith(QNaN));
+ EXPECT_EQ(NegInf.sub(Finite.unionWith(NegInf)), NegInf.unionWith(QNaN));
+ EXPECT_EQ(PosInf.sub(PosInf), QNaN); // same-signed infinities are expected to give only a quiet NaN
+ EXPECT_EQ(NegInf.sub(NegInf), QNaN);
+ EXPECT_EQ(PosZero.sub(NegZero), PosZero); // signed-zero cases
+ EXPECT_EQ(PosZero.sub(Zero), PosZero);
+ EXPECT_EQ(NegZero.sub(NegZero), PosZero);
+ EXPECT_EQ(NegZero.sub(PosZero), NegZero);
+ EXPECT_EQ(NegZero.sub(Zero), Zero);
+ EXPECT_EQ(NaN.sub(NaN), QNaN); // NaN operands are expected to come out as quiet NaN only
+ EXPECT_EQ(NaN.sub(Finite), QNaN); // exercise sub() here; add() is covered by its own test
+
+#if defined(EXPENSIVE_CHECKS) // brute-force cross-check, compiled only when EXPENSIVE_CHECKS is defined
+ EnumerateTwoInterestingConstantFPRanges(
+ [](const ConstantFPRange &LHS, const ConstantFPRange &RHS) {
+ ConstantFPRange Res = LHS.sub(RHS);
+ ConstantFPRange Expected =
+ ConstantFPRange::getEmpty(LHS.getSemantics());
+ EnumerateValuesInConstantFPRange(
+ LHS,
+ [&](const APFloat &LHSC) {
+ EnumerateValuesInConstantFPRange(
+ RHS,
+ [&](const APFloat &RHSC) {
+ APFloat Diff = LHSC - RHSC;
+ EXPECT_TRUE(Res.contains(Diff)) // soundness: every concrete difference must lie in Res
+ << "Wrong result for " << LHS << " - " << RHS
+ << ". The result " << Res << " should contain " << Diff;
+ if (!Expected.contains(Diff)) // grow Expected to the union of all concrete differences
+ Expected = Expected.unionWith(ConstantFPRange(Diff));
+ },
+ /*IgnoreNaNPayload=*/true);
+ },
+ /*IgnoreNaNPayload=*/true);
+ EXPECT_EQ(Res, Expected) // precision: Res must equal exactly that union
+ << "Suboptimal result for " << LHS << " - " << RHS << ". Expected "
+ << Expected << ", but got " << Res;
+ },
+ SparseLevel::SpecialValuesOnly);
+#endif
+}
+
} // anonymous namespace
diff --git a/llvm/unittests/Support/JobserverTest.cpp b/llvm/unittests/Support/JobserverTest.cpp
index ddee023..d274458 100644
--- a/llvm/unittests/Support/JobserverTest.cpp
+++ b/llvm/unittests/Support/JobserverTest.cpp
@@ -355,6 +355,7 @@ TEST_F(JobserverStrategyTest, ThreadPoolConcurrencyIsLimited) {
int CurrentActive = ++ActiveTasks;
LLVM_DEBUG(dbgs() << "Task " << i << ": Active tasks: " << CurrentActive
<< "\n");
+ (void)i;
int OldMax = MaxActiveTasks.load();
while (CurrentActive > OldMax)
MaxActiveTasks.compare_exchange_weak(OldMax, CurrentActive);
diff --git a/llvm/utils/lit/lit/llvm/config.py b/llvm/utils/lit/lit/llvm/config.py
index 2f2df68..913ba69 100644
--- a/llvm/utils/lit/lit/llvm/config.py
+++ b/llvm/utils/lit/lit/llvm/config.py
@@ -85,7 +85,8 @@ class LLVMConfig(object):
"HWASAN_SYMBOLIZER_PATH",
"MSAN_SYMBOLIZER_PATH",
"TSAN_SYMBOLIZER_PATH",
- "UBSAN_SYMBOLIZER_PATH" "ASAN_OPTIONS",
+ "UBSAN_SYMBOLIZER_PATH",
+ "ASAN_OPTIONS",
"HWASAN_OPTIONS",
"MSAN_OPTIONS",
"RTSAN_OPTIONS",
diff --git a/mlir/lib/Dialect/LLVMIR/IR/ROCDLDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/ROCDLDialect.cpp
index 17371ec..6d54bb6 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/ROCDLDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/ROCDLDialect.cpp
@@ -23,6 +23,7 @@
#include "mlir/IR/DialectImplementation.h"
#include "mlir/IR/MLIRContext.h"
#include "mlir/IR/Operation.h"
+#include "mlir/Transforms/InliningUtils.h"
#include "llvm/ADT/TypeSwitch.h"
using namespace mlir;
@@ -180,6 +181,15 @@ void RawBufferAtomicUMinOp::print(mlir::OpAsmPrinter &p) {
// ROCDLDialect initialization, type parsing, and registration.
//===----------------------------------------------------------------------===//
+namespace {
+struct ROCDLInlinerInterface final : DialectInlinerInterface { // inliner hook added in ROCDLDialect::initialize()
+ using DialectInlinerInterface::DialectInlinerInterface; // reuse the base-class constructors
+ bool isLegalToInline(Operation *, Region *, bool, IRMapping &) const final {
+ return true; // unconditionally legal: none of the arguments are inspected
+ }
+};
+} // namespace
+
// TODO: This should be the llvm.rocdl dialect once this is supported.
void ROCDLDialect::initialize() {
addOperations<
@@ -194,6 +204,7 @@ void ROCDLDialect::initialize() {
// Support unknown operations because not all ROCDL operations are registered.
allowUnknownOperations();
+ addInterfaces<ROCDLInlinerInterface>();
declarePromisedInterface<gpu::TargetAttrInterface, ROCDLTargetAttr>();
}
diff --git a/mlir/test/Dialect/LLVMIR/inlining-rocdl.mlir b/mlir/test/Dialect/LLVMIR/inlining-rocdl.mlir
new file mode 100644
index 0000000..7fd97ef
--- /dev/null
+++ b/mlir/test/Dialect/LLVMIR/inlining-rocdl.mlir
@@ -0,0 +1,14 @@
+// RUN: mlir-opt %s --inline | FileCheck %s
+
+llvm.func @threadidx() -> i32 { // callee holding a ROCDL op; the checks below expect it to be inlined
+ %tid = rocdl.workitem.id.x : i32
+ llvm.return %tid : i32
+}
+
+// CHECK-LABEL: func @caller
+llvm.func @caller() -> i32 {
+ // CHECK-NOT: llvm.call @threadidx
+ // CHECK: rocdl.workitem.id.x
+ %z = llvm.call @threadidx() : () -> (i32) // expected to be replaced by the callee body after --inline
+ llvm.return %z : i32
+}
diff --git a/offload/libomptarget/omptarget.cpp b/offload/libomptarget/omptarget.cpp
index a1950cb..69725e7 100644
--- a/offload/libomptarget/omptarget.cpp
+++ b/offload/libomptarget/omptarget.cpp
@@ -757,7 +757,7 @@ int processAttachEntries(DeviceTy &Device, AttachInfoTy &AttachInfo,
if (!AttachInfo.NewAllocations.empty()) {
DP("Tracked %u total new allocations:\n",
(unsigned)AttachInfo.NewAllocations.size());
- for (const auto &Alloc : AttachInfo.NewAllocations) {
+ for ([[maybe_unused]] const auto &Alloc : AttachInfo.NewAllocations) {
DP(" Host ptr: " DPxMOD ", Size: %" PRId64 " bytes\n",
DPxPTR(Alloc.first), Alloc.second);
}
diff --git a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel
index 3ea846e..4d279bf 100644
--- a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel
@@ -996,6 +996,8 @@ cc_library(
srcs = glob([
"lib/Analysis/FlowSensitive/Models/*.cpp",
"lib/Analysis/FlowSensitive/*.cpp",
+ "lib/Analysis/LifetimeSafety/*.cpp",
+ "lib/Analysis/LifetimeSafety/*.h",
"lib/Analysis/*.cpp",
]) + [
":analysis_htmllogger_gen",