aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--clang-tools-extra/clang-tidy/bugprone/NotNullTerminatedResultCheck.cpp7
-rw-r--r--clang-tools-extra/clang-tidy/modernize/UseIntegerSignComparisonCheck.cpp2
-rw-r--r--clang-tools-extra/docs/ReleaseNotes.rst9
-rw-r--r--clang-tools-extra/test/clang-tidy/checkers/bugprone/not-null-terminated-result-strlen.c5
-rw-r--r--clang-tools-extra/test/clang-tidy/checkers/bugprone/not-null-terminated-result-wcslen.cpp5
-rw-r--r--clang-tools-extra/test/clang-tidy/checkers/misc/const-correctness-pointer-as-pointers.cpp5
-rw-r--r--clang-tools-extra/test/clang-tidy/checkers/modernize/use-integer-sign-comparison-qt.cpp8
-rw-r--r--clang-tools-extra/test/clang-tidy/checkers/modernize/use-integer-sign-comparison.cpp8
-rw-r--r--clang/lib/Analysis/ExprMutationAnalyzer.cpp20
-rw-r--r--clang/lib/Format/WhitespaceManager.cpp88
-rw-r--r--clang/test/Frontend/rewrite-includes-bom.c6
-rw-r--r--clang/test/Lexer/minimize_source_to_dependency_directives_utf8bom.c2
-rw-r--r--clang/test/Modules/crash-vfs-relative-incdir.m2
-rw-r--r--clang/test/Modules/crash-vfs-run-reproducer.m2
-rw-r--r--clang/unittests/Analysis/ExprMutationAnalyzerTest.cpp38
-rw-r--r--clang/unittests/Format/FormatTest.cpp5
-rw-r--r--libclc/CMakeLists.txt7
-rw-r--r--libclc/clc/include/clc/math/clc_sincos_helpers.inc5
-rw-r--r--libclc/clc/include/clc/math/clc_sincos_helpers_fp64.inc9
-rw-r--r--libclc/clc/include/clc/math/clc_sincos_piby4.h14
-rw-r--r--libclc/clc/include/clc/math/clc_sincos_piby4.inc174
-rw-r--r--libclc/clc/lib/generic/math/clc_cos.cl1
-rw-r--r--libclc/clc/lib/generic/math/clc_cospi.cl1
-rw-r--r--libclc/clc/lib/generic/math/clc_sin.cl1
-rw-r--r--libclc/clc/lib/generic/math/clc_sincos_helpers.inc37
-rw-r--r--libclc/clc/lib/generic/math/clc_sincos_helpers_fp64.inc123
-rw-r--r--libclc/clc/lib/generic/math/clc_sinpi.cl1
-rw-r--r--libclc/clc/lib/generic/math/clc_tan.cl1
-rw-r--r--libclc/clc/lib/generic/math/clc_tanpi.cl1
-rw-r--r--libclc/cmake/modules/AddLibclc.cmake2
-rw-r--r--llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll361
-rw-r--r--llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll126
-rw-r--r--llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-incomplete-chains.ll72
33 files changed, 500 insertions, 648 deletions
diff --git a/clang-tools-extra/clang-tidy/bugprone/NotNullTerminatedResultCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/NotNullTerminatedResultCheck.cpp
index 3dd0a50..ca85168 100644
--- a/clang-tools-extra/clang-tidy/bugprone/NotNullTerminatedResultCheck.cpp
+++ b/clang-tools-extra/clang-tidy/bugprone/NotNullTerminatedResultCheck.cpp
@@ -309,10 +309,9 @@ static void lengthExprHandle(const Expr *LengthExpr,
// Try to obtain an 'IntegerLiteral' and adjust it.
if (!IsMacroDefinition) {
if (const auto *LengthIL = dyn_cast<IntegerLiteral>(LengthExpr)) {
- size_t NewLength = LengthIL->getValue().getZExtValue() +
- (LengthHandle == LengthHandleKind::Increase
- ? (isInjectUL(Result) ? 1UL : 1)
- : -1);
+ uint64_t NewLength =
+ LengthIL->getValue().getZExtValue() +
+ (LengthHandle == LengthHandleKind::Increase ? 1 : -1);
const auto NewLengthFix = FixItHint::CreateReplacement(
LengthIL->getSourceRange(),
diff --git a/clang-tools-extra/clang-tidy/modernize/UseIntegerSignComparisonCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseIntegerSignComparisonCheck.cpp
index 0003429..77262eb 100644
--- a/clang-tools-extra/clang-tidy/modernize/UseIntegerSignComparisonCheck.cpp
+++ b/clang-tools-extra/clang-tidy/modernize/UseIntegerSignComparisonCheck.cpp
@@ -152,6 +152,8 @@ void UseIntegerSignComparisonCheck::check(
if (const auto *RHSCast = llvm::dyn_cast<ExplicitCastExpr>(RHS)) {
SubExprRHS = RHSCast->getSubExpr();
R2.setEnd(SubExprRHS->getBeginLoc().getLocWithOffset(-1));
+ R3.setBegin(Lexer::getLocForEndOfToken(
+ SubExprRHS->getEndLoc(), 0, *Result.SourceManager, getLangOpts()));
}
DiagnosticBuilder Diag =
diag(BinaryOp->getBeginLoc(),
diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst
index a94dd97..43e4b61 100644
--- a/clang-tools-extra/docs/ReleaseNotes.rst
+++ b/clang-tools-extra/docs/ReleaseNotes.rst
@@ -276,6 +276,7 @@ Changes in existing checks
- Improved :doc:`bugprone-not-null-terminated-result
<clang-tidy/checks/bugprone/not-null-terminated-result>` check by fixing
+ bogus fix-its for ``strncmp`` and ``wcsncmp`` on Windows and
a crash caused by certain value-dependent expressions.
- Improved :doc:`bugprone-reserved-identifier
@@ -341,7 +342,8 @@ Changes in existing checks
- Improved :doc:`misc-const-correctness
<clang-tidy/checks/misc/const-correctness>` check to avoid false
positives when pointers is tranferred to non-const references
- and avoid false positives of function pointer.
+ and avoid false positives of function pointers and fix false
+ positives when returning a non-const pointer.
- Improved :doc:`misc-header-include-cycle
<clang-tidy/checks/misc/header-include-cycle>` check performance.
@@ -363,6 +365,11 @@ Changes in existing checks
<clang-tidy/checks/modernize/use-designated-initializers>` check to
suggest using designated initializers for aliased aggregate types.
+- Improved :doc:`modernize-use-integer-sign-comparison
+ <clang-tidy/checks/modernize/use-integer-sign-comparison>` by providing
+ correct fix-its when the right-hand side of a comparison contains a
+ non-C-style cast.
+
- Improved :doc:`modernize-use-nullptr
<clang-tidy/checks/modernize/use-nullptr>` check by fixing a crash
on Windows when the check was enabled with a 32-bit :program:`clang-tidy`
diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/not-null-terminated-result-strlen.c b/clang-tools-extra/test/clang-tidy/checkers/bugprone/not-null-terminated-result-strlen.c
index dccf4ed..ca86986 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/bugprone/not-null-terminated-result-strlen.c
+++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/not-null-terminated-result-strlen.c
@@ -1,11 +1,6 @@
// RUN: %check_clang_tidy %s bugprone-not-null-terminated-result %t -- \
// RUN: -- -I %S/Inputs/not-null-terminated-result
-// FIXME: Something wrong with the APInt un/signed conversion on Windows:
-// in 'strncmp(str6, "string", 7);' it tries to inject '4294967302' as length.
-
-// UNSUPPORTED: system-windows
-
#include "not-null-terminated-result-c.h"
#define __STDC_LIB_EXT1__ 1
diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/not-null-terminated-result-wcslen.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone/not-null-terminated-result-wcslen.cpp
index 8047db3..688e414 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/bugprone/not-null-terminated-result-wcslen.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/not-null-terminated-result-wcslen.cpp
@@ -1,11 +1,6 @@
// RUN: %check_clang_tidy -std=c++11-or-later %s bugprone-not-null-terminated-result %t -- \
// RUN: -- -I %S/Inputs/not-null-terminated-result
-// FIXME: Something wrong with the APInt un/signed conversion on Windows:
-// in 'wcsncmp(wcs6, L"string", 7);' it tries to inject '4294967302' as length.
-
-// UNSUPPORTED: system-windows
-
#include "not-null-terminated-result-cxx.h"
#define __STDC_LIB_EXT1__ 1
diff --git a/clang-tools-extra/test/clang-tidy/checkers/misc/const-correctness-pointer-as-pointers.cpp b/clang-tools-extra/test/clang-tidy/checkers/misc/const-correctness-pointer-as-pointers.cpp
index e20680c..4c847b5 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/misc/const-correctness-pointer-as-pointers.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/misc/const-correctness-pointer-as-pointers.cpp
@@ -48,6 +48,11 @@ void ignore_const_alias() {
p_local0 = &a[1];
}
+void *return_non_const() {
+ void *const a = nullptr;
+ return a;
+}
+
void function_pointer_basic() {
void (*const fp)() = nullptr;
fp();
diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-integer-sign-comparison-qt.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-integer-sign-comparison-qt.cpp
index 1f26ff3..31a3677 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-integer-sign-comparison-qt.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-integer-sign-comparison-qt.cpp
@@ -92,8 +92,7 @@ int AllComparisons() {
if (static_cast<unsigned int>(uArray[2]) < static_cast<int>(sArray[2]))
return 0;
// CHECK-MESSAGES: :[[@LINE-2]]:9: warning: comparison between 'signed' and 'unsigned' integers [modernize-use-integer-sign-comparison]
-// CHECK-FIXES: if (q20::cmp_less(uArray[2],sArray[2])))
-// FIXME: There should only be 2 closing braces. The fix-it inserts an unbalanced one.
+// CHECK-FIXES: if (q20::cmp_less(uArray[2],sArray[2]))
if ((unsigned int)uArray[3] < (int)sArray[3])
return 0;
@@ -116,6 +115,11 @@ int AllComparisons() {
// CHECK-MESSAGES: :[[@LINE-2]]:9: warning: comparison between 'signed' and 'unsigned' integers [modernize-use-integer-sign-comparison]
// CHECK-FIXES: if (q20::cmp_greater(uArray[6] , VALUE))
+ if (unsigned(uArray[7]) >= int(sArray[7]))
+ return 0;
+// CHECK-MESSAGES: :[[@LINE-2]]:9: warning: comparison between 'signed' and 'unsigned' integers [modernize-use-integer-sign-comparison]
+// CHECK-FIXES: if (q20::cmp_greater_equal(uArray[7],sArray[7]))
+
FuncParameters(uVar);
TemplateFuncParameter(sVar);
diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-integer-sign-comparison.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-integer-sign-comparison.cpp
index 628cee0..e7981a6 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-integer-sign-comparison.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-integer-sign-comparison.cpp
@@ -91,8 +91,7 @@ int AllComparisons() {
if (static_cast<unsigned int>(uArray[2]) < static_cast<int>(sArray[2]))
return 0;
// CHECK-MESSAGES: :[[@LINE-2]]:9: warning: comparison between 'signed' and 'unsigned' integers [modernize-use-integer-sign-comparison]
-// CHECK-FIXES: if (std::cmp_less(uArray[2],sArray[2])))
-// FIXME: There should only be 2 closing braces. The fix-it inserts an unbalanced one.
+// CHECK-FIXES: if (std::cmp_less(uArray[2],sArray[2]))
if ((unsigned int)uArray[3] < (int)sArray[3])
return 0;
@@ -115,6 +114,11 @@ int AllComparisons() {
// CHECK-MESSAGES: :[[@LINE-2]]:9: warning: comparison between 'signed' and 'unsigned' integers [modernize-use-integer-sign-comparison]
// CHECK-FIXES: if (std::cmp_greater(uArray[6] , VALUE))
+ if (unsigned(uArray[7]) >= int(sArray[7]))
+ return 0;
+// CHECK-MESSAGES: :[[@LINE-2]]:9: warning: comparison between 'signed' and 'unsigned' integers [modernize-use-integer-sign-comparison]
+// CHECK-FIXES: if (std::cmp_greater_equal(uArray[7],sArray[7]))
+
FuncParameters(uVar);
TemplateFuncParameter(sVar);
diff --git a/clang/lib/Analysis/ExprMutationAnalyzer.cpp b/clang/lib/Analysis/ExprMutationAnalyzer.cpp
index 1e376da..75b17c54 100644
--- a/clang/lib/Analysis/ExprMutationAnalyzer.cpp
+++ b/clang/lib/Analysis/ExprMutationAnalyzer.cpp
@@ -140,7 +140,8 @@ class ExprPointeeResolve {
// explicit cast will be checked in `findPointeeToNonConst`
const CastKind kind = ICE->getCastKind();
if (kind == CK_LValueToRValue || kind == CK_DerivedToBase ||
- kind == CK_UncheckedDerivedToBase)
+ kind == CK_UncheckedDerivedToBase ||
+ (kind == CK_NoOp && (ICE->getType() == ICE->getSubExpr()->getType())))
return resolveExpr(ICE->getSubExpr());
return false;
}
@@ -788,13 +789,16 @@ ExprMutationAnalyzer::Analyzer::findPointeeToNonConst(const Expr *Exp) {
// FIXME: false positive if the pointee does not change in lambda
const auto CaptureNoConst = lambdaExpr(hasCaptureInit(Exp));
- const auto Matches =
- match(stmt(anyOf(forEachDescendant(
- stmt(anyOf(AssignToNonConst, PassAsNonConstArg,
- CastToNonConst, CaptureNoConst))
- .bind("stmt")),
- forEachDescendant(InitToNonConst))),
- Stm, Context);
+ const auto ReturnNoConst =
+ returnStmt(hasReturnValue(canResolveToExprPointee(Exp)));
+
+ const auto Matches = match(
+ stmt(anyOf(forEachDescendant(
+ stmt(anyOf(AssignToNonConst, PassAsNonConstArg,
+ CastToNonConst, CaptureNoConst, ReturnNoConst))
+ .bind("stmt")),
+ forEachDescendant(InitToNonConst))),
+ Stm, Context);
return selectFirst<Stmt>("stmt", Matches);
}
diff --git a/clang/lib/Format/WhitespaceManager.cpp b/clang/lib/Format/WhitespaceManager.cpp
index 9261294..aae2f3e 100644
--- a/clang/lib/Format/WhitespaceManager.cpp
+++ b/clang/lib/Format/WhitespaceManager.cpp
@@ -432,7 +432,11 @@ AlignTokenSequence(const FormatStyle &Style, unsigned Start, unsigned End,
// right-justified. It is used to align compound assignments like `+=` and `=`.
// When RightJustify and ACS.PadOperators are true, operators in each block to
// be aligned will be padded on the left to the same length before aligning.
-template <typename F>
+//
// The simple check will not look at the indentation and nesting level to
// recurse into the line for alignment. It will also not count the commas. This
// is e.g. for aligning macro definitions.
+template <typename F, bool SimpleCheck = false>
static unsigned AlignTokens(const FormatStyle &Style, F &&Matches,
SmallVector<WhitespaceManager::Change, 16> &Changes,
unsigned StartAt,
@@ -465,9 +469,9 @@ static unsigned AlignTokens(const FormatStyle &Style, F &&Matches,
// Measure the scope level (i.e. depth of (), [], {}) of the first token, and
// abort when we hit any token in a higher scope than the starting one.
- auto IndentAndNestingLevel = StartAt < Changes.size()
- ? Changes[StartAt].indentAndNestingLevel()
- : std::tuple<unsigned, unsigned, unsigned>();
+ const auto IndentAndNestingLevel =
+ StartAt < Changes.size() ? Changes[StartAt].indentAndNestingLevel()
+ : std::tuple<unsigned, unsigned, unsigned>();
// Keep track of the number of commas before the matching tokens, we will only
// align a sequence of matching tokens if they are preceded by the same number
@@ -536,14 +540,17 @@ static unsigned AlignTokens(const FormatStyle &Style, F &&Matches,
if (CurrentChange.Tok->isNot(tok::comment))
LineIsComment = false;
- if (CurrentChange.Tok->is(tok::comma)) {
- ++CommasBeforeMatch;
- } else if (CurrentChange.indentAndNestingLevel() > IndentAndNestingLevel) {
- // Call AlignTokens recursively, skipping over this scope block.
- unsigned StoppedAt =
- AlignTokens(Style, Matches, Changes, i, ACS, RightJustify);
- i = StoppedAt - 1;
- continue;
+ if (!SimpleCheck) {
+ if (CurrentChange.Tok->is(tok::comma)) {
+ ++CommasBeforeMatch;
+ } else if (CurrentChange.indentAndNestingLevel() >
+ IndentAndNestingLevel) {
+ // Call AlignTokens recursively, skipping over this scope block.
+ const auto StoppedAt =
+ AlignTokens(Style, Matches, Changes, i, ACS, RightJustify);
+ i = StoppedAt - 1;
+ continue;
+ }
}
if (!Matches(CurrentChange))
@@ -683,61 +690,8 @@ void WhitespaceManager::alignConsecutiveMacros() {
return Current->Next->SpacesRequiredBefore == SpacesRequiredBefore;
};
- unsigned MinColumn = 0;
-
- // Start and end of the token sequence we're processing.
- unsigned StartOfSequence = 0;
- unsigned EndOfSequence = 0;
-
- // Whether a matching token has been found on the current line.
- bool FoundMatchOnLine = false;
-
- // Whether the current line consists only of comments
- bool LineIsComment = true;
-
- unsigned I = 0;
- for (unsigned E = Changes.size(); I != E; ++I) {
- if (Changes[I].NewlinesBefore != 0) {
- EndOfSequence = I;
-
- // Whether to break the alignment sequence because of an empty line.
- bool EmptyLineBreak = (Changes[I].NewlinesBefore > 1) &&
- !Style.AlignConsecutiveMacros.AcrossEmptyLines;
-
- // Whether to break the alignment sequence because of a line without a
- // match.
- bool NoMatchBreak =
- !FoundMatchOnLine &&
- !(LineIsComment && Style.AlignConsecutiveMacros.AcrossComments);
-
- if (EmptyLineBreak || NoMatchBreak) {
- AlignMatchingTokenSequence(StartOfSequence, EndOfSequence, MinColumn,
- AlignMacrosMatches, Changes);
- }
-
- // A new line starts, re-initialize line status tracking bools.
- FoundMatchOnLine = false;
- LineIsComment = true;
- }
-
- if (Changes[I].Tok->isNot(tok::comment))
- LineIsComment = false;
-
- if (!AlignMacrosMatches(Changes[I]))
- continue;
-
- FoundMatchOnLine = true;
-
- if (StartOfSequence == 0)
- StartOfSequence = I;
-
- unsigned ChangeMinColumn = Changes[I].StartOfTokenColumn;
- MinColumn = std::max(MinColumn, ChangeMinColumn);
- }
-
- EndOfSequence = I;
- AlignMatchingTokenSequence(StartOfSequence, EndOfSequence, MinColumn,
- AlignMacrosMatches, Changes);
+ AlignTokens<decltype(AlignMacrosMatches) &, /*SimpleCheck=*/true>(
+ Style, AlignMacrosMatches, Changes, 0, Style.AlignConsecutiveMacros);
}
void WhitespaceManager::alignConsecutiveAssignments() {
diff --git a/clang/test/Frontend/rewrite-includes-bom.c b/clang/test/Frontend/rewrite-includes-bom.c
index caa431a..27bf470 100644
--- a/clang/test/Frontend/rewrite-includes-bom.c
+++ b/clang/test/Frontend/rewrite-includes-bom.c
@@ -1,8 +1,8 @@
-// RUN: grep -q $'^\xEF\xBB\xBF' %S/Inputs/rewrite-includes-bom.h
+// RUN: cat %S/Inputs/rewrite-includes-bom.h | od -t x1 | grep -q 'ef\s*bb\s*bf'
// RUN: %clang_cc1 -E -frewrite-includes -I %S/Inputs %s -o %t.c
-// RUN: ! grep -q $'\xEF\xBB\xBF' %t.c
+// RUN: cat %t.c | od -t x1 | not grep -q 'ef\s*bb\s*bf'
// RUN: %clang_cc1 -fsyntax-only -verify %t.c
// expected-no-diagnostics
-// REQUIRES: shell
+// UNSUPPORTED: system-windows
#include "rewrite-includes-bom.h"
diff --git a/clang/test/Lexer/minimize_source_to_dependency_directives_utf8bom.c b/clang/test/Lexer/minimize_source_to_dependency_directives_utf8bom.c
index 46aba91..6f574ac 100644
--- a/clang/test/Lexer/minimize_source_to_dependency_directives_utf8bom.c
+++ b/clang/test/Lexer/minimize_source_to_dependency_directives_utf8bom.c
@@ -1,5 +1,5 @@
// Test UTF8 BOM at start of file
-// RUN: printf '\xef\xbb\xbf' > %t.c
+// RUN: printf '\357\273\277' > %t.c
// RUN: echo '#ifdef TEST\n' >> %t.c
// RUN: echo '#include <string>' >> %t.c
// RUN: echo '#endif' >> %t.c
diff --git a/clang/test/Modules/crash-vfs-relative-incdir.m b/clang/test/Modules/crash-vfs-relative-incdir.m
index c0407f7..46c3413c 100644
--- a/clang/test/Modules/crash-vfs-relative-incdir.m
+++ b/clang/test/Modules/crash-vfs-relative-incdir.m
@@ -53,4 +53,4 @@
// RUN: cd %t
// RUN: chmod 755 crash-vfs-*.sh
-// RUN: ./crash-vfs-*.sh
+// RUN: bash ./crash-vfs-*.sh
diff --git a/clang/test/Modules/crash-vfs-run-reproducer.m b/clang/test/Modules/crash-vfs-run-reproducer.m
index fd861fe..fa06fd9 100644
--- a/clang/test/Modules/crash-vfs-run-reproducer.m
+++ b/clang/test/Modules/crash-vfs-run-reproducer.m
@@ -53,4 +53,4 @@
// RUN: cd %t
// RUN: chmod 755 crash-vfs-*.sh
-// RUN: ./crash-vfs-*.sh
+// RUN: bash ./crash-vfs-*.sh
diff --git a/clang/unittests/Analysis/ExprMutationAnalyzerTest.cpp b/clang/unittests/Analysis/ExprMutationAnalyzerTest.cpp
index 95f8ae2..ef22960 100644
--- a/clang/unittests/Analysis/ExprMutationAnalyzerTest.cpp
+++ b/clang/unittests/Analysis/ExprMutationAnalyzerTest.cpp
@@ -2038,4 +2038,42 @@ TEST(ExprMutationAnalyzerTest, PointeeMutatedByConditionOperator) {
EXPECT_TRUE(isPointeeMutated(Results, AST.get()));
}
+TEST(ExprMutationAnalyzerTest, PointeeMutatedByReturn) {
+ {
+ const std::string Code = R"(
+ int * f() {
+ int *const x = nullptr;
+ return x;
+ })";
+ auto AST = buildASTFromCodeWithArgs(Code, {"-Wno-everything"});
+ auto Results =
+ match(withEnclosingCompound(declRefTo("x")), AST->getASTContext());
+ EXPECT_TRUE(isPointeeMutated(Results, AST.get()));
+ }
+ {
+ const std::string Code = R"(
+ int * f() {
+ int *const x = nullptr;
+ return x;
+ })";
+ // In C++23, the AST will have a NoOp cast.
+ auto AST =
+ buildASTFromCodeWithArgs(Code, {"-Wno-everything", "-std=c++23"});
+ auto Results =
+ match(withEnclosingCompound(declRefTo("x")), AST->getASTContext());
+ EXPECT_TRUE(isPointeeMutated(Results, AST.get()));
+ }
+ {
+ const std::string Code = R"(
+ int const* f() {
+ int *const x = nullptr;
+ return x;
+ })";
+ auto AST = buildASTFromCodeWithArgs(Code, {"-Wno-everything"});
+ auto Results =
+ match(withEnclosingCompound(declRefTo("x")), AST->getASTContext());
+ EXPECT_FALSE(isPointeeMutated(Results, AST.get()));
+ }
+}
+
} // namespace clang
diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp
index 0fb8139..a3ad978 100644
--- a/clang/unittests/Format/FormatTest.cpp
+++ b/clang/unittests/Format/FormatTest.cpp
@@ -18559,6 +18559,11 @@ TEST_F(FormatTest, AlignConsecutiveMacros) {
"#define bbbb 4\n"
"#define ccc (5)",
Style);
+
+ Style.ColumnLimit = 30;
+ verifyFormat("#define MY_FUNC(x) callMe(X)\n"
+ "#define MY_LONG_CONSTANT 17",
+ Style);
}
TEST_F(FormatTest, AlignConsecutiveAssignmentsAcrossEmptyLines) {
diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt
index 088edc0..9a81f26 100644
--- a/libclc/CMakeLists.txt
+++ b/libclc/CMakeLists.txt
@@ -77,6 +77,7 @@ if( LIBCLC_STANDALONE_BUILD OR CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DI
# Setup the paths where libclc runtimes should be stored.
set( LIBCLC_OUTPUT_LIBRARY_DIR ${CMAKE_CURRENT_BINARY_DIR} )
+ set( LIBCLC_INSTALL_DIR ${CMAKE_INSTALL_DATADIR}/clc )
else()
# In-tree configuration
set( LIBCLC_STANDALONE_BUILD FALSE )
@@ -100,10 +101,12 @@ else()
# Setup the paths where libclc runtimes should be stored. By default, in an
# in-tree build we place the libraries in clang's resource driectory.
include(GetClangResourceDir)
- get_clang_resource_dir( LIBCLC_OUTPUT_DIR PREFIX ${LLVM_LIBRARY_OUTPUT_INTDIR}/.. )
+ get_clang_resource_dir( LIBCLC_INSTALL_DIR )
+ cmake_path( APPEND LIBCLC_INSTALL_DIR "lib" "libclc" )
# Note we do not adhere to LLVM_ENABLE_PER_TARGET_RUNTIME_DIR.
- set( LIBCLC_OUTPUT_LIBRARY_DIR ${LIBCLC_OUTPUT_DIR}/lib/libclc )
+ cmake_path( GET LLVM_LIBRARY_OUTPUT_INTDIR PARENT_PATH LIBCLC_OUTPUT_LIBRARY_DIR )
+ cmake_path( APPEND LIBCLC_OUTPUT_LIBRARY_DIR ${LIBCLC_INSTALL_DIR} )
file( MAKE_DIRECTORY ${LIBCLC_OUTPUT_LIBRARY_DIR} )
endif()
diff --git a/libclc/clc/include/clc/math/clc_sincos_helpers.inc b/libclc/clc/include/clc/math/clc_sincos_helpers.inc
index 4daff92..0a3b816 100644
--- a/libclc/clc/include/clc/math/clc_sincos_helpers.inc
+++ b/libclc/clc/include/clc/math/clc_sincos_helpers.inc
@@ -10,6 +10,11 @@ _CLC_DECL _CLC_OVERLOAD __CLC_FLOATN __clc_sinf_piby4(__CLC_FLOATN x,
__CLC_FLOATN y);
_CLC_DECL _CLC_OVERLOAD __CLC_FLOATN __clc_cosf_piby4(__CLC_FLOATN x,
__CLC_FLOATN y);
+
+_CLC_DECL _CLC_OVERLOAD void __clc_sincos_piby4(__CLC_FLOATN x,
+ private __CLC_FLOATN *sinval,
+ private __CLC_FLOATN *cosval);
+
_CLC_DECL _CLC_OVERLOAD __CLC_FLOATN __clc_tanf_piby4(__CLC_FLOATN x,
__CLC_INTN regn);
diff --git a/libclc/clc/include/clc/math/clc_sincos_helpers_fp64.inc b/libclc/clc/include/clc/math/clc_sincos_helpers_fp64.inc
index 09c6e1c..15934ca 100644
--- a/libclc/clc/include/clc/math/clc_sincos_helpers_fp64.inc
+++ b/libclc/clc/include/clc/math/clc_sincos_helpers_fp64.inc
@@ -6,6 +6,15 @@
//
//===----------------------------------------------------------------------===//
+_CLC_DECL _CLC_OVERLOAD void __clc_sincos_piby4(__CLC_DOUBLEN x,
+ __CLC_DOUBLEN xx,
+ private __CLC_DOUBLEN *sinval,
+ private __CLC_DOUBLEN *cosval);
+
+_CLC_DECL _CLC_OVERLOAD void __clc_tan_piby4(__CLC_DOUBLEN x, __CLC_DOUBLEN xx,
+ private __CLC_DOUBLEN *leadval,
+ private __CLC_DOUBLEN *tailval);
+
_CLC_DECL _CLC_OVERLOAD void
__clc_remainder_piby2_medium(__CLC_DOUBLEN x, private __CLC_DOUBLEN *r,
private __CLC_DOUBLEN *rr,
diff --git a/libclc/clc/include/clc/math/clc_sincos_piby4.h b/libclc/clc/include/clc/math/clc_sincos_piby4.h
deleted file mode 100644
index 50608ae..0000000
--- a/libclc/clc/include/clc/math/clc_sincos_piby4.h
+++ /dev/null
@@ -1,14 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include <clc/math/clc_fma.h>
-#include <clc/math/clc_mad.h>
-#include <clc/math/math.h>
-
-#define __CLC_BODY <clc/math/clc_sincos_piby4.inc>
-#include <clc/math/gentype.inc>
diff --git a/libclc/clc/include/clc/math/clc_sincos_piby4.inc b/libclc/clc/include/clc/math/clc_sincos_piby4.inc
deleted file mode 100644
index 91ec518..0000000
--- a/libclc/clc/include/clc/math/clc_sincos_piby4.inc
+++ /dev/null
@@ -1,174 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#if __CLC_FPSIZE == 32
-
-// Evaluate single precisions in and cos of value in interval [-pi/4, pi/4]
-_CLC_INLINE _CLC_OVERLOAD void
-__clc_sincos_piby4(__CLC_GENTYPE x, private __CLC_GENTYPE *sinval,
- private __CLC_GENTYPE *cosval) {
- // Taylor series for sin(x) is x - x^3/3! + x^5/5! - x^7/7! ...
- // = x * (1 - x^2/3! + x^4/5! - x^6/7! ...
- // = x * f(w)
- // where w = x*x and f(w) = (1 - w/3! + w^2/5! - w^3/7! ...
- // We use a minimax approximation of (f(w) - 1) / w
- // because this produces an expansion in even powers of x.
-
- // Taylor series for cos(x) is 1 - x^2/2! + x^4/4! - x^6/6! ...
- // = f(w)
- // where w = x*x and f(w) = (1 - w/2! + w^2/4! - w^3/6! ...
- // We use a minimax approximation of (f(w) - 1 + w/2) / (w*w)
- // because this produces an expansion in even powers of x.
-
- const __CLC_GENTYPE sc1 = -0.166666666638608441788607926e0F;
- const __CLC_GENTYPE sc2 = 0.833333187633086262120839299e-2F;
- const __CLC_GENTYPE sc3 = -0.198400874359527693921333720e-3F;
- const __CLC_GENTYPE sc4 = 0.272500015145584081596826911e-5F;
-
- const __CLC_GENTYPE cc1 = 0.41666666664325175238031e-1F;
- const __CLC_GENTYPE cc2 = -0.13888887673175665567647e-2F;
- const __CLC_GENTYPE cc3 = 0.24800600878112441958053e-4F;
- const __CLC_GENTYPE cc4 = -0.27301013343179832472841e-6F;
-
- __CLC_GENTYPE x2 = x * x;
-
- *sinval = __clc_mad(
- x * x2, __clc_mad(x2, __clc_mad(x2, __clc_mad(x2, sc4, sc3), sc2), sc1),
- x);
- *cosval = __clc_mad(
- x2 * x2, __clc_mad(x2, __clc_mad(x2, __clc_mad(x2, cc4, cc3), cc2), cc1),
- __clc_mad(x2, -0.5f, 1.0f));
-}
-
-#elif __CLC_FPSIZE == 64
-
-_CLC_INLINE _CLC_OVERLOAD void
-__clc_sincos_piby4(__CLC_GENTYPE x, __CLC_GENTYPE xx,
- private __CLC_GENTYPE *sinval,
- private __CLC_GENTYPE *cosval) {
- // Taylor series for sin(x) is x - x^3/3! + x^5/5! - x^7/7! ...
- // = x * (1 - x^2/3! + x^4/5! - x^6/7! ...
- // = x * f(w)
- // where w = x*x and f(w) = (1 - w/3! + w^2/5! - w^3/7! ...
- // We use a minimax approximation of (f(w) - 1) / w
- // because this produces an expansion in even powers of x.
- // If xx (the tail of x) is non-zero, we add a correction
- // term g(x,xx) = (1-x*x/2)*xx to the result, where g(x,xx)
- // is an approximation to cos(x)*sin(xx) valid because
- // xx is tiny relative to x.
-
- // Taylor series for cos(x) is 1 - x^2/2! + x^4/4! - x^6/6! ...
- // = f(w)
- // where w = x*x and f(w) = (1 - w/2! + w^2/4! - w^3/6! ...
- // We use a minimax approximation of (f(w) - 1 + w/2) / (w*w)
- // because this produces an expansion in even powers of x.
- // If xx (the tail of x) is non-zero, we subtract a correction
- // term g(x,xx) = x*xx to the result, where g(x,xx)
- // is an approximation to sin(x)*sin(xx) valid because
- // xx is tiny relative to x.
-
- const __CLC_GENTYPE sc1 = -0.166666666666666646259241729;
- const __CLC_GENTYPE sc2 = 0.833333333333095043065222816e-2;
- const __CLC_GENTYPE sc3 = -0.19841269836761125688538679e-3;
- const __CLC_GENTYPE sc4 = 0.275573161037288022676895908448e-5;
- const __CLC_GENTYPE sc5 = -0.25051132068021699772257377197e-7;
- const __CLC_GENTYPE sc6 = 0.159181443044859136852668200e-9;
-
- const __CLC_GENTYPE cc1 = 0.41666666666666665390037e-1;
- const __CLC_GENTYPE cc2 = -0.13888888888887398280412e-2;
- const __CLC_GENTYPE cc3 = 0.248015872987670414957399e-4;
- const __CLC_GENTYPE cc4 = -0.275573172723441909470836e-6;
- const __CLC_GENTYPE cc5 = 0.208761463822329611076335e-8;
- const __CLC_GENTYPE cc6 = -0.113826398067944859590880e-10;
-
- __CLC_GENTYPE x2 = x * x;
- __CLC_GENTYPE x3 = x2 * x;
- __CLC_GENTYPE r = (__CLC_GENTYPE)0.5 * x2;
- __CLC_GENTYPE t = (__CLC_GENTYPE)1.0 - r;
-
- __CLC_GENTYPE sp = __clc_fma(
- __clc_fma(__clc_fma(__clc_fma(sc6, x2, sc5), x2, sc4), x2, sc3), x2, sc2);
-
- __CLC_GENTYPE cp =
- t +
- __clc_fma(__clc_fma(__clc_fma(__clc_fma(__clc_fma(__clc_fma(cc6, x2, cc5),
- x2, cc4),
- x2, cc3),
- x2, cc2),
- x2, cc1),
- x2 * x2, __clc_fma(x, xx, (1.0 - t) - r));
-
- *sinval =
- x - __clc_fma(-x3, sc1, __clc_fma(__clc_fma(-x3, sp, 0.5 * xx), x2, -xx));
- *cosval = cp;
-}
-
-_CLC_INLINE _CLC_OVERLOAD void __clc_tan_piby4(__CLC_GENTYPE x,
- __CLC_GENTYPE xx,
- private __CLC_GENTYPE *leadval,
- private __CLC_GENTYPE *tailval) {
- // 0x3fe921fb54442d18
- const __CLC_GENTYPE piby4_lead = 7.85398163397448278999e-01;
- // 0x3c81a62633145c06
- const __CLC_GENTYPE piby4_tail = 3.06161699786838240164e-17;
-
- // In order to maintain relative precision transform using the identity:
- // tan(pi/4-x) = (1-tan(x))/(1+tan(x)) for arguments close to pi/4.
- // Similarly use tan(x-pi/4) = (tan(x)-1)/(tan(x)+1) close to -pi/4.
-
- __CLC_LONGN ca = x > 0.68;
- __CLC_LONGN cb = x < -0.68;
- __CLC_GENTYPE transform = ca ? 1.0 : 0.0;
- transform = cb ? -1.0 : transform;
-
- __CLC_GENTYPE tx = __clc_fma(-transform, x, piby4_lead) +
- __clc_fma(-transform, xx, piby4_tail);
- __CLC_LONGN c = ca | cb;
- x = c ? tx : x;
- xx = c ? 0.0 : xx;
-
- // Core Remez [2,3] approximation to tan(x+xx) on the interval [0,0.68].
- __CLC_GENTYPE t1 = x;
- __CLC_GENTYPE r = __clc_fma(2.0, x * xx, x * x);
-
- __CLC_GENTYPE a = __clc_fma(r,
- __clc_fma(r, 0.224044448537022097264602535574e-3,
- -0.229345080057565662883358588111e-1),
- 0.372379159759792203640806338901e0);
-
- __CLC_GENTYPE b =
- __clc_fma(r,
- __clc_fma(r,
- __clc_fma(r, -0.232371494088563558304549252913e-3,
- 0.260656620398645407524064091208e-1),
- -0.515658515729031149329237816945e0),
- 0.111713747927937668539901657944e1);
-
- __CLC_GENTYPE t2 = __clc_fma(MATH_DIVIDE(a, b), x * r, xx);
-
- __CLC_GENTYPE tp = t1 + t2;
-
- // Compute -1.0/(t1 + t2) accurately
- __CLC_GENTYPE z1 =
- __CLC_AS_GENTYPE(__CLC_AS_ULONGN(tp) & 0xffffffff00000000L);
- __CLC_GENTYPE z2 = t2 - (z1 - t1);
- __CLC_GENTYPE trec = -MATH_RECIP(tp);
- __CLC_GENTYPE trec_top =
- __CLC_AS_GENTYPE(__CLC_AS_ULONGN(trec) & 0xffffffff00000000L);
-
- __CLC_GENTYPE tpr = __clc_fma(
- __clc_fma(trec_top, z2, __clc_fma(trec_top, z1, 1.0)), trec, trec_top);
-
- __CLC_GENTYPE tpt = transform * (1.0 - MATH_DIVIDE(2.0 * tp, 1.0 + tp));
- __CLC_GENTYPE tptr = transform * (MATH_DIVIDE(2.0 * tp, tp - 1.0) - 1.0);
-
- *leadval = c ? tpt : tp;
- *tailval = c ? tptr : tpr;
-}
-
-#endif
diff --git a/libclc/clc/lib/generic/math/clc_cos.cl b/libclc/clc/lib/generic/math/clc_cos.cl
index e7e4d6a..5529ec4 100644
--- a/libclc/clc/lib/generic/math/clc_cos.cl
+++ b/libclc/clc/lib/generic/math/clc_cos.cl
@@ -10,7 +10,6 @@
#include <clc/float/definitions.h>
#include <clc/math/clc_fabs.h>
#include <clc/math/clc_sincos_helpers.h>
-#include <clc/math/clc_sincos_piby4.h>
#include <clc/math/math.h>
#include <clc/relational/clc_isinf.h>
#include <clc/relational/clc_isnan.h>
diff --git a/libclc/clc/lib/generic/math/clc_cospi.cl b/libclc/clc/lib/generic/math/clc_cospi.cl
index 07e1b49..6a10171 100644
--- a/libclc/clc/lib/generic/math/clc_cospi.cl
+++ b/libclc/clc/lib/generic/math/clc_cospi.cl
@@ -11,7 +11,6 @@
#include <clc/internal/clc.h>
#include <clc/math/clc_fabs.h>
#include <clc/math/clc_sincos_helpers.h>
-#include <clc/math/clc_sincos_piby4.h>
#include <clc/math/math.h>
#define __CLC_BODY <clc_cospi.inc>
diff --git a/libclc/clc/lib/generic/math/clc_sin.cl b/libclc/clc/lib/generic/math/clc_sin.cl
index 741383f..99338c9 100644
--- a/libclc/clc/lib/generic/math/clc_sin.cl
+++ b/libclc/clc/lib/generic/math/clc_sin.cl
@@ -11,7 +11,6 @@
#include <clc/internal/clc.h>
#include <clc/math/clc_fabs.h>
#include <clc/math/clc_sincos_helpers.h>
-#include <clc/math/clc_sincos_piby4.h>
#include <clc/math/clc_trunc.h>
#include <clc/math/math.h>
#include <clc/math/tables.h>
diff --git a/libclc/clc/lib/generic/math/clc_sincos_helpers.inc b/libclc/clc/lib/generic/math/clc_sincos_helpers.inc
index 9a46170..2a71b56 100644
--- a/libclc/clc/lib/generic/math/clc_sincos_helpers.inc
+++ b/libclc/clc/lib/generic/math/clc_sincos_helpers.inc
@@ -74,6 +74,43 @@ _CLC_DEF _CLC_OVERLOAD __CLC_FLOATN __clc_cosf_piby4(__CLC_FLOATN x,
return ret;
}
+// Evaluate single precision sin and cos of a value in interval [-pi/4, pi/4]
+_CLC_DEF _CLC_OVERLOAD void __clc_sincos_piby4(__CLC_FLOATN x,
+ private __CLC_FLOATN *sinval,
+ private __CLC_FLOATN *cosval) {
+ // The two results are written through sinval and cosval.
+ // Taylor series for sin(x) is x - x^3/3! + x^5/5! - x^7/7! ...
+ // = x * (1 - x^2/3! + x^4/5! - x^6/7! ...) = x * f(w)
+ // where w = x*x and f(w) = (1 - w/3! + w^2/5! - w^3/7! ...).
+ // We use a minimax approximation of (f(w) - 1) / w
+ // because this produces an expansion in even powers of x.
+
+ // Taylor series for cos(x) is 1 - x^2/2! + x^4/4! - x^6/6! ...
+ // = f(w)
+ // where w = x*x and f(w) = (1 - w/2! + w^2/4! - w^3/6! ...).
+ // We use a minimax approximation of (f(w) - 1 + w/2) / (w*w)
+ // because this produces an expansion in even powers of x.
+ // Minimax coefficients: sc1..sc4 for the sin series, cc1..cc4 for cos.
+ const __CLC_FLOATN sc1 = -0.166666666638608441788607926e0F;
+ const __CLC_FLOATN sc2 = 0.833333187633086262120839299e-2F;
+ const __CLC_FLOATN sc3 = -0.198400874359527693921333720e-3F;
+ const __CLC_FLOATN sc4 = 0.272500015145584081596826911e-5F;
+
+ const __CLC_FLOATN cc1 = 0.41666666664325175238031e-1F;
+ const __CLC_FLOATN cc2 = -0.13888887673175665567647e-2F;
+ const __CLC_FLOATN cc3 = 0.24800600878112441958053e-4F;
+ const __CLC_FLOATN cc4 = -0.27301013343179832472841e-6F;
+
+ __CLC_FLOATN x2 = x * x;
+ // Nested mads: sin ~= x + x^3*S(w), cos ~= (1 - w/2) + w^2*C(w), w = x2.
+ *sinval = __clc_mad(
+ x * x2, __clc_mad(x2, __clc_mad(x2, __clc_mad(x2, sc4, sc3), sc2), sc1),
+ x);
+ *cosval = __clc_mad(
+ x2 * x2, __clc_mad(x2, __clc_mad(x2, __clc_mad(x2, cc4, cc3), cc2), cc1),
+ __clc_mad(x2, -0.5f, 1.0f));
+}
+
_CLC_DEF _CLC_OVERLOAD __CLC_FLOATN __clc_tanf_piby4(__CLC_FLOATN x,
__CLC_INTN regn) {
// Core Remez [1,2] approximation to tan(x) on the interval [0,pi/4].
diff --git a/libclc/clc/lib/generic/math/clc_sincos_helpers_fp64.inc b/libclc/clc/lib/generic/math/clc_sincos_helpers_fp64.inc
index 8fae90c..e029c6d 100644
--- a/libclc/clc/lib/generic/math/clc_sincos_helpers_fp64.inc
+++ b/libclc/clc/lib/generic/math/clc_sincos_helpers_fp64.inc
@@ -6,6 +6,129 @@
//
//===----------------------------------------------------------------------===//
+_CLC_DEF _CLC_OVERLOAD void __clc_sincos_piby4(__CLC_DOUBLEN x,
+ __CLC_DOUBLEN xx,
+ private __CLC_DOUBLEN *sinval,
+ private __CLC_DOUBLEN *cosval) {
+ // Stores sin and cos of the argument x (with tail xx) to *sinval/*cosval.
+ // Taylor series for sin(x) is x - x^3/3! + x^5/5! - x^7/7! ...
+ // = x * (1 - x^2/3! + x^4/5! - x^6/7! ...) = x * f(w)
+ // where w = x*x and f(w) = (1 - w/3! + w^2/5! - w^3/7! ...).
+ // We use a minimax approximation of (f(w) - 1) / w
+ // because this produces an expansion in even powers of x.
+ // If xx (the tail of x) is non-zero, we add a correction
+ // term g(x,xx) = (1-x*x/2)*xx to the result, where g(x,xx)
+ // is an approximation to cos(x)*sin(xx) valid because
+ // xx is tiny relative to x.
+
+ // Taylor series for cos(x) is 1 - x^2/2! + x^4/4! - x^6/6! ...
+ // = f(w)
+ // where w = x*x and f(w) = (1 - w/2! + w^2/4! - w^3/6! ...).
+ // We use a minimax approximation of (f(w) - 1 + w/2) / (w*w)
+ // because this produces an expansion in even powers of x.
+ // If xx (the tail of x) is non-zero, we subtract a correction
+ // term g(x,xx) = x*xx from the result, where g(x,xx)
+ // is an approximation to sin(x)*sin(xx) valid because
+ // xx is tiny relative to x.
+ // NOTE(review): cp below adds x*xx via __clc_fma(x, xx, ...); confirm sign.
+ const __CLC_DOUBLEN sc1 = -0.166666666666666646259241729;
+ const __CLC_DOUBLEN sc2 = 0.833333333333095043065222816e-2;
+ const __CLC_DOUBLEN sc3 = -0.19841269836761125688538679e-3;
+ const __CLC_DOUBLEN sc4 = 0.275573161037288022676895908448e-5;
+ const __CLC_DOUBLEN sc5 = -0.25051132068021699772257377197e-7;
+ const __CLC_DOUBLEN sc6 = 0.159181443044859136852668200e-9;
+
+ const __CLC_DOUBLEN cc1 = 0.41666666666666665390037e-1;
+ const __CLC_DOUBLEN cc2 = -0.13888888888887398280412e-2;
+ const __CLC_DOUBLEN cc3 = 0.248015872987670414957399e-4;
+ const __CLC_DOUBLEN cc4 = -0.275573172723441909470836e-6;
+ const __CLC_DOUBLEN cc5 = 0.208761463822329611076335e-8;
+ const __CLC_DOUBLEN cc6 = -0.113826398067944859590880e-10;
+
+ __CLC_DOUBLEN x2 = x * x;
+ __CLC_DOUBLEN x3 = x2 * x;
+ __CLC_DOUBLEN r = (__CLC_DOUBLEN)0.5 * x2;
+ __CLC_DOUBLEN t = (__CLC_DOUBLEN)1.0 - r;
+
+ __CLC_DOUBLEN sp = __clc_fma(
+ __clc_fma(__clc_fma(__clc_fma(sc6, x2, sc5), x2, sc4), x2, sc3), x2, sc2);
+
+ __CLC_DOUBLEN cp =
+ t +
+ __clc_fma(__clc_fma(__clc_fma(__clc_fma(__clc_fma(__clc_fma(cc6, x2, cc5),
+ x2, cc4),
+ x2, cc3),
+ x2, cc2),
+ x2, cc1),
+ x2 * x2, __clc_fma(x, xx, (1.0 - t) - r));
+ // Expands to x + x^3*(sc1 + x2*sp) + xx*(1 - x2/2).
+ *sinval =
+ x - __clc_fma(-x3, sc1, __clc_fma(__clc_fma(-x3, sp, 0.5 * xx), x2, -xx));
+ *cosval = cp;
+}
+
+_CLC_DEF _CLC_OVERLOAD void __clc_tan_piby4(__CLC_DOUBLEN x, __CLC_DOUBLEN xx,
+ private __CLC_DOUBLEN *leadval, // receives tan(x + xx)
+ private __CLC_DOUBLEN *tailval) { // receives -1 / tan(x + xx)
+ // Leading part of pi/4, bit pattern 0x3fe921fb54442d18
+ const __CLC_DOUBLEN piby4_lead = 7.85398163397448278999e-01;
+ // Tail part of pi/4, bit pattern 0x3c81a62633145c06
+ const __CLC_DOUBLEN piby4_tail = 3.06161699786838240164e-17;
+
+ // In order to maintain relative precision transform using the identity:
+ // tan(pi/4-x) = (1-tan(x))/(1+tan(x)) for arguments close to pi/4.
+ // Similarly use tan(x-pi/4) = (tan(x)-1)/(tan(x)+1) close to -pi/4.
+
+ __CLC_LONGN ca = x > 0.68;
+ __CLC_LONGN cb = x < -0.68;
+ __CLC_DOUBLEN transform = ca ? 1.0 : 0.0;
+ transform = cb ? -1.0 : transform;
+ // When transformed, the argument becomes pi/4 - transform * (x + xx).
+ __CLC_DOUBLEN tx = __clc_fma(-transform, x, piby4_lead) +
+ __clc_fma(-transform, xx, piby4_tail);
+ __CLC_LONGN c = ca | cb;
+ x = c ? tx : x;
+ xx = c ? 0.0 : xx;
+
+ // Core Remez [2,3] approximation to tan(x+xx) on the interval [0,0.68].
+ __CLC_DOUBLEN t1 = x;
+ __CLC_DOUBLEN r = __clc_fma(2.0, x * xx, x * x);
+
+ __CLC_DOUBLEN a = __clc_fma(r,
+ __clc_fma(r, 0.224044448537022097264602535574e-3,
+ -0.229345080057565662883358588111e-1),
+ 0.372379159759792203640806338901e0);
+
+ __CLC_DOUBLEN b =
+ __clc_fma(r,
+ __clc_fma(r,
+ __clc_fma(r, -0.232371494088563558304549252913e-3,
+ 0.260656620398645407524064091208e-1),
+ -0.515658515729031149329237816945e0),
+ 0.111713747927937668539901657944e1);
+
+ __CLC_DOUBLEN t2 = __clc_fma(MATH_DIVIDE(a, b), x * r, xx);
+
+ __CLC_DOUBLEN tp = t1 + t2;
+
+ // Compute -1.0/(t1 + t2) accurately; the masks keep only the upper 32 bits.
+ __CLC_DOUBLEN z1 =
+ __CLC_AS_GENTYPE(__CLC_AS_ULONGN(tp) & 0xffffffff00000000L);
+ __CLC_DOUBLEN z2 = t2 - (z1 - t1);
+ __CLC_DOUBLEN trec = -MATH_RECIP(tp);
+ __CLC_DOUBLEN trec_top =
+ __CLC_AS_GENTYPE(__CLC_AS_ULONGN(trec) & 0xffffffff00000000L);
+
+ __CLC_DOUBLEN tpr = __clc_fma(
+ __clc_fma(trec_top, z2, __clc_fma(trec_top, z1, 1.0)), trec, trec_top);
+ // Undo the pi/4 transform: (1-tp)/(1+tp) for tan, (tp+1)/(tp-1) for -1/tan.
+ __CLC_DOUBLEN tpt = transform * (1.0 - MATH_DIVIDE(2.0 * tp, 1.0 + tp));
+ __CLC_DOUBLEN tptr = transform * (MATH_DIVIDE(2.0 * tp, tp - 1.0) - 1.0);
+
+ *leadval = c ? tpt : tp;
+ *tailval = c ? tptr : tpr;
+}
+
// Reduction for medium sized arguments
_CLC_DEF _CLC_OVERLOAD void
__clc_remainder_piby2_medium(__CLC_DOUBLEN x, private __CLC_DOUBLEN *r,
diff --git a/libclc/clc/lib/generic/math/clc_sinpi.cl b/libclc/clc/lib/generic/math/clc_sinpi.cl
index 6cff247..bb5de09f0 100644
--- a/libclc/clc/lib/generic/math/clc_sinpi.cl
+++ b/libclc/clc/lib/generic/math/clc_sinpi.cl
@@ -11,7 +11,6 @@
#include <clc/internal/clc.h>
#include <clc/math/clc_fabs.h>
#include <clc/math/clc_sincos_helpers.h>
-#include <clc/math/clc_sincos_piby4.h>
#include <clc/math/math.h>
#define __CLC_BODY <clc_sinpi.inc>
diff --git a/libclc/clc/lib/generic/math/clc_tan.cl b/libclc/clc/lib/generic/math/clc_tan.cl
index adf42c4..7e68216 100644
--- a/libclc/clc/lib/generic/math/clc_tan.cl
+++ b/libclc/clc/lib/generic/math/clc_tan.cl
@@ -11,7 +11,6 @@
#include <clc/internal/clc.h>
#include <clc/math/clc_fabs.h>
#include <clc/math/clc_sincos_helpers.h>
-#include <clc/math/clc_sincos_piby4.h>
#include <clc/math/math.h>
#include <clc/math/tables.h>
#include <clc/relational/clc_isinf.h>
diff --git a/libclc/clc/lib/generic/math/clc_tanpi.cl b/libclc/clc/lib/generic/math/clc_tanpi.cl
index f126589..099457c1 100644
--- a/libclc/clc/lib/generic/math/clc_tanpi.cl
+++ b/libclc/clc/lib/generic/math/clc_tanpi.cl
@@ -12,7 +12,6 @@
#include <clc/math/clc_fabs.h>
#include <clc/math/clc_native_recip.h>
#include <clc/math/clc_sincos_helpers.h>
-#include <clc/math/clc_sincos_piby4.h>
#include <clc/math/math.h>
#define __CLC_BODY <clc_tanpi.inc>
diff --git a/libclc/cmake/modules/AddLibclc.cmake b/libclc/cmake/modules/AddLibclc.cmake
index 614f9e3..d8c2219 100644
--- a/libclc/cmake/modules/AddLibclc.cmake
+++ b/libclc/cmake/modules/AddLibclc.cmake
@@ -261,7 +261,7 @@ function(libclc_install)
install(
FILES ${files}
- DESTINATION "${CMAKE_INSTALL_DATADIR}/clc"
+ DESTINATION ${LIBCLC_INSTALL_DIR}
)
endfunction()
diff --git a/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll b/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll
index d4cc154..52ca22b 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll
@@ -1,38 +1,24 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,GENERIC
-; RUN: llc < %s -O0 -fast-isel -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,FAST
-; RUN: llc < %s -global-isel -global-isel-abort=2 -pass-remarks-missed=gisel* \
-; RUN: -mtriple=arm64-eabi -aarch64-neon-syntax=apple \
-; RUN: | FileCheck %s --check-prefixes=GISEL,FALLBACK
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -O0 -fast-isel | FileCheck %s --check-prefixes=CHECK,CHECK-FI
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+
+; CHECK-GI: warning: Instruction selection used fallback path for test_vcvt_bf16_f64
-; FALLBACK-NOT: remark{{.*}}G_FPEXT{{.*}}(in function: test_vcvt_f64_f32)
-; FALLBACK-NOT: remark{{.*}}fpext{{.*}}(in function: test_vcvt_f64_f32)
define <2 x double> @test_vcvt_f64_f32(<2 x float> %x) nounwind readnone ssp {
; CHECK-LABEL: test_vcvt_f64_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtl v0.2d, v0.2s
; CHECK-NEXT: ret
-;
-; GISEL-LABEL: test_vcvt_f64_f32:
-; GISEL: // %bb.0:
-; GISEL-NEXT: fcvtl v0.2d, v0.2s
-; GISEL-NEXT: ret
%vcvt1.i = fpext <2 x float> %x to <2 x double>
ret <2 x double> %vcvt1.i
}
-; FALLBACK-NOT: remark{{.*}}G_FPEXT{{.*}}(in function: test_vcvt_high_f64_f32)
-; FALLBACK-NOT: remark{{.*}}fpext{{.*}}(in function: test_vcvt_high_f64_f32)
define <2 x double> @test_vcvt_high_f64_f32(<4 x float> %x) nounwind readnone ssp {
; CHECK-LABEL: test_vcvt_high_f64_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtl2 v0.2d, v0.4s
; CHECK-NEXT: ret
-;
-; GISEL-LABEL: test_vcvt_high_f64_f32:
-; GISEL: // %bb.0:
-; GISEL-NEXT: fcvtl2 v0.2d, v0.4s
-; GISEL-NEXT: ret
%cvt_in = shufflevector <4 x float> %x, <4 x float> undef, <2 x i32> <i32 2, i32 3>
%vcvt1.i = fpext <2 x float> %cvt_in to <2 x double>
ret <2 x double> %vcvt1.i
@@ -43,11 +29,6 @@ define <2 x double> @test_vcvt_high_v1f64_f32_bitcast(<4 x float> %x) nounwind r
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtl2 v0.2d, v0.4s
; CHECK-NEXT: ret
-;
-; GISEL-LABEL: test_vcvt_high_v1f64_f32_bitcast:
-; GISEL: // %bb.0:
-; GISEL-NEXT: fcvtl2 v0.2d, v0.4s
-; GISEL-NEXT: ret
%bc1 = bitcast <4 x float> %x to <2 x double>
%ext = shufflevector <2 x double> %bc1, <2 x double> undef, <1 x i32> <i32 1>
%bc2 = bitcast <1 x double> %ext to <2 x float>
@@ -60,11 +41,6 @@ define <2 x double> @test_vcvt_high_v1i64_f32_bitcast(<2 x i64> %x) nounwind rea
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtl2 v0.2d, v0.4s
; CHECK-NEXT: ret
-;
-; GISEL-LABEL: test_vcvt_high_v1i64_f32_bitcast:
-; GISEL: // %bb.0:
-; GISEL-NEXT: fcvtl2 v0.2d, v0.4s
-; GISEL-NEXT: ret
%ext = shufflevector <2 x i64> %x, <2 x i64> undef, <1 x i32> <i32 1>
%bc2 = bitcast <1 x i64> %ext to <2 x float>
%r = fpext <2 x float> %bc2 to <2 x double>
@@ -76,11 +52,6 @@ define <2 x double> @test_vcvt_high_v2i32_f32_bitcast(<4 x i32> %x) nounwind rea
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtl2 v0.2d, v0.4s
; CHECK-NEXT: ret
-;
-; GISEL-LABEL: test_vcvt_high_v2i32_f32_bitcast:
-; GISEL: // %bb.0:
-; GISEL-NEXT: fcvtl2 v0.2d, v0.4s
-; GISEL-NEXT: ret
%ext = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%bc2 = bitcast <2 x i32> %ext to <2 x float>
%r = fpext <2 x float> %bc2 to <2 x double>
@@ -92,11 +63,6 @@ define <2 x double> @test_vcvt_high_v4i16_f32_bitcast(<8 x i16> %x) nounwind rea
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtl2 v0.2d, v0.4s
; CHECK-NEXT: ret
-;
-; GISEL-LABEL: test_vcvt_high_v4i16_f32_bitcast:
-; GISEL: // %bb.0:
-; GISEL-NEXT: fcvtl2 v0.2d, v0.4s
-; GISEL-NEXT: ret
%ext = shufflevector <8 x i16> %x, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%bc2 = bitcast <4 x i16> %ext to <2 x float>
%r = fpext <2 x float> %bc2 to <2 x double>
@@ -108,11 +74,6 @@ define <2 x double> @test_vcvt_high_v8i8_f32_bitcast(<16 x i8> %x) nounwind read
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtl2 v0.2d, v0.4s
; CHECK-NEXT: ret
-;
-; GISEL-LABEL: test_vcvt_high_v8i8_f32_bitcast:
-; GISEL: // %bb.0:
-; GISEL-NEXT: fcvtl2 v0.2d, v0.4s
-; GISEL-NEXT: ret
%ext = shufflevector <16 x i8> %x, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%bc2 = bitcast <8 x i8> %ext to <2 x float>
%r = fpext <2 x float> %bc2 to <2 x double>
@@ -124,11 +85,6 @@ define <4 x float> @test_vcvt_high_v1i64_f16_bitcast(<2 x i64> %x) nounwind read
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtl2 v0.4s, v0.8h
; CHECK-NEXT: ret
-;
-; GISEL-LABEL: test_vcvt_high_v1i64_f16_bitcast:
-; GISEL: // %bb.0:
-; GISEL-NEXT: fcvtl2 v0.4s, v0.8h
-; GISEL-NEXT: ret
%ext = shufflevector <2 x i64> %x, <2 x i64> undef, <1 x i32> <i32 1>
%bc2 = bitcast <1 x i64> %ext to <4 x half>
%r = fpext <4 x half> %bc2 to <4 x float>
@@ -140,11 +96,6 @@ define <4 x float> @test_vcvt_high_v2i32_f16_bitcast(<4 x i32> %x) nounwind read
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtl2 v0.4s, v0.8h
; CHECK-NEXT: ret
-;
-; GISEL-LABEL: test_vcvt_high_v2i32_f16_bitcast:
-; GISEL: // %bb.0:
-; GISEL-NEXT: fcvtl2 v0.4s, v0.8h
-; GISEL-NEXT: ret
%ext = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%bc2 = bitcast <2 x i32> %ext to <4 x half>
%r = fpext <4 x half> %bc2 to <4 x float>
@@ -156,11 +107,6 @@ define <4 x float> @test_vcvt_high_v4i16_f16_bitcast(<8 x i16> %x) nounwind read
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtl2 v0.4s, v0.8h
; CHECK-NEXT: ret
-;
-; GISEL-LABEL: test_vcvt_high_v4i16_f16_bitcast:
-; GISEL: // %bb.0:
-; GISEL-NEXT: fcvtl2 v0.4s, v0.8h
-; GISEL-NEXT: ret
%ext = shufflevector <8 x i16> %x, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%bc2 = bitcast <4 x i16> %ext to <4 x half>
%r = fpext <4 x half> %bc2 to <4 x float>
@@ -172,134 +118,118 @@ define <4 x float> @test_vcvt_high_v8i8_f16_bitcast(<16 x i8> %x) nounwind readn
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtl2 v0.4s, v0.8h
; CHECK-NEXT: ret
-;
-; GISEL-LABEL: test_vcvt_high_v8i8_f16_bitcast:
-; GISEL: // %bb.0:
-; GISEL-NEXT: fcvtl2 v0.4s, v0.8h
-; GISEL-NEXT: ret
%ext = shufflevector <16 x i8> %x, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%bc2 = bitcast <8 x i8> %ext to <4 x half>
%r = fpext <4 x half> %bc2 to <4 x float>
ret <4 x float> %r
}
-; FALLBACK-NOT: remark{{.*}}G_FPEXT{{.*}}(in function: test_vcvt_f32_f64)
-; FALLBACK-NOT: remark{{.*}}fpext{{.*}}(in function: test_vcvt_f32_f64)
define <2 x float> @test_vcvt_f32_f64(<2 x double> %v) nounwind readnone ssp {
; CHECK-LABEL: test_vcvt_f32_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtn v0.2s, v0.2d
; CHECK-NEXT: ret
-;
-; GISEL-LABEL: test_vcvt_f32_f64:
-; GISEL: // %bb.0:
-; GISEL-NEXT: fcvtn v0.2s, v0.2d
-; GISEL-NEXT: ret
%vcvt1.i = fptrunc <2 x double> %v to <2 x float>
ret <2 x float> %vcvt1.i
}
-; FALLBACK-NOT: remark{{.*}}G_FPEXT{{.*}}(in function: test_vcvt_bf16_f64)
-; FALLBACK-NOT: remark{{.*}}fpext{{.*}}(in function: test_vcvt_bf16_f64)
define <2 x bfloat> @test_vcvt_bf16_f64(<2 x double> %v) nounwind readnone ssp {
-; GENERIC-LABEL: test_vcvt_bf16_f64:
-; GENERIC: // %bb.0:
-; GENERIC-NEXT: fcvtxn v0.2s, v0.2d
-; GENERIC-NEXT: movi.4s v1, #1
-; GENERIC-NEXT: movi.4s v2, #127, msl #8
-; GENERIC-NEXT: ushr.4s v3, v0, #16
-; GENERIC-NEXT: add.4s v2, v0, v2
-; GENERIC-NEXT: and.16b v1, v3, v1
-; GENERIC-NEXT: fcmeq.4s v3, v0, v0
-; GENERIC-NEXT: orr.4s v0, #64, lsl #16
-; GENERIC-NEXT: add.4s v1, v1, v2
-; GENERIC-NEXT: bit.16b v0, v1, v3
-; GENERIC-NEXT: shrn.4h v0, v0, #16
-; GENERIC-NEXT: ret
+; CHECK-SD-LABEL: test_vcvt_bf16_f64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvtxn v0.2s, v0.2d
+; CHECK-SD-NEXT: movi.4s v1, #1
+; CHECK-SD-NEXT: movi.4s v2, #127, msl #8
+; CHECK-SD-NEXT: ushr.4s v3, v0, #16
+; CHECK-SD-NEXT: add.4s v2, v0, v2
+; CHECK-SD-NEXT: and.16b v1, v3, v1
+; CHECK-SD-NEXT: fcmeq.4s v3, v0, v0
+; CHECK-SD-NEXT: orr.4s v0, #64, lsl #16
+; CHECK-SD-NEXT: add.4s v1, v1, v2
+; CHECK-SD-NEXT: bit.16b v0, v1, v3
+; CHECK-SD-NEXT: shrn.4h v0, v0, #16
+; CHECK-SD-NEXT: ret
;
-; FAST-LABEL: test_vcvt_bf16_f64:
-; FAST: // %bb.0:
-; FAST-NEXT: fcvtxn v1.2s, v0.2d
-; FAST-NEXT: // implicit-def: $q0
-; FAST-NEXT: fmov d0, d1
-; FAST-NEXT: ushr.4s v1, v0, #16
-; FAST-NEXT: movi.4s v2, #1
-; FAST-NEXT: and.16b v1, v1, v2
-; FAST-NEXT: add.4s v1, v1, v0
-; FAST-NEXT: movi.4s v2, #127, msl #8
-; FAST-NEXT: add.4s v1, v1, v2
-; FAST-NEXT: mov.16b v2, v0
-; FAST-NEXT: orr.4s v2, #64, lsl #16
-; FAST-NEXT: fcmeq.4s v0, v0, v0
-; FAST-NEXT: bsl.16b v0, v1, v2
-; FAST-NEXT: shrn.4h v0, v0, #16
-; FAST-NEXT: ret
+; CHECK-FI-LABEL: test_vcvt_bf16_f64:
+; CHECK-FI: // %bb.0:
+; CHECK-FI-NEXT: fcvtxn v1.2s, v0.2d
+; CHECK-FI-NEXT: // implicit-def: $q0
+; CHECK-FI-NEXT: fmov d0, d1
+; CHECK-FI-NEXT: ushr.4s v1, v0, #16
+; CHECK-FI-NEXT: movi.4s v2, #1
+; CHECK-FI-NEXT: and.16b v1, v1, v2
+; CHECK-FI-NEXT: add.4s v1, v1, v0
+; CHECK-FI-NEXT: movi.4s v2, #127, msl #8
+; CHECK-FI-NEXT: add.4s v1, v1, v2
+; CHECK-FI-NEXT: mov.16b v2, v0
+; CHECK-FI-NEXT: orr.4s v2, #64, lsl #16
+; CHECK-FI-NEXT: fcmeq.4s v0, v0, v0
+; CHECK-FI-NEXT: bsl.16b v0, v1, v2
+; CHECK-FI-NEXT: shrn.4h v0, v0, #16
+; CHECK-FI-NEXT: ret
;
-; GISEL-LABEL: test_vcvt_bf16_f64:
-; GISEL: // %bb.0:
-; GISEL-NEXT: fcvtxn v0.2s, v0.2d
-; GISEL-NEXT: movi.4s v1, #1
-; GISEL-NEXT: movi.4s v2, #127, msl #8
-; GISEL-NEXT: ushr.4s v3, v0, #16
-; GISEL-NEXT: add.4s v2, v0, v2
-; GISEL-NEXT: and.16b v1, v3, v1
-; GISEL-NEXT: fcmeq.4s v3, v0, v0
-; GISEL-NEXT: orr.4s v0, #64, lsl #16
-; GISEL-NEXT: add.4s v1, v1, v2
-; GISEL-NEXT: bit.16b v0, v1, v3
-; GISEL-NEXT: shrn.4h v0, v0, #16
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: test_vcvt_bf16_f64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fcvtxn v0.2s, v0.2d
+; CHECK-GI-NEXT: movi.4s v1, #1
+; CHECK-GI-NEXT: movi.4s v2, #127, msl #8
+; CHECK-GI-NEXT: ushr.4s v3, v0, #16
+; CHECK-GI-NEXT: add.4s v2, v0, v2
+; CHECK-GI-NEXT: and.16b v1, v3, v1
+; CHECK-GI-NEXT: fcmeq.4s v3, v0, v0
+; CHECK-GI-NEXT: orr.4s v0, #64, lsl #16
+; CHECK-GI-NEXT: add.4s v1, v1, v2
+; CHECK-GI-NEXT: bit.16b v0, v1, v3
+; CHECK-GI-NEXT: shrn.4h v0, v0, #16
+; CHECK-GI-NEXT: ret
%vcvt1.i = fptrunc <2 x double> %v to <2 x bfloat>
ret <2 x bfloat> %vcvt1.i
}
define half @test_vcvt_f16_f32(<1 x float> %x) {
-; GENERIC-LABEL: test_vcvt_f16_f32:
-; GENERIC: // %bb.0:
-; GENERIC-NEXT: // kill: def $d0 killed $d0 def $q0
-; GENERIC-NEXT: fcvt h0, s0
-; GENERIC-NEXT: ret
+; CHECK-SD-LABEL: test_vcvt_f16_f32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: fcvt h0, s0
+; CHECK-SD-NEXT: ret
;
-; FAST-LABEL: test_vcvt_f16_f32:
-; FAST: // %bb.0:
-; FAST-NEXT: fmov d1, d0
-; FAST-NEXT: // implicit-def: $q0
-; FAST-NEXT: fmov d0, d1
-; FAST-NEXT: // kill: def $s0 killed $s0 killed $q0
-; FAST-NEXT: fcvt h0, s0
-; FAST-NEXT: ret
+; CHECK-FI-LABEL: test_vcvt_f16_f32:
+; CHECK-FI: // %bb.0:
+; CHECK-FI-NEXT: fmov d1, d0
+; CHECK-FI-NEXT: // implicit-def: $q0
+; CHECK-FI-NEXT: fmov d0, d1
+; CHECK-FI-NEXT: // kill: def $s0 killed $s0 killed $q0
+; CHECK-FI-NEXT: fcvt h0, s0
+; CHECK-FI-NEXT: ret
;
-; GISEL-LABEL: test_vcvt_f16_f32:
-; GISEL: // %bb.0:
-; GISEL-NEXT: fcvt h0, s0
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: test_vcvt_f16_f32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fcvt h0, s0
+; CHECK-GI-NEXT: ret
%tmp = fptrunc <1 x float> %x to <1 x half>
%elt = extractelement <1 x half> %tmp, i32 0
ret half %elt
}
-; FALLBACK-NOT: remark{{.*}}G_FPEXT{{.*}}(in function: test_vcvt_high_f32_f64)
-; FALLBACK-NOT: remark{{.*}}fpext{{.*}}(in function: test_vcvt_high_f32_f64)
define <4 x float> @test_vcvt_high_f32_f64(<2 x float> %x, <2 x double> %v) nounwind readnone ssp {
-; GENERIC-LABEL: test_vcvt_high_f32_f64:
-; GENERIC: // %bb.0:
-; GENERIC-NEXT: // kill: def $d0 killed $d0 def $q0
-; GENERIC-NEXT: fcvtn2 v0.4s, v1.2d
-; GENERIC-NEXT: ret
+; CHECK-SD-LABEL: test_vcvt_high_f32_f64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: fcvtn2 v0.4s, v1.2d
+; CHECK-SD-NEXT: ret
;
-; FAST-LABEL: test_vcvt_high_f32_f64:
-; FAST: // %bb.0:
-; FAST-NEXT: fmov d2, d0
-; FAST-NEXT: // implicit-def: $q0
-; FAST-NEXT: fmov d0, d2
-; FAST-NEXT: fcvtn2 v0.4s, v1.2d
-; FAST-NEXT: ret
+; CHECK-FI-LABEL: test_vcvt_high_f32_f64:
+; CHECK-FI: // %bb.0:
+; CHECK-FI-NEXT: fmov d2, d0
+; CHECK-FI-NEXT: // implicit-def: $q0
+; CHECK-FI-NEXT: fmov d0, d2
+; CHECK-FI-NEXT: fcvtn2 v0.4s, v1.2d
+; CHECK-FI-NEXT: ret
;
-; GISEL-LABEL: test_vcvt_high_f32_f64:
-; GISEL: // %bb.0:
-; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0
-; GISEL-NEXT: fcvtn2 v0.4s, v1.2d
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: test_vcvt_high_f32_f64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: fcvtn2 v0.4s, v1.2d
+; CHECK-GI-NEXT: ret
%cvt = fptrunc <2 x double> %v to <2 x float>
%vcvt2.i = shufflevector <2 x float> %x, <2 x float> %cvt, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x float> %vcvt2.i
@@ -310,99 +240,80 @@ define <2 x float> @test_vcvtx_f32_f64(<2 x double> %v) nounwind readnone ssp {
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtxn v0.2s, v0.2d
; CHECK-NEXT: ret
-;
-; GISEL-LABEL: test_vcvtx_f32_f64:
-; GISEL: // %bb.0:
-; GISEL-NEXT: fcvtxn v0.2s, v0.2d
-; GISEL-NEXT: ret
%vcvtx1.i = tail call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> %v) nounwind
ret <2 x float> %vcvtx1.i
}
define <4 x float> @test_vcvtx_high_f32_f64(<2 x float> %x, <2 x double> %v) nounwind readnone ssp {
-; GENERIC-LABEL: test_vcvtx_high_f32_f64:
-; GENERIC: // %bb.0:
-; GENERIC-NEXT: // kill: def $d0 killed $d0 def $q0
-; GENERIC-NEXT: fcvtxn2 v0.4s, v1.2d
-; GENERIC-NEXT: ret
+; CHECK-SD-LABEL: test_vcvtx_high_f32_f64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: fcvtxn2 v0.4s, v1.2d
+; CHECK-SD-NEXT: ret
;
-; FAST-LABEL: test_vcvtx_high_f32_f64:
-; FAST: // %bb.0:
-; FAST-NEXT: fmov d2, d0
-; FAST-NEXT: // implicit-def: $q0
-; FAST-NEXT: fmov d0, d2
-; FAST-NEXT: fcvtxn2 v0.4s, v1.2d
-; FAST-NEXT: ret
+; CHECK-FI-LABEL: test_vcvtx_high_f32_f64:
+; CHECK-FI: // %bb.0:
+; CHECK-FI-NEXT: fmov d2, d0
+; CHECK-FI-NEXT: // implicit-def: $q0
+; CHECK-FI-NEXT: fmov d0, d2
+; CHECK-FI-NEXT: fcvtxn2 v0.4s, v1.2d
+; CHECK-FI-NEXT: ret
;
-; GISEL-LABEL: test_vcvtx_high_f32_f64:
-; GISEL: // %bb.0:
-; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0
-; GISEL-NEXT: fcvtxn2 v0.4s, v1.2d
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: test_vcvtx_high_f32_f64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: fcvtxn2 v0.4s, v1.2d
+; CHECK-GI-NEXT: ret
%vcvtx2.i = tail call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> %v) nounwind
%res = shufflevector <2 x float> %x, <2 x float> %vcvtx2.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x float> %res
}
-
-declare <2 x double> @llvm.aarch64.neon.vcvthighfp2df(<4 x float>) nounwind readnone
-declare <2 x double> @llvm.aarch64.neon.vcvtfp2df(<2 x float>) nounwind readnone
-
-declare <2 x float> @llvm.aarch64.neon.vcvtdf2fp(<2 x double>) nounwind readnone
-declare <4 x float> @llvm.aarch64.neon.vcvthighdf2fp(<2 x float>, <2 x double>) nounwind readnone
-
-declare <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double>) nounwind readnone
-
define i16 @to_half(float %in) {
-; GENERIC-LABEL: to_half:
-; GENERIC: // %bb.0:
-; GENERIC-NEXT: fcvt h0, s0
-; GENERIC-NEXT: fmov w0, s0
-; GENERIC-NEXT: ret
+; CHECK-SD-LABEL: to_half:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcvt h0, s0
+; CHECK-SD-NEXT: fmov w0, s0
+; CHECK-SD-NEXT: ret
;
-; FAST-LABEL: to_half:
-; FAST: // %bb.0:
-; FAST-NEXT: fcvt h1, s0
-; FAST-NEXT: // implicit-def: $w0
-; FAST-NEXT: fmov s0, w0
-; FAST-NEXT: fmov s0, s1
-; FAST-NEXT: fmov w0, s0
-; FAST-NEXT: // kill: def $w1 killed $w0
-; FAST-NEXT: ret
+; CHECK-FI-LABEL: to_half:
+; CHECK-FI: // %bb.0:
+; CHECK-FI-NEXT: fcvt h1, s0
+; CHECK-FI-NEXT: // implicit-def: $w0
+; CHECK-FI-NEXT: fmov s0, w0
+; CHECK-FI-NEXT: fmov s0, s1
+; CHECK-FI-NEXT: fmov w0, s0
+; CHECK-FI-NEXT: // kill: def $w1 killed $w0
+; CHECK-FI-NEXT: ret
;
-; GISEL-LABEL: to_half:
-; GISEL: // %bb.0:
-; GISEL-NEXT: fcvt h0, s0
-; GISEL-NEXT: fmov w0, s0
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: to_half:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fcvt h0, s0
+; CHECK-GI-NEXT: fmov w0, s0
+; CHECK-GI-NEXT: ret
%res = call i16 @llvm.convert.to.fp16.f32(float %in)
ret i16 %res
}
define float @from_half(i16 %in) {
-; GENERIC-LABEL: from_half:
-; GENERIC: // %bb.0:
-; GENERIC-NEXT: fmov s0, w0
-; GENERIC-NEXT: fcvt s0, h0
-; GENERIC-NEXT: ret
+; CHECK-SD-LABEL: from_half:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fmov s0, w0
+; CHECK-SD-NEXT: fcvt s0, h0
+; CHECK-SD-NEXT: ret
;
-; FAST-LABEL: from_half:
-; FAST: // %bb.0:
-; FAST-NEXT: fmov s0, w0
-; FAST-NEXT: // kill: def $h0 killed $h0 killed $s0
-; FAST-NEXT: fcvt s0, h0
-; FAST-NEXT: ret
+; CHECK-FI-LABEL: from_half:
+; CHECK-FI: // %bb.0:
+; CHECK-FI-NEXT: fmov s0, w0
+; CHECK-FI-NEXT: // kill: def $h0 killed $h0 killed $s0
+; CHECK-FI-NEXT: fcvt s0, h0
+; CHECK-FI-NEXT: ret
;
-; GISEL-LABEL: from_half:
-; GISEL: // %bb.0:
-; GISEL-NEXT: fmov s0, w0
-; GISEL-NEXT: fcvt s0, h0
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: from_half:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fmov s0, w0
+; CHECK-GI-NEXT: fcvt s0, h0
+; CHECK-GI-NEXT: ret
%res = call float @llvm.convert.from.fp16.f32(i16 %in)
ret float %res
}
-
-declare float @llvm.convert.from.fp16.f32(i16) #1
-declare i16 @llvm.convert.to.fp16.f32(float) #1
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; FALLBACK: {{.*}}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll
index 5ae0839..3dfa6df 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll
@@ -1361,132 +1361,6 @@ for.body: ; preds = %for.body.preheader,
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !loop !1
}
-define i32 @red_extended_add_incomplete_chain(ptr %start, ptr %end, i32 %offset) {
-; CHECK-NEON-LABEL: define i32 @red_extended_add_incomplete_chain(
-; CHECK-NEON-SAME: ptr [[START:%.*]], ptr [[END:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR1:[0-9]+]] {
-; CHECK-NEON-NEXT: entry:
-; CHECK-NEON-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64
-; CHECK-NEON-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
-; CHECK-NEON-NEXT: [[TMP0:%.*]] = add i64 [[END1]], 1
-; CHECK-NEON-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
-; CHECK-NEON-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 16
-; CHECK-NEON-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
-; CHECK-NEON: vector.ph:
-; CHECK-NEON-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], 16
-; CHECK-NEON-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]]
-; CHECK-NEON-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC]]
-; CHECK-NEON-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[OFFSET]], i64 0
-; CHECK-NEON-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
-; CHECK-NEON-NEXT: br label [[VECTOR_BODY:%.*]]
-; CHECK-NEON: vector.body:
-; CHECK-NEON-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEON-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEON-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[INDEX]]
-; CHECK-NEON-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 1
-; CHECK-NEON-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
-; CHECK-NEON-NEXT: [[PARTIAL_REDUCE:%.*]] = add <16 x i32> [[VEC_PHI]], [[TMP3]]
-; CHECK-NEON-NEXT: [[TMP4]] = add <16 x i32> [[PARTIAL_REDUCE]], [[BROADCAST_SPLAT]]
-; CHECK-NEON-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
-; CHECK-NEON-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEON-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
-; CHECK-NEON: middle.block:
-; CHECK-NEON-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP4]])
-; CHECK-NEON-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]]
-; CHECK-NEON-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
-; CHECK-NEON: scalar.ph:
-;
-; CHECK-SVE-LABEL: define i32 @red_extended_add_incomplete_chain(
-; CHECK-SVE-SAME: ptr [[START:%.*]], ptr [[END:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR1:[0-9]+]] {
-; CHECK-SVE-NEXT: entry:
-; CHECK-SVE-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64
-; CHECK-SVE-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
-; CHECK-SVE-NEXT: [[TMP0:%.*]] = add i64 [[END1]], 1
-; CHECK-SVE-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
-; CHECK-SVE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-SVE-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
-; CHECK-SVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], [[TMP3]]
-; CHECK-SVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
-; CHECK-SVE: vector.ph:
-; CHECK-SVE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-SVE-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
-; CHECK-SVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], [[TMP5]]
-; CHECK-SVE-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]]
-; CHECK-SVE-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC]]
-; CHECK-SVE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[OFFSET]], i64 0
-; CHECK-SVE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-SVE-NEXT: br label [[VECTOR_BODY:%.*]]
-; CHECK-SVE: vector.body:
-; CHECK-SVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-SVE-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
-; CHECK-SVE-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[INDEX]]
-; CHECK-SVE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i8>, ptr [[NEXT_GEP]], align 1
-; CHECK-SVE-NEXT: [[TMP7:%.*]] = zext <vscale x 4 x i8> [[WIDE_LOAD]] to <vscale x 4 x i32>
-; CHECK-SVE-NEXT: [[TMP8:%.*]] = add <vscale x 4 x i32> [[VEC_PHI]], [[TMP7]]
-; CHECK-SVE-NEXT: [[TMP9]] = add <vscale x 4 x i32> [[TMP8]], [[BROADCAST_SPLAT]]
-; CHECK-SVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
-; CHECK-SVE-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-SVE-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
-; CHECK-SVE: middle.block:
-; CHECK-SVE-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP9]])
-; CHECK-SVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]]
-; CHECK-SVE-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
-; CHECK-SVE: scalar.ph:
-;
-; CHECK-SVE-MAXBW-LABEL: define i32 @red_extended_add_incomplete_chain(
-; CHECK-SVE-MAXBW-SAME: ptr [[START:%.*]], ptr [[END:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR1:[0-9]+]] {
-; CHECK-SVE-MAXBW-NEXT: entry:
-; CHECK-SVE-MAXBW-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64
-; CHECK-SVE-MAXBW-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
-; CHECK-SVE-MAXBW-NEXT: [[TMP0:%.*]] = add i64 [[END1]], 1
-; CHECK-SVE-MAXBW-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
-; CHECK-SVE-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-SVE-MAXBW-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3
-; CHECK-SVE-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], [[TMP3]]
-; CHECK-SVE-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
-; CHECK-SVE-MAXBW: vector.ph:
-; CHECK-SVE-MAXBW-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-SVE-MAXBW-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8
-; CHECK-SVE-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], [[TMP5]]
-; CHECK-SVE-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]]
-; CHECK-SVE-MAXBW-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC]]
-; CHECK-SVE-MAXBW-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i32> poison, i32 [[OFFSET]], i64 0
-; CHECK-SVE-MAXBW-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
-; CHECK-SVE-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]]
-; CHECK-SVE-MAXBW: vector.body:
-; CHECK-SVE-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-SVE-MAXBW-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
-; CHECK-SVE-MAXBW-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[INDEX]]
-; CHECK-SVE-MAXBW-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, ptr [[NEXT_GEP]], align 1
-; CHECK-SVE-MAXBW-NEXT: [[TMP7:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD]] to <vscale x 8 x i32>
-; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE:%.*]] = add <vscale x 8 x i32> [[VEC_PHI]], [[TMP7]]
-; CHECK-SVE-MAXBW-NEXT: [[TMP8]] = add <vscale x 8 x i32> [[PARTIAL_REDUCE]], [[BROADCAST_SPLAT]]
-; CHECK-SVE-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
-; CHECK-SVE-MAXBW-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-SVE-MAXBW-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
-; CHECK-SVE-MAXBW: middle.block:
-; CHECK-SVE-MAXBW-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.nxv8i32(<vscale x 8 x i32> [[TMP8]])
-; CHECK-SVE-MAXBW-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]]
-; CHECK-SVE-MAXBW-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
-; CHECK-SVE-MAXBW: scalar.ph:
-;
-entry:
- br label %loop
-
-loop:
- %ptr.iv = phi ptr [ %start, %entry ], [ %gep.iv.next, %loop ]
- %red = phi i32 [ 0, %entry ], [ %red.next, %loop ]
- %l = load i8, ptr %ptr.iv, align 1
- %l.ext = zext i8 %l to i32
- %add = add i32 %red, %l.ext
- %red.next = add i32 %add, %offset
- %gep.iv.next = getelementptr i8, ptr %ptr.iv, i64 1
- %ec = icmp eq ptr %ptr.iv, %end
- br i1 %ec, label %exit, label %loop
-
-exit:
- ret i32 %red.next
-}
-
attributes #0 = { vscale_range(1,16) }
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-incomplete-chains.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-incomplete-chains.ll
new file mode 100644
index 0000000..d80178fd
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-incomplete-chains.ll
@@ -0,0 +1,72 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6
+; RUN: opt --mattr=+neon,+dotprod -passes=loop-vectorize -force-vector-interleave=1 -enable-epilogue-vectorization=false -S %s | FileCheck %s --check-prefixes=CHECK-NEON
+
+target triple = "arm64-apple-macosx"
+
+define i32 @red_extended_add_incomplete_chain(ptr %start, ptr %end, i32 %offset) {
+; CHECK-NEON-LABEL: define i32 @red_extended_add_incomplete_chain(
+; CHECK-NEON-SAME: ptr [[START:%.*]], ptr [[END:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEON-NEXT: [[ENTRY:.*]]:
+; CHECK-NEON-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64
+; CHECK-NEON-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
+; CHECK-NEON-NEXT: [[TMP0:%.*]] = add i64 [[END1]], 1
+; CHECK-NEON-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
+; CHECK-NEON-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 16
+; CHECK-NEON-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-NEON: [[VECTOR_PH]]:
+; CHECK-NEON-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], 16
+; CHECK-NEON-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]]
+; CHECK-NEON-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC]]
+; CHECK-NEON-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[OFFSET]], i64 0
+; CHECK-NEON-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
+; CHECK-NEON-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK-NEON: [[VECTOR_BODY]]:
+; CHECK-NEON-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEON-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEON-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[INDEX]]
+; CHECK-NEON-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 1
+; CHECK-NEON-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
+; CHECK-NEON-NEXT: [[TMP4:%.*]] = add <16 x i32> [[VEC_PHI]], [[TMP3]]
+; CHECK-NEON-NEXT: [[TMP5]] = add <16 x i32> [[TMP4]], [[BROADCAST_SPLAT]]
+; CHECK-NEON-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
+; CHECK-NEON-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEON-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEON: [[MIDDLE_BLOCK]]:
+; CHECK-NEON-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP5]])
+; CHECK-NEON-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]]
+; CHECK-NEON-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK-NEON: [[SCALAR_PH]]:
+; CHECK-NEON-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
+; CHECK-NEON-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEON-NEXT: br label %[[LOOP:.*]]
+; CHECK-NEON: [[LOOP]]:
+; CHECK-NEON-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[GEP_IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEON-NEXT: [[RED:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEON-NEXT: [[L:%.*]] = load i8, ptr [[PTR_IV]], align 1
+; CHECK-NEON-NEXT: [[L_EXT:%.*]] = zext i8 [[L]] to i32
+; CHECK-NEON-NEXT: [[ADD:%.*]] = add i32 [[RED]], [[L_EXT]]
+; CHECK-NEON-NEXT: [[RED_NEXT]] = add i32 [[ADD]], [[OFFSET]]
+; CHECK-NEON-NEXT: [[GEP_IV_NEXT]] = getelementptr i8, ptr [[PTR_IV]], i64 1
+; CHECK-NEON-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV]], [[END]]
+; CHECK-NEON-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-NEON: [[EXIT]]:
+; CHECK-NEON-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi i32 [ [[RED_NEXT]], %[[LOOP]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEON-NEXT: ret i32 [[RED_NEXT_LCSSA]]
+;
+entry:
+ br label %loop
+
+loop:
+ %ptr.iv = phi ptr [ %start, %entry ], [ %gep.iv.next, %loop ]
+ %red = phi i32 [ 0, %entry ], [ %red.next, %loop ]
+ %l = load i8, ptr %ptr.iv, align 1
+ %l.ext = zext i8 %l to i32
+ %add = add i32 %red, %l.ext
+ %red.next = add i32 %add, %offset
+ %gep.iv.next = getelementptr i8, ptr %ptr.iv, i64 1
+ %ec = icmp eq ptr %ptr.iv, %end
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret i32 %red.next
+}