aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--clang-tools-extra/clang-doc/JSONGenerator.cpp10
-rw-r--r--clang-tools-extra/test/clang-doc/json/multiple-namespaces.cpp20
-rw-r--r--clang-tools-extra/test/clang-doc/json/nested-namespace.cpp2
-rw-r--r--clang/include/clang/Basic/CodeGenOptions.def2
-rw-r--r--clang/include/clang/Driver/Options.td6
-rw-r--r--clang/lib/CodeGen/BackendUtil.cpp1
-rw-r--r--clang/lib/Driver/ToolChains/Clang.cpp4
-rw-r--r--clang/lib/Driver/ToolChains/CommonArgs.cpp5
-rw-r--r--clang/lib/Format/FormatTokenLexer.cpp13
-rw-r--r--clang/test/Driver/call-graph-section.c5
-rw-r--r--clang/unittests/Analysis/CMakeLists.txt1
-rw-r--r--clang/unittests/Format/FormatTest.cpp5
-rw-r--r--flang/include/flang/Optimizer/Builder/IntrinsicCall.h1
-rw-r--r--flang/lib/Optimizer/Builder/IntrinsicCall.cpp15
-rw-r--r--flang/module/cudadevice.f905
-rw-r--r--flang/test/Lower/CUDA/cuda-device-proc.cuf8
-rw-r--r--libc/shared/math.h1
-rw-r--r--libc/shared/math/exp2f16.h29
-rw-r--r--libc/src/__support/math/CMakeLists.txt14
-rw-r--r--libc/src/__support/math/exp2f16.h111
-rw-r--r--libc/src/math/generic/CMakeLists.txt10
-rw-r--r--libc/src/math/generic/exp2f16.cpp86
-rw-r--r--libc/test/shared/CMakeLists.txt1
-rw-r--r--libc/test/shared/shared_math_test.cpp2
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorize.cpp19
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp12
-rw-r--r--llvm/test/tools/llvm-remarkutil/filter.test10
-rw-r--r--llvm/tools/llvm-remarkutil/RemarkFilter.cpp8
-rw-r--r--llvm/tools/llvm-remarkutil/RemarkUtilHelpers.cpp14
-rw-r--r--llvm/tools/llvm-remarkutil/RemarkUtilHelpers.h9
-rw-r--r--mlir/test/Examples/standalone/test.wheel.toy1
-rw-r--r--mlir/test/lit.cfg.py3
-rw-r--r--mlir/test/lit.site.cfg.py.in1
-rw-r--r--utils/bazel/llvm-project-overlay/libc/BUILD.bazel18
34 files changed, 335 insertions, 117 deletions
diff --git a/clang-tools-extra/clang-doc/JSONGenerator.cpp b/clang-tools-extra/clang-doc/JSONGenerator.cpp
index 6fba211..b17cc80 100644
--- a/clang-tools-extra/clang-doc/JSONGenerator.cpp
+++ b/clang-tools-extra/clang-doc/JSONGenerator.cpp
@@ -584,12 +584,20 @@ static SmallString<16> determineFileName(Info *I, SmallString<128> &Path) {
FileName = RecordSymbolInfo->MangledName;
} else if (I->USR == GlobalNamespaceID)
FileName = "index";
- else
+ else if (I->IT == InfoType::IT_namespace) {
+ for (const auto &NS : I->Namespace) {
+ FileName += NS.Name;
+ FileName += "_";
+ }
+ FileName += I->Name;
+ } else
FileName = I->Name;
sys::path::append(Path, FileName + ".json");
return FileName;
}
+// FIXME: Revert back to creating nested directories for namespaces instead of
+// putting everything in a flat directory structure.
Error JSONGenerator::generateDocs(
StringRef RootDir, llvm::StringMap<std::unique_ptr<doc::Info>> Infos,
const ClangDocContext &CDCtx) {
diff --git a/clang-tools-extra/test/clang-doc/json/multiple-namespaces.cpp b/clang-tools-extra/test/clang-doc/json/multiple-namespaces.cpp
new file mode 100644
index 0000000..04fcfc1
--- /dev/null
+++ b/clang-tools-extra/test/clang-doc/json/multiple-namespaces.cpp
@@ -0,0 +1,20 @@
+// RUN: rm -rf %t && mkdir -p %t
+// RUN: clang-doc --output=%t --format=json --executor=standalone %s
+// RUN: FileCheck %s < %t/json/foo_tools.json --check-prefix=CHECK-FOO
+// RUN: FileCheck %s < %t/json/bar_tools.json --check-prefix=CHECK-BAR
+
+namespace foo {
+ namespace tools {
+ class FooTools {};
+ } // namespace tools
+} // namespace foo
+
+namespace bar {
+ namespace tools {
+ class BarTools {};
+ } // namespace tools
+} // namespace bar
+
+// CHECK-FOO: "Name": "tools"
+
+// CHECK-BAR: "Name": "tools"
diff --git a/clang-tools-extra/test/clang-doc/json/nested-namespace.cpp b/clang-tools-extra/test/clang-doc/json/nested-namespace.cpp
index b19afc1..cf19e1e 100644
--- a/clang-tools-extra/test/clang-doc/json/nested-namespace.cpp
+++ b/clang-tools-extra/test/clang-doc/json/nested-namespace.cpp
@@ -1,7 +1,7 @@
// RUN: rm -rf %t && mkdir -p %t
// RUN: clang-doc --output=%t --format=json --executor=standalone %s
// RUN: FileCheck %s < %t/json/nested.json --check-prefix=NESTED
-// RUN: FileCheck %s < %t/json/inner.json --check-prefix=INNER
+// RUN: FileCheck %s < %t/json/nested_inner.json --check-prefix=INNER
namespace nested {
int Global;
diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def
index d924cb4..90e1f8d 100644
--- a/clang/include/clang/Basic/CodeGenOptions.def
+++ b/clang/include/clang/Basic/CodeGenOptions.def
@@ -72,6 +72,8 @@ CODEGENOPT(EnableNoundefAttrs, 1, 0, Benign) ///< Enable emitting `noundef` attr
CODEGENOPT(DebugPassManager, 1, 0, Benign) ///< Prints debug information for the new
///< pass manager.
CODEGENOPT(DisableRedZone , 1, 0, Benign) ///< Set when -mno-red-zone is enabled.
+CODEGENOPT(CallGraphSection, 1, 0, Benign) ///< Emit a call graph section into the
+ ///< object file.
CODEGENOPT(EmitCallSiteInfo, 1, 0, Benign) ///< Emit call site info only in the case of
///< '-g' + 'O>0' level.
CODEGENOPT(IndirectTlsSegRefs, 1, 0, Benign) ///< Set when -mno-tls-direct-seg-refs
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index c2f2ac5..a55a523 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -4534,6 +4534,12 @@ defm data_sections : BoolFOption<"data-sections",
PosFlag<SetTrue, [], [ClangOption, CC1Option],
"Place each data in its own section">,
NegFlag<SetFalse>>;
+defm experimental_call_graph_section
+ : BoolFOption<"experimental-call-graph-section",
+ CodeGenOpts<"CallGraphSection">, DefaultFalse,
+ PosFlag<SetTrue, [], [ClangOption, CC1Option],
+ "Emit a call graph section">,
+ NegFlag<SetFalse>>;
defm stack_size_section : BoolFOption<"stack-size-section",
CodeGenOpts<"StackSizeSection">, DefaultFalse,
PosFlag<SetTrue, [], [ClangOption, CC1Option],
diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index 2d95982..f8e8086 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -473,6 +473,7 @@ static bool initTargetOptions(const CompilerInstance &CI,
Options.StackUsageOutput = CodeGenOpts.StackUsageOutput;
Options.EmitAddrsig = CodeGenOpts.Addrsig;
Options.ForceDwarfFrameSection = CodeGenOpts.ForceDwarfFrameSection;
+ Options.EmitCallGraphSection = CodeGenOpts.CallGraphSection;
Options.EmitCallSiteInfo = CodeGenOpts.EmitCallSiteInfo;
Options.EnableAIXExtendedAltivecABI = LangOpts.EnableAIXExtendedAltivecABI;
Options.XRayFunctionIndex = CodeGenOpts.XRayFunctionIndex;
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index d326a81..bf75573 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -6442,6 +6442,10 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back(A->getValue());
}
+ if (Args.hasFlag(options::OPT_fexperimental_call_graph_section,
+ options::OPT_fno_experimental_call_graph_section, false))
+ CmdArgs.push_back("-fexperimental-call-graph-section");
+
Args.addOptInFlag(CmdArgs, options::OPT_fstack_size_section,
options::OPT_fno_stack_size_section);
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index 16cc1db..99400ac 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -1272,6 +1272,11 @@ void tools::addLTOOptions(const ToolChain &ToolChain, const ArgList &Args,
CmdArgs.push_back(
Args.MakeArgString(Twine(PluginOptPrefix) + "-stack-size-section"));
+ if (Args.hasFlag(options::OPT_fexperimental_call_graph_section,
+ options::OPT_fno_experimental_call_graph_section, false))
+ CmdArgs.push_back(
+ Args.MakeArgString(Twine(PluginOptPrefix) + "-call-graph-section"));
+
// Setup statistics file output.
SmallString<128> StatsFile = getStatsFileName(Args, Output, *Input, D);
if (!StatsFile.empty())
diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp
index 86a5185..ab32938 100644
--- a/clang/lib/Format/FormatTokenLexer.cpp
+++ b/clang/lib/Format/FormatTokenLexer.cpp
@@ -93,12 +93,6 @@ ArrayRef<FormatToken *> FormatTokenLexer::lex() {
auto &Tok = *Tokens.back();
const auto NewlinesBefore = Tok.NewlinesBefore;
switch (FormatOff) {
- case FO_CurrentLine:
- if (NewlinesBefore == 0)
- Tok.Finalized = true;
- else
- FormatOff = FO_None;
- break;
case FO_NextLine:
if (NewlinesBefore > 1) {
FormatOff = FO_None;
@@ -107,6 +101,13 @@ ArrayRef<FormatToken *> FormatTokenLexer::lex() {
FormatOff = FO_CurrentLine;
}
break;
+ case FO_CurrentLine:
+ if (NewlinesBefore == 0) {
+ Tok.Finalized = true;
+ break;
+ }
+ FormatOff = FO_None;
+ [[fallthrough]];
default:
if (!FormattingDisabled && FormatOffRegex.match(Tok.TokenText)) {
if (Tok.is(tok::comment) &&
diff --git a/clang/test/Driver/call-graph-section.c b/clang/test/Driver/call-graph-section.c
new file mode 100644
index 0000000..00fa896
--- /dev/null
+++ b/clang/test/Driver/call-graph-section.c
@@ -0,0 +1,5 @@
+// RUN: %clang -### -fexperimental-call-graph-section %s 2>&1 | FileCheck --check-prefix=CALL-GRAPH-SECTION %s
+// RUN: %clang -### -fexperimental-call-graph-section -fno-experimental-call-graph-section %s 2>&1 | FileCheck --check-prefix=NO-CALL-GRAPH-SECTION %s
+
+// CALL-GRAPH-SECTION: "-fexperimental-call-graph-section"
+// NO-CALL-GRAPH-SECTION-NOT: "-fexperimental-call-graph-section"
diff --git a/clang/unittests/Analysis/CMakeLists.txt b/clang/unittests/Analysis/CMakeLists.txt
index 52e7d28..e0acf43 100644
--- a/clang/unittests/Analysis/CMakeLists.txt
+++ b/clang/unittests/Analysis/CMakeLists.txt
@@ -11,6 +11,7 @@ add_clang_unittest(ClangAnalysisTests
clangAST
clangASTMatchers
clangAnalysis
+ clangAnalysisLifetimeSafety
clangBasic
clangFrontend
clangLex
diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp
index fef7036..450c34f 100644
--- a/clang/unittests/Format/FormatTest.cpp
+++ b/clang/unittests/Format/FormatTest.cpp
@@ -24843,6 +24843,11 @@ TEST_F(FormatTest, OneLineFormatOffRegex) {
" } while (0 )",
Style);
+ Style.OneLineFormatOffRegex = "MACRO_TEST";
+ verifyNoChange(" MACRO_TEST1 ( ) ;\n"
+ " MACRO_TEST2( );",
+ Style);
+
Style.ColumnLimit = 50;
Style.OneLineFormatOffRegex = "^LogErrorPrint$";
verifyFormat(" myproject::LogErrorPrint(logger, \"Don't split me!\");\n"
diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
index 1f7da10..0e3c9aa2 100644
--- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
+++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
@@ -274,6 +274,7 @@ struct IntrinsicLibrary {
llvm::ArrayRef<fir::ExtendedValue>);
template <Extremum, ExtremumBehavior>
mlir::Value genExtremum(mlir::Type, llvm::ArrayRef<mlir::Value>);
+ void genFenceProxyAsync(llvm::ArrayRef<fir::ExtendedValue>);
mlir::Value genFloor(mlir::Type, llvm::ArrayRef<mlir::Value>);
mlir::Value genFraction(mlir::Type resultType,
mlir::ArrayRef<mlir::Value> args);
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index b3a2d49..7c5c5fb 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -502,6 +502,10 @@ static constexpr IntrinsicHandler handlers[]{
&I::genExtendsTypeOf,
{{{"a", asBox}, {"mold", asBox}}},
/*isElemental=*/false},
+ {"fence_proxy_async",
+ &I::genFenceProxyAsync,
+ {},
+ /*isElemental=*/false},
{"findloc",
&I::genFindloc,
{{{"array", asBox},
@@ -4367,6 +4371,17 @@ IntrinsicLibrary::genExtendsTypeOf(mlir::Type resultType,
fir::getBase(args[1])));
}
+// FENCE_PROXY_ASYNC (CUDA): emits a single NVVM FenceProxyOp; produces no value.
+void IntrinsicLibrary::genFenceProxyAsync(
+ llvm::ArrayRef<fir::ExtendedValue> args) {
+ assert(args.size() == 0); // takes no arguments (handler table entry uses `{}`)
+ auto kind = mlir::NVVM::ProxyKindAttr::get(
+ builder.getContext(), mlir::NVVM::ProxyKind::async_shared);
+ auto space = mlir::NVVM::SharedSpaceAttr::get(
+ builder.getContext(), mlir::NVVM::SharedSpace::shared_cta);
+ mlir::NVVM::FenceProxyOp::create(builder, loc, kind, space);
+}
+
// FINDLOC
fir::ExtendedValue
IntrinsicLibrary::genFindloc(mlir::Type resultType,
diff --git a/flang/module/cudadevice.f90 b/flang/module/cudadevice.f90
index afb39eb..106f3e2 100644
--- a/flang/module/cudadevice.f90
+++ b/flang/module/cudadevice.f90
@@ -2009,6 +2009,11 @@ implicit none
end interface
interface
+ attributes(device) subroutine fence_proxy_async()
+ end subroutine
+ end interface
+
+ interface
attributes(device) subroutine tma_bulk_commit_group()
end subroutine
end interface
diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf
index eef4225..697b17b 100644
--- a/flang/test/Lower/CUDA/cuda-device-proc.cuf
+++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf
@@ -421,6 +421,14 @@ end subroutine
! CHECK: %[[SHARED_PTR:.*]] = llvm.addrspacecast %[[LLVM_PTR]] : !llvm.ptr to !llvm.ptr<3>
! CHECK: nvvm.mbarrier.arrive.expect_tx %[[SHARED_PTR]], %{{.*}} : !llvm.ptr<3>, i32
+
+attributes(global) subroutine test_fence()
+ call fence_proxy_async()
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_fence()
+! CHECK: nvvm.fence.proxy {kind = #nvvm.proxy_kind<async.shared>, space = #nvvm.shared_space<cta>}
+
attributes(global) subroutine test_tma()
call tma_bulk_commit_group()
call tma_bulk_wait_group()
diff --git a/libc/shared/math.h b/libc/shared/math.h
index 82b9250..e3f7965 100644
--- a/libc/shared/math.h
+++ b/libc/shared/math.h
@@ -49,6 +49,7 @@
#include "math/exp10m1f16.h"
#include "math/exp2.h"
#include "math/exp2f.h"
+#include "math/exp2f16.h"
#include "math/expf.h"
#include "math/expf16.h"
#include "math/frexpf.h"
diff --git a/libc/shared/math/exp2f16.h b/libc/shared/math/exp2f16.h
new file mode 100644
index 0000000..f799511
--- /dev/null
+++ b/libc/shared/math/exp2f16.h
@@ -0,0 +1,29 @@
+//===-- Shared exp2f16 function ---------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SHARED_MATH_EXP2F16_H
+#define LLVM_LIBC_SHARED_MATH_EXP2F16_H
+
+#include "include/llvm-libc-macros/float16-macros.h"
+#include "shared/libc_common.h"
+
+#ifdef LIBC_TYPES_HAS_FLOAT16
+
+#include "src/__support/math/exp2f16.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace shared {
+
+using math::exp2f16;
+
+} // namespace shared
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LIBC_TYPES_HAS_FLOAT16
+
+#endif // LLVM_LIBC_SHARED_MATH_EXP2F16_H
diff --git a/libc/src/__support/math/CMakeLists.txt b/libc/src/__support/math/CMakeLists.txt
index 61253de..9685496 100644
--- a/libc/src/__support/math/CMakeLists.txt
+++ b/libc/src/__support/math/CMakeLists.txt
@@ -738,6 +738,20 @@ add_header_library(
)
add_header_library(
+ exp2f16
+ HDRS
+ exp2f16.h
+ DEPENDS
+ .expxf16_utils
+ libc.src.__support.FPUtil.cast
+ libc.src.__support.FPUtil.except_value_utils
+ libc.src.__support.FPUtil.fenv_impl
+ libc.src.__support.FPUtil.fp_bits
+ libc.src.__support.FPUtil.rounding_mode
+ libc.src.__support.macros.optimization
+)
+
+add_header_library(
exp10
HDRS
exp10.h
diff --git a/libc/src/__support/math/exp2f16.h b/libc/src/__support/math/exp2f16.h
new file mode 100644
index 0000000..599ba0f
--- /dev/null
+++ b/libc/src/__support/math/exp2f16.h
@@ -0,0 +1,111 @@
+//===-- Implementation header for exp2f16 -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_EXP2F16_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_EXP2F16_H
+
+#include "include/llvm-libc-macros/float16-macros.h"
+
+#ifdef LIBC_TYPES_HAS_FLOAT16
+
+#include "expxf16_utils.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/cast.h"
+#include "src/__support/FPUtil/except_value_utils.h"
+#include "src/__support/FPUtil/rounding_mode.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace math {
+
+LIBC_INLINE static constexpr float16 exp2f16(float16 x) {
+ // Computes 2^x for a float16 x: special cases first, then range reduction.
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+ constexpr fputil::ExceptValues<float16, 3> EXP2F16_EXCEPTS = {{
+ // (input, RZ output, RU offset, RD offset, RN offset)
+ // x = 0x1.714p-11, exp2f16(x) = 0x1p+0 (RZ)
+ {0x11c5U, 0x3c00U, 1U, 0U, 1U},
+ // x = -0x1.558p-4, exp2f16(x) = 0x1.e34p-1 (RZ)
+ {0xad56U, 0x3b8dU, 1U, 0U, 0U},
+ // x = -0x1.d5cp-4, exp2f16(x) = 0x1.d8cp-1 (RZ)
+ {0xaf57U, 0x3b63U, 1U, 0U, 0U},
+ }};
+#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
+ using namespace math::expxf16_internal;
+ using FPBits = fputil::FPBits<float16>;
+ FPBits x_bits(x);
+
+ uint16_t x_u = x_bits.uintval();
+ uint16_t x_abs = x_u & 0x7fffU; // bit pattern with the top (sign) bit cleared
+
+ // When |x| >= 16 (bit pattern 0x4c00), including infinities, or x is NaN.
+ if (LIBC_UNLIKELY(x_abs >= 0x4c00U)) {
+ // exp2(NaN) = NaN; a signaling NaN raises FE_INVALID and is quieted.
+ if (x_bits.is_nan()) {
+ if (x_bits.is_signaling_nan()) {
+ fputil::raise_except_if_required(FE_INVALID);
+ return FPBits::quiet_nan().get_val();
+ }
+ // Quiet NaNs are returned unchanged.
+ return x;
+ }
+
+ // When x >= 16: the result overflows unless x is +inf.
+ if (x_bits.is_pos()) {
+ // exp2(+inf) = +inf
+ if (x_bits.is_inf())
+ return FPBits::inf().get_val();
+ // Finite overflow: the returned value depends on the rounding mode.
+ switch (fputil::quick_get_round()) {
+ case FE_TONEAREST:
+ case FE_UPWARD:
+ fputil::set_errno_if_required(ERANGE);
+ fputil::raise_except_if_required(FE_OVERFLOW);
+ return FPBits::inf().get_val();
+ default: // other rounding modes: saturate; no errno/exception raised on this path
+ return FPBits::max_normal().get_val();
+ }
+ }
+
+ // When x <= -25 (x is negative here, so a larger bit pattern means a smaller x).
+ if (x_u >= 0xce40U) {
+ // exp2(-inf) = +0
+ if (x_bits.is_inf())
+ return FPBits::zero().get_val();
+ // Finite underflow: report it, then round to +0 or the smallest subnormal.
+ fputil::set_errno_if_required(ERANGE);
+ fputil::raise_except_if_required(FE_UNDERFLOW | FE_INEXACT);
+
+ if (fputil::fenv_is_round_up())
+ return FPBits::min_subnormal().get_val();
+ return FPBits::zero().get_val();
+ }
+ }
+ // Everything else (including -25 < x <= -16): exception table, then general path.
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+ if (auto r = EXP2F16_EXCEPTS.lookup(x_u); LIBC_UNLIKELY(r.has_value()))
+ return r.value();
+#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
+ // exp2(x) = exp2(hi + mid) * exp2(lo)
+ auto [exp2_hi_mid, exp2_lo] = exp2_range_reduction(x);
+ return fputil::cast<float16>(exp2_hi_mid * exp2_lo);
+}
+
+} // namespace math
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LIBC_TYPES_HAS_FLOAT16
+
+#endif // LLVM_LIBC_SRC___SUPPORT_MATH_EXP2F16_H
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index 55f4aaf..0754b5e 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -1478,15 +1478,7 @@ add_entrypoint_object(
HDRS
../exp2f16.h
DEPENDS
- libc.hdr.errno_macros
- libc.hdr.fenv_macros
- libc.src.__support.FPUtil.cast
- libc.src.__support.FPUtil.except_value_utils
- libc.src.__support.FPUtil.fenv_impl
- libc.src.__support.FPUtil.fp_bits
- libc.src.__support.FPUtil.rounding_mode
- libc.src.__support.macros.optimization
- libc.src.__support.math.expxf16_utils
+ libc.src.__support.math.exp2f16
)
add_entrypoint_object(
diff --git a/libc/src/math/generic/exp2f16.cpp b/libc/src/math/generic/exp2f16.cpp
index 5db0c3a..80799d4 100644
--- a/libc/src/math/generic/exp2f16.cpp
+++ b/libc/src/math/generic/exp2f16.cpp
@@ -7,92 +7,10 @@
//===----------------------------------------------------------------------===//
#include "src/math/exp2f16.h"
-#include "hdr/errno_macros.h"
-#include "hdr/fenv_macros.h"
-#include "src/__support/FPUtil/FEnvImpl.h"
-#include "src/__support/FPUtil/FPBits.h"
-#include "src/__support/FPUtil/cast.h"
-#include "src/__support/FPUtil/except_value_utils.h"
-#include "src/__support/FPUtil/rounding_mode.h"
-#include "src/__support/common.h"
-#include "src/__support/macros/config.h"
-#include "src/__support/macros/optimization.h"
-#include "src/__support/math/expxf16_utils.h"
+#include "src/__support/math/exp2f16.h"
namespace LIBC_NAMESPACE_DECL {
-#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-static constexpr fputil::ExceptValues<float16, 3> EXP2F16_EXCEPTS = {{
- // (input, RZ output, RU offset, RD offset, RN offset)
- // x = 0x1.714p-11, exp2f16(x) = 0x1p+0 (RZ)
- {0x11c5U, 0x3c00U, 1U, 0U, 1U},
- // x = -0x1.558p-4, exp2f16(x) = 0x1.e34p-1 (RZ)
- {0xad56U, 0x3b8dU, 1U, 0U, 0U},
- // x = -0x1.d5cp-4, exp2f16(x) = 0x1.d8cp-1 (RZ)
- {0xaf57U, 0x3b63U, 1U, 0U, 0U},
-}};
-#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-
-LLVM_LIBC_FUNCTION(float16, exp2f16, (float16 x)) {
- using namespace math::expxf16_internal;
- using FPBits = fputil::FPBits<float16>;
- FPBits x_bits(x);
-
- uint16_t x_u = x_bits.uintval();
- uint16_t x_abs = x_u & 0x7fffU;
-
- // When |x| >= 16, or x is NaN.
- if (LIBC_UNLIKELY(x_abs >= 0x4c00U)) {
- // exp2(NaN) = NaN
- if (x_bits.is_nan()) {
- if (x_bits.is_signaling_nan()) {
- fputil::raise_except_if_required(FE_INVALID);
- return FPBits::quiet_nan().get_val();
- }
-
- return x;
- }
-
- // When x >= 16.
- if (x_bits.is_pos()) {
- // exp2(+inf) = +inf
- if (x_bits.is_inf())
- return FPBits::inf().get_val();
-
- switch (fputil::quick_get_round()) {
- case FE_TONEAREST:
- case FE_UPWARD:
- fputil::set_errno_if_required(ERANGE);
- fputil::raise_except_if_required(FE_OVERFLOW);
- return FPBits::inf().get_val();
- default:
- return FPBits::max_normal().get_val();
- }
- }
-
- // When x <= -25.
- if (x_u >= 0xce40U) {
- // exp2(-inf) = +0
- if (x_bits.is_inf())
- return FPBits::zero().get_val();
-
- fputil::set_errno_if_required(ERANGE);
- fputil::raise_except_if_required(FE_UNDERFLOW | FE_INEXACT);
-
- if (fputil::fenv_is_round_up())
- return FPBits::min_subnormal().get_val();
- return FPBits::zero().get_val();
- }
- }
-
-#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
- if (auto r = EXP2F16_EXCEPTS.lookup(x_u); LIBC_UNLIKELY(r.has_value()))
- return r.value();
-#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-
- // exp2(x) = exp2(hi + mid) * exp2(lo)
- auto [exp2_hi_mid, exp2_lo] = exp2_range_reduction(x);
- return fputil::cast<float16>(exp2_hi_mid * exp2_lo);
-}
+LLVM_LIBC_FUNCTION(float16, exp2f16, (float16 x)) { return math::exp2f16(x); } // entry point; forwards to math::exp2f16
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/test/shared/CMakeLists.txt b/libc/test/shared/CMakeLists.txt
index f341d3f..8d81199 100644
--- a/libc/test/shared/CMakeLists.txt
+++ b/libc/test/shared/CMakeLists.txt
@@ -42,6 +42,7 @@ add_fp_unittest(
libc.src.__support.math.exp
libc.src.__support.math.exp2
libc.src.__support.math.exp2f
+ libc.src.__support.math.exp2f16
libc.src.__support.math.exp10
libc.src.__support.math.exp10f
libc.src.__support.math.exp10f16
diff --git a/libc/test/shared/shared_math_test.cpp b/libc/test/shared/shared_math_test.cpp
index 477b7ec..84787d5 100644
--- a/libc/test/shared/shared_math_test.cpp
+++ b/libc/test/shared/shared_math_test.cpp
@@ -28,7 +28,7 @@ TEST(LlvmLibcSharedMathTest, AllFloat16) {
EXPECT_FP_EQ(0x1p+0f16, LIBC_NAMESPACE::shared::cospif16(0.0f16));
EXPECT_FP_EQ(0x1p+0f16, LIBC_NAMESPACE::shared::exp10f16(0.0f16));
EXPECT_FP_EQ(0x0p+0f16, LIBC_NAMESPACE::shared::exp10m1f16(0.0f16));
-
+ EXPECT_FP_EQ(0x1p+0f16, LIBC_NAMESPACE::shared::exp2f16(0.0f16));
EXPECT_FP_EQ(0x1p+0f16, LIBC_NAMESPACE::shared::expf16(0.0f16));
ASSERT_FP_EQ(float16(8 << 5), LIBC_NAMESPACE::shared::ldexpf16(8.0f16, 5));
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index e62d57e..50136a8 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9348,13 +9348,12 @@ static SmallVector<Instruction *> preparePlanForEpilogueVectorLoop(
VPBasicBlock *Header = VectorLoop->getEntryBasicBlock();
Header->setName("vec.epilog.vector.body");
- // Ensure that the start values for all header phi recipes are updated before
- // vectorizing the epilogue loop.
VPCanonicalIVPHIRecipe *IV = Plan.getCanonicalIV();
- // When vectorizing the epilogue loop, the canonical induction start
- // value needs to be changed from zero to the value after the main
- // vector loop. Find the resume value created during execution of the main
- // VPlan. It must be the first phi in the loop preheader.
+ // When vectorizing the epilogue loop, the canonical induction needs to be
+ // adjusted by the value after the main vector loop. Find the resume value
+ // created during execution of the main VPlan. It must be the first phi in the
+ // loop preheader. Use the value to increment the canonical IV, and update all
+ // users in the loop region to use the adjusted value.
// FIXME: Improve modeling for canonical IV start values in the epilogue
// loop.
using namespace llvm::PatternMatch;
@@ -9389,10 +9388,16 @@ static SmallVector<Instruction *> preparePlanForEpilogueVectorLoop(
}) &&
"the canonical IV should only be used by its increment or "
"ScalarIVSteps when resetting the start value");
- IV->setOperand(0, VPV);
+ VPBuilder Builder(Header, Header->getFirstNonPhi());
+ VPInstruction *Add = Builder.createNaryOp(Instruction::Add, {IV, VPV});
+ IV->replaceAllUsesWith(Add);
+ Add->setOperand(0, IV);
DenseMap<Value *, Value *> ToFrozen;
SmallVector<Instruction *> InstsToMove;
+ // Ensure that the start values for all header phi recipes are updated before
+ // vectorizing the epilogue loop. Skip the canonical IV, which has been
+ // handled above.
for (VPRecipeBase &R : drop_begin(Header->phis())) {
Value *ResumeV = nullptr;
// TODO: Move setting of resume values to prepareToExecute.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index c8a2d84..7563cd7 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1234,6 +1234,18 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
if (!Plan->isUnrolled())
return;
+ // Hoist an invariant increment Y of a phi X, by having X start at Y.
+ if (match(Def, m_c_Add(m_VPValue(X), m_VPValue(Y))) && Y->isLiveIn() &&
+ isa<VPPhi>(X)) {
+ auto *Phi = cast<VPPhi>(X);
+ if (Phi->getOperand(1) != Def && match(Phi->getOperand(0), m_ZeroInt()) &&
+ Phi->getNumUsers() == 1 && (*Phi->user_begin() == &R)) {
+ Phi->setOperand(0, Y);
+ Def->replaceAllUsesWith(Phi);
+ return;
+ }
+ }
+
// VPVectorPointer for part 0 can be replaced by their start pointer.
if (auto *VecPtr = dyn_cast<VPVectorPointerRecipe>(&R)) {
if (VecPtr->isFirstPart()) {
diff --git a/llvm/test/tools/llvm-remarkutil/filter.test b/llvm/test/tools/llvm-remarkutil/filter.test
index 8304b9f..9fd2e94 100644
--- a/llvm/test/tools/llvm-remarkutil/filter.test
+++ b/llvm/test/tools/llvm-remarkutil/filter.test
@@ -18,9 +18,19 @@ RUN: llvm-remarkutil filter --remark-type=analysis %p/Inputs/filter.yaml | FileC
RUN: llvm-remarkutil yaml2bitstream -o %t.opt.bitstream %p/Inputs/filter.yaml
RUN: llvm-remarkutil filter --function=func1 %t.opt.bitstream | FileCheck %s --strict-whitespace --check-prefix=REMARK1
+RUN: llvm-remarkutil filter --function=func1 %t.opt.bitstream -o %t.r1.yamL
+RUN: cat %t.r1.yamL | FileCheck %s --strict-whitespace --check-prefix=REMARK1
+RUN: llvm-remarkutil filter --function=func1 %t.opt.bitstream -o %t.r1.yMl
+RUN: cat %t.r1.yMl | FileCheck %s --strict-whitespace --check-prefix=REMARK1
+RUN: llvm-remarkutil filter --function=func1 %t.opt.bitstream --serializer=yaml -o %t.r1.fake.opt.bitstream
+RUN: cat %t.r1.fake.opt.bitstream | FileCheck %s --strict-whitespace --check-prefix=REMARK1
RUN: llvm-remarkutil filter --function=func1 %t.opt.bitstream -o %t.r1.opt.bitstream
RUN: llvm-remarkutil bitstream2yaml %t.r1.opt.bitstream | FileCheck %s --strict-whitespace --check-prefix=REMARK1
+RUN: llvm-remarkutil filter --function=func1 %t.opt.bitstream -o %t.r1
+RUN: llvm-remarkutil bitstream2yaml %t.r1 | FileCheck %s --strict-whitespace --check-prefix=REMARK1
+RUN: llvm-remarkutil filter --function=func1 %p/Inputs/filter.yaml --serializer=bitstream -o %t.r1.fake.yaml
+RUN: llvm-remarkutil bitstream2yaml %t.r1.fake.yaml | FileCheck %s --strict-whitespace --check-prefix=REMARK1
RUN: llvm-remarkutil filter --function=func %p/Inputs/filter.yaml | FileCheck %s --allow-empty --strict-whitespace --check-prefix=EMPTY
diff --git a/llvm/tools/llvm-remarkutil/RemarkFilter.cpp b/llvm/tools/llvm-remarkutil/RemarkFilter.cpp
index 507ae36..9b521b4 100644
--- a/llvm/tools/llvm-remarkutil/RemarkFilter.cpp
+++ b/llvm/tools/llvm-remarkutil/RemarkFilter.cpp
@@ -48,12 +48,8 @@ static Error tryFilter() {
return MaybeParser.takeError();
auto &Parser = **MaybeParser;
- Format SerializerFormat = OutputFormat;
- if (SerializerFormat == Format::Auto) {
- SerializerFormat = Parser.ParserFormat;
- if (OutputFileName.empty() || OutputFileName == "-")
- SerializerFormat = Format::YAML;
- }
+ Format SerializerFormat =
+ getSerializerFormat(OutputFileName, OutputFormat, Parser.ParserFormat);
auto MaybeOF = getOutputFileForRemarks(OutputFileName, SerializerFormat);
if (!MaybeOF)
diff --git a/llvm/tools/llvm-remarkutil/RemarkUtilHelpers.cpp b/llvm/tools/llvm-remarkutil/RemarkUtilHelpers.cpp
index be52948..b6204d0 100644
--- a/llvm/tools/llvm-remarkutil/RemarkUtilHelpers.cpp
+++ b/llvm/tools/llvm-remarkutil/RemarkUtilHelpers.cpp
@@ -54,6 +54,20 @@ getOutputFileForRemarks(StringRef OutputFileName, Format OutputFormat) {
: sys::fs::OF_None);
}
+Format getSerializerFormat(StringRef OutputFileName, Format SelectedFormat,
+ Format DefaultFormat) {
+ // Anything other than Format::Auto is an explicit choice and is returned as is.
+ if (SelectedFormat != Format::Auto)
+ return SelectedFormat;
+ if (OutputFileName.ends_with_insensitive(".bitstream"))
+ return Format::Bitstream;
+ // An empty or "-" output name, or a .yaml/.yml extension, selects YAML.
+ const bool NoNamedFile = OutputFileName.empty() || OutputFileName == "-";
+ const bool HasYAMLExt = OutputFileName.ends_with_insensitive(".yaml") ||
+ OutputFileName.ends_with_insensitive(".yml");
+ return (NoNamedFile || HasYAMLExt) ? Format::YAML : DefaultFormat;
+}
+
Expected<FilterMatcher>
FilterMatcher::createRE(const llvm::cl::opt<std::string> &Arg) {
return createRE(Arg.ArgStr, Arg);
diff --git a/llvm/tools/llvm-remarkutil/RemarkUtilHelpers.h b/llvm/tools/llvm-remarkutil/RemarkUtilHelpers.h
index 0dd550765..73867fe 100644
--- a/llvm/tools/llvm-remarkutil/RemarkUtilHelpers.h
+++ b/llvm/tools/llvm-remarkutil/RemarkUtilHelpers.h
@@ -47,7 +47,8 @@
"serializer", cl::init(Format::Auto), \
cl::desc("Output remark format to serialize"), \
cl::values(clEnumValN(Format::Auto, "auto", \
- "Follow the parser format (default)"), \
+ "Automatic detection based on output file " \
+ "extension or parser format (default)"), \
clEnumValN(Format::YAML, "yaml", "YAML"), \
clEnumValN(Format::Bitstream, "bitstream", "Bitstream")), \
cl::sub(SUBOPT));
@@ -151,6 +152,12 @@ getOutputFileWithFlags(StringRef OutputFileName, sys::fs::OpenFlags Flags);
Expected<std::unique_ptr<ToolOutputFile>>
getOutputFileForRemarks(StringRef OutputFileName, Format OutputFormat);
+/// Choose the serializer format. If \p SelectedFormat is Format::Auto, try to
+/// detect the format based on the extension of \p OutputFileName or fall back
+/// to \p DefaultFormat.
+Format getSerializerFormat(StringRef OutputFileName, Format SelectedFormat,
+ Format DefaultFormat);
+
/// Filter object which can be either a string or a regex to match with the
/// remark properties.
class FilterMatcher {
diff --git a/mlir/test/Examples/standalone/test.wheel.toy b/mlir/test/Examples/standalone/test.wheel.toy
index 5ff9271..c8d188a 100644
--- a/mlir/test/Examples/standalone/test.wheel.toy
+++ b/mlir/test/Examples/standalone/test.wheel.toy
@@ -2,6 +2,7 @@
# than 255 chars when combined with the fact that pip wants to install into a tmp directory buried under
# C/Users/ContainerAdministrator/AppData/Local/Temp.
# UNSUPPORTED: target={{.*(windows).*}}
+# REQUIRES: expensive_checks
# REQUIRES: non-shared-libs-build
# REQUIRES: bindings-python
diff --git a/mlir/test/lit.cfg.py b/mlir/test/lit.cfg.py
index f99c24d..6ff12d6 100644
--- a/mlir/test/lit.cfg.py
+++ b/mlir/test/lit.cfg.py
@@ -348,6 +348,9 @@ if config.enable_assertions:
else:
config.available_features.add("noasserts")
+if str(config.expensive_checks).upper() in ("1", "ON", "YES", "TRUE", "Y"):  # CMake-style boolean string; "OFF"/"0"/"" must not enable it
+ config.available_features.add("expensive_checks")
+
def have_host_jit_feature_support(feature_name):
mlir_runner_exe = lit.util.which("mlir-runner", config.mlir_tools_dir)
diff --git a/mlir/test/lit.site.cfg.py.in b/mlir/test/lit.site.cfg.py.in
index 1aaf798..91a71af 100644
--- a/mlir/test/lit.site.cfg.py.in
+++ b/mlir/test/lit.site.cfg.py.in
@@ -11,6 +11,7 @@ config.llvm_shlib_ext = "@SHLIBEXT@"
config.llvm_shlib_dir = lit_config.substitute(path(r"@SHLIBDIR@"))
config.python_executable = "@Python3_EXECUTABLE@"
config.enable_assertions = @ENABLE_ASSERTIONS@
+config.expensive_checks = "@EXPENSIVE_CHECKS@"
config.native_target = "@LLVM_NATIVE_ARCH@"
config.host_os = "@HOST_OS@"
config.host_cc = "@HOST_CC@"
diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
index 640fa03..936bc12 100644
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@@ -2917,6 +2917,22 @@ libc_support_library(
)
libc_support_library(
+ name = "__support_math_exp2f16",
+ hdrs = ["src/__support/math/exp2f16.h"],
+ deps = [
+ ":__support_fputil_except_value_utils",
+ ":__support_fputil_fma",
+ ":__support_fputil_multiply_add",
+ ":__support_fputil_nearest_integer",
+ ":__support_fputil_polyeval",
+ ":__support_fputil_rounding_mode",
+ ":__support_macros_optimization",
+ ":__support_math_common_constants",
+ ":__support_math_expxf16_utils",
+ ],
+)
+
+libc_support_library(
name = "__support_math_exp10",
hdrs = ["src/__support/math/exp10.h"],
deps = [
@@ -3696,7 +3712,7 @@ libc_math_function(
libc_math_function(
name = "exp2f16",
additional_deps = [
- ":__support_math_expxf16_utils",
+ ":__support_math_exp2f16",
],
)