diff options
34 files changed, 335 insertions, 117 deletions
diff --git a/clang-tools-extra/clang-doc/JSONGenerator.cpp b/clang-tools-extra/clang-doc/JSONGenerator.cpp index 6fba211..b17cc80 100644 --- a/clang-tools-extra/clang-doc/JSONGenerator.cpp +++ b/clang-tools-extra/clang-doc/JSONGenerator.cpp @@ -584,12 +584,20 @@ static SmallString<16> determineFileName(Info *I, SmallString<128> &Path) { FileName = RecordSymbolInfo->MangledName; } else if (I->USR == GlobalNamespaceID) FileName = "index"; - else + else if (I->IT == InfoType::IT_namespace) { + for (const auto &NS : I->Namespace) { + FileName += NS.Name; + FileName += "_"; + } + FileName += I->Name; + } else FileName = I->Name; sys::path::append(Path, FileName + ".json"); return FileName; } +// FIXME: Revert back to creating nested directories for namespaces instead of +// putting everything in a flat directory structure. Error JSONGenerator::generateDocs( StringRef RootDir, llvm::StringMap<std::unique_ptr<doc::Info>> Infos, const ClangDocContext &CDCtx) { diff --git a/clang-tools-extra/test/clang-doc/json/multiple-namespaces.cpp b/clang-tools-extra/test/clang-doc/json/multiple-namespaces.cpp new file mode 100644 index 0000000..04fcfc1 --- /dev/null +++ b/clang-tools-extra/test/clang-doc/json/multiple-namespaces.cpp @@ -0,0 +1,20 @@ +// RUN: rm -rf %t && mkdir -p %t +// RUN: clang-doc --output=%t --format=json --executor=standalone %s +// RUN: FileCheck %s < %t/json/foo_tools.json --check-prefix=CHECK-FOO +// RUN: FileCheck %s < %t/json/bar_tools.json --check-prefix=CHECK-BAR + +namespace foo { + namespace tools { + class FooTools {}; + } // namespace tools +} // namespace foo + +namespace bar { + namespace tools { + class BarTools {}; + } // namespace tools +} // namespace bar + +// CHECK-FOO: "Name": "tools" + +// CHECK-BAR: "Name": "tools" diff --git a/clang-tools-extra/test/clang-doc/json/nested-namespace.cpp b/clang-tools-extra/test/clang-doc/json/nested-namespace.cpp index b19afc1..cf19e1e 100644 --- a/clang-tools-extra/test/clang-doc/json/nested-namespace.cpp +++ b/clang-tools-extra/test/clang-doc/json/nested-namespace.cpp @@ -1,7 +1,7 @@ // RUN: rm -rf %t && mkdir -p %t // RUN: clang-doc --output=%t --format=json --executor=standalone %s // RUN: FileCheck %s < %t/json/nested.json --check-prefix=NESTED -// RUN: FileCheck %s < %t/json/inner.json --check-prefix=INNER +// RUN: FileCheck %s < %t/json/nested_inner.json --check-prefix=INNER namespace nested { int Global; diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def index d924cb4..90e1f8d 100644 --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -72,6 +72,8 @@ CODEGENOPT(EnableNoundefAttrs, 1, 0, Benign) ///< Enable emitting `noundef` attr CODEGENOPT(DebugPassManager, 1, 0, Benign) ///< Prints debug information for the new ///< pass manager. CODEGENOPT(DisableRedZone , 1, 0, Benign) ///< Set when -mno-red-zone is enabled. +CODEGENOPT(CallGraphSection, 1, 0, Benign) ///< Emit a call graph section into the + ///< object file. CODEGENOPT(EmitCallSiteInfo, 1, 0, Benign) ///< Emit call site info only in the case of ///< '-g' + 'O>0' level. CODEGENOPT(IndirectTlsSegRefs, 1, 0, Benign) ///< Set when -mno-tls-direct-seg-refs diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index c2f2ac5..a55a523 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -4534,6 +4534,12 @@ defm data_sections : BoolFOption<"data-sections", PosFlag<SetTrue, [], [ClangOption, CC1Option], "Place each data in its own section">, NegFlag<SetFalse>>; +defm experimental_call_graph_section + : BoolFOption<"experimental-call-graph-section", + CodeGenOpts<"CallGraphSection">, DefaultFalse, + PosFlag<SetTrue, [], [ClangOption, CC1Option], + "Emit a call graph section">, + NegFlag<SetFalse>>; defm stack_size_section : BoolFOption<"stack-size-section", CodeGenOpts<"StackSizeSection">, DefaultFalse, PosFlag<SetTrue, [], [ClangOption, CC1Option], diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 2d95982..f8e8086 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -473,6 +473,7 @@ static bool initTargetOptions(const CompilerInstance &CI, Options.StackUsageOutput = CodeGenOpts.StackUsageOutput; Options.EmitAddrsig = CodeGenOpts.Addrsig; Options.ForceDwarfFrameSection = CodeGenOpts.ForceDwarfFrameSection; + Options.EmitCallGraphSection = CodeGenOpts.CallGraphSection; Options.EmitCallSiteInfo = CodeGenOpts.EmitCallSiteInfo; Options.EnableAIXExtendedAltivecABI = LangOpts.EnableAIXExtendedAltivecABI; Options.XRayFunctionIndex = CodeGenOpts.XRayFunctionIndex; diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index d326a81..bf75573 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -6442,6 +6442,10 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(A->getValue()); } + if (Args.hasFlag(options::OPT_fexperimental_call_graph_section, + options::OPT_fno_experimental_call_graph_section, false)) + CmdArgs.push_back("-fexperimental-call-graph-section"); + Args.addOptInFlag(CmdArgs, options::OPT_fstack_size_section, options::OPT_fno_stack_size_section); diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 16cc1db..99400ac 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -1272,6 +1272,11 @@ void tools::addLTOOptions(const ToolChain &ToolChain, const ArgList &Args, CmdArgs.push_back( Args.MakeArgString(Twine(PluginOptPrefix) + "-stack-size-section")); + if (Args.hasFlag(options::OPT_fexperimental_call_graph_section, + options::OPT_fno_experimental_call_graph_section, false)) + CmdArgs.push_back( + Args.MakeArgString(Twine(PluginOptPrefix) + "-call-graph-section")); + // Setup statistics file output. SmallString<128> StatsFile = getStatsFileName(Args, Output, *Input, D); if (!StatsFile.empty()) diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp index 86a5185..ab32938 100644 --- a/clang/lib/Format/FormatTokenLexer.cpp +++ b/clang/lib/Format/FormatTokenLexer.cpp @@ -93,12 +93,6 @@ ArrayRef<FormatToken *> FormatTokenLexer::lex() { auto &Tok = *Tokens.back(); const auto NewlinesBefore = Tok.NewlinesBefore; switch (FormatOff) { - case FO_CurrentLine: - if (NewlinesBefore == 0) - Tok.Finalized = true; - else - FormatOff = FO_None; - break; case FO_NextLine: if (NewlinesBefore > 1) { FormatOff = FO_None; @@ -107,6 +101,13 @@ ArrayRef<FormatToken *> FormatTokenLexer::lex() { FormatOff = FO_CurrentLine; } break; + case FO_CurrentLine: + if (NewlinesBefore == 0) { + Tok.Finalized = true; + break; + } + FormatOff = FO_None; + [[fallthrough]]; default: if (!FormattingDisabled && FormatOffRegex.match(Tok.TokenText)) { if (Tok.is(tok::comment) && diff --git a/clang/test/Driver/call-graph-section.c b/clang/test/Driver/call-graph-section.c new file mode 100644 index 0000000..00fa896 --- /dev/null +++ b/clang/test/Driver/call-graph-section.c @@ -0,0 +1,5 @@ +// RUN: %clang -### -fexperimental-call-graph-section %s 2>&1 | FileCheck --check-prefix=CALL-GRAPH-SECTION %s +// RUN: %clang -### -fexperimental-call-graph-section -fno-experimental-call-graph-section %s 2>&1 | FileCheck --check-prefix=NO-CALL-GRAPH-SECTION %s + +// CALL-GRAPH-SECTION: "-fexperimental-call-graph-section" +// NO-CALL-GRAPH-SECTION-NOT: "-fexperimental-call-graph-section" diff --git a/clang/unittests/Analysis/CMakeLists.txt b/clang/unittests/Analysis/CMakeLists.txt index 52e7d28..e0acf43 100644 --- a/clang/unittests/Analysis/CMakeLists.txt +++ b/clang/unittests/Analysis/CMakeLists.txt @@ -11,6 +11,7 @@ add_clang_unittest(ClangAnalysisTests clangAST clangASTMatchers clangAnalysis + clangAnalysisLifetimeSafety clangBasic clangFrontend clangLex diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index fef7036..450c34f 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -24843,6 +24843,11 @@ TEST_F(FormatTest, OneLineFormatOffRegex) { " } while (0 )", Style); + Style.OneLineFormatOffRegex = "MACRO_TEST"; + verifyNoChange(" MACRO_TEST1 ( ) ;\n" + " MACRO_TEST2( );", + Style); + Style.ColumnLimit = 50; Style.OneLineFormatOffRegex = "^LogErrorPrint$"; verifyFormat(" myproject::LogErrorPrint(logger, \"Don't split me!\");\n" diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h index 1f7da10..0e3c9aa2 100644 --- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h +++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h @@ -274,6 +274,7 @@ struct IntrinsicLibrary { llvm::ArrayRef<fir::ExtendedValue>); template <Extremum, ExtremumBehavior> mlir::Value genExtremum(mlir::Type, llvm::ArrayRef<mlir::Value>); + void genFenceProxyAsync(llvm::ArrayRef<fir::ExtendedValue>); mlir::Value genFloor(mlir::Type, llvm::ArrayRef<mlir::Value>); mlir::Value genFraction(mlir::Type resultType, mlir::ArrayRef<mlir::Value> args); diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp index b3a2d49..7c5c5fb 100644 --- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp @@ -502,6 +502,10 @@ static constexpr IntrinsicHandler handlers[]{ &I::genExtendsTypeOf, {{{"a", asBox}, {"mold", asBox}}}, /*isElemental=*/false}, + {"fence_proxy_async", + &I::genFenceProxyAsync, + {}, + /*isElemental=*/false}, {"findloc", &I::genFindloc, {{{"array", asBox}, @@ -4367,6 +4371,17 @@ IntrinsicLibrary::genExtendsTypeOf(mlir::Type resultType, fir::getBase(args[1]))); } +// FENCE_PROXY_ASYNC (CUDA) +void IntrinsicLibrary::genFenceProxyAsync( + llvm::ArrayRef<fir::ExtendedValue> args) { + assert(args.size() == 0); + auto kind = mlir::NVVM::ProxyKindAttr::get( + builder.getContext(), mlir::NVVM::ProxyKind::async_shared); + auto space = mlir::NVVM::SharedSpaceAttr::get( + builder.getContext(), mlir::NVVM::SharedSpace::shared_cta); + mlir::NVVM::FenceProxyOp::create(builder, loc, kind, space); +} + // FINDLOC fir::ExtendedValue IntrinsicLibrary::genFindloc(mlir::Type resultType, diff --git a/flang/module/cudadevice.f90 b/flang/module/cudadevice.f90 index afb39eb..106f3e2 100644 --- a/flang/module/cudadevice.f90 +++ b/flang/module/cudadevice.f90 @@ -2009,6 +2009,11 @@ implicit none end interface interface + attributes(device) subroutine fence_proxy_async() + end subroutine + end interface + + interface attributes(device) subroutine tma_bulk_commit_group() end subroutine end interface diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf index eef4225..697b17b 100644 --- a/flang/test/Lower/CUDA/cuda-device-proc.cuf +++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf @@ -421,6 +421,14 @@ end subroutine ! CHECK: %[[SHARED_PTR:.*]] = llvm.addrspacecast %[[LLVM_PTR]] : !llvm.ptr to !llvm.ptr<3> ! CHECK: nvvm.mbarrier.arrive.expect_tx %[[SHARED_PTR]], %{{.*}} : !llvm.ptr<3>, i32 + +attributes(global) subroutine test_fence() + call fence_proxy_async() +end subroutine + +! CHECK-LABEL: func.func @_QPtest_fence() +! CHECK: nvvm.fence.proxy {kind = #nvvm.proxy_kind<async.shared>, space = #nvvm.shared_space<cta>} + attributes(global) subroutine test_tma() call tma_bulk_commit_group() call tma_bulk_wait_group() diff --git a/libc/shared/math.h b/libc/shared/math.h index 82b9250..e3f7965 100644 --- a/libc/shared/math.h +++ b/libc/shared/math.h @@ -49,6 +49,7 @@ #include "math/exp10m1f16.h" #include "math/exp2.h" #include "math/exp2f.h" +#include "math/exp2f16.h" #include "math/expf.h" #include "math/expf16.h" #include "math/frexpf.h" diff --git a/libc/shared/math/exp2f16.h b/libc/shared/math/exp2f16.h new file mode 100644 index 0000000..f799511 --- /dev/null +++ b/libc/shared/math/exp2f16.h @@ -0,0 +1,29 @@ +//===-- Shared exp2f16 function ---------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SHARED_MATH_EXP2F16_H +#define LLVM_LIBC_SHARED_MATH_EXP2F16_H + +#include "include/llvm-libc-macros/float16-macros.h" +#include "shared/libc_common.h" + +#ifdef LIBC_TYPES_HAS_FLOAT16 + +#include "src/__support/math/exp2f16.h" + +namespace LIBC_NAMESPACE_DECL { +namespace shared { + +using math::exp2f16; + +} // namespace shared +} // namespace LIBC_NAMESPACE_DECL + +#endif // LIBC_TYPES_HAS_FLOAT16 + +#endif // LLVM_LIBC_SHARED_MATH_EXP2F16_H diff --git a/libc/src/__support/math/CMakeLists.txt b/libc/src/__support/math/CMakeLists.txt index 61253de..9685496 100644 --- a/libc/src/__support/math/CMakeLists.txt +++ b/libc/src/__support/math/CMakeLists.txt @@ -738,6 +738,20 @@ add_header_library( ) add_header_library( + exp2f16 + HDRS + exp2f16.h + DEPENDS + .expxf16_utils + libc.src.__support.FPUtil.cast + libc.src.__support.FPUtil.except_value_utils + libc.src.__support.FPUtil.fenv_impl + libc.src.__support.FPUtil.fp_bits + libc.src.__support.FPUtil.rounding_mode + libc.src.__support.macros.optimization +) + +add_header_library( exp10 HDRS exp10.h diff --git a/libc/src/__support/math/exp2f16.h b/libc/src/__support/math/exp2f16.h new file mode 100644 index 0000000..599ba0f --- /dev/null +++ b/libc/src/__support/math/exp2f16.h @@ -0,0 +1,111 @@ +//===-- Implementation header for exp2f16 -----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_EXP2F16_H +#define LLVM_LIBC_SRC___SUPPORT_MATH_EXP2F16_H + +#include "include/llvm-libc-macros/float16-macros.h" + +#ifdef LIBC_TYPES_HAS_FLOAT16 + +#include "expxf16_utils.h" +#include "src/__support/FPUtil/FEnvImpl.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/cast.h" +#include "src/__support/FPUtil/except_value_utils.h" +#include "src/__support/FPUtil/rounding_mode.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" +#include "src/__support/macros/optimization.h" + +namespace LIBC_NAMESPACE_DECL { + +namespace math { + +LIBC_INLINE static constexpr float16 exp2f16(float16 x) { + +#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS + constexpr fputil::ExceptValues<float16, 3> EXP2F16_EXCEPTS = {{ + // (input, RZ output, RU offset, RD offset, RN offset) + // x = 0x1.714p-11, exp2f16(x) = 0x1p+0 (RZ) + {0x11c5U, 0x3c00U, 1U, 0U, 1U}, + // x = -0x1.558p-4, exp2f16(x) = 0x1.e34p-1 (RZ) + {0xad56U, 0x3b8dU, 1U, 0U, 0U}, + // x = -0x1.d5cp-4, exp2f16(x) = 0x1.d8cp-1 (RZ) + {0xaf57U, 0x3b63U, 1U, 0U, 0U}, + }}; +#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS + + using namespace math::expxf16_internal; + using FPBits = fputil::FPBits<float16>; + FPBits x_bits(x); + + uint16_t x_u = x_bits.uintval(); + uint16_t x_abs = x_u & 0x7fffU; + + // When |x| >= 16, or x is NaN. + if (LIBC_UNLIKELY(x_abs >= 0x4c00U)) { + // exp2(NaN) = NaN + if (x_bits.is_nan()) { + if (x_bits.is_signaling_nan()) { + fputil::raise_except_if_required(FE_INVALID); + return FPBits::quiet_nan().get_val(); + } + + return x; + } + + // When x >= 16. + if (x_bits.is_pos()) { + // exp2(+inf) = +inf + if (x_bits.is_inf()) + return FPBits::inf().get_val(); + + switch (fputil::quick_get_round()) { + case FE_TONEAREST: + case FE_UPWARD: + fputil::set_errno_if_required(ERANGE); + fputil::raise_except_if_required(FE_OVERFLOW); + return FPBits::inf().get_val(); + default: + return FPBits::max_normal().get_val(); + } + } + + // When x <= -25. + if (x_u >= 0xce40U) { + // exp2(-inf) = +0 + if (x_bits.is_inf()) + return FPBits::zero().get_val(); + + fputil::set_errno_if_required(ERANGE); + fputil::raise_except_if_required(FE_UNDERFLOW | FE_INEXACT); + + if (fputil::fenv_is_round_up()) + return FPBits::min_subnormal().get_val(); + return FPBits::zero().get_val(); + } + } + +#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS + if (auto r = EXP2F16_EXCEPTS.lookup(x_u); LIBC_UNLIKELY(r.has_value())) + return r.value(); +#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS + + // exp2(x) = exp2(hi + mid) * exp2(lo) + auto [exp2_hi_mid, exp2_lo] = exp2_range_reduction(x); + return fputil::cast<float16>(exp2_hi_mid * exp2_lo); +} + +} // namespace math + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LIBC_TYPES_HAS_FLOAT16 + +#endif // LLVM_LIBC_SRC___SUPPORT_MATH_EXP2F16_H diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index 55f4aaf..0754b5e 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -1478,15 +1478,7 @@ add_entrypoint_object( HDRS ../exp2f16.h DEPENDS - libc.hdr.errno_macros - libc.hdr.fenv_macros - libc.src.__support.FPUtil.cast - libc.src.__support.FPUtil.except_value_utils - libc.src.__support.FPUtil.fenv_impl - libc.src.__support.FPUtil.fp_bits - libc.src.__support.FPUtil.rounding_mode - libc.src.__support.macros.optimization - libc.src.__support.math.expxf16_utils + libc.src.__support.math.exp2f16 ) add_entrypoint_object( diff --git a/libc/src/math/generic/exp2f16.cpp b/libc/src/math/generic/exp2f16.cpp index 5db0c3a..80799d4 100644 --- a/libc/src/math/generic/exp2f16.cpp +++ b/libc/src/math/generic/exp2f16.cpp @@ -7,92 +7,10 @@ //===----------------------------------------------------------------------===// #include "src/math/exp2f16.h" -#include "hdr/errno_macros.h" -#include "hdr/fenv_macros.h" -#include "src/__support/FPUtil/FEnvImpl.h" -#include "src/__support/FPUtil/FPBits.h" -#include "src/__support/FPUtil/cast.h" -#include "src/__support/FPUtil/except_value_utils.h" -#include "src/__support/FPUtil/rounding_mode.h" -#include "src/__support/common.h" -#include "src/__support/macros/config.h" -#include "src/__support/macros/optimization.h" -#include "src/__support/math/expxf16_utils.h" +#include "src/__support/math/exp2f16.h" namespace LIBC_NAMESPACE_DECL { -#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS -static constexpr fputil::ExceptValues<float16, 3> EXP2F16_EXCEPTS = {{ - // (input, RZ output, RU offset, RD offset, RN offset) - // x = 0x1.714p-11, exp2f16(x) = 0x1p+0 (RZ) - {0x11c5U, 0x3c00U, 1U, 0U, 1U}, - // x = -0x1.558p-4, exp2f16(x) = 0x1.e34p-1 (RZ) - {0xad56U, 0x3b8dU, 1U, 0U, 0U}, - // x = -0x1.d5cp-4, exp2f16(x) = 0x1.d8cp-1 (RZ) - {0xaf57U, 0x3b63U, 1U, 0U, 0U}, -}}; -#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS - -LLVM_LIBC_FUNCTION(float16, exp2f16, (float16 x)) { - using namespace math::expxf16_internal; - using FPBits = fputil::FPBits<float16>; - FPBits x_bits(x); - - uint16_t x_u = x_bits.uintval(); - uint16_t x_abs = x_u & 0x7fffU; - - // When |x| >= 16, or x is NaN. - if (LIBC_UNLIKELY(x_abs >= 0x4c00U)) { - // exp2(NaN) = NaN - if (x_bits.is_nan()) { - if (x_bits.is_signaling_nan()) { - fputil::raise_except_if_required(FE_INVALID); - return FPBits::quiet_nan().get_val(); - } - - return x; - } - - // When x >= 16. - if (x_bits.is_pos()) { - // exp2(+inf) = +inf - if (x_bits.is_inf()) - return FPBits::inf().get_val(); - - switch (fputil::quick_get_round()) { - case FE_TONEAREST: - case FE_UPWARD: - fputil::set_errno_if_required(ERANGE); - fputil::raise_except_if_required(FE_OVERFLOW); - return FPBits::inf().get_val(); - default: - return FPBits::max_normal().get_val(); - } - } - - // When x <= -25. - if (x_u >= 0xce40U) { - // exp2(-inf) = +0 - if (x_bits.is_inf()) - return FPBits::zero().get_val(); - - fputil::set_errno_if_required(ERANGE); - fputil::raise_except_if_required(FE_UNDERFLOW | FE_INEXACT); - - if (fputil::fenv_is_round_up()) - return FPBits::min_subnormal().get_val(); - return FPBits::zero().get_val(); - } - } - -#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS - if (auto r = EXP2F16_EXCEPTS.lookup(x_u); LIBC_UNLIKELY(r.has_value())) - return r.value(); -#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS - - // exp2(x) = exp2(hi + mid) * exp2(lo) - auto [exp2_hi_mid, exp2_lo] = exp2_range_reduction(x); - return fputil::cast<float16>(exp2_hi_mid * exp2_lo); -} +LLVM_LIBC_FUNCTION(float16, exp2f16, (float16 x)) { return math::exp2f16(x); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/test/shared/CMakeLists.txt b/libc/test/shared/CMakeLists.txt index f341d3f..8d81199 100644 --- a/libc/test/shared/CMakeLists.txt +++ b/libc/test/shared/CMakeLists.txt @@ -42,6 +42,7 @@ add_fp_unittest( libc.src.__support.math.exp libc.src.__support.math.exp2 libc.src.__support.math.exp2f + libc.src.__support.math.exp2f16 libc.src.__support.math.exp10 libc.src.__support.math.exp10f libc.src.__support.math.exp10f16 diff --git a/libc/test/shared/shared_math_test.cpp b/libc/test/shared/shared_math_test.cpp index 477b7ec..84787d5 100644 --- a/libc/test/shared/shared_math_test.cpp +++ b/libc/test/shared/shared_math_test.cpp @@ -28,7 +28,7 @@ TEST(LlvmLibcSharedMathTest, AllFloat16) { EXPECT_FP_EQ(0x1p+0f16, LIBC_NAMESPACE::shared::cospif16(0.0f16)); EXPECT_FP_EQ(0x1p+0f16, LIBC_NAMESPACE::shared::exp10f16(0.0f16)); EXPECT_FP_EQ(0x0p+0f16, LIBC_NAMESPACE::shared::exp10m1f16(0.0f16)); - + EXPECT_FP_EQ(0x1p+0f16, LIBC_NAMESPACE::shared::exp2f16(0.0f16)); EXPECT_FP_EQ(0x1p+0f16, LIBC_NAMESPACE::shared::expf16(0.0f16)); ASSERT_FP_EQ(float16(8 << 5), LIBC_NAMESPACE::shared::ldexpf16(8.0f16, 5)); diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index e62d57e..50136a8 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -9348,13 +9348,12 @@ static SmallVector<Instruction *> preparePlanForEpilogueVectorLoop( VPBasicBlock *Header = VectorLoop->getEntryBasicBlock(); Header->setName("vec.epilog.vector.body"); - // Ensure that the start values for all header phi recipes are updated before - // vectorizing the epilogue loop. VPCanonicalIVPHIRecipe *IV = Plan.getCanonicalIV(); - // When vectorizing the epilogue loop, the canonical induction start - // value needs to be changed from zero to the value after the main - // vector loop. Find the resume value created during execution of the main - // VPlan. It must be the first phi in the loop preheader. + // When vectorizing the epilogue loop, the canonical induction needs to be + // adjusted by the value after the main vector loop. Find the resume value + // created during execution of the main VPlan. It must be the first phi in the + // loop preheader. Use the value to increment the canonical IV, and update all + // users in the loop region to use the adjusted value. // FIXME: Improve modeling for canonical IV start values in the epilogue // loop. using namespace llvm::PatternMatch; @@ -9389,10 +9388,16 @@ static SmallVector<Instruction *> preparePlanForEpilogueVectorLoop( }) && "the canonical IV should only be used by its increment or " "ScalarIVSteps when resetting the start value"); - IV->setOperand(0, VPV); + VPBuilder Builder(Header, Header->getFirstNonPhi()); + VPInstruction *Add = Builder.createNaryOp(Instruction::Add, {IV, VPV}); + IV->replaceAllUsesWith(Add); + Add->setOperand(0, IV); DenseMap<Value *, Value *> ToFrozen; SmallVector<Instruction *> InstsToMove; + // Ensure that the start values for all header phi recipes are updated before + // vectorizing the epilogue loop. Skip the canonical IV, which has been + // handled above. for (VPRecipeBase &R : drop_begin(Header->phis())) { Value *ResumeV = nullptr; // TODO: Move setting of resume values to prepareToExecute. diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index c8a2d84..7563cd7 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1234,6 +1234,18 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { if (!Plan->isUnrolled()) return; + // Hoist an invariant increment Y of a phi X, by having X start at Y. + if (match(Def, m_c_Add(m_VPValue(X), m_VPValue(Y))) && Y->isLiveIn() && + isa<VPPhi>(X)) { + auto *Phi = cast<VPPhi>(X); + if (Phi->getOperand(1) != Def && match(Phi->getOperand(0), m_ZeroInt()) && + Phi->getNumUsers() == 1 && (*Phi->user_begin() == &R)) { + Phi->setOperand(0, Y); + Def->replaceAllUsesWith(Phi); + return; + } + } + // VPVectorPointer for part 0 can be replaced by their start pointer. if (auto *VecPtr = dyn_cast<VPVectorPointerRecipe>(&R)) { if (VecPtr->isFirstPart()) { diff --git a/llvm/test/tools/llvm-remarkutil/filter.test b/llvm/test/tools/llvm-remarkutil/filter.test index 8304b9f..9fd2e94 100644 --- a/llvm/test/tools/llvm-remarkutil/filter.test +++ b/llvm/test/tools/llvm-remarkutil/filter.test @@ -18,9 +18,19 @@ RUN: llvm-remarkutil filter --remark-type=analysis %p/Inputs/filter.yaml | FileC RUN: llvm-remarkutil yaml2bitstream -o %t.opt.bitstream %p/Inputs/filter.yaml RUN: llvm-remarkutil filter --function=func1 %t.opt.bitstream | FileCheck %s --strict-whitespace --check-prefix=REMARK1 +RUN: llvm-remarkutil filter --function=func1 %t.opt.bitstream -o %t.r1.yamL +RUN: cat %t.r1.yamL | FileCheck %s --strict-whitespace --check-prefix=REMARK1 +RUN: llvm-remarkutil filter --function=func1 %t.opt.bitstream -o %t.r1.yMl +RUN: cat %t.r1.yMl | FileCheck %s --strict-whitespace --check-prefix=REMARK1 +RUN: llvm-remarkutil filter --function=func1 %t.opt.bitstream --serializer=yaml -o %t.r1.fake.opt.bitstream +RUN: cat %t.r1.fake.opt.bitstream | FileCheck %s --strict-whitespace --check-prefix=REMARK1 RUN: llvm-remarkutil filter --function=func1 %t.opt.bitstream -o %t.r1.opt.bitstream RUN: llvm-remarkutil bitstream2yaml %t.r1.opt.bitstream | FileCheck %s --strict-whitespace --check-prefix=REMARK1 +RUN: llvm-remarkutil filter --function=func1 %t.opt.bitstream -o %t.r1 +RUN: llvm-remarkutil bitstream2yaml %t.r1 | FileCheck %s --strict-whitespace --check-prefix=REMARK1 +RUN: llvm-remarkutil filter --function=func1 %p/Inputs/filter.yaml --serializer=bitstream -o %t.r1.fake.yaml +RUN: llvm-remarkutil bitstream2yaml %t.r1.fake.yaml | FileCheck %s --strict-whitespace --check-prefix=REMARK1 RUN: llvm-remarkutil filter --function=func %p/Inputs/filter.yaml | FileCheck %s --allow-empty --strict-whitespace --check-prefix=EMPTY diff --git a/llvm/tools/llvm-remarkutil/RemarkFilter.cpp b/llvm/tools/llvm-remarkutil/RemarkFilter.cpp index 507ae36..9b521b4 100644 --- a/llvm/tools/llvm-remarkutil/RemarkFilter.cpp +++ b/llvm/tools/llvm-remarkutil/RemarkFilter.cpp @@ -48,12 +48,8 @@ static Error tryFilter() { return MaybeParser.takeError(); auto &Parser = **MaybeParser; - Format SerializerFormat = OutputFormat; - if (SerializerFormat == Format::Auto) { - SerializerFormat = Parser.ParserFormat; - if (OutputFileName.empty() || OutputFileName == "-") - SerializerFormat = Format::YAML; - } + Format SerializerFormat = + getSerializerFormat(OutputFileName, OutputFormat, Parser.ParserFormat); auto MaybeOF = getOutputFileForRemarks(OutputFileName, SerializerFormat); if (!MaybeOF) diff --git a/llvm/tools/llvm-remarkutil/RemarkUtilHelpers.cpp b/llvm/tools/llvm-remarkutil/RemarkUtilHelpers.cpp index be52948..b6204d0 100644 --- a/llvm/tools/llvm-remarkutil/RemarkUtilHelpers.cpp +++ b/llvm/tools/llvm-remarkutil/RemarkUtilHelpers.cpp @@ -54,6 +54,20 @@ getOutputFileForRemarks(StringRef OutputFileName, Format OutputFormat) { : sys::fs::OF_None); } +Format getSerializerFormat(StringRef OutputFileName, Format SelectedFormat, + Format DefaultFormat) { + if (SelectedFormat != Format::Auto) + return SelectedFormat; + SelectedFormat = DefaultFormat; + if (OutputFileName.empty() || OutputFileName == "-" || + OutputFileName.ends_with_insensitive(".yaml") || + OutputFileName.ends_with_insensitive(".yml")) + SelectedFormat = Format::YAML; + if (OutputFileName.ends_with_insensitive(".bitstream")) + SelectedFormat = Format::Bitstream; + return SelectedFormat; +} + Expected<FilterMatcher> FilterMatcher::createRE(const llvm::cl::opt<std::string> &Arg) { return createRE(Arg.ArgStr, Arg); diff --git a/llvm/tools/llvm-remarkutil/RemarkUtilHelpers.h b/llvm/tools/llvm-remarkutil/RemarkUtilHelpers.h index 0dd550765..73867fe 100644 --- a/llvm/tools/llvm-remarkutil/RemarkUtilHelpers.h +++ b/llvm/tools/llvm-remarkutil/RemarkUtilHelpers.h @@ -47,7 +47,8 @@ "serializer", cl::init(Format::Auto), \ cl::desc("Output remark format to serialize"), \ cl::values(clEnumValN(Format::Auto, "auto", \ - "Follow the parser format (default)"), \ + "Automatic detection based on output file " \ + "extension or parser format (default)"), \ clEnumValN(Format::YAML, "yaml", "YAML"), \ clEnumValN(Format::Bitstream, "bitstream", "Bitstream")), \ cl::sub(SUBOPT)); @@ -151,6 +152,12 @@ getOutputFileWithFlags(StringRef OutputFileName, sys::fs::OpenFlags Flags); Expected<std::unique_ptr<ToolOutputFile>> getOutputFileForRemarks(StringRef OutputFileName, Format OutputFormat); +/// Choose the serializer format. If \p SelectedFormat is Format::Auto, try to +/// detect the format based on the extension of \p OutputFileName or fall back +/// to \p DefaultFormat. +Format getSerializerFormat(StringRef OutputFileName, Format SelectedFormat, + Format DefaultFormat); + /// Filter object which can be either a string or a regex to match with the /// remark properties. class FilterMatcher { diff --git a/mlir/test/Examples/standalone/test.wheel.toy b/mlir/test/Examples/standalone/test.wheel.toy index 5ff9271..c8d188a 100644 --- a/mlir/test/Examples/standalone/test.wheel.toy +++ b/mlir/test/Examples/standalone/test.wheel.toy @@ -2,6 +2,7 @@ # than 255 chars when combined with the fact that pip wants to install into a tmp directory buried under # C/Users/ContainerAdministrator/AppData/Local/Temp. # UNSUPPORTED: target={{.*(windows).*}} +# REQUIRES: expensive_checks # REQUIRES: non-shared-libs-build # REQUIRES: bindings-python diff --git a/mlir/test/lit.cfg.py b/mlir/test/lit.cfg.py index f99c24d..6ff12d6 100644 --- a/mlir/test/lit.cfg.py +++ b/mlir/test/lit.cfg.py @@ -348,6 +348,9 @@ if config.enable_assertions: else: config.available_features.add("noasserts") +if config.expensive_checks: + config.available_features.add("expensive_checks") + def have_host_jit_feature_support(feature_name): mlir_runner_exe = lit.util.which("mlir-runner", config.mlir_tools_dir) diff --git a/mlir/test/lit.site.cfg.py.in b/mlir/test/lit.site.cfg.py.in index 1aaf798..91a71af 100644 --- a/mlir/test/lit.site.cfg.py.in +++ b/mlir/test/lit.site.cfg.py.in @@ -11,6 +11,7 @@ config.llvm_shlib_ext = "@SHLIBEXT@" config.llvm_shlib_dir = lit_config.substitute(path(r"@SHLIBDIR@")) config.python_executable = "@Python3_EXECUTABLE@" config.enable_assertions = @ENABLE_ASSERTIONS@ +config.expensive_checks = "@EXPENSIVE_CHECKS@" config.native_target = "@LLVM_NATIVE_ARCH@" config.host_os = "@HOST_OS@" config.host_cc = "@HOST_CC@" diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index 640fa03..936bc12 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -2917,6 +2917,22 @@ libc_support_library( ) libc_support_library( + name = "__support_math_exp2f16", + hdrs = ["src/__support/math/exp2f16.h"], + deps = [ + ":__support_fputil_except_value_utils", + ":__support_fputil_fma", + ":__support_fputil_multiply_add", + ":__support_fputil_nearest_integer", + ":__support_fputil_polyeval", + ":__support_fputil_rounding_mode", + ":__support_macros_optimization", + ":__support_math_common_constants", + ":__support_math_expxf16_utils", + ], +) + +libc_support_library( name = "__support_math_exp10", hdrs = ["src/__support/math/exp10.h"], deps = [ @@ -3696,7 +3712,7 @@ libc_math_function( libc_math_function( name = "exp2f16", additional_deps = [ - ":__support_math_expxf16_utils", + ":__support_math_exp2f16", ], ) |