diff options
author | Amir Ayupov <aaupov@fb.com> | 2024-09-26 12:25:43 -0700 |
---|---|---|
committer | Amir Ayupov <aaupov@fb.com> | 2024-09-26 12:25:43 -0700 |
commit | 648f2bbea6af7f8a6313ecf4a7dc15579bea53fb (patch) | |
tree | d4df07841adc5cbb0bc0c4d00c2a76e4a2a32ac0 | |
parent | 41e1fa020cfe334c337757390ac648cae047641e (diff) | |
parent | 70ef5eb6f087524dc952a8f5249b79f4a4000e04 (diff) | |
download | llvm-users/shawbyoung/spr/main.bolt-match-blocks-with-pseudo-probes.zip llvm-users/shawbyoung/spr/main.bolt-match-blocks-with-pseudo-probes.tar.gz llvm-users/shawbyoung/spr/main.bolt-match-blocks-with-pseudo-probes.tar.bz2 |
[𝘀𝗽𝗿] changes introduced through rebase
users/shawbyoung/spr/main.bolt-match-blocks-with-pseudo-probes
Created using spr 1.3.4
[skip ci]
298 files changed, 4928 insertions, 1801 deletions
diff --git a/.github/workflows/release-binaries.yml b/.github/workflows/release-binaries.yml index 672dd75..925912d 100644 --- a/.github/workflows/release-binaries.yml +++ b/.github/workflows/release-binaries.yml @@ -442,11 +442,22 @@ jobs: name: ${{ needs.prepare.outputs.release-binary-filename }}-attestation path: ${{ needs.prepare.outputs.release-binary-filename }}.jsonl + - name: Checkout Release Scripts + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + with: + sparse-checkout: | + llvm/utils/release/github-upload-release.py + llvm/utils/git/requirements.txt + sparse-checkout-cone-mode: false + + - name: Install Python Requirements + run: | + pip install --require-hashes -r ./llvm/utils/git/requirements.txt + - name: Upload Release shell: bash run: | - sudo apt install python3-github - ./llvm-project/llvm/utils/release/github-upload-release.py \ + ./llvm/utils/release/github-upload-release.py \ --token ${{ github.token }} \ --release ${{ needs.prepare.outputs.release-version }} \ upload \ diff --git a/bolt/include/bolt/Profile/ProfileYAMLMapping.h b/bolt/include/bolt/Profile/ProfileYAMLMapping.h index 91955af..9865118 100644 --- a/bolt/include/bolt/Profile/ProfileYAMLMapping.h +++ b/bolt/include/bolt/Profile/ProfileYAMLMapping.h @@ -270,12 +270,12 @@ template <> struct MappingTraits<bolt::BinaryProfileHeader> { }; namespace bolt { -struct PseudoProbeDesc { +struct ProfilePseudoProbeDesc { std::vector<Hex64> GUID; std::vector<Hex64> Hash; std::vector<uint32_t> GUIDHashIdx; // Index of hash for that GUID in Hash - bool operator==(const PseudoProbeDesc &Other) const { + bool operator==(const ProfilePseudoProbeDesc &Other) const { // Only treat empty Desc as equal return GUID.empty() && Other.GUID.empty() && Hash.empty() && Other.Hash.empty() && GUIDHashIdx.empty() && @@ -284,8 +284,8 @@ struct PseudoProbeDesc { }; } // end namespace bolt -template <> struct MappingTraits<bolt::PseudoProbeDesc> { - static void mapping(IO &YamlIO, 
bolt::PseudoProbeDesc &PD) { +template <> struct MappingTraits<bolt::ProfilePseudoProbeDesc> { + static void mapping(IO &YamlIO, bolt::ProfilePseudoProbeDesc &PD) { YamlIO.mapRequired("gs", PD.GUID); YamlIO.mapRequired("gh", PD.GUIDHashIdx); YamlIO.mapRequired("hs", PD.Hash); @@ -295,7 +295,7 @@ template <> struct MappingTraits<bolt::PseudoProbeDesc> { } // end namespace llvm LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::bolt::BinaryFunctionProfile) -LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::bolt::PseudoProbeDesc) +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::bolt::ProfilePseudoProbeDesc) namespace llvm { namespace yaml { @@ -304,7 +304,7 @@ namespace bolt { struct BinaryProfile { BinaryProfileHeader Header; std::vector<BinaryFunctionProfile> Functions; - PseudoProbeDesc PseudoProbeDesc; + ProfilePseudoProbeDesc PseudoProbeDesc; }; } // namespace bolt @@ -313,7 +313,7 @@ template <> struct MappingTraits<bolt::BinaryProfile> { YamlIO.mapRequired("header", BP.Header); YamlIO.mapRequired("functions", BP.Functions); YamlIO.mapOptional("pseudo_probe_desc", BP.PseudoProbeDesc, - bolt::PseudoProbeDesc()); + bolt::ProfilePseudoProbeDesc()); } }; diff --git a/bolt/include/bolt/Profile/YAMLProfileReader.h b/bolt/include/bolt/Profile/YAMLProfileReader.h index bd5a86f..a6f0fd6 100644 --- a/bolt/include/bolt/Profile/YAMLProfileReader.h +++ b/bolt/include/bolt/Profile/YAMLProfileReader.h @@ -105,7 +105,7 @@ private: yaml::bolt::BinaryProfile YamlBP; /// Map a function ID from a YAML profile to a BinaryFunction object. - std::vector<BinaryFunction *> YamlProfileToFunction; + DenseMap<uint32_t, BinaryFunction *> YamlProfileToFunction; using FunctionSet = std::unordered_set<const BinaryFunction *>; /// To keep track of functions that have a matched profile before the profile @@ -162,8 +162,6 @@ private: /// Update matched YAML -> BinaryFunction pair. 
void matchProfileToFunction(yaml::bolt::BinaryFunctionProfile &YamlBF, BinaryFunction &BF) { - if (YamlBF.Id >= YamlProfileToFunction.size()) - YamlProfileToFunction.resize(YamlBF.Id + 1); YamlProfileToFunction[YamlBF.Id] = &BF; YamlBF.Used = true; diff --git a/bolt/include/bolt/Profile/YAMLProfileWriter.h b/bolt/include/bolt/Profile/YAMLProfileWriter.h index aec6e47..d4d7217 100644 --- a/bolt/include/bolt/Profile/YAMLProfileWriter.h +++ b/bolt/include/bolt/Profile/YAMLProfileWriter.h @@ -47,7 +47,7 @@ public: convertBFInlineTree(const MCPseudoProbeDecoder &Decoder, const InlineTreeDesc &InlineTree, uint64_t GUID); - static std::tuple<yaml::bolt::PseudoProbeDesc, InlineTreeDesc> + static std::tuple<yaml::bolt::ProfilePseudoProbeDesc, InlineTreeDesc> convertPseudoProbeDesc(const MCPseudoProbeDecoder &PseudoProbeDecoder); static yaml::bolt::BinaryFunctionProfile diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index 67ed320..3bd0950 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -238,9 +238,7 @@ bool YAMLProfileReader::parseFunctionProfile( BB.setExecutionCount(YamlBB.ExecCount); for (const yaml::bolt::CallSiteInfo &YamlCSI : YamlBB.CallSites) { - BinaryFunction *Callee = YamlCSI.DestId < YamlProfileToFunction.size() - ? YamlProfileToFunction[YamlCSI.DestId] - : nullptr; + BinaryFunction *Callee = YamlProfileToFunction.lookup(YamlCSI.DestId); bool IsFunction = Callee ? true : false; MCSymbol *CalleeSymbol = nullptr; if (IsFunction) @@ -707,7 +705,7 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { break; } } - YamlProfileToFunction.resize(YamlBP.Functions.size() + 1); + YamlProfileToFunction.reserve(YamlBP.Functions.size()); // Computes hash for binary functions. 
if (opts::MatchProfileWithFunctionHash) { @@ -760,12 +758,7 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) { NormalizeByCalls = usesEvent("branches"); uint64_t NumUnused = 0; for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) { - if (YamlBF.Id >= YamlProfileToFunction.size()) { - // Such profile was ignored. - ++NumUnused; - continue; - } - if (BinaryFunction *BF = YamlProfileToFunction[YamlBF.Id]) + if (BinaryFunction *BF = YamlProfileToFunction.lookup(YamlBF.Id)) parseFunctionProfile(*BF, YamlBF); else ++NumUnused; diff --git a/bolt/lib/Profile/YAMLProfileWriter.cpp b/bolt/lib/Profile/YAMLProfileWriter.cpp index 44600c3..4437be4 100644 --- a/bolt/lib/Profile/YAMLProfileWriter.cpp +++ b/bolt/lib/Profile/YAMLProfileWriter.cpp @@ -81,9 +81,10 @@ YAMLProfileWriter::collectInlineTree( return InlineTree; } -std::tuple<yaml::bolt::PseudoProbeDesc, YAMLProfileWriter::InlineTreeDesc> +std::tuple<yaml::bolt::ProfilePseudoProbeDesc, + YAMLProfileWriter::InlineTreeDesc> YAMLProfileWriter::convertPseudoProbeDesc(const MCPseudoProbeDecoder &Decoder) { - yaml::bolt::PseudoProbeDesc Desc; + yaml::bolt::ProfilePseudoProbeDesc Desc; InlineTreeDesc InlineTree; for (const MCDecodedPseudoProbeInlineTree &TopLev : diff --git a/bolt/test/X86/pseudoprobe-decoding-inline.test b/bolt/test/X86/pseudoprobe-decoding-inline.test index ec11725..e5e8aad 100644 --- a/bolt/test/X86/pseudoprobe-decoding-inline.test +++ b/bolt/test/X86/pseudoprobe-decoding-inline.test @@ -34,8 +34,8 @@ # ## Check that without --profile-write-pseudo-probes option, no pseudo probes are ## generated -# RUN: perf2bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin -p %t.preagg --pa -w %t.yaml -o %t.fdata -# RUN: FileCheck --input-file %t.yaml %s --check-prefix CHECK-NO-OPT +# RUN: perf2bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin -p %t.preagg --pa -w %t.yaml3 -o %t.fdata +# RUN: FileCheck --input-file %t.yaml3 %s 
--check-prefix CHECK-NO-OPT # CHECK-NO-OPT-NOT: probes: # CHECK-NO-OPT-NOT: inline_tree: # CHECK-NO-OPT-NOT: pseudo_probe_desc: diff --git a/bolt/test/X86/pseudoprobe-decoding-noinline.test b/bolt/test/X86/pseudoprobe-decoding-noinline.test index 5dd6c2e..36a2fab 100644 --- a/bolt/test/X86/pseudoprobe-decoding-noinline.test +++ b/bolt/test/X86/pseudoprobe-decoding-noinline.test @@ -1,6 +1,45 @@ # REQUIRES: system-linux -# RUN: llvm-bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/noinline-cs-pseudoprobe.perfbin --print-pseudo-probes=all -o %t.bolt 2>&1 | FileCheck %s +# RUN: llvm-bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/noinline-cs-pseudoprobe.perfbin --print-pseudo-probes=all -o %t.bolt --lite=0 --enable-bat 2>&1 | FileCheck %s +# PREAGG: B X:0 #foo# 1 0 +# PREAGG: B X:0 #bar# 1 0 +# PREAGG: B X:0 #main# 1 0 + +## Check pseudo-probes in regular YAML profile (non-BOLTed binary) +# RUN: link_fdata %s %S/../../../llvm/test/tools/llvm-profgen/Inputs/noinline-cs-pseudoprobe.perfbin %t.preagg PREAGG +# RUN: perf2bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/noinline-cs-pseudoprobe.perfbin -p %t.preagg --pa -w %t.yaml -o %t.fdata --profile-write-pseudo-probes +# RUN: FileCheck --input-file %t.yaml %s --check-prefix CHECK-YAML +## Check pseudo-probes in BAT YAML profile (BOLTed binary) +# RUN: link_fdata %s %t.bolt %t.preagg2 PREAGG +# RUN: perf2bolt %t.bolt -p %t.preagg2 --pa -w %t.yaml2 -o %t.fdata2 --profile-write-pseudo-probes +# RUN: FileCheck --input-file %t.yaml2 %s --check-prefix CHECK-YAML +# CHECK-YAML: name: bar +# CHECK-YAML: - bid: 0 +# CHECK-YAML: probes: [ { blx: 9 } ] +# CHECK-YAML: inline_tree: [ { } ] +# +# CHECK-YAML: name: foo +# CHECK-YAML: - bid: 0 +# CHECK-YAML: probes: [ { blx: 3 } ] +# CHECK-YAML: inline_tree: [ { g: 2 } ] +# +# CHECK-YAML: name: main +# CHECK-YAML: - bid: 0 +# CHECK-YAML: probes: [ { blx: 1, call: [ 2 ] } ] +# CHECK-YAML: inline_tree: [ { g: 1 } ] +# +# CHECK-YAML: pseudo_probe_desc: +# CHECK-YAML-NEXT: gs: 
[ 0xE413754A191DB537, 0xDB956436E78DD5FA, 0x5CF8C24CDB18BDAC ] +# CHECK-YAML-NEXT: gh: [ 2, 1, 0 ] +# CHECK-YAML-NEXT: hs: [ 0x200205A19C5B4, 0x10000FFFFFFFF, 0x10E852DA94 ] +# +## Check that without --profile-write-pseudo-probes option, no pseudo probes are +## generated +# RUN: perf2bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/noinline-cs-pseudoprobe.perfbin -p %t.preagg --pa -w %t.yaml3 -o %t.fdata +# RUN: FileCheck --input-file %t.yaml3 %s --check-prefix CHECK-NO-OPT +# CHECK-NO-OPT-NOT: probes: +# CHECK-NO-OPT-NOT: inline_tree: +# CHECK-NO-OPT-NOT: pseudo_probe_desc: ;; Report of decoding input pseudo probe binaries ; CHECK: GUID: 6699318081062747564 Name: foo diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone/pointer-arithmetic-on-polymorphic-object.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone/pointer-arithmetic-on-polymorphic-object.rst index 1884acd..95509ef 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/bugprone/pointer-arithmetic-on-polymorphic-object.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone/pointer-arithmetic-on-polymorphic-object.rst @@ -19,20 +19,28 @@ Example: .. code-block:: c++ struct Base { - virtual void ~Base(); + virtual ~Base(); + int i; }; struct Derived : public Base {}; - void foo() { - Base *b = new Derived[10]; - + void foo(Base* b) { b += 1; // warning: pointer arithmetic on class that declares a virtual function can // result in undefined behavior if the dynamic type differs from the // pointer type + } + + int bar(const Derived d[]) { + return d[1].i; // warning due to pointer arithmetic on polymorphic object + } - delete[] static_cast<Derived*>(b); + // Making Derived final suppresses the warning + struct FinalDerived final : public Base {}; + + int baz(const FinalDerived d[]) { + return d[1].i; // no warning as FinalDerived is final } Options @@ -47,17 +55,9 @@ Options .. 
code-block:: c++ - void bar() { - Base *b = new Base[10]; + void bar(Base b[], Derived d[]) { b += 1; // warning, as Base declares a virtual destructor - - delete[] b; - - Derived *d = new Derived[10]; // Derived overrides the destructor, and - // declares no other virtual functions d += 1; // warning only if IgnoreVirtualDeclarationsOnly is set to false - - delete[] d; } References diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index 3dc04f6..6cf03d2 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -4763,6 +4763,7 @@ def HLSLSaturate : LangBuiltin<"HLSL_LANG"> { let Prototype = "void(...)"; } + def HLSLSelect : LangBuiltin<"HLSL_LANG"> { let Spellings = ["__builtin_hlsl_select"]; let Attributes = [NoThrow, Const]; @@ -4775,6 +4776,12 @@ def HLSLSign : LangBuiltin<"HLSL_LANG"> { let Prototype = "void(...)"; } +def HLSLStep: LangBuiltin<"HLSL_LANG"> { + let Spellings = ["__builtin_hlsl_step"]; + let Attributes = [NoThrow, Const]; + let Prototype = "void(...)"; +} + // Builtins for XRay. 
def XRayCustomEvent : Builtin { let Spellings = ["__xray_customevent"]; diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 8c69199..f780322 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -5609,10 +5609,6 @@ def pg : Flag<["-"], "pg">, HelpText<"Enable mcount instrumentation">, MarshallingInfoFlag<CodeGenOpts<"InstrumentForProfiling">>; def pipe : Flag<["-", "--"], "pipe">, HelpText<"Use pipes between commands, when possible">; -// Facebook T92898286 -def post_link_optimize : Flag<["--"], "post-link-optimize">, - HelpText<"Apply post-link optimizations using BOLT">; -// End Facebook T92898286 def prebind__all__twolevel__modules : Flag<["-"], "prebind_all_twolevel_modules">; def prebind : Flag<["-"], "prebind">; def preload : Flag<["-"], "preload">; diff --git a/clang/lib/Basic/Targets/RISCV.cpp b/clang/lib/Basic/Targets/RISCV.cpp index 6f9d050..223ac66 100644 --- a/clang/lib/Basic/Targets/RISCV.cpp +++ b/clang/lib/Basic/Targets/RISCV.cpp @@ -146,6 +146,8 @@ void RISCVTargetInfo::getTargetDefines(const LangOptions &Opts, Builder.defineMacro("__riscv_cmodel_medlow"); else if (CodeModel == "medium") Builder.defineMacro("__riscv_cmodel_medany"); + else if (CodeModel == "large") + Builder.defineMacro("__riscv_cmodel_large"); StringRef ABIName = getABI(); if (ABIName == "ilp32f" || ABIName == "lp64f") diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 9950c06..27abeba 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -18861,6 +18861,16 @@ case Builtin::BI__builtin_hlsl_elementwise_isinf: { return SelectVal; } + case Builtin::BI__builtin_hlsl_step: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + assert(E->getArg(0)->getType()->hasFloatingRepresentation() && + E->getArg(1)->getType()->hasFloatingRepresentation() && + "step operands must have a float 
representation"); + return Builder.CreateIntrinsic( + /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getStepIntrinsic(), + ArrayRef<Value *>{Op0, Op1}, nullptr, "hlsl.step"); + } case Builtin::BI__builtin_hlsl_wave_get_lane_index: { return EmitRuntimeCall(CGM.CreateRuntimeFunction( llvm::FunctionType::get(IntTy, {}, false), "__hlsl_wave_get_lane_index", diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h index 6e22680..a8aabca 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.h +++ b/clang/lib/CodeGen/CGHLSLRuntime.h @@ -81,6 +81,7 @@ public: GENERATE_HLSL_INTRINSIC_FUNCTION(Rsqrt, rsqrt) GENERATE_HLSL_INTRINSIC_FUNCTION(Saturate, saturate) GENERATE_HLSL_INTRINSIC_FUNCTION(Sign, sign) + GENERATE_HLSL_INTRINSIC_FUNCTION(Step, step) GENERATE_HLSL_INTRINSIC_FUNCTION(ThreadId, thread_id) GENERATE_HLSL_INTRINSIC_FUNCTION(FDot, fdot) GENERATE_HLSL_INTRINSIC_FUNCTION(SDot, sdot) diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp index 608fdf2..b7ae0de 100644 --- a/clang/lib/Driver/ToolChains/Gnu.cpp +++ b/clang/lib/Driver/ToolChains/Gnu.cpp @@ -672,41 +672,12 @@ void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA, } } - // Facebook T92898286 - if (Args.hasArg(options::OPT_post_link_optimize)) - CmdArgs.push_back("-q"); - // End Facebook T92898286 - Args.AddAllArgs(CmdArgs, options::OPT_T); const char *Exec = Args.MakeArgString(ToolChain.GetLinkerPath()); C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs, Output)); - // Facebook T92898286 - if (!Args.hasArg(options::OPT_post_link_optimize) || !Output.isFilename()) - return; - - const char *MvExec = Args.MakeArgString(ToolChain.GetProgramPath("mv")); - ArgStringList MoveCmdArgs; - MoveCmdArgs.push_back(Output.getFilename()); - const char *PreBoltBin = - Args.MakeArgString(Twine(Output.getFilename()) + ".pre-bolt"); - MoveCmdArgs.push_back(PreBoltBin); - 
C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(), - MvExec, MoveCmdArgs, std::nullopt)); - - ArgStringList BoltCmdArgs; - const char *BoltExec = - Args.MakeArgString(ToolChain.GetProgramPath("llvm-bolt")); - BoltCmdArgs.push_back(PreBoltBin); - BoltCmdArgs.push_back("-reorder-blocks=reverse"); - BoltCmdArgs.push_back("-update-debug-sections"); - BoltCmdArgs.push_back("-o"); - BoltCmdArgs.push_back(Output.getFilename()); - C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(), - BoltExec, BoltCmdArgs, std::nullopt)); - // End Facebook T92898286 } void tools::gnutools::Assembler::ConstructJob(Compilation &C, diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h index 7a1edd9..d08dcd3 100644 --- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h +++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h @@ -1718,6 +1718,39 @@ _HLSL_BUILTIN_ALIAS(__builtin_elementwise_sqrt) float4 sqrt(float4); //===----------------------------------------------------------------------===// +// step builtins +//===----------------------------------------------------------------------===// + +/// \fn T step(T x, T y) +/// \brief Returns 1 if the x parameter is greater than or equal to the y +/// parameter; otherwise, 0. vector. \param x [in] The first floating-point +/// value to compare. \param y [in] The first floating-point value to compare. +/// +/// Step is based on the following formula: (x >= y) ? 
1 : 0 + +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_step) +half step(half, half); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_step) +half2 step(half2, half2); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_step) +half3 step(half3, half3); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_step) +half4 step(half4, half4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_step) +float step(float, float); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_step) +float2 step(float2, float2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_step) +float3 step(float3, float3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_step) +float4 step(float4, float4); + +//===----------------------------------------------------------------------===// // tan builtins //===----------------------------------------------------------------------===// diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index 4e44813..527718c 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -1747,6 +1747,18 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { SetElementTypeAsReturnType(&SemaRef, TheCall, getASTContext().IntTy); break; } + case Builtin::BI__builtin_hlsl_step: { + if (SemaRef.checkArgCount(TheCall, 2)) + return true; + if (CheckFloatOrHalfRepresentations(&SemaRef, TheCall)) + return true; + + ExprResult A = TheCall->getArg(0); + QualType ArgTyA = A.get()->getType(); + // return type is the same as the input type + TheCall->setType(ArgTyA); + break; + } // Note these are llvm builtins that we want to catch invalid intrinsic // generation. Normal handling of these builitns will occur elsewhere. 
case Builtin::BI__builtin_elementwise_bitreverse: { diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp index d21b8cb..4d11f2a 100644 --- a/clang/lib/Sema/SemaInit.cpp +++ b/clang/lib/Sema/SemaInit.cpp @@ -9548,7 +9548,7 @@ static void DiagnoseNarrowingInInitList(Sema &S, unsigned ConstRefDiagID, unsigned WarnDiagID) { unsigned DiagID; auto &L = S.getLangOpts(); - if (L.CPlusPlus11 && + if (L.CPlusPlus11 && !L.HLSL && (!L.MicrosoftExt || L.isCompatibleWithMSVC(LangOptions::MSVC2015))) DiagID = IsConstRef ? ConstRefDiagID : DefaultDiagID; else diff --git a/clang/test/AST/HLSL/vector-constructors.hlsl b/clang/test/AST/HLSL/vector-constructors.hlsl index 905f11d..9161ad1 100644 --- a/clang/test/AST/HLSL/vector-constructors.hlsl +++ b/clang/test/AST/HLSL/vector-constructors.hlsl @@ -1,4 +1,5 @@ -// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -ast-dump -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -ast-dump -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -ast-dump -o - %s | FileCheck %s typedef float float2 __attribute__((ext_vector_type(2))); typedef float float3 __attribute__((ext_vector_type(3))); diff --git a/clang/test/ClangScanDeps/implicit-target.c b/clang/test/ClangScanDeps/implicit-target.c new file mode 100644 index 0000000..cf757f9 --- /dev/null +++ b/clang/test/ClangScanDeps/implicit-target.c @@ -0,0 +1,31 @@ +// Check that we can detect an implicit target when clang is invoked as +// <triple->clang. Using an implicit triple requires that the target actually +// is available, too. +// REQUIRES: x86-registered-target + +// RUN: rm -rf %t +// RUN: split-file %s %t +// RUN: sed -e "s|DIR|%/t|g" %t/cdb.json.in > %t/cdb.json + +// Check that we can deduce this both when using a compilation database, and when using +// a literal command line. 
+ +// RUN: clang-scan-deps -format experimental-full -compilation-database %t/cdb.json | FileCheck %s + +// RUN: clang-scan-deps -format experimental-full -- x86_64-w64-mingw32-clang %t/source.c -o %t/source.o | FileCheck %s + +// CHECK: "-triple", +// CHECK-NEXT: "x86_64-w64-windows-gnu", + + +//--- cdb.json.in +[ + { + "directory": "DIR", + "command": "x86_64-w64-mingw32-clang -c DIR/source.c -o DIR/source.o", + "file": "DIR/source.c" + }, +] + +//--- source.c +void func(void) {} diff --git a/clang/test/CodeGenHLSL/builtins/step.hlsl b/clang/test/CodeGenHLSL/builtins/step.hlsl new file mode 100644 index 0000000..442f493 --- /dev/null +++ b/clang/test/CodeGenHLSL/builtins/step.hlsl @@ -0,0 +1,84 @@ +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \
+// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
+// RUN: --check-prefixes=CHECK,NATIVE_HALF \
+// RUN: -DFNATTRS=noundef -DTARGET=dx
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
+// RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \
+// RUN: -DFNATTRS=noundef -DTARGET=dx
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \
+// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
+// RUN: --check-prefixes=CHECK,NATIVE_HALF \
+// RUN: -DFNATTRS="spir_func noundef" -DTARGET=spv
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN: spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \
+// RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF \
+// RUN: -DFNATTRS="spir_func noundef" -DTARGET=spv
+
+// NATIVE_HALF: define [[FNATTRS]] half @
+// NATIVE_HALF: call half @llvm.[[TARGET]].step.f16(half
+// NO_HALF: call float @llvm.[[TARGET]].step.f32(float
+// NATIVE_HALF: ret half
+// NO_HALF: ret float
+half test_step_half(half p0, half p1)
+{
+ return step(p0, p1);
+}
+// NATIVE_HALF: define [[FNATTRS]] <2 x half> @
+// NATIVE_HALF: call <2 x half> @llvm.[[TARGET]].step.v2f16(<2 x half>
+// NO_HALF: call <2 x float> @llvm.[[TARGET]].step.v2f32(<2 x float>
+// NATIVE_HALF: ret <2 x half> %hlsl.step
+// NO_HALF: ret <2 x float> %hlsl.step
+half2 test_step_half2(half2 p0, half2 p1)
+{
+ return step(p0, p1);
+}
+// NATIVE_HALF: define [[FNATTRS]] <3 x half> @
+// NATIVE_HALF: call <3 x half> @llvm.[[TARGET]].step.v3f16(<3 x half>
+// NO_HALF: call <3 x float> @llvm.[[TARGET]].step.v3f32(<3 x float>
+// NATIVE_HALF: ret <3 x half> %hlsl.step
+// NO_HALF: ret <3 x float> %hlsl.step
+half3 test_step_half3(half3 p0, half3 p1)
+{
+ return step(p0, p1);
+}
+// NATIVE_HALF: define [[FNATTRS]] <4 x half> @
+// NATIVE_HALF: call <4 x half> @llvm.[[TARGET]].step.v4f16(<4 x half>
+// NO_HALF: call <4 x float> @llvm.[[TARGET]].step.v4f32(<4 x float>
+// NATIVE_HALF: ret <4 x half> %hlsl.step
+// NO_HALF: ret <4 x float> %hlsl.step
+half4 test_step_half4(half4 p0, half4 p1)
+{
+ return step(p0, p1);
+}
+
+// CHECK: define [[FNATTRS]] float @
+// CHECK: call float @llvm.[[TARGET]].step.f32(float
+// CHECK: ret float
+float test_step_float(float p0, float p1)
+{
+ return step(p0, p1);
+}
+// CHECK: define [[FNATTRS]] <2 x float> @
+// CHECK: %hlsl.step = call <2 x float> @llvm.[[TARGET]].step.v2f32(
+// CHECK: ret <2 x float> %hlsl.step
+float2 test_step_float2(float2 p0, float2 p1)
+{
+ return step(p0, p1);
+}
+// CHECK: define [[FNATTRS]] <3 x float> @
+// CHECK: %hlsl.step = call <3 x float> @llvm.[[TARGET]].step.v3f32(
+// CHECK: ret <3 x float> %hlsl.step
+float3 test_step_float3(float3 p0, float3 p1)
+{
+ return step(p0, p1);
+}
+// CHECK: define [[FNATTRS]] <4 x float> @
+// CHECK: %hlsl.step = call <4 x float> @llvm.[[TARGET]].step.v4f32(
+// CHECK: ret <4 x float> %hlsl.step
+float4 test_step_float4(float4 p0, float4 p1)
+{
+ return step(p0, p1);
+}
diff --git a/clang/test/Driver/cl-link.c b/clang/test/Driver/cl-link.c index f526044..9bf8a81 100644 --- a/clang/test/Driver/cl-link.c +++ b/clang/test/Driver/cl-link.c @@ -13,17 +13,17 @@ // ASAN: link.exe // ASAN: "-debug" // ASAN: "-incremental:no" -// ASAN: "{{[^"]*}}clang_rt.asan_dynamic.lib" -// ASAN: "-wholearchive:{{.*}}clang_rt.asan_static_runtime_thunk.lib" +// ASAN: "{{[^"]*}}clang_rt.asan_dynamic{{(-i386)?}}.lib" +// ASAN: "-wholearchive:{{.*}}clang_rt.asan_static_runtime_thunk{{(-i386)?}}.lib" // ASAN: "{{.*}}cl-link{{.*}}.obj" // RUN: %clang_cl -m32 -arch:IA32 --target=i386-pc-win32 /MD /Tc%s -fuse-ld=link -### -fsanitize=address 2>&1 | FileCheck --check-prefix=ASAN-MD %s // ASAN-MD: link.exe // ASAN-MD: "-debug" // ASAN-MD: "-incremental:no" -// ASAN-MD: "{{.*}}clang_rt.asan_dynamic.lib" +// ASAN-MD: "{{.*}}clang_rt.asan_dynamic{{(-i386)?}}.lib" // ASAN-MD: "-include:___asan_seh_interceptor" -// ASAN-MD: "-wholearchive:{{.*}}clang_rt.asan_dynamic_runtime_thunk.lib" +// ASAN-MD: "-wholearchive:{{.*}}clang_rt.asan_dynamic_runtime_thunk{{(-i386)?}}.lib" // ASAN-MD: "{{.*}}cl-link{{.*}}.obj" // RUN: %clang_cl /LD -fuse-ld=link -### /Tc%s 2>&1 | FileCheck --check-prefix=DLL %s @@ -37,8 +37,8 @@ // ASAN-DLL: "-dll" // ASAN-DLL: "-debug" // ASAN-DLL: "-incremental:no" -// ASAN-DLL: "{{.*}}clang_rt.asan_dynamic.lib" -// ASAN-DLL: "-wholearchive:{{.*}}clang_rt.asan_static_runtime_thunk.lib" +// ASAN-DLL: "{{.*}}clang_rt.asan_dynamic{{(-i386)?}}.lib" +// ASAN-DLL: "-wholearchive:{{.*}}clang_rt.asan_static_runtime_thunk{{(-i386)?}}.lib" // ASAN-DLL: "{{.*}}cl-link{{.*}}.obj" // RUN: %clang_cl /Zi /Tc%s -fuse-ld=link -### 2>&1 | FileCheck --check-prefix=DEBUG %s diff --git a/clang/test/Driver/windows-cross.c b/clang/test/Driver/windows-cross.c index f6e831f..096358d 100644 --- a/clang/test/Driver/windows-cross.c +++ b/clang/test/Driver/windows-cross.c @@ -64,7 +64,7 @@ // RUN: | FileCheck %s --check-prefix CHECK-SANITIZE-ADDRESS-EXE-X86 // 
CHECK-SANITIZE-ADDRESS-EXE-X86: "-fsanitize=address" -// CHECK-SANITIZE-ADDRESS-EXE-X86: "{{.*}}clang_rt.asan_dynamic.lib" "{{.*}}clang_rt.asan_dynamic_runtime_thunk.lib" "--undefined" "___asan_seh_interceptor" +// CHECK-SANITIZE-ADDRESS-EXE-X86: "{{.*}}clang_rt.asan_dynamic{{(-i386)?}}.lib" "{{.*}}clang_rt.asan_dynamic_runtime_thunk{{(-i386)?}}.lib" "--undefined" "___asan_seh_interceptor" // RUN: not %clang -### --target=armv7-windows-itanium --sysroot %S/Inputs/Windows/ARM/8.1 -B %S/Inputs/Windows/ARM/8.1/usr/bin -fuse-ld=lld-link2 -shared -o shared.dll -fsanitize=tsan -x c++ %s 2>&1 \ // RUN: | FileCheck %s --check-prefix CHECK-SANITIZE-TSAN diff --git a/clang/test/Preprocessor/riscv-cmodel.c b/clang/test/Preprocessor/riscv-cmodel.c index 45b9a93..0a531c7 100644 --- a/clang/test/Preprocessor/riscv-cmodel.c +++ b/clang/test/Preprocessor/riscv-cmodel.c @@ -15,6 +15,7 @@ // CHECK-MEDLOW: #define __riscv_cmodel_medlow 1 // CHECK-MEDLOW-NOT: __riscv_cmodel_medany +// CHECK-MEDLOW-NOT: __riscv_cmodel_large // RUN: %clang --target=riscv32-unknown-linux-gnu -march=rv32i -x c -E -dM %s \ // RUN: -mcmodel=medium -o - | FileCheck --check-prefix=CHECK-MEDANY %s @@ -28,3 +29,11 @@ // CHECK-MEDANY: #define __riscv_cmodel_medany 1 // CHECK-MEDANY-NOT: __riscv_cmodel_medlow +// CHECK-MEDANY-NOT: __riscv_cmodel_large + +// RUN: %clang --target=riscv64-unknown-linux-gnu -march=rv64i -fno-pic -x c -E -dM %s \ +// RUN: -mcmodel=large -o - | FileCheck --check-prefix=CHECK-LARGE %s + +// CHECK-LARGE: #define __riscv_cmodel_large 1 +// CHECK-LARGE-NOT: __riscv_cmodel_medlow +// CHECK-LARGE-NOT: __riscv_cmodel_medany diff --git a/clang/test/SemaHLSL/BuiltIns/step-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/step-errors.hlsl new file mode 100644 index 0000000..8235852 --- /dev/null +++ b/clang/test/SemaHLSL/BuiltIns/step-errors.hlsl @@ -0,0 +1,31 @@ +// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -disable-llvm-passes -verify 
-verify-ignore-unexpected
+
+void test_too_few_arg()
+{
+ return __builtin_hlsl_step();
+ // expected-error@-1 {{too few arguments to function call, expected 2, have 0}}
+}
+
+void test_too_many_arg(float2 p0)
+{
+ return __builtin_hlsl_step(p0, p0, p0);
+ // expected-error@-1 {{too many arguments to function call, expected 2, have 3}}
+}
+
+bool builtin_bool_to_float_type_promotion(bool p1)
+{
+ return __builtin_hlsl_step(p1, p1);
+ // expected-error@-1 {{passing 'bool' to parameter of incompatible type 'float'}}
+}
+
+bool builtin_step_int_to_float_promotion(int p1)
+{
+ return __builtin_hlsl_step(p1, p1);
+ // expected-error@-1 {{passing 'int' to parameter of incompatible type 'float'}}
+}
+
+bool2 builtin_step_int2_to_float2_promotion(int2 p1)
+{
+ return __builtin_hlsl_step(p1, p1);
+ // expected-error@-1 {{passing 'int2' (aka 'vector<int, 2>') to parameter of incompatible type '__attribute__((__vector_size__(2 * sizeof(float)))) float' (vector of 2 'float' values)}}
+}
diff --git a/clang/tools/clang-scan-deps/CMakeLists.txt b/clang/tools/clang-scan-deps/CMakeLists.txt index f0be6a5..10bc0ff 100644 --- a/clang/tools/clang-scan-deps/CMakeLists.txt +++ b/clang/tools/clang-scan-deps/CMakeLists.txt @@ -1,4 +1,5 @@ set(LLVM_LINK_COMPONENTS + ${LLVM_TARGETS_TO_BUILD} Core Option Support diff --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp b/clang/tools/clang-scan-deps/ClangScanDeps.cpp index 259058c..1db7245 100644 --- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp +++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp @@ -15,6 +15,7 @@ #include "clang/Tooling/DependencyScanning/DependencyScanningTool.h" #include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h" #include "clang/Tooling/JSONCompilationDatabase.h" +#include "clang/Tooling/Tooling.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/CommandLine.h" @@ -24,6 +25,7 @@ #include "llvm/Support/LLVMDriver.h" #include "llvm/Support/Program.h" #include "llvm/Support/Signals.h" +#include "llvm/Support/TargetSelect.h" #include "llvm/Support/ThreadPool.h" #include "llvm/Support/Threading.h" #include "llvm/Support/Timer.h" @@ -795,6 +797,7 @@ getCompilationDatabase(int argc, char **argv, std::string &ErrorMessage) { } int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) { + llvm::InitializeAllTargetInfos(); std::string ErrorMessage; std::unique_ptr<tooling::CompilationDatabase> Compilations = getCompilationDatabase(argc, argv, ErrorMessage); @@ -810,6 +813,8 @@ int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) { Compilations = expandResponseFiles(std::move(Compilations), llvm::vfs::getRealFileSystem()); + Compilations = inferTargetAndDriverMode(std::move(Compilations)); + // The command options are rewritten to run Clang in preprocessor only mode. 
auto AdjustingCompilations = std::make_unique<tooling::ArgumentsAdjustingCompilations>( diff --git a/clang/utils/TableGen/ClangASTPropertiesEmitter.cpp b/clang/utils/TableGen/ClangASTPropertiesEmitter.cpp index 70005da..2d67b6b 100644 --- a/clang/utils/TableGen/ClangASTPropertiesEmitter.cpp +++ b/clang/utils/TableGen/ClangASTPropertiesEmitter.cpp @@ -89,13 +89,13 @@ struct CasedTypeInfo { class ASTPropsEmitter { raw_ostream &Out; - RecordKeeper &Records; + const RecordKeeper &Records; std::map<HasProperties, NodeInfo> NodeInfos; std::vector<PropertyType> AllPropertyTypes; std::map<PropertyType, CasedTypeInfo> CasedTypeInfos; public: - ASTPropsEmitter(RecordKeeper &records, raw_ostream &out) + ASTPropsEmitter(const RecordKeeper &records, raw_ostream &out) : Out(out), Records(records) { // Find all the properties. @@ -587,28 +587,28 @@ void ASTPropsEmitter::emitWriteOfProperty(StringRef writerName, /// Emit an .inc file that defines the AbstractFooReader class /// for the given AST class hierarchy. template <class NodeClass> -static void emitASTReader(RecordKeeper &records, raw_ostream &out, +static void emitASTReader(const RecordKeeper &records, raw_ostream &out, StringRef description) { emitSourceFileHeader(description, out, records); ASTPropsEmitter(records, out).emitNodeReaderClass<NodeClass>(); } -void clang::EmitClangTypeReader(RecordKeeper &records, raw_ostream &out) { +void clang::EmitClangTypeReader(const RecordKeeper &records, raw_ostream &out) { emitASTReader<TypeNode>(records, out, "A CRTP reader for Clang Type nodes"); } /// Emit an .inc file that defines the AbstractFooWriter class /// for the given AST class hierarchy. 
template <class NodeClass> -static void emitASTWriter(RecordKeeper &records, raw_ostream &out, +static void emitASTWriter(const RecordKeeper &records, raw_ostream &out, StringRef description) { emitSourceFileHeader(description, out, records); ASTPropsEmitter(records, out).emitNodeWriterClass<NodeClass>(); } -void clang::EmitClangTypeWriter(RecordKeeper &records, raw_ostream &out) { +void clang::EmitClangTypeWriter(const RecordKeeper &records, raw_ostream &out) { emitASTWriter<TypeNode>(records, out, "A CRTP writer for Clang Type nodes"); } @@ -847,7 +847,8 @@ void ASTPropsEmitter::emitBasicReaderWriterFile(const ReaderWriterInfo &info) { /// Emit an .inc file that defines some helper classes for reading /// basic values. -void clang::EmitClangBasicReader(RecordKeeper &records, raw_ostream &out) { +void clang::EmitClangBasicReader(const RecordKeeper &records, + raw_ostream &out) { emitSourceFileHeader("Helper classes for BasicReaders", out, records); // Use any property, we won't be using those properties. @@ -857,7 +858,8 @@ void clang::EmitClangBasicReader(RecordKeeper &records, raw_ostream &out) { /// Emit an .inc file that defines some helper classes for writing /// basic values. -void clang::EmitClangBasicWriter(RecordKeeper &records, raw_ostream &out) { +void clang::EmitClangBasicWriter(const RecordKeeper &records, + raw_ostream &out) { emitSourceFileHeader("Helper classes for BasicWriters", out, records); // Use any property, we won't be using those properties. 
diff --git a/clang/utils/TableGen/TableGenBackends.h b/clang/utils/TableGen/TableGenBackends.h index c0582e3..01d16d2 100644 --- a/clang/utils/TableGen/TableGenBackends.h +++ b/clang/utils/TableGen/TableGenBackends.h @@ -35,11 +35,15 @@ void EmitClangDeclContext(const llvm::RecordKeeper &RK, llvm::raw_ostream &OS); void EmitClangASTNodes(const llvm::RecordKeeper &RK, llvm::raw_ostream &OS, const std::string &N, const std::string &S, std::string_view PriorizeIfSubclassOf = ""); -void EmitClangBasicReader(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); -void EmitClangBasicWriter(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); +void EmitClangBasicReader(const llvm::RecordKeeper &Records, + llvm::raw_ostream &OS); +void EmitClangBasicWriter(const llvm::RecordKeeper &Records, + llvm::raw_ostream &OS); void EmitClangTypeNodes(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); -void EmitClangTypeReader(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); -void EmitClangTypeWriter(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); +void EmitClangTypeReader(const llvm::RecordKeeper &Records, + llvm::raw_ostream &OS); +void EmitClangTypeWriter(const llvm::RecordKeeper &Records, + llvm::raw_ostream &OS); void EmitClangAttrParserStringSwitches(const llvm::RecordKeeper &Records, llvm::raw_ostream &OS); void EmitClangAttrSubjectMatchRulesParserStringSwitches( diff --git a/compiler-rt/cmake/base-config-ix.cmake b/compiler-rt/cmake/base-config-ix.cmake index 5a97992..286a622 100644 --- a/compiler-rt/cmake/base-config-ix.cmake +++ b/compiler-rt/cmake/base-config-ix.cmake @@ -81,6 +81,8 @@ if("${COMPILER_RT_TEST_COMPILER}" MATCHES "clang[+]*$") set(COMPILER_RT_TEST_COMPILER_ID Clang) elseif("${COMPILER_RT_TEST_COMPILER}" MATCHES "clang.*.exe$") set(COMPILER_RT_TEST_COMPILER_ID Clang) +elseif("${COMPILER_RT_TEST_COMPILER}" MATCHES "cl.exe$") + set(COMPILER_RT_TEST_COMPILER_ID MSVC) else() set(COMPILER_RT_TEST_COMPILER_ID GNU) endif() diff --git 
a/compiler-rt/lib/sanitizer_common/sanitizer_common_interface.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interface.inc index 91be9e9..66744aa 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interface.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interface.inc @@ -22,6 +22,7 @@ INTERFACE_FUNCTION(__sanitizer_verify_double_ended_contiguous_container) INTERFACE_WEAK_FUNCTION(__sanitizer_on_print) INTERFACE_WEAK_FUNCTION(__sanitizer_report_error_summary) INTERFACE_WEAK_FUNCTION(__sanitizer_sandbox_on_notify) +INTERFACE_WEAK_FUNCTION(__sanitizer_get_dtls_size) // Sanitizer weak hooks INTERFACE_WEAK_FUNCTION(__sanitizer_weak_hook_memcmp) INTERFACE_WEAK_FUNCTION(__sanitizer_weak_hook_strcmp) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_interface_internal.h b/compiler-rt/lib/sanitizer_common/sanitizer_interface_internal.h index cd0d45e..c424ab1 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_interface_internal.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_interface_internal.h @@ -49,6 +49,11 @@ __sanitizer_sandbox_on_notify(__sanitizer_sandbox_arguments *args); SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE void __sanitizer_report_error_summary(const char *error_summary); +// Returns size of dynamically allocated block. This function can be overridden +// by the client. 
+SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE __sanitizer::uptr +__sanitizer_get_dtls_size(const void *tls_begin); + SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_cov_dump(); SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_dump_coverage( const __sanitizer::uptr *pcs, const __sanitizer::uptr len); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_tls_get_addr.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_tls_get_addr.cpp index e5839f6..5e9a787 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_tls_get_addr.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_tls_get_addr.cpp @@ -110,15 +110,16 @@ SANITIZER_WEAK_ATTRIBUTE const void *__sanitizer_get_allocated_begin(const void *p); } -static bool GetDTLSRange(uptr &tls_beg, uptr &tls_size) { - const void *start = __sanitizer_get_allocated_begin((void *)tls_beg); +SANITIZER_INTERFACE_WEAK_DEF(uptr, __sanitizer_get_dtls_size, + const void *tls_begin) { + const void *start = __sanitizer_get_allocated_begin(tls_begin); if (!start) - return false; - tls_beg = (uptr)start; - tls_size = __sanitizer_get_allocated_size(start); + return 0; + CHECK_EQ(start, tls_begin); + uptr tls_size = __sanitizer_get_allocated_size(start); VReport(2, "__tls_get_addr: glibc DTLS suspected; tls={%p,0x%zx}\n", - (void *)tls_beg, tls_size); - return true; + tls_begin, tls_size); + return tls_size; } DTLS::DTV *DTLS_on_tls_get_addr(void *arg_void, void *res, @@ -142,10 +143,12 @@ DTLS::DTV *DTLS_on_tls_get_addr(void *arg_void, void *res, // creation. VReport(2, "__tls_get_addr: static tls: %p\n", (void *)tls_beg); tls_size = 0; - } else if (!GetDTLSRange(tls_beg, tls_size)) { - VReport(2, "__tls_get_addr: Can't guess glibc version\n"); - // This may happen inside the DTOR of main thread, so just ignore it. 
- tls_size = 0; + } else { + tls_size = __sanitizer_get_dtls_size(reinterpret_cast<void *>(tls_beg)); + if (!tls_size) { + VReport(2, "__tls_get_addr: Can't guess glibc version\n"); + // This may happen inside the DTOR of main thread, so just ignore it. + } } dtv->beg = tls_beg; dtv->size = tls_size; @@ -160,6 +163,9 @@ bool DTLSInDestruction(DTLS *dtls) { } #else +SANITIZER_INTERFACE_WEAK_DEF(uptr, __sanitizer_get_dtls_size, const void *) { + return 0; +} DTLS::DTV *DTLS_on_tls_get_addr(void *arg, void *res, unsigned long, unsigned long) { return 0; } DTLS *DTLS_Get() { return 0; } diff --git a/compiler-rt/lib/sanitizer_common/weak_symbols.txt b/compiler-rt/lib/sanitizer_common/weak_symbols.txt index 1eb1ce8..77e7b5d 100644 --- a/compiler-rt/lib/sanitizer_common/weak_symbols.txt +++ b/compiler-rt/lib/sanitizer_common/weak_symbols.txt @@ -1,4 +1,5 @@ ___sanitizer_free_hook +___sanitizer_get_dtls_size ___sanitizer_malloc_hook ___sanitizer_report_error_summary ___sanitizer_sandbox_on_notify diff --git a/compiler-rt/test/lit.common.cfg.py b/compiler-rt/test/lit.common.cfg.py index 1c6fbc8..c533c7e 100644 --- a/compiler-rt/test/lit.common.cfg.py +++ b/compiler-rt/test/lit.common.cfg.py @@ -148,6 +148,9 @@ if compiler_id == "Clang": # requested it because it makes ASan reports more precise. 
config.debug_info_flags.append("-gcodeview") config.debug_info_flags.append("-gcolumn-info") +elif compiler_id == "MSVC": + config.debug_info_flags = ["/Z7"] + config.cxx_mode_flags = [] elif compiler_id == "GNU": config.cxx_mode_flags = ["-x c++"] config.debug_info_flags = ["-g"] diff --git a/compiler-rt/test/sanitizer_common/TestCases/Linux/tls_get_addr.c b/compiler-rt/test/sanitizer_common/TestCases/Linux/tls_get_addr.c new file mode 100644 index 0000000..4ec129f --- /dev/null +++ b/compiler-rt/test/sanitizer_common/TestCases/Linux/tls_get_addr.c @@ -0,0 +1,52 @@ +// RUN: %clang -g %s -o %t +// RUN: %clang -g %s -DBUILD_SO -fPIC -o %t-so.so -shared +// RUN: %run %t 2>&1 | FileCheck %s + +// REQUIRES: glibc + +// `__tls_get_addr` is somehow not invoked. +// XFAIL: i386-linux + +// These don't intercept __tls_get_addr. +// XFAIL: lsan,hwasan,ubsan + +#ifndef BUILD_SO +# include <assert.h> +# include <dlfcn.h> +# include <pthread.h> +# include <stdio.h> +# include <stdlib.h> + +// CHECK-COUNT-2: __sanitizer_get_dtls_size: +size_t __sanitizer_get_dtls_size(const void *ptr) { + fprintf(stderr, "__sanitizer_get_dtls_size: %p\n", ptr); + return 0; +} + +typedef long *(*get_t)(); +get_t GetTls; +void *Thread(void *unused) { return GetTls(); } + +int main(int argc, char *argv[]) { + char path[4096]; + snprintf(path, sizeof(path), "%s-so.so", argv[0]); + int i; + + void *handle = dlopen(path, RTLD_LAZY); + if (!handle) + fprintf(stderr, "%s\n", dlerror()); + assert(handle != 0); + GetTls = (get_t)dlsym(handle, "GetTls"); + assert(dlerror() == 0); + + pthread_t t; + pthread_create(&t, 0, Thread, 0); + pthread_join(t, 0); + pthread_create(&t, 0, Thread, 0); + pthread_join(t, 0); + return 0; +} +#else // BUILD_SO +__thread long huge_thread_local_array[1 << 17]; +long *GetTls() { return &huge_thread_local_array[0]; } +#endif diff --git a/compiler-rt/test/sanitizer_common/TestCases/dlsym_alloc.c b/compiler-rt/test/sanitizer_common/TestCases/dlsym_alloc.c index 0228c3b..7b5b9cf 
100644 --- a/compiler-rt/test/sanitizer_common/TestCases/dlsym_alloc.c +++ b/compiler-rt/test/sanitizer_common/TestCases/dlsym_alloc.c @@ -2,6 +2,8 @@ // FIXME: TSAN does not use DlsymAlloc. // UNSUPPORTED: tsan +// FIXME: investigate why this fails on macos +// UNSUPPORTED: darwin #include <stdlib.h> diff --git a/cross-project-tests/lit.cfg.py b/cross-project-tests/lit.cfg.py index 232d981..9935fe6 100644 --- a/cross-project-tests/lit.cfg.py +++ b/cross-project-tests/lit.cfg.py @@ -81,13 +81,7 @@ if is_msvc: # use_clang() and use_lld() respectively, so set them to "", if needed. if not hasattr(config, "clang_src_dir"): config.clang_src_dir = "" -# Facebook T92898286 -should_test_bolt = get_required_attr(config, "llvm_test_bolt") -if should_test_bolt: - llvm_config.use_clang(required=("clang" in config.llvm_enabled_projects), additional_flags=["--post-link-optimize"]) -else: - llvm_config.use_clang(required=("clang" in config.llvm_enabled_projects)) -# End Facebook T92898286 +llvm_config.use_clang(required=("clang" in config.llvm_enabled_projects)) if not hasattr(config, "lld_src_dir"): config.lld_src_dir = "" @@ -300,9 +294,3 @@ llvm_config.feature_config([("--build-mode", {"Debug|RelWithDebInfo": "debug-inf # Allow 'REQUIRES: XXX-registered-target' in tests. for arch in config.targets_to_build: config.available_features.add(arch.lower() + "-registered-target") - -# Facebook T92898286 -# Ensure the user's PYTHONPATH is included. 
-if "PYTHONPATH" in os.environ: - config.environment["PYTHONPATH"] = os.environ["PYTHONPATH"] -# End Facebook T92898286 diff --git a/cross-project-tests/lit.site.cfg.py.in b/cross-project-tests/lit.site.cfg.py.in index 2d53cd3..39458df 100644 --- a/cross-project-tests/lit.site.cfg.py.in +++ b/cross-project-tests/lit.site.cfg.py.in @@ -21,10 +21,6 @@ config.mlir_src_root = "@MLIR_SOURCE_DIR@" config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@" -# Facebook T92898286 -config.llvm_test_bolt = lit.util.pythonize_bool("@LLVM_TEST_BOLT@") -# End Facebook T92898286 - import lit.llvm lit.llvm.initialize(lit_config, config) diff --git a/flang/include/flang/Semantics/expression.h b/flang/include/flang/Semantics/expression.h index b1304d7..c90c8c4 100644 --- a/flang/include/flang/Semantics/expression.h +++ b/flang/include/flang/Semantics/expression.h @@ -331,7 +331,7 @@ private: const semantics::Scope &, bool C919bAlreadyEnforced = false); MaybeExpr CompleteSubscripts(ArrayRef &&); MaybeExpr ApplySubscripts(DataRef &&, std::vector<Subscript> &&); - void CheckConstantSubscripts(ArrayRef &); + void CheckSubscripts(ArrayRef &); bool CheckRanks(const DataRef &); // Return false if error exists. 
bool CheckPolymorphic(const DataRef &); // ditto bool CheckDataRef(const DataRef &); // ditto diff --git a/flang/lib/Evaluate/intrinsics.cpp b/flang/lib/Evaluate/intrinsics.cpp index 876c2ae..166dae9 100644 --- a/flang/lib/Evaluate/intrinsics.cpp +++ b/flang/lib/Evaluate/intrinsics.cpp @@ -2264,7 +2264,7 @@ std::optional<SpecificCall> IntrinsicInterface::Match( messages.Say("'kind=' argument must be a constant scalar integer " "whose value is a supported kind for the " "intrinsic result type"_err_en_US); - return std::nullopt; + // use default kind below for error recovery } else if (kindDummyArg->flags.test(ArgFlag::defaultsToSameKind)) { CHECK(sameArg); resultType = *sameArg->GetType(); @@ -2274,6 +2274,8 @@ std::optional<SpecificCall> IntrinsicInterface::Match( DynamicType{TypeCategory::Integer, defaults.sizeIntegerKind()}; } else { CHECK(kindDummyArg->flags.test(ArgFlag::defaultsToDefaultForResult)); + } + if (!resultType) { int kind{defaults.GetDefaultKind(*category)}; if (*category == TypeCategory::Character) { // ACHAR & CHAR resultType = DynamicType{kind, 1}; diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp index f336d21..fa8a430 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp @@ -1009,6 +1009,18 @@ bool ClauseProcessor::processMap( return clauseFound; } +bool ClauseProcessor::processNontemporal( + mlir::omp::NontemporalClauseOps &result) const { + return findRepeatableClause<omp::clause::Nontemporal>( + [&](const omp::clause::Nontemporal &clause, const parser::CharBlock &) { + for (const Object &object : clause.v) { + semantics::Symbol *sym = object.sym(); + mlir::Value symVal = converter.getSymbolAddress(*sym); + result.nontemporalVars.push_back(symVal); + } + }); +} + bool ClauseProcessor::processReduction( mlir::Location currentLocation, mlir::omp::ReductionClauseOps &result, llvm::SmallVectorImpl<mlir::Type> *outReductionTypes, diff --git 
a/flang/lib/Lower/OpenMP/ClauseProcessor.h b/flang/lib/Lower/OpenMP/ClauseProcessor.h index 8d02d36..be1d8a6 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.h +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.h @@ -121,6 +121,7 @@ public: llvm::SmallVectorImpl<const semantics::Symbol *> *mapSyms = nullptr, llvm::SmallVectorImpl<mlir::Location> *mapSymLocs = nullptr, llvm::SmallVectorImpl<mlir::Type> *mapSymTypes = nullptr) const; + bool processNontemporal(mlir::omp::NontemporalClauseOps &result) const; bool processReduction( mlir::Location currentLocation, mlir::omp::ReductionClauseOps &result, llvm::SmallVectorImpl<mlir::Type> *reductionTypes = nullptr, diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 233aacb..99114dc 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -1123,13 +1123,13 @@ static void genSimdClauses(lower::AbstractConverter &converter, ClauseProcessor cp(converter, semaCtx, clauses); cp.processAligned(clauseOps); cp.processIf(llvm::omp::Directive::OMPD_simd, clauseOps); + cp.processNontemporal(clauseOps); cp.processOrder(clauseOps); cp.processReduction(loc, clauseOps); cp.processSafelen(clauseOps); cp.processSimdlen(clauseOps); - cp.processTODO<clause::Linear, clause::Nontemporal>( - loc, llvm::omp::Directive::OMPD_simd); + cp.processTODO<clause::Linear>(loc, llvm::omp::Directive::OMPD_simd); } static void genSingleClauses(lower::AbstractConverter &converter, diff --git a/flang/lib/Semantics/check-call.cpp b/flang/lib/Semantics/check-call.cpp index c7ec873..71d1c08 100644 --- a/flang/lib/Semantics/check-call.cpp +++ b/flang/lib/Semantics/check-call.cpp @@ -1363,6 +1363,14 @@ static bool CheckElementalConformance(parser::ContextualMessages &messages, const auto &dummy{proc.dummyArguments.at(index++)}; if (arg) { if (const auto *expr{arg->UnwrapExpr()}) { + if (const auto *wholeSymbol{evaluate::UnwrapWholeSymbolDataRef(arg)}) { + wholeSymbol = 
&ResolveAssociations(*wholeSymbol); + if (IsAssumedSizeArray(*wholeSymbol)) { + evaluate::SayWithDeclaration(messages, *wholeSymbol, + "Whole assumed-size array '%s' may not be used as an argument to an elemental procedure"_err_en_US, + wholeSymbol->name()); + } + } if (auto argShape{evaluate::GetShape(context, *expr)}) { if (GetRank(*argShape) > 0) { std::string argName{"actual argument ("s + expr->AsFortran() + diff --git a/flang/lib/Semantics/expression.cpp b/flang/lib/Semantics/expression.cpp index e94a49f..072ebe1 100644 --- a/flang/lib/Semantics/expression.cpp +++ b/flang/lib/Semantics/expression.cpp @@ -298,7 +298,7 @@ MaybeExpr ExpressionAnalyzer::CompleteSubscripts(ArrayRef &&ref) { // Subscripts of named constants are checked in folding. // Subscripts of DATA statement objects are checked in data statement // conversion to initializers. - CheckConstantSubscripts(ref); + CheckSubscripts(ref); } return Designate(DataRef{std::move(ref)}); } @@ -326,7 +326,7 @@ MaybeExpr ExpressionAnalyzer::ApplySubscripts( std::move(dataRef.u)); } -void ExpressionAnalyzer::CheckConstantSubscripts(ArrayRef &ref) { +void ExpressionAnalyzer::CheckSubscripts(ArrayRef &ref) { // Fold subscript expressions and check for an empty triplet. 
const Symbol &arraySymbol{ref.base().GetLastSymbol()}; Shape lb{GetLBOUNDs(foldingContext_, NamedEntity{arraySymbol})}; @@ -390,6 +390,13 @@ void ExpressionAnalyzer::CheckConstantSubscripts(ArrayRef &ref) { for (Subscript &ss : ref.subscript()) { auto dimLB{ToInt64(lb[dim])}; auto dimUB{ToInt64(ub[dim])}; + if (dimUB && dimLB && *dimUB < *dimLB) { + AttachDeclaration( + Say("Empty array dimension %d cannot be subscripted as an element or non-empty array section"_err_en_US, + dim + 1), + arraySymbol); + break; + } std::optional<ConstantSubscript> val[2]; int vals{0}; if (auto *triplet{std::get_if<Triplet>(&ss.u)}) { diff --git a/flang/runtime/edit-input.cpp b/flang/runtime/edit-input.cpp index 61b070b..2cee35e 100644 --- a/flang/runtime/edit-input.cpp +++ b/flang/runtime/edit-input.cpp @@ -54,6 +54,10 @@ static RT_API_ATTRS bool CheckCompleteListDirectedField( } } +static inline RT_API_ATTRS char32_t GetSeparatorChar(const DataEdit &edit) { + return edit.modes.editingFlags & decimalComma ? 
char32_t{';'} : char32_t{','}; +} + template <int LOG2_BASE> static RT_API_ATTRS bool EditBOZInput( IoStatementState &io, const DataEdit &edit, void *n, std::size_t bytes) { @@ -70,6 +74,7 @@ static RT_API_ATTRS bool EditBOZInput( // Count significant digits after any leading white space & zeroes int digits{0}; int significantBits{0}; + const char32_t comma{GetSeparatorChar(edit)}; for (; next; next = io.NextInField(remaining, edit)) { char32_t ch{*next}; if (ch == ' ' || ch == '\t') { @@ -84,7 +89,7 @@ static RT_API_ATTRS bool EditBOZInput( } else if (LOG2_BASE >= 4 && ch >= '8' && ch <= '9') { } else if (LOG2_BASE >= 4 && ch >= 'A' && ch <= 'F') { } else if (LOG2_BASE >= 4 && ch >= 'a' && ch <= 'f') { - } else if (ch == ',') { + } else if (ch == comma) { break; // end non-list-directed field early } else { io.GetIoErrorHandler().SignalError( @@ -209,6 +214,7 @@ RT_API_ATTRS bool EditIntegerInput( common::UnsignedInt128 value{0}; bool any{!!sign}; bool overflow{false}; + const char32_t comma{GetSeparatorChar(edit)}; for (; next; next = io.NextInField(remaining, edit)) { char32_t ch{*next}; if (ch == ' ' || ch == '\t') { @@ -221,9 +227,23 @@ RT_API_ATTRS bool EditIntegerInput( int digit{0}; if (ch >= '0' && ch <= '9') { digit = ch - '0'; - } else if (ch == ',') { + } else if (ch == comma) { break; // end non-list-directed field early } else { + if (edit.modes.inNamelist && ch == GetRadixPointChar(edit)) { + // Ignore any fractional part that might appear in NAMELIST integer + // input, like a few other Fortran compilers do. + // TODO: also process exponents? Some compilers do, but they obviously + // can't just be ignored. 
+ while ((next = io.NextInField(remaining, edit))) { + if (*next < '0' || *next > '9') { + break; + } + } + if (!next || *next == comma) { + break; + } + } io.GetIoErrorHandler().SignalError( "Bad character '%lc' in INTEGER input field", ch); return false; diff --git a/flang/test/Lower/OpenMP/simd.f90 b/flang/test/Lower/OpenMP/simd.f90 index 2127451..bdc6a1e 100644 --- a/flang/test/Lower/OpenMP/simd.f90 +++ b/flang/test/Lower/OpenMP/simd.f90 @@ -223,3 +223,21 @@ subroutine simdloop_aligned_allocatable() A(i) = i end do end subroutine + +!CHECK-LABEL: func @_QPsimd_with_nontemporal_clause +subroutine simd_with_nontemporal_clause(n) + !CHECK: %[[A_DECL:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFsimd_with_nontemporal_clauseEa"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) + !CHECK: %[[C_DECL:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFsimd_with_nontemporal_clauseEc"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) + integer :: i, n + integer :: A, B, C + !CHECK: %[[LB:.*]] = arith.constant 1 : i32 + !CHECK: %[[UB:.*]] = fir.load %{{.*}}#0 : !fir.ref<i32> + !CHECK: %[[STEP:.*]] = arith.constant 1 : i32 + !CHECK: omp.simd nontemporal(%[[A_DECL]]#1, %[[C_DECL]]#1 : !fir.ref<i32>, !fir.ref<i32>) { + !CHECK-NEXT: omp.loop_nest (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { + !$OMP SIMD NONTEMPORAL(A, C) + do i = 1, n + C = A + B + end do + !$OMP END SIMD +end subroutine diff --git a/flang/test/Semantics/elemental02.f90 b/flang/test/Semantics/elemental02.f90 new file mode 100644 index 0000000..7f8fb4a --- /dev/null +++ b/flang/test/Semantics/elemental02.f90 @@ -0,0 +1,13 @@ +! 
RUN: %python %S/test_errors.py %s %flang_fc1 +subroutine s(a) + real a(*) + interface + elemental function ef(efarg) + real, intent(in) :: efarg + end + end interface +!ERROR: Whole assumed-size array 'a' may not be used as an argument to an elemental procedure + print *, sqrt(a) +!ERROR: Whole assumed-size array 'a' may not be used as an argument to an elemental procedure + print *, ef(a) +end diff --git a/flang/test/Semantics/expr-errors06.f90 b/flang/test/Semantics/expr-errors06.f90 index 84872c7..bdcb92c 100644 --- a/flang/test/Semantics/expr-errors06.f90 +++ b/flang/test/Semantics/expr-errors06.f90 @@ -1,7 +1,7 @@ ! RUN: %python %S/test_errors.py %s %flang_fc1 -Werror ! Check out-of-range subscripts subroutine subr(da) - real a(10), da(2,1) + real a(10), da(2,1), empty(1:0,1) integer, parameter :: n(2) = [1, 2] integer unknown !ERROR: DATA statement designator 'a(0_8)' is out of range @@ -39,4 +39,10 @@ subroutine subr(da) print *, da(1,0) !WARNING: Subscript 2 is greater than upper bound 1 for dimension 2 of array print *, da(1,2) + print *, empty([(j,j=1,0)],1) ! ok + print *, empty(1:0,1) ! ok + print *, empty(:,1) ! ok + print *, empty(i:j,k) ! 
ok + !ERROR: Empty array dimension 1 cannot be subscripted as an element or non-empty array section + print *, empty(i,1) end diff --git a/flang/test/Semantics/kinds06.f90 b/flang/test/Semantics/kinds06.f90 new file mode 100644 index 0000000..f5b488e --- /dev/null +++ b/flang/test/Semantics/kinds06.f90 @@ -0,0 +1,4 @@ +!RUN: %python %S/test_errors.py %s %flang_fc1 +!ERROR: 'kind=' argument must be a constant scalar integer whose value is a supported kind for the intrinsic result type +print *, real(1.,666) +end diff --git a/flang/unittests/Runtime/Namelist.cpp b/flang/unittests/Runtime/Namelist.cpp index f95c5d2..9037fa1 100644 --- a/flang/unittests/Runtime/Namelist.cpp +++ b/flang/unittests/Runtime/Namelist.cpp @@ -305,4 +305,33 @@ TEST(NamelistTests, Comma) { EXPECT_EQ(got, expect); } +// Tests REAL-looking input to integers +TEST(NamelistTests, RealValueForInt) { + OwningPtr<Descriptor> scDesc{ + MakeArray<TypeCategory::Integer, static_cast<int>(sizeof(int))>( + std::vector<int>{}, std::vector<int>{{}})}; + const NamelistGroup::Item items[]{{"j", *scDesc}}; + const NamelistGroup group{"nml", 1, items}; + static char t1[]{"&nml j=123.456/"}; + StaticDescriptor<1, true> statDesc; + Descriptor &internalDesc{statDesc.descriptor()}; + internalDesc.Establish(TypeCode{CFI_type_char}, + /*elementBytes=*/std::strlen(t1), t1, 0, nullptr, CFI_attribute_pointer); + auto inCookie{IONAME(BeginInternalArrayListInput)( + internalDesc, nullptr, 0, __FILE__, __LINE__)}; + ASSERT_TRUE(IONAME(InputNamelist)(inCookie, group)); + ASSERT_EQ(IONAME(EndIoStatement)(inCookie), IostatOk) + << "namelist real input for integer"; + char out[16]; + internalDesc.Establish(TypeCode{CFI_type_char}, /*elementBytes=*/sizeof out, + out, 0, nullptr, CFI_attribute_pointer); + auto outCookie{IONAME(BeginInternalArrayListOutput)( + internalDesc, nullptr, 0, __FILE__, __LINE__)}; + ASSERT_TRUE(IONAME(OutputNamelist)(outCookie, group)); + ASSERT_EQ(IONAME(EndIoStatement)(outCookie), IostatOk) << 
"namelist output"; + std::string got{out, sizeof out}; + static const std::string expect{" &NML J= 123/ "}; + EXPECT_EQ(got, expect); +} + // TODO: Internal NAMELIST error tests diff --git a/libcxx/.clang-format b/libcxx/.clang-format index c37b234..84a2afa 100644 --- a/libcxx/.clang-format +++ b/libcxx/.clang-format @@ -43,7 +43,6 @@ AttributeMacros: [ '_LIBCPP_NO_SANITIZE', '_LIBCPP_NO_UNIQUE_ADDRESS', '_LIBCPP_NOALIAS', - '_LIBCPP_NODISCARD', '_LIBCPP_OVERRIDABLE_FUNC_VIS', '_LIBCPP_STANDALONE_DEBUG', '_LIBCPP_TEMPLATE_DATA_VIS', diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt index ffff811..23d9aa0 100644 --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -424,6 +424,7 @@ set(files __fwd/format.h __fwd/fstream.h __fwd/functional.h + __fwd/get.h __fwd/ios.h __fwd/istream.h __fwd/mdspan.h @@ -440,6 +441,7 @@ set(files __fwd/string_view.h __fwd/subrange.h __fwd/tuple.h + __fwd/variant.h __fwd/vector.h __hash_table __ios/fpos.h diff --git a/libcxx/include/__algorithm/adjacent_find.h b/libcxx/include/__algorithm/adjacent_find.h index 6f15456..f0708eb 100644 --- a/libcxx/include/__algorithm/adjacent_find.h +++ b/libcxx/include/__algorithm/adjacent_find.h @@ -26,7 +26,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD template <class _Iter, class _Sent, class _BinaryPredicate> -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Iter +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Iter __adjacent_find(_Iter __first, _Sent __last, _BinaryPredicate&& __pred) { if (__first == __last) return __first; @@ -40,13 +40,13 @@ __adjacent_find(_Iter __first, _Sent __last, _BinaryPredicate&& __pred) { } template <class _ForwardIterator, class _BinaryPredicate> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator adjacent_find(_ForwardIterator __first, 
_ForwardIterator __last, _BinaryPredicate __pred) { return std::__adjacent_find(std::move(__first), std::move(__last), __pred); } template <class _ForwardIterator> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator adjacent_find(_ForwardIterator __first, _ForwardIterator __last) { return std::adjacent_find(std::move(__first), std::move(__last), __equal_to()); } diff --git a/libcxx/include/__algorithm/all_of.h b/libcxx/include/__algorithm/all_of.h index ec84eea..1fcb74f 100644 --- a/libcxx/include/__algorithm/all_of.h +++ b/libcxx/include/__algorithm/all_of.h @@ -19,7 +19,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template <class _InputIterator, class _Predicate> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool all_of(_InputIterator __first, _InputIterator __last, _Predicate __pred) { for (; __first != __last; ++__first) if (!__pred(*__first)) diff --git a/libcxx/include/__algorithm/any_of.h b/libcxx/include/__algorithm/any_of.h index b5ff778..acb546b 100644 --- a/libcxx/include/__algorithm/any_of.h +++ b/libcxx/include/__algorithm/any_of.h @@ -19,7 +19,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template <class _InputIterator, class _Predicate> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool any_of(_InputIterator __first, _InputIterator __last, _Predicate __pred) { for (; __first != __last; ++__first) if (__pred(*__first)) diff --git a/libcxx/include/__algorithm/binary_search.h b/libcxx/include/__algorithm/binary_search.h index 6065fc3..79a5ec0 100644 --- a/libcxx/include/__algorithm/binary_search.h +++ b/libcxx/include/__algorithm/binary_search.h @@ -22,14 +22,14 @@ _LIBCPP_BEGIN_NAMESPACE_STD template <class 
_ForwardIterator, class _Tp, class _Compare> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool binary_search(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value, _Compare __comp) { __first = std::lower_bound<_ForwardIterator, _Tp, __comp_ref_type<_Compare> >(__first, __last, __value, __comp); return __first != __last && !__comp(__value, *__first); } template <class _ForwardIterator, class _Tp> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool binary_search(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) { return std::binary_search(__first, __last, __value, __less<>()); } diff --git a/libcxx/include/__algorithm/count.h b/libcxx/include/__algorithm/count.h index 1cfe7f6..b3489a4 100644 --- a/libcxx/include/__algorithm/count.h +++ b/libcxx/include/__algorithm/count.h @@ -79,7 +79,7 @@ __count(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __l } template <class _InputIterator, class _Tp> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __iter_diff_t<_InputIterator> +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __iter_diff_t<_InputIterator> count(_InputIterator __first, _InputIterator __last, const _Tp& __value) { __identity __proj; return std::__count<_ClassicAlgPolicy>(__first, __last, __value, __proj); diff --git a/libcxx/include/__algorithm/count_if.h b/libcxx/include/__algorithm/count_if.h index 2578206..e702388 100644 --- a/libcxx/include/__algorithm/count_if.h +++ b/libcxx/include/__algorithm/count_if.h @@ -20,7 +20,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template <class _InputIterator, class _Predicate> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 +[[__nodiscard__]] inline 
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 typename iterator_traits<_InputIterator>::difference_type count_if(_InputIterator __first, _InputIterator __last, _Predicate __pred) { typename iterator_traits<_InputIterator>::difference_type __r(0); diff --git a/libcxx/include/__algorithm/equal.h b/libcxx/include/__algorithm/equal.h index bfc8f72..23ff064 100644 --- a/libcxx/include/__algorithm/equal.h +++ b/libcxx/include/__algorithm/equal.h @@ -35,7 +35,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD template <class _InputIterator1, class _InputIterator2, class _BinaryPredicate> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __equal_iter_impl( +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __equal_iter_impl( _InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _BinaryPredicate& __pred) { for (; __first1 != __last1; ++__first1, (void)++__first2) if (!__pred(*__first1, *__first2)) @@ -49,20 +49,20 @@ template <class _Tp, __enable_if_t<__desugars_to_v<__equal_tag, _BinaryPredicate, _Tp, _Up> && !is_volatile<_Tp>::value && !is_volatile<_Up>::value && __libcpp_is_trivially_equality_comparable<_Tp, _Up>::value, int> = 0> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __equal_iter_impl(_Tp* __first1, _Tp* __last1, _Up* __first2, _BinaryPredicate&) { return std::__constexpr_memcmp_equal(__first1, __first2, __element_count(__last1 - __first1)); } template <class _InputIterator1, class _InputIterator2, class _BinaryPredicate> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool equal(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _BinaryPredicate __pred) { return std::__equal_iter_impl( 
std::__unwrap_iter(__first1), std::__unwrap_iter(__last1), std::__unwrap_iter(__first2), __pred); } template <class _InputIterator1, class _InputIterator2> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool equal(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2) { return std::equal(__first1, __last1, __first2, __equal_to()); } @@ -70,7 +70,7 @@ equal(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first #if _LIBCPP_STD_VER >= 14 template <class _Iter1, class _Sent1, class _Iter2, class _Sent2, class _Pred, class _Proj1, class _Proj2> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __equal_impl( +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __equal_impl( _Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Sent2 __last2, _Pred& __comp, _Proj1& __proj1, _Proj2& __proj2) { while (__first1 != __last1 && __first2 != __last2) { if (!std::__invoke(__comp, std::__invoke(__proj1, *__first1), std::__invoke(__proj2, *__first2))) @@ -90,13 +90,13 @@ template <class _Tp, __is_identity<_Proj2>::value && !is_volatile<_Tp>::value && !is_volatile<_Up>::value && __libcpp_is_trivially_equality_comparable<_Tp, _Up>::value, int> = 0> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __equal_impl(_Tp* __first1, _Tp* __last1, _Up* __first2, _Up*, _Pred&, _Proj1&, _Proj2&) { return std::__constexpr_memcmp_equal(__first1, __first2, __element_count(__last1 - __first1)); } template <class _InputIterator1, class _InputIterator2, class _BinaryPredicate> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool equal(_InputIterator1 __first1, 
_InputIterator1 __last1, _InputIterator2 __first2, @@ -119,7 +119,7 @@ equal(_InputIterator1 __first1, } template <class _InputIterator1, class _InputIterator2> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool equal(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _InputIterator2 __last2) { return std::equal(__first1, __last1, __first2, __last2, __equal_to()); } diff --git a/libcxx/include/__algorithm/equal_range.h b/libcxx/include/__algorithm/equal_range.h index 676e436..28c37cd 100644 --- a/libcxx/include/__algorithm/equal_range.h +++ b/libcxx/include/__algorithm/equal_range.h @@ -60,7 +60,7 @@ __equal_range(_Iter __first, _Sent __last, const _Tp& __value, _Compare&& __comp } template <class _ForwardIterator, class _Tp, class _Compare> -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_ForwardIterator, _ForwardIterator> +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_ForwardIterator, _ForwardIterator> equal_range(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value, _Compare __comp) { static_assert(__is_callable<_Compare&, decltype(*__first), const _Tp&>::value, "The comparator has to be callable"); static_assert(is_copy_constructible<_ForwardIterator>::value, "Iterator has to be copy constructible"); @@ -73,7 +73,7 @@ equal_range(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __valu } template <class _ForwardIterator, class _Tp> -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_ForwardIterator, _ForwardIterator> +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_ForwardIterator, _ForwardIterator> equal_range(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) { return std::equal_range(std::move(__first), std::move(__last), __value, __less<>()); } diff --git 
a/libcxx/include/__algorithm/find.h b/libcxx/include/__algorithm/find.h index 7f58dbb..3ab4ab8 100644 --- a/libcxx/include/__algorithm/find.h +++ b/libcxx/include/__algorithm/find.h @@ -167,7 +167,7 @@ struct __find_segment { // public API template <class _InputIterator, class _Tp> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator find(_InputIterator __first, _InputIterator __last, const _Tp& __value) { __identity __proj; return std::__rewrap_iter( diff --git a/libcxx/include/__algorithm/find_end.h b/libcxx/include/__algorithm/find_end.h index 841e0fd..68a9da7 100644 --- a/libcxx/include/__algorithm/find_end.h +++ b/libcxx/include/__algorithm/find_end.h @@ -81,7 +81,7 @@ _LIBCPP_HIDE_FROM_ABI inline _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Iter1, _Iter1> } template <class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _ForwardIterator1 __find_end_classic( +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _ForwardIterator1 __find_end_classic( _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, @@ -102,7 +102,7 @@ _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Fo } template <class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator1 find_end( +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator1 find_end( _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, @@ -112,7 +112,7 @@ _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Fo } template <class _ForwardIterator1, class _ForwardIterator2> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI 
_LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator1 +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator1 find_end(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2) { return std::find_end(__first1, __last1, __first2, __last2, __equal_to()); } diff --git a/libcxx/include/__algorithm/find_first_of.h b/libcxx/include/__algorithm/find_first_of.h index 6b99f56..4a240f7 100644 --- a/libcxx/include/__algorithm/find_first_of.h +++ b/libcxx/include/__algorithm/find_first_of.h @@ -35,7 +35,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _ForwardIterator1 __find_fir } template <class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator1 find_first_of( +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator1 find_first_of( _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, @@ -45,7 +45,7 @@ _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Fo } template <class _ForwardIterator1, class _ForwardIterator2> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator1 find_first_of( +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator1 find_first_of( _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2) { return std::__find_first_of_ce(__first1, __last1, __first2, __last2, __equal_to()); } diff --git a/libcxx/include/__algorithm/find_if.h b/libcxx/include/__algorithm/find_if.h index 22092d3..fd63bcc 100644 --- a/libcxx/include/__algorithm/find_if.h +++ b/libcxx/include/__algorithm/find_if.h @@ -19,7 +19,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template <class _InputIterator, class _Predicate> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI 
_LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator find_if(_InputIterator __first, _InputIterator __last, _Predicate __pred) { for (; __first != __last; ++__first) if (__pred(*__first)) diff --git a/libcxx/include/__algorithm/find_if_not.h b/libcxx/include/__algorithm/find_if_not.h index cc20019..b4441b2 100644 --- a/libcxx/include/__algorithm/find_if_not.h +++ b/libcxx/include/__algorithm/find_if_not.h @@ -19,7 +19,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template <class _InputIterator, class _Predicate> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator find_if_not(_InputIterator __first, _InputIterator __last, _Predicate __pred) { for (; __first != __last; ++__first) if (!__pred(*__first)) diff --git a/libcxx/include/__algorithm/includes.h b/libcxx/include/__algorithm/includes.h index 0ad09a9..47b19d4 100644 --- a/libcxx/include/__algorithm/includes.h +++ b/libcxx/include/__algorithm/includes.h @@ -47,7 +47,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __includes( } template <class _InputIterator1, class _InputIterator2, class _Compare> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool includes(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, @@ -67,7 +67,7 @@ includes(_InputIterator1 __first1, } template <class _InputIterator1, class _InputIterator2> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool includes(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _InputIterator2 __last2) { return std::includes(std::move(__first1), std::move(__last1), 
std::move(__first2), std::move(__last2), __less<>()); } diff --git a/libcxx/include/__algorithm/is_heap.h b/libcxx/include/__algorithm/is_heap.h index c589b80..fa668c1 100644 --- a/libcxx/include/__algorithm/is_heap.h +++ b/libcxx/include/__algorithm/is_heap.h @@ -22,13 +22,13 @@ _LIBCPP_BEGIN_NAMESPACE_STD template <class _RandomAccessIterator, class _Compare> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool is_heap(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { return std::__is_heap_until(__first, __last, static_cast<__comp_ref_type<_Compare> >(__comp)) == __last; } template <class _RandomAccessIterator> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool is_heap(_RandomAccessIterator __first, _RandomAccessIterator __last) { return std::is_heap(__first, __last, __less<>()); } diff --git a/libcxx/include/__algorithm/is_heap_until.h b/libcxx/include/__algorithm/is_heap_until.h index a174f24..7444d97 100644 --- a/libcxx/include/__algorithm/is_heap_until.h +++ b/libcxx/include/__algorithm/is_heap_until.h @@ -46,13 +46,13 @@ __is_heap_until(_RandomAccessIterator __first, _RandomAccessIterator __last, _Co } template <class _RandomAccessIterator, class _Compare> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandomAccessIterator +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandomAccessIterator is_heap_until(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { return std::__is_heap_until(__first, __last, static_cast<__comp_ref_type<_Compare> >(__comp)); } template <class _RandomAccessIterator> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandomAccessIterator +[[__nodiscard__]] inline 
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandomAccessIterator is_heap_until(_RandomAccessIterator __first, _RandomAccessIterator __last) { return std::__is_heap_until(__first, __last, __less<>()); } diff --git a/libcxx/include/__algorithm/is_partitioned.h b/libcxx/include/__algorithm/is_partitioned.h index 1f7c8b0..700e452 100644 --- a/libcxx/include/__algorithm/is_partitioned.h +++ b/libcxx/include/__algorithm/is_partitioned.h @@ -18,7 +18,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template <class _InputIterator, class _Predicate> -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool is_partitioned(_InputIterator __first, _InputIterator __last, _Predicate __pred) { for (; __first != __last; ++__first) if (!__pred(*__first)) diff --git a/libcxx/include/__algorithm/is_permutation.h b/libcxx/include/__algorithm/is_permutation.h index 9dcfcf1..b7949a5 100644 --- a/libcxx/include/__algorithm/is_permutation.h +++ b/libcxx/include/__algorithm/is_permutation.h @@ -113,7 +113,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __is_permutation_impl( // 2+1 iterators, predicate. Not used by range algorithms. template <class _AlgPolicy, class _ForwardIterator1, class _Sentinel1, class _ForwardIterator2, class _BinaryPredicate> -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __is_permutation( +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __is_permutation( _ForwardIterator1 __first1, _Sentinel1 __last1, _ForwardIterator2 __first2, _BinaryPredicate&& __pred) { // Shorten sequences as much as possible by lopping of any equal prefix. 
for (; __first1 != __last1; ++__first1, (void)++__first2) { @@ -247,7 +247,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __is_permutation( // 2+1 iterators, predicate template <class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate> -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool is_permutation( +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool is_permutation( _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _BinaryPredicate __pred) { static_assert(__is_callable<_BinaryPredicate&, decltype(*__first1), decltype(*__first2)>::value, "The comparator has to be callable"); @@ -257,7 +257,7 @@ _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool is_pe // 2+1 iterators template <class _ForwardIterator1, class _ForwardIterator2> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool is_permutation(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2) { return std::is_permutation(__first1, __last1, __first2, __equal_to()); } @@ -266,7 +266,7 @@ is_permutation(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIt // 2+2 iterators template <class _ForwardIterator1, class _ForwardIterator2> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool is_permutation( +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool is_permutation( _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2) { return std::__is_permutation<_ClassicAlgPolicy>( std::move(__first1), @@ -280,7 +280,7 @@ _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 boo // 2+2 iterators, predicate template <class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate> 
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool is_permutation( +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool is_permutation( _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, diff --git a/libcxx/include/__algorithm/is_sorted.h b/libcxx/include/__algorithm/is_sorted.h index 3befb1a..ff61a73 100644 --- a/libcxx/include/__algorithm/is_sorted.h +++ b/libcxx/include/__algorithm/is_sorted.h @@ -22,13 +22,13 @@ _LIBCPP_BEGIN_NAMESPACE_STD template <class _ForwardIterator, class _Compare> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool is_sorted(_ForwardIterator __first, _ForwardIterator __last, _Compare __comp) { return std::__is_sorted_until<__comp_ref_type<_Compare> >(__first, __last, __comp) == __last; } template <class _ForwardIterator> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool is_sorted(_ForwardIterator __first, _ForwardIterator __last) { return std::is_sorted(__first, __last, __less<>()); } diff --git a/libcxx/include/__algorithm/is_sorted_until.h b/libcxx/include/__algorithm/is_sorted_until.h index 53a49f0..b64fb65 100644 --- a/libcxx/include/__algorithm/is_sorted_until.h +++ b/libcxx/include/__algorithm/is_sorted_until.h @@ -35,13 +35,13 @@ __is_sorted_until(_ForwardIterator __first, _ForwardIterator __last, _Compare __ } template <class _ForwardIterator, class _Compare> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator is_sorted_until(_ForwardIterator __first, _ForwardIterator __last, _Compare __comp) { return std::__is_sorted_until<__comp_ref_type<_Compare> >(__first, __last, 
__comp); } template <class _ForwardIterator> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator is_sorted_until(_ForwardIterator __first, _ForwardIterator __last) { return std::is_sorted_until(__first, __last, __less<>()); } diff --git a/libcxx/include/__algorithm/lexicographical_compare.h b/libcxx/include/__algorithm/lexicographical_compare.h index df23e6a..8ea7c17 100644 --- a/libcxx/include/__algorithm/lexicographical_compare.h +++ b/libcxx/include/__algorithm/lexicographical_compare.h @@ -98,7 +98,7 @@ __lexicographical_compare(_Tp* __first1, _Tp* __last1, _Tp* __first2, _Tp* __las #endif // _LIBCPP_STD_VER >= 14 template <class _InputIterator1, class _InputIterator2, class _Compare> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool lexicographical_compare( +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool lexicographical_compare( _InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, @@ -116,7 +116,7 @@ _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 boo } template <class _InputIterator1, class _InputIterator2> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool lexicographical_compare( +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool lexicographical_compare( _InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _InputIterator2 __last2) { return std::lexicographical_compare(__first1, __last1, __first2, __last2, __less<>()); } diff --git a/libcxx/include/__algorithm/lower_bound.h b/libcxx/include/__algorithm/lower_bound.h index d18ab83..54a64be 100644 --- a/libcxx/include/__algorithm/lower_bound.h +++ b/libcxx/include/__algorithm/lower_bound.h @@ -28,7 +28,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template <class _AlgPolicy, class 
_Iter, class _Type, class _Proj, class _Comp> -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Iter __lower_bound_bisecting( +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Iter __lower_bound_bisecting( _Iter __first, const _Type& __value, typename iterator_traits<_Iter>::difference_type __len, @@ -58,7 +58,7 @@ _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Iter __lo // whereas the one-sided version will yield O(n) operations on both counts, with a \Omega(log(n)) bound on the number of // comparisons. template <class _AlgPolicy, class _ForwardIterator, class _Sent, class _Type, class _Proj, class _Comp> -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator __lower_bound_onesided(_ForwardIterator __first, _Sent __last, const _Type& __value, _Comp& __comp, _Proj& __proj) { // step = 0, ensuring we can always short-circuit when distance is 1 later on if (__first == __last || !std::__invoke(__comp, std::__invoke(__proj, *__first), __value)) @@ -84,14 +84,14 @@ __lower_bound_onesided(_ForwardIterator __first, _Sent __last, const _Type& __va } template <class _AlgPolicy, class _ForwardIterator, class _Sent, class _Type, class _Proj, class _Comp> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator __lower_bound(_ForwardIterator __first, _Sent __last, const _Type& __value, _Comp& __comp, _Proj& __proj) { const auto __dist = _IterOps<_AlgPolicy>::distance(__first, __last); return std::__lower_bound_bisecting<_AlgPolicy>(__first, __value, __dist, __comp, __proj); } template <class _ForwardIterator, class _Tp, class _Compare> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator +[[__nodiscard__]] inline 
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator lower_bound(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value, _Compare __comp) { static_assert(__is_callable<_Compare&, decltype(*__first), const _Tp&>::value, "The comparator has to be callable"); auto __proj = std::__identity(); @@ -99,7 +99,7 @@ lower_bound(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __valu } template <class _ForwardIterator, class _Tp> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator lower_bound(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) { return std::lower_bound(__first, __last, __value, __less<>()); } diff --git a/libcxx/include/__algorithm/max.h b/libcxx/include/__algorithm/max.h index d4c99f6..1673e6b 100644 --- a/libcxx/include/__algorithm/max.h +++ b/libcxx/include/__algorithm/max.h @@ -25,13 +25,13 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD template <class _Tp, class _Compare> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 const _Tp& +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 const _Tp& max(_LIBCPP_LIFETIMEBOUND const _Tp& __a, _LIBCPP_LIFETIMEBOUND const _Tp& __b, _Compare __comp) { return __comp(__a, __b) ? 
__b : __a; } template <class _Tp> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 const _Tp& +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 const _Tp& max(_LIBCPP_LIFETIMEBOUND const _Tp& __a, _LIBCPP_LIFETIMEBOUND const _Tp& __b) { return std::max(__a, __b, __less<>()); } @@ -39,13 +39,13 @@ max(_LIBCPP_LIFETIMEBOUND const _Tp& __a, _LIBCPP_LIFETIMEBOUND const _Tp& __b) #ifndef _LIBCPP_CXX03_LANG template <class _Tp, class _Compare> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp max(initializer_list<_Tp> __t, _Compare __comp) { return *std::__max_element<__comp_ref_type<_Compare> >(__t.begin(), __t.end(), __comp); } template <class _Tp> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp max(initializer_list<_Tp> __t) { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp max(initializer_list<_Tp> __t) { return *std::max_element(__t.begin(), __t.end(), __less<>()); } diff --git a/libcxx/include/__algorithm/max_element.h b/libcxx/include/__algorithm/max_element.h index 3e58c40..929f337 100644 --- a/libcxx/include/__algorithm/max_element.h +++ b/libcxx/include/__algorithm/max_element.h @@ -36,7 +36,7 @@ __max_element(_ForwardIterator __first, _ForwardIterator __last, _Compare __comp } template <class _ForwardIterator, class _Compare> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _ForwardIterator +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _ForwardIterator max_element(_ForwardIterator __first, _ForwardIterator __last, _Compare __comp) { static_assert( __is_callable<_Compare&, decltype(*__first), decltype(*__first)>::value, "The comparator has to be callable"); @@ -44,7 +44,7 @@ max_element(_ForwardIterator __first, _ForwardIterator __last, _Compare __comp) } template 
<class _ForwardIterator> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _ForwardIterator +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _ForwardIterator max_element(_ForwardIterator __first, _ForwardIterator __last) { return std::max_element(__first, __last, __less<>()); } diff --git a/libcxx/include/__algorithm/min.h b/libcxx/include/__algorithm/min.h index 1bafad8..660e0b2 100644 --- a/libcxx/include/__algorithm/min.h +++ b/libcxx/include/__algorithm/min.h @@ -25,13 +25,13 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD template <class _Tp, class _Compare> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 const _Tp& +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 const _Tp& min(_LIBCPP_LIFETIMEBOUND const _Tp& __a, _LIBCPP_LIFETIMEBOUND const _Tp& __b, _Compare __comp) { return __comp(__b, __a) ? __b : __a; } template <class _Tp> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 const _Tp& +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 const _Tp& min(_LIBCPP_LIFETIMEBOUND const _Tp& __a, _LIBCPP_LIFETIMEBOUND const _Tp& __b) { return std::min(__a, __b, __less<>()); } @@ -39,13 +39,13 @@ min(_LIBCPP_LIFETIMEBOUND const _Tp& __a, _LIBCPP_LIFETIMEBOUND const _Tp& __b) #ifndef _LIBCPP_CXX03_LANG template <class _Tp, class _Compare> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp min(initializer_list<_Tp> __t, _Compare __comp) { return *std::__min_element<__comp_ref_type<_Compare> >(__t.begin(), __t.end(), __comp); } template <class _Tp> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp min(initializer_list<_Tp> __t) { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp min(initializer_list<_Tp> __t) { return 
*std::min_element(__t.begin(), __t.end(), __less<>()); } diff --git a/libcxx/include/__algorithm/min_element.h b/libcxx/include/__algorithm/min_element.h index 9a360f9..f40b24a0 100644 --- a/libcxx/include/__algorithm/min_element.h +++ b/libcxx/include/__algorithm/min_element.h @@ -48,7 +48,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Iter __min_element(_Iter __ } template <class _ForwardIterator, class _Compare> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _ForwardIterator +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _ForwardIterator min_element(_ForwardIterator __first, _ForwardIterator __last, _Compare __comp) { static_assert( __has_forward_iterator_category<_ForwardIterator>::value, "std::min_element requires a ForwardIterator"); @@ -59,7 +59,7 @@ min_element(_ForwardIterator __first, _ForwardIterator __last, _Compare __comp) } template <class _ForwardIterator> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _ForwardIterator +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _ForwardIterator min_element(_ForwardIterator __first, _ForwardIterator __last) { return std::min_element(__first, __last, __less<>()); } diff --git a/libcxx/include/__algorithm/minmax.h b/libcxx/include/__algorithm/minmax.h index bb7a379..de0bec0 100644 --- a/libcxx/include/__algorithm/minmax.h +++ b/libcxx/include/__algorithm/minmax.h @@ -24,13 +24,13 @@ _LIBCPP_BEGIN_NAMESPACE_STD template <class _Tp, class _Compare> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<const _Tp&, const _Tp&> +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<const _Tp&, const _Tp&> minmax(_LIBCPP_LIFETIMEBOUND const _Tp& __a, _LIBCPP_LIFETIMEBOUND const _Tp& __b, _Compare __comp) { return __comp(__b, __a) ? 
pair<const _Tp&, const _Tp&>(__b, __a) : pair<const _Tp&, const _Tp&>(__a, __b); } template <class _Tp> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<const _Tp&, const _Tp&> +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<const _Tp&, const _Tp&> minmax(_LIBCPP_LIFETIMEBOUND const _Tp& __a, _LIBCPP_LIFETIMEBOUND const _Tp& __b) { return std::minmax(__a, __b, __less<>()); } @@ -38,7 +38,7 @@ minmax(_LIBCPP_LIFETIMEBOUND const _Tp& __a, _LIBCPP_LIFETIMEBOUND const _Tp& __ #ifndef _LIBCPP_CXX03_LANG template <class _Tp, class _Compare> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Tp, _Tp> +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Tp, _Tp> minmax(initializer_list<_Tp> __t, _Compare __comp) { static_assert(__is_callable<_Compare&, _Tp, _Tp>::value, "The comparator has to be callable"); __identity __proj; @@ -47,7 +47,7 @@ minmax(initializer_list<_Tp> __t, _Compare __comp) { } template <class _Tp> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Tp, _Tp> +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Tp, _Tp> minmax(initializer_list<_Tp> __t) { return std::minmax(__t, __less<>()); } diff --git a/libcxx/include/__algorithm/minmax_element.h b/libcxx/include/__algorithm/minmax_element.h index 23929c9..47e3a68 100644 --- a/libcxx/include/__algorithm/minmax_element.h +++ b/libcxx/include/__algorithm/minmax_element.h @@ -79,7 +79,7 @@ __minmax_element_impl(_Iter __first, _Sent __last, _Comp& __comp, _Proj& __proj) } template <class _ForwardIterator, class _Compare> -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_ForwardIterator, _ForwardIterator> +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_ForwardIterator, _ForwardIterator> minmax_element(_ForwardIterator __first, _ForwardIterator __last, 
_Compare __comp) { static_assert( __has_forward_iterator_category<_ForwardIterator>::value, "std::minmax_element requires a ForwardIterator"); @@ -90,7 +90,7 @@ minmax_element(_ForwardIterator __first, _ForwardIterator __last, _Compare __com } template <class _ForwardIterator> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_ForwardIterator, _ForwardIterator> +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_ForwardIterator, _ForwardIterator> minmax_element(_ForwardIterator __first, _ForwardIterator __last) { return std::minmax_element(__first, __last, __less<>()); } diff --git a/libcxx/include/__algorithm/mismatch.h b/libcxx/include/__algorithm/mismatch.h index 632bec0..0fae7f6 100644 --- a/libcxx/include/__algorithm/mismatch.h +++ b/libcxx/include/__algorithm/mismatch.h @@ -37,7 +37,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD template <class _Iter1, class _Sent1, class _Iter2, class _Pred, class _Proj1, class _Proj2> -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Iter1, _Iter2> +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Iter1, _Iter2> __mismatch_loop(_Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Pred& __pred, _Proj1& __proj1, _Proj2& __proj2) { while (__first1 != __last1) { if (!std::__invoke(__pred, std::__invoke(__proj1, *__first1), std::__invoke(__proj2, *__first2))) @@ -49,7 +49,7 @@ __mismatch_loop(_Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Pred& __pred, } template <class _Iter1, class _Sent1, class _Iter2, class _Pred, class _Proj1, class _Proj2> -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Iter1, _Iter2> +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Iter1, _Iter2> __mismatch(_Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Pred& __pred, _Proj1& __proj1, _Proj2& __proj2) { return std::__mismatch_loop(__first1, __last1, __first2, __pred, 
__proj1, __proj2); } @@ -57,7 +57,7 @@ __mismatch(_Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Pred& __pred, _Pro #if _LIBCPP_VECTORIZE_ALGORITHMS template <class _Iter> -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Iter, _Iter> +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Iter, _Iter> __mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) { using __value_type = __iter_value_type<_Iter>; constexpr size_t __unroll_count = 4; @@ -124,7 +124,7 @@ template <class _Tp, __enable_if_t<is_integral<_Tp>::value && __desugars_to_v<__equal_tag, _Pred, _Tp, _Tp> && __is_identity<_Proj1>::value && __is_identity<_Proj2>::value, int> = 0> -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Tp*, _Tp*> +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Tp*, _Tp*> __mismatch(_Tp* __first1, _Tp* __last1, _Tp* __first2, _Pred&, _Proj1&, _Proj2&) { return std::__mismatch_vectorized(__first1, __last1, __first2); } @@ -137,7 +137,7 @@ template <class _Tp, __is_identity<_Proj1>::value && __is_identity<_Proj2>::value && __can_map_to_integer_v<_Tp> && __libcpp_is_trivially_equality_comparable<_Tp, _Tp>::value, int> = 0> -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Tp*, _Tp*> +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Tp*, _Tp*> __mismatch(_Tp* __first1, _Tp* __last1, _Tp* __first2, _Pred& __pred, _Proj1& __proj1, _Proj2& __proj2) { if (__libcpp_is_constant_evaluated()) { return std::__mismatch_loop(__first1, __last1, __first2, __pred, __proj1, __proj2); @@ -150,7 +150,7 @@ __mismatch(_Tp* __first1, _Tp* __last1, _Tp* __first2, _Pred& __pred, _Proj1& __ #endif // _LIBCPP_VECTORIZE_ALGORITHMS template <class _InputIterator1, class _InputIterator2, class _BinaryPredicate> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InputIterator1, _InputIterator2> 
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InputIterator1, _InputIterator2> mismatch(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _BinaryPredicate __pred) { __identity __proj; auto __res = std::__mismatch( @@ -159,14 +159,14 @@ mismatch(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __fi } template <class _InputIterator1, class _InputIterator2> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InputIterator1, _InputIterator2> +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InputIterator1, _InputIterator2> mismatch(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2) { return std::mismatch(__first1, __last1, __first2, __equal_to()); } #if _LIBCPP_STD_VER >= 14 template <class _Iter1, class _Sent1, class _Iter2, class _Sent2, class _Pred, class _Proj1, class _Proj2> -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Iter1, _Iter2> __mismatch( +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Iter1, _Iter2> __mismatch( _Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Sent2 __last2, _Pred& __pred, _Proj1& __proj1, _Proj2& __proj2) { while (__first1 != __last1 && __first2 != __last2) { if (!std::__invoke(__pred, std::__invoke(__proj1, *__first1), std::__invoke(__proj2, *__first2))) @@ -178,14 +178,14 @@ _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Iter } template <class _Tp, class _Pred, class _Proj1, class _Proj2> -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Tp*, _Tp*> +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Tp*, _Tp*> __mismatch(_Tp* __first1, _Tp* __last1, _Tp* __first2, _Tp* __last2, _Pred& __pred, _Proj1& __proj1, _Proj2& __proj2) { auto __len = std::min(__last1 - __first1, __last2 - __first2); return std::__mismatch(__first1, 
__first1 + __len, __first2, __pred, __proj1, __proj2); } template <class _InputIterator1, class _InputIterator2, class _BinaryPredicate> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InputIterator1, _InputIterator2> +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InputIterator1, _InputIterator2> mismatch(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, @@ -204,7 +204,7 @@ mismatch(_InputIterator1 __first1, } template <class _InputIterator1, class _InputIterator2> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InputIterator1, _InputIterator2> +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InputIterator1, _InputIterator2> mismatch(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _InputIterator2 __last2) { return std::mismatch(__first1, __last1, __first2, __last2, __equal_to()); } diff --git a/libcxx/include/__algorithm/none_of.h b/libcxx/include/__algorithm/none_of.h index 50841ba..e6bd197 100644 --- a/libcxx/include/__algorithm/none_of.h +++ b/libcxx/include/__algorithm/none_of.h @@ -19,7 +19,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template <class _InputIterator, class _Predicate> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool none_of(_InputIterator __first, _InputIterator __last, _Predicate __pred) { for (; __first != __last; ++__first) if (__pred(*__first)) diff --git a/libcxx/include/__algorithm/pstl.h b/libcxx/include/__algorithm/pstl.h index 0bb052b..71e7f28 100644 --- a/libcxx/include/__algorithm/pstl.h +++ b/libcxx/include/__algorithm/pstl.h @@ -352,7 +352,7 @@ template <class _ExecutionPolicy, class _Predicate, class _RawPolicy = __remove_cvref_t<_ExecutionPolicy>, enable_if_t<is_execution_policy_v<_RawPolicy>, int> = 0> -_LIBCPP_NODISCARD 
_LIBCPP_HIDE_FROM_ABI bool +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool is_partitioned(_ExecutionPolicy&& __policy, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) { _LIBCPP_REQUIRE_CPP17_FORWARD_ITERATOR(_ForwardIterator, "is_partitioned requires ForwardIterators"); using _Implementation = __pstl::__dispatch<__pstl::__is_partitioned, __pstl::__current_configuration, _RawPolicy>; diff --git a/libcxx/include/__algorithm/remove.h b/libcxx/include/__algorithm/remove.h index fd01c23..b2d7023 100644 --- a/libcxx/include/__algorithm/remove.h +++ b/libcxx/include/__algorithm/remove.h @@ -24,7 +24,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD template <class _ForwardIterator, class _Tp> -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator remove(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) { __first = std::find(__first, __last, __value); if (__first != __last) { diff --git a/libcxx/include/__algorithm/remove_if.h b/libcxx/include/__algorithm/remove_if.h index b14f3c0e..56fd745 100644 --- a/libcxx/include/__algorithm/remove_if.h +++ b/libcxx/include/__algorithm/remove_if.h @@ -23,7 +23,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD template <class _ForwardIterator, class _Predicate> -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator remove_if(_ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) { __first = std::find_if<_ForwardIterator, _Predicate&>(__first, __last, __pred); if (__first != __last) { diff --git a/libcxx/include/__algorithm/search.h b/libcxx/include/__algorithm/search.h index 7316e5e..24dec22 100644 --- a/libcxx/include/__algorithm/search.h +++ b/libcxx/include/__algorithm/search.h @@ -160,7 +160,7 @@ _LIBCPP_HIDE_FROM_ABI 
_LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Iter1, _Iter1> __searc } template <class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator1 +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator1 search(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, @@ -173,7 +173,7 @@ search(_ForwardIterator1 __first1, } template <class _ForwardIterator1, class _ForwardIterator2> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator1 +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator1 search(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2) { return std::search(__first1, __last1, __first2, __last2, __equal_to()); } diff --git a/libcxx/include/__algorithm/search_n.h b/libcxx/include/__algorithm/search_n.h index f980638..4019dfb 100644 --- a/libcxx/include/__algorithm/search_n.h +++ b/libcxx/include/__algorithm/search_n.h @@ -136,7 +136,7 @@ __search_n_impl(_Iter1 __first, _Sent1 __last, _DiffT __count, const _Type& __va } template <class _ForwardIterator, class _Size, class _Tp, class _BinaryPredicate> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator search_n( +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator search_n( _ForwardIterator __first, _ForwardIterator __last, _Size __count, const _Tp& __value, _BinaryPredicate __pred) { static_assert( __is_callable<_BinaryPredicate&, decltype(*__first), const _Tp&>::value, "The comparator has to be callable"); @@ -145,7 +145,7 @@ _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Fo } template <class _ForwardIterator, class _Size, class _Tp> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI 
_LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator search_n(_ForwardIterator __first, _ForwardIterator __last, _Size __count, const _Tp& __value) { return std::search_n(__first, __last, std::__convert_to_integral(__count), __value, __equal_to()); } diff --git a/libcxx/include/__algorithm/set_intersection.h b/libcxx/include/__algorithm/set_intersection.h index bb0d86c..2335e50 100644 --- a/libcxx/include/__algorithm/set_intersection.h +++ b/libcxx/include/__algorithm/set_intersection.h @@ -84,7 +84,7 @@ template <class _AlgPolicy, class _InForwardIter2, class _Sent2, class _OutIter> -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersection_result<_InForwardIter1, _InForwardIter2, _OutIter> __set_intersection( _InForwardIter1 __first1, @@ -129,7 +129,7 @@ template <class _AlgPolicy, class _InInputIter2, class _Sent2, class _OutIter> -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersection_result<_InInputIter1, _InInputIter2, _OutIter> __set_intersection( _InInputIter1 __first1, @@ -160,7 +160,7 @@ __set_intersection( } template <class _AlgPolicy, class _Compare, class _InIter1, class _Sent1, class _InIter2, class _Sent2, class _OutIter> -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersection_result<_InIter1, _InIter2, _OutIter> __set_intersection( _InIter1 __first1, _Sent1 __last1, _InIter2 __first2, _Sent2 __last2, _OutIter __result, _Compare&& __comp) { diff --git a/libcxx/include/__algorithm/simd_utils.h b/libcxx/include/__algorithm/simd_utils.h index 549197b..56518da 100644 --- a/libcxx/include/__algorithm/simd_utils.h +++ b/libcxx/include/__algorithm/simd_utils.h @@ -110,19 +110,19 @@ using __simd_vector_underlying_type_t = 
decltype(std::__simd_vector_underlying_t // This isn't inlined without always_inline when loading chars. template <class _VecT, class _Iter> -_LIBCPP_NODISCARD _LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _VecT __load_vector(_Iter __iter) noexcept { +[[__nodiscard__]] _LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _VecT __load_vector(_Iter __iter) noexcept { return [=]<size_t... _Indices>(index_sequence<_Indices...>) _LIBCPP_ALWAYS_INLINE noexcept { return _VecT{__iter[_Indices]...}; }(make_index_sequence<__simd_vector_size_v<_VecT>>{}); } template <class _Tp, size_t _Np> -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI bool __all_of(__simd_vector<_Tp, _Np> __vec) noexcept { +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool __all_of(__simd_vector<_Tp, _Np> __vec) noexcept { return __builtin_reduce_and(__builtin_convertvector(__vec, __simd_vector<bool, _Np>)); } template <class _Tp, size_t _Np> -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI size_t __find_first_set(__simd_vector<_Tp, _Np> __vec) noexcept { +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_t __find_first_set(__simd_vector<_Tp, _Np> __vec) noexcept { using __mask_vec = __simd_vector<bool, _Np>; // This has MSan disabled du to https://github.com/llvm/llvm-project/issues/85876 @@ -151,7 +151,7 @@ _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI size_t __find_first_set(__simd_vector<_T } template <class _Tp, size_t _Np> -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI size_t __find_first_not_set(__simd_vector<_Tp, _Np> __vec) noexcept { +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_t __find_first_not_set(__simd_vector<_Tp, _Np> __vec) noexcept { return std::__find_first_set(~__vec); } diff --git a/libcxx/include/__algorithm/unique.h b/libcxx/include/__algorithm/unique.h index d597014..1f0c4ff 100644 --- a/libcxx/include/__algorithm/unique.h +++ b/libcxx/include/__algorithm/unique.h @@ -29,7 +29,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD // unique template <class _AlgPolicy, class _Iter, class _Sent, class _BinaryPredicate> -_LIBCPP_NODISCARD 
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 std::pair<_Iter, _Iter> +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 std::pair<_Iter, _Iter> __unique(_Iter __first, _Sent __last, _BinaryPredicate&& __pred) { __first = std::__adjacent_find(__first, __last, __pred); if (__first != __last) { @@ -46,13 +46,13 @@ __unique(_Iter __first, _Sent __last, _BinaryPredicate&& __pred) { } template <class _ForwardIterator, class _BinaryPredicate> -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator unique(_ForwardIterator __first, _ForwardIterator __last, _BinaryPredicate __pred) { return std::__unique<_ClassicAlgPolicy>(std::move(__first), std::move(__last), __pred).first; } template <class _ForwardIterator> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator unique(_ForwardIterator __first, _ForwardIterator __last) { return std::unique(__first, __last, __equal_to()); } diff --git a/libcxx/include/__algorithm/upper_bound.h b/libcxx/include/__algorithm/upper_bound.h index 102447e..e8be0ef 100644 --- a/libcxx/include/__algorithm/upper_bound.h +++ b/libcxx/include/__algorithm/upper_bound.h @@ -49,7 +49,7 @@ __upper_bound(_Iter __first, _Sent __last, const _Tp& __value, _Compare&& __comp } template <class _ForwardIterator, class _Tp, class _Compare> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator upper_bound(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value, _Compare __comp) { static_assert(__is_callable<_Compare&, const _Tp&, decltype(*__first)>::value, "The comparator has to be callable"); 
static_assert(is_copy_constructible<_ForwardIterator>::value, "Iterator has to be copy constructible"); @@ -58,7 +58,7 @@ upper_bound(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __valu } template <class _ForwardIterator, class _Tp> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator upper_bound(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) { return std::upper_bound(std::move(__first), std::move(__last), __value, __less<>()); } diff --git a/libcxx/include/__bit/bit_cast.h b/libcxx/include/__bit/bit_cast.h index cd04567..7350250 100644 --- a/libcxx/include/__bit/bit_cast.h +++ b/libcxx/include/__bit/bit_cast.h @@ -22,7 +22,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD #ifndef _LIBCPP_CXX03_LANG template <class _ToType, class _FromType> -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI constexpr _ToType __bit_cast(const _FromType& __from) noexcept { +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI constexpr _ToType __bit_cast(const _FromType& __from) noexcept { return __builtin_bit_cast(_ToType, __from); } diff --git a/libcxx/include/__bit/countl.h b/libcxx/include/__bit/countl.h index 998a0b4..bb09e8e 100644 --- a/libcxx/include/__bit/countl.h +++ b/libcxx/include/__bit/countl.h @@ -27,15 +27,15 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned __x) _NOEXCEPT { return __builtin_clz(__x); } -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned long __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned long __x) _NOEXCEPT { return __builtin_clzl(__x); } -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int 
__libcpp_clz(unsigned long long __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned long long __x) _NOEXCEPT { return __builtin_clzll(__x); } diff --git a/libcxx/include/__bit/countr.h b/libcxx/include/__bit/countr.h index 9e92021..2f75711 100644 --- a/libcxx/include/__bit/countr.h +++ b/libcxx/include/__bit/countr.h @@ -26,20 +26,20 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned __x) _NOEXCEPT { return __builtin_ctz(__x); } -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned long __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned long __x) _NOEXCEPT { return __builtin_ctzl(__x); } -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned long long __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned long long __x) _NOEXCEPT { return __builtin_ctzll(__x); } template <class _Tp> -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countr_zero(_Tp __t) _NOEXCEPT { +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countr_zero(_Tp __t) _NOEXCEPT { #if __has_builtin(__builtin_ctzg) return __builtin_ctzg(__t, numeric_limits<_Tp>::digits); #else // __has_builtin(__builtin_ctzg) diff --git a/libcxx/include/__chrono/leap_second.h b/libcxx/include/__chrono/leap_second.h index be3ab423..b01d9fb 100644 --- a/libcxx/include/__chrono/leap_second.h +++ b/libcxx/include/__chrono/leap_second.h @@ -43,9 +43,9 @@ public: _LIBCPP_HIDE_FROM_ABI leap_second(const leap_second&) = default; _LIBCPP_HIDE_FROM_ABI leap_second& operator=(const leap_second&) = default; - _LIBCPP_NODISCARD 
_LIBCPP_HIDE_FROM_ABI constexpr sys_seconds date() const noexcept { return __date_; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr sys_seconds date() const noexcept { return __date_; } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI constexpr seconds value() const noexcept { return __value_; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr seconds value() const noexcept { return __value_; } private: sys_seconds __date_; diff --git a/libcxx/include/__config b/libcxx/include/__config index 1c0b7c0..9f3bab3 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -1116,15 +1116,6 @@ typedef __char32_t char32_t; # define _LIBCPP_USING_IF_EXISTS # endif -# if __has_cpp_attribute(__nodiscard__) -# define _LIBCPP_NODISCARD [[__nodiscard__]] -# else -// We can't use GCC's [[gnu::warn_unused_result]] and -// __attribute__((warn_unused_result)), because GCC does not silence them via -// (void) cast. -# define _LIBCPP_NODISCARD -# endif - # if __has_attribute(__no_destroy__) # define _LIBCPP_NO_DESTROY __attribute__((__no_destroy__)) # else diff --git a/libcxx/include/__filesystem/path.h b/libcxx/include/__filesystem/path.h index eef1fc0..f0d3968 100644 --- a/libcxx/include/__filesystem/path.h +++ b/libcxx/include/__filesystem/path.h @@ -812,7 +812,7 @@ public: _LIBCPP_HIDE_FROM_ABI path extension() const { return string_type(__extension()); } // query - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI bool empty() const noexcept { return __pn_.empty(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool empty() const noexcept { return __pn_.empty(); } _LIBCPP_HIDE_FROM_ABI bool has_root_name() const { return !__root_name().empty(); } _LIBCPP_HIDE_FROM_ABI bool has_root_directory() const { return !__root_directory().empty(); } diff --git a/libcxx/include/__functional/identity.h b/libcxx/include/__functional/identity.h index 8468de3..1b1c6cf 100644 --- a/libcxx/include/__functional/identity.h +++ b/libcxx/include/__functional/identity.h @@ -26,7 +26,7 @@ struct __is_identity : 
false_type {}; struct __identity { template <class _Tp> - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _Tp&& operator()(_Tp&& __t) const _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _Tp&& operator()(_Tp&& __t) const _NOEXCEPT { return std::forward<_Tp>(__t); } diff --git a/libcxx/include/__fwd/get.h b/libcxx/include/__fwd/get.h new file mode 100644 index 0000000..6121ed0 --- /dev/null +++ b/libcxx/include/__fwd/get.h @@ -0,0 +1,24 @@ +//===---------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// + +#ifndef _LIBCPP___FWD_GET_H +#define _LIBCPP___FWD_GET_H + +#include <__config> +#include <__fwd/array.h> +#include <__fwd/complex.h> +#include <__fwd/pair.h> +#include <__fwd/subrange.h> +#include <__fwd/tuple.h> +#include <__fwd/variant.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +#endif // _LIBCPP___FWD_GET_H diff --git a/libcxx/include/__fwd/variant.h b/libcxx/include/__fwd/variant.h new file mode 100644 index 0000000..71c792f --- /dev/null +++ b/libcxx/include/__fwd/variant.h @@ -0,0 +1,77 @@ +//===---------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// + +#ifndef _LIBCPP___FWD_VARIANT_H +#define _LIBCPP___FWD_VARIANT_H + +#include <__config> +#include <__cstddef/size_t.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER >= 17 + +template <class... _Types> +class _LIBCPP_TEMPLATE_VIS variant; + +template <class _Tp> +struct _LIBCPP_TEMPLATE_VIS variant_size; + +template <class _Tp> +inline constexpr size_t variant_size_v = variant_size<_Tp>::value; + +template <size_t _Ip, class _Tp> +struct _LIBCPP_TEMPLATE_VIS variant_alternative; + +template <size_t _Ip, class _Tp> +using variant_alternative_t = typename variant_alternative<_Ip, _Tp>::type; + +inline constexpr size_t variant_npos = static_cast<size_t>(-1); + +template <size_t _Ip, class... _Types> +_LIBCPP_HIDE_FROM_ABI +_LIBCPP_AVAILABILITY_THROW_BAD_VARIANT_ACCESS constexpr variant_alternative_t<_Ip, variant<_Types...>>& +get(variant<_Types...>&); + +template <size_t _Ip, class... _Types> +_LIBCPP_HIDE_FROM_ABI +_LIBCPP_AVAILABILITY_THROW_BAD_VARIANT_ACCESS constexpr variant_alternative_t<_Ip, variant<_Types...>>&& +get(variant<_Types...>&&); + +template <size_t _Ip, class... _Types> +_LIBCPP_HIDE_FROM_ABI +_LIBCPP_AVAILABILITY_THROW_BAD_VARIANT_ACCESS constexpr const variant_alternative_t<_Ip, variant<_Types...>>& +get(const variant<_Types...>&); + +template <size_t _Ip, class... _Types> +_LIBCPP_HIDE_FROM_ABI +_LIBCPP_AVAILABILITY_THROW_BAD_VARIANT_ACCESS constexpr const variant_alternative_t<_Ip, variant<_Types...>>&& +get(const variant<_Types...>&&); + +template <class _Tp, class... _Types> +_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_THROW_BAD_VARIANT_ACCESS constexpr _Tp& get(variant<_Types...>&); + +template <class _Tp, class... 
_Types> +_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_THROW_BAD_VARIANT_ACCESS constexpr _Tp&& get(variant<_Types...>&&); + +template <class _Tp, class... _Types> +_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_THROW_BAD_VARIANT_ACCESS constexpr const _Tp& get(const variant<_Types...>&); + +template <class _Tp, class... _Types> +_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_THROW_BAD_VARIANT_ACCESS constexpr const _Tp&& +get(const variant<_Types...>&&); + +#endif // _LIBCPP_STD_VER >= 17 + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___FWD_VARIANT_H diff --git a/libcxx/include/__math/abs.h b/libcxx/include/__math/abs.h index ab82a28..fc3bf3a 100644 --- a/libcxx/include/__math/abs.h +++ b/libcxx/include/__math/abs.h @@ -23,19 +23,19 @@ namespace __math { // fabs -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI float fabs(float __x) _NOEXCEPT { return __builtin_fabsf(__x); } +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI float fabs(float __x) _NOEXCEPT { return __builtin_fabsf(__x); } template <class = int> -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI double fabs(double __x) _NOEXCEPT { +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI double fabs(double __x) _NOEXCEPT { return __builtin_fabs(__x); } -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI long double fabs(long double __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI long double fabs(long double __x) _NOEXCEPT { return __builtin_fabsl(__x); } template <class _A1, __enable_if_t<is_integral<_A1>::value, int> = 0> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI double fabs(_A1 __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI double fabs(_A1 __x) _NOEXCEPT { return __builtin_fabs((double)__x); } diff --git a/libcxx/include/__math/copysign.h b/libcxx/include/__math/copysign.h index 2c3b0dd..c3ca6a3 100644 --- a/libcxx/include/__math/copysign.h +++ b/libcxx/include/__math/copysign.h @@ -24,16 +24,16 @@ namespace __math { // copysign -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI float copysign(float 
__x, float __y) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI float copysign(float __x, float __y) _NOEXCEPT { return ::__builtin_copysignf(__x, __y); } -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI long double copysign(long double __x, long double __y) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI long double copysign(long double __x, long double __y) _NOEXCEPT { return ::__builtin_copysignl(__x, __y); } template <class _A1, class _A2, __enable_if_t<is_arithmetic<_A1>::value && is_arithmetic<_A2>::value, int> = 0> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI typename __promote<_A1, _A2>::type copysign(_A1 __x, _A2 __y) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI typename __promote<_A1, _A2>::type copysign(_A1 __x, _A2 __y) _NOEXCEPT { return ::__builtin_copysign(__x, __y); } diff --git a/libcxx/include/__math/min_max.h b/libcxx/include/__math/min_max.h index 27997b4..db900c8 100644 --- a/libcxx/include/__math/min_max.h +++ b/libcxx/include/__math/min_max.h @@ -25,21 +25,21 @@ namespace __math { // fmax -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI float fmax(float __x, float __y) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI float fmax(float __x, float __y) _NOEXCEPT { return __builtin_fmaxf(__x, __y); } template <class = int> -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI double fmax(double __x, double __y) _NOEXCEPT { +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI double fmax(double __x, double __y) _NOEXCEPT { return __builtin_fmax(__x, __y); } -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI long double fmax(long double __x, long double __y) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI long double fmax(long double __x, long double __y) _NOEXCEPT { return __builtin_fmaxl(__x, __y); } template <class _A1, class _A2, __enable_if_t<is_arithmetic<_A1>::value && is_arithmetic<_A2>::value, int> = 0> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI typename __promote<_A1, _A2>::type fmax(_A1 __x, _A2 
__y) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI typename __promote<_A1, _A2>::type fmax(_A1 __x, _A2 __y) _NOEXCEPT { using __result_type = typename __promote<_A1, _A2>::type; static_assert(!(_IsSame<_A1, __result_type>::value && _IsSame<_A2, __result_type>::value), ""); return __math::fmax((__result_type)__x, (__result_type)__y); @@ -47,21 +47,21 @@ _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI typename __promote<_A1, _A2>::typ // fmin -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI float fmin(float __x, float __y) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI float fmin(float __x, float __y) _NOEXCEPT { return __builtin_fminf(__x, __y); } template <class = int> -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI double fmin(double __x, double __y) _NOEXCEPT { +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI double fmin(double __x, double __y) _NOEXCEPT { return __builtin_fmin(__x, __y); } -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI long double fmin(long double __x, long double __y) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI long double fmin(long double __x, long double __y) _NOEXCEPT { return __builtin_fminl(__x, __y); } template <class _A1, class _A2, __enable_if_t<is_arithmetic<_A1>::value && is_arithmetic<_A2>::value, int> = 0> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI typename __promote<_A1, _A2>::type fmin(_A1 __x, _A2 __y) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI typename __promote<_A1, _A2>::type fmin(_A1 __x, _A2 __y) _NOEXCEPT { using __result_type = typename __promote<_A1, _A2>::type; static_assert(!(_IsSame<_A1, __result_type>::value && _IsSame<_A2, __result_type>::value), ""); return __math::fmin((__result_type)__x, (__result_type)__y); diff --git a/libcxx/include/__math/roots.h b/libcxx/include/__math/roots.h index 359fd74..cef376f 100644 --- a/libcxx/include/__math/roots.h +++ b/libcxx/include/__math/roots.h @@ -39,19 +39,19 @@ inline _LIBCPP_HIDE_FROM_ABI double sqrt(_A1 __x) _NOEXCEPT { // 
cbrt -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI float cbrt(float __x) _NOEXCEPT { return __builtin_cbrtf(__x); } +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI float cbrt(float __x) _NOEXCEPT { return __builtin_cbrtf(__x); } template <class = int> -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI double cbrt(double __x) _NOEXCEPT { +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI double cbrt(double __x) _NOEXCEPT { return __builtin_cbrt(__x); } -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI long double cbrt(long double __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI long double cbrt(long double __x) _NOEXCEPT { return __builtin_cbrtl(__x); } template <class _A1, __enable_if_t<is_integral<_A1>::value, int> = 0> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI double cbrt(_A1 __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI double cbrt(_A1 __x) _NOEXCEPT { return __builtin_cbrt((double)__x); } diff --git a/libcxx/include/__math/rounding_functions.h b/libcxx/include/__math/rounding_functions.h index f7246ba..474f585 100644 --- a/libcxx/include/__math/rounding_functions.h +++ b/libcxx/include/__math/rounding_functions.h @@ -26,37 +26,37 @@ namespace __math { // ceil -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI float ceil(float __x) _NOEXCEPT { return __builtin_ceilf(__x); } +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI float ceil(float __x) _NOEXCEPT { return __builtin_ceilf(__x); } template <class = int> -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI double ceil(double __x) _NOEXCEPT { +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI double ceil(double __x) _NOEXCEPT { return __builtin_ceil(__x); } -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI long double ceil(long double __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI long double ceil(long double __x) _NOEXCEPT { return __builtin_ceill(__x); } template <class _A1, __enable_if_t<is_integral<_A1>::value, int> = 0> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI double ceil(_A1 __x) 
_NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI double ceil(_A1 __x) _NOEXCEPT { return __builtin_ceil((double)__x); } // floor -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI float floor(float __x) _NOEXCEPT { return __builtin_floorf(__x); } +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI float floor(float __x) _NOEXCEPT { return __builtin_floorf(__x); } template <class = int> -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI double floor(double __x) _NOEXCEPT { +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI double floor(double __x) _NOEXCEPT { return __builtin_floor(__x); } -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI long double floor(long double __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI long double floor(long double __x) _NOEXCEPT { return __builtin_floorl(__x); } template <class _A1, __enable_if_t<is_integral<_A1>::value, int> = 0> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI double floor(_A1 __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI double floor(_A1 __x) _NOEXCEPT { return __builtin_floor((double)__x); } @@ -126,21 +126,21 @@ inline _LIBCPP_HIDE_FROM_ABI long lround(_A1 __x) _NOEXCEPT { // nearbyint -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI float nearbyint(float __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI float nearbyint(float __x) _NOEXCEPT { return __builtin_nearbyintf(__x); } template <class = int> -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI double nearbyint(double __x) _NOEXCEPT { +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI double nearbyint(double __x) _NOEXCEPT { return __builtin_nearbyint(__x); } -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI long double nearbyint(long double __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI long double nearbyint(long double __x) _NOEXCEPT { return __builtin_nearbyintl(__x); } template <class _A1, __enable_if_t<is_integral<_A1>::value, int> = 0> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI double nearbyint(_A1 __x) _NOEXCEPT { 
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI double nearbyint(_A1 __x) _NOEXCEPT { return __builtin_nearbyint((double)__x); } @@ -186,55 +186,55 @@ inline _LIBCPP_HIDE_FROM_ABI double nexttoward(_A1 __x, long double __y) _NOEXCE // rint -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI float rint(float __x) _NOEXCEPT { return __builtin_rintf(__x); } +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI float rint(float __x) _NOEXCEPT { return __builtin_rintf(__x); } template <class = int> -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI double rint(double __x) _NOEXCEPT { +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI double rint(double __x) _NOEXCEPT { return __builtin_rint(__x); } -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI long double rint(long double __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI long double rint(long double __x) _NOEXCEPT { return __builtin_rintl(__x); } template <class _A1, __enable_if_t<is_integral<_A1>::value, int> = 0> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI double rint(_A1 __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI double rint(_A1 __x) _NOEXCEPT { return __builtin_rint((double)__x); } // round -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI float round(float __x) _NOEXCEPT { return __builtin_round(__x); } +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI float round(float __x) _NOEXCEPT { return __builtin_round(__x); } template <class = int> -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI double round(double __x) _NOEXCEPT { +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI double round(double __x) _NOEXCEPT { return __builtin_round(__x); } -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI long double round(long double __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI long double round(long double __x) _NOEXCEPT { return __builtin_roundl(__x); } template <class _A1, __enable_if_t<is_integral<_A1>::value, int> = 0> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI double round(_A1 __x) _NOEXCEPT { 
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI double round(_A1 __x) _NOEXCEPT { return __builtin_round((double)__x); } // trunc -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI float trunc(float __x) _NOEXCEPT { return __builtin_trunc(__x); } +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI float trunc(float __x) _NOEXCEPT { return __builtin_trunc(__x); } template <class = int> -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI double trunc(double __x) _NOEXCEPT { +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI double trunc(double __x) _NOEXCEPT { return __builtin_trunc(__x); } -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI long double trunc(long double __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI long double trunc(long double __x) _NOEXCEPT { return __builtin_truncl(__x); } template <class _A1, __enable_if_t<is_integral<_A1>::value, int> = 0> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI double trunc(_A1 __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI double trunc(_A1 __x) _NOEXCEPT { return __builtin_trunc((double)__x); } diff --git a/libcxx/include/__math/traits.h b/libcxx/include/__math/traits.h index 3d4f14f..0c96f76 100644 --- a/libcxx/include/__math/traits.h +++ b/libcxx/include/__math/traits.h @@ -12,7 +12,6 @@ #include <__config> #include <__type_traits/enable_if.h> #include <__type_traits/is_arithmetic.h> -#include <__type_traits/is_floating_point.h> #include <__type_traits/is_integral.h> #include <__type_traits/is_signed.h> #include <__type_traits/promote.h> @@ -34,52 +33,65 @@ namespace __math { # define _LIBCPP_SIGNBIT_CONSTEXPR #endif -template <class _A1, __enable_if_t<is_floating_point<_A1>::value, int> = 0> -_LIBCPP_NODISCARD inline _LIBCPP_SIGNBIT_CONSTEXPR _LIBCPP_HIDE_FROM_ABI bool signbit(_A1 __x) _NOEXCEPT { +// The universal C runtime (UCRT) in the WinSDK provides floating point overloads +// for std::signbit(). 
By defining our overloads as templates, we can work around +// this issue as templates are less preferred than non-template functions. +template <class = void> +[[__nodiscard__]] inline _LIBCPP_SIGNBIT_CONSTEXPR _LIBCPP_HIDE_FROM_ABI bool signbit(float __x) _NOEXCEPT { + return __builtin_signbit(__x); +} + +template <class = void> +[[__nodiscard__]] inline _LIBCPP_SIGNBIT_CONSTEXPR _LIBCPP_HIDE_FROM_ABI bool signbit(double __x) _NOEXCEPT { + return __builtin_signbit(__x); +} + +template <class = void> +[[__nodiscard__]] inline _LIBCPP_SIGNBIT_CONSTEXPR _LIBCPP_HIDE_FROM_ABI bool signbit(long double __x) _NOEXCEPT { return __builtin_signbit(__x); } template <class _A1, __enable_if_t<is_integral<_A1>::value && is_signed<_A1>::value, int> = 0> -_LIBCPP_NODISCARD inline _LIBCPP_SIGNBIT_CONSTEXPR _LIBCPP_HIDE_FROM_ABI bool signbit(_A1 __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_SIGNBIT_CONSTEXPR _LIBCPP_HIDE_FROM_ABI bool signbit(_A1 __x) _NOEXCEPT { return __x < 0; } template <class _A1, __enable_if_t<is_integral<_A1>::value && !is_signed<_A1>::value, int> = 0> -_LIBCPP_NODISCARD inline _LIBCPP_SIGNBIT_CONSTEXPR _LIBCPP_HIDE_FROM_ABI bool signbit(_A1) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_SIGNBIT_CONSTEXPR _LIBCPP_HIDE_FROM_ABI bool signbit(_A1) _NOEXCEPT { return false; } // isfinite template <class _A1, __enable_if_t<is_integral<_A1>::value, int> = 0> -_LIBCPP_NODISCARD _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isfinite(_A1) _NOEXCEPT { +[[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isfinite(_A1) _NOEXCEPT { return true; } -_LIBCPP_NODISCARD inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isfinite(float __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isfinite(float __x) _NOEXCEPT { return __builtin_isfinite(__x); } -_LIBCPP_NODISCARD inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isfinite(double __x) _NOEXCEPT { 
+[[__nodiscard__]] inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isfinite(double __x) _NOEXCEPT { return __builtin_isfinite(__x); } -_LIBCPP_NODISCARD inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isfinite(long double __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isfinite(long double __x) _NOEXCEPT { return __builtin_isfinite(__x); } // isinf template <class _A1, __enable_if_t<is_integral<_A1>::value, int> = 0> -_LIBCPP_NODISCARD _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isinf(_A1) _NOEXCEPT { +[[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isinf(_A1) _NOEXCEPT { return false; } -_LIBCPP_NODISCARD inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isinf(float __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isinf(float __x) _NOEXCEPT { return __builtin_isinf(__x); } -_LIBCPP_NODISCARD inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI +[[__nodiscard__]] inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI #ifdef _LIBCPP_PREFERRED_OVERLOAD _LIBCPP_PREFERRED_OVERLOAD #endif @@ -88,22 +100,22 @@ _LIBCPP_PREFERRED_OVERLOAD return __builtin_isinf(__x); } -_LIBCPP_NODISCARD inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isinf(long double __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isinf(long double __x) _NOEXCEPT { return __builtin_isinf(__x); } // isnan template <class _A1, __enable_if_t<is_integral<_A1>::value, int> = 0> -_LIBCPP_NODISCARD _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isnan(_A1) _NOEXCEPT { +[[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isnan(_A1) _NOEXCEPT { return false; } -_LIBCPP_NODISCARD inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isnan(float __x) _NOEXCEPT { +[[__nodiscard__]] inline 
_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isnan(float __x) _NOEXCEPT { return __builtin_isnan(__x); } -_LIBCPP_NODISCARD inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI +[[__nodiscard__]] inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI #ifdef _LIBCPP_PREFERRED_OVERLOAD _LIBCPP_PREFERRED_OVERLOAD #endif @@ -112,33 +124,33 @@ _LIBCPP_PREFERRED_OVERLOAD return __builtin_isnan(__x); } -_LIBCPP_NODISCARD inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isnan(long double __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isnan(long double __x) _NOEXCEPT { return __builtin_isnan(__x); } // isnormal template <class _A1, __enable_if_t<is_integral<_A1>::value, int> = 0> -_LIBCPP_NODISCARD _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isnormal(_A1 __x) _NOEXCEPT { +[[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isnormal(_A1 __x) _NOEXCEPT { return __x != 0; } -_LIBCPP_NODISCARD inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isnormal(float __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isnormal(float __x) _NOEXCEPT { return __builtin_isnormal(__x); } -_LIBCPP_NODISCARD inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isnormal(double __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isnormal(double __x) _NOEXCEPT { return __builtin_isnormal(__x); } -_LIBCPP_NODISCARD inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isnormal(long double __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isnormal(long double __x) _NOEXCEPT { return __builtin_isnormal(__x); } // isgreater template <class _A1, class _A2, __enable_if_t<is_arithmetic<_A1>::value && is_arithmetic<_A2>::value, int> = 0> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI bool isgreater(_A1 
__x, _A2 __y) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI bool isgreater(_A1 __x, _A2 __y) _NOEXCEPT { using type = typename __promote<_A1, _A2>::type; return __builtin_isgreater((type)__x, (type)__y); } @@ -146,7 +158,7 @@ _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI bool isgreater(_A1 __x, _A2 __y) // isgreaterequal template <class _A1, class _A2, __enable_if_t<is_arithmetic<_A1>::value && is_arithmetic<_A2>::value, int> = 0> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI bool isgreaterequal(_A1 __x, _A2 __y) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI bool isgreaterequal(_A1 __x, _A2 __y) _NOEXCEPT { using type = typename __promote<_A1, _A2>::type; return __builtin_isgreaterequal((type)__x, (type)__y); } @@ -154,7 +166,7 @@ _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI bool isgreaterequal(_A1 __x, _A2 // isless template <class _A1, class _A2, __enable_if_t<is_arithmetic<_A1>::value && is_arithmetic<_A2>::value, int> = 0> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI bool isless(_A1 __x, _A2 __y) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI bool isless(_A1 __x, _A2 __y) _NOEXCEPT { using type = typename __promote<_A1, _A2>::type; return __builtin_isless((type)__x, (type)__y); } @@ -162,7 +174,7 @@ _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI bool isless(_A1 __x, _A2 __y) _NO // islessequal template <class _A1, class _A2, __enable_if_t<is_arithmetic<_A1>::value && is_arithmetic<_A2>::value, int> = 0> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI bool islessequal(_A1 __x, _A2 __y) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI bool islessequal(_A1 __x, _A2 __y) _NOEXCEPT { using type = typename __promote<_A1, _A2>::type; return __builtin_islessequal((type)__x, (type)__y); } @@ -170,7 +182,7 @@ _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI bool islessequal(_A1 __x, _A2 __y // islessgreater template <class _A1, class _A2, __enable_if_t<is_arithmetic<_A1>::value && is_arithmetic<_A2>::value, int> = 
0> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI bool islessgreater(_A1 __x, _A2 __y) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI bool islessgreater(_A1 __x, _A2 __y) _NOEXCEPT { using type = typename __promote<_A1, _A2>::type; return __builtin_islessgreater((type)__x, (type)__y); } @@ -178,7 +190,7 @@ _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI bool islessgreater(_A1 __x, _A2 _ // isunordered template <class _A1, class _A2, __enable_if_t<is_arithmetic<_A1>::value && is_arithmetic<_A2>::value, int> = 0> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI bool isunordered(_A1 __x, _A2 __y) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI bool isunordered(_A1 __x, _A2 __y) _NOEXCEPT { using type = typename __promote<_A1, _A2>::type; return __builtin_isunordered((type)__x, (type)__y); } diff --git a/libcxx/include/__memory/allocate_at_least.h b/libcxx/include/__memory/allocate_at_least.h index df73d9a..a10e4fba 100644 --- a/libcxx/include/__memory/allocate_at_least.h +++ b/libcxx/include/__memory/allocate_at_least.h @@ -35,7 +35,7 @@ struct __allocation_result { }; template <class _Alloc> -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR __allocation_result<typename allocator_traits<_Alloc>::pointer> __allocate_at_least(_Alloc& __alloc, size_t __n) { return {__alloc.allocate(__n), __n}; diff --git a/libcxx/include/__memory/allocator.h b/libcxx/include/__memory/allocator.h index 6a9eed9..cd146da 100644 --- a/libcxx/include/__memory/allocator.h +++ b/libcxx/include/__memory/allocator.h @@ -93,7 +93,7 @@ public: template <class _Up> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 allocator(const allocator<_Up>&) _NOEXCEPT {} - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Tp* allocate(size_t __n) { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Tp* allocate(size_t __n) { static_assert(sizeof(_Tp) >= 0, "cannot allocate memory for an 
incomplete type"); if (__n > allocator_traits<allocator>::max_size(*this)) __throw_bad_array_new_length(); @@ -138,7 +138,7 @@ public: return std::addressof(__x); } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_DEPRECATED_IN_CXX17 _Tp* allocate(size_t __n, const void*) { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_DEPRECATED_IN_CXX17 _Tp* allocate(size_t __n, const void*) { return allocate(__n); } diff --git a/libcxx/include/__memory/allocator_traits.h b/libcxx/include/__memory/allocator_traits.h index c5fcc89..082c36d 100644 --- a/libcxx/include/__memory/allocator_traits.h +++ b/libcxx/include/__memory/allocator_traits.h @@ -275,13 +275,13 @@ struct _LIBCPP_TEMPLATE_VIS allocator_traits { }; #endif // _LIBCPP_CXX03_LANG - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 static pointer + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 static pointer allocate(allocator_type& __a, size_type __n) { return __a.allocate(__n); } template <class _Ap = _Alloc, __enable_if_t<__has_allocate_hint<_Ap, size_type, const_void_pointer>::value, int> = 0> - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 static pointer + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 static pointer allocate(allocator_type& __a, size_type __n, const_void_pointer __hint) { _LIBCPP_SUPPRESS_DEPRECATED_PUSH return __a.allocate(__n, __hint); @@ -290,7 +290,7 @@ struct _LIBCPP_TEMPLATE_VIS allocator_traits { template <class _Ap = _Alloc, class = void, __enable_if_t<!__has_allocate_hint<_Ap, size_type, const_void_pointer>::value, int> = 0> - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 static pointer + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 static pointer allocate(allocator_type& __a, size_type __n, const_void_pointer) { return __a.allocate(__n); } diff --git a/libcxx/include/__memory/assume_aligned.h b/libcxx/include/__memory/assume_aligned.h index 
526eb33..c7ba2a9 100644 --- a/libcxx/include/__memory/assume_aligned.h +++ b/libcxx/include/__memory/assume_aligned.h @@ -23,7 +23,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template <size_t _Np, class _Tp> -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp* __assume_aligned(_Tp* __ptr) { +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp* __assume_aligned(_Tp* __ptr) { static_assert(_Np != 0 && (_Np & (_Np - 1)) == 0, "std::assume_aligned<N>(p) requires N to be a power of two"); if (__libcpp_is_constant_evaluated()) { diff --git a/libcxx/include/__memory/temporary_buffer.h b/libcxx/include/__memory/temporary_buffer.h index 88799ca..633c9dc 100644 --- a/libcxx/include/__memory/temporary_buffer.h +++ b/libcxx/include/__memory/temporary_buffer.h @@ -22,7 +22,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template <class _Tp> -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_NO_CFI _LIBCPP_DEPRECATED_IN_CXX17 pair<_Tp*, ptrdiff_t> +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_NO_CFI _LIBCPP_DEPRECATED_IN_CXX17 pair<_Tp*, ptrdiff_t> get_temporary_buffer(ptrdiff_t __n) _NOEXCEPT { pair<_Tp*, ptrdiff_t> __r(0, 0); const ptrdiff_t __m = diff --git a/libcxx/include/__mutex/lock_guard.h b/libcxx/include/__mutex/lock_guard.h index ef56896..50765cd 100644 --- a/libcxx/include/__mutex/lock_guard.h +++ b/libcxx/include/__mutex/lock_guard.h @@ -27,13 +27,13 @@ private: mutex_type& __m_; public: - _LIBCPP_NODISCARD + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI explicit lock_guard(mutex_type& __m) _LIBCPP_THREAD_SAFETY_ANNOTATION(acquire_capability(__m)) : __m_(__m) { __m_.lock(); } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI lock_guard(mutex_type& __m, adopt_lock_t) + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI lock_guard(mutex_type& __m, adopt_lock_t) _LIBCPP_THREAD_SAFETY_ANNOTATION(requires_capability(__m)) : __m_(__m) {} _LIBCPP_HIDE_FROM_ABI ~lock_guard() _LIBCPP_THREAD_SAFETY_ANNOTATION(release_capability()) { __m_.unlock(); } diff --git 
a/libcxx/include/__mutex/unique_lock.h b/libcxx/include/__mutex/unique_lock.h index db506f3..c404921 100644 --- a/libcxx/include/__mutex/unique_lock.h +++ b/libcxx/include/__mutex/unique_lock.h @@ -34,28 +34,28 @@ private: bool __owns_; public: - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI unique_lock() _NOEXCEPT : __m_(nullptr), __owns_(false) {} - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI explicit unique_lock(mutex_type& __m) + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI unique_lock() _NOEXCEPT : __m_(nullptr), __owns_(false) {} + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI explicit unique_lock(mutex_type& __m) : __m_(std::addressof(__m)), __owns_(true) { __m_->lock(); } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI unique_lock(mutex_type& __m, defer_lock_t) _NOEXCEPT + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI unique_lock(mutex_type& __m, defer_lock_t) _NOEXCEPT : __m_(std::addressof(__m)), __owns_(false) {} - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI unique_lock(mutex_type& __m, try_to_lock_t) + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI unique_lock(mutex_type& __m, try_to_lock_t) : __m_(std::addressof(__m)), __owns_(__m.try_lock()) {} - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI unique_lock(mutex_type& __m, adopt_lock_t) + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI unique_lock(mutex_type& __m, adopt_lock_t) : __m_(std::addressof(__m)), __owns_(true) {} template <class _Clock, class _Duration> - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI unique_lock(mutex_type& __m, const chrono::time_point<_Clock, _Duration>& __t) + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI unique_lock(mutex_type& __m, const chrono::time_point<_Clock, _Duration>& __t) : __m_(std::addressof(__m)), __owns_(__m.try_lock_until(__t)) {} template <class _Rep, class _Period> - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI unique_lock(mutex_type& __m, const chrono::duration<_Rep, _Period>& __d) + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI unique_lock(mutex_type& __m, const chrono::duration<_Rep, _Period>& __d) : __m_(std::addressof(__m)), 
__owns_(__m.try_lock_for(__d)) {} _LIBCPP_HIDE_FROM_ABI ~unique_lock() { @@ -66,7 +66,7 @@ public: unique_lock(unique_lock const&) = delete; unique_lock& operator=(unique_lock const&) = delete; - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI unique_lock(unique_lock&& __u) _NOEXCEPT + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI unique_lock(unique_lock&& __u) _NOEXCEPT : __m_(__u.__m_), __owns_(__u.__owns_) { __u.__m_ = nullptr; diff --git a/libcxx/include/__ranges/elements_view.h b/libcxx/include/__ranges/elements_view.h index f159f53..989d36f 100644 --- a/libcxx/include/__ranges/elements_view.h +++ b/libcxx/include/__ranges/elements_view.h @@ -16,7 +16,7 @@ #include <__concepts/derived_from.h> #include <__concepts/equality_comparable.h> #include <__config> -#include <__fwd/complex.h> +#include <__fwd/get.h> #include <__iterator/concepts.h> #include <__iterator/iterator_traits.h> #include <__ranges/access.h> diff --git a/libcxx/include/__utility/forward.h b/libcxx/include/__utility/forward.h index d5275dc..66740664 100644 --- a/libcxx/include/__utility/forward.h +++ b/libcxx/include/__utility/forward.h @@ -21,13 +21,13 @@ _LIBCPP_BEGIN_NAMESPACE_STD template <class _Tp> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _Tp&& +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _Tp&& forward(_LIBCPP_LIFETIMEBOUND __libcpp_remove_reference_t<_Tp>& __t) _NOEXCEPT { return static_cast<_Tp&&>(__t); } template <class _Tp> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _Tp&& +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _Tp&& forward(_LIBCPP_LIFETIMEBOUND __libcpp_remove_reference_t<_Tp>&& __t) _NOEXCEPT { static_assert(!is_lvalue_reference<_Tp>::value, "cannot forward an rvalue as an lvalue"); return static_cast<_Tp&&>(__t); diff --git a/libcxx/include/__utility/move.h b/libcxx/include/__utility/move.h index b6a42db..66aec5a 100644 --- a/libcxx/include/__utility/move.h +++ b/libcxx/include/__utility/move.h @@ -26,7 
+26,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD template <class _Tp> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR __libcpp_remove_reference_t<_Tp>&& +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR __libcpp_remove_reference_t<_Tp>&& move(_LIBCPP_LIFETIMEBOUND _Tp&& __t) _NOEXCEPT { typedef _LIBCPP_NODEBUG __libcpp_remove_reference_t<_Tp> _Up; return static_cast<_Up&&>(__t); @@ -37,7 +37,7 @@ using __move_if_noexcept_result_t = __conditional_t<!is_nothrow_move_constructible<_Tp>::value && is_copy_constructible<_Tp>::value, const _Tp&, _Tp&&>; template <class _Tp> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 __move_if_noexcept_result_t<_Tp> +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 __move_if_noexcept_result_t<_Tp> move_if_noexcept(_LIBCPP_LIFETIMEBOUND _Tp& __x) _NOEXCEPT { return std::move(__x); } diff --git a/libcxx/include/array b/libcxx/include/array index 588664ac..fde7a70 100644 --- a/libcxx/include/array +++ b/libcxx/include/array @@ -232,7 +232,7 @@ struct _LIBCPP_TEMPLATE_VIS array { // capacity: _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR size_type size() const _NOEXCEPT { return _Size; } _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR size_type max_size() const _NOEXCEPT { return _Size; } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR bool empty() const _NOEXCEPT { return _Size == 0; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR bool empty() const _NOEXCEPT { return _Size == 0; } // element access: _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 reference operator[](size_type __n) _NOEXCEPT { @@ -340,7 +340,7 @@ struct _LIBCPP_TEMPLATE_VIS array<_Tp, 0> { // capacity: _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR size_type size() const _NOEXCEPT { return 0; } _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR size_type max_size() const _NOEXCEPT { return 0; } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR bool empty() const 
_NOEXCEPT { return true; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR bool empty() const _NOEXCEPT { return true; } // element access: _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 reference operator[](size_type) _NOEXCEPT { diff --git a/libcxx/include/barrier b/libcxx/include/barrier index ba29ebc..abc014e 100644 --- a/libcxx/include/barrier +++ b/libcxx/include/barrier @@ -125,7 +125,7 @@ public: __expected_adjustment_(0), __completion_(std::move(__completion)), __phase_(0) {} - _LIBCPP_NODISCARD _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t __update) { + [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t __update) { _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN( __update <= __expected_, "update is greater than the expected count for the current barrier phase"); @@ -277,7 +277,7 @@ public: barrier(barrier const&) = delete; barrier& operator=(barrier const&) = delete; - _LIBCPP_NODISCARD _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t __update = 1) { + [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t __update = 1) { _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(__update > 0, "barrier:arrive must be called with a value greater than 0"); return __b_.arrive(__update); } diff --git a/libcxx/include/deque b/libcxx/include/deque index 759de5d..f2f6122 100644 --- a/libcxx/include/deque +++ b/libcxx/include/deque @@ -739,7 +739,7 @@ public: _LIBCPP_HIDE_FROM_ABI void resize(size_type __n); _LIBCPP_HIDE_FROM_ABI void resize(size_type __n, const value_type& __v); _LIBCPP_HIDE_FROM_ABI void shrink_to_fit() _NOEXCEPT; - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return size() == 0; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return size() == 0; } // element access: _LIBCPP_HIDE_FROM_ABI reference operator[](size_type __i) _NOEXCEPT; diff --git 
a/libcxx/include/forward_list b/libcxx/include/forward_list index 3187b11..9a80413 100644 --- a/libcxx/include/forward_list +++ b/libcxx/include/forward_list @@ -756,7 +756,7 @@ public: return const_iterator(base::__before_begin()); } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return base::__before_begin()->__next_ == nullptr; } _LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT { diff --git a/libcxx/include/future b/libcxx/include/future index 9158ea3..8eadbcb 100644 --- a/libcxx/include/future +++ b/libcxx/include/future @@ -1845,7 +1845,7 @@ inline _LIBCPP_HIDE_FROM_ABI bool __does_policy_contain(launch __policy, launch } template <class _Fp, class... _Args> -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI future<typename __invoke_of<__decay_t<_Fp>, __decay_t<_Args>...>::type> +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI future<typename __invoke_of<__decay_t<_Fp>, __decay_t<_Args>...>::type> async(launch __policy, _Fp&& __f, _Args&&... __args) { typedef __async_func<__decay_t<_Fp>, __decay_t<_Args>...> _BF; typedef typename _BF::_Rp _Rp; @@ -1870,7 +1870,7 @@ async(launch __policy, _Fp&& __f, _Args&&... __args) { } template <class _Fp, class... _Args> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI future<typename __invoke_of<__decay_t<_Fp>, __decay_t<_Args>...>::type> +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI future<typename __invoke_of<__decay_t<_Fp>, __decay_t<_Args>...>::type> async(_Fp&& __f, _Args&&... 
__args) { return std::async(launch::any, std::forward<_Fp>(__f), std::forward<_Args>(__args)...); } diff --git a/libcxx/include/limits b/libcxx/include/limits index d55c7cd..b85c662 100644 --- a/libcxx/include/limits +++ b/libcxx/include/limits @@ -137,9 +137,9 @@ protected: typedef _Tp type; static _LIBCPP_CONSTEXPR const bool is_specialized = false; - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type min() _NOEXCEPT { return type(); } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type max() _NOEXCEPT { return type(); } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type lowest() _NOEXCEPT { return type(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type min() _NOEXCEPT { return type(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type max() _NOEXCEPT { return type(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type lowest() _NOEXCEPT { return type(); } static _LIBCPP_CONSTEXPR const int digits = 0; static _LIBCPP_CONSTEXPR const int digits10 = 0; @@ -148,8 +148,8 @@ protected: static _LIBCPP_CONSTEXPR const bool is_integer = false; static _LIBCPP_CONSTEXPR const bool is_exact = false; static _LIBCPP_CONSTEXPR const int radix = 0; - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type epsilon() _NOEXCEPT { return type(); } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type round_error() _NOEXCEPT { return type(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type epsilon() _NOEXCEPT { return type(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type round_error() _NOEXCEPT { return type(); } static _LIBCPP_CONSTEXPR const int min_exponent = 0; static _LIBCPP_CONSTEXPR const int min_exponent10 = 0; @@ -161,10 +161,10 @@ protected: static _LIBCPP_CONSTEXPR const bool has_signaling_NaN = false; static _LIBCPP_DEPRECATED_IN_CXX23 _LIBCPP_CONSTEXPR const 
float_denorm_style has_denorm = denorm_absent; static _LIBCPP_DEPRECATED_IN_CXX23 _LIBCPP_CONSTEXPR const bool has_denorm_loss = false; - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type infinity() _NOEXCEPT { return type(); } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type quiet_NaN() _NOEXCEPT { return type(); } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type signaling_NaN() _NOEXCEPT { return type(); } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type denorm_min() _NOEXCEPT { return type(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type infinity() _NOEXCEPT { return type(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type quiet_NaN() _NOEXCEPT { return type(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type signaling_NaN() _NOEXCEPT { return type(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type denorm_min() _NOEXCEPT { return type(); } static _LIBCPP_CONSTEXPR const bool is_iec559 = false; static _LIBCPP_CONSTEXPR const bool is_bounded = false; @@ -198,15 +198,15 @@ protected: static _LIBCPP_CONSTEXPR const int max_digits10 = 0; static _LIBCPP_CONSTEXPR const type __min = __libcpp_compute_min<type, digits, is_signed>::value; static _LIBCPP_CONSTEXPR const type __max = is_signed ? 
type(type(~0) ^ __min) : type(~0); - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type min() _NOEXCEPT { return __min; } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type max() _NOEXCEPT { return __max; } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type lowest() _NOEXCEPT { return min(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type min() _NOEXCEPT { return __min; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type max() _NOEXCEPT { return __max; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type lowest() _NOEXCEPT { return min(); } static _LIBCPP_CONSTEXPR const bool is_integer = true; static _LIBCPP_CONSTEXPR const bool is_exact = true; static _LIBCPP_CONSTEXPR const int radix = 2; - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type epsilon() _NOEXCEPT { return type(0); } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type round_error() _NOEXCEPT { return type(0); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type epsilon() _NOEXCEPT { return type(0); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type round_error() _NOEXCEPT { return type(0); } static _LIBCPP_CONSTEXPR const int min_exponent = 0; static _LIBCPP_CONSTEXPR const int min_exponent10 = 0; @@ -218,10 +218,10 @@ protected: static _LIBCPP_CONSTEXPR const bool has_signaling_NaN = false; static _LIBCPP_DEPRECATED_IN_CXX23 _LIBCPP_CONSTEXPR const float_denorm_style has_denorm = denorm_absent; static _LIBCPP_DEPRECATED_IN_CXX23 _LIBCPP_CONSTEXPR const bool has_denorm_loss = false; - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type infinity() _NOEXCEPT { return type(0); } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type quiet_NaN() _NOEXCEPT { return type(0); } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type 
signaling_NaN() _NOEXCEPT { return type(0); } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type denorm_min() _NOEXCEPT { return type(0); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type infinity() _NOEXCEPT { return type(0); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type quiet_NaN() _NOEXCEPT { return type(0); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type signaling_NaN() _NOEXCEPT { return type(0); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type denorm_min() _NOEXCEPT { return type(0); } static _LIBCPP_CONSTEXPR const bool is_iec559 = false; static _LIBCPP_CONSTEXPR const bool is_bounded = true; @@ -249,15 +249,15 @@ protected: static _LIBCPP_CONSTEXPR const int max_digits10 = 0; static _LIBCPP_CONSTEXPR const type __min = false; static _LIBCPP_CONSTEXPR const type __max = true; - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type min() _NOEXCEPT { return __min; } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type max() _NOEXCEPT { return __max; } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type lowest() _NOEXCEPT { return min(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type min() _NOEXCEPT { return __min; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type max() _NOEXCEPT { return __max; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type lowest() _NOEXCEPT { return min(); } static _LIBCPP_CONSTEXPR const bool is_integer = true; static _LIBCPP_CONSTEXPR const bool is_exact = true; static _LIBCPP_CONSTEXPR const int radix = 2; - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type epsilon() _NOEXCEPT { return type(0); } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type round_error() _NOEXCEPT { return type(0); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static 
_LIBCPP_CONSTEXPR type epsilon() _NOEXCEPT { return type(0); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type round_error() _NOEXCEPT { return type(0); } static _LIBCPP_CONSTEXPR const int min_exponent = 0; static _LIBCPP_CONSTEXPR const int min_exponent10 = 0; @@ -269,10 +269,10 @@ protected: static _LIBCPP_CONSTEXPR const bool has_signaling_NaN = false; static _LIBCPP_DEPRECATED_IN_CXX23 _LIBCPP_CONSTEXPR const float_denorm_style has_denorm = denorm_absent; static _LIBCPP_DEPRECATED_IN_CXX23 _LIBCPP_CONSTEXPR const bool has_denorm_loss = false; - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type infinity() _NOEXCEPT { return type(0); } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type quiet_NaN() _NOEXCEPT { return type(0); } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type signaling_NaN() _NOEXCEPT { return type(0); } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type denorm_min() _NOEXCEPT { return type(0); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type infinity() _NOEXCEPT { return type(0); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type quiet_NaN() _NOEXCEPT { return type(0); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type signaling_NaN() _NOEXCEPT { return type(0); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type denorm_min() _NOEXCEPT { return type(0); } static _LIBCPP_CONSTEXPR const bool is_iec559 = false; static _LIBCPP_CONSTEXPR const bool is_bounded = true; @@ -294,15 +294,15 @@ protected: static _LIBCPP_CONSTEXPR const int digits = __FLT_MANT_DIG__; static _LIBCPP_CONSTEXPR const int digits10 = __FLT_DIG__; static _LIBCPP_CONSTEXPR const int max_digits10 = 2 + (digits * 30103l) / 100000l; - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type min() _NOEXCEPT { return __FLT_MIN__; } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI 
static _LIBCPP_CONSTEXPR type max() _NOEXCEPT { return __FLT_MAX__; } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type lowest() _NOEXCEPT { return -max(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type min() _NOEXCEPT { return __FLT_MIN__; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type max() _NOEXCEPT { return __FLT_MAX__; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type lowest() _NOEXCEPT { return -max(); } static _LIBCPP_CONSTEXPR const bool is_integer = false; static _LIBCPP_CONSTEXPR const bool is_exact = false; static _LIBCPP_CONSTEXPR const int radix = __FLT_RADIX__; - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type epsilon() _NOEXCEPT { return __FLT_EPSILON__; } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type round_error() _NOEXCEPT { return 0.5F; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type epsilon() _NOEXCEPT { return __FLT_EPSILON__; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type round_error() _NOEXCEPT { return 0.5F; } static _LIBCPP_CONSTEXPR const int min_exponent = __FLT_MIN_EXP__; static _LIBCPP_CONSTEXPR const int min_exponent10 = __FLT_MIN_10_EXP__; @@ -314,16 +314,16 @@ protected: static _LIBCPP_CONSTEXPR const bool has_signaling_NaN = true; static _LIBCPP_DEPRECATED_IN_CXX23 _LIBCPP_CONSTEXPR const float_denorm_style has_denorm = denorm_present; static _LIBCPP_DEPRECATED_IN_CXX23 _LIBCPP_CONSTEXPR const bool has_denorm_loss = false; - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type infinity() _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type infinity() _NOEXCEPT { return __builtin_huge_valf(); } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type quiet_NaN() _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type quiet_NaN() _NOEXCEPT { return 
__builtin_nanf(""); } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type signaling_NaN() _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type signaling_NaN() _NOEXCEPT { return __builtin_nansf(""); } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type denorm_min() _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type denorm_min() _NOEXCEPT { return __FLT_DENORM_MIN__; } @@ -351,15 +351,15 @@ protected: static _LIBCPP_CONSTEXPR const int digits = __DBL_MANT_DIG__; static _LIBCPP_CONSTEXPR const int digits10 = __DBL_DIG__; static _LIBCPP_CONSTEXPR const int max_digits10 = 2 + (digits * 30103l) / 100000l; - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type min() _NOEXCEPT { return __DBL_MIN__; } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type max() _NOEXCEPT { return __DBL_MAX__; } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type lowest() _NOEXCEPT { return -max(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type min() _NOEXCEPT { return __DBL_MIN__; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type max() _NOEXCEPT { return __DBL_MAX__; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type lowest() _NOEXCEPT { return -max(); } static _LIBCPP_CONSTEXPR const bool is_integer = false; static _LIBCPP_CONSTEXPR const bool is_exact = false; static _LIBCPP_CONSTEXPR const int radix = __FLT_RADIX__; - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type epsilon() _NOEXCEPT { return __DBL_EPSILON__; } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type round_error() _NOEXCEPT { return 0.5; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type epsilon() _NOEXCEPT { return __DBL_EPSILON__; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type round_error() _NOEXCEPT { 
return 0.5; } static _LIBCPP_CONSTEXPR const int min_exponent = __DBL_MIN_EXP__; static _LIBCPP_CONSTEXPR const int min_exponent10 = __DBL_MIN_10_EXP__; @@ -371,16 +371,16 @@ protected: static _LIBCPP_CONSTEXPR const bool has_signaling_NaN = true; static _LIBCPP_DEPRECATED_IN_CXX23 _LIBCPP_CONSTEXPR const float_denorm_style has_denorm = denorm_present; static _LIBCPP_DEPRECATED_IN_CXX23 _LIBCPP_CONSTEXPR const bool has_denorm_loss = false; - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type infinity() _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type infinity() _NOEXCEPT { return __builtin_huge_val(); } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type quiet_NaN() _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type quiet_NaN() _NOEXCEPT { return __builtin_nan(""); } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type signaling_NaN() _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type signaling_NaN() _NOEXCEPT { return __builtin_nans(""); } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type denorm_min() _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type denorm_min() _NOEXCEPT { return __DBL_DENORM_MIN__; } @@ -408,15 +408,15 @@ protected: static _LIBCPP_CONSTEXPR const int digits = __LDBL_MANT_DIG__; static _LIBCPP_CONSTEXPR const int digits10 = __LDBL_DIG__; static _LIBCPP_CONSTEXPR const int max_digits10 = 2 + (digits * 30103l) / 100000l; - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type min() _NOEXCEPT { return __LDBL_MIN__; } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type max() _NOEXCEPT { return __LDBL_MAX__; } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type lowest() _NOEXCEPT { return -max(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type min() _NOEXCEPT 
{ return __LDBL_MIN__; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type max() _NOEXCEPT { return __LDBL_MAX__; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type lowest() _NOEXCEPT { return -max(); } static _LIBCPP_CONSTEXPR const bool is_integer = false; static _LIBCPP_CONSTEXPR const bool is_exact = false; static _LIBCPP_CONSTEXPR const int radix = __FLT_RADIX__; - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type epsilon() _NOEXCEPT { return __LDBL_EPSILON__; } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type round_error() _NOEXCEPT { return 0.5L; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type epsilon() _NOEXCEPT { return __LDBL_EPSILON__; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type round_error() _NOEXCEPT { return 0.5L; } static _LIBCPP_CONSTEXPR const int min_exponent = __LDBL_MIN_EXP__; static _LIBCPP_CONSTEXPR const int min_exponent10 = __LDBL_MIN_10_EXP__; @@ -428,16 +428,16 @@ protected: static _LIBCPP_CONSTEXPR const bool has_signaling_NaN = true; static _LIBCPP_DEPRECATED_IN_CXX23 _LIBCPP_CONSTEXPR const float_denorm_style has_denorm = denorm_present; static _LIBCPP_DEPRECATED_IN_CXX23 _LIBCPP_CONSTEXPR const bool has_denorm_loss = false; - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type infinity() _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type infinity() _NOEXCEPT { return __builtin_huge_vall(); } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type quiet_NaN() _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type quiet_NaN() _NOEXCEPT { return __builtin_nanl(""); } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type signaling_NaN() _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type signaling_NaN() _NOEXCEPT { return __builtin_nansl(""); } - 
_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type denorm_min() _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type denorm_min() _NOEXCEPT { return __LDBL_DENORM_MIN__; } @@ -465,9 +465,9 @@ class _LIBCPP_TEMPLATE_VIS numeric_limits : private __libcpp_numeric_limits<_Tp> public: static _LIBCPP_CONSTEXPR const bool is_specialized = __base::is_specialized; - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type min() _NOEXCEPT { return __base::min(); } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type max() _NOEXCEPT { return __base::max(); } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type lowest() _NOEXCEPT { return __base::lowest(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type min() _NOEXCEPT { return __base::min(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type max() _NOEXCEPT { return __base::max(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type lowest() _NOEXCEPT { return __base::lowest(); } static _LIBCPP_CONSTEXPR const int digits = __base::digits; static _LIBCPP_CONSTEXPR const int digits10 = __base::digits10; @@ -476,10 +476,10 @@ public: static _LIBCPP_CONSTEXPR const bool is_integer = __base::is_integer; static _LIBCPP_CONSTEXPR const bool is_exact = __base::is_exact; static _LIBCPP_CONSTEXPR const int radix = __base::radix; - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type epsilon() _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type epsilon() _NOEXCEPT { return __base::epsilon(); } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type round_error() _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type round_error() _NOEXCEPT { return __base::round_error(); } @@ -495,16 +495,16 @@ public: static _LIBCPP_DEPRECATED_IN_CXX23 _LIBCPP_CONSTEXPR const 
float_denorm_style has_denorm = __base::has_denorm; static _LIBCPP_DEPRECATED_IN_CXX23 _LIBCPP_CONSTEXPR const bool has_denorm_loss = __base::has_denorm_loss; _LIBCPP_SUPPRESS_DEPRECATED_POP - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type infinity() _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type infinity() _NOEXCEPT { return __base::infinity(); } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type quiet_NaN() _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type quiet_NaN() _NOEXCEPT { return __base::quiet_NaN(); } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type signaling_NaN() _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type signaling_NaN() _NOEXCEPT { return __base::signaling_NaN(); } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type denorm_min() _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR type denorm_min() _NOEXCEPT { return __base::denorm_min(); } diff --git a/libcxx/include/list b/libcxx/include/list index 2aa7744..dc3b679 100644 --- a/libcxx/include/list +++ b/libcxx/include/list @@ -755,7 +755,7 @@ public: _LIBCPP_HIDE_FROM_ABI allocator_type get_allocator() const _NOEXCEPT; _LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return base::__sz(); } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return base::empty(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return base::empty(); } _LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT { return std::min<size_type>(base::__node_alloc_max_size(), numeric_limits<difference_type >::max()); } diff --git a/libcxx/include/map b/libcxx/include/map index 02bd17c..5d97538 100644 --- a/libcxx/include/map +++ b/libcxx/include/map @@ -1144,7 +1144,7 @@ public: _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crbegin() const _NOEXCEPT { 
return rbegin(); } _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crend() const _NOEXCEPT { return rend(); } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return __tree_.size() == 0; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return __tree_.size() == 0; } _LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return __tree_.size(); } _LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT { return __tree_.max_size(); } @@ -1824,7 +1824,7 @@ public: _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crbegin() const _NOEXCEPT { return rbegin(); } _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crend() const _NOEXCEPT { return rend(); } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return __tree_.size() == 0; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return __tree_.size() == 0; } _LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return __tree_.size(); } _LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT { return __tree_.max_size(); } diff --git a/libcxx/include/math.h b/libcxx/include/math.h index 4e6304a..509ecc4 100644 --- a/libcxx/include/math.h +++ b/libcxx/include/math.h @@ -388,22 +388,22 @@ namespace __math { // template on non-double overloads to make them weaker than same overloads from MSVC runtime template <class = int> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI int fpclassify(float __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI int fpclassify(float __x) _NOEXCEPT { return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO, __x); } template <class = int> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI int fpclassify(double __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI int fpclassify(double __x) _NOEXCEPT { return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO, __x); } template <class = int> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI 
int fpclassify(long double __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI int fpclassify(long double __x) _NOEXCEPT { return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO, __x); } template <class _A1, std::__enable_if_t<std::is_integral<_A1>::value, int> = 0> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI int fpclassify(_A1 __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI int fpclassify(_A1 __x) _NOEXCEPT { return __x == 0 ? FP_ZERO : FP_NORMAL; } diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap index add8726d..cc41912 100644 --- a/libcxx/include/module.modulemap +++ b/libcxx/include/module.modulemap @@ -1813,6 +1813,15 @@ module std_private_tuple_tuple_like_no_subrange [system] { module std_private_tuple_sfinae_helpers [system] { header "__tuple/sfinae_helpers.h" } module std_private_tuple_tuple_element [system] { header "__tuple/tuple_element.h" } module std_private_tuple_tuple_fwd [system] { header "__fwd/tuple.h" } +module std_private_get_fwd [system] { + header "__fwd/get.h" + export std_private_array_array_fwd + export std_private_complex_complex_fwd + export std_private_ranges_subrange_fwd + export std_private_tuple_tuple_fwd + export std_private_utility_pair_fwd + export std_private_variant_fwd +} module std_private_tuple_tuple_indices [system] { header "__tuple/tuple_indices.h" } module std_private_tuple_tuple_like [system] { header "__tuple/tuple_like.h" @@ -2103,5 +2112,6 @@ module std_private_utility_to_underlying [system] { header "__utility/ module std_private_utility_unreachable [system] { header "__utility/unreachable.h" } module std_private_variant_monostate [system] { header "__variant/monostate.h" } +module std_private_variant_fwd [system] { header "__fwd/variant.h" } module std_private_vector_fwd [system] { header "__fwd/vector.h" } diff --git a/libcxx/include/new b/libcxx/include/new index 207e4b4..3252b0b 100644 --- a/libcxx/include/new +++ 
b/libcxx/include/new @@ -203,8 +203,8 @@ inline constexpr destroying_delete_t destroying_delete{}; #if !defined(_LIBCPP_ABI_VCRUNTIME) -_LIBCPP_NODISCARD _LIBCPP_OVERRIDABLE_FUNC_VIS void* operator new(std::size_t __sz) _THROW_BAD_ALLOC; -_LIBCPP_NODISCARD _LIBCPP_OVERRIDABLE_FUNC_VIS void* operator new(std::size_t __sz, const std::nothrow_t&) _NOEXCEPT +[[__nodiscard__]] _LIBCPP_OVERRIDABLE_FUNC_VIS void* operator new(std::size_t __sz) _THROW_BAD_ALLOC; +[[__nodiscard__]] _LIBCPP_OVERRIDABLE_FUNC_VIS void* operator new(std::size_t __sz, const std::nothrow_t&) _NOEXCEPT _LIBCPP_NOALIAS; _LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete(void* __p) _NOEXCEPT; _LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete(void* __p, const std::nothrow_t&) _NOEXCEPT; @@ -212,8 +212,8 @@ _LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete(void* __p, const std::nothrow_ _LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete(void* __p, std::size_t __sz) _NOEXCEPT; # endif -_LIBCPP_NODISCARD _LIBCPP_OVERRIDABLE_FUNC_VIS void* operator new[](std::size_t __sz) _THROW_BAD_ALLOC; -_LIBCPP_NODISCARD _LIBCPP_OVERRIDABLE_FUNC_VIS void* operator new[](std::size_t __sz, const std::nothrow_t&) _NOEXCEPT +[[__nodiscard__]] _LIBCPP_OVERRIDABLE_FUNC_VIS void* operator new[](std::size_t __sz) _THROW_BAD_ALLOC; +[[__nodiscard__]] _LIBCPP_OVERRIDABLE_FUNC_VIS void* operator new[](std::size_t __sz, const std::nothrow_t&) _NOEXCEPT _LIBCPP_NOALIAS; _LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete[](void* __p) _NOEXCEPT; _LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete[](void* __p, const std::nothrow_t&) _NOEXCEPT; @@ -222,8 +222,8 @@ _LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete[](void* __p, std::size_t __sz) # endif # ifndef _LIBCPP_HAS_NO_LIBRARY_ALIGNED_ALLOCATION -_LIBCPP_NODISCARD _LIBCPP_OVERRIDABLE_FUNC_VIS void* operator new(std::size_t __sz, std::align_val_t) _THROW_BAD_ALLOC; -_LIBCPP_NODISCARD _LIBCPP_OVERRIDABLE_FUNC_VIS void* +[[__nodiscard__]] _LIBCPP_OVERRIDABLE_FUNC_VIS void* operator 
new(std::size_t __sz, std::align_val_t) _THROW_BAD_ALLOC; +[[__nodiscard__]] _LIBCPP_OVERRIDABLE_FUNC_VIS void* operator new(std::size_t __sz, std::align_val_t, const std::nothrow_t&) _NOEXCEPT _LIBCPP_NOALIAS; _LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete(void* __p, std::align_val_t) _NOEXCEPT; _LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete(void* __p, std::align_val_t, const std::nothrow_t&) _NOEXCEPT; @@ -231,9 +231,9 @@ _LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete(void* __p, std::align_val_t, c _LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete(void* __p, std::size_t __sz, std::align_val_t) _NOEXCEPT; # endif -_LIBCPP_NODISCARD _LIBCPP_OVERRIDABLE_FUNC_VIS void* +[[__nodiscard__]] _LIBCPP_OVERRIDABLE_FUNC_VIS void* operator new[](std::size_t __sz, std::align_val_t) _THROW_BAD_ALLOC; -_LIBCPP_NODISCARD _LIBCPP_OVERRIDABLE_FUNC_VIS void* +[[__nodiscard__]] _LIBCPP_OVERRIDABLE_FUNC_VIS void* operator new[](std::size_t __sz, std::align_val_t, const std::nothrow_t&) _NOEXCEPT _LIBCPP_NOALIAS; _LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete[](void* __p, std::align_val_t) _NOEXCEPT; _LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete[](void* __p, std::align_val_t, const std::nothrow_t&) _NOEXCEPT; @@ -242,11 +242,11 @@ _LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete[](void* __p, std::size_t __sz, # endif # endif -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void* +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void* operator new(std::size_t, void* __p) _NOEXCEPT { return __p; } -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void* +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void* operator new[](std::size_t, void* __p) _NOEXCEPT { return __p; } @@ -334,7 +334,7 @@ inline _LIBCPP_HIDE_FROM_ABI void __libcpp_deallocate_unsized(void* __ptr, size_ } template <class _Tp> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR 
_Tp* __launder(_Tp* __p) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _Tp* __launder(_Tp* __p) _NOEXCEPT { static_assert(!(is_function<_Tp>::value), "can't launder functions"); static_assert(!(is_same<void, __remove_cv_t<_Tp> >::value), "can't launder cv-void"); return __builtin_launder(__p); diff --git a/libcxx/include/queue b/libcxx/include/queue index 9508de9..db9ad26 100644 --- a/libcxx/include/queue +++ b/libcxx/include/queue @@ -372,7 +372,7 @@ public: _LIBCPP_HIDE_FROM_ABI queue(queue&& __q, const _Alloc& __a) : c(std::move(__q.c), __a) {} #endif // _LIBCPP_CXX03_LANG - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI bool empty() const { return c.empty(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool empty() const { return c.empty(); } _LIBCPP_HIDE_FROM_ABI size_type size() const { return c.size(); } _LIBCPP_HIDE_FROM_ABI reference front() { return c.front(); } @@ -416,7 +416,7 @@ public: swap(c, __q.c); } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI const _Container& __get_container() const { return c; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const _Container& __get_container() const { return c; } template <class _T1, class _OtherContainer> friend _LIBCPP_HIDE_FROM_ABI bool @@ -649,7 +649,7 @@ public: #endif - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI bool empty() const { return c.empty(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool empty() const { return c.empty(); } _LIBCPP_HIDE_FROM_ABI size_type size() const { return c.size(); } _LIBCPP_HIDE_FROM_ABI const_reference top() const { return c.front(); } @@ -678,7 +678,7 @@ public: _LIBCPP_HIDE_FROM_ABI void swap(priority_queue& __q) _NOEXCEPT_(__is_nothrow_swappable_v<container_type>&& __is_nothrow_swappable_v<value_compare>); - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI const _Container& __get_container() const { return c; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const _Container& __get_container() const { return c; } }; #if _LIBCPP_STD_VER >= 17 diff --git 
a/libcxx/include/regex b/libcxx/include/regex index d59abb8..d6b8768 100644 --- a/libcxx/include/regex +++ b/libcxx/include/regex @@ -4577,7 +4577,7 @@ public: // size: _LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return __matches_.size(); } _LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT { return __matches_.max_size(); } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return size() == 0; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return size() == 0; } // element access: _LIBCPP_HIDE_FROM_ABI difference_type length(size_type __sub = 0) const { diff --git a/libcxx/include/scoped_allocator b/libcxx/include/scoped_allocator index a49ff46..13e43c2 100644 --- a/libcxx/include/scoped_allocator +++ b/libcxx/include/scoped_allocator @@ -389,10 +389,10 @@ public: return _Base::outer_allocator(); } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI pointer allocate(size_type __n) { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI pointer allocate(size_type __n) { return allocator_traits<outer_allocator_type>::allocate(outer_allocator(), __n); } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI pointer allocate(size_type __n, const_void_pointer __hint) { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI pointer allocate(size_type __n, const_void_pointer __hint) { return allocator_traits<outer_allocator_type>::allocate(outer_allocator(), __n, __hint); } diff --git a/libcxx/include/set b/libcxx/include/set index 7e9661a..b614e04 100644 --- a/libcxx/include/set +++ b/libcxx/include/set @@ -713,7 +713,7 @@ public: _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crbegin() const _NOEXCEPT { return rbegin(); } _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crend() const _NOEXCEPT { return rend(); } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return __tree_.size() == 0; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return __tree_.size() == 0; } _LIBCPP_HIDE_FROM_ABI size_type size() 
const _NOEXCEPT { return __tree_.size(); } _LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT { return __tree_.max_size(); } @@ -1170,7 +1170,7 @@ public: _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crbegin() const _NOEXCEPT { return rbegin(); } _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crend() const _NOEXCEPT { return rend(); } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return __tree_.size() == 0; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return __tree_.size() == 0; } _LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return __tree_.size(); } _LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT { return __tree_.max_size(); } diff --git a/libcxx/include/stack b/libcxx/include/stack index 90f8933..f75769f 100644 --- a/libcxx/include/stack +++ b/libcxx/include/stack @@ -231,7 +231,7 @@ public: #endif - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI bool empty() const { return c.empty(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool empty() const { return c.empty(); } _LIBCPP_HIDE_FROM_ABI size_type size() const { return c.size(); } _LIBCPP_HIDE_FROM_ABI reference top() { return c.back(); } _LIBCPP_HIDE_FROM_ABI const_reference top() const { return c.back(); } @@ -273,7 +273,7 @@ public: swap(c, __s.c); } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI const _Container& __get_container() const { return c; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const _Container& __get_container() const { return c; } template <class _T1, class _OtherContainer> friend bool operator==(const stack<_T1, _OtherContainer>& __x, const stack<_T1, _OtherContainer>& __y); diff --git a/libcxx/include/stdlib.h b/libcxx/include/stdlib.h index a74344d..358b10c 100644 --- a/libcxx/include/stdlib.h +++ b/libcxx/include/stdlib.h @@ -110,19 +110,19 @@ extern "C++" { // MSVCRT already has the correct prototype in <stdlib.h> if __cplusplus is defined # if !defined(_LIBCPP_MSVCRT) -_LIBCPP_NODISCARD inline 
_LIBCPP_HIDE_FROM_ABI long abs(long __x) _NOEXCEPT { return __builtin_labs(__x); } -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI long long abs(long long __x) _NOEXCEPT { return __builtin_llabs(__x); } +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI long abs(long __x) _NOEXCEPT { return __builtin_labs(__x); } +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI long long abs(long long __x) _NOEXCEPT { return __builtin_llabs(__x); } # endif // !defined(_LIBCPP_MSVCRT) -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI float abs(float __lcpp_x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI float abs(float __lcpp_x) _NOEXCEPT { return __builtin_fabsf(__lcpp_x); // Use builtins to prevent needing math.h } -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI double abs(double __lcpp_x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI double abs(double __lcpp_x) _NOEXCEPT { return __builtin_fabs(__lcpp_x); } -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI long double abs(long double __lcpp_x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI long double abs(long double __lcpp_x) _NOEXCEPT { return __builtin_fabsl(__lcpp_x); } diff --git a/libcxx/include/string b/libcxx/include/string index 46c5a5a..e8c9bce 100644 --- a/libcxx/include/string +++ b/libcxx/include/string @@ -1321,7 +1321,7 @@ public: _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void shrink_to_fit() _NOEXCEPT; _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void clear() _NOEXCEPT; - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool empty() const _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool empty() const _NOEXCEPT { return size() == 0; } diff --git a/libcxx/include/string_view b/libcxx/include/string_view index cf97e3a..3b32117 100644 --- a/libcxx/include/string_view +++ b/libcxx/include/string_view @@ -396,7 +396,7 @@ public: return numeric_limits<size_type>::max() / sizeof(value_type); } - 
_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR bool empty() const _NOEXCEPT { return __size_ == 0; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR bool empty() const _NOEXCEPT { return __size_ == 0; } // [string.view.access], element access _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI const_reference operator[](size_type __pos) const _NOEXCEPT { diff --git a/libcxx/include/type_traits b/libcxx/include/type_traits index 5937d4f..26c85f2 100644 --- a/libcxx/include/type_traits +++ b/libcxx/include/type_traits @@ -421,7 +421,6 @@ namespace std */ #include <__config> -#include <__fwd/functional.h> // This is https://llvm.org/PR56938 #include <__type_traits/add_const.h> #include <__type_traits/add_cv.h> #include <__type_traits/add_lvalue_reference.h> diff --git a/libcxx/include/unordered_map b/libcxx/include/unordered_map index 69314ba..0743b2e 100644 --- a/libcxx/include/unordered_map +++ b/libcxx/include/unordered_map @@ -1204,7 +1204,7 @@ public: return allocator_type(__table_.__node_alloc()); } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return __table_.size() == 0; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return __table_.size() == 0; } _LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return __table_.size(); } _LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT { return __table_.max_size(); } @@ -2003,7 +2003,7 @@ public: return allocator_type(__table_.__node_alloc()); } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return __table_.size() == 0; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return __table_.size() == 0; } _LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return __table_.size(); } _LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT { return __table_.max_size(); } diff --git a/libcxx/include/unordered_set b/libcxx/include/unordered_set index fb50f78..bd8d3ab 100644 --- 
a/libcxx/include/unordered_set +++ b/libcxx/include/unordered_set @@ -733,7 +733,7 @@ public: return allocator_type(__table_.__node_alloc()); } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return __table_.size() == 0; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return __table_.size() == 0; } _LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return __table_.size(); } _LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT { return __table_.max_size(); } @@ -1327,7 +1327,7 @@ public: return allocator_type(__table_.__node_alloc()); } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return __table_.size() == 0; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return __table_.size() == 0; } _LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return __table_.size(); } _LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT { return __table_.max_size(); } diff --git a/libcxx/include/variant b/libcxx/include/variant index 1cac603..2fa5623 100644 --- a/libcxx/include/variant +++ b/libcxx/include/variant @@ -221,6 +221,7 @@ namespace std { #include <__functional/invoke.h> #include <__functional/operations.h> #include <__functional/unary_function.h> +#include <__fwd/variant.h> #include <__memory/addressof.h> #include <__memory/construct_at.h> #include <__tuple/find_index.h> @@ -307,15 +308,7 @@ __throw_bad_variant_access() { # endif } -template <class... _Types> -class _LIBCPP_TEMPLATE_VIS variant; - -template <class _Tp> -struct _LIBCPP_TEMPLATE_VIS variant_size; - -template <class _Tp> -inline constexpr size_t variant_size_v = variant_size<_Tp>::value; - +// variant_size template <class _Tp> struct _LIBCPP_TEMPLATE_VIS variant_size<const _Tp> : variant_size<_Tp> {}; @@ -328,12 +321,7 @@ struct _LIBCPP_TEMPLATE_VIS variant_size<const volatile _Tp> : variant_size<_Tp> template <class... 
_Types> struct _LIBCPP_TEMPLATE_VIS variant_size<variant<_Types...>> : integral_constant<size_t, sizeof...(_Types)> {}; -template <size_t _Ip, class _Tp> -struct _LIBCPP_TEMPLATE_VIS variant_alternative; - -template <size_t _Ip, class _Tp> -using variant_alternative_t = typename variant_alternative<_Ip, _Tp>::type; - +// variant_alternative template <size_t _Ip, class _Tp> struct _LIBCPP_TEMPLATE_VIS variant_alternative<_Ip, const _Tp> : add_const<variant_alternative_t<_Ip, _Tp>> {}; @@ -349,8 +337,6 @@ struct _LIBCPP_TEMPLATE_VIS variant_alternative<_Ip, variant<_Types...>> { using type = __type_pack_element<_Ip, _Types...>; }; -inline constexpr size_t variant_npos = static_cast<size_t>(-1); - template <size_t _NumAlternatives> _LIBCPP_HIDE_FROM_ABI constexpr auto __choose_index_type() { # ifdef _LIBCPP_ABI_VARIANT_INDEX_TYPE_OPTIMIZATION @@ -370,9 +356,6 @@ template <class _IndexType> constexpr _IndexType __variant_npos = static_cast<_IndexType>(-1); template <class... _Types> -class _LIBCPP_TEMPLATE_VIS variant; - -template <class... 
_Types> _LIBCPP_HIDE_FROM_ABI constexpr variant<_Types...>& __as_variant(variant<_Types...>& __vs) noexcept { return __vs; } diff --git a/libcxx/include/vector b/libcxx/include/vector index fc0a486..4720f8e 100644 --- a/libcxx/include/vector +++ b/libcxx/include/vector @@ -636,7 +636,7 @@ public: _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI size_type capacity() const _NOEXCEPT { return static_cast<size_type>(__end_cap() - this->__begin_); } - _LIBCPP_NODISCARD _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return this->__begin_ == this->__end_; } _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT; @@ -2033,7 +2033,7 @@ public: return __internal_cap_to_external(__cap()); } _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type size() const _NOEXCEPT { return __size_; } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool empty() const _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool empty() const _NOEXCEPT { return __size_ == 0; } _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void reserve(size_type __n); diff --git a/libcxx/test/std/containers/sequences/vector.bool/enabled_hash.pass.cpp b/libcxx/test/std/containers/sequences/vector.bool/enabled_hash.pass.cpp index d89984a..6636120 100644 --- a/libcxx/test/std/containers/sequences/vector.bool/enabled_hash.pass.cpp +++ b/libcxx/test/std/containers/sequences/vector.bool/enabled_hash.pass.cpp @@ -20,8 +20,8 @@ #include "min_allocator.h" TEST_CONSTEXPR_CXX20 bool test() { - test_hash_enabled_for_type<std::vector<bool> >(); - test_hash_enabled_for_type<std::vector<bool, min_allocator<bool>>>(); + test_hash_enabled<std::vector<bool> >(); + test_hash_enabled<std::vector<bool, min_allocator<bool>>>(); return true; } diff --git 
a/libcxx/test/std/diagnostics/syserr/syserr.hash/enabled_hash.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.hash/enabled_hash.pass.cpp index 2aab698..e3eae8b 100644 --- a/libcxx/test/std/diagnostics/syserr/syserr.hash/enabled_hash.pass.cpp +++ b/libcxx/test/std/diagnostics/syserr/syserr.hash/enabled_hash.pass.cpp @@ -22,8 +22,8 @@ int main(int, char**) { test_library_hash_specializations_available(); { - test_hash_enabled_for_type<std::error_code>(); - test_hash_enabled_for_type<std::error_condition>(); + test_hash_enabled<std::error_code>(); + test_hash_enabled<std::error_condition>(); } return 0; diff --git a/libcxx/test/std/experimental/memory/memory.observer.ptr/hash.pass.cpp b/libcxx/test/std/experimental/memory/memory.observer.ptr/hash.pass.cpp index 7aa5dc8..fff5f9b 100644 --- a/libcxx/test/std/experimental/memory/memory.observer.ptr/hash.pass.cpp +++ b/libcxx/test/std/experimental/memory/memory.observer.ptr/hash.pass.cpp @@ -33,7 +33,7 @@ void test_hash() { assert(h == std::hash<T*>()(&obj)); } - test_hash_enabled_for_type<std::experimental::observer_ptr<T>>(); + test_hash_enabled<std::experimental::observer_ptr<T>>(); } struct Bar {}; diff --git a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.hash_enabled.pass.cpp b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.hash_enabled.pass.cpp index dd28c8f..6cc64e1 100644 --- a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.hash_enabled.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.hash_enabled.pass.cpp @@ -20,7 +20,7 @@ namespace fs = std::filesystem; int main(int, char**) { test_library_hash_specializations_available(); - test_hash_enabled_for_type<fs::path>(); + test_hash_enabled<fs::path>(); return 0; } diff --git a/libcxx/test/std/numerics/c.math/signbit.pass.cpp b/libcxx/test/std/numerics/c.math/signbit.pass.cpp index c85033e3..a8a566f 100644 --- a/libcxx/test/std/numerics/c.math/signbit.pass.cpp 
+++ b/libcxx/test/std/numerics/c.math/signbit.pass.cpp @@ -70,9 +70,22 @@ struct TestInt { } }; +template <typename T> +struct ConvertibleTo { + operator T() const { return T(); } +}; + int main(int, char**) { types::for_each(types::floating_point_types(), TestFloat()); types::for_each(types::integral_types(), TestInt()); + // Make sure we can call `std::signbit` with convertible types. This checks + // whether overloads for all cv-unqualified floating-point types are working + // as expected. + { + assert(!std::signbit(ConvertibleTo<float>())); + assert(!std::signbit(ConvertibleTo<double>())); + assert(!std::signbit(ConvertibleTo<long double>())); + } return 0; } diff --git a/libcxx/test/std/strings/basic.string.hash/enabled_hashes.pass.cpp b/libcxx/test/std/strings/basic.string.hash/enabled_hashes.pass.cpp index 611f95f..643c6bec 100644 --- a/libcxx/test/std/strings/basic.string.hash/enabled_hashes.pass.cpp +++ b/libcxx/test/std/strings/basic.string.hash/enabled_hashes.pass.cpp @@ -53,18 +53,18 @@ struct std::char_traits<MyChar> { int main(int, char**) { test_library_hash_specializations_available(); { - test_hash_enabled_for_type<std::string>(); + test_hash_enabled<std::string>(); #ifndef TEST_HAS_NO_WIDE_CHARACTERS - test_hash_enabled_for_type<std::wstring>(); + test_hash_enabled<std::wstring>(); #endif #ifndef TEST_HAS_NO_CHAR8_T - test_hash_enabled_for_type<std::u8string>(); + test_hash_enabled<std::u8string>(); #endif - test_hash_enabled_for_type<std::u16string>(); - test_hash_enabled_for_type<std::u32string>(); - test_hash_enabled_for_type<std::basic_string<char, std::char_traits<char>, test_allocator<char>>>(); - test_hash_disabled_for_type<std::basic_string<MyChar, std::char_traits<MyChar>, std::allocator<MyChar>>>(); - test_hash_disabled_for_type<std::basic_string<char, constexpr_char_traits<char>, std::allocator<char>>>(); + test_hash_enabled<std::u16string>(); + test_hash_enabled<std::u32string>(); + test_hash_enabled<std::basic_string<char, 
std::char_traits<char>, test_allocator<char>>>(); + test_hash_disabled<std::basic_string<MyChar, std::char_traits<MyChar>, std::allocator<MyChar>>>(); + test_hash_disabled<std::basic_string<char, constexpr_char_traits<char>, std::allocator<char>>>(); } return 0; diff --git a/libcxx/test/std/strings/string.view/string.view.hash/enabled_hashes.pass.cpp b/libcxx/test/std/strings/string.view/string.view.hash/enabled_hashes.pass.cpp index b2ffd20..13abb94 100644 --- a/libcxx/test/std/strings/string.view/string.view.hash/enabled_hashes.pass.cpp +++ b/libcxx/test/std/strings/string.view/string.view.hash/enabled_hashes.pass.cpp @@ -53,17 +53,17 @@ struct std::char_traits<MyChar> { int main(int, char**) { test_library_hash_specializations_available(); { - test_hash_enabled_for_type<std::string_view>(); + test_hash_enabled<std::string_view>(); #ifndef TEST_HAS_NO_WIDE_CHARACTERS - test_hash_enabled_for_type<std::wstring_view>(); + test_hash_enabled<std::wstring_view>(); #endif #ifndef TEST_HAS_NO_CHAR8_T - test_hash_enabled_for_type<std::u8string_view>(); + test_hash_enabled<std::u8string_view>(); #endif - test_hash_enabled_for_type<std::u16string_view>(); - test_hash_enabled_for_type<std::u32string_view>(); - test_hash_disabled_for_type<std::basic_string_view<MyChar, std::char_traits<MyChar>>>(); - test_hash_disabled_for_type<std::basic_string_view<char, constexpr_char_traits<char>>>(); + test_hash_enabled<std::u16string_view>(); + test_hash_enabled<std::u32string_view>(); + test_hash_disabled<std::basic_string_view<MyChar, std::char_traits<MyChar>>>(); + test_hash_disabled<std::basic_string_view<char, constexpr_char_traits<char>>>(); } return 0; diff --git a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/enabled_hashes.pass.cpp b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/enabled_hashes.pass.cpp index 62c8c74..98caff9 100644 --- 
a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/enabled_hashes.pass.cpp +++ b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/enabled_hashes.pass.cpp @@ -24,7 +24,7 @@ int main(int, char**) { test_library_hash_specializations_available(); { - test_hash_enabled_for_type<std::thread::id>(); + test_hash_enabled<std::thread::id>(); } return 0; diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.hash/hash_shared_ptr.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.hash/hash_shared_ptr.pass.cpp index 0c3915b..c6d54a8 100644 --- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.hash/hash_shared_ptr.pass.cpp +++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.hash/hash_shared_ptr.pass.cpp @@ -40,8 +40,8 @@ int main(int, char**) } #if TEST_STD_VER >= 11 { - test_hash_enabled_for_type<std::shared_ptr<int>>(); - test_hash_enabled_for_type<std::shared_ptr<A>>(); + test_hash_enabled<std::shared_ptr<int>>(); + test_hash_enabled<std::shared_ptr<A>>(); } #endif diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.hash/hash_unique_ptr.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.hash/hash_unique_ptr.pass.cpp index 707038e..32fc949 100644 --- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.hash/hash_unique_ptr.pass.cpp +++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.hash/hash_unique_ptr.pass.cpp @@ -35,16 +35,16 @@ void test_enabled_with_deleter() { using RawDel = typename std::decay<Del>::type; RawDel d(1); UPtr p(nullptr, std::forward<Del>(d)); - test_hash_enabled_for_type<UPtr>(p); - test_hash_enabled_for_type<pointer>(); + test_hash_enabled<UPtr>(p); + test_hash_enabled<pointer>(); } template <class ValueT, class Del> void test_disabled_with_deleter() { using UPtr = std::unique_ptr<ValueT, Del>; using pointer = typename UPtr::pointer; - 
test_hash_disabled_for_type<UPtr>(); - test_hash_disabled_for_type<pointer>(); + test_hash_disabled<UPtr>(); + test_hash_disabled<pointer>(); } template <class T> diff --git a/libcxx/test/std/utilities/optional/optional.hash/hash.pass.cpp b/libcxx/test/std/utilities/optional/optional.hash/hash.pass.cpp index ae14b57..54cf407 100644 --- a/libcxx/test/std/utilities/optional/optional.hash/hash.pass.cpp +++ b/libcxx/test/std/utilities/optional/optional.hash/hash.pass.cpp @@ -63,16 +63,16 @@ int main(int, char**) assert(std::hash<optional<T>>{}(opt) == std::hash<T>{}(*opt)); } { - test_hash_enabled_for_type<std::optional<int> >(); - test_hash_enabled_for_type<std::optional<int*> >(); - test_hash_enabled_for_type<std::optional<const int> >(); - test_hash_enabled_for_type<std::optional<int* const> >(); + test_hash_enabled<std::optional<int> >(); + test_hash_enabled<std::optional<int*> >(); + test_hash_enabled<std::optional<const int> >(); + test_hash_enabled<std::optional<int* const> >(); - test_hash_disabled_for_type<std::optional<A>>(); - test_hash_disabled_for_type<std::optional<const A>>(); + test_hash_disabled<std::optional<A>>(); + test_hash_disabled<std::optional<const A>>(); - test_hash_enabled_for_type<std::optional<B>>(); - test_hash_enabled_for_type<std::optional<const B>>(); + test_hash_enabled<std::optional<B>>(); + test_hash_enabled<std::optional<const B>>(); } return 0; diff --git a/libcxx/test/std/utilities/template.bitset/bitset.hash/enabled_hash.pass.cpp b/libcxx/test/std/utilities/template.bitset/bitset.hash/enabled_hash.pass.cpp index 0e34a5f..c2dc2ca 100644 --- a/libcxx/test/std/utilities/template.bitset/bitset.hash/enabled_hash.pass.cpp +++ b/libcxx/test/std/utilities/template.bitset/bitset.hash/enabled_hash.pass.cpp @@ -22,10 +22,10 @@ int main(int, char**) { test_library_hash_specializations_available(); { - test_hash_enabled_for_type<std::bitset<0> >(); - test_hash_enabled_for_type<std::bitset<1> >(); - test_hash_enabled_for_type<std::bitset<1024> 
>(); - test_hash_enabled_for_type<std::bitset<100000> >(); + test_hash_enabled<std::bitset<0> >(); + test_hash_enabled<std::bitset<1> >(); + test_hash_enabled<std::bitset<1024> >(); + test_hash_enabled<std::bitset<100000> >(); } return 0; diff --git a/libcxx/test/std/utilities/type.index/type.index.synopsis/hash_type_index.pass.cpp b/libcxx/test/std/utilities/type.index/type.index.synopsis/hash_type_index.pass.cpp index a361758..9c0de17 100644 --- a/libcxx/test/std/utilities/type.index/type.index.synopsis/hash_type_index.pass.cpp +++ b/libcxx/test/std/utilities/type.index/type.index.synopsis/hash_type_index.pass.cpp @@ -34,7 +34,7 @@ int main(int, char**) } #if TEST_STD_VER >= 11 { - test_hash_enabled_for_type<std::type_index>(std::type_index(typeid(int))); + test_hash_enabled<std::type_index>(std::type_index(typeid(int))); } #endif diff --git a/libcxx/test/std/utilities/variant/variant.hash/hash.pass.cpp b/libcxx/test/std/utilities/variant/variant.hash/hash.pass.cpp index ffd5f82..656b1d8 100644 --- a/libcxx/test/std/utilities/variant/variant.hash/hash.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.hash/hash.pass.cpp @@ -103,7 +103,7 @@ void test_hash_monostate() { static_assert(std::is_copy_constructible<H>::value, ""); } { - test_hash_enabled_for_type<std::monostate>(); + test_hash_enabled<std::monostate>(); } } @@ -131,16 +131,16 @@ struct std::hash<B> { void test_hash_variant_enabled() { { - test_hash_enabled_for_type<std::variant<int> >(); - test_hash_enabled_for_type<std::variant<int*, long, double, const int> >(); + test_hash_enabled<std::variant<int> >(); + test_hash_enabled<std::variant<int*, long, double, const int> >(); } { - test_hash_disabled_for_type<std::variant<int, A>>(); - test_hash_disabled_for_type<std::variant<const A, void*>>(); + test_hash_disabled<std::variant<int, A>>(); + test_hash_disabled<std::variant<const A, void*>>(); } { - test_hash_enabled_for_type<std::variant<int, B>>(); - test_hash_enabled_for_type<std::variant<const 
B, int>>(); + test_hash_enabled<std::variant<int, B>>(); + test_hash_enabled<std::variant<const B, int>>(); } } diff --git a/libcxx/test/support/poisoned_hash_helper.h b/libcxx/test/support/poisoned_hash_helper.h index a073350..93b579d 100644 --- a/libcxx/test/support/poisoned_hash_helper.h +++ b/libcxx/test/support/poisoned_hash_helper.h @@ -10,131 +10,47 @@ #ifndef SUPPORT_POISONED_HASH_HELPER_H #define SUPPORT_POISONED_HASH_HELPER_H +#include <functional> #include <cassert> #include <cstddef> #include <type_traits> #include <utility> #include "test_macros.h" -#include "test_workarounds.h" +#include "type_algorithms.h" -#if TEST_STD_VER < 11 -#error this header may only be used in C++11 or newer -#endif - -template <class ...Args> struct TypeList; - -// Test that the specified Hash meets the requirements of an enabled hash -template <class Hash, class Key, class InputKey = Key> -TEST_CONSTEXPR_CXX20 void test_hash_enabled(InputKey const& key = InputKey{}); - -template <class T, class InputKey = T> -TEST_CONSTEXPR_CXX20 void test_hash_enabled_for_type(InputKey const& key = InputKey{}) { - return test_hash_enabled<std::hash<T>, T, InputKey>(key); +template <class Hash, class Key, class Res = decltype(std::declval<Hash&>()(std::declval<Key>()))> +constexpr bool can_hash_impl(int) { + return std::is_same<Res, std::size_t>::value; } - -// Test that the specified Hash meets the requirements of a disabled hash. 
-template <class Hash, class Key> -void test_hash_disabled(); - -template <class T> -void test_hash_disabled_for_type() { - return test_hash_disabled<std::hash<T>, T>(); +template <class, class> +constexpr bool can_hash_impl(long) { + return false; } - -namespace PoisonedHashDetail { - enum Enum {}; - enum EnumClass : bool {}; - struct Class {}; +template <class Hash, class Key> +constexpr bool can_hash() { + return can_hash_impl<Hash, Key>(0); } -// Each header that declares the template hash provides enabled -// specializations of hash for nullptr t and all cv-unqualified -// arithmetic, enumeration, and pointer types. -using LibraryHashTypes = TypeList< -#if TEST_STD_VER > 14 - decltype(nullptr), -#endif - bool, - char, - signed char, - unsigned char, -#ifndef TEST_HAS_NO_WIDE_CHARACTERS - wchar_t, -#endif - char16_t, - char32_t, - short, - unsigned short, - int, - unsigned int, - long, - unsigned long, - long long, - unsigned long long, -#ifndef TEST_HAS_NO_INT128 - __int128_t, - __uint128_t, -#endif - float, - double, - long double, - PoisonedHashDetail::Enum, - PoisonedHashDetail::EnumClass, - void*, - void const*, - PoisonedHashDetail::Class* - >; - - -// Test that each of the library hash specializations for arithmetic types, -// enum types, and pointer types are available and enabled. 
-template <class Types = LibraryHashTypes> -void test_library_hash_specializations_available(Types = Types{}); - - -namespace PoisonedHashDetail { - -template <class T, class = typename T::foo_bar_baz> -constexpr bool instantiate(int) { return true; } -template <class> constexpr bool instantiate(long) { return true; } -template <class T> constexpr bool instantiate() { return instantiate<T>(0); } - template <class To> struct ConvertibleToSimple { - operator To() const { - return To{}; - } + operator To() const { return To{}; } }; template <class To> struct ConvertibleTo { To to{}; operator To&() & { return to; } - operator To const&() const & { return to; } + operator To const&() const& { return to; } operator To&&() && { return std::move(to); } - operator To const&&() const && { return std::move(to); } + operator To const&&() const&& { return std::move(to); } }; -template <class Hasher, class Key, class Res = decltype(std::declval<Hasher&>()(std::declval<Key>()))> -constexpr bool can_hash(int) { - return std::is_same<Res, std::size_t>::value; -} -template <class, class> -constexpr bool can_hash(long) { - return false; -} -template <class Hasher, class Key> -constexpr bool can_hash() { - return can_hash<Hasher, Key>(0); -} -} // namespace PoisonedHashDetail - -template <class Hash, class Key, class InputKey> -TEST_CONSTEXPR_CXX20 void test_hash_enabled(InputKey const& key) { - using namespace PoisonedHashDetail; - +// Test that the specified Hash meets the requirements of an enabled hash +template <class Key, class Hash = std::hash<Key>> +TEST_CONSTEXPR_CXX20 void test_hash_enabled(Key const& key = Key{}) { static_assert(std::is_destructible<Hash>::value, ""); + // Enabled hash requirements static_assert(std::is_default_constructible<Hash>::value, ""); static_assert(std::is_copy_constructible<Hash>::value, ""); @@ -167,13 +83,11 @@ TEST_CONSTEXPR_CXX20 void test_hash_enabled(InputKey const& key) { const Hash h{}; assert(h(key) == h(key)); - } -template <class Hash, 
class Key> +// Test that the specified Hash meets the requirements of a disabled hash. +template <class Key, class Hash = std::hash<Key>> void test_hash_disabled() { - using namespace PoisonedHashDetail; - // Disabled hash requirements static_assert(!std::is_default_constructible<Hash>::value, ""); static_assert(!std::is_copy_constructible<Hash>::value, ""); @@ -181,11 +95,8 @@ void test_hash_disabled() { static_assert(!std::is_copy_assignable<Hash>::value, ""); static_assert(!std::is_move_assignable<Hash>::value, ""); - static_assert(!std::is_function< - typename std::remove_pointer< - typename std::remove_reference<Hash>::type - >::type - >::value, ""); + static_assert( + !std::is_function<typename std::remove_pointer<typename std::remove_reference<Hash>::type>::type>::value, ""); // Hashable requirements static_assert(!can_hash<Hash, Key&>(), ""); @@ -205,41 +116,33 @@ void test_hash_disabled() { static_assert(!can_hash<Hash, ConvertibleTo<Key> const&&>(), ""); } +enum Enum {}; +enum EnumClass : bool {}; +struct Class {}; -template <class First, class ...Rest> -struct TypeList<First, Rest...> { - template <template <class> class Trait, bool Expect = true> - static constexpr bool assertTrait() { - static_assert(Trait<First>::value == Expect, ""); - return TypeList<Rest...>::template assertTrait<Trait, Expect>(); - } - - template <class Trait> - static void applyTrait() { - Trait::template apply<First>(); - TypeList<Rest...>::template applyTrait<Trait>(); - } -}; +// Each header that declares the std::hash template provides enabled +// specializations of std::hash for std::nullptr_t and all cv-unqualified +// arithmetic, enumeration, and pointer types. 
+#if TEST_STD_VER >= 17 +using MaybeNullptr = types::type_list<std::nullptr_t>; +#else +using MaybeNullptr = types::type_list<>; +#endif +using LibraryHashTypes = types:: + concatenate_t<types::arithmetic_types, types::type_list<Enum, EnumClass, void*, void const*, Class*>, MaybeNullptr>; -template <> -struct TypeList<> { - template <template <class> class Trait, bool Expect = true> - static constexpr bool assertTrait() { - return true; +struct TestHashEnabled { + template <class T> + void operator()() const { + test_hash_enabled<T>(); } - template <class Trait> - static void applyTrait() {} -}; - - -struct TestLibraryTrait { - template <class Type> - static void apply() { test_hash_enabled<std::hash<Type>, Type>(); } }; -template <class Types> -void test_library_hash_specializations_available(Types) { - Types::template applyTrait<TestLibraryTrait >(); +// Test that each of the library hash specializations for arithmetic types, +// enum types, and pointer types are available and enabled. +template <class Types = LibraryHashTypes> +void test_library_hash_specializations_available() { + types::for_each(Types(), TestHashEnabled()); } #endif // SUPPORT_POISONED_HASH_HELPER_H diff --git a/libcxx/test/support/test.support/test_poisoned_hash_helper.pass.cpp b/libcxx/test/support/test.support/test_poisoned_hash_helper.pass.cpp deleted file mode 100644 index 8145074..0000000 --- a/libcxx/test/support/test.support/test_poisoned_hash_helper.pass.cpp +++ /dev/null @@ -1,33 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// UNSUPPORTED: c++03 - -// Test that the header `poisoned_hash_helper.h` doesn't include any -// headers that provide hash<T> specializations. This is required so that the -// 'test_library_hash_specializations_available()' function returns false -// by default, unless a STL header providing hash has already been included. - -#include "poisoned_hash_helper.h" - -#include "test_macros.h" - -template <class T, std::size_t = sizeof(T)> -constexpr bool is_complete_imp(int) { return true; } -template <class> constexpr bool is_complete_imp(long) { return false; } -template <class T> constexpr bool is_complete() { return is_complete_imp<T>(0); } - -template <class T> struct has_complete_hash { - enum { value = is_complete<std::hash<T> >() }; -}; - -int main(int, char**) { - static_assert(LibraryHashTypes::assertTrait<has_complete_hash, false>(), ""); - - return 0; -} diff --git a/lld/COFF/DLL.cpp b/lld/COFF/DLL.cpp index 5f00ead..b00a594 100644 --- a/lld/COFF/DLL.cpp +++ b/lld/COFF/DLL.cpp @@ -142,6 +142,30 @@ private: size_t size; }; +// A chunk for ARM64EC auxiliary IAT. 
+class AuxImportChunk : public NonSectionChunk { +public: + explicit AuxImportChunk(ImportFile *file) : file(file) { + setAlignment(sizeof(uint64_t)); + } + size_t getSize() const override { return sizeof(uint64_t); } + + void writeTo(uint8_t *buf) const override { + uint64_t impchkVA = 0; + if (file->impchkThunk) + impchkVA = file->impchkThunk->getRVA() + file->ctx.config.imageBase; + write64le(buf, impchkVA); + } + + void getBaserels(std::vector<Baserel> *res) override { + if (file->impchkThunk) + res->emplace_back(rva, file->ctx.config.machine); + } + +private: + ImportFile *file; +}; + static std::vector<std::vector<DefinedImportData *>> binImports(COFFLinkerContext &ctx, const std::vector<DefinedImportData *> &imports) { @@ -160,7 +184,15 @@ binImports(COFFLinkerContext &ctx, // Sort symbols by name for each group. std::vector<DefinedImportData *> &syms = kv.second; llvm::sort(syms, [](DefinedImportData *a, DefinedImportData *b) { - return a->getName() < b->getName(); + auto getBaseName = [](DefinedImportData *sym) { + StringRef name = sym->getName(); + name.consume_front("__imp_"); + // Skip aux_ part of ARM64EC function symbol name. 
+ if (sym->file->impchkThunk) + name.consume_front("aux_"); + return name; + }; + return getBaseName(a) < getBaseName(b); }); v.push_back(std::move(syms)); } @@ -687,16 +719,24 @@ void IdataContents::create(COFFLinkerContext &ctx) { if (s->getExternalName().empty()) { lookups.push_back(make<OrdinalOnlyChunk>(ctx, ord)); addresses.push_back(make<OrdinalOnlyChunk>(ctx, ord)); - continue; + } else { + auto *c = make<HintNameChunk>(s->getExternalName(), ord); + lookups.push_back(make<LookupChunk>(ctx, c)); + addresses.push_back(make<LookupChunk>(ctx, c)); + hints.push_back(c); + } + + if (s->file->impECSym) { + auto chunk = make<AuxImportChunk>(s->file); + auxIat.push_back(chunk); + s->file->impECSym->setLocation(chunk); } - auto *c = make<HintNameChunk>(s->getExternalName(), ord); - lookups.push_back(make<LookupChunk>(ctx, c)); - addresses.push_back(make<LookupChunk>(ctx, c)); - hints.push_back(c); } // Terminate with null values. lookups.push_back(make<NullChunk>(ctx.config.wordsize)); addresses.push_back(make<NullChunk>(ctx.config.wordsize)); + if (ctx.config.machine == ARM64EC) + auxIat.push_back(make<NullChunk>(ctx.config.wordsize)); for (int i = 0, e = syms.size(); i < e; ++i) syms[i]->setLocation(addresses[base + i]); diff --git a/lld/COFF/DLL.h b/lld/COFF/DLL.h index 7cf71f5..48b0f17 100644 --- a/lld/COFF/DLL.h +++ b/lld/COFF/DLL.h @@ -31,6 +31,7 @@ public: std::vector<Chunk *> addresses; std::vector<Chunk *> hints; std::vector<Chunk *> dllNames; + std::vector<Chunk *> auxIat; }; // Windows-specific. 
diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp index a1fe644..9994639 100644 --- a/lld/COFF/Driver.cpp +++ b/lld/COFF/Driver.cpp @@ -2447,6 +2447,7 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) { ctx.symtab.addAbsolute("__arm64x_extra_rfe_table_size", 0); ctx.symtab.addAbsolute("__arm64x_redirection_metadata", 0); ctx.symtab.addAbsolute("__arm64x_redirection_metadata_count", 0); + ctx.symtab.addAbsolute("__hybrid_auxiliary_iat", 0); ctx.symtab.addAbsolute("__hybrid_code_map", 0); ctx.symtab.addAbsolute("__hybrid_code_map_count", 0); ctx.symtab.addAbsolute("__x64_code_ranges_to_entry_points", 0); diff --git a/lld/COFF/InputFiles.cpp b/lld/COFF/InputFiles.cpp index 3dbdf8f..5692204 100644 --- a/lld/COFF/InputFiles.cpp +++ b/lld/COFF/InputFiles.cpp @@ -1071,19 +1071,39 @@ void ImportFile::parse() { this->hdr = hdr; externalName = extName; - impSym = ctx.symtab.addImportData(impName, this); + bool isCode = hdr->getType() == llvm::COFF::IMPORT_CODE; + + if (ctx.config.machine != ARM64EC) { + impSym = ctx.symtab.addImportData(impName, this, location); + } else { + // In addition to the regular IAT, ARM64EC also contains an auxiliary IAT, + // which holds addresses that are guaranteed to be callable directly from + // ARM64 code. Function symbol naming is swapped: __imp_ symbols refer to + // the auxiliary IAT, while __imp_aux_ symbols refer to the regular IAT. For + // data imports, the naming is reversed. + StringRef auxImpName = saver().save("__imp_aux_" + name); + if (isCode) { + impSym = ctx.symtab.addImportData(auxImpName, this, location); + impECSym = ctx.symtab.addImportData(impName, this, auxLocation); + } else { + impSym = ctx.symtab.addImportData(impName, this, location); + impECSym = ctx.symtab.addImportData(auxImpName, this, auxLocation); + } + if (!impECSym) + return; + } // If this was a duplicate, we logged an error but may continue; // in this case, impSym is nullptr. 
if (!impSym) return; if (hdr->getType() == llvm::COFF::IMPORT_CONST) - static_cast<void>(ctx.symtab.addImportData(name, this)); + static_cast<void>(ctx.symtab.addImportData(name, this, location)); // If type is function, we need to create a thunk which jump to an // address pointed by the __imp_ symbol. (This allows you to call // DLL functions just like regular non-DLL functions.) - if (hdr->getType() == llvm::COFF::IMPORT_CODE) { + if (isCode) { if (ctx.config.machine != ARM64EC) { thunkSym = ctx.symtab.addImportThunk(name, impSym, makeImportThunk()); } else { diff --git a/lld/COFF/InputFiles.h b/lld/COFF/InputFiles.h index 3b83701..8140a03 100644 --- a/lld/COFF/InputFiles.h +++ b/lld/COFF/InputFiles.h @@ -362,6 +362,10 @@ public: const coff_import_header *hdr; Chunk *location = nullptr; + // Auxiliary IAT symbol and chunk on ARM64EC. + DefinedImportData *impECSym = nullptr; + Chunk *auxLocation = nullptr; + // We want to eliminate dllimported symbols if no one actually refers to them. // These "Live" bits are used to keep track of which import library members // are actually in use. 
diff --git a/lld/COFF/SymbolTable.cpp b/lld/COFF/SymbolTable.cpp index a6575ec..582a8562 100644 --- a/lld/COFF/SymbolTable.cpp +++ b/lld/COFF/SymbolTable.cpp @@ -584,7 +584,7 @@ void SymbolTable::initializeECThunks() { Symbol *sym = exitThunks.lookup(file->thunkSym); if (!sym) - sym = exitThunks.lookup(file->impSym); + sym = exitThunks.lookup(file->impECSym); file->impchkThunk->exitThunk = dyn_cast_or_null<Defined>(sym); } } @@ -785,11 +785,12 @@ Symbol *SymbolTable::addCommon(InputFile *f, StringRef n, uint64_t size, return s; } -DefinedImportData *SymbolTable::addImportData(StringRef n, ImportFile *f) { +DefinedImportData *SymbolTable::addImportData(StringRef n, ImportFile *f, + Chunk *&location) { auto [s, wasInserted] = insert(n, nullptr); s->isUsedInRegularObj = true; if (wasInserted || isa<Undefined>(s) || s->isLazy()) { - replaceSymbol<DefinedImportData>(s, n, f); + replaceSymbol<DefinedImportData>(s, n, f, location); return cast<DefinedImportData>(s); } diff --git a/lld/COFF/SymbolTable.h b/lld/COFF/SymbolTable.h index 13e151e..bf97cf4 100644 --- a/lld/COFF/SymbolTable.h +++ b/lld/COFF/SymbolTable.h @@ -103,7 +103,8 @@ public: Symbol *addCommon(InputFile *f, StringRef n, uint64_t size, const llvm::object::coff_symbol_generic *s = nullptr, CommonChunk *c = nullptr); - DefinedImportData *addImportData(StringRef n, ImportFile *f); + DefinedImportData *addImportData(StringRef n, ImportFile *f, + Chunk *&location); Symbol *addImportThunk(StringRef name, DefinedImportData *s, ImportThunkChunk *chunk); void addLibcall(StringRef name); diff --git a/lld/COFF/Symbols.h b/lld/COFF/Symbols.h index 724330e..2df60a0 100644 --- a/lld/COFF/Symbols.h +++ b/lld/COFF/Symbols.h @@ -354,23 +354,23 @@ public: // table in an output. The former has "__imp_" prefix. 
class DefinedImportData : public Defined { public: - DefinedImportData(StringRef n, ImportFile *f) - : Defined(DefinedImportDataKind, n), file(f) { - } + DefinedImportData(StringRef n, ImportFile *file, Chunk *&location) + : Defined(DefinedImportDataKind, n), file(file), location(location) {} static bool classof(const Symbol *s) { return s->kind() == DefinedImportDataKind; } - uint64_t getRVA() { return file->location->getRVA(); } - Chunk *getChunk() { return file->location; } - void setLocation(Chunk *addressTable) { file->location = addressTable; } + uint64_t getRVA() { return getChunk()->getRVA(); } + Chunk *getChunk() { return location; } + void setLocation(Chunk *addressTable) { location = addressTable; } StringRef getDLLName() { return file->dllName; } StringRef getExternalName() { return file->externalName; } uint16_t getOrdinal() { return file->hdr->OrdinalHint; } ImportFile *file; + Chunk *&location; // This is a pointer to the synthetic symbol associated with the load thunk // for this symbol that will be called if the DLL is delay-loaded. This is diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp index b589a16..9a80400 100644 --- a/lld/COFF/Writer.cpp +++ b/lld/COFF/Writer.cpp @@ -914,6 +914,8 @@ void Writer::addSyntheticIdata() { if (!idata.hints.empty()) add(".idata$6", idata.hints); add(".idata$7", idata.dllNames); + if (!idata.auxIat.empty()) + add(".idata$9", idata.auxIat); } void Writer::appendECImportTables() { @@ -936,6 +938,15 @@ void Writer::appendECImportTables() { rdataSec->contribSections.insert(rdataSec->contribSections.begin(), importAddresses); } + + // The auxiliary IAT is always placed at the end of the .rdata section + // and is aligned to 4KB. 
+ if (PartialSection *auxIat = findPartialSection(".idata$9", rdata)) { + auxIat->chunks.front()->setAlignment(0x1000); + rdataSec->chunks.insert(rdataSec->chunks.end(), auxIat->chunks.begin(), + auxIat->chunks.end()); + rdataSec->addContributingPartialSection(auxIat); + } } // Locate the first Chunk and size of the import directory list and the @@ -1095,7 +1106,8 @@ void Writer::createSections() { // ARM64EC has specific placement and alignment requirements for the IAT. // Delay adding its chunks until appendECImportTables. - if (isArm64EC(ctx.config.machine) && pSec->name == ".idata$5") + if (isArm64EC(ctx.config.machine) && + (pSec->name == ".idata$5" || pSec->name == ".idata$9")) continue; OutputSection *sec = createSection(name, outChars); @@ -2254,6 +2266,11 @@ void Writer::setECSymbols() { Symbol *entryPointCountSym = ctx.symtab.findUnderscore("__arm64x_redirection_metadata_count"); cast<DefinedAbsolute>(entryPointCountSym)->setVA(exportThunks.size()); + + Symbol *iatSym = ctx.symtab.findUnderscore("__hybrid_auxiliary_iat"); + replaceSymbol<DefinedSynthetic>(iatSym, "__hybrid_auxiliary_iat", + idata.auxIat.empty() ? nullptr + : idata.auxIat.front()); } // Write section contents to a mmap'ed file. 
diff --git a/lld/test/COFF/Inputs/loadconfig-arm64ec.s b/lld/test/COFF/Inputs/loadconfig-arm64ec.s index 75dc610..8d59d29 100644 --- a/lld/test/COFF/Inputs/loadconfig-arm64ec.s +++ b/lld/test/COFF/Inputs/loadconfig-arm64ec.s @@ -76,7 +76,7 @@ __chpe_metadata: .rva __os_arm64x_check_icall .rva __os_arm64x_check_icall_cfg .word 0 // __arm64x_native_entrypoint - .word 0 // __hybrid_auxiliary_iat + .rva __hybrid_auxiliary_iat .word __x64_code_ranges_to_entry_points_count .word __arm64x_redirection_metadata_count .rva __os_arm64x_get_x64_information diff --git a/lld/test/COFF/arm64ec-import.test b/lld/test/COFF/arm64ec-import.test index 44a84c0..f8279ce 100644 --- a/lld/test/COFF/arm64ec-import.test +++ b/lld/test/COFF/arm64ec-import.test @@ -63,14 +63,37 @@ DISASM-NEXT: 180002000: ff 25 02 10 00 00 jmpq *0x1002(%rip) RUN: llvm-readobj --hex-dump=.test out.dll | FileCheck --check-prefix=TESTSEC %s RUN: llvm-readobj --hex-dump=.test out2.dll | FileCheck --check-prefix=TESTSEC %s -TESTSEC: 0x180006000 08300000 00300000 10300000 20300000 -TESTSEC-NEXT: 0x180006010 08100000 1c100000 00200000 +TESTSEC: 0x180007000 08500000 00300000 10500000 20500000 +TESTSEC-NEXT: 0x180007010 08300000 00500000 10300000 20300000 +TESTSEC-NEXT: 0x180007020 08100000 1c100000 00200000 RUN: llvm-readobj --headers out.dll | FileCheck -check-prefix=HEADERS %s HEADERS: LoadConfigTableRVA: 0x4010 HEADERS: IATRVA: 0x3000 HEADERS: IATSize: 0x1000 +RUN: llvm-readobj --coff-load-config out.dll | FileCheck -check-prefix=LOADCONFIG %s +LOADCONFIG: AuxiliaryIAT: 0x5000 + +RUN: llvm-readobj --hex-dump=.rdata out.dll | FileCheck -check-prefix=RDATA %s +RDATA: 0x180005000 00000000 00000000 08100080 01000000 +RDATA-NEXT: 0x180005010 1c100080 01000000 00000000 00000000 +RDATA-NEXT: 0x180005020 30100080 01000000 00000000 00000000 + +RUN: llvm-readobj --coff-basereloc out.dll | FileCheck -check-prefix=BASERELOC %s +BASERELOC: BaseReloc [ +BASERELOC-NOT: Address: 0x5000 +BASERELOC: Address: 0x5008 +BASERELOC-NEXT: 
} +BASERELOC-NEXT: Entry { +BASERELOC-NEXT: Type: DIR64 +BASERELOC-NEXT: Address: 0x5010 +BASERELOC-NEXT: } +BASERELOC-NEXT: Entry { +BASERELOC-NEXT: Type: DIR64 +BASERELOC-NEXT: Address: 0x5020 +BASERELOC-NEXT: } + #--- test.s .section .test, "r" .globl arm64ec_data_sym @@ -80,6 +103,10 @@ arm64ec_data_sym: .rva __imp_data .rva __imp_func2 .rva __imp_t2func + .rva __imp_aux_func + .rva __imp_aux_data + .rva __imp_aux_func2 + .rva __imp_aux_t2func .rva __impchk_func .rva __impchk_func2 .rva func diff --git a/lldb/include/lldb/Core/SourceManager.h b/lldb/include/lldb/Core/SourceManager.h index e386271..d929f7b 100644 --- a/lldb/include/lldb/Core/SourceManager.h +++ b/lldb/include/lldb/Core/SourceManager.h @@ -74,7 +74,7 @@ public: const Checksum &GetChecksum() const { return m_checksum; } - llvm::once_flag &GetChecksumWarningOnceFlag() { + std::once_flag &GetChecksumWarningOnceFlag() { return m_checksum_warning_once_flag; } @@ -92,7 +92,7 @@ public: Checksum m_checksum; /// Once flag for emitting a checksum mismatch warning. - llvm::once_flag m_checksum_warning_once_flag; + std::once_flag m_checksum_warning_once_flag; // Keep the modification time that this file data is valid for llvm::sys::TimePoint<> m_mod_time; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index 5b9de6f..70540fe 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -2932,14 +2932,22 @@ void DWARFASTParserClang::ParseSingleMember( last_field_info = this_field_info; last_field_info.SetIsBitfield(true); } else { - last_field_info.bit_offset = field_bit_offset; + FieldInfo this_field_info; + this_field_info.is_bitfield = false; + this_field_info.bit_offset = field_bit_offset; + // TODO: we shouldn't silently ignore the bit_size if we fail + // to GetByteSize. 
if (std::optional<uint64_t> clang_type_size = member_type->GetByteSize(nullptr)) { - last_field_info.bit_size = *clang_type_size * character_width; + this_field_info.bit_size = *clang_type_size * character_width; } - last_field_info.SetIsBitfield(false); + if (this_field_info.GetFieldEnd() <= last_field_info.GetEffectiveFieldEnd()) + this_field_info.SetEffectiveFieldEnd( + last_field_info.GetEffectiveFieldEnd()); + + last_field_info = this_field_info; } // Don't turn artificial members such as vtable pointers into real FieldDecls @@ -3738,7 +3746,7 @@ void DWARFASTParserClang::AddUnnamedBitfieldToRecordTypeIfNeeded( const FieldInfo &current_field) { // TODO: get this value from target const uint64_t word_width = 32; - uint64_t last_field_end = previous_field.bit_offset + previous_field.bit_size; + uint64_t last_field_end = previous_field.GetEffectiveFieldEnd(); if (!previous_field.IsBitfield()) { // The last field was not a bit-field... diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h index 3809ee9..1ffb09b 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h @@ -258,9 +258,27 @@ protected: private: struct FieldInfo { + /// Size in bits that this field occupies. Can but + /// need not be the DW_AT_bit_size of the field. uint64_t bit_size = 0; + + /// Offset of this field in bits from the beginning + /// of the containing struct. Can but need not + /// be the DW_AT_data_bit_offset of the field. uint64_t bit_offset = 0; + + /// In case this field is folded into the storage + /// of a previous member's storage (for example + /// with [[no_unique_address]]), the effective field + /// end is the offset in bits from the beginning of + /// the containing struct where the field we were + /// folded into ended. + std::optional<uint64_t> effective_field_end; + + /// Set to 'true' if this field is a bit-field. 
bool is_bitfield = false; + + /// Set to 'true' if this field is DW_AT_artificial. bool is_artificial = false; FieldInfo() = default; @@ -276,6 +294,19 @@ private: // bit offset than any previous bitfield + size. return (bit_size + bit_offset) <= next_bit_offset; } + + /// Returns the offset in bits of where the storage this field + /// occupies ends. + uint64_t GetFieldEnd() const { return bit_size + bit_offset; } + + void SetEffectiveFieldEnd(uint64_t val) { effective_field_end = val; } + + /// If this field was folded into storage of a previous field, + /// returns the offset in bits of where that storage ends. Otherwise, + /// returns the regular field end (see \ref GetFieldEnd). + uint64_t GetEffectiveFieldEnd() const { + return effective_field_end.value_or(GetFieldEnd()); + } }; /// Parsed form of all attributes that are relevant for parsing type members. diff --git a/lldb/test/API/lit.cfg.py b/lldb/test/API/lit.cfg.py index dfeb765..96520c7 100644 --- a/lldb/test/API/lit.cfg.py +++ b/lldb/test/API/lit.cfg.py @@ -265,11 +265,6 @@ if is_configured("lldb_libs_dir"): if is_configured("lldb_framework_dir"): dotest_cmd += ["--framework", config.lldb_framework_dir] -# Facebook T92898286 -if is_configured("llvm_test_bolt"): - dotest_cmd += ["-E", '"--post-link-optimize"'] -# End Facebook T92898286 - if ( "lldb-repro-capture" in config.available_features or "lldb-repro-replay" in config.available_features diff --git a/lldb/test/API/lit.site.cfg.py.in b/lldb/test/API/lit.site.cfg.py.in index 602f457..8b2d09a 100644 --- a/lldb/test/API/lit.site.cfg.py.in +++ b/lldb/test/API/lit.site.cfg.py.in @@ -1,9 +1,5 @@ @LIT_SITE_CFG_IN_HEADER@ -#Facebook T92898286 -import lit.util -#End Facebook T92898286 - config.llvm_src_root = "@LLVM_SOURCE_DIR@" config.llvm_obj_root = "@LLVM_BINARY_DIR@" config.llvm_tools_dir = lit_config.substitute("@LLVM_TOOLS_DIR@") @@ -43,10 +39,6 @@ config.libcxx_include_target_dir = "@LIBCXX_GENERATED_INCLUDE_TARGET_DIR@" config.lldb_module_cache = 
os.path.join("@LLDB_TEST_MODULE_CACHE_LLDB@", "lldb-api") config.clang_module_cache = os.path.join("@LLDB_TEST_MODULE_CACHE_CLANG@", "lldb-api") -# Facebook T92898286 -config.llvm_test_bolt = lit.util.pythonize_bool("@LLVM_TEST_BOLT@") -# End Facebook T92898286 - # Plugins lldb_build_intel_pt = '@LLDB_BUILD_INTEL_PT@' if lldb_build_intel_pt == '1': diff --git a/lldb/test/Shell/SymbolFile/DWARF/no_unique_address-with-bitfields.cpp b/lldb/test/Shell/SymbolFile/DWARF/no_unique_address-with-bitfields.cpp index 1c9cc36..980180e 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/no_unique_address-with-bitfields.cpp +++ b/lldb/test/Shell/SymbolFile/DWARF/no_unique_address-with-bitfields.cpp @@ -1,10 +1,10 @@ -// LLDB currently erroneously adds an unnamed bitfield -// into the AST when an overlapping no_unique_address -// field precedes a bitfield. - // RUN: %clang --target=x86_64-apple-macosx -c -gdwarf -o %t %s // RUN: %lldb %t \ // RUN: -o "target var global" \ +// RUN: -o "target var global2" \ +// RUN: -o "target var global3" \ +// RUN: -o "target var global4" \ +// RUN: -o "target var global5" \ // RUN: -o "image dump ast" \ // RUN: -o exit | FileCheck %s @@ -12,12 +12,12 @@ // CHECK: CXXRecordDecl {{.*}} struct Foo definition // CHECK: |-FieldDecl {{.*}} data 'char[5]' // CHECK-NEXT: |-FieldDecl {{.*}} padding 'Empty' -// CHECK-NEXT: |-FieldDecl {{.*}} 'int' -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 8 -// CHECK-NEXT: `-FieldDecl {{.*}} sloc> flag 'unsigned long' +// CHECK-NEXT: `-FieldDecl {{.*}} flag 'unsigned long' // CHECK-NEXT: `-IntegerLiteral {{.*}} 'int' 1 struct Empty {}; +struct Empty2 {}; +struct Empty3 {}; struct Foo { char data[5]; @@ -26,3 +26,85 @@ struct Foo { }; Foo global; + +// CHECK: CXXRecordDecl {{.*}} struct ConsecutiveOverlap definition +// CHECK: |-FieldDecl {{.*}} data 'char[5]' +// CHECK-NEXT: |-FieldDecl {{.*}} p1 'Empty' +// CHECK-NEXT: |-FieldDecl {{.*}} p2 'Empty2' +// CHECK-NEXT: |-FieldDecl {{.*}} p3 'Empty3' +// CHECK-NEXT: 
`-FieldDecl {{.*}} flag 'unsigned long' +// CHECK-NEXT: `-IntegerLiteral {{.*}} 'int' 1 + +struct ConsecutiveOverlap { + char data[5]; + [[no_unique_address]] Empty p1; + [[no_unique_address]] Empty2 p2; + [[no_unique_address]] Empty3 p3; + unsigned long flag : 1; +}; + +ConsecutiveOverlap global2; + +// FIXME: we fail to deduce the unnamed bitfields here. +// +// CHECK: CXXRecordDecl {{.*}} struct MultipleAtOffsetZero definition +// CHECK: |-FieldDecl {{.*}} data 'char[5]' +// CHECK-NEXT: |-FieldDecl {{.*}} p1 'Empty' +// CHECK-NEXT: |-FieldDecl {{.*}} f1 'unsigned long' +// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 1 +// CHECK-NEXT: |-FieldDecl {{.*}} p2 'Empty2' +// CHECK-NEXT: `-FieldDecl {{.*}} f2 'unsigned long' +// CHECK-NEXT: `-IntegerLiteral {{.*}} 'int' 1 + +struct MultipleAtOffsetZero { + char data[5]; + [[no_unique_address]] Empty p1; + int : 4; + unsigned long f1 : 1; + [[no_unique_address]] Empty2 p2; + int : 4; + unsigned long f2 : 1; +}; + +MultipleAtOffsetZero global3; + +// FIXME: we fail to deduce the unnamed bitfields here. 
+// +// CHECK: CXXRecordDecl {{.*}} struct MultipleEmpty definition +// CHECK: |-FieldDecl {{.*}} data 'char[5]' +// CHECK-NEXT: |-FieldDecl {{.*}} p1 'Empty' +// CHECK-NEXT: |-FieldDecl {{.*}} f1 'unsigned long' +// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 1 +// CHECK-NEXT: |-FieldDecl {{.*}} p2 'Empty' +// CHECK-NEXT: `-FieldDecl {{.*}} f2 'unsigned long' +// CHECK-NEXT: `-IntegerLiteral {{.*}} 'int' 1 + +struct MultipleEmpty { + char data[5]; + [[no_unique_address]] Empty p1; + int : 4; + unsigned long f1 : 1; + [[no_unique_address]] Empty p2; + int : 4; + unsigned long f2 : 1; +}; + +MultipleEmpty global4; + +// CHECK: CXXRecordDecl {{.*}} struct FieldBitfieldOverlap definition +// CHECK: |-FieldDecl {{.*}} a 'int' +// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 3 +// CHECK-NEXT: |-FieldDecl {{.*}} p1 'Empty' +// CHECK-NEXT: |-FieldDecl {{.*}} b 'int' +// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 6 +// CHECK-NEXT: `-FieldDecl {{.*}} c 'int' +// CHECK-NEXT: `-IntegerLiteral {{.*}} 'int' 1 + +struct FieldBitfieldOverlap { + int a : 3; + [[no_unique_address]] Empty p1; + int b : 6; + int c : 1; +}; + +FieldBitfieldOverlap global5; diff --git a/lldb/test/Shell/helper/toolchain.py b/lldb/test/Shell/helper/toolchain.py index 7b7be06..255955f 100644 --- a/lldb/test/Shell/helper/toolchain.py +++ b/lldb/test/Shell/helper/toolchain.py @@ -165,11 +165,6 @@ def use_support_substitutions(config): if config.cmake_sysroot: host_flags += ["--sysroot={}".format(config.cmake_sysroot)] - # Facebook T92898286 - if config.llvm_test_bolt: - host_flags += ["--post-link-optimize"] - # End Facebook T92898286 - host_flags = " ".join(host_flags) config.substitutions.append(("%clang_host", "%clang " + host_flags)) config.substitutions.append(("%clangxx_host", "%clangxx " + host_flags)) diff --git a/lldb/test/Shell/lit.site.cfg.py.in b/lldb/test/Shell/lit.site.cfg.py.in index fe83237..b69e7bc 100644 --- a/lldb/test/Shell/lit.site.cfg.py.in +++ b/lldb/test/Shell/lit.site.cfg.py.in @@ 
-1,10 +1,5 @@ @LIT_SITE_CFG_IN_HEADER@ -#Facebook T92898286 -import lit.util -#End Facebook T92898286 - - config.llvm_src_root = "@LLVM_SOURCE_DIR@" config.llvm_obj_root = "@LLVM_BINARY_DIR@" config.llvm_tools_dir = lit_config.substitute("@LLVM_TOOLS_DIR@") @@ -36,10 +31,6 @@ config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@" config.lldb_module_cache = os.path.join("@LLDB_TEST_MODULE_CACHE_LLDB@", "lldb-shell") config.clang_module_cache = os.path.join("@LLDB_TEST_MODULE_CACHE_CLANG@", "lldb-shell") -# Facebook T92898286 -config.llvm_test_bolt = lit.util.pythonize_bool("@LLVM_TEST_BOLT@") -# End Facebook T92898286 - import lit.llvm lit.llvm.initialize(lit_config, config) diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index e8f2913f..c637feb 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -711,10 +711,6 @@ set(LLVM_LIB_FUZZING_ENGINE "" CACHE PATH option(LLVM_USE_SPLIT_DWARF "Use -gsplit-dwarf when compiling llvm and --gdb-index when linking." OFF) -# Facebook T92898286 -option(LLVM_TEST_BOLT "Enable BOLT testing in non-BOLT tests that use clang" OFF) -# End Facebook T92898286 - # Define an option controlling whether we should build for 32-bit on 64-bit # platforms, where supported. 
if( CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT (WIN32 OR ${CMAKE_SYSTEM_NAME} MATCHES "AIX")) diff --git a/llvm/include/llvm/ADT/DenseMap.h b/llvm/include/llvm/ADT/DenseMap.h index 083d5c9..68498a3 100644 --- a/llvm/include/llvm/ADT/DenseMap.h +++ b/llvm/include/llvm/ADT/DenseMap.h @@ -471,19 +471,23 @@ protected: setNumEntries(other.getNumEntries()); setNumTombstones(other.getNumTombstones()); - if (std::is_trivially_copyable<KeyT>::value && - std::is_trivially_copyable<ValueT>::value) - memcpy(reinterpret_cast<void *>(getBuckets()), other.getBuckets(), - getNumBuckets() * sizeof(BucketT)); - else - for (size_t i = 0; i < getNumBuckets(); ++i) { - ::new (&getBuckets()[i].getFirst()) - KeyT(other.getBuckets()[i].getFirst()); - if (!KeyInfoT::isEqual(getBuckets()[i].getFirst(), getEmptyKey()) && - !KeyInfoT::isEqual(getBuckets()[i].getFirst(), getTombstoneKey())) - ::new (&getBuckets()[i].getSecond()) - ValueT(other.getBuckets()[i].getSecond()); + BucketT *Buckets = getBuckets(); + const BucketT *OtherBuckets = other.getBuckets(); + const size_t NumBuckets = getNumBuckets(); + if constexpr (std::is_trivially_copyable_v<KeyT> && + std::is_trivially_copyable_v<ValueT>) { + memcpy(reinterpret_cast<void *>(Buckets), OtherBuckets, + NumBuckets * sizeof(BucketT)); + } else { + const KeyT EmptyKey = getEmptyKey(); + const KeyT TombstoneKey = getTombstoneKey(); + for (size_t I = 0; I < NumBuckets; ++I) { + ::new (&Buckets[I].getFirst()) KeyT(OtherBuckets[I].getFirst()); + if (!KeyInfoT::isEqual(Buckets[I].getFirst(), EmptyKey) && + !KeyInfoT::isEqual(Buckets[I].getFirst(), TombstoneKey)) + ::new (&Buckets[I].getSecond()) ValueT(OtherBuckets[I].getSecond()); } + } } static unsigned getHashValue(const KeyT &Val) { @@ -496,7 +500,7 @@ protected: } static const KeyT getEmptyKey() { - static_assert(std::is_base_of<DenseMapBase, DerivedT>::value, + static_assert(std::is_base_of_v<DenseMapBase, DerivedT>, "Must pass the derived type to this template!"); return KeyInfoT::getEmptyKey(); } 
@@ -570,7 +574,7 @@ private: template <typename KeyArg, typename... ValueArgs> BucketT *InsertIntoBucket(BucketT *TheBucket, KeyArg &&Key, ValueArgs &&...Values) { - TheBucket = InsertIntoBucketImpl(Key, Key, TheBucket); + TheBucket = InsertIntoBucketImpl(Key, TheBucket); TheBucket->getFirst() = std::forward<KeyArg>(Key); ::new (&TheBucket->getSecond()) ValueT(std::forward<ValueArgs>(Values)...); @@ -580,7 +584,7 @@ private: template <typename LookupKeyT> BucketT *InsertIntoBucketWithLookup(BucketT *TheBucket, KeyT &&Key, ValueT &&Value, LookupKeyT &Lookup) { - TheBucket = InsertIntoBucketImpl(Key, Lookup, TheBucket); + TheBucket = InsertIntoBucketImpl(Lookup, TheBucket); TheBucket->getFirst() = std::move(Key); ::new (&TheBucket->getSecond()) ValueT(std::move(Value)); @@ -588,8 +592,7 @@ private: } template <typename LookupKeyT> - BucketT *InsertIntoBucketImpl(const KeyT &Key, const LookupKeyT &Lookup, - BucketT *TheBucket) { + BucketT *InsertIntoBucketImpl(const LookupKeyT &Lookup, BucketT *TheBucket) { incrementEpoch(); // If the load of the hash table is more than 3/4, or if fewer than 1/8 of diff --git a/llvm/include/llvm/Analysis/CtxProfAnalysis.h b/llvm/include/llvm/Analysis/CtxProfAnalysis.h index d3b7ba9..b3e64b2 100644 --- a/llvm/include/llvm/Analysis/CtxProfAnalysis.h +++ b/llvm/include/llvm/Analysis/CtxProfAnalysis.h @@ -9,7 +9,6 @@ #ifndef LLVM_ANALYSIS_CTXPROFANALYSIS_H #define LLVM_ANALYSIS_CTXPROFANALYSIS_H -#include "llvm/ADT/DenseMap.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/IntrinsicInst.h" diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h index 00ead11..de7e7be 100644 --- a/llvm/include/llvm/Analysis/ValueTracking.h +++ b/llvm/include/llvm/Analysis/ValueTracking.h @@ -119,6 +119,9 @@ bool isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL, const DominatorTree *DT = nullptr, bool UseInstrInfo = true); +bool isKnownToBeAPowerOfTwo(const Value *V, 
bool OrZero, unsigned Depth, + const SimplifyQuery &Q); + bool isOnlyUsedInZeroComparison(const Instruction *CxtI); bool isOnlyUsedInZeroEqualityComparison(const Instruction *CxtI); diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td index f1017bd..97c6963 100644 --- a/llvm/include/llvm/IR/IntrinsicsDirectX.td +++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td @@ -87,7 +87,7 @@ def int_dx_umad : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLV def int_dx_normalize : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty]>; def int_dx_rcp : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; def int_dx_rsqrt : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; - def int_dx_wave_is_first_lane : DefaultAttrsIntrinsic<[llvm_i1_ty], [], [IntrConvergent]>; def int_dx_sign : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_any_ty]>; +def int_dx_step : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>]>; } diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td index 766fc0d..a4c0195 100644 --- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td +++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td @@ -67,6 +67,7 @@ let TargetPrefix = "spv" in { def int_spv_normalize : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty]>; def int_spv_rsqrt : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty]>; def int_spv_saturate : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; + def int_spv_step : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [LLVMMatchType<0>, llvm_anyfloat_ty]>; def int_spv_fdot : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], [llvm_anyfloat_ty, LLVMScalarOrSameVectorWidth<0, LLVMVectorElementType<0>>], diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 6605c6f..4352099 100644 --- a/llvm/include/llvm/InitializePasses.h +++ 
b/llvm/include/llvm/InitializePasses.h @@ -276,6 +276,7 @@ void initializeSafepointIRVerifierPass(PassRegistry &); void initializeSelectOptimizePass(PassRegistry &); void initializeScalarEvolutionWrapperPassPass(PassRegistry &); void initializeScalarizeMaskedMemIntrinLegacyPassPass(PassRegistry &); +void initializeScalarizerLegacyPassPass(PassRegistry &); void initializeScavengerTestPass(PassRegistry &); void initializeScopedNoAliasAAWrapperPassPass(PassRegistry &); void initializeSeparateConstOffsetFromGEPLegacyPassPass(PassRegistry &); diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h index 1da0215..92b59a6 100644 --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -130,6 +130,7 @@ struct ForcePassLinking { (void)llvm::createLowerAtomicPass(); (void)llvm::createLoadStoreVectorizerPass(); (void)llvm::createPartiallyInlineLibCallsPass(); + (void)llvm::createScalarizerPass(); (void)llvm::createSeparateConstOffsetFromGEPPass(); (void)llvm::createSpeculativeExecutionPass(); (void)llvm::createSpeculativeExecutionIfHasBranchDivergencePass(); diff --git a/llvm/include/llvm/ProfileData/PGOCtxProfReader.h b/llvm/include/llvm/ProfileData/PGOCtxProfReader.h index e034819..beda07d 100644 --- a/llvm/include/llvm/ProfileData/PGOCtxProfReader.h +++ b/llvm/include/llvm/ProfileData/PGOCtxProfReader.h @@ -15,13 +15,11 @@ #ifndef LLVM_PROFILEDATA_CTXINSTRPROFILEREADER_H #define LLVM_PROFILEDATA_CTXINSTRPROFILEREADER_H -#include "llvm/ADT/DenseSet.h" #include "llvm/Bitstream/BitstreamReader.h" #include "llvm/IR/GlobalValue.h" #include "llvm/ProfileData/PGOCtxProfWriter.h" #include "llvm/Support/Error.h" #include <map> -#include <vector> namespace llvm { /// A node (context) in the loaded contextual profile, suitable for mutation @@ -34,7 +32,7 @@ namespace llvm { class PGOCtxProfContext final { public: using CallTargetMapTy = std::map<GlobalValue::GUID, PGOCtxProfContext>; - using CallsiteMapTy = DenseMap<uint32_t, 
CallTargetMapTy>; + using CallsiteMapTy = std::map<uint32_t, CallTargetMapTy>; private: friend class PGOCtxProfileReader; @@ -97,7 +95,16 @@ public: return Callsites.find(I)->second; } - void getContainedGuids(DenseSet<GlobalValue::GUID> &Guids) const; + /// Insert this node's GUID as well as the GUIDs of the transitive closure of + /// child nodes, into the provided set (technically, all that is required of + /// `TSetOfGUIDs` is to have an `insert(GUID)` member) + template <class TSetOfGUIDs> + void getContainedGuids(TSetOfGUIDs &Guids) const { + Guids.insert(GUID); + for (const auto &[_, Callsite] : Callsites) + for (const auto &[_, Callee] : Callsite) + Callee.getContainedGuids(Guids); + } }; class PGOCtxProfileReader final { diff --git a/llvm/include/llvm/SandboxIR/SandboxIR.h b/llvm/include/llvm/SandboxIR/SandboxIR.h index 95fe239..5b57d5c 100644 --- a/llvm/include/llvm/SandboxIR/SandboxIR.h +++ b/llvm/include/llvm/SandboxIR/SandboxIR.h @@ -125,6 +125,7 @@ class ConstantPointerNull; class PoisonValue; class BlockAddress; class ConstantTokenNone; +class GlobalValue; class Context; class Function; class Instruction; @@ -326,6 +327,7 @@ protected: friend class UndefValue; // For `Val`. friend class PoisonValue; // For `Val`. friend class BlockAddress; // For `Val`. + friend class GlobalValue; // For `Val`. /// All values point to the context. Context &Ctx; @@ -1115,6 +1117,80 @@ public: #endif }; +class GlobalValue : public Constant { +protected: + GlobalValue(ClassID ID, llvm::GlobalValue *C, Context &Ctx) + : Constant(ID, C, Ctx) {} + friend class Context; // For constructor. + Use getOperandUseInternal(unsigned OpIdx, bool Verify) const override { + return getOperandUseDefault(OpIdx, Verify); + } + +public: + unsigned getUseOperandNo(const Use &Use) const override { + return getUseOperandNoDefault(Use); + } + /// For isa/dyn_cast. 
+ static bool classof(const sandboxir::Value *From) { + switch (From->getSubclassID()) { + case ClassID::Function: + case ClassID::GlobalVariable: + case ClassID::GlobalAlias: + case ClassID::GlobalIFunc: + return true; + default: + return false; + } + } + + unsigned getAddressSpace() const { + return cast<llvm::GlobalValue>(Val)->getAddressSpace(); + } + bool hasGlobalUnnamedAddr() const { + return cast<llvm::GlobalValue>(Val)->hasGlobalUnnamedAddr(); + } + + /// Returns true if this value's address is not significant in this module. + /// This attribute is intended to be used only by the code generator and LTO + /// to allow the linker to decide whether the global needs to be in the symbol + /// table. It should probably not be used in optimizations, as the value may + /// have uses outside the module; use hasGlobalUnnamedAddr() instead. + bool hasAtLeastLocalUnnamedAddr() const { + return cast<llvm::GlobalValue>(Val)->hasAtLeastLocalUnnamedAddr(); + } + + using UnnamedAddr = llvm::GlobalValue::UnnamedAddr; + + UnnamedAddr getUnnamedAddr() const { + return cast<llvm::GlobalValue>(Val)->getUnnamedAddr(); + } + void setUnnamedAddr(UnnamedAddr V); + + static UnnamedAddr getMinUnnamedAddr(UnnamedAddr A, UnnamedAddr B) { + return llvm::GlobalValue::getMinUnnamedAddr(A, B); + } + + bool hasComdat() const { return cast<llvm::GlobalValue>(Val)->hasComdat(); } + + // TODO: We need a SandboxIR Comdat if we want to implement getComdat(). 
+ using VisibilityTypes = llvm::GlobalValue::VisibilityTypes; + VisibilityTypes getVisibility() const { + return cast<llvm::GlobalValue>(Val)->getVisibility(); + } + bool hasDefaultVisibility() const { + return cast<llvm::GlobalValue>(Val)->hasDefaultVisibility(); + } + bool hasHiddenVisibility() const { + return cast<llvm::GlobalValue>(Val)->hasHiddenVisibility(); + } + bool hasProtectedVisibility() const { + return cast<llvm::GlobalValue>(Val)->hasProtectedVisibility(); + } + void setVisibility(VisibilityTypes V); + + // TODO: Add missing functions. +}; + class BlockAddress final : public Constant { BlockAddress(llvm::BlockAddress *C, Context &Ctx) : Constant(ClassID::BlockAddress, C, Ctx) {} @@ -3845,8 +3921,9 @@ protected: friend class PointerType; // For LLVMCtx. friend class CmpInst; // For LLVMCtx. TODO: cleanup when sandboxir::VectorType // is complete - friend class IntegerType; // For LLVMCtx. - friend class StructType; // For LLVMCtx. + friend class IntegerType; // For LLVMCtx. + friend class StructType; // For LLVMCtx. + friend class TargetExtType; // For LLVMCtx. Tracker IRTracker; /// Maps LLVM Value to the corresponding sandboxir::Value. 
Owns all diff --git a/llvm/include/llvm/SandboxIR/SandboxIRValues.def b/llvm/include/llvm/SandboxIR/SandboxIRValues.def index bd2f533e..7b72f9b 100644 --- a/llvm/include/llvm/SandboxIR/SandboxIRValues.def +++ b/llvm/include/llvm/SandboxIR/SandboxIRValues.def @@ -34,6 +34,9 @@ DEF_CONST(ConstantAggregateZero, ConstantAggregateZero) DEF_CONST(ConstantPointerNull, ConstantPointerNull) DEF_CONST(UndefValue, UndefValue) DEF_CONST(PoisonValue, PoisonValue) +DEF_CONST(GlobalVariable, GlobalVariable) +DEF_CONST(GlobalIFunc, GlobalIFunc) +DEF_CONST(GlobalAlias, GlobalAlias) DEF_CONST(BlockAddress, BlockAddress) DEF_CONST(ConstantTokenNone, ConstantTokenNone) diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index 6ab63ba..921fe94 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -6249,7 +6249,7 @@ struct AAAddressSpace : public StateWrapper<BooleanState, AbstractAttribute> { /// Return the address space of the associated value. \p NoAddressSpace is /// returned if the associated value is dead. This functions is not supposed /// to be called if the AA is invalid. - virtual int32_t getAddressSpace() const = 0; + virtual uint32_t getAddressSpace() const = 0; /// Create an abstract attribute view for the position \p IRP. static AAAddressSpace &createForPosition(const IRPosition &IRP, @@ -6268,7 +6268,7 @@ struct AAAddressSpace : public StateWrapper<BooleanState, AbstractAttribute> { } // No address space which indicates the associated value is dead. 
- static const int32_t NoAddressSpace = -1; + static const uint32_t NoAddressSpace = ~0U; /// Unique ID (due to the unique address) static const char ID; diff --git a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h index 87d0d98..68d9ae8 100644 --- a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h +++ b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h @@ -450,7 +450,8 @@ public: bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero = false, unsigned Depth = 0, const Instruction *CxtI = nullptr) { - return llvm::isKnownToBeAPowerOfTwo(V, DL, OrZero, Depth, &AC, CxtI, &DT); + return llvm::isKnownToBeAPowerOfTwo(V, OrZero, Depth, + SQ.getWithInstruction(CxtI)); } bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth = 0, diff --git a/llvm/include/llvm/Transforms/Scalar/Scalarizer.h b/llvm/include/llvm/Transforms/Scalar/Scalarizer.h index 45e25cb..4d2a1a2 100644 --- a/llvm/include/llvm/Transforms/Scalar/Scalarizer.h +++ b/llvm/include/llvm/Transforms/Scalar/Scalarizer.h @@ -24,6 +24,7 @@ namespace llvm { class Function; +class FunctionPass; struct ScalarizerPassOptions { // These options correspond 1:1 to cl::opt options defined in @@ -50,6 +51,10 @@ public: void setScalarizeLoadStore(bool Value) { Options.ScalarizeLoadStore = Value; } void setScalarizeMinBits(unsigned Value) { Options.ScalarizeMinBits = Value; } }; + +/// Create a legacy pass manager instance of the Scalarizer pass +FunctionPass *createScalarizerPass( + const ScalarizerPassOptions &Options = ScalarizerPassOptions()); } #endif /* LLVM_TRANSFORMS_SCALAR_SCALARIZER_H */ diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index e9fc9bb..ba3ba7c 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -265,9 +265,6 @@ bool llvm::isOnlyUsedInZeroEqualityComparison(const Instruction *I) { }); } -static bool isKnownToBeAPowerOfTwo(const 
Value *V, bool OrZero, unsigned Depth, - const SimplifyQuery &Q); - bool llvm::isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL, bool OrZero, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, @@ -2210,12 +2207,15 @@ static bool isPowerOfTwoRecurrence(const PHINode *PN, bool OrZero, /// Return true if we can infer that \p V is known to be a power of 2 from /// dominating condition \p Cond (e.g., ctpop(V) == 1). static bool isImpliedToBeAPowerOfTwoFromCond(const Value *V, bool OrZero, - const Value *Cond) { + const Value *Cond, + bool CondIsTrue) { ICmpInst::Predicate Pred; const APInt *RHSC; if (!match(Cond, m_ICmp(Pred, m_Intrinsic<Intrinsic::ctpop>(m_Specific(V)), m_APInt(RHSC)))) return false; + if (!CondIsTrue) + Pred = ICmpInst::getInversePredicate(Pred); // ctpop(V) u< 2 if (OrZero && Pred == ICmpInst::ICMP_ULT && *RHSC == 2) return true; @@ -2227,8 +2227,8 @@ static bool isImpliedToBeAPowerOfTwoFromCond(const Value *V, bool OrZero, /// bit set when defined. For vectors return true if every element is known to /// be a power of two when defined. Supports values with integer or pointer /// types and vectors of integers. -bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth, - const SimplifyQuery &Q) { +bool llvm::isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth, + const SimplifyQuery &Q) { assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth"); if (isa<Constant>(V)) @@ -2244,12 +2244,32 @@ bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth, if (!AssumeVH) continue; CallInst *I = cast<CallInst>(AssumeVH); - if (isImpliedToBeAPowerOfTwoFromCond(V, OrZero, I->getArgOperand(0)) && + if (isImpliedToBeAPowerOfTwoFromCond(V, OrZero, I->getArgOperand(0), + /*CondIsTrue=*/true) && isValidAssumeForContext(I, Q.CxtI, Q.DT)) return true; } } + // Handle dominating conditions. 
+ if (Q.DC && Q.CxtI && Q.DT) { + for (BranchInst *BI : Q.DC->conditionsFor(V)) { + Value *Cond = BI->getCondition(); + + BasicBlockEdge Edge0(BI->getParent(), BI->getSuccessor(0)); + if (isImpliedToBeAPowerOfTwoFromCond(V, OrZero, Cond, + /*CondIsTrue=*/true) && + Q.DT->dominates(Edge0, Q.CxtI->getParent())) + return true; + + BasicBlockEdge Edge1(BI->getParent(), BI->getSuccessor(1)); + if (isImpliedToBeAPowerOfTwoFromCond(V, OrZero, Cond, + /*CondIsTrue=*/false) && + Q.DT->dominates(Edge1, Q.CxtI->getParent())) + return true; + } + } + auto *I = dyn_cast<Instruction>(V); if (!I) return false; @@ -9980,8 +10000,7 @@ void llvm::findValuesAffectedByCondition( } } - if (IsAssume && HasRHSC && - match(A, m_Intrinsic<Intrinsic::ctpop>(m_Value(X)))) + if (HasRHSC && match(A, m_Intrinsic<Intrinsic::ctpop>(m_Value(X)))) AddAffected(X); } else if (match(Cond, m_FCmp(Pred, m_Value(A), m_Value(B)))) { AddCmpOperands(A, B); diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index db33d52..53ce219 100644 --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -635,7 +635,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, void InstrEmitter::EmitCopyToRegClassNode(SDNode *Node, DenseMap<SDValue, Register> &VRBaseMap) { - unsigned VReg = getVR(Node->getOperand(0), VRBaseMap); + Register VReg = getVR(Node->getOperand(0), VRBaseMap); // Create the new VReg in the destination class and emit a copy. unsigned DstRCIdx = Node->getConstantOperandVal(1); @@ -678,7 +678,7 @@ void InstrEmitter::EmitRegSequence(SDNode *Node, // insert copies for them in TwoAddressInstructionPass anyway. 
if (!R || !R->getReg().isPhysical()) { unsigned SubIdx = Op->getAsZExtVal(); - unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap); + Register SubReg = getVR(Node->getOperand(i - 1), VRBaseMap); const TargetRegisterClass *TRC = MRI->getRegClass(SubReg); const TargetRegisterClass *SRC = TRI->getMatchingSuperRegClass(RC, TRC, SubIdx); @@ -1274,7 +1274,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, break; } case ISD::CopyFromReg: { - unsigned SrcReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); + Register SrcReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); EmitCopyFromReg(Node, 0, IsClone, SrcReg, VRBaseMap); break; } @@ -1343,7 +1343,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, SmallVector<unsigned, 8> GroupIdx; // Remember registers that are part of early-clobber defs. - SmallVector<unsigned, 8> ECRegs; + SmallVector<Register, 8> ECRegs; // Add all of the operand registers to the instruction. for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { @@ -1424,7 +1424,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, // used), but this does not match the semantics of our early-clobber flag. // If an early-clobber operand register is also an input operand register, // then remove the early-clobber flag. 
- for (unsigned Reg : ECRegs) { + for (Register Reg : ECRegs) { if (MIB->readsRegister(Reg, TRI)) { MachineOperand *MO = MIB->findRegisterDefOperand(Reg, TRI, false, false); diff --git a/llvm/lib/ProfileData/PGOCtxProfReader.cpp b/llvm/lib/ProfileData/PGOCtxProfReader.cpp index 8354e30..496854e 100644 --- a/llvm/lib/ProfileData/PGOCtxProfReader.cpp +++ b/llvm/lib/ProfileData/PGOCtxProfReader.cpp @@ -44,14 +44,6 @@ PGOCtxProfContext::getOrEmplace(uint32_t Index, GlobalValue::GUID G, return Iter->second; } -void PGOCtxProfContext::getContainedGuids( - DenseSet<GlobalValue::GUID> &Guids) const { - Guids.insert(GUID); - for (const auto &[_, Callsite] : Callsites) - for (const auto &[_, Callee] : Callsite) - Callee.getContainedGuids(Guids); -} - Expected<BitstreamEntry> PGOCtxProfileReader::advance() { return Cursor.advance(BitstreamCursor::AF_DontAutoprocessAbbrevs); } diff --git a/llvm/lib/SandboxIR/SandboxIR.cpp b/llvm/lib/SandboxIR/SandboxIR.cpp index 05d05f7..8a7c398 100644 --- a/llvm/lib/SandboxIR/SandboxIR.cpp +++ b/llvm/lib/SandboxIR/SandboxIR.cpp @@ -2495,6 +2495,20 @@ PoisonValue *PoisonValue::getElementValue(unsigned Idx) const { cast<llvm::PoisonValue>(Val)->getElementValue(Idx))); } +void GlobalValue::setUnnamedAddr(UnnamedAddr V) { + Ctx.getTracker() + .emplaceIfTracking<GenericSetter<&GlobalValue::getUnnamedAddr, + &GlobalValue::setUnnamedAddr>>(this); + cast<llvm::GlobalValue>(Val)->setUnnamedAddr(V); +} + +void GlobalValue::setVisibility(VisibilityTypes V) { + Ctx.getTracker() + .emplaceIfTracking<GenericSetter<&GlobalValue::getVisibility, + &GlobalValue::setVisibility>>(this); + cast<llvm::GlobalValue>(Val)->setVisibility(V); +} + BlockAddress *BlockAddress::get(Function *F, BasicBlock *BB) { auto *LLVMC = llvm::BlockAddress::get(cast<llvm::Function>(F->Val), cast<llvm::BasicBlock>(BB->Val)); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp index ffeec31b..687a733 100644 --- 
a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp @@ -1065,19 +1065,16 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM, Attributor A(Functions, InfoCache, AC); - for (Function &F : M) { - if (F.isIntrinsic()) - continue; - - A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F)); - A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(F)); - A.getOrCreateAAFor<AAAMDGPUNoAGPR>(IRPosition::function(F)); - CallingConv::ID CC = F.getCallingConv(); + for (auto *F : Functions) { + A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(*F)); + A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(*F)); + A.getOrCreateAAFor<AAAMDGPUNoAGPR>(IRPosition::function(*F)); + CallingConv::ID CC = F->getCallingConv(); if (!AMDGPU::isEntryFunctionCC(CC)) { - A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(F)); - A.getOrCreateAAFor<AAAMDWavesPerEU>(IRPosition::function(F)); + A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(*F)); + A.getOrCreateAAFor<AAAMDWavesPerEU>(IRPosition::function(*F)); } else if (CC == CallingConv::AMDGPU_KERNEL) { - addPreloadKernArgHint(F, TM); + addPreloadKernArgHint(*F, TM); } for (auto &I : instructions(F)) { diff --git a/llvm/lib/Target/DirectX/DXContainerGlobals.cpp b/llvm/lib/Target/DirectX/DXContainerGlobals.cpp index aa77698..839060b 100644 --- a/llvm/lib/Target/DirectX/DXContainerGlobals.cpp +++ b/llvm/lib/Target/DirectX/DXContainerGlobals.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/DXILMetadataAnalysis.h" +#include "llvm/Analysis/DXILResource.h" #include "llvm/BinaryFormat/DXContainer.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/Constants.h" @@ -40,6 +41,7 @@ class DXContainerGlobals : public llvm::ModulePass { GlobalVariable *buildSignature(Module &M, Signature &Sig, StringRef Name, StringRef SectionName); void addSignature(Module &M, 
SmallVector<GlobalValue *> &Globals); + void addResourcesForPSV(Module &M, PSVRuntimeInfo &PSV); void addPipelineStateValidationInfo(Module &M, SmallVector<GlobalValue *> &Globals); @@ -59,6 +61,7 @@ public: AU.setPreservesAll(); AU.addRequired<ShaderFlagsAnalysisWrapper>(); AU.addRequired<DXILMetadataAnalysisWrapperPass>(); + AU.addRequired<DXILResourceWrapperPass>(); } }; @@ -140,6 +143,56 @@ void DXContainerGlobals::addSignature(Module &M, Globals.emplace_back(buildSignature(M, OutputSig, "dx.osg1", "OSG1")); } +void DXContainerGlobals::addResourcesForPSV(Module &M, PSVRuntimeInfo &PSV) { + const DXILResourceMap &ResMap = + getAnalysis<DXILResourceWrapperPass>().getResourceMap(); + + for (const dxil::ResourceInfo &ResInfo : ResMap) { + const dxil::ResourceInfo::ResourceBinding &Binding = ResInfo.getBinding(); + dxbc::PSV::v2::ResourceBindInfo BindInfo; + BindInfo.LowerBound = Binding.LowerBound; + BindInfo.UpperBound = Binding.LowerBound + Binding.Size - 1; + BindInfo.Space = Binding.Space; + + dxbc::PSV::ResourceType ResType = dxbc::PSV::ResourceType::Invalid; + bool IsUAV = ResInfo.getResourceClass() == dxil::ResourceClass::UAV; + switch (ResInfo.getResourceKind()) { + case dxil::ResourceKind::Sampler: + ResType = dxbc::PSV::ResourceType::Sampler; + break; + case dxil::ResourceKind::CBuffer: + ResType = dxbc::PSV::ResourceType::CBV; + break; + case dxil::ResourceKind::StructuredBuffer: + ResType = IsUAV ? dxbc::PSV::ResourceType::UAVStructured + : dxbc::PSV::ResourceType::SRVStructured; + if (IsUAV && ResInfo.getUAV().HasCounter) + ResType = dxbc::PSV::ResourceType::UAVStructuredWithCounter; + break; + case dxil::ResourceKind::RTAccelerationStructure: + ResType = dxbc::PSV::ResourceType::SRVRaw; + break; + case dxil::ResourceKind::RawBuffer: + ResType = IsUAV ? dxbc::PSV::ResourceType::UAVRaw + : dxbc::PSV::ResourceType::SRVRaw; + break; + default: + ResType = IsUAV ? 
dxbc::PSV::ResourceType::UAVTyped + : dxbc::PSV::ResourceType::SRVTyped; + break; + } + BindInfo.Type = ResType; + + BindInfo.Kind = + static_cast<dxbc::PSV::ResourceKind>(ResInfo.getResourceKind()); + // TODO: Add support for dxbc::PSV::ResourceFlag::UsedByAtomic64, tracking + // with https://github.com/llvm/llvm-project/issues/104392 + BindInfo.Flags.Flags = 0u; + + PSV.Resources.emplace_back(BindInfo); + } +} + void DXContainerGlobals::addPipelineStateValidationInfo( Module &M, SmallVector<GlobalValue *> &Globals) { SmallString<256> Data; @@ -155,6 +208,8 @@ void DXContainerGlobals::addPipelineStateValidationInfo( PSV.BaseData.ShaderStage = static_cast<uint8_t>(MMI.ShaderStage - Triple::Pixel); + addResourcesForPSV(M, PSV); + // Hardcoded values here to unblock loading the shader into D3D. // // TODO: Lots more stuff to do here! @@ -185,6 +240,7 @@ INITIALIZE_PASS_BEGIN(DXContainerGlobals, "dxil-globals", "DXContainer Global Emitter", false, true) INITIALIZE_PASS_DEPENDENCY(ShaderFlagsAnalysisWrapper) INITIALIZE_PASS_DEPENDENCY(DXILMetadataAnalysisWrapperPass) +INITIALIZE_PASS_DEPENDENCY(DXILResourceWrapperPass) INITIALIZE_PASS_END(DXContainerGlobals, "dxil-globals", "DXContainer Global Emitter", false, true) diff --git a/llvm/lib/Target/DirectX/DXILFinalizeLinkage.cpp b/llvm/lib/Target/DirectX/DXILFinalizeLinkage.cpp index c02eb768..d315d9b 100644 --- a/llvm/lib/Target/DirectX/DXILFinalizeLinkage.cpp +++ b/llvm/lib/Target/DirectX/DXILFinalizeLinkage.cpp @@ -8,6 +8,7 @@ #include "DXILFinalizeLinkage.h"
#include "DirectX.h"
+#include "llvm/Analysis/DXILResource.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Metadata.h"
@@ -48,6 +49,10 @@ bool DXILFinalizeLinkageLegacy::runOnModule(Module &M) { return finalizeLinkage(M);
}
+void DXILFinalizeLinkageLegacy::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<DXILResourceWrapperPass>();
+}
+
char DXILFinalizeLinkageLegacy::ID = 0;
INITIALIZE_PASS_BEGIN(DXILFinalizeLinkageLegacy, DEBUG_TYPE,
diff --git a/llvm/lib/Target/DirectX/DXILFinalizeLinkage.h b/llvm/lib/Target/DirectX/DXILFinalizeLinkage.h index aab1bc3..62d3a8a 100644 --- a/llvm/lib/Target/DirectX/DXILFinalizeLinkage.h +++ b/llvm/lib/Target/DirectX/DXILFinalizeLinkage.h @@ -32,6 +32,7 @@ public: DXILFinalizeLinkageLegacy() : ModulePass(ID) {} bool runOnModule(Module &M) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; static char ID; // Pass identification. }; } // namespace llvm diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp index 72fa989..dd73b895 100644 --- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp +++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp @@ -14,6 +14,7 @@ #include "DirectX.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/DXILResource.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstrTypes.h" @@ -50,6 +51,7 @@ static bool isIntrinsicExpansion(Function &F) { case Intrinsic::dx_sdot: case Intrinsic::dx_udot: case Intrinsic::dx_sign: + case Intrinsic::dx_step: return true; } return false; @@ -322,6 +324,28 @@ static Value *expandPowIntrinsic(CallInst *Orig) { return Exp2Call; } +static Value *expandStepIntrinsic(CallInst *Orig) { + + Value *X = Orig->getOperand(0); + Value *Y = Orig->getOperand(1); + Type *Ty = X->getType(); + IRBuilder<> Builder(Orig); + + Constant *One = ConstantFP::get(Ty->getScalarType(), 1.0); + Constant *Zero = ConstantFP::get(Ty->getScalarType(), 0.0); + Value *Cond = Builder.CreateFCmpOLT(Y, X); + + if (Ty != Ty->getScalarType()) { + auto *XVec = dyn_cast<FixedVectorType>(Ty); + One = ConstantVector::getSplat( + ElementCount::getFixed(XVec->getNumElements()), One); + Zero = ConstantVector::getSplat( + ElementCount::getFixed(XVec->getNumElements()), Zero); + } + + return Builder.CreateSelect(Cond, Zero, One); +} + static Intrinsic::ID getMaxForClamp(Type *ElemTy, 
Intrinsic::ID ClampIntrinsic) { if (ClampIntrinsic == Intrinsic::dx_uclamp) @@ -433,8 +457,9 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) { case Intrinsic::dx_sign: Result = expandSignIntrinsic(Orig); break; + case Intrinsic::dx_step: + Result = expandStepIntrinsic(Orig); } - if (Result) { Orig->replaceAllUsesWith(Result); Orig->eraseFromParent(); @@ -471,6 +496,10 @@ bool DXILIntrinsicExpansionLegacy::runOnModule(Module &M) { return expansionIntrinsics(M); } +void DXILIntrinsicExpansionLegacy::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreserved<DXILResourceWrapperPass>(); +} + char DXILIntrinsicExpansionLegacy::ID = 0; INITIALIZE_PASS_BEGIN(DXILIntrinsicExpansionLegacy, DEBUG_TYPE, diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.h b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.h index c86681a..c8ee4b1 100644 --- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.h +++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.h @@ -26,6 +26,7 @@ public: bool runOnModule(Module &M) override; DXILIntrinsicExpansionLegacy() : ModulePass(ID) {} + void getAnalysisUsage(AnalysisUsage &AU) const override; static char ID; // Pass identification. }; } // namespace llvm diff --git a/llvm/lib/Target/DirectX/DXILPrepare.cpp b/llvm/lib/Target/DirectX/DXILPrepare.cpp index f6b7355..b050240 100644 --- a/llvm/lib/Target/DirectX/DXILPrepare.cpp +++ b/llvm/lib/Target/DirectX/DXILPrepare.cpp @@ -20,6 +20,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringSet.h" #include "llvm/Analysis/DXILMetadataAnalysis.h" +#include "llvm/Analysis/DXILResource.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/AttributeMask.h" #include "llvm/IR/IRBuilder.h" @@ -249,6 +250,7 @@ public: AU.addPreserved<ShaderFlagsAnalysisWrapper>(); AU.addPreserved<DXILResourceMDWrapper>(); AU.addPreserved<DXILMetadataAnalysisWrapperPass>(); + AU.addPreserved<DXILResourceWrapperPass>(); } static char ID; // Pass identification. 
}; diff --git a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp index a29fc21..606022a9 100644 --- a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp +++ b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp @@ -28,6 +28,7 @@ #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/LegacyPassManager.h" +#include "llvm/InitializePasses.h" #include "llvm/MC/MCSectionDXContainer.h" #include "llvm/MC/SectionKind.h" #include "llvm/MC/TargetRegistry.h" @@ -36,6 +37,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Transforms/Scalar/Scalarizer.h" #include <optional> using namespace llvm; @@ -44,6 +46,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeDirectXTarget() { RegisterTargetMachine<DirectXTargetMachine> X(getTheDirectXTarget()); auto *PR = PassRegistry::getPassRegistry(); initializeDXILIntrinsicExpansionLegacyPass(*PR); + initializeScalarizerLegacyPassPass(*PR); initializeDXILPrepareModulePass(*PR); initializeEmbedDXILPassPass(*PR); initializeWriteDXILPassPass(*PR); @@ -83,6 +86,9 @@ public: FunctionPass *createTargetRegisterAllocator(bool) override { return nullptr; } void addCodeGenPrepare() override { addPass(createDXILIntrinsicExpansionLegacyPass()); + ScalarizerPassOptions DxilScalarOptions; + DxilScalarOptions.ScalarizeLoadStore = true; + addPass(createScalarizerPass(DxilScalarOptions)); addPass(createDXILOpLoweringLegacyPass()); addPass(createDXILFinalizeLinkageLegacyPass()); addPass(createDXILTranslateMetadataLegacyPass()); diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h index 9723789..6177884 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h @@ -337,6 +337,8 @@ private: bool isEligibleForTailCallOptimization( CCState &CCInfo, 
CallLoweringInfo &CLI, MachineFunction &MF, const SmallVectorImpl<CCValAssign> &ArgLocs) const; + + bool softPromoteHalfType() const override { return true; } }; } // end namespace llvm diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp index 3692384..bf47361 100644 --- a/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp @@ -24,9 +24,9 @@ // //===----------------------------------------------------------------------===// +#include "MCTargetDesc/NVPTXBaseInfo.h" #include "NVPTX.h" #include "NVPTXUtilities.h" -#include "MCTargetDesc/NVPTXBaseInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" @@ -55,8 +55,8 @@ public: char NVPTXLowerAlloca::ID = 1; -INITIALIZE_PASS(NVPTXLowerAlloca, "nvptx-lower-alloca", - "Lower Alloca", false, false) +INITIALIZE_PASS(NVPTXLowerAlloca, "nvptx-lower-alloca", "Lower Alloca", false, + false) // ============================================================================= // Main function for this pass. 
@@ -70,14 +70,38 @@ bool NVPTXLowerAlloca::runOnFunction(Function &F) { for (auto &I : BB) { if (auto allocaInst = dyn_cast<AllocaInst>(&I)) { Changed = true; + + PointerType *AllocInstPtrTy = + cast<PointerType>(allocaInst->getType()->getScalarType()); + unsigned AllocAddrSpace = AllocInstPtrTy->getAddressSpace(); + assert((AllocAddrSpace == ADDRESS_SPACE_GENERIC || + AllocAddrSpace == ADDRESS_SPACE_LOCAL) && + "AllocaInst can only be in Generic or Local address space for " + "NVPTX."); + + Instruction *AllocaInLocalAS = allocaInst; auto ETy = allocaInst->getAllocatedType(); - auto LocalAddrTy = PointerType::get(ETy, ADDRESS_SPACE_LOCAL); - auto NewASCToLocal = new AddrSpaceCastInst(allocaInst, LocalAddrTy, ""); - auto GenericAddrTy = PointerType::get(ETy, ADDRESS_SPACE_GENERIC); - auto NewASCToGeneric = - new AddrSpaceCastInst(NewASCToLocal, GenericAddrTy, ""); - NewASCToLocal->insertAfter(allocaInst); - NewASCToGeneric->insertAfter(NewASCToLocal); + + // We need to make sure that LLVM has info that alloca needs to go to + // ADDRESS_SPACE_LOCAL for InferAddressSpace pass. + // + // For allocas in ADDRESS_SPACE_GENERIC, we add addrspacecast to + // ADDRESS_SPACE_LOCAL and back to ADDRESS_SPACE_GENERIC, so that + // the alloca's users still use a generic pointer to operate on. + // + // For allocas already in ADDRESS_SPACE_LOCAL, we just need + // addrspacecast to ADDRESS_SPACE_GENERIC. 
+ if (AllocAddrSpace == ADDRESS_SPACE_GENERIC) { + auto ASCastToLocalAS = new AddrSpaceCastInst( + allocaInst, PointerType::get(ETy, ADDRESS_SPACE_LOCAL), ""); + ASCastToLocalAS->insertAfter(allocaInst); + AllocaInLocalAS = ASCastToLocalAS; + } + + auto AllocaInGenericAS = new AddrSpaceCastInst( + AllocaInLocalAS, PointerType::get(ETy, ADDRESS_SPACE_GENERIC), ""); + AllocaInGenericAS->insertAfter(AllocaInLocalAS); + for (Use &AllocaUse : llvm::make_early_inc_range(allocaInst->uses())) { // Check Load, Store, GEP, and BitCast Uses on alloca and make them // use the converted generic address, in order to expose non-generic @@ -87,23 +111,23 @@ bool NVPTXLowerAlloca::runOnFunction(Function &F) { auto LI = dyn_cast<LoadInst>(AllocaUse.getUser()); if (LI && LI->getPointerOperand() == allocaInst && !LI->isVolatile()) { - LI->setOperand(LI->getPointerOperandIndex(), NewASCToGeneric); + LI->setOperand(LI->getPointerOperandIndex(), AllocaInGenericAS); continue; } auto SI = dyn_cast<StoreInst>(AllocaUse.getUser()); if (SI && SI->getPointerOperand() == allocaInst && !SI->isVolatile()) { - SI->setOperand(SI->getPointerOperandIndex(), NewASCToGeneric); + SI->setOperand(SI->getPointerOperandIndex(), AllocaInGenericAS); continue; } auto GI = dyn_cast<GetElementPtrInst>(AllocaUse.getUser()); if (GI && GI->getPointerOperand() == allocaInst) { - GI->setOperand(GI->getPointerOperandIndex(), NewASCToGeneric); + GI->setOperand(GI->getPointerOperandIndex(), AllocaInGenericAS); continue; } auto BI = dyn_cast<BitCastInst>(AllocaUse.getUser()); if (BI && BI->getOperand(0) == allocaInst) { - BI->setOperand(0, NewASCToGeneric); + BI->setOperand(0, AllocaInGenericAS); continue; } } diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td index fa141c3..c802274 100644 --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -348,6 +348,11 @@ def FeatureStdExtZfinx def HasStdExtZfinx : 
Predicate<"Subtarget->hasStdExtZfinx()">, AssemblerPredicate<(all_of FeatureStdExtZfinx), "'Zfinx' (Float in Integer)">; +def HasStdExtFOrZfinx : Predicate<"Subtarget->hasStdExtFOrZfinx()">, + AssemblerPredicate<(any_of FeatureStdExtF, + FeatureStdExtZfinx), + "'F' (Single-Precision Floating-Point) or " + "'Zfinx' (Float in Integer)">; def FeatureStdExtZdinx : RISCVExtension<"zdinx", 1, 0, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td index 6d0952a..fe5623e 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -863,8 +863,6 @@ def DRET : Priv<"dret", 0b0111101>, Sched<[]> { // Assembler Pseudo Instructions (User-Level ISA, Version 2.2, Chapter 20) //===----------------------------------------------------------------------===// -def : InstAlias<"nop", (ADDI X0, X0, 0)>; - // Note that the size is 32 because up to 8 32-bit instructions are needed to // generate an arbitrary 64-bit immediate. However, the size does not really // matter since PseudoLI is currently only used in the AsmParser where it gets @@ -890,8 +888,10 @@ def PseudoLD : PseudoLoad<"ld">; def PseudoSD : PseudoStore<"sd">; } // Predicates = [IsRV64] -def : InstAlias<"li $rd, $imm", (ADDI GPR:$rd, X0, simm12:$imm)>; -def : InstAlias<"mv $rd, $rs", (ADDI GPR:$rd, GPR:$rs, 0)>; +def : InstAlias<"nop", (ADDI X0, X0, 0), 3>; +def : InstAlias<"li $rd, $imm", (ADDI GPR:$rd, X0, simm12:$imm), 2>; +def : InstAlias<"mv $rd, $rs", (ADDI GPR:$rd, GPR:$rs, 0)>; + def : InstAlias<"not $rd, $rs", (XORI GPR:$rd, GPR:$rs, -1)>; def : InstAlias<"neg $rd, $rs", (SUB GPR:$rd, X0, GPR:$rs)>; @@ -961,14 +961,14 @@ def : InstAlias<"fence", (FENCE 0xF, 0xF)>; // 0xF == iorw let Predicates = [HasStdExtZihintpause] in def : InstAlias<"pause", (FENCE 0x1, 0x0)>; // 0x1 == w -def : InstAlias<"rdinstret $rd", (CSRRS GPR:$rd, INSTRET.Encoding, X0)>; -def : InstAlias<"rdcycle $rd", (CSRRS GPR:$rd, CYCLE.Encoding, X0)>; -def : 
InstAlias<"rdtime $rd", (CSRRS GPR:$rd, TIME.Encoding, X0)>; +def : InstAlias<"rdinstret $rd", (CSRRS GPR:$rd, INSTRET.Encoding, X0), 2>; +def : InstAlias<"rdcycle $rd", (CSRRS GPR:$rd, CYCLE.Encoding, X0), 2>; +def : InstAlias<"rdtime $rd", (CSRRS GPR:$rd, TIME.Encoding, X0), 2>; let Predicates = [IsRV32] in { -def : InstAlias<"rdinstreth $rd", (CSRRS GPR:$rd, INSTRETH.Encoding, X0)>; -def : InstAlias<"rdcycleh $rd", (CSRRS GPR:$rd, CYCLEH.Encoding, X0)>; -def : InstAlias<"rdtimeh $rd", (CSRRS GPR:$rd, TIMEH.Encoding, X0)>; +def : InstAlias<"rdinstreth $rd", (CSRRS GPR:$rd, INSTRETH.Encoding, X0), 2>; +def : InstAlias<"rdcycleh $rd", (CSRRS GPR:$rd, CYCLEH.Encoding, X0), 2>; +def : InstAlias<"rdtimeh $rd", (CSRRS GPR:$rd, TIMEH.Encoding, X0), 2>; } // Predicates = [IsRV32] def : InstAlias<"csrr $rd, $csr", (CSRRS GPR:$rd, csr_sysreg:$csr, X0)>; @@ -990,13 +990,13 @@ def : InstAlias<"csrrs $rd, $csr, $imm", (CSRRSI GPR:$rd, csr_sysreg:$csr, uimm5 def : InstAlias<"csrrc $rd, $csr, $imm", (CSRRCI GPR:$rd, csr_sysreg:$csr, uimm5:$imm)>; } -def : InstAlias<"sfence.vma", (SFENCE_VMA X0, X0)>; +def : InstAlias<"sfence.vma", (SFENCE_VMA X0, X0), 2>; def : InstAlias<"sfence.vma $rs", (SFENCE_VMA GPR:$rs, X0)>; -def : InstAlias<"hfence.gvma", (HFENCE_GVMA X0, X0)>; +def : InstAlias<"hfence.gvma", (HFENCE_GVMA X0, X0), 2>; def : InstAlias<"hfence.gvma $rs", (HFENCE_GVMA GPR:$rs, X0)>; -def : InstAlias<"hfence.vvma", (HFENCE_VVMA X0, X0)>; +def : InstAlias<"hfence.vvma", (HFENCE_VVMA X0, X0), 2>; def : InstAlias<"hfence.vvma $rs", (HFENCE_VVMA GPR:$rs, X0)>; let Predicates = [HasStdExtZihintntl] in { diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td index 1442bc1..a00acb3 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td @@ -400,23 +400,10 @@ def FMV_W_X : FPUnaryOp_r<0b1111000, 0b00000, 0b000, FPR32, GPR, "fmv.w.x">, // Assembler Pseudo Instructions (User-Level ISA, Version 2.2, 
Chapter 20) //===----------------------------------------------------------------------===// -let Predicates = [HasStdExtF] in { -def : InstAlias<"flw $rd, (${rs1})", (FLW FPR32:$rd, GPR:$rs1, 0), 0>; -def : InstAlias<"fsw $rs2, (${rs1})", (FSW FPR32:$rs2, GPR:$rs1, 0), 0>; - -def : InstAlias<"fmv.s $rd, $rs", (FSGNJ_S FPR32:$rd, FPR32:$rs, FPR32:$rs)>; -def : InstAlias<"fabs.s $rd, $rs", (FSGNJX_S FPR32:$rd, FPR32:$rs, FPR32:$rs)>; -def : InstAlias<"fneg.s $rd, $rs", (FSGNJN_S FPR32:$rd, FPR32:$rs, FPR32:$rs)>; - -// fgt.s/fge.s are recognised by the GNU assembler but the canonical -// flt.s/fle.s forms will always be printed. Therefore, set a zero weight. -def : InstAlias<"fgt.s $rd, $rs, $rt", - (FLT_S GPR:$rd, FPR32:$rt, FPR32:$rs), 0>; -def : InstAlias<"fge.s $rd, $rs, $rt", - (FLE_S GPR:$rd, FPR32:$rt, FPR32:$rs), 0>; - +let Predicates = [HasStdExtFOrZfinx] in { // The following csr instructions actually alias instructions from the base ISA. -// However, it only makes sense to support them when the F extension is enabled. +// However, it only makes sense to support them when the F or Zfinx extension is +// enabled. // NOTE: "frcsr", "frrm", and "frflags" are more specialized version of "csrr". 
def : InstAlias<"frcsr $rd", (CSRRS GPR:$rd, SysRegFCSR.Encoding, X0), 2>; def : InstAlias<"fscsr $rd, $rs", (CSRRW GPR:$rd, SysRegFCSR.Encoding, GPR:$rs)>; @@ -439,6 +426,22 @@ def : InstAlias<"fsflags $rd, $rs", (CSRRW GPR:$rd, SysRegFFLAGS.Encoding, GP def : InstAlias<"fsflags $rs", (CSRRW X0, SysRegFFLAGS.Encoding, GPR:$rs), 2>; def : InstAlias<"fsflagsi $rd, $imm", (CSRRWI GPR:$rd, SysRegFFLAGS.Encoding, uimm5:$imm)>; def : InstAlias<"fsflagsi $imm", (CSRRWI X0, SysRegFFLAGS.Encoding, uimm5:$imm), 2>; +} // Predicates = [HasStdExtFOrZfinx] + +let Predicates = [HasStdExtF] in { +def : InstAlias<"flw $rd, (${rs1})", (FLW FPR32:$rd, GPR:$rs1, 0), 0>; +def : InstAlias<"fsw $rs2, (${rs1})", (FSW FPR32:$rs2, GPR:$rs1, 0), 0>; + +def : InstAlias<"fmv.s $rd, $rs", (FSGNJ_S FPR32:$rd, FPR32:$rs, FPR32:$rs)>; +def : InstAlias<"fabs.s $rd, $rs", (FSGNJX_S FPR32:$rd, FPR32:$rs, FPR32:$rs)>; +def : InstAlias<"fneg.s $rd, $rs", (FSGNJN_S FPR32:$rd, FPR32:$rs, FPR32:$rs)>; + +// fgt.s/fge.s are recognised by the GNU assembler but the canonical +// flt.s/fle.s forms will always be printed. Therefore, set a zero weight. +def : InstAlias<"fgt.s $rd, $rs, $rt", + (FLT_S GPR:$rd, FPR32:$rt, FPR32:$rs), 0>; +def : InstAlias<"fge.s $rd, $rs, $rt", + (FLE_S GPR:$rd, FPR32:$rt, FPR32:$rs), 0>; // fmv.w.x and fmv.x.w were previously known as fmv.s.x and fmv.x.s. Both // spellings should be supported by standard tools. 
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td index 3bd6da2..b586b10 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td @@ -203,9 +203,9 @@ let Predicates = [HasVendorXCValu, IsRV32], // General ALU Operations def CV_ABS : CVInstAluR<0b0101000, 0b011, "cv.abs">, Sched<[]>; - def CV_SLET : CVInstAluRR<0b0101001, 0b011, "cv.slet">, + def CV_SLE : CVInstAluRR<0b0101001, 0b011, "cv.sle">, Sched<[]>; - def CV_SLETU : CVInstAluRR<0b0101010, 0b011, "cv.sletu">, + def CV_SLEU : CVInstAluRR<0b0101010, 0b011, "cv.sleu">, Sched<[]>; def CV_MIN : CVInstAluRR<0b0101011, 0b011, "cv.min">, Sched<[]>; @@ -276,6 +276,10 @@ let Predicates = [HasVendorXCValu, IsRV32], // hasSideEffects = 0, mayLoad = 0, mayStore = 0, // Constraints = "$rd = $rd_wb" +let Predicates = [HasVendorXCValu, IsRV32] in { + def : MnemonicAlias<"cv.slet", "cv.sle">; + def : MnemonicAlias<"cv.sletu", "cv.sleu">; +} class CVInstSIMDRR<bits<5> funct5, bit F, bit funct1, bits<3> funct3, RISCVOpcode opcode, dag outs, @@ -778,8 +782,8 @@ multiclass PatCoreVAluGprGprImm<Intrinsic intr> { let Predicates = [HasVendorXCValu, IsRV32], AddedComplexity = 1 in { def : PatGpr<abs, CV_ABS>; - def : PatGprGpr<setle, CV_SLET>; - def : PatGprGpr<setule, CV_SLETU>; + def : PatGprGpr<setle, CV_SLE>; + def : PatGprGpr<setule, CV_SLEU>; def : PatGprGpr<smin, CV_MIN>; def : PatGprGpr<umin, CV_MINU>; def : PatGprGpr<smax, CV_MAX>; diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index 831d7f7..b526c9f 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -263,6 +263,9 @@ private: bool selectSpvThreadId(Register ResVReg, const SPIRVType *ResType, MachineInstr &I) const; + bool selectStep(Register ResVReg, const SPIRVType *ResType, + MachineInstr &I) const; + bool 
selectUnmergeValues(MachineInstr &I) const; Register buildI32Constant(uint32_t Val, MachineInstr &I, @@ -1710,6 +1713,25 @@ bool SPIRVInstructionSelector::selectSign(Register ResVReg, return Result; } +bool SPIRVInstructionSelector::selectStep(Register ResVReg, + const SPIRVType *ResType, + MachineInstr &I) const { + + assert(I.getNumOperands() == 4); + assert(I.getOperand(2).isReg()); + assert(I.getOperand(3).isReg()); + MachineBasicBlock &BB = *I.getParent(); + + return BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpExtInst)) + .addDef(ResVReg) + .addUse(GR.getSPIRVTypeID(ResType)) + .addImm(static_cast<uint32_t>(SPIRV::InstructionSet::GLSL_std_450)) + .addImm(GL::Step) + .addUse(I.getOperand(2).getReg()) + .addUse(I.getOperand(3).getReg()) + .constrainAllUses(TII, TRI, RBI); +} + bool SPIRVInstructionSelector::selectBitreverse(Register ResVReg, const SPIRVType *ResType, MachineInstr &I) const { @@ -2468,6 +2490,8 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg, .addUse(GR.getSPIRVTypeID(ResType)) .addUse(GR.getOrCreateConstInt(3, I, IntTy, TII)); } + case Intrinsic::spv_step: + return selectStep(ResVReg, ResType, I); default: { std::string DiagMsg; raw_string_ostream OS(DiagMsg); diff --git a/llvm/lib/Target/X86/X86ScheduleZnver4.td b/llvm/lib/Target/X86/X86ScheduleZnver4.td index 420c429..6181ee8 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver4.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver4.td @@ -1855,6 +1855,14 @@ def Zn4WriteVZeroIdiomLogicX : SchedWriteVariant<[ // NOTE: PXORrr,PANDNrr are not zero-cycle! def : InstRW<[Zn4WriteVZeroIdiomLogicX], (instrs VPXORrr, VPANDNrr)>; +// TODO: This should be extended to incorporate all of the AVX512 zeroing +// idioms that can be executed by the renamer. 
+def Zn4WriteVZeroIdiomLogicZ : SchedWriteVariant<[ + SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn4WriteZeroLatency]>, + SchedVar<NoSchedPred, [WriteVecLogicZ]> +]>; +def : InstRW<[Zn4WriteVZeroIdiomLogicZ], (instrs VPXORDZrr)>; + def Zn4WriteVZeroIdiomLogicY : SchedWriteVariant<[ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn4WriteZeroLatency]>, SchedVar<NoSchedPred, [WriteVecLogicY]> @@ -1930,6 +1938,12 @@ def : IsZeroIdiomFunction<[ VPSUBUSBYrr, VPSUBUSWYrr, VPCMPGTBYrr, VPCMPGTWYrr, VPCMPGTDYrr, VPCMPGTQYrr ], ZeroIdiomPredicate>, + + // AVX ZMM Zero-idioms. + // TODO: This should be expanded to incorporate all AVX512 zeroing idioms. + DepBreakingClass<[ + VPXORDZrr + ], ZeroIdiomPredicate>, ]>; def : IsDepBreakingFunction<[ diff --git a/llvm/lib/Transforms/Coroutines/CMakeLists.txt b/llvm/lib/Transforms/Coroutines/CMakeLists.txt index c650817..46ef5cd 100644 --- a/llvm/lib/Transforms/Coroutines/CMakeLists.txt +++ b/llvm/lib/Transforms/Coroutines/CMakeLists.txt @@ -9,6 +9,7 @@ add_llvm_component_library(LLVMCoroutines CoroSplit.cpp SuspendCrossingInfo.cpp SpillUtils.cpp + MaterializationUtils.cpp ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms/Coroutines diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp index 8ee4bfa..c08f56b 100644 --- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp @@ -16,10 +16,10 @@ //===----------------------------------------------------------------------===// #include "CoroInternal.h" +#include "MaterializationUtils.h" #include "SpillUtils.h" #include "SuspendCrossingInfo.h" #include "llvm/ADT/BitVector.h" -#include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallString.h" #include "llvm/Analysis/StackLifetime.h" @@ -36,135 +36,12 @@ #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" #include <algorithm> -#include <deque> #include 
<optional> using namespace llvm; extern cl::opt<bool> UseNewDbgInfoFormat; -// The "coro-suspend-crossing" flag is very noisy. There is another debug type, -// "coro-frame", which results in leaner debug spew. -#define DEBUG_TYPE "coro-suspend-crossing" - -namespace { - -// RematGraph is used to construct a DAG for rematerializable instructions -// When the constructor is invoked with a candidate instruction (which is -// materializable) it builds a DAG of materializable instructions from that -// point. -// Typically, for each instruction identified as re-materializable across a -// suspend point, a RematGraph will be created. -struct RematGraph { - // Each RematNode in the graph contains the edges to instructions providing - // operands in the current node. - struct RematNode { - Instruction *Node; - SmallVector<RematNode *> Operands; - RematNode() = default; - RematNode(Instruction *V) : Node(V) {} - }; - - RematNode *EntryNode; - using RematNodeMap = - SmallMapVector<Instruction *, std::unique_ptr<RematNode>, 8>; - RematNodeMap Remats; - const std::function<bool(Instruction &)> &MaterializableCallback; - SuspendCrossingInfo &Checker; - - RematGraph(const std::function<bool(Instruction &)> &MaterializableCallback, - Instruction *I, SuspendCrossingInfo &Checker) - : MaterializableCallback(MaterializableCallback), Checker(Checker) { - std::unique_ptr<RematNode> FirstNode = std::make_unique<RematNode>(I); - EntryNode = FirstNode.get(); - std::deque<std::unique_ptr<RematNode>> WorkList; - addNode(std::move(FirstNode), WorkList, cast<User>(I)); - while (WorkList.size()) { - std::unique_ptr<RematNode> N = std::move(WorkList.front()); - WorkList.pop_front(); - addNode(std::move(N), WorkList, cast<User>(I)); - } - } - - void addNode(std::unique_ptr<RematNode> NUPtr, - std::deque<std::unique_ptr<RematNode>> &WorkList, - User *FirstUse) { - RematNode *N = NUPtr.get(); - if (Remats.count(N->Node)) - return; - - // We haven't see this node yet - add to the list - 
Remats[N->Node] = std::move(NUPtr); - for (auto &Def : N->Node->operands()) { - Instruction *D = dyn_cast<Instruction>(Def.get()); - if (!D || !MaterializableCallback(*D) || - !Checker.isDefinitionAcrossSuspend(*D, FirstUse)) - continue; - - if (Remats.count(D)) { - // Already have this in the graph - N->Operands.push_back(Remats[D].get()); - continue; - } - - bool NoMatch = true; - for (auto &I : WorkList) { - if (I->Node == D) { - NoMatch = false; - N->Operands.push_back(I.get()); - break; - } - } - if (NoMatch) { - // Create a new node - std::unique_ptr<RematNode> ChildNode = std::make_unique<RematNode>(D); - N->Operands.push_back(ChildNode.get()); - WorkList.push_back(std::move(ChildNode)); - } - } - } - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - static std::string getBasicBlockLabel(const BasicBlock *BB) { - if (BB->hasName()) - return BB->getName().str(); - - std::string S; - raw_string_ostream OS(S); - BB->printAsOperand(OS, false); - return OS.str().substr(1); - } - - void dump() const { - dbgs() << "Entry ("; - dbgs() << getBasicBlockLabel(EntryNode->Node->getParent()); - dbgs() << ") : " << *EntryNode->Node << "\n"; - for (auto &E : Remats) { - dbgs() << *(E.first) << "\n"; - for (RematNode *U : E.second->Operands) - dbgs() << " " << *U->Node << "\n"; - } - } -#endif -}; -} // end anonymous namespace - -namespace llvm { - -template <> struct GraphTraits<RematGraph *> { - using NodeRef = RematGraph::RematNode *; - using ChildIteratorType = RematGraph::RematNode **; - - static NodeRef getEntryNode(RematGraph *G) { return G->EntryNode; } - static ChildIteratorType child_begin(NodeRef N) { - return N->Operands.begin(); - } - static ChildIteratorType child_end(NodeRef N) { return N->Operands.end(); } -}; - -} // end namespace llvm - -#undef DEBUG_TYPE // "coro-suspend-crossing" #define DEBUG_TYPE "coro-frame" namespace { @@ -268,15 +145,6 @@ static void dumpSpills(StringRef Title, const coro::SpillInfo &Spills) { I->dump(); } } -static void 
dumpRemats( - StringRef Title, - const SmallMapVector<Instruction *, std::unique_ptr<RematGraph>, 8> &RM) { - dbgs() << "------------- " << Title << "--------------\n"; - for (const auto &E : RM) { - E.second->dump(); - dbgs() << "--\n"; - } -} static void dumpAllocas(const SmallVectorImpl<coro::AllocaInfo> &Allocas) { dbgs() << "------------- Allocas --------------\n"; @@ -1284,7 +1152,7 @@ static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) { // If we have a single edge PHINode, remove it and replace it with a // reload from the coroutine frame. (We already took care of multi edge - // PHINodes by rewriting them in the rewritePHIs function). + // PHINodes by normalizing them in the rewritePHIs function). if (auto *PN = dyn_cast<PHINode>(U)) { assert(PN->getNumIncomingValues() == 1 && "unexpected number of incoming " @@ -1634,93 +1502,6 @@ static void rewritePHIs(Function &F) { rewritePHIs(*BB); } -/// Default materializable callback -// Check for instructions that we can recreate on resume as opposed to spill -// the result into a coroutine frame. -bool coro::defaultMaterializable(Instruction &V) { - return (isa<CastInst>(&V) || isa<GetElementPtrInst>(&V) || - isa<BinaryOperator>(&V) || isa<CmpInst>(&V) || isa<SelectInst>(&V)); -} - -// For each instruction identified as materializable across the suspend point, -// and its associated DAG of other rematerializable instructions, -// recreate the DAG of instructions after the suspend point. 
-static void rewriteMaterializableInstructions( - const SmallMapVector<Instruction *, std::unique_ptr<RematGraph>, 8> - &AllRemats) { - // This has to be done in 2 phases - // Do the remats and record the required defs to be replaced in the - // original use instructions - // Once all the remats are complete, replace the uses in the final - // instructions with the new defs - typedef struct { - Instruction *Use; - Instruction *Def; - Instruction *Remat; - } ProcessNode; - - SmallVector<ProcessNode> FinalInstructionsToProcess; - - for (const auto &E : AllRemats) { - Instruction *Use = E.first; - Instruction *CurrentMaterialization = nullptr; - RematGraph *RG = E.second.get(); - ReversePostOrderTraversal<RematGraph *> RPOT(RG); - SmallVector<Instruction *> InstructionsToProcess; - - // If the target use is actually a suspend instruction then we have to - // insert the remats into the end of the predecessor (there should only be - // one). This is so that suspend blocks always have the suspend instruction - // as the first instruction. - auto InsertPoint = &*Use->getParent()->getFirstInsertionPt(); - if (isa<AnyCoroSuspendInst>(Use)) { - BasicBlock *SuspendPredecessorBlock = - Use->getParent()->getSinglePredecessor(); - assert(SuspendPredecessorBlock && "malformed coro suspend instruction"); - InsertPoint = SuspendPredecessorBlock->getTerminator(); - } - - // Note: skip the first instruction as this is the actual use that we're - // rematerializing everything for. 
- auto I = RPOT.begin(); - ++I; - for (; I != RPOT.end(); ++I) { - Instruction *D = (*I)->Node; - CurrentMaterialization = D->clone(); - CurrentMaterialization->setName(D->getName()); - CurrentMaterialization->insertBefore(InsertPoint); - InsertPoint = CurrentMaterialization; - - // Replace all uses of Def in the instructions being added as part of this - // rematerialization group - for (auto &I : InstructionsToProcess) - I->replaceUsesOfWith(D, CurrentMaterialization); - - // Don't replace the final use at this point as this can cause problems - // for other materializations. Instead, for any final use that uses a - // define that's being rematerialized, record the replace values - for (unsigned i = 0, E = Use->getNumOperands(); i != E; ++i) - if (Use->getOperand(i) == D) // Is this operand pointing to oldval? - FinalInstructionsToProcess.push_back( - {Use, D, CurrentMaterialization}); - - InstructionsToProcess.push_back(CurrentMaterialization); - } - } - - // Finally, replace the uses with the defines that we've just rematerialized - for (auto &R : FinalInstructionsToProcess) { - if (auto *PN = dyn_cast<PHINode>(R.Use)) { - assert(PN->getNumIncomingValues() == 1 && "unexpected number of incoming " - "values in the PHINode"); - PN->replaceAllUsesWith(R.Remat); - PN->eraseFromParent(); - continue; - } - R.Use->replaceUsesOfWith(R.Def, R.Remat); - } -} - // Splits the block at a particular instruction unless it is the first // instruction in the block with a single predecessor. static BasicBlock *splitBlockIfNotFirst(Instruction *I, const Twine &Name) { @@ -1741,10 +1522,6 @@ static void splitAround(Instruction *I, const Twine &Name) { splitBlockIfNotFirst(I->getNextNode(), "After" + Name); } -static bool isSuspendBlock(BasicBlock *BB) { - return isa<AnyCoroSuspendInst>(BB->front()); -} - /// After we split the coroutine, will the given basic block be along /// an obvious exit path for the resumption function? 
static bool willLeaveFunctionImmediatelyAfter(BasicBlock *BB, @@ -1754,7 +1531,7 @@ static bool willLeaveFunctionImmediatelyAfter(BasicBlock *BB, if (depth == 0) return false; // If this is a suspend block, we're about to exit the resumption function. - if (isSuspendBlock(BB)) + if (coro::isSuspendBlock(BB)) return true; // Recurse into the successors. @@ -1995,7 +1772,8 @@ static void sinkLifetimeStartMarkers(Function &F, coro::Shape &Shape, DomSet.insert(&F.getEntryBlock()); for (auto *CSI : Shape.CoroSuspends) { BasicBlock *SuspendBlock = CSI->getParent(); - assert(isSuspendBlock(SuspendBlock) && SuspendBlock->getSingleSuccessor() && + assert(coro::isSuspendBlock(SuspendBlock) && + SuspendBlock->getSingleSuccessor() && "should have split coro.suspend into its own block"); DomSet.insert(SuspendBlock->getSingleSuccessor()); } @@ -2227,68 +2005,6 @@ void coro::salvageDebugInfo( } } -static void doRematerializations( - Function &F, SuspendCrossingInfo &Checker, - const std::function<bool(Instruction &)> &MaterializableCallback) { - if (F.hasOptNone()) - return; - - coro::SpillInfo Spills; - - // See if there are materializable instructions across suspend points - // We record these as the starting point to also identify materializable - // defs of uses in these operations - for (Instruction &I : instructions(F)) { - if (!MaterializableCallback(I)) - continue; - for (User *U : I.users()) - if (Checker.isDefinitionAcrossSuspend(I, U)) - Spills[&I].push_back(cast<Instruction>(U)); - } - - // Process each of the identified rematerializable instructions - // and add predecessor instructions that can also be rematerialized. - // This is actually a graph of instructions since we could potentially - // have multiple uses of a def in the set of predecessor instructions. - // The approach here is to maintain a graph of instructions for each bottom - // level instruction - where we have a unique set of instructions (nodes) - // and edges between them. 
We then walk the graph in reverse post-dominator - // order to insert them past the suspend point, but ensure that ordering is - // correct. We also rely on CSE removing duplicate defs for remats of - // different instructions with a def in common (rather than maintaining more - // complex graphs for each suspend point) - - // We can do this by adding new nodes to the list for each suspend - // point. Then using standard GraphTraits to give a reverse post-order - // traversal when we insert the nodes after the suspend - SmallMapVector<Instruction *, std::unique_ptr<RematGraph>, 8> AllRemats; - for (auto &E : Spills) { - for (Instruction *U : E.second) { - // Don't process a user twice (this can happen if the instruction uses - // more than one rematerializable def) - if (AllRemats.count(U)) - continue; - - // Constructor creates the whole RematGraph for the given Use - auto RematUPtr = - std::make_unique<RematGraph>(MaterializableCallback, U, Checker); - - LLVM_DEBUG(dbgs() << "***** Next remat group *****\n"; - ReversePostOrderTraversal<RematGraph *> RPOT(RematUPtr.get()); - for (auto I = RPOT.begin(); I != RPOT.end(); - ++I) { (*I)->Node->dump(); } dbgs() - << "\n";); - - AllRemats[U] = std::move(RematUPtr); - } - } - - // Rewrite materializable instructions to be materialized at the use - // point. - LLVM_DEBUG(dumpRemats("Materializations", AllRemats)); - rewriteMaterializableInstructions(AllRemats); -} - void coro::normalizeCoroutine(Function &F, coro::Shape &Shape, TargetTransformInfo &TTI) { // Don't eliminate swifterror in async functions that won't be split. 
@@ -2324,8 +2040,8 @@ void coro::normalizeCoroutine(Function &F, coro::Shape &Shape, IRBuilder<> Builder(AsyncEnd); SmallVector<Value *, 8> Args(AsyncEnd->args()); auto Arguments = ArrayRef<Value *>(Args).drop_front(3); - auto *Call = createMustTailCall(AsyncEnd->getDebugLoc(), MustTailCallFn, - TTI, Arguments, Builder); + auto *Call = coro::createMustTailCall( + AsyncEnd->getDebugLoc(), MustTailCallFn, TTI, Arguments, Builder); splitAround(Call, "MustTailCall.Before.CoroEnd"); } } diff --git a/llvm/lib/Transforms/Coroutines/CoroInternal.h b/llvm/lib/Transforms/Coroutines/CoroInternal.h index 698c21a..891798f 100644 --- a/llvm/lib/Transforms/Coroutines/CoroInternal.h +++ b/llvm/lib/Transforms/Coroutines/CoroInternal.h @@ -21,6 +21,7 @@ class CallGraph; namespace coro { +bool isSuspendBlock(BasicBlock *BB); bool declaresAnyIntrinsic(const Module &M); bool declaresIntrinsics(const Module &M, const std::initializer_list<StringRef>); diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp index dc3829d..8ea460b 100644 --- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp @@ -2080,12 +2080,13 @@ splitCoroutine(Function &F, SmallVectorImpl<Function *> &Clones, return Shape; } -static void updateCallGraphAfterCoroutineSplit( +static LazyCallGraph::SCC &updateCallGraphAfterCoroutineSplit( LazyCallGraph::Node &N, const coro::Shape &Shape, const SmallVectorImpl<Function *> &Clones, LazyCallGraph::SCC &C, LazyCallGraph &CG, CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR, FunctionAnalysisManager &FAM) { + auto *CurrentSCC = &C; if (!Clones.empty()) { switch (Shape.ABI) { case coro::ABI::Switch: @@ -2105,13 +2106,16 @@ static void updateCallGraphAfterCoroutineSplit( } // Let the CGSCC infra handle the changes to the original function. 
- updateCGAndAnalysisManagerForCGSCCPass(CG, C, N, AM, UR, FAM); + CurrentSCC = &updateCGAndAnalysisManagerForCGSCCPass(CG, *CurrentSCC, N, AM, + UR, FAM); } // Do some cleanup and let the CGSCC infra see if we've cleaned up any edges // to the split functions. postSplitCleanup(N.getFunction()); - updateCGAndAnalysisManagerForFunctionPass(CG, C, N, AM, UR, FAM); + CurrentSCC = &updateCGAndAnalysisManagerForFunctionPass(CG, *CurrentSCC, N, + AM, UR, FAM); + return *CurrentSCC; } /// Replace a call to llvm.coro.prepare.retcon. @@ -2200,6 +2204,7 @@ PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C, if (Coroutines.empty() && PrepareFns.empty()) return PreservedAnalyses::all(); + auto *CurrentSCC = &C; // Split all the coroutines. for (LazyCallGraph::Node *N : Coroutines) { Function &F = N->getFunction(); @@ -2211,7 +2216,8 @@ PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C, coro::Shape Shape = splitCoroutine(F, Clones, FAM.getResult<TargetIRAnalysis>(F), OptimizeFrame, MaterializableCallback); - updateCallGraphAfterCoroutineSplit(*N, Shape, Clones, C, CG, AM, UR, FAM); + CurrentSCC = &updateCallGraphAfterCoroutineSplit( + *N, Shape, Clones, *CurrentSCC, CG, AM, UR, FAM); auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); ORE.emit([&]() { @@ -2223,14 +2229,14 @@ PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C, if (!Shape.CoroSuspends.empty()) { // Run the CGSCC pipeline on the original and newly split functions. 
- UR.CWorklist.insert(&C); + UR.CWorklist.insert(CurrentSCC); for (Function *Clone : Clones) UR.CWorklist.insert(CG.lookupSCC(CG.get(*Clone))); } } for (auto *PrepareFn : PrepareFns) { - replaceAllPrepares(PrepareFn, CG, C); + replaceAllPrepares(PrepareFn, CG, *CurrentSCC); } return PreservedAnalyses::none(); diff --git a/llvm/lib/Transforms/Coroutines/Coroutines.cpp b/llvm/lib/Transforms/Coroutines/Coroutines.cpp index be25733..cdc442b 100644 --- a/llvm/lib/Transforms/Coroutines/Coroutines.cpp +++ b/llvm/lib/Transforms/Coroutines/Coroutines.cpp @@ -100,6 +100,10 @@ static bool isCoroutineIntrinsicName(StringRef Name) { } #endif +bool coro::isSuspendBlock(BasicBlock *BB) { + return isa<AnyCoroSuspendInst>(BB->front()); +} + bool coro::declaresAnyIntrinsic(const Module &M) { for (StringRef Name : CoroIntrinsics) { assert(isCoroutineIntrinsicName(Name) && "not a coroutine intrinsic"); diff --git a/llvm/lib/Transforms/Coroutines/MaterializationUtils.cpp b/llvm/lib/Transforms/Coroutines/MaterializationUtils.cpp new file mode 100644 index 0000000..708e873 --- /dev/null +++ b/llvm/lib/Transforms/Coroutines/MaterializationUtils.cpp @@ -0,0 +1,308 @@ +//===- MaterializationUtils.cpp - Builds and manipulates coroutine frame +//-------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// This file contains classes used to materialize insts after suspends points. 
+//===----------------------------------------------------------------------===// + +#include "MaterializationUtils.h" +#include "SpillUtils.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instruction.h" +#include <deque> + +using namespace llvm; + +using namespace coro; + +// The "coro-suspend-crossing" flag is very noisy. There is another debug type, +// "coro-frame", which results in leaner debug spew. +#define DEBUG_TYPE "coro-suspend-crossing" + +namespace { + +// RematGraph is used to construct a DAG for rematerializable instructions +// When the constructor is invoked with a candidate instruction (which is +// materializable) it builds a DAG of materializable instructions from that +// point. +// Typically, for each instruction identified as re-materializable across a +// suspend point, a RematGraph will be created. +struct RematGraph { + // Each RematNode in the graph contains the edges to instructions providing + // operands in the current node. 
+ struct RematNode { + Instruction *Node; + SmallVector<RematNode *> Operands; + RematNode() = default; + RematNode(Instruction *V) : Node(V) {} + }; + + RematNode *EntryNode; + using RematNodeMap = + SmallMapVector<Instruction *, std::unique_ptr<RematNode>, 8>; + RematNodeMap Remats; + const std::function<bool(Instruction &)> &MaterializableCallback; + SuspendCrossingInfo &Checker; + + RematGraph(const std::function<bool(Instruction &)> &MaterializableCallback, + Instruction *I, SuspendCrossingInfo &Checker) + : MaterializableCallback(MaterializableCallback), Checker(Checker) { + std::unique_ptr<RematNode> FirstNode = std::make_unique<RematNode>(I); + EntryNode = FirstNode.get(); + std::deque<std::unique_ptr<RematNode>> WorkList; + addNode(std::move(FirstNode), WorkList, cast<User>(I)); + while (WorkList.size()) { + std::unique_ptr<RematNode> N = std::move(WorkList.front()); + WorkList.pop_front(); + addNode(std::move(N), WorkList, cast<User>(I)); + } + } + + void addNode(std::unique_ptr<RematNode> NUPtr, + std::deque<std::unique_ptr<RematNode>> &WorkList, + User *FirstUse) { + RematNode *N = NUPtr.get(); + if (Remats.count(N->Node)) + return; + + // We haven't see this node yet - add to the list + Remats[N->Node] = std::move(NUPtr); + for (auto &Def : N->Node->operands()) { + Instruction *D = dyn_cast<Instruction>(Def.get()); + if (!D || !MaterializableCallback(*D) || + !Checker.isDefinitionAcrossSuspend(*D, FirstUse)) + continue; + + if (Remats.count(D)) { + // Already have this in the graph + N->Operands.push_back(Remats[D].get()); + continue; + } + + bool NoMatch = true; + for (auto &I : WorkList) { + if (I->Node == D) { + NoMatch = false; + N->Operands.push_back(I.get()); + break; + } + } + if (NoMatch) { + // Create a new node + std::unique_ptr<RematNode> ChildNode = std::make_unique<RematNode>(D); + N->Operands.push_back(ChildNode.get()); + WorkList.push_back(std::move(ChildNode)); + } + } + } + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + static 
std::string getBasicBlockLabel(const BasicBlock *BB) { + if (BB->hasName()) + return BB->getName().str(); + + std::string S; + raw_string_ostream OS(S); + BB->printAsOperand(OS, false); + return OS.str().substr(1); + } + + void dump() const { + dbgs() << "Entry ("; + dbgs() << getBasicBlockLabel(EntryNode->Node->getParent()); + dbgs() << ") : " << *EntryNode->Node << "\n"; + for (auto &E : Remats) { + dbgs() << *(E.first) << "\n"; + for (RematNode *U : E.second->Operands) + dbgs() << " " << *U->Node << "\n"; + } + } +#endif +}; + +} // namespace + +namespace llvm { +template <> struct GraphTraits<RematGraph *> { + using NodeRef = RematGraph::RematNode *; + using ChildIteratorType = RematGraph::RematNode **; + + static NodeRef getEntryNode(RematGraph *G) { return G->EntryNode; } + static ChildIteratorType child_begin(NodeRef N) { + return N->Operands.begin(); + } + static ChildIteratorType child_end(NodeRef N) { return N->Operands.end(); } +}; + +} // end namespace llvm + +// For each instruction identified as materializable across the suspend point, +// and its associated DAG of other rematerializable instructions, +// recreate the DAG of instructions after the suspend point. 
+static void rewriteMaterializableInstructions( + const SmallMapVector<Instruction *, std::unique_ptr<RematGraph>, 8> + &AllRemats) { + // This has to be done in 2 phases + // Do the remats and record the required defs to be replaced in the + // original use instructions + // Once all the remats are complete, replace the uses in the final + // instructions with the new defs + typedef struct { + Instruction *Use; + Instruction *Def; + Instruction *Remat; + } ProcessNode; + + SmallVector<ProcessNode> FinalInstructionsToProcess; + + for (const auto &E : AllRemats) { + Instruction *Use = E.first; + Instruction *CurrentMaterialization = nullptr; + RematGraph *RG = E.second.get(); + ReversePostOrderTraversal<RematGraph *> RPOT(RG); + SmallVector<Instruction *> InstructionsToProcess; + + // If the target use is actually a suspend instruction then we have to + // insert the remats into the end of the predecessor (there should only be + // one). This is so that suspend blocks always have the suspend instruction + // as the first instruction. + auto InsertPoint = &*Use->getParent()->getFirstInsertionPt(); + if (isa<AnyCoroSuspendInst>(Use)) { + BasicBlock *SuspendPredecessorBlock = + Use->getParent()->getSinglePredecessor(); + assert(SuspendPredecessorBlock && "malformed coro suspend instruction"); + InsertPoint = SuspendPredecessorBlock->getTerminator(); + } + + // Note: skip the first instruction as this is the actual use that we're + // rematerializing everything for. 
+ auto I = RPOT.begin(); + ++I; + for (; I != RPOT.end(); ++I) { + Instruction *D = (*I)->Node; + CurrentMaterialization = D->clone(); + CurrentMaterialization->setName(D->getName()); + CurrentMaterialization->insertBefore(InsertPoint); + InsertPoint = CurrentMaterialization; + + // Replace all uses of Def in the instructions being added as part of this + // rematerialization group + for (auto &I : InstructionsToProcess) + I->replaceUsesOfWith(D, CurrentMaterialization); + + // Don't replace the final use at this point as this can cause problems + // for other materializations. Instead, for any final use that uses a + // define that's being rematerialized, record the replace values + for (unsigned i = 0, E = Use->getNumOperands(); i != E; ++i) + if (Use->getOperand(i) == D) // Is this operand pointing to oldval? + FinalInstructionsToProcess.push_back( + {Use, D, CurrentMaterialization}); + + InstructionsToProcess.push_back(CurrentMaterialization); + } + } + + // Finally, replace the uses with the defines that we've just rematerialized + for (auto &R : FinalInstructionsToProcess) { + if (auto *PN = dyn_cast<PHINode>(R.Use)) { + assert(PN->getNumIncomingValues() == 1 && "unexpected number of incoming " + "values in the PHINode"); + PN->replaceAllUsesWith(R.Remat); + PN->eraseFromParent(); + continue; + } + R.Use->replaceUsesOfWith(R.Def, R.Remat); + } +} + +/// Default materializable callback +// Check for instructions that we can recreate on resume as opposed to spill +// the result into a coroutine frame. 
+bool llvm::coro::defaultMaterializable(Instruction &V) { + return (isa<CastInst>(&V) || isa<GetElementPtrInst>(&V) || + isa<BinaryOperator>(&V) || isa<CmpInst>(&V) || isa<SelectInst>(&V)); +} + +bool llvm::coro::isTriviallyMaterializable(Instruction &V) { + return defaultMaterializable(V); +} + +#ifndef NDEBUG +static void dumpRemats( + StringRef Title, + const SmallMapVector<Instruction *, std::unique_ptr<RematGraph>, 8> &RM) { + dbgs() << "------------- " << Title << "--------------\n"; + for (const auto &E : RM) { + E.second->dump(); + dbgs() << "--\n"; + } +} +#endif + +void coro::doRematerializations( + Function &F, SuspendCrossingInfo &Checker, + std::function<bool(Instruction &)> IsMaterializable) { + if (F.hasOptNone()) + return; + + coro::SpillInfo Spills; + + // See if there are materializable instructions across suspend points + // We record these as the starting point to also identify materializable + // defs of uses in these operations + for (Instruction &I : instructions(F)) { + if (!IsMaterializable(I)) + continue; + for (User *U : I.users()) + if (Checker.isDefinitionAcrossSuspend(I, U)) + Spills[&I].push_back(cast<Instruction>(U)); + } + + // Process each of the identified rematerializable instructions + // and add predecessor instructions that can also be rematerialized. + // This is actually a graph of instructions since we could potentially + // have multiple uses of a def in the set of predecessor instructions. + // The approach here is to maintain a graph of instructions for each bottom + // level instruction - where we have a unique set of instructions (nodes) + // and edges between them. We then walk the graph in reverse post-dominator + // order to insert them past the suspend point, but ensure that ordering is + // correct. 
We also rely on CSE removing duplicate defs for remats of + // different instructions with a def in common (rather than maintaining more + // complex graphs for each suspend point) + + // We can do this by adding new nodes to the list for each suspend + // point. Then using standard GraphTraits to give a reverse post-order + // traversal when we insert the nodes after the suspend + SmallMapVector<Instruction *, std::unique_ptr<RematGraph>, 8> AllRemats; + for (auto &E : Spills) { + for (Instruction *U : E.second) { + // Don't process a user twice (this can happen if the instruction uses + // more than one rematerializable def) + if (AllRemats.count(U)) + continue; + + // Constructor creates the whole RematGraph for the given Use + auto RematUPtr = + std::make_unique<RematGraph>(IsMaterializable, U, Checker); + + LLVM_DEBUG(dbgs() << "***** Next remat group *****\n"; + ReversePostOrderTraversal<RematGraph *> RPOT(RematUPtr.get()); + for (auto I = RPOT.begin(); I != RPOT.end(); + ++I) { (*I)->Node->dump(); } dbgs() + << "\n";); + + AllRemats[U] = std::move(RematUPtr); + } + } + + // Rewrite materializable instructions to be materialized at the use + // point. + LLVM_DEBUG(dumpRemats("Materializations", AllRemats)); + rewriteMaterializableInstructions(AllRemats); +} diff --git a/llvm/lib/Transforms/Coroutines/MaterializationUtils.h b/llvm/lib/Transforms/Coroutines/MaterializationUtils.h new file mode 100644 index 0000000..f391851 --- /dev/null +++ b/llvm/lib/Transforms/Coroutines/MaterializationUtils.h @@ -0,0 +1,30 @@ +//===- MaterializationUtils.h - Utilities for doing materialization -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "SuspendCrossingInfo.h" +#include "llvm/IR/Instruction.h" + +#ifndef LIB_TRANSFORMS_COROUTINES_MATERIALIZATIONUTILS_H +#define LIB_TRANSFORMS_COROUTINES_MATERIALIZATIONUTILS_H + +namespace llvm { + +namespace coro { + +// True if I is trivially rematerialzable, e.g. InsertElementInst +bool isTriviallyMaterializable(Instruction &I); + +// Performs rematerialization, invoked from buildCoroutineFrame. +void doRematerializations(Function &F, SuspendCrossingInfo &Checker, + std::function<bool(Instruction &)> IsMaterializable); + +} // namespace coro + +} // namespace llvm + +#endif // LIB_TRANSFORMS_COROUTINES_MATERIALIZATIONUTILS_H diff --git a/llvm/lib/Transforms/Coroutines/SpillUtils.cpp b/llvm/lib/Transforms/Coroutines/SpillUtils.cpp index d71b0a3..f213ac1 100644 --- a/llvm/lib/Transforms/Coroutines/SpillUtils.cpp +++ b/llvm/lib/Transforms/Coroutines/SpillUtils.cpp @@ -23,10 +23,6 @@ namespace { typedef SmallPtrSet<BasicBlock *, 8> VisitedBlocksSet; -static bool isSuspendBlock(BasicBlock *BB) { - return isa<AnyCoroSuspendInst>(BB->front()); -} - // Check for structural coroutine intrinsics that should not be spilled into // the coroutine frame. static bool isCoroutineStructureIntrinsic(Instruction &I) { @@ -45,7 +41,7 @@ static bool isSuspendReachableFrom(BasicBlock *From, return false; // We assume that we'll already have split suspends into their own blocks. - if (isSuspendBlock(From)) + if (coro::isSuspendBlock(From)) return true; // Recurse on the successors. 
diff --git a/llvm/lib/Transforms/Coroutines/SpillUtils.h b/llvm/lib/Transforms/Coroutines/SpillUtils.h index de0ff0b..8843b61 100644 --- a/llvm/lib/Transforms/Coroutines/SpillUtils.h +++ b/llvm/lib/Transforms/Coroutines/SpillUtils.h @@ -29,8 +29,6 @@ struct AllocaInfo { MayWriteBeforeCoroBegin(MayWriteBeforeCoroBegin) {} }; -bool isSuspendBlock(BasicBlock *BB); - void collectSpillsFromArgs(SpillInfo &Spills, Function &F, const SuspendCrossingInfo &Checker); void collectSpillsAndAllocasFromInsts( diff --git a/llvm/lib/Transforms/Coroutines/SuspendCrossingInfo.cpp b/llvm/lib/Transforms/Coroutines/SuspendCrossingInfo.cpp index 6b0dc12..84699e6 100644 --- a/llvm/lib/Transforms/Coroutines/SuspendCrossingInfo.cpp +++ b/llvm/lib/Transforms/Coroutines/SuspendCrossingInfo.cpp @@ -165,8 +165,13 @@ SuspendCrossingInfo::SuspendCrossingInfo( // Mark all CoroEnd Blocks. We do not propagate Kills beyond coro.ends as // the code beyond coro.end is reachable during initial invocation of the // coroutine. - for (auto *CE : CoroEnds) + for (auto *CE : CoroEnds) { + // Verify CoroEnd was normalized + assert(CE->getParent()->getFirstInsertionPt() == CE->getIterator() && + CE->getParent()->size() <= 2 && "CoroEnd must be in its own BB"); + getBlockData(CE->getParent()).End = true; + } // Mark all suspend blocks and indicate that they kill everything they // consume. 
Note, that crossing coro.save also requires a spill, as any code @@ -179,6 +184,11 @@ SuspendCrossingInfo::SuspendCrossingInfo( B.Kills |= B.Consumes; }; for (auto *CSI : CoroSuspends) { + // Verify CoroSuspend was normalized + assert(CSI->getParent()->getFirstInsertionPt() == CSI->getIterator() && + CSI->getParent()->size() <= 2 && + "CoroSuspend must be in its own BB"); + markSuspendBlock(CSI); if (auto *Save = CSI->getCoroSave()) markSuspendBlock(Save); diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index b73f526..217c7cc 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -12562,7 +12562,7 @@ struct AAAddressSpaceImpl : public AAAddressSpace { AAAddressSpaceImpl(const IRPosition &IRP, Attributor &A) : AAAddressSpace(IRP, A) {} - int32_t getAddressSpace() const override { + uint32_t getAddressSpace() const override { assert(isValidState() && "the AA is invalid"); return AssumedAddressSpace; } @@ -12576,7 +12576,7 @@ struct AAAddressSpaceImpl : public AAAddressSpace { } ChangeStatus updateImpl(Attributor &A) override { - int32_t OldAddressSpace = AssumedAddressSpace; + uint32_t OldAddressSpace = AssumedAddressSpace; auto *AUO = A.getOrCreateAAFor<AAUnderlyingObjects>(getIRPosition(), this, DepClassTy::REQUIRED); auto Pred = [&](Value &Obj) { @@ -12597,16 +12597,13 @@ struct AAAddressSpaceImpl : public AAAddressSpace { Value *AssociatedValue = &getAssociatedValue(); Value *OriginalValue = peelAddrspacecast(AssociatedValue); if (getAddressSpace() == NoAddressSpace || - static_cast<uint32_t>(getAddressSpace()) == - getAssociatedType()->getPointerAddressSpace()) + getAddressSpace() == getAssociatedType()->getPointerAddressSpace()) return ChangeStatus::UNCHANGED; PointerType *NewPtrTy = - PointerType::get(getAssociatedType()->getContext(), - static_cast<uint32_t>(getAddressSpace())); + PointerType::get(getAssociatedType()->getContext(), 
getAddressSpace()); bool UseOriginalValue = - OriginalValue->getType()->getPointerAddressSpace() == - static_cast<uint32_t>(getAddressSpace()); + OriginalValue->getType()->getPointerAddressSpace() == getAddressSpace(); bool Changed = false; @@ -12656,9 +12653,9 @@ struct AAAddressSpaceImpl : public AAAddressSpace { } private: - int32_t AssumedAddressSpace = NoAddressSpace; + uint32_t AssumedAddressSpace = NoAddressSpace; - bool takeAddressSpace(int32_t AS) { + bool takeAddressSpace(uint32_t AS) { if (AssumedAddressSpace == NoAddressSpace) { AssumedAddressSpace = AS; return true; diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp index ff0d7817..261731f 100644 --- a/llvm/lib/Transforms/IPO/FunctionImport.cpp +++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp @@ -722,7 +722,7 @@ class WorkloadImportsManager : public ModuleImportsManager { return; } const auto &CtxMap = *Ctx; - DenseSet<GlobalValue::GUID> ContainedGUIDs; + SetVector<GlobalValue::GUID> ContainedGUIDs; for (const auto &[RootGuid, Root] : CtxMap) { // Avoid ContainedGUIDs to get in/out of scope. Reuse its memory for // subsequent roots, but clear its contents. diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index d816656..1d67773 100644 --- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -378,8 +378,8 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst, // after MemInsertPoint. 
MemoryUseOrDef *MemInsertPoint = nullptr; for (++BI; !BI->isTerminator(); ++BI) { - auto *CurrentAcc = cast_or_null<MemoryUseOrDef>( - MSSAU->getMemorySSA()->getMemoryAccess(&*BI)); + auto *CurrentAcc = + cast_or_null<MemoryUseOrDef>(MSSA->getMemoryAccess(&*BI)); if (CurrentAcc) MemInsertPoint = CurrentAcc; @@ -605,13 +605,13 @@ bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) { // TODO: Simplify this once P will be determined by MSSA, in which case the // discrepancy can no longer occur. MemoryUseOrDef *MemInsertPoint = nullptr; - if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(P)) { + if (MemoryUseOrDef *MA = MSSA->getMemoryAccess(P)) { MemInsertPoint = cast<MemoryUseOrDef>(--MA->getIterator()); } else { const Instruction *ConstP = P; for (const Instruction &I : make_range(++ConstP->getReverseIterator(), ++LI->getReverseIterator())) { - if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(&I)) { + if (MemoryUseOrDef *MA = MSSA->getMemoryAccess(&I)) { MemInsertPoint = MA; break; } @@ -623,7 +623,7 @@ bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) { LLVM_DEBUG(dbgs() << "Lifting " << *I << " before " << *P << "\n"); I->moveBefore(P); assert(MemInsertPoint && "Must have found insert point"); - if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(I)) { + if (MemoryUseOrDef *MA = MSSA->getMemoryAccess(I)) { MSSAU->moveAfter(MA, MemInsertPoint); MemInsertPoint = MA; } @@ -661,18 +661,11 @@ bool MemCpyOptPass::processStoreOfLoad(StoreInst *SI, LoadInst *LI, } } - // We found an instruction that may write to the loaded memory. - // We can try to promote at this position instead of the store + // If we found an instruction that may write to the loaded memory, + // we can try to promote at this position instead of the store // position if nothing aliases the store memory after this and the store // destination is not in the range. 
- if (P && P != SI) { - if (!moveUp(SI, P, LI)) - P = nullptr; - } - - // If a valid insertion position is found, then we can promote - // the load/store pair to a memcpy. - if (P) { + if (P == SI || moveUp(SI, P, LI)) { // If we load from memory that may alias the memory we store to, // memmove must be used to preserve semantic. If not, memcpy can // be used. Also, if we load from constant memory, memcpy can be used @@ -697,8 +690,7 @@ bool MemCpyOptPass::processStoreOfLoad(StoreInst *SI, LoadInst *LI, LLVM_DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => " << *M << "\n"); - auto *LastDef = - cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI)); + auto *LastDef = cast<MemoryDef>(MSSA->getMemoryAccess(SI)); auto *NewAccess = MSSAU->createMemoryAccessAfter(M, nullptr, LastDef); MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true); @@ -1258,8 +1250,8 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M, CopySourceAlign, M->getLength(), M->isVolatile()); NewM->copyMetadata(*M, LLVMContext::MD_DIAssignID); - assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M))); - auto *LastDef = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M)); + assert(isa<MemoryDef>(MSSA->getMemoryAccess(M))); + auto *LastDef = cast<MemoryDef>(MSSA->getMemoryAccess(M)); auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, nullptr, LastDef); MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true); @@ -1369,12 +1361,11 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy, Builder.CreateMemSet(Builder.CreatePtrAdd(Dest, SrcSize), MemSet->getOperand(1), MemsetLen, Alignment); - assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)) && + assert(isa<MemoryDef>(MSSA->getMemoryAccess(MemCpy)) && "MemCpy must be a MemoryDef"); // The new memset is inserted before the memcpy, and it is known that the // memcpy's defining access is the memset about to be removed. 
- auto *LastDef = - cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)); + auto *LastDef = cast<MemoryDef>(MSSA->getMemoryAccess(MemCpy)); auto *NewAccess = MSSAU->createMemoryAccessBefore(NewMemSet, nullptr, LastDef); MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true); @@ -1479,8 +1470,7 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy, Instruction *NewM = Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1), CopySize, MemCpy->getDestAlign()); - auto *LastDef = - cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)); + auto *LastDef = cast<MemoryDef>(MSSA->getMemoryAccess(MemCpy)); auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, nullptr, LastDef); MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true); diff --git a/llvm/lib/Transforms/Scalar/Scalar.cpp b/llvm/lib/Transforms/Scalar/Scalar.cpp index 7aeee1d..fa6e671 100644 --- a/llvm/lib/Transforms/Scalar/Scalar.cpp +++ b/llvm/lib/Transforms/Scalar/Scalar.cpp @@ -21,6 +21,7 @@ using namespace llvm; void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeConstantHoistingLegacyPassPass(Registry); initializeDCELegacyPassPass(Registry); + initializeScalarizerLegacyPassPass(Registry); initializeGVNLegacyPassPass(Registry); initializeEarlyCSELegacyPassPass(Registry); initializeEarlyCSEMemSSALegacyPassPass(Registry); diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp index 2bed348..01d2433 100644 --- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp +++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp @@ -36,6 +36,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Transforms/Utils/Local.h" @@ -340,8 +341,33 @@ private: const unsigned ScalarizeMinBits; }; +class ScalarizerLegacyPass : public FunctionPass { +public: + static char ID; + 
ScalarizerPassOptions Options; + ScalarizerLegacyPass() : FunctionPass(ID), Options() {} + ScalarizerLegacyPass(const ScalarizerPassOptions &Options); + bool runOnFunction(Function &F) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; +}; + } // end anonymous namespace +ScalarizerLegacyPass::ScalarizerLegacyPass(const ScalarizerPassOptions &Options) + : FunctionPass(ID), Options(Options) {} + +void ScalarizerLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addPreserved<DominatorTreeWrapperPass>(); +} + +char ScalarizerLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(ScalarizerLegacyPass, "scalarizer", + "Scalarize vector operations", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_END(ScalarizerLegacyPass, "scalarizer", + "Scalarize vector operations", false, false) + Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v, const VectorSplit &VS, ValueVector *cachePtr) : BB(bb), BBI(bbi), V(v), VS(VS), CachePtr(cachePtr) { @@ -414,6 +440,19 @@ Value *Scatterer::operator[](unsigned Frag) { return CV[Frag]; } +bool ScalarizerLegacyPass::runOnFunction(Function &F) { + if (skipFunction(F)) + return false; + + DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + ScalarizerVisitor Impl(DT, Options); + return Impl.visit(F); +} + +FunctionPass *llvm::createScalarizerPass(const ScalarizerPassOptions &Options) { + return new ScalarizerLegacyPass(Options); +} + bool ScalarizerVisitor::visit(Function &F) { assert(Gathered.empty() && Scattered.empty()); diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index b5f87e4..322b431 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -69,6 +69,9 @@ public: VPBuilder() = default; VPBuilder(VPBasicBlock *InsertBB) { 
setInsertPoint(InsertBB); } VPBuilder(VPRecipeBase *InsertPt) { setInsertPoint(InsertPt); } + VPBuilder(VPBasicBlock *TheBB, VPBasicBlock::iterator IP) { + setInsertPoint(TheBB, IP); + } /// Clear the insertion point: created instructions will not be inserted into /// a block. diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 3b6b154..7b6cec9 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -585,11 +585,6 @@ protected: const SCEV2ValueTy &ExpandedSCEVs, std::pair<BasicBlock *, Value *> AdditionalBypass = {nullptr, nullptr}); - /// Complete the loop skeleton by adding debug MDs, creating appropriate - /// conditional branches in the middle block, preparing the builder and - /// running the verifier. Return the preheader of the completed vector loop. - BasicBlock *completeLoopSkeleton(); - /// Allow subclasses to override and print debug traces before/after vplan /// execution, when trace information is requested. virtual void printDebugTracesAtStart(){}; @@ -8696,14 +8691,7 @@ addUsersInExitBlock(VPlan &Plan, cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor()); BasicBlock *ExitBB = cast<VPIRBasicBlock>(MiddleVPBB->getSuccessors()[0])->getIRBasicBlock(); - // TODO: set B to MiddleVPBB->getFirstNonPhi(), taking care of affected tests. - VPBuilder B(MiddleVPBB); - if (auto *Terminator = MiddleVPBB->getTerminator()) { - auto *Condition = dyn_cast<VPInstruction>(Terminator->getOperand(0)); - assert((!Condition || Condition->getParent() == MiddleVPBB) && - "Condition expected in MiddleVPBB"); - B.setInsertPoint(Condition ? Condition : Terminator); - } + VPBuilder B(MiddleVPBB, MiddleVPBB->getFirstNonPhi()); // Introduce VPUsers modeling the exit values. 
for (const auto &[ExitPhi, V] : ExitingValuesToFix) { diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 00d9f29..ac59ed3 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -12486,11 +12486,12 @@ public: V = createShuffle(InVectors.front(), nullptr, CommonMask); transformMaskAfterShuffle(CommonMask, CommonMask); } + unsigned VF = std::max(CommonMask.size(), Mask.size()); for (unsigned Idx = 0, Sz = CommonMask.size(); Idx < Sz; ++Idx) if (CommonMask[Idx] == PoisonMaskElem && Mask[Idx] != PoisonMaskElem) CommonMask[Idx] = V->getType() != V1->getType() - ? Idx + Sz + ? Idx + VF : Mask[Idx] + cast<FixedVectorType>(V1->getType()) ->getNumElements(); if (V->getType() != V1->getType()) diff --git a/llvm/test/Analysis/CostModel/AArch64/reduce-fadd.ll b/llvm/test/Analysis/CostModel/AArch64/reduce-fadd.ll index a68c21f..58cb8c2 100644 --- a/llvm/test/Analysis/CostModel/AArch64/reduce-fadd.ll +++ b/llvm/test/Analysis/CostModel/AArch64/reduce-fadd.ll @@ -7,8 +7,11 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" define void @strict_fp_reductions() { ; CHECK-LABEL: 'strict_fp_reductions' +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %fadd_v2f32 = call float 
@llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef) @@ -18,8 +21,11 @@ define void @strict_fp_reductions() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; FP16-LABEL: 'strict_fp_reductions' +; FP16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef) ; FP16-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef) ; FP16-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef) +; FP16-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef) +; FP16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %fadd_v2f32 = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef) ; FP16-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef) ; FP16-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef) ; FP16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %fadd_v2f64 = call double 
@llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef) @@ -29,8 +35,11 @@ define void @strict_fp_reductions() { ; FP16-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; BF16-LABEL: 'strict_fp_reductions' +; BF16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef) ; BF16-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef) ; BF16-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef) +; BF16-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef) +; BF16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %fadd_v2f32 = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef) ; BF16-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef) ; BF16-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef) ; BF16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef) @@ -39,8 +48,11 @@ define void @strict_fp_reductions() { ; BF16-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef) ; BF16-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; + %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0.0, <2 x half> undef) %fadd_v4f16 = call half 
@llvm.vector.reduce.fadd.v4f16(half 0.0, <4 x half> undef) %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0.0, <8 x half> undef) + %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0.0, <16 x half> undef) + %fadd_v2f32 = call float @llvm.vector.reduce.fadd.v2f32(float 0.0, <2 x float> undef) %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> undef) %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.0, <8 x float> undef) %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.0, <2 x double> undef) @@ -54,12 +66,18 @@ define void @strict_fp_reductions() { define void @fast_fp_reductions() { ; CHECK-LABEL: 'fast_fp_reductions' +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %fadd_v2f16_fast = call fast half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %fadd_v2f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %fadd_v4f16_fast = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %fadd_v4f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %fadd_v8f16 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %fadd_v8f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %fadd_v16f16 = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: 
%fadd_v16f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %fadd_v11f16 = call fast half @llvm.vector.reduce.fadd.v11f16(half 0xH0000, <11 x half> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %fadd_v13f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v13f16(half 0xH0000, <13 x half> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fadd_v2f32 = call fast float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fadd_v2f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f32 = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %fadd_v8f32 = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef) @@ -77,12 +95,18 @@ define void @fast_fp_reductions() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; FP16-LABEL: 'fast_fp_reductions' +; FP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fadd_v2f16_fast = call fast half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef) +; FP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fadd_v2f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef) ; FP16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f16_fast = call fast half @llvm.vector.reduce.fadd.v4f16(half 
0xH0000, <4 x half> undef) ; FP16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef) ; FP16-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %fadd_v8f16 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef) ; FP16-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %fadd_v8f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef) +; FP16-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %fadd_v16f16 = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef) +; FP16-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %fadd_v16f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef) ; FP16-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %fadd_v11f16 = call fast half @llvm.vector.reduce.fadd.v11f16(half 0xH0000, <11 x half> undef) ; FP16-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %fadd_v13f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v13f16(half 0xH0000, <13 x half> undef) +; FP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fadd_v2f32 = call fast float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef) +; FP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fadd_v2f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef) ; FP16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f32 = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef) ; FP16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef) ; FP16-NEXT: Cost Model: Found an estimated cost of 15 for 
instruction: %fadd_v8f32 = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef) @@ -100,12 +124,18 @@ define void @fast_fp_reductions() { ; FP16-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; BF16-LABEL: 'fast_fp_reductions' +; BF16-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %fadd_v2f16_fast = call fast half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef) +; BF16-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %fadd_v2f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef) ; BF16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %fadd_v4f16_fast = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef) ; BF16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %fadd_v4f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef) ; BF16-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %fadd_v8f16 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef) ; BF16-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %fadd_v8f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef) +; BF16-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %fadd_v16f16 = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef) +; BF16-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %fadd_v16f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef) ; BF16-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %fadd_v11f16 = call fast half @llvm.vector.reduce.fadd.v11f16(half 0xH0000, <11 x half> undef) ; BF16-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %fadd_v13f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v13f16(half 0xH0000, <13 x half> 
undef) +; BF16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fadd_v2f32 = call fast float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef) +; BF16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fadd_v2f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef) ; BF16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f32 = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef) ; BF16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef) ; BF16-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %fadd_v8f32 = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef) @@ -122,15 +152,24 @@ define void @fast_fp_reductions() { ; BF16-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %fadd_v4f128 = call reassoc fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef) ; BF16-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; + %fadd_v2f16_fast = call fast half @llvm.vector.reduce.fadd.v2f16(half 0.0, <2 x half> undef) + %fadd_v2f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0.0, <2 x half> undef) + %fadd_v4f16_fast = call fast half @llvm.vector.reduce.fadd.v4f16(half 0.0, <4 x half> undef) %fadd_v4f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0.0, <4 x half> undef) %fadd_v8f16 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0.0, <8 x half> undef) %fadd_v8f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0.0, <8 x half> undef) + %fadd_v16f16 = call fast half @llvm.vector.reduce.fadd.v16f16(half 0.0, <16 x half> undef) + %fadd_v16f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0.0, <16 x half> undef) + %fadd_v11f16 = call fast 
half @llvm.vector.reduce.fadd.v11f16(half 0.0, <11 x half> undef) %fadd_v13f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v13f16(half 0.0, <13 x half> undef) + %fadd_v2f32 = call fast float @llvm.vector.reduce.fadd.v2f32(float 0.0, <2 x float> undef) + %fadd_v2f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v2f32(float 0.0, <2 x float> undef) + %fadd_v4f32 = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> undef) %fadd_v4f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> undef) @@ -158,11 +197,14 @@ define void @fast_fp_reductions() { declare bfloat @llvm.vector.reduce.fadd.v4f8(bfloat, <4 x bfloat>) declare fp128 @llvm.vector.reduce.fadd.v4f128(fp128, <4 x fp128>) +declare half @llvm.vector.reduce.fadd.v2f16(half, <2 x half>) declare half @llvm.vector.reduce.fadd.v4f16(half, <4 x half>) declare half @llvm.vector.reduce.fadd.v8f16(half, <8 x half>) +declare half @llvm.vector.reduce.fadd.v16f16(half, <16 x half>) declare half @llvm.vector.reduce.fadd.v11f16(half, <11 x half>) declare half @llvm.vector.reduce.fadd.v13f16(half, <13 x half>) +declare float @llvm.vector.reduce.fadd.v2f32(float, <2 x float>) declare float @llvm.vector.reduce.fadd.v4f32(float, <4 x float>) declare float @llvm.vector.reduce.fadd.v8f32(float, <8 x float>) declare float @llvm.vector.reduce.fadd.v13f32(float, <13 x float>) diff --git a/llvm/test/CodeGen/DirectX/ContainerData/PSVResources.ll b/llvm/test/CodeGen/DirectX/ContainerData/PSVResources.ll new file mode 100644 index 0000000..2bd7a2e --- /dev/null +++ b/llvm/test/CodeGen/DirectX/ContainerData/PSVResources.ll @@ -0,0 +1,93 @@ +; RUN: llc %s --filetype=obj -o - | obj2yaml | FileCheck %s + +; Make sure resource table is created correctly. 
+; CHECK: Resources: +target triple = "dxil-unknown-shadermodel6.0-compute" + +define void @main() #0 { + + ; ByteAddressBuffer Buf : register(t8, space1) +; CHECK: - Type: SRVRaw +; CHECK: Space: 1 +; CHECK: LowerBound: 8 +; CHECK: UpperBound: 8 +; CHECK: Kind: RawBuffer +; CHECK: Flags: +; CHECK: UsedByAtomic64: false + %srv0 = call target("dx.RawBuffer", i8, 0, 0) + @llvm.dx.handle.fromBinding.tdx.RawBuffer_i8_0_0t( + i32 1, i32 8, i32 1, i32 0, i1 false) + + ; struct S { float4 a; uint4 b; }; + ; StructuredBuffer<S> Buf : register(t2, space4) +; CHECK: - Type: SRVStructured +; CHECK: Space: 4 +; CHECK: LowerBound: 2 +; CHECK: UpperBound: 2 +; CHECK: Kind: StructuredBuffer +; CHECK: Flags: +; CHECK: UsedByAtomic64: false + %srv1 = call target("dx.RawBuffer", {<4 x float>, <4 x i32>}, 0, 0) + @llvm.dx.handle.fromBinding.tdx.RawBuffer_sl_v4f32v4i32s_0_0t( + i32 4, i32 2, i32 1, i32 0, i1 false) + + ; Buffer<uint4> Buf[24] : register(t3, space5) +; CHECK: - Type: SRVTyped +; CHECK: Space: 5 +; CHECK: LowerBound: 3 +; CHECK: UpperBound: 26 +; CHECK: Kind: TypedBuffer +; CHECK: Flags: +; CHECK: UsedByAtomic64: false + %srv2 = call target("dx.TypedBuffer", <4 x i32>, 0, 0, 0) + @llvm.dx.handle.fromBinding.tdx.TypedBuffer_i32_0_0t( + i32 5, i32 3, i32 24, i32 0, i1 false) + + ; RWBuffer<int> Buf : register(u7, space2) +; CHECK: - Type: UAVTyped +; CHECK: Space: 2 +; CHECK: LowerBound: 7 +; CHECK: UpperBound: 7 +; CHECK: Kind: TypedBuffer +; CHECK: Flags: +; CHECK: UsedByAtomic64: false + %uav0 = call target("dx.TypedBuffer", i32, 1, 0, 1) + @llvm.dx.handle.fromBinding.tdx.TypedBuffer_i32_1_0t( + i32 2, i32 7, i32 1, i32 0, i1 false) + + ; RWBuffer<float4> Buf : register(u5, space3) +; CHECK: - Type: UAVTyped +; CHECK: Space: 3 +; CHECK: LowerBound: 5 +; CHECK: UpperBound: 5 +; CHECK: Kind: TypedBuffer +; CHECK: Flags: +; CHECK: UsedByAtomic64: false + %uav1 = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0) + @llvm.dx.handle.fromBinding.tdx.TypedBuffer_f32_1_0( + 
i32 3, i32 5, i32 1, i32 0, i1 false) + + ; RWBuffer<float4> BufferArray[10] : register(u0, space4) +; CHECK: - Type: UAVTyped +; CHECK: Space: 4 +; CHECK: LowerBound: 0 +; CHECK: UpperBound: 9 +; CHECK: Kind: TypedBuffer +; CHECK: Flags: +; CHECK: UsedByAtomic64: false + ; RWBuffer<float4> Buf = BufferArray[0] + %uav2_1 = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0) + @llvm.dx.handle.fromBinding.tdx.TypedBuffer_f32_1_0( + i32 4, i32 0, i32 10, i32 0, i1 false) + ; RWBuffer<float4> Buf = BufferArray[5] + %uav2_2 = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0) + @llvm.dx.handle.fromBinding.tdx.TypedBuffer_f32_1_0( + i32 4, i32 0, i32 10, i32 5, i1 false) + ret void +} + +attributes #0 = { "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" } + +!dx.valver = !{!0} + +!0 = !{i32 1, i32 7} diff --git a/llvm/test/CodeGen/DirectX/acos.ll b/llvm/test/CodeGen/DirectX/acos.ll index cc32182..f4a10eb 100644 --- a/llvm/test/CodeGen/DirectX/acos.ll +++ b/llvm/test/CodeGen/DirectX/acos.ll @@ -1,20 +1,39 @@ -; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s +; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s ; Make sure dxil operation function calls for acos are generated for float and half. 
-define noundef float @tan_float(float noundef %a) { +define noundef float @acos_float(float noundef %a) { entry: ; CHECK:call float @dx.op.unary.f32(i32 15, float %{{.*}}) %elt.acos = call float @llvm.acos.f32(float %a) ret float %elt.acos } -define noundef half @tan_half(half noundef %a) { +define noundef half @acos_half(half noundef %a) { entry: ; CHECK:call half @dx.op.unary.f16(i32 15, half %{{.*}}) %elt.acos = call half @llvm.acos.f16(half %a) ret half %elt.acos } +define noundef <4 x float> @acos_float4(<4 x float> noundef %a) { +entry: + ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0 + ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 15, float [[ee0]]) + ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1 + ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 15, float [[ee1]]) + ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2 + ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 15, float [[ee2]]) + ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3 + ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 15, float [[ee3]]) + ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3 + %2 = call <4 x float> @llvm.acos.v4f32(<4 x float> %a) + ret <4 x float> %2 +} + declare half @llvm.acos.f16(half) declare float @llvm.acos.f32(float) +declare <4 x float> @llvm.acos.v4f32(<4 x float>) diff --git a/llvm/test/CodeGen/DirectX/asin.ll b/llvm/test/CodeGen/DirectX/asin.ll index 06e3bab..bd948f59 100644 --- a/llvm/test/CodeGen/DirectX/asin.ll +++ b/llvm/test/CodeGen/DirectX/asin.ll @@ -1,20 +1,39 @@ -; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s +; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s ; Make sure dxil operation 
function calls for asin are generated for float and half. -define noundef float @tan_float(float noundef %a) { +define noundef float @asin_float(float noundef %a) { entry: ; CHECK:call float @dx.op.unary.f32(i32 16, float %{{.*}}) %elt.asin = call float @llvm.asin.f32(float %a) ret float %elt.asin } -define noundef half @tan_half(half noundef %a) { +define noundef half @asin_half(half noundef %a) { entry: ; CHECK:call half @dx.op.unary.f16(i32 16, half %{{.*}}) %elt.asin = call half @llvm.asin.f16(half %a) ret half %elt.asin } +define noundef <4 x float> @asin_float4(<4 x float> noundef %a) { +entry: + ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0 + ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 16, float [[ee0]]) + ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1 + ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 16, float [[ee1]]) + ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2 + ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 16, float [[ee2]]) + ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3 + ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 16, float [[ee3]]) + ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3 + %2 = call <4 x float> @llvm.asin.v4f32(<4 x float> %a) + ret <4 x float> %2 +} + declare half @llvm.asin.f16(half) declare float @llvm.asin.f32(float) +declare <4 x float> @llvm.asin.v4f32(<4 x float>) diff --git a/llvm/test/CodeGen/DirectX/atan.ll b/llvm/test/CodeGen/DirectX/atan.ll index d7c4cd0..58899ab 100644 --- a/llvm/test/CodeGen/DirectX/atan.ll +++ b/llvm/test/CodeGen/DirectX/atan.ll @@ -1,20 +1,39 @@ -; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s +; RUN: opt -S -scalarizer -dxil-op-lower 
-mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s ; Make sure dxil operation function calls for atan are generated for float and half. -define noundef float @tan_float(float noundef %a) { +define noundef float @atan_float(float noundef %a) { entry: ; CHECK:call float @dx.op.unary.f32(i32 17, float %{{.*}}) %elt.atan = call float @llvm.atan.f32(float %a) ret float %elt.atan } -define noundef half @tan_half(half noundef %a) { +define noundef half @atan_half(half noundef %a) { entry: ; CHECK:call half @dx.op.unary.f16(i32 17, half %{{.*}}) %elt.atan = call half @llvm.atan.f16(half %a) ret half %elt.atan } +define noundef <4 x float> @atan_float4(<4 x float> noundef %a) { +entry: + ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0 + ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 17, float [[ee0]]) + ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1 + ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 17, float [[ee1]]) + ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2 + ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 17, float [[ee2]]) + ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3 + ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 17, float [[ee3]]) + ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3 + %2 = call <4 x float> @llvm.atan.v4f32(<4 x float> %a) + ret <4 x float> %2 +} + declare half @llvm.atan.f16(half) declare float @llvm.atan.f32(float) +declare <4 x float> @llvm.atan.v4f32(<4 x float>) diff --git a/llvm/test/CodeGen/DirectX/ceil.ll b/llvm/test/CodeGen/DirectX/ceil.ll index 48bc549..bd6e747 100644 --- a/llvm/test/CodeGen/DirectX/ceil.ll +++ b/llvm/test/CodeGen/DirectX/ceil.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | 
FileCheck %s +; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s ; Make sure dxil operation function calls for ceil are generated for float and half. @@ -16,5 +16,24 @@ entry: ret half %elt.ceil } +define noundef <4 x float> @ceil_float4(<4 x float> noundef %a) { +entry: + ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0 + ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 28, float [[ee0]]) + ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1 + ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 28, float [[ee1]]) + ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2 + ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 28, float [[ee2]]) + ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3 + ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 28, float [[ee3]]) + ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3 + %2 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %a) + ret <4 x float> %2 +} + declare half @llvm.ceil.f16(half) declare float @llvm.ceil.f32(float) +declare <4 x float> @llvm.ceil.v4f32(<4 x float>) diff --git a/llvm/test/CodeGen/DirectX/cos.ll b/llvm/test/CodeGen/DirectX/cos.ll index 72f4bfc..85f5db2 100644 --- a/llvm/test/CodeGen/DirectX/cos.ll +++ b/llvm/test/CodeGen/DirectX/cos.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s +; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s ; Make sure dxil operation function calls for cos are generated for float and half. 
@@ -16,5 +16,24 @@ entry: ret half %elt.cos } +define noundef <4 x float> @cos_float4(<4 x float> noundef %a) #0 { +entry: + ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0 + ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ee0]]) + ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1 + ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ee1]]) + ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2 + ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ee2]]) + ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3 + ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ee3]]) + ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3 + %2 = call <4 x float> @llvm.cos.v4f32(<4 x float> %a) + ret <4 x float> %2 +} + declare half @llvm.cos.f16(half) declare float @llvm.cos.f32(float) +declare <4 x float> @llvm.cos.v4f32(<4 x float>) diff --git a/llvm/test/CodeGen/DirectX/cosh.ll b/llvm/test/CodeGen/DirectX/cosh.ll index 91aaf89..670a8a3 100644 --- a/llvm/test/CodeGen/DirectX/cosh.ll +++ b/llvm/test/CodeGen/DirectX/cosh.ll @@ -1,20 +1,39 @@ -; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s +; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s ; Make sure dxil operation function calls for cosh are generated for float and half. 
-define noundef float @tan_float(float noundef %a) { +define noundef float @cosh_float(float noundef %a) { entry: ; CHECK:call float @dx.op.unary.f32(i32 18, float %{{.*}}) %elt.cosh = call float @llvm.cosh.f32(float %a) ret float %elt.cosh } -define noundef half @tan_half(half noundef %a) { +define noundef half @cosh_half(half noundef %a) { entry: ; CHECK:call half @dx.op.unary.f16(i32 18, half %{{.*}}) %elt.cosh = call half @llvm.cosh.f16(half %a) ret half %elt.cosh } +define noundef <4 x float> @cosh_float4(<4 x float> noundef %a) #0 { +entry: + ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0 + ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 18, float [[ee0]]) + ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1 + ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 18, float [[ee1]]) + ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2 + ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 18, float [[ee2]]) + ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3 + ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 18, float [[ee3]]) + ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3 + %2 = call <4 x float> @llvm.cosh.v4f32(<4 x float> %a) + ret <4 x float> %2 +} + declare half @llvm.cosh.f16(half) declare float @llvm.cosh.f32(float) +declare <4 x float> @llvm.cosh.v4f32(<4 x float>) diff --git a/llvm/test/CodeGen/DirectX/exp2.ll b/llvm/test/CodeGen/DirectX/exp2.ll index b70b87d..6d16af6 100644 --- a/llvm/test/CodeGen/DirectX/exp2.ll +++ b/llvm/test/CodeGen/DirectX/exp2.ll @@ -1,31 +1,39 @@ -; RUN: opt -S -dxil-op-lower < %s | FileCheck %s +; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.7-library %s | FileCheck %s ; Make sure dxil operation function calls for exp2 are 
generated for float and half. -; CHECK:call float @dx.op.unary.f32(i32 21, float %{{.*}}) -; CHECK:call half @dx.op.unary.f16(i32 21, half %{{.*}}) -target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64" -target triple = "dxil-pc-shadermodel6.7-library" - -; Function Attrs: noinline nounwind optnone -define noundef float @exp2_float(float noundef %a) #0 { +define noundef float @exp2_float(float noundef %a) { entry: - %a.addr = alloca float, align 4 - store float %a, ptr %a.addr, align 4 - %0 = load float, ptr %a.addr, align 4 - %elt.exp2 = call float @llvm.exp2.f32(float %0) + ; CHECK:call float @dx.op.unary.f32(i32 21, float %{{.*}}) + %elt.exp2 = call float @llvm.exp2.f32(float %a) ret float %elt.exp2 } -; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn -declare float @llvm.exp2.f32(float) #1 - -; Function Attrs: noinline nounwind optnone -define noundef half @exp2_half(half noundef %a) #0 { +define noundef half @exp2_half(half noundef %a) { entry: - %a.addr = alloca half, align 2 - store half %a, ptr %a.addr, align 2 - %0 = load half, ptr %a.addr, align 2 - %elt.exp2 = call half @llvm.exp2.f16(half %0) + ; CHECK:call half @dx.op.unary.f16(i32 21, half %{{.*}}) + %elt.exp2 = call half @llvm.exp2.f16(half %a) ret half %elt.exp2 } + +define noundef <4 x float> @exp2_float4(<4 x float> noundef %a) { +entry: + ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0 + ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 21, float [[ee0]]) + ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1 + ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 21, float [[ee1]]) + ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2 + ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 21, float [[ee2]]) + ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3 + ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 21, float [[ee3]]) + ; CHECK: insertelement <4 x float> 
poison, float [[ie0]], i64 0 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3 + %2 = call <4 x float> @llvm.exp2.v4f32(<4 x float> %a) + ret <4 x float> %2 +} + +declare float @llvm.exp2.f32(float) +declare half @llvm.exp2.f16(half) +declare <4 x float> @llvm.exp2.v4f32(<4 x float> %a) diff --git a/llvm/test/CodeGen/DirectX/fabs.ll b/llvm/test/CodeGen/DirectX/fabs.ll index becbdf8..6d903f1 100644 --- a/llvm/test/CodeGen/DirectX/fabs.ll +++ b/llvm/test/CodeGen/DirectX/fabs.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s +; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s ; Make sure dxil operation function calls for abs are generated for float, half, and double. @@ -27,6 +27,26 @@ entry: ret double %elt.abs } +; CHECK-LABEL: fabs_float4 +define noundef <4 x float> @fabs_float4(<4 x float> noundef %a) { +entry: + ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0 + ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 6, float [[ee0]]) + ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1 + ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 6, float [[ee1]]) + ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2 + ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 6, float [[ee2]]) + ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3 + ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 6, float [[ee3]]) + ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3 + %2 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a) + ret <4 x float> %2 +} + declare half 
@llvm.fabs.f16(half) declare float @llvm.fabs.f32(float) declare double @llvm.fabs.f64(double) +declare <4 x float> @llvm.fabs.v4f32(<4 x float>) diff --git a/llvm/test/CodeGen/DirectX/floor.ll b/llvm/test/CodeGen/DirectX/floor.ll index f79f160..eaab398 100644 --- a/llvm/test/CodeGen/DirectX/floor.ll +++ b/llvm/test/CodeGen/DirectX/floor.ll @@ -1,20 +1,39 @@ -; RUN: opt -S -passes=dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s +; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s ; Make sure dxil operation function calls for floor are generated for float and half. -define noundef float @floor_float(float noundef %a) #0 { +define noundef float @floor_float(float noundef %a) { entry: ; CHECK:call float @dx.op.unary.f32(i32 27, float %{{.*}}) %elt.floor = call float @llvm.floor.f32(float %a) ret float %elt.floor } -define noundef half @floor_half(half noundef %a) #0 { +define noundef half @floor_half(half noundef %a) { entry: ; CHECK:call half @dx.op.unary.f16(i32 27, half %{{.*}}) %elt.floor = call half @llvm.floor.f16(half %a) ret half %elt.floor } +define noundef <4 x float> @floor_float4(<4 x float> noundef %a) { +entry: + ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0 + ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 27, float [[ee0]]) + ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1 + ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 27, float [[ee1]]) + ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2 + ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 27, float [[ee2]]) + ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3 + ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 27, float [[ee3]]) + ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2 + ; CHECK: insertelement <4 x float> 
%{{.*}}, float [[ie3]], i64 3 + %2 = call <4 x float> @llvm.floor.v4f32(<4 x float> %a) + ret <4 x float> %2 +} + declare half @llvm.floor.f16(half) declare float @llvm.floor.f32(float) +declare <4 x float> @llvm.floor.v4f32(<4 x float>) diff --git a/llvm/test/CodeGen/DirectX/isinf.ll b/llvm/test/CodeGen/DirectX/isinf.ll index 295776b..03a00c4 100644 --- a/llvm/test/CodeGen/DirectX/isinf.ll +++ b/llvm/test/CodeGen/DirectX/isinf.ll @@ -1,25 +1,21 @@ ; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s ; Make sure dxil operation function calls for isinf are generated for float and half. -; CHECK: call i1 @dx.op.isSpecialFloat.f32(i32 9, float %{{.*}}) -; CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half %{{.*}}) -; Function Attrs: noinline nounwind optnone -define noundef i1 @isinf_float(float noundef %a) #0 { +define noundef i1 @isinf_float(float noundef %a) { entry: - %a.addr = alloca float, align 4 - store float %a, ptr %a.addr, align 4 - %0 = load float, ptr %a.addr, align 4 - %dx.isinf = call i1 @llvm.dx.isinf.f32(float %0) + ; CHECK: call i1 @dx.op.isSpecialFloat.f32(i32 9, float %{{.*}}) + %dx.isinf = call i1 @llvm.dx.isinf.f32(float %a) ret i1 %dx.isinf } -; Function Attrs: noinline nounwind optnone -define noundef i1 @isinf_half(half noundef %p0) #0 { +define noundef i1 @isinf_half(half noundef %a) { entry: - %p0.addr = alloca half, align 2 - store half %p0, ptr %p0.addr, align 2 - %0 = load half, ptr %p0.addr, align 2 - %dx.isinf = call i1 @llvm.dx.isinf.f16(half %0) + ; CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half %{{.*}}) + %dx.isinf = call i1 @llvm.dx.isinf.f16(half %a) ret i1 %dx.isinf } + + +declare i1 @llvm.dx.isinf.f16(half) +declare i1 @llvm.dx.isinf.f32(float) diff --git a/llvm/test/CodeGen/DirectX/llc-pipeline.ll b/llvm/test/CodeGen/DirectX/llc-pipeline.ll new file mode 100644 index 0000000..52bd891 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/llc-pipeline.ll @@ -0,0 +1,24 @@ +; RUN: llc 
-mtriple=dxil-pc-shadermodel6.3-library -debug-pass=Structure < %s -o /dev/null 2>&1 | \ +; RUN: grep -v "Verify generated machine code" | FileCheck %s + +; REQUIRES: asserts + +; CHECK-LABEL: Pass Arguments: +; CHECK-NEXT: Target Library Information +; CHECK-NEXT: ModulePass Manager +; CHECK-NEXT: DXIL Intrinsic Expansion +; CHECK-NEXT: FunctionPass Manager +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Scalarize vector operations +; CHECK-NEXT: DXIL Intrinsic Expansion +; CHECK-NEXT: DXIL Resource analysis +; CHECK-NEXT: DXIL Op Lowering +; CHECK-NEXT: DXIL Finalize Linkage +; CHECK-NEXT: DXIL resource Information +; CHECK-NEXT: DXIL Shader Flag Analysis +; CHECK-NEXT: DXIL Module Metadata analysis +; CHECK-NEXT: DXIL Translate Metadata +; CHECK-NEXT: DXIL Prepare Module +; CHECK-NEXT: DXIL Metadata Pretty Printer +; CHECK-NEXT: Print Module IR + diff --git a/llvm/test/CodeGen/DirectX/reversebits.ll b/llvm/test/CodeGen/DirectX/reversebits.ll index 1ade57b..b5530d0 100644 --- a/llvm/test/CodeGen/DirectX/reversebits.ll +++ b/llvm/test/CodeGen/DirectX/reversebits.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s +; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s ; Make sure dxil operation function calls for reversebits are generated for all integer types. 
@@ -26,6 +26,25 @@ entry: ret i64 %elt.bitreverse } +define noundef <4 x i32> @round_int324(<4 x i32> noundef %a) #0 { +entry: + ; CHECK: [[ee0:%.*]] = extractelement <4 x i32> %a, i64 0 + ; CHECK: [[ie0:%.*]] = call i32 @dx.op.unary.i32(i32 30, i32 [[ee0]]) + ; CHECK: [[ee1:%.*]] = extractelement <4 x i32> %a, i64 1 + ; CHECK: [[ie1:%.*]] = call i32 @dx.op.unary.i32(i32 30, i32 [[ee1]]) + ; CHECK: [[ee2:%.*]] = extractelement <4 x i32> %a, i64 2 + ; CHECK: [[ie2:%.*]] = call i32 @dx.op.unary.i32(i32 30, i32 [[ee2]]) + ; CHECK: [[ee3:%.*]] = extractelement <4 x i32> %a, i64 3 + ; CHECK: [[ie3:%.*]] = call i32 @dx.op.unary.i32(i32 30, i32 [[ee3]]) + ; CHECK: insertelement <4 x i32> poison, i32 [[ie0]], i64 0 + ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie1]], i64 1 + ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie2]], i64 2 + ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie3]], i64 3 + %2 = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %a) + ret <4 x i32> %2 +} + declare i16 @llvm.bitreverse.i16(i16) declare i32 @llvm.bitreverse.i32(i32) declare i64 @llvm.bitreverse.i64(i64) +declare <4 x i32> @llvm.bitreverse.v4i32(<4 x i32>) diff --git a/llvm/test/CodeGen/DirectX/round.ll b/llvm/test/CodeGen/DirectX/round.ll index db953fb..b08cbac 100644 --- a/llvm/test/CodeGen/DirectX/round.ll +++ b/llvm/test/CodeGen/DirectX/round.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s +; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s ; Make sure dxil operation function calls for round are generated for float and half. 
@@ -18,5 +18,25 @@ entry: ret float %elt.roundeven } +define noundef <4 x float> @round_float4(<4 x float> noundef %a) #0 { +entry: + ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0 + ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 26, float [[ee0]]) + ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1 + ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 26, float [[ee1]]) + ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2 + ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 26, float [[ee2]]) + ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3 + ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 26, float [[ee3]]) + ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3 + %2 = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %a) + ret <4 x float> %2 +} + + declare half @llvm.roundeven.f16(half) declare float @llvm.roundeven.f32(float) +declare <4 x float> @llvm.roundeven.v4f32(<4 x float>) diff --git a/llvm/test/CodeGen/DirectX/saturate.ll b/llvm/test/CodeGen/DirectX/saturate.ll index a855735..404cab7 100644 --- a/llvm/test/CodeGen/DirectX/saturate.ll +++ b/llvm/test/CodeGen/DirectX/saturate.ll @@ -2,7 +2,7 @@ ; Make sure the intrinsic dx.saturate is to appropriate DXIL op for half/float/double data types. 
; CHECK-LABEL: test_saturate_half -define noundef half @test_saturate_half(half noundef %p0) #0 { +define noundef half @test_saturate_half(half noundef %p0) { entry: ; CHECK: call half @dx.op.unary.f16(i32 7, half %p0) %hlsl.saturate = call half @llvm.dx.saturate.f16(half %p0) @@ -10,11 +10,8 @@ entry: ret half %hlsl.saturate } -; Function Attrs: nocallback nofree nosync nounwind willreturn -declare half @llvm.dx.saturate.f16(half) #1 - ; CHECK-LABEL: test_saturate_float -define noundef float @test_saturate_float(float noundef %p0) #0 { +define noundef float @test_saturate_float(float noundef %p0) { entry: ; CHECK: call float @dx.op.unary.f32(i32 7, float %p0) %hlsl.saturate = call float @llvm.dx.saturate.f32(float %p0) @@ -22,11 +19,8 @@ entry: ret float %hlsl.saturate } -; Function Attrs: nocallback nofree nosync nounwind willreturn -declare float @llvm.dx.saturate.f32(float) #1 - ; CHECK-LABEL: test_saturate_double -define noundef double @test_saturate_double(double noundef %p0) #0 { +define noundef double @test_saturate_double(double noundef %p0) { entry: ; CHECK: call double @dx.op.unary.f64(i32 7, double %p0) %hlsl.saturate = call double @llvm.dx.saturate.f64(double %p0) @@ -34,6 +28,7 @@ entry: ret double %hlsl.saturate } -; Function Attrs: nocallback nofree nosync nounwind willreturn -declare double @llvm.dx.saturate.f64(double) #1 +declare half @llvm.dx.saturate.f16(half) +declare float @llvm.dx.saturate.f32(float) +declare double @llvm.dx.saturate.f64(double) diff --git a/llvm/test/CodeGen/DirectX/scalar-store.ll b/llvm/test/CodeGen/DirectX/scalar-store.ll new file mode 100644 index 0000000..b970a28 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/scalar-store.ll @@ -0,0 +1,17 @@ +; RUN: opt -S -scalarizer -scalarize-load-store -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s +; RUN: llc %s -mtriple=dxil-pc-shadermodel6.3-library --filetype=asm -o - | FileCheck %s + +@"sharedData" = local_unnamed_addr addrspace(3) global [2 x <3 x 
float>] zeroinitializer, align 16 +; CHECK-LABEL: store_test +define void @store_test () local_unnamed_addr { + ; CHECK: store float 1.000000e+00, ptr addrspace(3) {{.*}}, align {{.*}} + ; CHECK: store float 2.000000e+00, ptr addrspace(3) {{.*}}, align {{.*}} + ; CHECK: store float 3.000000e+00, ptr addrspace(3) {{.*}}, align {{.*}} + ; CHECK: store float 2.000000e+00, ptr addrspace(3) {{.*}}, align {{.*}} + ; CHECK: store float 4.000000e+00, ptr addrspace(3) {{.*}}, align {{.*}} + ; CHECK: store float 6.000000e+00, ptr addrspace(3) {{.*}}, align {{.*}} + + store <3 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, ptr addrspace(3) @"sharedData", align 16 + store <3 x float> <float 2.000000e+00, float 4.000000e+00, float 6.000000e+00>, ptr addrspace(3) getelementptr inbounds (i8, ptr addrspace(3) @"sharedData", i32 16), align 16 + ret void + } diff --git a/llvm/test/CodeGen/DirectX/scalarize-two-calls.ll b/llvm/test/CodeGen/DirectX/scalarize-two-calls.ll new file mode 100644 index 0000000..a14c1de --- /dev/null +++ b/llvm/test/CodeGen/DirectX/scalarize-two-calls.ll @@ -0,0 +1,25 @@ +; RUN: llc %s -mtriple=dxil-pc-shadermodel6.3-library --filetype=asm -o - | FileCheck %s + +; CHECK: target triple = "dxilv1.3-pc-shadermodel6.3-library" +; CHECK-LABEL: cos_sin_float_test +define noundef <4 x float> @cos_sin_float_test(<4 x float> noundef %a) { + ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0 + ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee0]]) + ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1 + ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee1]]) + ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2 + ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee2]]) + ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3 + ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee3]]) + ; CHECK: [[ie4:%.*]] = call float @dx.op.unary.f32(i32 12, 
float [[ie0]]) + ; CHECK: [[ie5:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ie1]]) + ; CHECK: [[ie6:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ie2]]) + ; CHECK: [[ie7:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ie3]]) + ; CHECK: insertelement <4 x float> poison, float [[ie4]], i64 0 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie5]], i64 1 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie6]], i64 2 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie7]], i64 3 + %2 = tail call <4 x float> @llvm.sin.v4f32(<4 x float> %a) + %3 = tail call <4 x float> @llvm.cos.v4f32(<4 x float> %2) + ret <4 x float> %3 +} diff --git a/llvm/test/CodeGen/DirectX/sin.ll b/llvm/test/CodeGen/DirectX/sin.ll index f309a36..ac6b217 100644 --- a/llvm/test/CodeGen/DirectX/sin.ll +++ b/llvm/test/CodeGen/DirectX/sin.ll @@ -1,25 +1,39 @@ -; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s +; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s ; Make sure dxil operation function calls for sin are generated for float and half. 
-; CHECK:call float @dx.op.unary.f32(i32 13, float %{{.*}}) -; CHECK:call half @dx.op.unary.f16(i32 13, half %{{.*}}) -; Function Attrs: noinline nounwind optnone -define noundef float @sin_float(float noundef %a) #0 { +define noundef float @sin_float(float noundef %a) { entry: - %a.addr = alloca float, align 4 - store float %a, ptr %a.addr, align 4 - %0 = load float, ptr %a.addr, align 4 - %1 = call float @llvm.sin.f32(float %0) + ; CHECK:call float @dx.op.unary.f32(i32 13, float %{{.*}}) + %1 = call float @llvm.sin.f32(float %a) ret float %1 } -; Function Attrs: noinline nounwind optnone -define noundef half @sin_half(half noundef %a) #0 { +define noundef half @sin_half(half noundef %a) { entry: - %a.addr = alloca half, align 2 - store half %a, ptr %a.addr, align 2 - %0 = load half, ptr %a.addr, align 2 - %1 = call half @llvm.sin.f16(half %0) + ; CHECK:call half @dx.op.unary.f16(i32 13, half %{{.*}}) + %1 = call half @llvm.sin.f16(half %a) ret half %1 } + +define noundef <4 x float> @sin_float4(<4 x float> noundef %a) { +entry: + ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0 + ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee0]]) + ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1 + ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee1]]) + ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2 + ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee2]]) + ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3 + ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee3]]) + ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3 + %2 = call <4 x float> @llvm.sin.v4f32(<4 x float> %a) + ret <4 x float> %2 +} + +declare half @llvm.sin.f16(half) +declare float 
@llvm.sin.f32(float) +declare <4 x float> @llvm.sin.v4f32(<4 x float>) diff --git a/llvm/test/CodeGen/DirectX/sinh.ll b/llvm/test/CodeGen/DirectX/sinh.ll index d4d3eda..deba726e 100644 --- a/llvm/test/CodeGen/DirectX/sinh.ll +++ b/llvm/test/CodeGen/DirectX/sinh.ll @@ -1,20 +1,39 @@ -; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s +; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s ; Make sure dxil operation function calls for sinh are generated for float and half. -define noundef float @tan_float(float noundef %a) { +define noundef float @sinh_float(float noundef %a) { entry: ; CHECK:call float @dx.op.unary.f32(i32 19, float %{{.*}}) %elt.sinh = call float @llvm.sinh.f32(float %a) ret float %elt.sinh } -define noundef half @tan_half(half noundef %a) { +define noundef half @sinh_half(half noundef %a) { entry: ; CHECK:call half @dx.op.unary.f16(i32 19, half %{{.*}}) %elt.sinh = call half @llvm.sinh.f16(half %a) ret half %elt.sinh } +define noundef <4 x float> @sinh_float4(<4 x float> noundef %a) { +entry: + ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0 + ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 19, float [[ee0]]) + ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1 + ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 19, float [[ee1]]) + ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2 + ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 19, float [[ee2]]) + ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3 + ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 19, float [[ee3]]) + ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3 + %2 = call <4 x float> @llvm.sinh.v4f32(<4 x float> %a) + ret <4 x float> 
%2 +} + declare half @llvm.sinh.f16(half) declare float @llvm.sinh.f32(float) +declare <4 x float> @llvm.sinh.v4f32(<4 x float>) diff --git a/llvm/test/CodeGen/DirectX/sqrt.ll b/llvm/test/CodeGen/DirectX/sqrt.ll index 792fbc8..e2955b4 100644 --- a/llvm/test/CodeGen/DirectX/sqrt.ll +++ b/llvm/test/CodeGen/DirectX/sqrt.ll @@ -1,20 +1,39 @@ -; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s +; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s ; Make sure dxil operation function calls for sqrt are generated for float and half. -define noundef float @sqrt_float(float noundef %a) #0 { +define noundef float @sqrt_float(float noundef %a) { entry: ; CHECK:call float @dx.op.unary.f32(i32 24, float %{{.*}}) %elt.sqrt = call float @llvm.sqrt.f32(float %a) ret float %elt.sqrt } -define noundef half @sqrt_half(half noundef %a) #0 { +define noundef half @sqrt_half(half noundef %a) { entry: ; CHECK:call half @dx.op.unary.f16(i32 24, half %{{.*}}) %elt.sqrt = call half @llvm.sqrt.f16(half %a) ret half %elt.sqrt } +define noundef <4 x float> @sqrt_float4(<4 x float> noundef %a) { +entry: + ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0 + ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 24, float [[ee0]]) + ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1 + ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 24, float [[ee1]]) + ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2 + ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 24, float [[ee2]]) + ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3 + ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 24, float [[ee3]]) + ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3 + %2 = 
call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a) + ret <4 x float> %2 +} + declare half @llvm.sqrt.f16(half) declare float @llvm.sqrt.f32(float) +declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) diff --git a/llvm/test/CodeGen/DirectX/step.ll b/llvm/test/CodeGen/DirectX/step.ll new file mode 100644 index 0000000..0393c15 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/step.ll @@ -0,0 +1,78 @@ +; RUN: opt -S -dxil-intrinsic-expansion < %s | FileCheck %s --check-prefix=CHECK
+; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library < %s | FileCheck %s --check-prefix=CHECK
+
+; Make sure the dx.step intrinsic is expanded into an fcmp olt + select sequence for half/float scalars and vectors.
+
+declare half @llvm.dx.step.f16(half, half)
+declare <2 x half> @llvm.dx.step.v2f16(<2 x half>, <2 x half>)
+declare <3 x half> @llvm.dx.step.v3f16(<3 x half>, <3 x half>)
+declare <4 x half> @llvm.dx.step.v4f16(<4 x half>, <4 x half>)
+
+declare float @llvm.dx.step.f32(float, float)
+declare <2 x float> @llvm.dx.step.v2f32(<2 x float>, <2 x float>)
+declare <3 x float> @llvm.dx.step.v3f32(<3 x float>, <3 x float>)
+declare <4 x float> @llvm.dx.step.v4f32(<4 x float>, <4 x float>)
+
+; Scalar half case: the call to llvm.dx.step.f16(%p0, %p1) is expected to be
+; replaced by an ordered less-than compare of %p1 against %p0 feeding a select
+; that yields 0xH0000 when the compare is true and 0xH3C00 (half 1.0) otherwise.
+; The checks name %0 and %1 literally, so they rely on the compare and select
+; being the first two unnamed values in the function.
+define noundef half @test_step_half(half noundef %p0, half noundef %p1) {
+entry:
+ ; CHECK: %0 = fcmp olt half %p1, %p0
+ ; CHECK: %1 = select i1 %0, half 0xH0000, half 0xH3C00
+ %hlsl.step = call half @llvm.dx.step.f16(half %p0, half %p1)
+ ret half %hlsl.step
+}
+
+; Two-element half vector case: llvm.dx.step.v2f16(%p0, %p1) is expected to
+; become a lane-wise ordered less-than compare of %p1 against %p0 and a select
+; between zeroinitializer (compare true) and a vector of 0xH3C00 (half 1.0).
+; Note the operand order: %p1 is the left-hand side of the compare.
+define noundef <2 x half> @test_step_half2(<2 x half> noundef %p0, <2 x half> noundef %p1) {
+entry:
+ ; CHECK: %0 = fcmp olt <2 x half> %p1, %p0
+ ; CHECK: %1 = select <2 x i1> %0, <2 x half> zeroinitializer, <2 x half> <half 0xH3C00, half 0xH3C00>
+ %hlsl.step = call <2 x half> @llvm.dx.step.v2f16(<2 x half> %p0, <2 x half> %p1)
+ ret <2 x half> %hlsl.step
+}
+
+; Three-element half vector case: the step call is expected to be rewritten as
+; a <3 x i1> ordered less-than compare (%p1 against %p0) that selects
+; zeroinitializer when true and a vector of 0xH3C00 (half 1.0) when false.
+define noundef <3 x half> @test_step_half3(<3 x half> noundef %p0, <3 x half> noundef %p1) {
+entry:
+ ; CHECK: %0 = fcmp olt <3 x half> %p1, %p0
+ ; CHECK: %1 = select <3 x i1> %0, <3 x half> zeroinitializer, <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>
+ %hlsl.step = call <3 x half> @llvm.dx.step.v3f16(<3 x half> %p0, <3 x half> %p1)
+ ret <3 x half> %hlsl.step
+}
+
+; Four-element half vector case: same expected shape as the narrower half
+; vectors -- an ordered less-than compare of %p1 against %p0 producing
+; <4 x i1>, then a select of zeroinitializer (true) or all-0xH3C00 (false).
+define noundef <4 x half> @test_step_half4(<4 x half> noundef %p0, <4 x half> noundef %p1) {
+entry:
+ ; CHECK: %0 = fcmp olt <4 x half> %p1, %p0
+ ; CHECK: %1 = select <4 x i1> %0, <4 x half> zeroinitializer, <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>
+ %hlsl.step = call <4 x half> @llvm.dx.step.v4f16(<4 x half> %p0, <4 x half> %p1)
+ ret <4 x half> %hlsl.step
+}
+
+; Scalar float case: llvm.dx.step.f32(%p0, %p1) is expected to be replaced by
+; an ordered less-than compare of %p1 against %p0 and a select that returns
+; 0.0 when the compare is true and 1.0 otherwise.
+; The checks name %0 and %1 literally, so they rely on the compare and select
+; being the first two unnamed values in the function.
+define noundef float @test_step_float(float noundef %p0, float noundef %p1) {
+entry:
+ ; CHECK: %0 = fcmp olt float %p1, %p0
+ ; CHECK: %1 = select i1 %0, float 0.000000e+00, float 1.000000e+00
+ %hlsl.step = call float @llvm.dx.step.f32(float %p0, float %p1)
+ ret float %hlsl.step
+}
+
+; Two-element float vector case: llvm.dx.step.v2f32(%p0, %p1) is expected to
+; become a lane-wise ordered less-than compare of %p1 against %p0 and a select
+; between zeroinitializer (compare true) and a vector of 1.0 (compare false).
+; Note the operand order: %p1 is the left-hand side of the compare.
+define noundef <2 x float> @test_step_float2(<2 x float> noundef %p0, <2 x float> noundef %p1) {
+entry:
+ ; CHECK: %0 = fcmp olt <2 x float> %p1, %p0
+ ; CHECK: %1 = select <2 x i1> %0, <2 x float> zeroinitializer, <2 x float> <float 1.000000e+00, float 1.000000e+00>
+ %hlsl.step = call <2 x float> @llvm.dx.step.v2f32(<2 x float> %p0, <2 x float> %p1)
+ ret <2 x float> %hlsl.step
+}
+
+; Three-element float vector case: the step call is expected to be rewritten
+; as a <3 x i1> ordered less-than compare (%p1 against %p0) that selects
+; zeroinitializer when true and a vector of 1.0 when false.
+define noundef <3 x float> @test_step_float3(<3 x float> noundef %p0, <3 x float> noundef %p1) {
+entry:
+ ; CHECK: %0 = fcmp olt <3 x float> %p1, %p0
+ ; CHECK: %1 = select <3 x i1> %0, <3 x float> zeroinitializer, <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+ %hlsl.step = call <3 x float> @llvm.dx.step.v3f32(<3 x float> %p0, <3 x float> %p1)
+ ret <3 x float> %hlsl.step
+}
+
+; Four-element float vector case: same expected shape as the narrower float
+; vectors -- an ordered less-than compare of %p1 against %p0 producing
+; <4 x i1>, then a select of zeroinitializer (true) or all-1.0 (false).
+define noundef <4 x float> @test_step_float4(<4 x float> noundef %p0, <4 x float> noundef %p1) {
+entry:
+ ; CHECK: %0 = fcmp olt <4 x float> %p1, %p0
+ ; CHECK: %1 = select <4 x i1> %0, <4 x float> zeroinitializer, <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+ %hlsl.step = call <4 x float> @llvm.dx.step.v4f32(<4 x float> %p0, <4 x float> %p1)
+ ret <4 x float> %hlsl.step
+}
diff --git a/llvm/test/CodeGen/DirectX/tan.ll b/llvm/test/CodeGen/DirectX/tan.ll index 6f7beb5..cf6965a 100644 --- a/llvm/test/CodeGen/DirectX/tan.ll +++ b/llvm/test/CodeGen/DirectX/tan.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s +; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s ; Make sure dxil operation function calls for tan are generated for float and half. @@ -16,5 +16,24 @@ entry: ret half %elt.tan } +define noundef <4 x float> @tan_float4(<4 x float> noundef %a) #0 { +entry: + ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0 + ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 14, float [[ee0]]) + ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1 + ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 14, float [[ee1]]) + ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2 + ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 14, float [[ee2]]) + ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3 + ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 14, float [[ee3]]) + ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3 + %2 = call <4 x float> @llvm.tan.v4f32(<4 x float> %a) + ret <4 x float> %2 +} + declare half @llvm.tan.f16(half) declare float @llvm.tan.f32(float) +declare <4 x float> @llvm.tan.v4f32(<4 x float>) diff --git a/llvm/test/CodeGen/DirectX/tanh.ll b/llvm/test/CodeGen/DirectX/tanh.ll index e6642d9..54ec6f2 100644 --- a/llvm/test/CodeGen/DirectX/tanh.ll +++ b/llvm/test/CodeGen/DirectX/tanh.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s +; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library 
%s | FileCheck %s ; Make sure dxil operation function calls for tanh are generated for float and half. @@ -16,5 +16,24 @@ entry: ret half %elt.tanh } +define noundef <4 x float> @tanh_float4(<4 x float> noundef %a) #0 { +entry: + ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0 + ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 20, float [[ee0]]) + ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1 + ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 20, float [[ee1]]) + ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2 + ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 20, float [[ee2]]) + ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3 + ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 20, float [[ee3]]) + ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3 + %2 = call <4 x float> @llvm.tanh.v4f32(<4 x float> %a) + ret <4 x float> %2 +} + declare half @llvm.tanh.f16(half) declare float @llvm.tanh.f32(float) +declare <4 x float> @llvm.tanh.v4f32(<4 x float>) diff --git a/llvm/test/CodeGen/DirectX/trunc.ll b/llvm/test/CodeGen/DirectX/trunc.ll index f00b737..6d9c222 100644 --- a/llvm/test/CodeGen/DirectX/trunc.ll +++ b/llvm/test/CodeGen/DirectX/trunc.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s +; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s ; Make sure dxil operation function calls for trunc are generated for float and half. 
@@ -16,5 +16,24 @@ entry: ret half %elt.trunc } +define noundef <4 x float> @trunc_float4(<4 x float> noundef %a) #0 { +entry: + ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0 + ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 29, float [[ee0]]) + ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1 + ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 29, float [[ee1]]) + ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2 + ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 29, float [[ee2]]) + ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3 + ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 29, float [[ee3]]) + ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2 + ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3 + %2 = call <4 x float> @llvm.trunc.v4f32(<4 x float> %a) + ret <4 x float> %2 +} + declare half @llvm.trunc.f16(half) declare float @llvm.trunc.f32(float) +declare <4 x float> @llvm.trunc.v4f32(<4 x float>) diff --git a/llvm/test/CodeGen/LoongArch/fp16-promote.ll b/llvm/test/CodeGen/LoongArch/fp16-promote.ll index 75f920b..03965ac 100644 --- a/llvm/test/CodeGen/LoongArch/fp16-promote.ll +++ b/llvm/test/CodeGen/LoongArch/fp16-promote.ll @@ -126,42 +126,40 @@ define void @test_fptrunc_double(double %d, ptr %p) nounwind { define half @test_fadd_reg(half %a, half %b) nounwind { ; LA32-LABEL: test_fadd_reg: ; LA32: # %bb.0: -; LA32-NEXT: addi.w $sp, $sp, -32 -; LA32-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill -; LA32-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill -; LA32-NEXT: fst.d $fs1, $sp, 8 # 8-byte Folded Spill +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: st.w $fp, $sp, 8 # 4-byte Folded Spill +; LA32-NEXT: fst.d $fs0, $sp, 0 # 8-byte Folded Spill +; LA32-NEXT: move $fp, $a0 
+; LA32-NEXT: move $a0, $a1 +; LA32-NEXT: bl %plt(__gnu_h2f_ieee) ; LA32-NEXT: fmov.s $fs0, $fa0 -; LA32-NEXT: fmov.s $fa0, $fa1 -; LA32-NEXT: bl %plt(__gnu_f2h_ieee) +; LA32-NEXT: move $a0, $fp ; LA32-NEXT: bl %plt(__gnu_h2f_ieee) -; LA32-NEXT: fmov.s $fs1, $fa0 -; LA32-NEXT: fmov.s $fa0, $fs0 +; LA32-NEXT: fadd.s $fa0, $fa0, $fs0 ; LA32-NEXT: bl %plt(__gnu_f2h_ieee) -; LA32-NEXT: bl %plt(__gnu_h2f_ieee) -; LA32-NEXT: fadd.s $fa0, $fa0, $fs1 -; LA32-NEXT: fld.d $fs1, $sp, 8 # 8-byte Folded Reload -; LA32-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload -; LA32-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 32 +; LA32-NEXT: fld.d $fs0, $sp, 0 # 8-byte Folded Reload +; LA32-NEXT: ld.w $fp, $sp, 8 # 4-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 ; LA32-NEXT: ret ; ; LA64-LABEL: test_fadd_reg: ; LA64: # %bb.0: ; LA64-NEXT: addi.d $sp, $sp, -32 ; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill -; LA64-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill -; LA64-NEXT: fst.d $fs1, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill +; LA64-NEXT: fst.d $fs0, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: move $fp, $a0 +; LA64-NEXT: move $a0, $a1 +; LA64-NEXT: bl %plt(__gnu_h2f_ieee) ; LA64-NEXT: fmov.s $fs0, $fa0 -; LA64-NEXT: fmov.s $fa0, $fa1 -; LA64-NEXT: bl %plt(__gnu_f2h_ieee) +; LA64-NEXT: move $a0, $fp ; LA64-NEXT: bl %plt(__gnu_h2f_ieee) -; LA64-NEXT: fmov.s $fs1, $fa0 -; LA64-NEXT: fmov.s $fa0, $fs0 +; LA64-NEXT: fadd.s $fa0, $fa0, $fs0 ; LA64-NEXT: bl %plt(__gnu_f2h_ieee) -; LA64-NEXT: bl %plt(__gnu_h2f_ieee) -; LA64-NEXT: fadd.s $fa0, $fa0, $fs1 -; LA64-NEXT: fld.d $fs1, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload +; LA64-NEXT: fld.d $fs0, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload ; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload ; LA64-NEXT: addi.d $sp, 
$sp, 32 ; LA64-NEXT: ret @@ -177,16 +175,16 @@ define void @test_fadd_mem(ptr %p, ptr %q) nounwind { ; LA32-NEXT: st.w $fp, $sp, 24 # 4-byte Folded Spill ; LA32-NEXT: st.w $s0, $sp, 20 # 4-byte Folded Spill ; LA32-NEXT: fst.d $fs0, $sp, 8 # 8-byte Folded Spill -; LA32-NEXT: move $fp, $a1 -; LA32-NEXT: move $s0, $a0 -; LA32-NEXT: ld.hu $a0, $a0, 0 +; LA32-NEXT: move $fp, $a0 +; LA32-NEXT: ld.hu $s0, $a0, 0 +; LA32-NEXT: ld.hu $a0, $a1, 0 ; LA32-NEXT: bl %plt(__gnu_h2f_ieee) ; LA32-NEXT: fmov.s $fs0, $fa0 -; LA32-NEXT: ld.hu $a0, $fp, 0 +; LA32-NEXT: move $a0, $s0 ; LA32-NEXT: bl %plt(__gnu_h2f_ieee) -; LA32-NEXT: fadd.s $fa0, $fs0, $fa0 +; LA32-NEXT: fadd.s $fa0, $fa0, $fs0 ; LA32-NEXT: bl %plt(__gnu_f2h_ieee) -; LA32-NEXT: st.h $a0, $s0, 0 +; LA32-NEXT: st.h $a0, $fp, 0 ; LA32-NEXT: fld.d $fs0, $sp, 8 # 8-byte Folded Reload ; LA32-NEXT: ld.w $s0, $sp, 20 # 4-byte Folded Reload ; LA32-NEXT: ld.w $fp, $sp, 24 # 4-byte Folded Reload @@ -201,16 +199,16 @@ define void @test_fadd_mem(ptr %p, ptr %q) nounwind { ; LA64-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill ; LA64-NEXT: st.d $s0, $sp, 8 # 8-byte Folded Spill ; LA64-NEXT: fst.d $fs0, $sp, 0 # 8-byte Folded Spill -; LA64-NEXT: move $fp, $a1 -; LA64-NEXT: move $s0, $a0 -; LA64-NEXT: ld.hu $a0, $a0, 0 +; LA64-NEXT: move $fp, $a0 +; LA64-NEXT: ld.hu $s0, $a0, 0 +; LA64-NEXT: ld.hu $a0, $a1, 0 ; LA64-NEXT: bl %plt(__gnu_h2f_ieee) ; LA64-NEXT: fmov.s $fs0, $fa0 -; LA64-NEXT: ld.hu $a0, $fp, 0 +; LA64-NEXT: move $a0, $s0 ; LA64-NEXT: bl %plt(__gnu_h2f_ieee) -; LA64-NEXT: fadd.s $fa0, $fs0, $fa0 +; LA64-NEXT: fadd.s $fa0, $fa0, $fs0 ; LA64-NEXT: bl %plt(__gnu_f2h_ieee) -; LA64-NEXT: st.h $a0, $s0, 0 +; LA64-NEXT: st.h $a0, $fp, 0 ; LA64-NEXT: fld.d $fs0, $sp, 0 # 8-byte Folded Reload ; LA64-NEXT: ld.d $s0, $sp, 8 # 8-byte Folded Reload ; LA64-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload @@ -227,42 +225,40 @@ define void @test_fadd_mem(ptr %p, ptr %q) nounwind { define half @test_fmul_reg(half %a, half %b) nounwind { ; 
LA32-LABEL: test_fmul_reg: ; LA32: # %bb.0: -; LA32-NEXT: addi.w $sp, $sp, -32 -; LA32-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill -; LA32-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill -; LA32-NEXT: fst.d $fs1, $sp, 8 # 8-byte Folded Spill +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: st.w $fp, $sp, 8 # 4-byte Folded Spill +; LA32-NEXT: fst.d $fs0, $sp, 0 # 8-byte Folded Spill +; LA32-NEXT: move $fp, $a0 +; LA32-NEXT: move $a0, $a1 +; LA32-NEXT: bl %plt(__gnu_h2f_ieee) ; LA32-NEXT: fmov.s $fs0, $fa0 -; LA32-NEXT: fmov.s $fa0, $fa1 -; LA32-NEXT: bl %plt(__gnu_f2h_ieee) +; LA32-NEXT: move $a0, $fp ; LA32-NEXT: bl %plt(__gnu_h2f_ieee) -; LA32-NEXT: fmov.s $fs1, $fa0 -; LA32-NEXT: fmov.s $fa0, $fs0 +; LA32-NEXT: fmul.s $fa0, $fa0, $fs0 ; LA32-NEXT: bl %plt(__gnu_f2h_ieee) -; LA32-NEXT: bl %plt(__gnu_h2f_ieee) -; LA32-NEXT: fmul.s $fa0, $fa0, $fs1 -; LA32-NEXT: fld.d $fs1, $sp, 8 # 8-byte Folded Reload -; LA32-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload -; LA32-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 32 +; LA32-NEXT: fld.d $fs0, $sp, 0 # 8-byte Folded Reload +; LA32-NEXT: ld.w $fp, $sp, 8 # 4-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 ; LA32-NEXT: ret ; ; LA64-LABEL: test_fmul_reg: ; LA64: # %bb.0: ; LA64-NEXT: addi.d $sp, $sp, -32 ; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill -; LA64-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill -; LA64-NEXT: fst.d $fs1, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill +; LA64-NEXT: fst.d $fs0, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: move $fp, $a0 +; LA64-NEXT: move $a0, $a1 +; LA64-NEXT: bl %plt(__gnu_h2f_ieee) ; LA64-NEXT: fmov.s $fs0, $fa0 -; LA64-NEXT: fmov.s $fa0, $fa1 -; LA64-NEXT: bl %plt(__gnu_f2h_ieee) +; LA64-NEXT: move $a0, $fp ; LA64-NEXT: bl %plt(__gnu_h2f_ieee) -; LA64-NEXT: fmov.s $fs1, $fa0 -; LA64-NEXT: 
fmov.s $fa0, $fs0 +; LA64-NEXT: fmul.s $fa0, $fa0, $fs0 ; LA64-NEXT: bl %plt(__gnu_f2h_ieee) -; LA64-NEXT: bl %plt(__gnu_h2f_ieee) -; LA64-NEXT: fmul.s $fa0, $fa0, $fs1 -; LA64-NEXT: fld.d $fs1, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload +; LA64-NEXT: fld.d $fs0, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload ; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload ; LA64-NEXT: addi.d $sp, $sp, 32 ; LA64-NEXT: ret @@ -278,16 +274,16 @@ define void @test_fmul_mem(ptr %p, ptr %q) nounwind { ; LA32-NEXT: st.w $fp, $sp, 24 # 4-byte Folded Spill ; LA32-NEXT: st.w $s0, $sp, 20 # 4-byte Folded Spill ; LA32-NEXT: fst.d $fs0, $sp, 8 # 8-byte Folded Spill -; LA32-NEXT: move $fp, $a1 -; LA32-NEXT: move $s0, $a0 -; LA32-NEXT: ld.hu $a0, $a0, 0 +; LA32-NEXT: move $fp, $a0 +; LA32-NEXT: ld.hu $s0, $a0, 0 +; LA32-NEXT: ld.hu $a0, $a1, 0 ; LA32-NEXT: bl %plt(__gnu_h2f_ieee) ; LA32-NEXT: fmov.s $fs0, $fa0 -; LA32-NEXT: ld.hu $a0, $fp, 0 +; LA32-NEXT: move $a0, $s0 ; LA32-NEXT: bl %plt(__gnu_h2f_ieee) -; LA32-NEXT: fmul.s $fa0, $fs0, $fa0 +; LA32-NEXT: fmul.s $fa0, $fa0, $fs0 ; LA32-NEXT: bl %plt(__gnu_f2h_ieee) -; LA32-NEXT: st.h $a0, $s0, 0 +; LA32-NEXT: st.h $a0, $fp, 0 ; LA32-NEXT: fld.d $fs0, $sp, 8 # 8-byte Folded Reload ; LA32-NEXT: ld.w $s0, $sp, 20 # 4-byte Folded Reload ; LA32-NEXT: ld.w $fp, $sp, 24 # 4-byte Folded Reload @@ -302,16 +298,16 @@ define void @test_fmul_mem(ptr %p, ptr %q) nounwind { ; LA64-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill ; LA64-NEXT: st.d $s0, $sp, 8 # 8-byte Folded Spill ; LA64-NEXT: fst.d $fs0, $sp, 0 # 8-byte Folded Spill -; LA64-NEXT: move $fp, $a1 -; LA64-NEXT: move $s0, $a0 -; LA64-NEXT: ld.hu $a0, $a0, 0 +; LA64-NEXT: move $fp, $a0 +; LA64-NEXT: ld.hu $s0, $a0, 0 +; LA64-NEXT: ld.hu $a0, $a1, 0 ; LA64-NEXT: bl %plt(__gnu_h2f_ieee) ; LA64-NEXT: fmov.s $fs0, $fa0 -; LA64-NEXT: ld.hu $a0, $fp, 0 +; LA64-NEXT: move $a0, $s0 ; LA64-NEXT: bl %plt(__gnu_h2f_ieee) -; 
LA64-NEXT: fmul.s $fa0, $fs0, $fa0 +; LA64-NEXT: fmul.s $fa0, $fa0, $fs0 ; LA64-NEXT: bl %plt(__gnu_f2h_ieee) -; LA64-NEXT: st.h $a0, $s0, 0 +; LA64-NEXT: st.h $a0, $fp, 0 ; LA64-NEXT: fld.d $fs0, $sp, 0 # 8-byte Folded Reload ; LA64-NEXT: ld.d $s0, $sp, 8 # 8-byte Folded Reload ; LA64-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload @@ -324,3 +320,61 @@ define void @test_fmul_mem(ptr %p, ptr %q) nounwind { store half %r, ptr %p ret void } + +define half @freeze_half_undef() nounwind { +; LA32-LABEL: freeze_half_undef: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: movgr2fr.w $fa0, $zero +; LA32-NEXT: bl %plt(__gnu_f2h_ieee) +; LA32-NEXT: bl %plt(__gnu_h2f_ieee) +; LA32-NEXT: fadd.s $fa0, $fa0, $fa0 +; LA32-NEXT: bl %plt(__gnu_f2h_ieee) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: freeze_half_undef: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: movgr2fr.w $fa0, $zero +; LA64-NEXT: bl %plt(__gnu_f2h_ieee) +; LA64-NEXT: bl %plt(__gnu_h2f_ieee) +; LA64-NEXT: fadd.s $fa0, $fa0, $fa0 +; LA64-NEXT: bl %plt(__gnu_f2h_ieee) +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret + %y1 = freeze half undef + %t1 = fadd half %y1, %y1 + ret half %t1 +} + +define half @freeze_half_poison(half %maybe.poison) nounwind { +; LA32-LABEL: freeze_half_poison: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: bl %plt(__gnu_h2f_ieee) +; LA32-NEXT: fadd.s $fa0, $fa0, $fa0 +; LA32-NEXT: bl %plt(__gnu_f2h_ieee) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: freeze_half_poison: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte 
Folded Spill +; LA64-NEXT: bl %plt(__gnu_h2f_ieee) +; LA64-NEXT: fadd.s $fa0, $fa0, $fa0 +; LA64-NEXT: bl %plt(__gnu_f2h_ieee) +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret + %y1 = freeze half %maybe.poison + %t1 = fadd half %y1, %y1 + ret half %t1 +} diff --git a/llvm/test/CodeGen/NVPTX/lower-alloca.ll b/llvm/test/CodeGen/NVPTX/lower-alloca.ll index b1c34c8..400184a 100644 --- a/llvm/test/CodeGen/NVPTX/lower-alloca.ll +++ b/llvm/test/CodeGen/NVPTX/lower-alloca.ll @@ -1,4 +1,5 @@ ; RUN: opt < %s -S -nvptx-lower-alloca -infer-address-spaces | FileCheck %s +; RUN: opt < %s -S -nvptx-lower-alloca | FileCheck %s --check-prefix LOWERALLOCAONLY ; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck %s --check-prefix PTX ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_35 | %ptxas-verify %} @@ -11,13 +12,32 @@ define void @kernel() { %A = alloca i32 ; CHECK: addrspacecast ptr %A to ptr addrspace(5) ; CHECK: store i32 0, ptr addrspace(5) {{%.+}} +; LOWERALLOCAONLY: [[V1:%.*]] = addrspacecast ptr %A to ptr addrspace(5) +; LOWERALLOCAONLY: [[V2:%.*]] = addrspacecast ptr addrspace(5) [[V1]] to ptr +; LOWERALLOCAONLY: store i32 0, ptr [[V2]], align 4 ; PTX: st.local.u32 [{{%rd[0-9]+}}], {{%r[0-9]+}} store i32 0, ptr %A call void @callee(ptr %A) ret void } +define void @alloca_in_explicit_local_as() { +; LABEL: @lower_alloca_addrspace5 +; PTX-LABEL: .visible .func alloca_in_explicit_local_as( + %A = alloca i32, addrspace(5) +; CHECK: store i32 0, ptr addrspace(5) {{%.+}} +; PTX: st.local.u32 [%SP+0], {{%r[0-9]+}} +; LOWERALLOCAONLY: [[V1:%.*]] = addrspacecast ptr addrspace(5) %A to ptr +; LOWERALLOCAONLY: store i32 0, ptr [[V1]], align 4 + store i32 0, ptr addrspace(5) %A + call void @callee(ptr addrspace(5) %A) + ret void +} + declare void @callee(ptr) +declare void @callee_addrspace5(ptr addrspace(5)) !nvvm.annotations = !{!0} +!nvvm.annotations = !{!1} !0 = !{ptr @kernel, !"kernel", i32 1} +!1 = !{ptr 
@alloca_in_explicit_local_as, !"alloca_in_explicit_local_as", i32 1} diff --git a/llvm/test/CodeGen/RISCV/double-fcmp-strict.ll b/llvm/test/CodeGen/RISCV/double-fcmp-strict.ll index e864d8f..949668f 100644 --- a/llvm/test/CodeGen/RISCV/double-fcmp-strict.ll +++ b/llvm/test/CodeGen/RISCV/double-fcmp-strict.ll @@ -68,18 +68,18 @@ define i32 @fcmp_ogt(double %a, double %b) nounwind strictfp { ; ; RV32IZFINXZDINX-LABEL: fcmp_ogt: ; RV32IZFINXZDINX: # %bb.0: -; RV32IZFINXZDINX-NEXT: csrr a5, fflags +; RV32IZFINXZDINX-NEXT: frflags a5 ; RV32IZFINXZDINX-NEXT: flt.d a4, a2, a0 -; RV32IZFINXZDINX-NEXT: csrw fflags, a5 +; RV32IZFINXZDINX-NEXT: fsflags a5 ; RV32IZFINXZDINX-NEXT: feq.d zero, a2, a0 ; RV32IZFINXZDINX-NEXT: mv a0, a4 ; RV32IZFINXZDINX-NEXT: ret ; ; RV64IZFINXZDINX-LABEL: fcmp_ogt: ; RV64IZFINXZDINX: # %bb.0: -; RV64IZFINXZDINX-NEXT: csrr a3, fflags +; RV64IZFINXZDINX-NEXT: frflags a3 ; RV64IZFINXZDINX-NEXT: flt.d a2, a1, a0 -; RV64IZFINXZDINX-NEXT: csrw fflags, a3 +; RV64IZFINXZDINX-NEXT: fsflags a3 ; RV64IZFINXZDINX-NEXT: feq.d zero, a1, a0 ; RV64IZFINXZDINX-NEXT: mv a0, a2 ; RV64IZFINXZDINX-NEXT: ret @@ -119,18 +119,18 @@ define i32 @fcmp_oge(double %a, double %b) nounwind strictfp { ; ; RV32IZFINXZDINX-LABEL: fcmp_oge: ; RV32IZFINXZDINX: # %bb.0: -; RV32IZFINXZDINX-NEXT: csrr a5, fflags +; RV32IZFINXZDINX-NEXT: frflags a5 ; RV32IZFINXZDINX-NEXT: fle.d a4, a2, a0 -; RV32IZFINXZDINX-NEXT: csrw fflags, a5 +; RV32IZFINXZDINX-NEXT: fsflags a5 ; RV32IZFINXZDINX-NEXT: feq.d zero, a2, a0 ; RV32IZFINXZDINX-NEXT: mv a0, a4 ; RV32IZFINXZDINX-NEXT: ret ; ; RV64IZFINXZDINX-LABEL: fcmp_oge: ; RV64IZFINXZDINX: # %bb.0: -; RV64IZFINXZDINX-NEXT: csrr a3, fflags +; RV64IZFINXZDINX-NEXT: frflags a3 ; RV64IZFINXZDINX-NEXT: fle.d a2, a1, a0 -; RV64IZFINXZDINX-NEXT: csrw fflags, a3 +; RV64IZFINXZDINX-NEXT: fsflags a3 ; RV64IZFINXZDINX-NEXT: feq.d zero, a1, a0 ; RV64IZFINXZDINX-NEXT: mv a0, a2 ; RV64IZFINXZDINX-NEXT: ret @@ -172,18 +172,18 @@ define i32 @fcmp_olt(double %a, double 
%b) nounwind strictfp { ; ; RV32IZFINXZDINX-LABEL: fcmp_olt: ; RV32IZFINXZDINX: # %bb.0: -; RV32IZFINXZDINX-NEXT: csrr a5, fflags +; RV32IZFINXZDINX-NEXT: frflags a5 ; RV32IZFINXZDINX-NEXT: flt.d a4, a0, a2 -; RV32IZFINXZDINX-NEXT: csrw fflags, a5 +; RV32IZFINXZDINX-NEXT: fsflags a5 ; RV32IZFINXZDINX-NEXT: feq.d zero, a0, a2 ; RV32IZFINXZDINX-NEXT: mv a0, a4 ; RV32IZFINXZDINX-NEXT: ret ; ; RV64IZFINXZDINX-LABEL: fcmp_olt: ; RV64IZFINXZDINX: # %bb.0: -; RV64IZFINXZDINX-NEXT: csrr a3, fflags +; RV64IZFINXZDINX-NEXT: frflags a3 ; RV64IZFINXZDINX-NEXT: flt.d a2, a0, a1 -; RV64IZFINXZDINX-NEXT: csrw fflags, a3 +; RV64IZFINXZDINX-NEXT: fsflags a3 ; RV64IZFINXZDINX-NEXT: feq.d zero, a0, a1 ; RV64IZFINXZDINX-NEXT: mv a0, a2 ; RV64IZFINXZDINX-NEXT: ret @@ -223,18 +223,18 @@ define i32 @fcmp_ole(double %a, double %b) nounwind strictfp { ; ; RV32IZFINXZDINX-LABEL: fcmp_ole: ; RV32IZFINXZDINX: # %bb.0: -; RV32IZFINXZDINX-NEXT: csrr a5, fflags +; RV32IZFINXZDINX-NEXT: frflags a5 ; RV32IZFINXZDINX-NEXT: fle.d a4, a0, a2 -; RV32IZFINXZDINX-NEXT: csrw fflags, a5 +; RV32IZFINXZDINX-NEXT: fsflags a5 ; RV32IZFINXZDINX-NEXT: feq.d zero, a0, a2 ; RV32IZFINXZDINX-NEXT: mv a0, a4 ; RV32IZFINXZDINX-NEXT: ret ; ; RV64IZFINXZDINX-LABEL: fcmp_ole: ; RV64IZFINXZDINX: # %bb.0: -; RV64IZFINXZDINX-NEXT: csrr a3, fflags +; RV64IZFINXZDINX-NEXT: frflags a3 ; RV64IZFINXZDINX-NEXT: fle.d a2, a0, a1 -; RV64IZFINXZDINX-NEXT: csrw fflags, a3 +; RV64IZFINXZDINX-NEXT: fsflags a3 ; RV64IZFINXZDINX-NEXT: feq.d zero, a0, a1 ; RV64IZFINXZDINX-NEXT: mv a0, a2 ; RV64IZFINXZDINX-NEXT: ret @@ -281,13 +281,13 @@ define i32 @fcmp_one(double %a, double %b) nounwind strictfp { ; ; RV32IZFINXZDINX-LABEL: fcmp_one: ; RV32IZFINXZDINX: # %bb.0: -; RV32IZFINXZDINX-NEXT: csrr a4, fflags +; RV32IZFINXZDINX-NEXT: frflags a4 ; RV32IZFINXZDINX-NEXT: flt.d a5, a0, a2 -; RV32IZFINXZDINX-NEXT: csrw fflags, a4 +; RV32IZFINXZDINX-NEXT: fsflags a4 ; RV32IZFINXZDINX-NEXT: feq.d zero, a0, a2 -; RV32IZFINXZDINX-NEXT: csrr a4, fflags 
+; RV32IZFINXZDINX-NEXT: frflags a4 ; RV32IZFINXZDINX-NEXT: flt.d a6, a2, a0 -; RV32IZFINXZDINX-NEXT: csrw fflags, a4 +; RV32IZFINXZDINX-NEXT: fsflags a4 ; RV32IZFINXZDINX-NEXT: or a4, a6, a5 ; RV32IZFINXZDINX-NEXT: feq.d zero, a2, a0 ; RV32IZFINXZDINX-NEXT: mv a0, a4 @@ -295,13 +295,13 @@ define i32 @fcmp_one(double %a, double %b) nounwind strictfp { ; ; RV64IZFINXZDINX-LABEL: fcmp_one: ; RV64IZFINXZDINX: # %bb.0: -; RV64IZFINXZDINX-NEXT: csrr a2, fflags +; RV64IZFINXZDINX-NEXT: frflags a2 ; RV64IZFINXZDINX-NEXT: flt.d a3, a0, a1 -; RV64IZFINXZDINX-NEXT: csrw fflags, a2 +; RV64IZFINXZDINX-NEXT: fsflags a2 ; RV64IZFINXZDINX-NEXT: feq.d zero, a0, a1 -; RV64IZFINXZDINX-NEXT: csrr a2, fflags +; RV64IZFINXZDINX-NEXT: frflags a2 ; RV64IZFINXZDINX-NEXT: flt.d a4, a1, a0 -; RV64IZFINXZDINX-NEXT: csrw fflags, a2 +; RV64IZFINXZDINX-NEXT: fsflags a2 ; RV64IZFINXZDINX-NEXT: or a2, a4, a3 ; RV64IZFINXZDINX-NEXT: feq.d zero, a1, a0 ; RV64IZFINXZDINX-NEXT: mv a0, a2 @@ -430,13 +430,13 @@ define i32 @fcmp_ueq(double %a, double %b) nounwind strictfp { ; ; RV32IZFINXZDINX-LABEL: fcmp_ueq: ; RV32IZFINXZDINX: # %bb.0: -; RV32IZFINXZDINX-NEXT: csrr a4, fflags +; RV32IZFINXZDINX-NEXT: frflags a4 ; RV32IZFINXZDINX-NEXT: flt.d a5, a0, a2 -; RV32IZFINXZDINX-NEXT: csrw fflags, a4 +; RV32IZFINXZDINX-NEXT: fsflags a4 ; RV32IZFINXZDINX-NEXT: feq.d zero, a0, a2 -; RV32IZFINXZDINX-NEXT: csrr a4, fflags +; RV32IZFINXZDINX-NEXT: frflags a4 ; RV32IZFINXZDINX-NEXT: flt.d a6, a2, a0 -; RV32IZFINXZDINX-NEXT: csrw fflags, a4 +; RV32IZFINXZDINX-NEXT: fsflags a4 ; RV32IZFINXZDINX-NEXT: or a4, a6, a5 ; RV32IZFINXZDINX-NEXT: xori a4, a4, 1 ; RV32IZFINXZDINX-NEXT: feq.d zero, a2, a0 @@ -445,13 +445,13 @@ define i32 @fcmp_ueq(double %a, double %b) nounwind strictfp { ; ; RV64IZFINXZDINX-LABEL: fcmp_ueq: ; RV64IZFINXZDINX: # %bb.0: -; RV64IZFINXZDINX-NEXT: csrr a2, fflags +; RV64IZFINXZDINX-NEXT: frflags a2 ; RV64IZFINXZDINX-NEXT: flt.d a3, a0, a1 -; RV64IZFINXZDINX-NEXT: csrw fflags, a2 +; 
RV64IZFINXZDINX-NEXT: fsflags a2 ; RV64IZFINXZDINX-NEXT: feq.d zero, a0, a1 -; RV64IZFINXZDINX-NEXT: csrr a2, fflags +; RV64IZFINXZDINX-NEXT: frflags a2 ; RV64IZFINXZDINX-NEXT: flt.d a4, a1, a0 -; RV64IZFINXZDINX-NEXT: csrw fflags, a2 +; RV64IZFINXZDINX-NEXT: fsflags a2 ; RV64IZFINXZDINX-NEXT: or a3, a4, a3 ; RV64IZFINXZDINX-NEXT: xori a2, a3, 1 ; RV64IZFINXZDINX-NEXT: feq.d zero, a1, a0 @@ -528,9 +528,9 @@ define i32 @fcmp_ugt(double %a, double %b) nounwind strictfp { ; ; RV32IZFINXZDINX-LABEL: fcmp_ugt: ; RV32IZFINXZDINX: # %bb.0: -; RV32IZFINXZDINX-NEXT: csrr a4, fflags +; RV32IZFINXZDINX-NEXT: frflags a4 ; RV32IZFINXZDINX-NEXT: fle.d a5, a0, a2 -; RV32IZFINXZDINX-NEXT: csrw fflags, a4 +; RV32IZFINXZDINX-NEXT: fsflags a4 ; RV32IZFINXZDINX-NEXT: xori a4, a5, 1 ; RV32IZFINXZDINX-NEXT: feq.d zero, a0, a2 ; RV32IZFINXZDINX-NEXT: mv a0, a4 @@ -538,9 +538,9 @@ define i32 @fcmp_ugt(double %a, double %b) nounwind strictfp { ; ; RV64IZFINXZDINX-LABEL: fcmp_ugt: ; RV64IZFINXZDINX: # %bb.0: -; RV64IZFINXZDINX-NEXT: csrr a2, fflags +; RV64IZFINXZDINX-NEXT: frflags a2 ; RV64IZFINXZDINX-NEXT: fle.d a3, a0, a1 -; RV64IZFINXZDINX-NEXT: csrw fflags, a2 +; RV64IZFINXZDINX-NEXT: fsflags a2 ; RV64IZFINXZDINX-NEXT: xori a2, a3, 1 ; RV64IZFINXZDINX-NEXT: feq.d zero, a0, a1 ; RV64IZFINXZDINX-NEXT: mv a0, a2 @@ -582,9 +582,9 @@ define i32 @fcmp_uge(double %a, double %b) nounwind strictfp { ; ; RV32IZFINXZDINX-LABEL: fcmp_uge: ; RV32IZFINXZDINX: # %bb.0: -; RV32IZFINXZDINX-NEXT: csrr a4, fflags +; RV32IZFINXZDINX-NEXT: frflags a4 ; RV32IZFINXZDINX-NEXT: flt.d a5, a0, a2 -; RV32IZFINXZDINX-NEXT: csrw fflags, a4 +; RV32IZFINXZDINX-NEXT: fsflags a4 ; RV32IZFINXZDINX-NEXT: xori a4, a5, 1 ; RV32IZFINXZDINX-NEXT: feq.d zero, a0, a2 ; RV32IZFINXZDINX-NEXT: mv a0, a4 @@ -592,9 +592,9 @@ define i32 @fcmp_uge(double %a, double %b) nounwind strictfp { ; ; RV64IZFINXZDINX-LABEL: fcmp_uge: ; RV64IZFINXZDINX: # %bb.0: -; RV64IZFINXZDINX-NEXT: csrr a2, fflags +; RV64IZFINXZDINX-NEXT: frflags a2 ; 
RV64IZFINXZDINX-NEXT: flt.d a3, a0, a1 -; RV64IZFINXZDINX-NEXT: csrw fflags, a2 +; RV64IZFINXZDINX-NEXT: fsflags a2 ; RV64IZFINXZDINX-NEXT: xori a2, a3, 1 ; RV64IZFINXZDINX-NEXT: feq.d zero, a0, a1 ; RV64IZFINXZDINX-NEXT: mv a0, a2 @@ -638,9 +638,9 @@ define i32 @fcmp_ult(double %a, double %b) nounwind strictfp { ; ; RV32IZFINXZDINX-LABEL: fcmp_ult: ; RV32IZFINXZDINX: # %bb.0: -; RV32IZFINXZDINX-NEXT: csrr a4, fflags +; RV32IZFINXZDINX-NEXT: frflags a4 ; RV32IZFINXZDINX-NEXT: fle.d a5, a2, a0 -; RV32IZFINXZDINX-NEXT: csrw fflags, a4 +; RV32IZFINXZDINX-NEXT: fsflags a4 ; RV32IZFINXZDINX-NEXT: xori a4, a5, 1 ; RV32IZFINXZDINX-NEXT: feq.d zero, a2, a0 ; RV32IZFINXZDINX-NEXT: mv a0, a4 @@ -648,9 +648,9 @@ define i32 @fcmp_ult(double %a, double %b) nounwind strictfp { ; ; RV64IZFINXZDINX-LABEL: fcmp_ult: ; RV64IZFINXZDINX: # %bb.0: -; RV64IZFINXZDINX-NEXT: csrr a2, fflags +; RV64IZFINXZDINX-NEXT: frflags a2 ; RV64IZFINXZDINX-NEXT: fle.d a3, a1, a0 -; RV64IZFINXZDINX-NEXT: csrw fflags, a2 +; RV64IZFINXZDINX-NEXT: fsflags a2 ; RV64IZFINXZDINX-NEXT: xori a2, a3, 1 ; RV64IZFINXZDINX-NEXT: feq.d zero, a1, a0 ; RV64IZFINXZDINX-NEXT: mv a0, a2 @@ -692,9 +692,9 @@ define i32 @fcmp_ule(double %a, double %b) nounwind strictfp { ; ; RV32IZFINXZDINX-LABEL: fcmp_ule: ; RV32IZFINXZDINX: # %bb.0: -; RV32IZFINXZDINX-NEXT: csrr a4, fflags +; RV32IZFINXZDINX-NEXT: frflags a4 ; RV32IZFINXZDINX-NEXT: flt.d a5, a2, a0 -; RV32IZFINXZDINX-NEXT: csrw fflags, a4 +; RV32IZFINXZDINX-NEXT: fsflags a4 ; RV32IZFINXZDINX-NEXT: xori a4, a5, 1 ; RV32IZFINXZDINX-NEXT: feq.d zero, a2, a0 ; RV32IZFINXZDINX-NEXT: mv a0, a4 @@ -702,9 +702,9 @@ define i32 @fcmp_ule(double %a, double %b) nounwind strictfp { ; ; RV64IZFINXZDINX-LABEL: fcmp_ule: ; RV64IZFINXZDINX: # %bb.0: -; RV64IZFINXZDINX-NEXT: csrr a2, fflags +; RV64IZFINXZDINX-NEXT: frflags a2 ; RV64IZFINXZDINX-NEXT: flt.d a3, a1, a0 -; RV64IZFINXZDINX-NEXT: csrw fflags, a2 +; RV64IZFINXZDINX-NEXT: fsflags a2 ; RV64IZFINXZDINX-NEXT: xori a2, a3, 1 ; 
RV64IZFINXZDINX-NEXT: feq.d zero, a1, a0 ; RV64IZFINXZDINX-NEXT: mv a0, a2 diff --git a/llvm/test/CodeGen/RISCV/float-fcmp-strict.ll b/llvm/test/CodeGen/RISCV/float-fcmp-strict.ll index dae9f3e..0cbfc96 100644 --- a/llvm/test/CodeGen/RISCV/float-fcmp-strict.ll +++ b/llvm/test/CodeGen/RISCV/float-fcmp-strict.ll @@ -63,9 +63,9 @@ define i32 @fcmp_ogt(float %a, float %b) nounwind strictfp { ; ; CHECKIZFINX-LABEL: fcmp_ogt: ; CHECKIZFINX: # %bb.0: -; CHECKIZFINX-NEXT: csrr a3, fflags +; CHECKIZFINX-NEXT: frflags a3 ; CHECKIZFINX-NEXT: flt.s a2, a1, a0 -; CHECKIZFINX-NEXT: csrw fflags, a3 +; CHECKIZFINX-NEXT: fsflags a3 ; CHECKIZFINX-NEXT: feq.s zero, a1, a0 ; CHECKIZFINX-NEXT: mv a0, a2 ; CHECKIZFINX-NEXT: ret @@ -105,9 +105,9 @@ define i32 @fcmp_oge(float %a, float %b) nounwind strictfp { ; ; CHECKIZFINX-LABEL: fcmp_oge: ; CHECKIZFINX: # %bb.0: -; CHECKIZFINX-NEXT: csrr a3, fflags +; CHECKIZFINX-NEXT: frflags a3 ; CHECKIZFINX-NEXT: fle.s a2, a1, a0 -; CHECKIZFINX-NEXT: csrw fflags, a3 +; CHECKIZFINX-NEXT: fsflags a3 ; CHECKIZFINX-NEXT: feq.s zero, a1, a0 ; CHECKIZFINX-NEXT: mv a0, a2 ; CHECKIZFINX-NEXT: ret @@ -149,9 +149,9 @@ define i32 @fcmp_olt(float %a, float %b) nounwind strictfp { ; ; CHECKIZFINX-LABEL: fcmp_olt: ; CHECKIZFINX: # %bb.0: -; CHECKIZFINX-NEXT: csrr a3, fflags +; CHECKIZFINX-NEXT: frflags a3 ; CHECKIZFINX-NEXT: flt.s a2, a0, a1 -; CHECKIZFINX-NEXT: csrw fflags, a3 +; CHECKIZFINX-NEXT: fsflags a3 ; CHECKIZFINX-NEXT: feq.s zero, a0, a1 ; CHECKIZFINX-NEXT: mv a0, a2 ; CHECKIZFINX-NEXT: ret @@ -191,9 +191,9 @@ define i32 @fcmp_ole(float %a, float %b) nounwind strictfp { ; ; CHECKIZFINX-LABEL: fcmp_ole: ; CHECKIZFINX: # %bb.0: -; CHECKIZFINX-NEXT: csrr a3, fflags +; CHECKIZFINX-NEXT: frflags a3 ; CHECKIZFINX-NEXT: fle.s a2, a0, a1 -; CHECKIZFINX-NEXT: csrw fflags, a3 +; CHECKIZFINX-NEXT: fsflags a3 ; CHECKIZFINX-NEXT: feq.s zero, a0, a1 ; CHECKIZFINX-NEXT: mv a0, a2 ; CHECKIZFINX-NEXT: ret @@ -240,13 +240,13 @@ define i32 @fcmp_one(float %a, float %b) 
nounwind strictfp { ; ; CHECKIZFINX-LABEL: fcmp_one: ; CHECKIZFINX: # %bb.0: -; CHECKIZFINX-NEXT: csrr a2, fflags +; CHECKIZFINX-NEXT: frflags a2 ; CHECKIZFINX-NEXT: flt.s a3, a0, a1 -; CHECKIZFINX-NEXT: csrw fflags, a2 +; CHECKIZFINX-NEXT: fsflags a2 ; CHECKIZFINX-NEXT: feq.s zero, a0, a1 -; CHECKIZFINX-NEXT: csrr a2, fflags +; CHECKIZFINX-NEXT: frflags a2 ; CHECKIZFINX-NEXT: flt.s a4, a1, a0 -; CHECKIZFINX-NEXT: csrw fflags, a2 +; CHECKIZFINX-NEXT: fsflags a2 ; CHECKIZFINX-NEXT: or a2, a4, a3 ; CHECKIZFINX-NEXT: feq.s zero, a1, a0 ; CHECKIZFINX-NEXT: mv a0, a2 @@ -360,13 +360,13 @@ define i32 @fcmp_ueq(float %a, float %b) nounwind strictfp { ; ; CHECKIZFINX-LABEL: fcmp_ueq: ; CHECKIZFINX: # %bb.0: -; CHECKIZFINX-NEXT: csrr a2, fflags +; CHECKIZFINX-NEXT: frflags a2 ; CHECKIZFINX-NEXT: flt.s a3, a0, a1 -; CHECKIZFINX-NEXT: csrw fflags, a2 +; CHECKIZFINX-NEXT: fsflags a2 ; CHECKIZFINX-NEXT: feq.s zero, a0, a1 -; CHECKIZFINX-NEXT: csrr a2, fflags +; CHECKIZFINX-NEXT: frflags a2 ; CHECKIZFINX-NEXT: flt.s a4, a1, a0 -; CHECKIZFINX-NEXT: csrw fflags, a2 +; CHECKIZFINX-NEXT: fsflags a2 ; CHECKIZFINX-NEXT: or a3, a4, a3 ; CHECKIZFINX-NEXT: xori a2, a3, 1 ; CHECKIZFINX-NEXT: feq.s zero, a1, a0 @@ -435,9 +435,9 @@ define i32 @fcmp_ugt(float %a, float %b) nounwind strictfp { ; ; CHECKIZFINX-LABEL: fcmp_ugt: ; CHECKIZFINX: # %bb.0: -; CHECKIZFINX-NEXT: csrr a2, fflags +; CHECKIZFINX-NEXT: frflags a2 ; CHECKIZFINX-NEXT: fle.s a3, a0, a1 -; CHECKIZFINX-NEXT: csrw fflags, a2 +; CHECKIZFINX-NEXT: fsflags a2 ; CHECKIZFINX-NEXT: xori a2, a3, 1 ; CHECKIZFINX-NEXT: feq.s zero, a0, a1 ; CHECKIZFINX-NEXT: mv a0, a2 @@ -479,9 +479,9 @@ define i32 @fcmp_uge(float %a, float %b) nounwind strictfp { ; ; CHECKIZFINX-LABEL: fcmp_uge: ; CHECKIZFINX: # %bb.0: -; CHECKIZFINX-NEXT: csrr a2, fflags +; CHECKIZFINX-NEXT: frflags a2 ; CHECKIZFINX-NEXT: flt.s a3, a0, a1 -; CHECKIZFINX-NEXT: csrw fflags, a2 +; CHECKIZFINX-NEXT: fsflags a2 ; CHECKIZFINX-NEXT: xori a2, a3, 1 ; CHECKIZFINX-NEXT: feq.s 
zero, a0, a1 ; CHECKIZFINX-NEXT: mv a0, a2 @@ -525,9 +525,9 @@ define i32 @fcmp_ult(float %a, float %b) nounwind strictfp { ; ; CHECKIZFINX-LABEL: fcmp_ult: ; CHECKIZFINX: # %bb.0: -; CHECKIZFINX-NEXT: csrr a2, fflags +; CHECKIZFINX-NEXT: frflags a2 ; CHECKIZFINX-NEXT: fle.s a3, a1, a0 -; CHECKIZFINX-NEXT: csrw fflags, a2 +; CHECKIZFINX-NEXT: fsflags a2 ; CHECKIZFINX-NEXT: xori a2, a3, 1 ; CHECKIZFINX-NEXT: feq.s zero, a1, a0 ; CHECKIZFINX-NEXT: mv a0, a2 @@ -569,9 +569,9 @@ define i32 @fcmp_ule(float %a, float %b) nounwind strictfp { ; ; CHECKIZFINX-LABEL: fcmp_ule: ; CHECKIZFINX: # %bb.0: -; CHECKIZFINX-NEXT: csrr a2, fflags +; CHECKIZFINX-NEXT: frflags a2 ; CHECKIZFINX-NEXT: flt.s a3, a1, a0 -; CHECKIZFINX-NEXT: csrw fflags, a2 +; CHECKIZFINX-NEXT: fsflags a2 ; CHECKIZFINX-NEXT: xori a2, a3, 1 ; CHECKIZFINX-NEXT: feq.s zero, a1, a0 ; CHECKIZFINX-NEXT: mv a0, a2 diff --git a/llvm/test/CodeGen/RISCV/half-fcmp-strict.ll b/llvm/test/CodeGen/RISCV/half-fcmp-strict.ll index d96c39c..4bc595b 100644 --- a/llvm/test/CodeGen/RISCV/half-fcmp-strict.ll +++ b/llvm/test/CodeGen/RISCV/half-fcmp-strict.ll @@ -61,9 +61,9 @@ define i32 @fcmp_ogt(half %a, half %b) nounwind strictfp { ; ; CHECKIZHINX-LABEL: fcmp_ogt: ; CHECKIZHINX: # %bb.0: -; CHECKIZHINX-NEXT: csrr a3, fflags +; CHECKIZHINX-NEXT: frflags a3 ; CHECKIZHINX-NEXT: flt.h a2, a1, a0 -; CHECKIZHINX-NEXT: csrw fflags, a3 +; CHECKIZHINX-NEXT: fsflags a3 ; CHECKIZHINX-NEXT: feq.h zero, a1, a0 ; CHECKIZHINX-NEXT: mv a0, a2 ; CHECKIZHINX-NEXT: ret @@ -80,9 +80,9 @@ define i32 @fcmp_ogt(half %a, half %b) nounwind strictfp { ; CHECKIZHINXMIN: # %bb.0: ; CHECKIZHINXMIN-NEXT: fcvt.s.h a2, a0 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a1, a1 -; CHECKIZHINXMIN-NEXT: csrr a3, fflags +; CHECKIZHINXMIN-NEXT: frflags a3 ; CHECKIZHINXMIN-NEXT: flt.s a0, a1, a2 -; CHECKIZHINXMIN-NEXT: csrw fflags, a3 +; CHECKIZHINXMIN-NEXT: fsflags a3 ; CHECKIZHINXMIN-NEXT: feq.s zero, a1, a2 ; CHECKIZHINXMIN-NEXT: ret %1 = call i1 
@llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"ogt", metadata !"fpexcept.strict") strictfp @@ -101,9 +101,9 @@ define i32 @fcmp_oge(half %a, half %b) nounwind strictfp { ; ; CHECKIZHINX-LABEL: fcmp_oge: ; CHECKIZHINX: # %bb.0: -; CHECKIZHINX-NEXT: csrr a3, fflags +; CHECKIZHINX-NEXT: frflags a3 ; CHECKIZHINX-NEXT: fle.h a2, a1, a0 -; CHECKIZHINX-NEXT: csrw fflags, a3 +; CHECKIZHINX-NEXT: fsflags a3 ; CHECKIZHINX-NEXT: feq.h zero, a1, a0 ; CHECKIZHINX-NEXT: mv a0, a2 ; CHECKIZHINX-NEXT: ret @@ -120,9 +120,9 @@ define i32 @fcmp_oge(half %a, half %b) nounwind strictfp { ; CHECKIZHINXMIN: # %bb.0: ; CHECKIZHINXMIN-NEXT: fcvt.s.h a2, a0 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a1, a1 -; CHECKIZHINXMIN-NEXT: csrr a3, fflags +; CHECKIZHINXMIN-NEXT: frflags a3 ; CHECKIZHINXMIN-NEXT: fle.s a0, a1, a2 -; CHECKIZHINXMIN-NEXT: csrw fflags, a3 +; CHECKIZHINXMIN-NEXT: fsflags a3 ; CHECKIZHINXMIN-NEXT: feq.s zero, a1, a2 ; CHECKIZHINXMIN-NEXT: ret %1 = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"oge", metadata !"fpexcept.strict") strictfp @@ -141,9 +141,9 @@ define i32 @fcmp_olt(half %a, half %b) nounwind strictfp { ; ; CHECKIZHINX-LABEL: fcmp_olt: ; CHECKIZHINX: # %bb.0: -; CHECKIZHINX-NEXT: csrr a3, fflags +; CHECKIZHINX-NEXT: frflags a3 ; CHECKIZHINX-NEXT: flt.h a2, a0, a1 -; CHECKIZHINX-NEXT: csrw fflags, a3 +; CHECKIZHINX-NEXT: fsflags a3 ; CHECKIZHINX-NEXT: feq.h zero, a0, a1 ; CHECKIZHINX-NEXT: mv a0, a2 ; CHECKIZHINX-NEXT: ret @@ -160,9 +160,9 @@ define i32 @fcmp_olt(half %a, half %b) nounwind strictfp { ; CHECKIZHINXMIN: # %bb.0: ; CHECKIZHINXMIN-NEXT: fcvt.s.h a1, a1 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a2, a0 -; CHECKIZHINXMIN-NEXT: csrr a3, fflags +; CHECKIZHINXMIN-NEXT: frflags a3 ; CHECKIZHINXMIN-NEXT: flt.s a0, a2, a1 -; CHECKIZHINXMIN-NEXT: csrw fflags, a3 +; CHECKIZHINXMIN-NEXT: fsflags a3 ; CHECKIZHINXMIN-NEXT: feq.s zero, a2, a1 ; CHECKIZHINXMIN-NEXT: ret %1 = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half 
%b, metadata !"olt", metadata !"fpexcept.strict") strictfp @@ -181,9 +181,9 @@ define i32 @fcmp_ole(half %a, half %b) nounwind strictfp { ; ; CHECKIZHINX-LABEL: fcmp_ole: ; CHECKIZHINX: # %bb.0: -; CHECKIZHINX-NEXT: csrr a3, fflags +; CHECKIZHINX-NEXT: frflags a3 ; CHECKIZHINX-NEXT: fle.h a2, a0, a1 -; CHECKIZHINX-NEXT: csrw fflags, a3 +; CHECKIZHINX-NEXT: fsflags a3 ; CHECKIZHINX-NEXT: feq.h zero, a0, a1 ; CHECKIZHINX-NEXT: mv a0, a2 ; CHECKIZHINX-NEXT: ret @@ -200,9 +200,9 @@ define i32 @fcmp_ole(half %a, half %b) nounwind strictfp { ; CHECKIZHINXMIN: # %bb.0: ; CHECKIZHINXMIN-NEXT: fcvt.s.h a1, a1 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a2, a0 -; CHECKIZHINXMIN-NEXT: csrr a3, fflags +; CHECKIZHINXMIN-NEXT: frflags a3 ; CHECKIZHINXMIN-NEXT: fle.s a0, a2, a1 -; CHECKIZHINXMIN-NEXT: csrw fflags, a3 +; CHECKIZHINXMIN-NEXT: fsflags a3 ; CHECKIZHINXMIN-NEXT: feq.s zero, a2, a1 ; CHECKIZHINXMIN-NEXT: ret %1 = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"ole", metadata !"fpexcept.strict") strictfp @@ -228,13 +228,13 @@ define i32 @fcmp_one(half %a, half %b) nounwind strictfp { ; ; CHECKIZHINX-LABEL: fcmp_one: ; CHECKIZHINX: # %bb.0: -; CHECKIZHINX-NEXT: csrr a2, fflags +; CHECKIZHINX-NEXT: frflags a2 ; CHECKIZHINX-NEXT: flt.h a3, a0, a1 -; CHECKIZHINX-NEXT: csrw fflags, a2 +; CHECKIZHINX-NEXT: fsflags a2 ; CHECKIZHINX-NEXT: feq.h zero, a0, a1 -; CHECKIZHINX-NEXT: csrr a2, fflags +; CHECKIZHINX-NEXT: frflags a2 ; CHECKIZHINX-NEXT: flt.h a4, a1, a0 -; CHECKIZHINX-NEXT: csrw fflags, a2 +; CHECKIZHINX-NEXT: fsflags a2 ; CHECKIZHINX-NEXT: or a2, a4, a3 ; CHECKIZHINX-NEXT: feq.h zero, a1, a0 ; CHECKIZHINX-NEXT: mv a0, a2 @@ -257,13 +257,13 @@ define i32 @fcmp_one(half %a, half %b) nounwind strictfp { ; CHECKIZHINXMIN: # %bb.0: ; CHECKIZHINXMIN-NEXT: fcvt.s.h a1, a1 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a2, a0 -; CHECKIZHINXMIN-NEXT: csrr a0, fflags +; CHECKIZHINXMIN-NEXT: frflags a0 ; CHECKIZHINXMIN-NEXT: flt.s a3, a2, a1 -; CHECKIZHINXMIN-NEXT: csrw fflags, 
a0 +; CHECKIZHINXMIN-NEXT: fsflags a0 ; CHECKIZHINXMIN-NEXT: feq.s zero, a2, a1 -; CHECKIZHINXMIN-NEXT: csrr a0, fflags +; CHECKIZHINXMIN-NEXT: frflags a0 ; CHECKIZHINXMIN-NEXT: flt.s a4, a1, a2 -; CHECKIZHINXMIN-NEXT: csrw fflags, a0 +; CHECKIZHINXMIN-NEXT: fsflags a0 ; CHECKIZHINXMIN-NEXT: or a0, a4, a3 ; CHECKIZHINXMIN-NEXT: feq.s zero, a1, a2 ; CHECKIZHINXMIN-NEXT: ret @@ -326,13 +326,13 @@ define i32 @fcmp_ueq(half %a, half %b) nounwind strictfp { ; ; CHECKIZHINX-LABEL: fcmp_ueq: ; CHECKIZHINX: # %bb.0: -; CHECKIZHINX-NEXT: csrr a2, fflags +; CHECKIZHINX-NEXT: frflags a2 ; CHECKIZHINX-NEXT: flt.h a3, a0, a1 -; CHECKIZHINX-NEXT: csrw fflags, a2 +; CHECKIZHINX-NEXT: fsflags a2 ; CHECKIZHINX-NEXT: feq.h zero, a0, a1 -; CHECKIZHINX-NEXT: csrr a2, fflags +; CHECKIZHINX-NEXT: frflags a2 ; CHECKIZHINX-NEXT: flt.h a4, a1, a0 -; CHECKIZHINX-NEXT: csrw fflags, a2 +; CHECKIZHINX-NEXT: fsflags a2 ; CHECKIZHINX-NEXT: or a3, a4, a3 ; CHECKIZHINX-NEXT: xori a2, a3, 1 ; CHECKIZHINX-NEXT: feq.h zero, a1, a0 @@ -357,13 +357,13 @@ define i32 @fcmp_ueq(half %a, half %b) nounwind strictfp { ; CHECKIZHINXMIN: # %bb.0: ; CHECKIZHINXMIN-NEXT: fcvt.s.h a1, a1 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a2, a0 -; CHECKIZHINXMIN-NEXT: csrr a0, fflags +; CHECKIZHINXMIN-NEXT: frflags a0 ; CHECKIZHINXMIN-NEXT: flt.s a3, a2, a1 -; CHECKIZHINXMIN-NEXT: csrw fflags, a0 +; CHECKIZHINXMIN-NEXT: fsflags a0 ; CHECKIZHINXMIN-NEXT: feq.s zero, a2, a1 -; CHECKIZHINXMIN-NEXT: csrr a0, fflags +; CHECKIZHINXMIN-NEXT: frflags a0 ; CHECKIZHINXMIN-NEXT: flt.s a4, a1, a2 -; CHECKIZHINXMIN-NEXT: csrw fflags, a0 +; CHECKIZHINXMIN-NEXT: fsflags a0 ; CHECKIZHINXMIN-NEXT: or a3, a4, a3 ; CHECKIZHINXMIN-NEXT: xori a0, a3, 1 ; CHECKIZHINXMIN-NEXT: feq.s zero, a1, a2 @@ -385,9 +385,9 @@ define i32 @fcmp_ugt(half %a, half %b) nounwind strictfp { ; ; CHECKIZHINX-LABEL: fcmp_ugt: ; CHECKIZHINX: # %bb.0: -; CHECKIZHINX-NEXT: csrr a2, fflags +; CHECKIZHINX-NEXT: frflags a2 ; CHECKIZHINX-NEXT: fle.h a3, a0, a1 -; CHECKIZHINX-NEXT: 
csrw fflags, a2 +; CHECKIZHINX-NEXT: fsflags a2 ; CHECKIZHINX-NEXT: xori a2, a3, 1 ; CHECKIZHINX-NEXT: feq.h zero, a0, a1 ; CHECKIZHINX-NEXT: mv a0, a2 @@ -406,9 +406,9 @@ define i32 @fcmp_ugt(half %a, half %b) nounwind strictfp { ; CHECKIZHINXMIN: # %bb.0: ; CHECKIZHINXMIN-NEXT: fcvt.s.h a1, a1 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a2, a0 -; CHECKIZHINXMIN-NEXT: csrr a0, fflags +; CHECKIZHINXMIN-NEXT: frflags a0 ; CHECKIZHINXMIN-NEXT: fle.s a3, a2, a1 -; CHECKIZHINXMIN-NEXT: csrw fflags, a0 +; CHECKIZHINXMIN-NEXT: fsflags a0 ; CHECKIZHINXMIN-NEXT: xori a0, a3, 1 ; CHECKIZHINXMIN-NEXT: feq.s zero, a2, a1 ; CHECKIZHINXMIN-NEXT: ret @@ -429,9 +429,9 @@ define i32 @fcmp_uge(half %a, half %b) nounwind strictfp { ; ; CHECKIZHINX-LABEL: fcmp_uge: ; CHECKIZHINX: # %bb.0: -; CHECKIZHINX-NEXT: csrr a2, fflags +; CHECKIZHINX-NEXT: frflags a2 ; CHECKIZHINX-NEXT: flt.h a3, a0, a1 -; CHECKIZHINX-NEXT: csrw fflags, a2 +; CHECKIZHINX-NEXT: fsflags a2 ; CHECKIZHINX-NEXT: xori a2, a3, 1 ; CHECKIZHINX-NEXT: feq.h zero, a0, a1 ; CHECKIZHINX-NEXT: mv a0, a2 @@ -450,9 +450,9 @@ define i32 @fcmp_uge(half %a, half %b) nounwind strictfp { ; CHECKIZHINXMIN: # %bb.0: ; CHECKIZHINXMIN-NEXT: fcvt.s.h a1, a1 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a2, a0 -; CHECKIZHINXMIN-NEXT: csrr a0, fflags +; CHECKIZHINXMIN-NEXT: frflags a0 ; CHECKIZHINXMIN-NEXT: flt.s a3, a2, a1 -; CHECKIZHINXMIN-NEXT: csrw fflags, a0 +; CHECKIZHINXMIN-NEXT: fsflags a0 ; CHECKIZHINXMIN-NEXT: xori a0, a3, 1 ; CHECKIZHINXMIN-NEXT: feq.s zero, a2, a1 ; CHECKIZHINXMIN-NEXT: ret @@ -473,9 +473,9 @@ define i32 @fcmp_ult(half %a, half %b) nounwind strictfp { ; ; CHECKIZHINX-LABEL: fcmp_ult: ; CHECKIZHINX: # %bb.0: -; CHECKIZHINX-NEXT: csrr a2, fflags +; CHECKIZHINX-NEXT: frflags a2 ; CHECKIZHINX-NEXT: fle.h a3, a1, a0 -; CHECKIZHINX-NEXT: csrw fflags, a2 +; CHECKIZHINX-NEXT: fsflags a2 ; CHECKIZHINX-NEXT: xori a2, a3, 1 ; CHECKIZHINX-NEXT: feq.h zero, a1, a0 ; CHECKIZHINX-NEXT: mv a0, a2 @@ -494,9 +494,9 @@ define i32 @fcmp_ult(half %a, half 
%b) nounwind strictfp { ; CHECKIZHINXMIN: # %bb.0: ; CHECKIZHINXMIN-NEXT: fcvt.s.h a2, a0 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a1, a1 -; CHECKIZHINXMIN-NEXT: csrr a0, fflags +; CHECKIZHINXMIN-NEXT: frflags a0 ; CHECKIZHINXMIN-NEXT: fle.s a3, a1, a2 -; CHECKIZHINXMIN-NEXT: csrw fflags, a0 +; CHECKIZHINXMIN-NEXT: fsflags a0 ; CHECKIZHINXMIN-NEXT: xori a0, a3, 1 ; CHECKIZHINXMIN-NEXT: feq.s zero, a1, a2 ; CHECKIZHINXMIN-NEXT: ret @@ -517,9 +517,9 @@ define i32 @fcmp_ule(half %a, half %b) nounwind strictfp { ; ; CHECKIZHINX-LABEL: fcmp_ule: ; CHECKIZHINX: # %bb.0: -; CHECKIZHINX-NEXT: csrr a2, fflags +; CHECKIZHINX-NEXT: frflags a2 ; CHECKIZHINX-NEXT: flt.h a3, a1, a0 -; CHECKIZHINX-NEXT: csrw fflags, a2 +; CHECKIZHINX-NEXT: fsflags a2 ; CHECKIZHINX-NEXT: xori a2, a3, 1 ; CHECKIZHINX-NEXT: feq.h zero, a1, a0 ; CHECKIZHINX-NEXT: mv a0, a2 @@ -538,9 +538,9 @@ define i32 @fcmp_ule(half %a, half %b) nounwind strictfp { ; CHECKIZHINXMIN: # %bb.0: ; CHECKIZHINXMIN-NEXT: fcvt.s.h a2, a0 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a1, a1 -; CHECKIZHINXMIN-NEXT: csrr a0, fflags +; CHECKIZHINXMIN-NEXT: frflags a0 ; CHECKIZHINXMIN-NEXT: flt.s a3, a1, a2 -; CHECKIZHINXMIN-NEXT: csrw fflags, a0 +; CHECKIZHINXMIN-NEXT: fsflags a0 ; CHECKIZHINXMIN-NEXT: xori a0, a3, 1 ; CHECKIZHINXMIN-NEXT: feq.s zero, a1, a2 ; CHECKIZHINXMIN-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/xcvalu.ll b/llvm/test/CodeGen/RISCV/xcvalu.ll index b103173..1ddfa10 100644 --- a/llvm/test/CodeGen/RISCV/xcvalu.ll +++ b/llvm/test/CodeGen/RISCV/xcvalu.ll @@ -20,7 +20,7 @@ define i32 @abs(i32 %a) { define i1 @slet(i32 %a, i32 %b) { ; CHECK-LABEL: slet: ; CHECK: # %bb.0: -; CHECK-NEXT: cv.slet a0, a0, a1 +; CHECK-NEXT: cv.sle a0, a0, a1 ; CHECK-NEXT: ret %1 = icmp sle i32 %a, %b ret i1 %1 @@ -29,7 +29,7 @@ define i1 @slet(i32 %a, i32 %b) { define i1 @sletu(i32 %a, i32 %b) { ; CHECK-LABEL: sletu: ; CHECK: # %bb.0: -; CHECK-NEXT: cv.sletu a0, a0, a1 +; CHECK-NEXT: cv.sleu a0, a0, a1 ; CHECK-NEXT: ret %1 = icmp ule i32 %a, %b ret i1 
%1 diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/step.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/step.ll new file mode 100644 index 0000000..bb50d8c --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/step.ll @@ -0,0 +1,33 @@ +; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+
+; Make sure SPIRV operation function calls for step are lowered correctly.
+
+; CHECK-DAG: %[[#op_ext_glsl:]] = OpExtInstImport "GLSL.std.450"
+; CHECK-DAG: %[[#float_32:]] = OpTypeFloat 32
+; CHECK-DAG: %[[#float_16:]] = OpTypeFloat 16
+; CHECK-DAG: %[[#vec4_float_16:]] = OpTypeVector %[[#float_16]] 4
+; CHECK-DAG: %[[#vec4_float_32:]] = OpTypeVector %[[#float_32]] 4
+
+define noundef <4 x half> @step_half4(<4 x half> noundef %a, <4 x half> noundef %b) {
+entry:
+ ; CHECK: %[[#]] = OpFunction %[[#vec4_float_16]] None %[[#]]
+ ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec4_float_16]]
+ ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#vec4_float_16]]
+ ; CHECK: %[[#]] = OpExtInst %[[#vec4_float_16]] %[[#op_ext_glsl]] Step %[[#arg0]] %[[#arg1]]
+ %hlsl.step = call <4 x half> @llvm.spv.step.v4f16(<4 x half> %a, <4 x half> %b)
+ ret <4 x half> %hlsl.step
+}
+
+define noundef <4 x float> @step_float4(<4 x float> noundef %a, <4 x float> noundef %b) {
+entry:
+ ; CHECK: %[[#]] = OpFunction %[[#vec4_float_32]] None %[[#]]
+ ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec4_float_32]]
+ ; CHECK: %[[#arg1:]] = OpFunctionParameter %[[#vec4_float_32]]
+ ; CHECK: %[[#]] = OpExtInst %[[#vec4_float_32]] %[[#op_ext_glsl]] Step %[[#arg0]] %[[#arg1]]
+ %hlsl.step = call <4 x float> @llvm.spv.step.v4f32(<4 x float> %a, <4 x float> %b)
+ ret <4 x float> %hlsl.step
+}
+
+declare <4 x half> @llvm.spv.step.v4f16(<4 x half>, <4 x half>)
+declare <4 x float> @llvm.spv.step.v4f32(<4 x float>, <4 x float>)
diff --git a/llvm/test/MC/RISCV/corev/XCValu-valid.s b/llvm/test/MC/RISCV/corev/XCValu-valid.s index 423dbba..1c74e36 100644 --- a/llvm/test/MC/RISCV/corev/XCValu-valid.s +++ b/llvm/test/MC/RISCV/corev/XCValu-valid.s @@ -36,15 +36,25 @@ cv.subrnr a0, a1, a2 # CHECK-ENCODING: [0x2b,0xb5,0xc5,0x8c] # CHECK-NO-EXT: instruction requires the following: 'XCValu' (CORE-V ALU Operations){{$}} +cv.sle t0, t1, t2 +# CHECK-INSTR: cv.sle t0, t1, t2 +# CHECK-ENCODING: [0xab,0x32,0x73,0x52] +# CHECK-NO-EXT: instruction requires the following: 'XCValu' (CORE-V ALU Operations){{$}} + cv.slet t0, t1, t2 -# CHECK-INSTR: cv.slet t0, t1, t2 +# CHECK-INSTR: cv.sle t0, t1, t2 # CHECK-ENCODING: [0xab,0x32,0x73,0x52] +# CHECK-NO-EXT: unrecognized instruction mnemonic + +cv.sle a0, a1, a2 +# CHECK-INSTR: cv.sle a0, a1, a2 +# CHECK-ENCODING: [0x2b,0xb5,0xc5,0x52] # CHECK-NO-EXT: instruction requires the following: 'XCValu' (CORE-V ALU Operations){{$}} cv.slet a0, a1, a2 -# CHECK-INSTR: cv.slet a0, a1, a2 +# CHECK-INSTR: cv.sle a0, a1, a2 # CHECK-ENCODING: [0x2b,0xb5,0xc5,0x52] -# CHECK-NO-EXT: instruction requires the following: 'XCValu' (CORE-V ALU Operations){{$}} +# CHECK-NO-EXT: unrecognized instruction mnemonic cv.subrn t0, t1, t2, 0 # CHECK-INSTR: cv.subrn t0, t1, t2, 0 @@ -261,15 +271,25 @@ cv.extbs a0, a1 # CHECK-ENCODING: [0x2b,0xb5,0x05,0x64] # CHECK-NO-EXT: instruction requires the following: 'XCValu' (CORE-V ALU Operations){{$}} +cv.sleu t0, t1, t2 +# CHECK-INSTR: cv.sleu t0, t1, t2 +# CHECK-ENCODING: [0xab,0x32,0x73,0x54] +# CHECK-NO-EXT: instruction requires the following: 'XCValu' (CORE-V ALU Operations){{$}} + cv.sletu t0, t1, t2 -# CHECK-INSTR: cv.sletu t0, t1, t2 +# CHECK-INSTR: cv.sleu t0, t1, t2 # CHECK-ENCODING: [0xab,0x32,0x73,0x54] +# CHECK-NO-EXT: unrecognized instruction mnemonic + +cv.sleu a0, a1, a2 +# CHECK-INSTR: cv.sleu a0, a1, a2 +# CHECK-ENCODING: [0x2b,0xb5,0xc5,0x54] # CHECK-NO-EXT: instruction requires the following: 'XCValu' (CORE-V ALU Operations){{$}} 
cv.sletu a0, a1, a2 -# CHECK-INSTR: cv.sletu a0, a1, a2 +# CHECK-INSTR: cv.sleu a0, a1, a2 # CHECK-ENCODING: [0x2b,0xb5,0xc5,0x54] -# CHECK-NO-EXT: instruction requires the following: 'XCValu' (CORE-V ALU Operations){{$}} +# CHECK-NO-EXT: unrecognized instruction mnemonic cv.min t0, t1, t2 # CHECK-INSTR: cv.min t0, t1, t2 diff --git a/llvm/test/MC/RISCV/csr-aliases.s b/llvm/test/MC/RISCV/csr-aliases.s index 1d7032f..96eb96f 100644 --- a/llvm/test/MC/RISCV/csr-aliases.s +++ b/llvm/test/MC/RISCV/csr-aliases.s @@ -7,6 +7,9 @@ # RUN: llvm-mc -filetype=obj -triple riscv32 -mattr=+f < %s \ # RUN: | llvm-objdump -d --mattr=+f - \ # RUN: | FileCheck -check-prefix=CHECK-EXT-F %s +# RUN: llvm-mc -filetype=obj -triple riscv32 -mattr=+zfinx < %s \ +# RUN: | llvm-objdump -d --mattr=+zfinx - \ +# RUN: | FileCheck -check-prefix=CHECK-EXT-F %s # RUN: llvm-mc -filetype=obj -triple riscv32 -mattr=-f < %s \ # RUN: | llvm-objdump -d --mattr=+f - \ # RUN: | FileCheck -check-prefix=CHECK-EXT-F %s @@ -26,6 +29,9 @@ # RUN: llvm-mc -filetype=obj -triple riscv64 -mattr=+f < %s \ # RUN: | llvm-objdump -d --mattr=+f - \ # RUN: | FileCheck -check-prefix=CHECK-EXT-F %s +# RUN: llvm-mc -filetype=obj -triple riscv64 -mattr=+zfinx < %s \ +# RUN: | llvm-objdump -d --mattr=+zfinx - \ +# RUN: | FileCheck -check-prefix=CHECK-EXT-F %s # RUN: llvm-mc -filetype=obj -triple riscv64 -mattr=-f < %s \ # RUN: | llvm-objdump -d --mattr=+f - \ # RUN: | FileCheck -check-prefix=CHECK-EXT-F %s @@ -45,61 +51,61 @@ csrrs t0, 3, zero # CHECK-INST: csrrw t1, fcsr, t2 # CHECK-ALIAS: fscsr t1, t2 -# CHECK-EXT-F-ON: fscsr t1, t2 +# CHECK-EXT-F: fscsr t1, t2 # CHECK-EXT-F-OFF: csrrw t1, fcsr, t2 csrrw t1, 3, t2 # CHECK-INST: csrrw zero, fcsr, t2 # CHECK-ALIAS: fscsr t2 -# CHECK-EXT-F-ON: fscsr t2 +# CHECK-EXT-F: fscsr t2 # CHECK-EXT-F-OFF: csrw fcsr, t2 csrrw zero, 3, t2 # CHECK-INST: csrrw zero, fcsr, t2 # CHECK-ALIAS: fscsr t2 -# CHECK-EXT-F-ON: fscsr t2 +# CHECK-EXT-F: fscsr t2 # CHECK-EXT-F-OFF: csrw fcsr, t2 csrrw 
zero, 3, t2 # CHECK-INST: csrrw t0, frm, zero # CHECK-ALIAS: fsrm t0, zero -# CHECK-EXT-F-ON: fsrm t0, zero +# CHECK-EXT-F: fsrm t0, zero # CHECK-EXT-F-OFF: csrrw t0, frm csrrw t0, 2, zero # CHECK-INST: csrrw t0, frm, t1 # CHECK-ALIAS: fsrm t0, t1 -# CHECK-EXT-F-ON: fsrm t0, t1 +# CHECK-EXT-F: fsrm t0, t1 # CHECK-EXT-F-OFF: csrrw t0, frm, t1 csrrw t0, 2, t1 # CHECK-INST: csrrwi t0, frm, 0x1f # CHECK-ALIAS: fsrmi t0, 0x1f -# CHECK-EXT-F-ON: fsrmi t0, 0x1f +# CHECK-EXT-F: fsrmi t0, 0x1f # CHECK-EXT-F-OFF: csrrwi t0, frm, 0x1f csrrwi t0, 2, 31 # CHECK-INST: csrrwi zero, frm, 0x1f # CHECK-ALIAS: fsrmi 0x1f -# CHECK-EXT-F-ON: fsrmi 0x1f +# CHECK-EXT-F: fsrmi 0x1f # CHECK-EXT-F-OFF: csrwi frm, 0x1f csrrwi zero, 2, 31 # CHECK-INST: csrrs t0, fflags, zero # CHECK-ALIAS: frflags t0 -# CHECK-EXT-F-ON: frflags t0 +# CHECK-EXT-F: frflags t0 # CHECK-EXT-F-OFF: csrr t0, fflags csrrs t0, 1, zero # CHECK-INST: csrrw t0, fflags, t2 # CHECK-ALIAS: fsflags t0, t2 -# CHECK-EXT-F-ON: fsflags t0, t2 +# CHECK-EXT-F: fsflags t0, t2 # CHECK-EXT-F-OFF: csrrw t0, fflags, t2 csrrw t0, 1, t2 # CHECK-INST: csrrw zero, fflags, t2 # CHECK-ALIAS: fsflags t2 -# CHECK-EXT-F-ON: fsflags t2 +# CHECK-EXT-F: fsflags t2 # CHECK-EXT-F-OFF: csrw fflags, t2 csrrw zero, 1, t2 diff --git a/llvm/test/MC/RISCV/rv32ih-aliases-valid.s b/llvm/test/MC/RISCV/rv32ih-aliases-valid.s index 85af8cf..7ae4b6c 100644 --- a/llvm/test/MC/RISCV/rv32ih-aliases-valid.s +++ b/llvm/test/MC/RISCV/rv32ih-aliases-valid.s @@ -1,7 +1,7 @@ -# RUN: llvm-mc %s -triple=riscv32 -mattr=+h -riscv-no-aliases -show-encoding \ -# RUN: | FileCheck -check-prefixes=CHECK,CHECK-INST,CHECK-ALIAS-INST %s -# RUN: llvm-mc %s -triple=riscv64 -mattr=+h -riscv-no-aliases -show-encoding \ -# RUN: | FileCheck -check-prefixes=CHECK,CHECK-INST,CHECK-ALIAS-INST %s +# RUN: llvm-mc %s -triple=riscv32 -mattr=+h \ +# RUN: | FileCheck -check-prefixes=CHECK-INST,CHECK-ALIAS-INST %s +# RUN: llvm-mc %s -triple=riscv64 -mattr=+h \ +# RUN: | FileCheck 
-check-prefixes=CHECK-INST,CHECK-ALIAS-INST %s # RUN: llvm-mc -filetype=obj -mattr=+h -triple riscv32 < %s \ # RUN: | llvm-objdump --mattr=+h -M no-aliases -d - \ # RUN: | FileCheck -check-prefixes=CHECK-INST,CHECK-NOALIAS-INST %s @@ -9,62 +9,48 @@ # RUN: | llvm-objdump --mattr=+h -M no-aliases -d - \ # RUN: | FileCheck -check-prefixes=CHECK-INST,CHECK-NOALIAS-INST %s -# CHECK-ALIAS-INST: hfence.gvma +# CHECK-ALIAS-INST: hfence.gvma{{$}} # CHECK-NOALIAS-INST: hfence.gvma zero, zero -# CHECK: encoding: [0x73,0x00,0x00,0x62] hfence.gvma -# CHECK-ALIAS-INST: hfence.gvma a0 +# CHECK-ALIAS-INST: hfence.gvma a0{{$}} # CHECK-NOALIAS-INST: hfence.gvma a0, zero -# CHECK: encoding: [0x73,0x00,0x05,0x62] hfence.gvma a0 -# CHECK-ALIAS-INST: hfence.vvma +# CHECK-ALIAS-INST: hfence.vvma{{$}} # CHECK-NOALIAS-INST: hfence.vvma zero, zero -# CHECK: encoding: [0x73,0x00,0x00,0x22] hfence.vvma -# CHECK-ALIAS-INST: hfence.vvma a0 +# CHECK-ALIAS-INST: hfence.vvma a0{{$}} # CHECK-NOALIAS-INST: hfence.vvma a0, zero -# CHECK: encoding: [0x73,0x00,0x05,0x22] hfence.vvma a0 # CHECK-INST: hlv.b a0, (a1) -# CHECK: encoding: [0x73,0xc5,0x05,0x60] hlv.b a0, 0(a1) # CHECK-INST: hlv.bu a0, (a1) -# CHECK: encoding: [0x73,0xc5,0x15,0x60] hlv.bu a0, 0(a1) # CHECK-INST: hlv.h a1, (a2) -# CHECK: encoding: [0xf3,0x45,0x06,0x64] hlv.h a1, 0(a2) # CHECK-INST: hlv.hu a1, (a1) -# CHECK: encoding: [0xf3,0xc5,0x15,0x64] hlv.hu a1, 0(a1) # CHECK-INST: hlvx.hu a1, (a2) -# CHECK: encoding: [0xf3,0x45,0x36,0x64] hlvx.hu a1, 0(a2) # CHECK-INST: hlv.w a2, (a2) -# CHECK: encoding: [0x73,0x46,0x06,0x68] hlv.w a2, 0(a2) # CHECK-INST: hlvx.wu a2, (a3) -# CHECK: encoding: [0x73,0xc6,0x36,0x68] hlvx.wu a2, 0(a3) # CHECK-INST: hsv.b a0, (a1) -# CHECK: encoding: [0x73,0xc0,0xa5,0x62] hsv.b a0, 0(a1) # CHECK-INST: hsv.h a0, (a1) -# CHECK: encoding: [0x73,0xc0,0xa5,0x66] hsv.h a0, 0(a1) # CHECK-INST: hsv.w a0, (a1) -# CHECK: encoding: [0x73,0xc0,0xa5,0x6a] hsv.w a0, 0(a1) diff --git a/llvm/test/MC/RISCV/rvf-aliases-valid.s 
b/llvm/test/MC/RISCV/rvf-aliases-valid.s index 31f931b..0430e2a 100644 --- a/llvm/test/MC/RISCV/rvf-aliases-valid.s +++ b/llvm/test/MC/RISCV/rvf-aliases-valid.s @@ -48,7 +48,8 @@ fgt.s x4, f5, f6 fge.s x7, f8, f9 # The following instructions actually alias instructions from the base ISA. -# However, it only makes sense to support them when the F extension is enabled. +# However, it only makes sense to support them when the F or Zfinx extension is +# enabled. # CHECK-INST: csrrs t0, fcsr, zero # CHECK-ALIAS: frcsr t0 frcsr x5 diff --git a/llvm/test/MC/RISCV/rvi-aliases-valid.s b/llvm/test/MC/RISCV/rvi-aliases-valid.s index 9ac6a8a..ef05d12 100644 --- a/llvm/test/MC/RISCV/rvi-aliases-valid.s +++ b/llvm/test/MC/RISCV/rvi-aliases-valid.s @@ -261,10 +261,10 @@ csrrs t0, 0xfff, 0x10 csrrc t0, 0x140, 0x11 # CHECK-S-OBJ-NOALIAS: sfence.vma zero, zero -# CHECK-S-OBJ: sfence.vma +# CHECK-S-OBJ: sfence.vma{{$}} sfence.vma # CHECK-S-OBJ-NOALIAS: sfence.vma a0, zero -# CHECK-S-OBJ: sfence.vma a0 +# CHECK-S-OBJ: sfence.vma a0{{$}} sfence.vma a0 # The following aliases are accepted as input but the canonical form diff --git a/llvm/test/MC/RISCV/rvzfinx-aliases-valid.s b/llvm/test/MC/RISCV/rvzfinx-aliases-valid.s index f624c17..f9225cf 100644 --- a/llvm/test/MC/RISCV/rvzfinx-aliases-valid.s +++ b/llvm/test/MC/RISCV/rvzfinx-aliases-valid.s @@ -7,16 +7,16 @@ # RUN: llvm-mc %s -triple=riscv64 -mattr=+zfinx \ # RUN: | FileCheck -check-prefix=CHECK-ALIAS %s # RUN: llvm-mc -filetype=obj -triple riscv32 -mattr=+zfinx %s \ -# RUN: | llvm-objdump -d --mattr=+zfinx -M no-aliases - \ +# RUN: | llvm-objdump --no-print-imm-hex -d --mattr=+zfinx -M no-aliases - \ # RUN: | FileCheck -check-prefix=CHECK-INST %s # RUN: llvm-mc -filetype=obj -triple riscv32 -mattr=+zfinx %s \ -# RUN: | llvm-objdump -d --mattr=+zfinx - \ +# RUN: | llvm-objdump --no-print-imm-hex -d --mattr=+zfinx - \ # RUN: | FileCheck -check-prefix=CHECK-ALIAS %s # RUN: llvm-mc -filetype=obj -triple riscv64 -mattr=+zfinx %s \ -# 
RUN: | llvm-objdump -d --mattr=+zfinx -M no-aliases - \ +# RUN: | llvm-objdump --no-print-imm-hex -d --mattr=+zfinx -M no-aliases - \ # RUN: | FileCheck -check-prefix=CHECK-INST %s # RUN: llvm-mc -filetype=obj -triple riscv64 -mattr=+zfinx %s \ -# RUN: | llvm-objdump -d --mattr=+zfinx - \ +# RUN: | llvm-objdump --no-print-imm-hex -d --mattr=+zfinx - \ # RUN: | FileCheck -check-prefix=CHECK-ALIAS %s ##===----------------------------------------------------------------------===## @@ -40,6 +40,63 @@ fgt.s x4, s5, s6 # CHECK-ALIAS: fle.s t2, s1, s0 fge.s x7, x8, x9 +# The following instructions actually alias instructions from the base ISA. +# However, it only makes sense to support them when the F or Zfinx extension is +# enabled. +# CHECK-INST: csrrs t0, fcsr, zero +# CHECK-ALIAS: frcsr t0 +frcsr x5 +# CHECK-INST: csrrw t1, fcsr, t2 +# CHECK-ALIAS: fscsr t1, t2 +fscsr x6, x7 +# CHECK-INST: csrrw zero, fcsr, t3 +# CHECK-ALIAS: fscsr t3 +fscsr x28 + +# These are obsolete aliases of frcsr/fscsr. They are accepted by the assembler +# but the disassembler should always print them as the equivalent, new aliases. 
+# CHECK-INST: csrrs t4, fcsr, zero +# CHECK-ALIAS: frcsr t4 +frsr x29 +# CHECK-INST: csrrw t5, fcsr, t6 +# CHECK-ALIAS: fscsr t5, t6 +fssr x30, x31 +# CHECK-INST: csrrw zero, fcsr, s0 +# CHECK-ALIAS: fscsr s0 +fssr x8 + +# CHECK-INST: csrrs t4, frm, zero +# CHECK-ALIAS: frrm t4 +frrm x29 +# CHECK-INST: csrrw t5, frm, t4 +# CHECK-ALIAS: fsrm t5, t4 +fsrm x30, x29 +# CHECK-INST: csrrw zero, frm, t6 +# CHECK-ALIAS: fsrm t6 +fsrm x31 +# CHECK-INST: csrrwi a0, frm, 31 +# CHECK-ALIAS: fsrmi a0, 31 +fsrmi x10, 0x1f +# CHECK-INST: csrrwi zero, frm, 30 +# CHECK-ALIAS: fsrmi 30 +fsrmi 0x1e + +# CHECK-INST: csrrs a1, fflags, zero +# CHECK-ALIAS: frflags a1 +frflags x11 +# CHECK-INST: csrrw a2, fflags, a1 +# CHECK-ALIAS: fsflags a2, a1 +fsflags x12, x11 +# CHECK-INST: csrrw zero, fflags, a3 +# CHECK-ALIAS: fsflags a3 +fsflags x13 +# CHECK-INST: csrrwi a4, fflags, 29 +# CHECK-ALIAS: fsflagsi a4, 29 +fsflagsi x14, 0x1d +# CHECK-INST: csrrwi zero, fflags, 28 +# CHECK-ALIAS: fsflagsi 28 +fsflagsi 0x1c + ##===----------------------------------------------------------------------===## ## Aliases which omit the rounding mode. 
##===----------------------------------------------------------------------===## diff --git a/llvm/test/Transforms/Coroutines/gh107139-split-in-scc.ll b/llvm/test/Transforms/Coroutines/gh107139-split-in-scc.ll new file mode 100644 index 0000000..1fae4d2 --- /dev/null +++ b/llvm/test/Transforms/Coroutines/gh107139-split-in-scc.ll @@ -0,0 +1,38 @@ +; Verify that we don't crash on mutually recursive coroutines +; RUN: opt < %s -passes='cgscc(coro-split)' -S | FileCheck %s + +target triple = "x86_64-redhat-linux-gnu" + +; CHECK-LABEL: define void @foo +define void @foo() presplitcoroutine personality ptr null { +entry: + + %0 = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null) + %1 = call ptr @llvm.coro.begin(token %0, ptr null) + %2 = call token @llvm.coro.save(ptr null) + %3 = call i8 @llvm.coro.suspend(token none, i1 false) + %4 = call token @llvm.coro.save(ptr null) + ; CHECK: call void @bar(ptr null, ptr null) + call void @llvm.coro.await.suspend.void(ptr null, ptr null, ptr @bar) + ret void +} + +; CHECK-LABEL: define void @bar({{.*}}) +define void @bar(ptr %0, ptr %1) { +entry: + ; CHECK: call void @foo() + call void @foo() + ret void +} + +; CHECK-LABEL: @foo.resume({{.*}}) +; CHECK-LABEL: @foo.destroy({{.*}}) +; CHECK-LABEL: @foo.cleanup({{.*}}) + +declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #0 +declare ptr @llvm.coro.begin(token, ptr writeonly) nounwind +declare token @llvm.coro.save(ptr) nomerge nounwind +declare void @llvm.coro.await.suspend.void(ptr, ptr, ptr) +declare i8 @llvm.coro.suspend(token, i1) nounwind + +attributes #0 = { nocallback nofree nosync nounwind willreturn memory(argmem: read) } diff --git a/llvm/test/Transforms/InstCombine/fmod.ll b/llvm/test/Transforms/InstCombine/fmod.ll new file mode 100644 index 0000000..c021d27 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/fmod.ll @@ -0,0 +1,128 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; 
RUN: opt -S -passes=instcombine < %s | FileCheck %s + +define float @test_inf_const(float %f) { +; CHECK-LABEL: define float @test_inf_const( +; CHECK-SAME: float [[F:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ABS:%.*]] = tail call float @llvm.fabs.f32(float [[F]]) +; CHECK-NEXT: [[ISINF:%.*]] = fcmp oeq float [[ABS]], 0x7FF0000000000000 +; CHECK-NEXT: br i1 [[ISINF]], label [[RETURN:%.*]], label [[IF_END:%.*]] +; CHECK: if.end: +; CHECK-NEXT: [[CALL:%.*]] = tail call float @fmodf(float [[F]], float 2.000000e+00) +; CHECK-NEXT: ret float [[CALL]] +; CHECK: return: +; CHECK-NEXT: ret float 0.000000e+00 +; +entry: + %abs = tail call float @llvm.fabs.f32(float %f) + %isinf = fcmp oeq float %abs, 0x7FF0000000000000 + br i1 %isinf, label %return, label %if.end + +if.end: + %call = tail call float @fmodf(float %f, float 2.0) + ret float %call + +return: + ret float 0.0 +} + +define float @test_const_zero(float %f) { +; CHECK-LABEL: define float @test_const_zero( +; CHECK-SAME: float [[F:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ISZERO:%.*]] = fcmp oeq float [[F]], 0.000000e+00 +; CHECK-NEXT: br i1 [[ISZERO]], label [[RETURN:%.*]], label [[IF_END:%.*]] +; CHECK: if.end: +; CHECK-NEXT: [[CALL:%.*]] = tail call float @fmodf(float 2.000000e+00, float [[F]]) +; CHECK-NEXT: ret float [[CALL]] +; CHECK: return: +; CHECK-NEXT: ret float 0.000000e+00 +; +entry: + %iszero = fcmp oeq float %f, 0.0 + br i1 %iszero, label %return, label %if.end + +if.end: + %call = tail call float @fmodf(float 2.0, float %f) + ret float %call + +return: + ret float 0.0 +} + +define float @test_unknown_const(float %f) { +; CHECK-LABEL: define float @test_unknown_const( +; CHECK-SAME: float [[F:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = tail call float @fmodf(float [[F]], float 2.000000e+00) +; CHECK-NEXT: ret float [[CALL]] +; +entry: + %call = tail call float @fmodf(float %f, float 2.000000e+00) + ret float %call +} + +define float @test_noinf_nozero(float nofpclass(inf) 
%f, float nofpclass(zero) %g) { +; CHECK-LABEL: define float @test_noinf_nozero( +; CHECK-SAME: float nofpclass(inf) [[F:%.*]], float nofpclass(zero) [[G:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = tail call nnan float @fmodf(float [[F]], float [[G]]) +; CHECK-NEXT: ret float [[CALL]] +; +entry: + %call = tail call nnan float @fmodf(float %f, float %g) + ret float %call +} + +define double @test_double(double nofpclass(inf) %f, double nofpclass(zero) %g) { +; CHECK-LABEL: define double @test_double( +; CHECK-SAME: double nofpclass(inf) [[F:%.*]], double nofpclass(zero) [[G:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = tail call double @fmod(double [[F]], double [[G]]) +; CHECK-NEXT: ret double [[CALL]] +; +entry: + %call = tail call double @fmod(double %f, double %g) + ret double %call +} + +define fp128 @test_fp128(fp128 nofpclass(inf) %f, fp128 nofpclass(zero) %g) { +; CHECK-LABEL: define fp128 @test_fp128( +; CHECK-SAME: fp128 nofpclass(inf) [[F:%.*]], fp128 nofpclass(zero) [[G:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = tail call fp128 @fmodl(fp128 [[F]], fp128 [[G]]) +; CHECK-NEXT: ret fp128 [[CALL]] +; +entry: + %call = tail call fp128 @fmodl(fp128 %f, fp128 %g) + ret fp128 %call +} + +define float @test_noinf_nozero_dazpreservesign(float nofpclass(inf) %f, float nofpclass(zero) %g) "denormal-fp-math"="preserve-sign,preserve-sign" { +; CHECK-LABEL: define float @test_noinf_nozero_dazpreservesign( +; CHECK-SAME: float nofpclass(inf) [[F:%.*]], float nofpclass(zero) [[G:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = tail call nnan float @fmodf(float [[F]], float [[G]]) +; CHECK-NEXT: ret float [[CALL]] +; +entry: + %call = tail call nnan float @fmodf(float %f, float %g) + ret float %call +} + +define float @test_noinf_nozero_dazdynamic(float nofpclass(inf) %f, float nofpclass(zero) %g) "denormal-fp-math"="dynamic,dynamic" { +; CHECK-LABEL: define float 
@test_noinf_nozero_dazdynamic( +; CHECK-SAME: float nofpclass(inf) [[F:%.*]], float nofpclass(zero) [[G:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = tail call nnan float @fmodf(float [[F]], float [[G]]) +; CHECK-NEXT: ret float [[CALL]] +; +entry: + %call = tail call nnan float @fmodf(float %f, float %g) + ret float %call +} + +declare float @fmodf(float, float) +declare double @fmod(double, double) +declare fp128 @fmodl(fp128, fp128) diff --git a/llvm/test/Transforms/InstCombine/pow-1.ll b/llvm/test/Transforms/InstCombine/pow-1.ll index 44802f9..f4bbd3e 100644 --- a/llvm/test/Transforms/InstCombine/pow-1.ll +++ b/llvm/test/Transforms/InstCombine/pow-1.ll @@ -862,6 +862,30 @@ define double @pow_libcall_half_no_FMF(double %x) { ret double %retval } +define double @pow_libcall_half_fromdomcondition(double %x) { +; CHECK-LABEL: define double @pow_libcall_half_fromdomcondition( +; CHECK-SAME: double [[X:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = call double @llvm.fabs.f64(double [[X]]) +; CHECK-NEXT: [[C:%.*]] = fcmp oeq double [[A]], 0x7FF0000000000000 +; CHECK-NEXT: br i1 [[C]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK: then: +; CHECK-NEXT: ret double 0.000000e+00 +; CHECK: else: +; CHECK-NEXT: [[RETVAL:%.*]] = call double @pow(double [[X]], double 5.000000e-01) +; CHECK-NEXT: ret double [[RETVAL]] +; + %a = call double @llvm.fabs.f64(double %x) + %c = fcmp oeq double %a, 0x7FF0000000000000 + br i1 %c, label %then, label %else + +then: + ret double 0.0 + +else: + %retval = call double @pow(double %x, double 0.5) + ret double %retval +} + define double @pow_libcall_half_no_FMF_noerrno(double %x) { ; LIB-LABEL: define double @pow_libcall_half_no_FMF_noerrno( ; LIB-SAME: double [[X:%.*]]) { diff --git a/llvm/test/Transforms/InstCombine/rem.ll b/llvm/test/Transforms/InstCombine/rem.ll index 9d2a947..2cf56df 100644 --- a/llvm/test/Transforms/InstCombine/rem.ll +++ b/llvm/test/Transforms/InstCombine/rem.ll @@ -1073,3 +1073,106 @@ define 
i16 @rem_pow2(i16 %x, i16 %y) { %rem = urem i16 %x, %y ret i16 %rem } + +define i64 @rem_pow2_domcond(i64 %a, i64 %b) { +; CHECK-LABEL: @rem_pow2_domcond( +; CHECK-NEXT: start: +; CHECK-NEXT: [[CPOP:%.*]] = call range(i64 0, 65) i64 @llvm.ctpop.i64(i64 [[B:%.*]]) +; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[CPOP]], 1 +; CHECK-NEXT: br i1 [[COND]], label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[B]], -1 +; CHECK-NEXT: [[REM:%.*]] = and i64 [[A:%.*]], [[TMP0]] +; CHECK-NEXT: ret i64 [[REM]] +; CHECK: bb2: +; CHECK-NEXT: ret i64 0 +; +start: + %cpop = call i64 @llvm.ctpop.i64(i64 %b) + %cond = icmp eq i64 %cpop, 1 + br i1 %cond, label %bb1, label %bb2 + +bb1: + %rem = urem i64 %a, %b + ret i64 %rem + +bb2: + ret i64 0 +} + +define i64 @rem_pow2_domcond_in_else(i64 %a, i64 %b) { +; CHECK-LABEL: @rem_pow2_domcond_in_else( +; CHECK-NEXT: start: +; CHECK-NEXT: [[CPOP:%.*]] = call range(i64 0, 65) i64 @llvm.ctpop.i64(i64 [[B:%.*]]) +; CHECK-NEXT: [[COND_NOT:%.*]] = icmp eq i64 [[CPOP]], 1 +; CHECK-NEXT: br i1 [[COND_NOT]], label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[B]], -1 +; CHECK-NEXT: [[REM:%.*]] = and i64 [[A:%.*]], [[TMP0]] +; CHECK-NEXT: ret i64 [[REM]] +; CHECK: bb2: +; CHECK-NEXT: ret i64 0 +; +start: + %cpop = call i64 @llvm.ctpop.i64(i64 %b) + %cond = icmp ne i64 %cpop, 1 + br i1 %cond, label %bb2, label %bb1 + +bb1: + %rem = urem i64 %a, %b + ret i64 %rem + +bb2: + ret i64 0 +} + +define i64 @rem_pow2_or_zero_domcond(i64 %a, i64 %b) { +; CHECK-LABEL: @rem_pow2_or_zero_domcond( +; CHECK-NEXT: start: +; CHECK-NEXT: [[CPOP:%.*]] = call range(i64 0, 65) i64 @llvm.ctpop.i64(i64 [[B:%.*]]) +; CHECK-NEXT: [[COND:%.*]] = icmp ult i64 [[CPOP]], 2 +; CHECK-NEXT: br i1 [[COND]], label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[B]], -1 +; CHECK-NEXT: [[REM:%.*]] = and i64 [[A:%.*]], [[TMP0]] +; CHECK-NEXT: ret i64 [[REM]] +; CHECK: bb2: +; 
CHECK-NEXT: ret i64 0 +; +start: + %cpop = call i64 @llvm.ctpop.i64(i64 %b) + %cond = icmp ult i64 %cpop, 2 + br i1 %cond, label %bb1, label %bb2 + +bb1: + %rem = urem i64 %a, %b + ret i64 %rem + +bb2: + ret i64 0 +} + +define i64 @rem_pow2_non_domcond(i64 %a, i64 %b) { +; CHECK-LABEL: @rem_pow2_non_domcond( +; CHECK-NEXT: start: +; CHECK-NEXT: [[CPOP:%.*]] = call range(i64 0, 65) i64 @llvm.ctpop.i64(i64 [[B:%.*]]) +; CHECK-NEXT: [[COND_NOT:%.*]] = icmp eq i64 [[CPOP]], 1 +; CHECK-NEXT: br i1 [[COND_NOT]], label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[REM:%.*]] = urem i64 [[A:%.*]], [[B]] +; CHECK-NEXT: ret i64 [[REM]] +; CHECK: bb2: +; CHECK-NEXT: br label [[BB1]] +; +start: + %cpop = call i64 @llvm.ctpop.i64(i64 %b) + %cond = icmp ne i64 %cpop, 1 + br i1 %cond, label %bb2, label %bb1 + +bb1: + %rem = urem i64 %a, %b + ret i64 %rem + +bb2: + br label %bb1 +} diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll index 0c9ab24..69b8519 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll @@ -221,8 +221,8 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, ptr %ptr) optsize { ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: ; CHECK-NEXT: EMIT vp<[[RED_RES:%.+]]> = compute-reduction-result ir<%and.red>, vp<[[SEL]]> -; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-from-end ir<%recur.next>, ir<1> ; CHECK-NEXT: EMIT vp<[[RED_EX:%.+]]> = extract-from-end vp<[[RED_RES]]>, ir<1> +; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-from-end ir<%recur.next>, ir<1> ; CHECK-NEXT: EMIT branch-on-cond ir<true> ; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph ; CHECK-EMPTY: diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/reduce-fadd.ll 
b/llvm/test/Transforms/SLPVectorizer/AArch64/reduce-fadd.ll new file mode 100644 index 0000000..edc0381 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/reduce-fadd.ll @@ -0,0 +1,865 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -S -passes=slp-vectorizer -mtriple=aarch64-unknown-linux \ +; RUN: -mattr=-fullfp16 | FileCheck %s --check-prefixes=CHECK,NOFP16 +; RUN: opt < %s -S -passes=slp-vectorizer -mtriple=aarch64-unknown-linux \ +; RUN: -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,FP16 + +define half @reduce_fast_half2(<2 x half> %vec2) { +; CHECK-LABEL: define half @reduce_fast_half2( +; CHECK-SAME: <2 x half> [[VEC2:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[ELT0:%.*]] = extractelement <2 x half> [[VEC2]], i64 0 +; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x half> [[VEC2]], i64 1 +; CHECK-NEXT: [[ADD1:%.*]] = fadd fast half [[ELT1]], [[ELT0]] +; CHECK-NEXT: ret half [[ADD1]] +; +entry: + %elt0 = extractelement <2 x half> %vec2, i64 0 + %elt1 = extractelement <2 x half> %vec2, i64 1 + %add1 = fadd fast half %elt1, %elt0 + ret half %add1 +} + +define half @reduce_half2(<2 x half> %vec2) { +; CHECK-LABEL: define half @reduce_half2( +; CHECK-SAME: <2 x half> [[VEC2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[ELT0:%.*]] = extractelement <2 x half> [[VEC2]], i64 0 +; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x half> [[VEC2]], i64 1 +; CHECK-NEXT: [[ADD1:%.*]] = fadd half [[ELT1]], [[ELT0]] +; CHECK-NEXT: ret half [[ADD1]] +; +entry: + %elt0 = extractelement <2 x half> %vec2, i64 0 + %elt1 = extractelement <2 x half> %vec2, i64 1 + %add1 = fadd half %elt1, %elt0 + ret half %add1 +} + +define half @reduce_fast_half4(<4 x half> %vec4) { +; CHECK-LABEL: define half @reduce_fast_half4( +; CHECK-SAME: <4 x half> [[VEC4:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call fast half 
@llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> [[VEC4]]) +; CHECK-NEXT: ret half [[TMP0]] +; +entry: + %elt0 = extractelement <4 x half> %vec4, i64 0 + %elt1 = extractelement <4 x half> %vec4, i64 1 + %elt2 = extractelement <4 x half> %vec4, i64 2 + %elt3 = extractelement <4 x half> %vec4, i64 3 + %add1 = fadd fast half %elt1, %elt0 + %add2 = fadd fast half %elt2, %add1 + %add3 = fadd fast half %elt3, %add2 + ret half %add3 +} + +define half @reduce_half4(<4 x half> %vec4) { +; CHECK-LABEL: define half @reduce_half4( +; CHECK-SAME: <4 x half> [[VEC4:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x half> [[VEC4]], i64 0 +; CHECK-NEXT: [[ELT1:%.*]] = extractelement <4 x half> [[VEC4]], i64 1 +; CHECK-NEXT: [[ELT2:%.*]] = extractelement <4 x half> [[VEC4]], i64 2 +; CHECK-NEXT: [[ELT3:%.*]] = extractelement <4 x half> [[VEC4]], i64 3 +; CHECK-NEXT: [[ADD1:%.*]] = fadd half [[ELT1]], [[ELT0]] +; CHECK-NEXT: [[ADD2:%.*]] = fadd half [[ELT2]], [[ADD1]] +; CHECK-NEXT: [[ADD3:%.*]] = fadd half [[ELT3]], [[ADD2]] +; CHECK-NEXT: ret half [[ADD3]] +; +entry: + %elt0 = extractelement <4 x half> %vec4, i64 0 + %elt1 = extractelement <4 x half> %vec4, i64 1 + %elt2 = extractelement <4 x half> %vec4, i64 2 + %elt3 = extractelement <4 x half> %vec4, i64 3 + %add1 = fadd half %elt1, %elt0 + %add2 = fadd half %elt2, %add1 + %add3 = fadd half %elt3, %add2 + ret half %add3 +} + +define half @reduce_fast_half8(<8 x half> %vec8) { +; CHECK-LABEL: define half @reduce_fast_half8( +; CHECK-SAME: <8 x half> [[VEC8:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[ELT4:%.*]] = extractelement <8 x half> [[VEC8]], i64 4 +; CHECK-NEXT: [[ELT5:%.*]] = extractelement <8 x half> [[VEC8]], i64 5 +; CHECK-NEXT: [[ELT6:%.*]] = extractelement <8 x half> [[VEC8]], i64 6 +; CHECK-NEXT: [[ELT7:%.*]] = extractelement <8 x half> [[VEC8]], i64 7 +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x half> [[VEC8]], <8 x half> poison, <4 x 
i32> <i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[TMP1:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> [[TMP0]]) +; CHECK-NEXT: [[OP_RDX:%.*]] = fadd fast half [[TMP1]], [[ELT4]] +; CHECK-NEXT: [[OP_RDX1:%.*]] = fadd fast half [[ELT5]], [[ELT6]] +; CHECK-NEXT: [[OP_RDX2:%.*]] = fadd fast half [[OP_RDX]], [[OP_RDX1]] +; CHECK-NEXT: [[OP_RDX3:%.*]] = fadd fast half [[OP_RDX2]], [[ELT7]] +; CHECK-NEXT: ret half [[OP_RDX3]] +; +entry: + %elt0 = extractelement <8 x half> %vec8, i64 0 + %elt1 = extractelement <8 x half> %vec8, i64 1 + %elt2 = extractelement <8 x half> %vec8, i64 2 + %elt3 = extractelement <8 x half> %vec8, i64 3 + %elt4 = extractelement <8 x half> %vec8, i64 4 + %elt5 = extractelement <8 x half> %vec8, i64 5 + %elt6 = extractelement <8 x half> %vec8, i64 6 + %elt7 = extractelement <8 x half> %vec8, i64 7 + %add1 = fadd fast half %elt1, %elt0 + %add2 = fadd fast half %elt2, %add1 + %add3 = fadd fast half %elt3, %add2 + %add4 = fadd fast half %elt4, %add3 + %add5 = fadd fast half %elt5, %add4 + %add6 = fadd fast half %elt6, %add5 + %add7 = fadd fast half %elt7, %add6 + ret half %add7 +} + +define half @reduce_half8(<8 x half> %vec8) { +; CHECK-LABEL: define half @reduce_half8( +; CHECK-SAME: <8 x half> [[VEC8:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[ELT0:%.*]] = extractelement <8 x half> [[VEC8]], i64 0 +; CHECK-NEXT: [[ELT1:%.*]] = extractelement <8 x half> [[VEC8]], i64 1 +; CHECK-NEXT: [[ELT2:%.*]] = extractelement <8 x half> [[VEC8]], i64 2 +; CHECK-NEXT: [[ELT3:%.*]] = extractelement <8 x half> [[VEC8]], i64 3 +; CHECK-NEXT: [[ELT4:%.*]] = extractelement <8 x half> [[VEC8]], i64 4 +; CHECK-NEXT: [[ELT5:%.*]] = extractelement <8 x half> [[VEC8]], i64 5 +; CHECK-NEXT: [[ELT6:%.*]] = extractelement <8 x half> [[VEC8]], i64 6 +; CHECK-NEXT: [[ELT7:%.*]] = extractelement <8 x half> [[VEC8]], i64 7 +; CHECK-NEXT: [[ADD1:%.*]] = fadd half [[ELT1]], [[ELT0]] +; CHECK-NEXT: [[ADD2:%.*]] = fadd half 
[[ELT2]], [[ADD1]] +; CHECK-NEXT: [[ADD3:%.*]] = fadd half [[ELT3]], [[ADD2]] +; CHECK-NEXT: [[ADD4:%.*]] = fadd half [[ELT4]], [[ADD3]] +; CHECK-NEXT: [[ADD5:%.*]] = fadd half [[ELT5]], [[ADD4]] +; CHECK-NEXT: [[ADD6:%.*]] = fadd half [[ELT6]], [[ADD5]] +; CHECK-NEXT: [[ADD7:%.*]] = fadd half [[ELT7]], [[ADD6]] +; CHECK-NEXT: ret half [[ADD7]] +; +entry: + %elt0 = extractelement <8 x half> %vec8, i64 0 + %elt1 = extractelement <8 x half> %vec8, i64 1 + %elt2 = extractelement <8 x half> %vec8, i64 2 + %elt3 = extractelement <8 x half> %vec8, i64 3 + %elt4 = extractelement <8 x half> %vec8, i64 4 + %elt5 = extractelement <8 x half> %vec8, i64 5 + %elt6 = extractelement <8 x half> %vec8, i64 6 + %elt7 = extractelement <8 x half> %vec8, i64 7 + %add1 = fadd half %elt1, %elt0 + %add2 = fadd half %elt2, %add1 + %add3 = fadd half %elt3, %add2 + %add4 = fadd half %elt4, %add3 + %add5 = fadd half %elt5, %add4 + %add6 = fadd half %elt6, %add5 + %add7 = fadd half %elt7, %add6 + ret half %add7 +} + +define half @reduce_fast_half16(<16 x half> %vec16) { +; NOFP16-LABEL: define half @reduce_fast_half16( +; NOFP16-SAME: <16 x half> [[VEC16:%.*]]) #[[ATTR0]] { +; NOFP16-NEXT: [[ENTRY:.*:]] +; NOFP16-NEXT: [[TMP0:%.*]] = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> [[VEC16]]) +; NOFP16-NEXT: ret half [[TMP0]] +; +; FP16-LABEL: define half @reduce_fast_half16( +; FP16-SAME: <16 x half> [[VEC16:%.*]]) #[[ATTR0]] { +; FP16-NEXT: [[ENTRY:.*:]] +; FP16-NEXT: [[ELT4:%.*]] = extractelement <16 x half> [[VEC16]], i64 4 +; FP16-NEXT: [[ELT5:%.*]] = extractelement <16 x half> [[VEC16]], i64 5 +; FP16-NEXT: [[ELT6:%.*]] = extractelement <16 x half> [[VEC16]], i64 6 +; FP16-NEXT: [[ELT7:%.*]] = extractelement <16 x half> [[VEC16]], i64 7 +; FP16-NEXT: [[ELT12:%.*]] = extractelement <16 x half> [[VEC16]], i64 12 +; FP16-NEXT: [[ELT13:%.*]] = extractelement <16 x half> [[VEC16]], i64 13 +; FP16-NEXT: [[ELT14:%.*]] = extractelement <16 x half> [[VEC16]], i64 14 +; 
FP16-NEXT: [[ELT15:%.*]] = extractelement <16 x half> [[VEC16]], i64 15 +; FP16-NEXT: [[TMP0:%.*]] = shufflevector <16 x half> [[VEC16]], <16 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; FP16-NEXT: [[TMP1:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> [[TMP0]]) +; FP16-NEXT: [[TMP2:%.*]] = shufflevector <16 x half> [[VEC16]], <16 x half> poison, <4 x i32> <i32 8, i32 9, i32 10, i32 11> +; FP16-NEXT: [[TMP3:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> [[TMP2]]) +; FP16-NEXT: [[OP_RDX:%.*]] = fadd fast half [[TMP1]], [[TMP3]] +; FP16-NEXT: [[OP_RDX1:%.*]] = fadd fast half [[OP_RDX]], [[ELT4]] +; FP16-NEXT: [[OP_RDX2:%.*]] = fadd fast half [[ELT5]], [[ELT6]] +; FP16-NEXT: [[OP_RDX3:%.*]] = fadd fast half [[ELT7]], [[ELT12]] +; FP16-NEXT: [[OP_RDX4:%.*]] = fadd fast half [[ELT13]], [[ELT14]] +; FP16-NEXT: [[OP_RDX5:%.*]] = fadd fast half [[OP_RDX1]], [[OP_RDX2]] +; FP16-NEXT: [[OP_RDX6:%.*]] = fadd fast half [[OP_RDX3]], [[OP_RDX4]] +; FP16-NEXT: [[OP_RDX7:%.*]] = fadd fast half [[OP_RDX5]], [[OP_RDX6]] +; FP16-NEXT: [[OP_RDX8:%.*]] = fadd fast half [[OP_RDX7]], [[ELT15]] +; FP16-NEXT: ret half [[OP_RDX8]] +; +entry: + %elt0 = extractelement <16 x half> %vec16, i64 0 + %elt1 = extractelement <16 x half> %vec16, i64 1 + %elt2 = extractelement <16 x half> %vec16, i64 2 + %elt3 = extractelement <16 x half> %vec16, i64 3 + %elt4 = extractelement <16 x half> %vec16, i64 4 + %elt5 = extractelement <16 x half> %vec16, i64 5 + %elt6 = extractelement <16 x half> %vec16, i64 6 + %elt7 = extractelement <16 x half> %vec16, i64 7 + %elt8 = extractelement <16 x half> %vec16, i64 8 + %elt9 = extractelement <16 x half> %vec16, i64 9 + %elt10 = extractelement <16 x half> %vec16, i64 10 + %elt11 = extractelement <16 x half> %vec16, i64 11 + %elt12 = extractelement <16 x half> %vec16, i64 12 + %elt13 = extractelement <16 x half> %vec16, i64 13 + %elt14 = extractelement <16 x half> %vec16, i64 14 + %elt15 = 
extractelement <16 x half> %vec16, i64 15 + %add1 = fadd fast half %elt1, %elt0 + %add2 = fadd fast half %elt2, %add1 + %add3 = fadd fast half %elt3, %add2 + %add4 = fadd fast half %elt4, %add3 + %add5 = fadd fast half %elt5, %add4 + %add6 = fadd fast half %elt6, %add5 + %add7 = fadd fast half %elt7, %add6 + %add8 = fadd fast half %elt8, %add7 + %add9 = fadd fast half %elt9, %add8 + %add10 = fadd fast half %elt10, %add9 + %add11 = fadd fast half %elt11, %add10 + %add12 = fadd fast half %elt12, %add11 + %add13 = fadd fast half %elt13, %add12 + %add14 = fadd fast half %elt14, %add13 + %add15 = fadd fast half %elt15, %add14 + ret half %add15 +} + +define half @reduce_half16(<16 x half> %vec16) { +; CHECK-LABEL: define half @reduce_half16( +; CHECK-SAME: <16 x half> [[VEC16:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[ELT0:%.*]] = extractelement <16 x half> [[VEC16]], i64 0 +; CHECK-NEXT: [[ELT1:%.*]] = extractelement <16 x half> [[VEC16]], i64 1 +; CHECK-NEXT: [[ELT2:%.*]] = extractelement <16 x half> [[VEC16]], i64 2 +; CHECK-NEXT: [[ELT3:%.*]] = extractelement <16 x half> [[VEC16]], i64 3 +; CHECK-NEXT: [[ELT4:%.*]] = extractelement <16 x half> [[VEC16]], i64 4 +; CHECK-NEXT: [[ELT5:%.*]] = extractelement <16 x half> [[VEC16]], i64 5 +; CHECK-NEXT: [[ELT6:%.*]] = extractelement <16 x half> [[VEC16]], i64 6 +; CHECK-NEXT: [[ELT7:%.*]] = extractelement <16 x half> [[VEC16]], i64 7 +; CHECK-NEXT: [[ELT8:%.*]] = extractelement <16 x half> [[VEC16]], i64 8 +; CHECK-NEXT: [[ELT9:%.*]] = extractelement <16 x half> [[VEC16]], i64 9 +; CHECK-NEXT: [[ELT10:%.*]] = extractelement <16 x half> [[VEC16]], i64 10 +; CHECK-NEXT: [[ELT11:%.*]] = extractelement <16 x half> [[VEC16]], i64 11 +; CHECK-NEXT: [[ELT12:%.*]] = extractelement <16 x half> [[VEC16]], i64 12 +; CHECK-NEXT: [[ELT13:%.*]] = extractelement <16 x half> [[VEC16]], i64 13 +; CHECK-NEXT: [[ELT14:%.*]] = extractelement <16 x half> [[VEC16]], i64 14 +; CHECK-NEXT: [[ELT15:%.*]] = extractelement <16 
x half> [[VEC16]], i64 15 +; CHECK-NEXT: [[ADD1:%.*]] = fadd half [[ELT1]], [[ELT0]] +; CHECK-NEXT: [[ADD2:%.*]] = fadd half [[ELT2]], [[ADD1]] +; CHECK-NEXT: [[ADD3:%.*]] = fadd half [[ELT3]], [[ADD2]] +; CHECK-NEXT: [[ADD4:%.*]] = fadd half [[ELT4]], [[ADD3]] +; CHECK-NEXT: [[ADD5:%.*]] = fadd half [[ELT5]], [[ADD4]] +; CHECK-NEXT: [[ADD6:%.*]] = fadd half [[ELT6]], [[ADD5]] +; CHECK-NEXT: [[ADD7:%.*]] = fadd half [[ELT7]], [[ADD6]] +; CHECK-NEXT: [[ADD8:%.*]] = fadd half [[ELT8]], [[ADD7]] +; CHECK-NEXT: [[ADD9:%.*]] = fadd half [[ELT9]], [[ADD8]] +; CHECK-NEXT: [[ADD10:%.*]] = fadd half [[ELT10]], [[ADD9]] +; CHECK-NEXT: [[ADD11:%.*]] = fadd half [[ELT11]], [[ADD10]] +; CHECK-NEXT: [[ADD12:%.*]] = fadd half [[ELT12]], [[ADD11]] +; CHECK-NEXT: [[ADD13:%.*]] = fadd half [[ELT13]], [[ADD12]] +; CHECK-NEXT: [[ADD14:%.*]] = fadd half [[ELT14]], [[ADD13]] +; CHECK-NEXT: [[ADD15:%.*]] = fadd half [[ELT15]], [[ADD14]] +; CHECK-NEXT: ret half [[ADD15]] +; +entry: + %elt0 = extractelement <16 x half> %vec16, i64 0 + %elt1 = extractelement <16 x half> %vec16, i64 1 + %elt2 = extractelement <16 x half> %vec16, i64 2 + %elt3 = extractelement <16 x half> %vec16, i64 3 + %elt4 = extractelement <16 x half> %vec16, i64 4 + %elt5 = extractelement <16 x half> %vec16, i64 5 + %elt6 = extractelement <16 x half> %vec16, i64 6 + %elt7 = extractelement <16 x half> %vec16, i64 7 + %elt8 = extractelement <16 x half> %vec16, i64 8 + %elt9 = extractelement <16 x half> %vec16, i64 9 + %elt10 = extractelement <16 x half> %vec16, i64 10 + %elt11 = extractelement <16 x half> %vec16, i64 11 + %elt12 = extractelement <16 x half> %vec16, i64 12 + %elt13 = extractelement <16 x half> %vec16, i64 13 + %elt14 = extractelement <16 x half> %vec16, i64 14 + %elt15 = extractelement <16 x half> %vec16, i64 15 + %add1 = fadd half %elt1, %elt0 + %add2 = fadd half %elt2, %add1 + %add3 = fadd half %elt3, %add2 + %add4 = fadd half %elt4, %add3 + %add5 = fadd half %elt5, %add4 + %add6 = fadd half %elt6, %add5 
+ %add7 = fadd half %elt7, %add6 + %add8 = fadd half %elt8, %add7 + %add9 = fadd half %elt9, %add8 + %add10 = fadd half %elt10, %add9 + %add11 = fadd half %elt11, %add10 + %add12 = fadd half %elt12, %add11 + %add13 = fadd half %elt13, %add12 + %add14 = fadd half %elt14, %add13 + %add15 = fadd half %elt15, %add14 + ret half %add15 +} + +define float @reduce_fast_float2(<2 x float> %vec2) { +; CHECK-LABEL: define float @reduce_fast_float2( +; CHECK-SAME: <2 x float> [[VEC2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[ELT0:%.*]] = extractelement <2 x float> [[VEC2]], i64 0 +; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[VEC2]], i64 1 +; CHECK-NEXT: [[ADD1:%.*]] = fadd fast float [[ELT1]], [[ELT0]] +; CHECK-NEXT: ret float [[ADD1]] +; +entry: + %elt0 = extractelement <2 x float> %vec2, i64 0 + %elt1 = extractelement <2 x float> %vec2, i64 1 + %add1 = fadd fast float %elt1, %elt0 + ret float %add1 +} + +define float @reduce_float2(<2 x float> %vec2) { +; CHECK-LABEL: define float @reduce_float2( +; CHECK-SAME: <2 x float> [[VEC2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[ELT0:%.*]] = extractelement <2 x float> [[VEC2]], i64 0 +; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[VEC2]], i64 1 +; CHECK-NEXT: [[ADD1:%.*]] = fadd float [[ELT1]], [[ELT0]] +; CHECK-NEXT: ret float [[ADD1]] +; +entry: + %elt0 = extractelement <2 x float> %vec2, i64 0 + %elt1 = extractelement <2 x float> %vec2, i64 1 + %add1 = fadd float %elt1, %elt0 + ret float %add1 +} + +define float @reduce_fast_float4(<4 x float> %vec4) { +; CHECK-LABEL: define float @reduce_fast_float4( +; CHECK-SAME: <4 x float> [[VEC4:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[VEC4]]) +; CHECK-NEXT: ret float [[TMP0]] +; +entry: + %elt0 = extractelement <4 x float> %vec4, i64 0 + %elt1 = extractelement <4 x float> %vec4, i64 1 + %elt2 = 
extractelement <4 x float> %vec4, i64 2 + %elt3 = extractelement <4 x float> %vec4, i64 3 + %add1 = fadd fast float %elt1, %elt0 + %add2 = fadd fast float %elt2, %add1 + %add3 = fadd fast float %elt3, %add2 + ret float %add3 +} + +define float @reduce_float4(<4 x float> %vec4) { +; CHECK-LABEL: define float @reduce_float4( +; CHECK-SAME: <4 x float> [[VEC4:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[VEC4]], i64 0 +; CHECK-NEXT: [[ELT1:%.*]] = extractelement <4 x float> [[VEC4]], i64 1 +; CHECK-NEXT: [[ELT2:%.*]] = extractelement <4 x float> [[VEC4]], i64 2 +; CHECK-NEXT: [[ELT3:%.*]] = extractelement <4 x float> [[VEC4]], i64 3 +; CHECK-NEXT: [[ADD1:%.*]] = fadd float [[ELT1]], [[ELT0]] +; CHECK-NEXT: [[ADD2:%.*]] = fadd float [[ELT2]], [[ADD1]] +; CHECK-NEXT: [[ADD3:%.*]] = fadd float [[ELT3]], [[ADD2]] +; CHECK-NEXT: ret float [[ADD3]] +; +entry: + %elt0 = extractelement <4 x float> %vec4, i64 0 + %elt1 = extractelement <4 x float> %vec4, i64 1 + %elt2 = extractelement <4 x float> %vec4, i64 2 + %elt3 = extractelement <4 x float> %vec4, i64 3 + %add1 = fadd float %elt1, %elt0 + %add2 = fadd float %elt2, %add1 + %add3 = fadd float %elt3, %add2 + ret float %add3 +} + +define float @reduce_fast_float8(<8 x float> %vec8) { +; CHECK-LABEL: define float @reduce_fast_float8( +; CHECK-SAME: <8 x float> [[VEC8:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> [[VEC8]]) +; CHECK-NEXT: ret float [[TMP0]] +; +entry: + %elt0 = extractelement <8 x float> %vec8, i64 0 + %elt1 = extractelement <8 x float> %vec8, i64 1 + %elt2 = extractelement <8 x float> %vec8, i64 2 + %elt3 = extractelement <8 x float> %vec8, i64 3 + %elt4 = extractelement <8 x float> %vec8, i64 4 + %elt5 = extractelement <8 x float> %vec8, i64 5 + %elt6 = extractelement <8 x float> %vec8, i64 6 + %elt7 = extractelement <8 x float> %vec8, i64 
7 + %add1 = fadd fast float %elt1, %elt0 + %add2 = fadd fast float %elt2, %add1 + %add3 = fadd fast float %elt3, %add2 + %add4 = fadd fast float %elt4, %add3 + %add5 = fadd fast float %elt5, %add4 + %add6 = fadd fast float %elt6, %add5 + %add7 = fadd fast float %elt7, %add6 + ret float %add7 +} + +define float @reduce_float8(<8 x float> %vec8) { +; CHECK-LABEL: define float @reduce_float8( +; CHECK-SAME: <8 x float> [[VEC8:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[ELT0:%.*]] = extractelement <8 x float> [[VEC8]], i64 0 +; CHECK-NEXT: [[ELT1:%.*]] = extractelement <8 x float> [[VEC8]], i64 1 +; CHECK-NEXT: [[ELT2:%.*]] = extractelement <8 x float> [[VEC8]], i64 2 +; CHECK-NEXT: [[ELT3:%.*]] = extractelement <8 x float> [[VEC8]], i64 3 +; CHECK-NEXT: [[ELT4:%.*]] = extractelement <8 x float> [[VEC8]], i64 4 +; CHECK-NEXT: [[ELT5:%.*]] = extractelement <8 x float> [[VEC8]], i64 5 +; CHECK-NEXT: [[ELT6:%.*]] = extractelement <8 x float> [[VEC8]], i64 6 +; CHECK-NEXT: [[ELT7:%.*]] = extractelement <8 x float> [[VEC8]], i64 7 +; CHECK-NEXT: [[ADD1:%.*]] = fadd float [[ELT1]], [[ELT0]] +; CHECK-NEXT: [[ADD2:%.*]] = fadd float [[ELT2]], [[ADD1]] +; CHECK-NEXT: [[ADD3:%.*]] = fadd float [[ELT3]], [[ADD2]] +; CHECK-NEXT: [[ADD4:%.*]] = fadd float [[ELT4]], [[ADD3]] +; CHECK-NEXT: [[ADD5:%.*]] = fadd float [[ELT5]], [[ADD4]] +; CHECK-NEXT: [[ADD6:%.*]] = fadd float [[ELT6]], [[ADD5]] +; CHECK-NEXT: [[ADD7:%.*]] = fadd float [[ELT7]], [[ADD6]] +; CHECK-NEXT: ret float [[ADD7]] +; +entry: + %elt0 = extractelement <8 x float> %vec8, i64 0 + %elt1 = extractelement <8 x float> %vec8, i64 1 + %elt2 = extractelement <8 x float> %vec8, i64 2 + %elt3 = extractelement <8 x float> %vec8, i64 3 + %elt4 = extractelement <8 x float> %vec8, i64 4 + %elt5 = extractelement <8 x float> %vec8, i64 5 + %elt6 = extractelement <8 x float> %vec8, i64 6 + %elt7 = extractelement <8 x float> %vec8, i64 7 + %add1 = fadd float %elt1, %elt0 + %add2 = fadd float %elt2, %add1 + 
%add3 = fadd float %elt3, %add2 + %add4 = fadd float %elt4, %add3 + %add5 = fadd float %elt5, %add4 + %add6 = fadd float %elt6, %add5 + %add7 = fadd float %elt7, %add6 + ret float %add7 +} + +define double @reduce_fast_double2(<2 x double> %vec2) { +; CHECK-LABEL: define double @reduce_fast_double2( +; CHECK-SAME: <2 x double> [[VEC2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[ELT0:%.*]] = extractelement <2 x double> [[VEC2]], i64 0 +; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x double> [[VEC2]], i64 1 +; CHECK-NEXT: [[ADD1:%.*]] = fadd fast double [[ELT1]], [[ELT0]] +; CHECK-NEXT: ret double [[ADD1]] +; +entry: + %elt0 = extractelement <2 x double> %vec2, i64 0 + %elt1 = extractelement <2 x double> %vec2, i64 1 + %add1 = fadd fast double %elt1, %elt0 + ret double %add1 +} + +define double @reduce_double2(<2 x double> %vec2) { +; CHECK-LABEL: define double @reduce_double2( +; CHECK-SAME: <2 x double> [[VEC2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[ELT0:%.*]] = extractelement <2 x double> [[VEC2]], i64 0 +; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x double> [[VEC2]], i64 1 +; CHECK-NEXT: [[ADD1:%.*]] = fadd double [[ELT1]], [[ELT0]] +; CHECK-NEXT: ret double [[ADD1]] +; +entry: + %elt0 = extractelement <2 x double> %vec2, i64 0 + %elt1 = extractelement <2 x double> %vec2, i64 1 + %add1 = fadd double %elt1, %elt0 + ret double %add1 +} + +define double @reduce_fast_double4(<4 x double> %vec4) { +; CHECK-LABEL: define double @reduce_fast_double4( +; CHECK-SAME: <4 x double> [[VEC4:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> [[VEC4]]) +; CHECK-NEXT: ret double [[TMP0]] +; +entry: + %elt0 = extractelement <4 x double> %vec4, i64 0 + %elt1 = extractelement <4 x double> %vec4, i64 1 + %elt2 = extractelement <4 x double> %vec4, i64 2 + %elt3 = extractelement <4 x double> %vec4, i64 3 + %add1 = fadd fast 
double %elt1, %elt0 + %add2 = fadd fast double %elt2, %add1 + %add3 = fadd fast double %elt3, %add2 + ret double %add3 +} + +define double @reduce_double4(<4 x double> %vec4) { +; CHECK-LABEL: define double @reduce_double4( +; CHECK-SAME: <4 x double> [[VEC4:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x double> [[VEC4]], i64 0 +; CHECK-NEXT: [[ELT1:%.*]] = extractelement <4 x double> [[VEC4]], i64 1 +; CHECK-NEXT: [[ELT2:%.*]] = extractelement <4 x double> [[VEC4]], i64 2 +; CHECK-NEXT: [[ELT3:%.*]] = extractelement <4 x double> [[VEC4]], i64 3 +; CHECK-NEXT: [[ADD1:%.*]] = fadd double [[ELT1]], [[ELT0]] +; CHECK-NEXT: [[ADD2:%.*]] = fadd double [[ELT2]], [[ADD1]] +; CHECK-NEXT: [[ADD3:%.*]] = fadd double [[ELT3]], [[ADD2]] +; CHECK-NEXT: ret double [[ADD3]] +; +entry: + %elt0 = extractelement <4 x double> %vec4, i64 0 + %elt1 = extractelement <4 x double> %vec4, i64 1 + %elt2 = extractelement <4 x double> %vec4, i64 2 + %elt3 = extractelement <4 x double> %vec4, i64 3 + %add1 = fadd double %elt1, %elt0 + %add2 = fadd double %elt2, %add1 + %add3 = fadd double %elt3, %add2 + ret double %add3 +} + +; Fixed iteration count. 
sum += a[i] +define float @reduce_fast_float_case1(ptr %a) { +; CHECK-LABEL: define float @reduce_fast_float_case1( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[LOAD:%.*]] = load float, ptr [[A]], align 4 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 4 +; CHECK-NEXT: [[LOAD1:%.*]] = load float, ptr [[GEP]], align 4 +; CHECK-NEXT: [[ADD1:%.*]] = fadd fast float [[LOAD1]], [[LOAD]] +; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 8 +; CHECK-NEXT: [[LOAD2:%.*]] = load float, ptr [[GEP2]], align 4 +; CHECK-NEXT: [[ADD2:%.*]] = fadd fast float [[LOAD2]], [[ADD1]] +; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 12 +; CHECK-NEXT: [[LOAD3:%.*]] = load float, ptr [[GEP3]], align 4 +; CHECK-NEXT: [[ADD3:%.*]] = fadd fast float [[LOAD3]], [[ADD2]] +; CHECK-NEXT: [[GEP4:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 16 +; CHECK-NEXT: [[LOAD4:%.*]] = load float, ptr [[GEP4]], align 4 +; CHECK-NEXT: [[ADD4:%.*]] = fadd fast float [[LOAD4]], [[ADD3]] +; CHECK-NEXT: ret float [[ADD4]] +; +entry: + %load = load float, ptr %a + %gep = getelementptr inbounds i8, ptr %a, i64 4 + %load1 = load float, ptr %gep + %add1 = fadd fast float %load1, %load + %gep2 = getelementptr inbounds i8, ptr %a, i64 8 + %load2 = load float, ptr %gep2 + %add2 = fadd fast float %load2, %add1 + %gep3 = getelementptr inbounds i8, ptr %a, i64 12 + %load3 = load float, ptr %gep3 + %add3 = fadd fast float %load3, %add2 + %gep4 = getelementptr inbounds i8, ptr %a, i64 16 + %load4 = load float, ptr %gep4 + %add4 = fadd fast float %load4, %add3 + ret float %add4 +} + +; Fixed iteration count. 
sum += a[i] +define float @reduce_float_case1(ptr %a) { +; CHECK-LABEL: define float @reduce_float_case1( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[LOAD:%.*]] = load float, ptr [[A]], align 4 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 4 +; CHECK-NEXT: [[LOAD1:%.*]] = load float, ptr [[GEP]], align 4 +; CHECK-NEXT: [[ADD1:%.*]] = fadd float [[LOAD1]], [[LOAD]] +; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 8 +; CHECK-NEXT: [[LOAD2:%.*]] = load float, ptr [[GEP2]], align 4 +; CHECK-NEXT: [[ADD2:%.*]] = fadd float [[LOAD2]], [[ADD1]] +; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 12 +; CHECK-NEXT: [[LOAD3:%.*]] = load float, ptr [[GEP3]], align 4 +; CHECK-NEXT: [[ADD3:%.*]] = fadd float [[LOAD3]], [[ADD2]] +; CHECK-NEXT: [[GEP4:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 16 +; CHECK-NEXT: [[LOAD4:%.*]] = load float, ptr [[GEP4]], align 4 +; CHECK-NEXT: [[ADD4:%.*]] = fadd float [[LOAD4]], [[ADD3]] +; CHECK-NEXT: ret float [[ADD4]] +; +entry: + %load = load float, ptr %a + %gep = getelementptr inbounds i8, ptr %a, i64 4 + %load1 = load float, ptr %gep + %add1 = fadd float %load1, %load + %gep2 = getelementptr inbounds i8, ptr %a, i64 8 + %load2 = load float, ptr %gep2 + %add2 = fadd float %load2, %add1 + %gep3 = getelementptr inbounds i8, ptr %a, i64 12 + %load3 = load float, ptr %gep3 + %add3 = fadd float %load3, %add2 + %gep4 = getelementptr inbounds i8, ptr %a, i64 16 + %load4 = load float, ptr %gep4 + %add4 = fadd float %load4, %add3 + ret float %add4 +} + +; Reduction needs a shuffle. See add2 and add3. 
+define float @reduce_fast_float_case2(ptr %a, ptr %b) { +; CHECK-LABEL: define float @reduce_fast_float_case2( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[GEPA2:%.*]] = getelementptr inbounds float, ptr [[A]], i32 2 +; CHECK-NEXT: [[GEPA3:%.*]] = getelementptr inbounds float, ptr [[A]], i32 3 +; CHECK-NEXT: [[GEPB2:%.*]] = getelementptr inbounds float, ptr [[B]], i32 2 +; CHECK-NEXT: [[GEPB3:%.*]] = getelementptr inbounds float, ptr [[B]], i32 3 +; CHECK-NEXT: [[LOADA2:%.*]] = load float, ptr [[GEPA2]], align 4 +; CHECK-NEXT: [[LOADA3:%.*]] = load float, ptr [[GEPA3]], align 4 +; CHECK-NEXT: [[LOADB2:%.*]] = load float, ptr [[GEPB2]], align 4 +; CHECK-NEXT: [[LOADB3:%.*]] = load float, ptr [[GEPB3]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[A]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[B]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = fadd fast <2 x float> [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[ADD2:%.*]] = fadd fast float [[LOADA3]], [[LOADB2]] +; CHECK-NEXT: [[ADD3:%.*]] = fadd fast float [[LOADA2]], [[LOADB3]] +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1 +; CHECK-NEXT: [[RED1:%.*]] = fadd fast float [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[RED2:%.*]] = fadd fast float [[ADD2]], [[RED1]] +; CHECK-NEXT: [[RED3:%.*]] = fadd fast float [[ADD3]], [[RED2]] +; CHECK-NEXT: ret float [[RED3]] +; +entry: + %gepa1 = getelementptr inbounds float, ptr %a, i32 1 + %gepa2 = getelementptr inbounds float, ptr %a, i32 2 + %gepa3 = getelementptr inbounds float, ptr %a, i32 3 + %gepb1 = getelementptr inbounds float, ptr %b, i32 1 + %gepb2 = getelementptr inbounds float, ptr %b, i32 2 + %gepb3 = getelementptr inbounds float, ptr %b, i32 3 + %loada = load float, ptr %a + %loada1 = load float, ptr %gepa1 + %loada2 = load float, ptr %gepa2 + %loada3 = load float, ptr %gepa3 + %loadb = load float, ptr 
%b + %loadb1 = load float, ptr %gepb1 + %loadb2 = load float, ptr %gepb2 + %loadb3 = load float, ptr %gepb3 + %add = fadd fast float %loada, %loadb + %add1 = fadd fast float %loada1, %loadb1 + %add2 = fadd fast float %loada3, %loadb2 + %add3 = fadd fast float %loada2, %loadb3 + %red1 = fadd fast float %add, %add1 + %red2 = fadd fast float %add2, %red1 + %red3 = fadd fast float %add3, %red2 + ret float %red3 +} + +; Reduction needs a shuffle. See add2 and add3. +define float @reduce_float_case2(ptr %a, ptr %b) { +; CHECK-LABEL: define float @reduce_float_case2( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[GEPA2:%.*]] = getelementptr inbounds float, ptr [[A]], i32 2 +; CHECK-NEXT: [[GEPA3:%.*]] = getelementptr inbounds float, ptr [[A]], i32 3 +; CHECK-NEXT: [[GEPB2:%.*]] = getelementptr inbounds float, ptr [[B]], i32 2 +; CHECK-NEXT: [[GEPB3:%.*]] = getelementptr inbounds float, ptr [[B]], i32 3 +; CHECK-NEXT: [[LOADA2:%.*]] = load float, ptr [[GEPA2]], align 4 +; CHECK-NEXT: [[LOADA3:%.*]] = load float, ptr [[GEPA3]], align 4 +; CHECK-NEXT: [[LOADB2:%.*]] = load float, ptr [[GEPB2]], align 4 +; CHECK-NEXT: [[LOADB3:%.*]] = load float, ptr [[GEPB3]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[A]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[B]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x float> [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[ADD2:%.*]] = fadd float [[LOADA3]], [[LOADB2]] +; CHECK-NEXT: [[ADD3:%.*]] = fadd float [[LOADA2]], [[LOADB3]] +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1 +; CHECK-NEXT: [[RED1:%.*]] = fadd float [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[RED2:%.*]] = fadd float [[ADD2]], [[RED1]] +; CHECK-NEXT: [[RED3:%.*]] = fadd float [[ADD3]], [[RED2]] +; CHECK-NEXT: ret float [[RED3]] +; +entry: + %gepa1 = getelementptr inbounds float, ptr %a, i32 1 + %gepa2 = 
getelementptr inbounds float, ptr %a, i32 2 + %gepa3 = getelementptr inbounds float, ptr %a, i32 3 + %gepb1 = getelementptr inbounds float, ptr %b, i32 1 + %gepb2 = getelementptr inbounds float, ptr %b, i32 2 + %gepb3 = getelementptr inbounds float, ptr %b, i32 3 + %loada = load float, ptr %a + %loada1 = load float, ptr %gepa1 + %loada2 = load float, ptr %gepa2 + %loada3 = load float, ptr %gepa3 + %loadb = load float, ptr %b + %loadb1 = load float, ptr %gepb1 + %loadb2 = load float, ptr %gepb2 + %loadb3 = load float, ptr %gepb3 + %add = fadd float %loada, %loadb + %add1 = fadd float %loada1, %loadb1 + %add2 = fadd float %loada3, %loadb2 + %add3 = fadd float %loada2, %loadb3 + %red1 = fadd float %add, %add1 + %red2 = fadd float %add2, %red1 + %red3 = fadd float %add3, %red2 + ret float %red3 +} + +; Addition of log. +define float @reduce_fast_float_case3(ptr %a) { +; CHECK-LABEL: define float @reduce_fast_float_case3( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds float, ptr [[A]], i32 1 +; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds float, ptr [[A]], i32 2 +; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds float, ptr [[A]], i32 3 +; CHECK-NEXT: [[GEP4:%.*]] = getelementptr inbounds float, ptr [[A]], i32 4 +; CHECK-NEXT: [[GEP5:%.*]] = getelementptr inbounds float, ptr [[A]], i32 5 +; CHECK-NEXT: [[GEP6:%.*]] = getelementptr inbounds float, ptr [[A]], i32 6 +; CHECK-NEXT: [[GEP7:%.*]] = getelementptr inbounds float, ptr [[A]], i32 7 +; CHECK-NEXT: [[LOAD:%.*]] = load float, ptr [[A]], align 4 +; CHECK-NEXT: [[LOAD1:%.*]] = load float, ptr [[GEP1]], align 4 +; CHECK-NEXT: [[LOAD2:%.*]] = load float, ptr [[GEP2]], align 4 +; CHECK-NEXT: [[LOAD3:%.*]] = load float, ptr [[GEP3]], align 4 +; CHECK-NEXT: [[LOAD4:%.*]] = load float, ptr [[GEP4]], align 4 +; CHECK-NEXT: [[LOAD5:%.*]] = load float, ptr [[GEP5]], align 4 +; CHECK-NEXT: [[LOAD6:%.*]] = load float, ptr [[GEP6]], align 4 +; 
CHECK-NEXT: [[LOAD7:%.*]] = load float, ptr [[GEP7]], align 4 +; CHECK-NEXT: [[LOG:%.*]] = call fast float @llvm.log.f32(float [[LOAD]]) +; CHECK-NEXT: [[LOG1:%.*]] = call fast float @llvm.log.f32(float [[LOAD1]]) +; CHECK-NEXT: [[LOG2:%.*]] = call fast float @llvm.log.f32(float [[LOAD2]]) +; CHECK-NEXT: [[LOG3:%.*]] = call fast float @llvm.log.f32(float [[LOAD3]]) +; CHECK-NEXT: [[LOG4:%.*]] = call fast float @llvm.log.f32(float [[LOAD4]]) +; CHECK-NEXT: [[LOG5:%.*]] = call fast float @llvm.log.f32(float [[LOAD5]]) +; CHECK-NEXT: [[LOG6:%.*]] = call fast float @llvm.log.f32(float [[LOAD6]]) +; CHECK-NEXT: [[LOG7:%.*]] = call fast float @llvm.log.f32(float [[LOAD7]]) +; CHECK-NEXT: [[ADD1:%.*]] = fadd fast float [[LOG]], [[LOG1]] +; CHECK-NEXT: [[ADD2:%.*]] = fadd fast float [[ADD1]], [[LOG2]] +; CHECK-NEXT: [[ADD3:%.*]] = fadd fast float [[ADD2]], [[LOG3]] +; CHECK-NEXT: [[ADD4:%.*]] = fadd fast float [[ADD3]], [[LOG4]] +; CHECK-NEXT: [[ADD5:%.*]] = fadd fast float [[ADD4]], [[LOG5]] +; CHECK-NEXT: [[ADD6:%.*]] = fadd fast float [[ADD5]], [[LOG6]] +; CHECK-NEXT: [[ADD7:%.*]] = fadd fast float [[ADD6]], [[LOG7]] +; CHECK-NEXT: ret float [[ADD7]] +; +entry: + %gep1 = getelementptr inbounds float, ptr %a, i32 1 + %gep2 = getelementptr inbounds float, ptr %a, i32 2 + %gep3 = getelementptr inbounds float, ptr %a, i32 3 + %gep4 = getelementptr inbounds float, ptr %a, i32 4 + %gep5 = getelementptr inbounds float, ptr %a, i32 5 + %gep6 = getelementptr inbounds float, ptr %a, i32 6 + %gep7 = getelementptr inbounds float, ptr %a, i32 7 + %load = load float, ptr %a + %load1 = load float, ptr %gep1 + %load2 = load float, ptr %gep2 + %load3 = load float, ptr %gep3 + %load4 = load float, ptr %gep4 + %load5 = load float, ptr %gep5 + %load6 = load float, ptr %gep6 + %load7 = load float, ptr %gep7 + %log = call fast float @llvm.log.f32(float %load) + %log1 = call fast float @llvm.log.f32(float %load1) + %log2 = call fast float @llvm.log.f32(float %load2) + %log3 = call fast float 
@llvm.log.f32(float %load3) + %log4 = call fast float @llvm.log.f32(float %load4) + %log5 = call fast float @llvm.log.f32(float %load5) + %log6 = call fast float @llvm.log.f32(float %load6) + %log7 = call fast float @llvm.log.f32(float %load7) + %add1 = fadd fast float %log, %log1 + %add2 = fadd fast float %add1, %log2 + %add3 = fadd fast float %add2, %log3 + %add4 = fadd fast float %add3, %log4 + %add5 = fadd fast float %add4, %log5 + %add6 = fadd fast float %add5, %log6 + %add7 = fadd fast float %add6, %log7 + ret float %add7 +} + +; Addition of log. +define float @reduce_float_case3(ptr %a) { +; CHECK-LABEL: define float @reduce_float_case3( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds float, ptr [[A]], i32 1 +; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds float, ptr [[A]], i32 2 +; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds float, ptr [[A]], i32 3 +; CHECK-NEXT: [[GEP4:%.*]] = getelementptr inbounds float, ptr [[A]], i32 4 +; CHECK-NEXT: [[GEP5:%.*]] = getelementptr inbounds float, ptr [[A]], i32 5 +; CHECK-NEXT: [[GEP6:%.*]] = getelementptr inbounds float, ptr [[A]], i32 6 +; CHECK-NEXT: [[GEP7:%.*]] = getelementptr inbounds float, ptr [[A]], i32 7 +; CHECK-NEXT: [[LOAD:%.*]] = load float, ptr [[A]], align 4 +; CHECK-NEXT: [[LOAD1:%.*]] = load float, ptr [[GEP1]], align 4 +; CHECK-NEXT: [[LOAD2:%.*]] = load float, ptr [[GEP2]], align 4 +; CHECK-NEXT: [[LOAD3:%.*]] = load float, ptr [[GEP3]], align 4 +; CHECK-NEXT: [[LOAD4:%.*]] = load float, ptr [[GEP4]], align 4 +; CHECK-NEXT: [[LOAD5:%.*]] = load float, ptr [[GEP5]], align 4 +; CHECK-NEXT: [[LOAD6:%.*]] = load float, ptr [[GEP6]], align 4 +; CHECK-NEXT: [[LOAD7:%.*]] = load float, ptr [[GEP7]], align 4 +; CHECK-NEXT: [[LOG:%.*]] = call float @llvm.log.f32(float [[LOAD]]) +; CHECK-NEXT: [[LOG1:%.*]] = call float @llvm.log.f32(float [[LOAD1]]) +; CHECK-NEXT: [[LOG2:%.*]] = call float @llvm.log.f32(float [[LOAD2]]) +; 
CHECK-NEXT: [[LOG3:%.*]] = call float @llvm.log.f32(float [[LOAD3]]) +; CHECK-NEXT: [[LOG4:%.*]] = call float @llvm.log.f32(float [[LOAD4]]) +; CHECK-NEXT: [[LOG5:%.*]] = call float @llvm.log.f32(float [[LOAD5]]) +; CHECK-NEXT: [[LOG6:%.*]] = call float @llvm.log.f32(float [[LOAD6]]) +; CHECK-NEXT: [[LOG7:%.*]] = call float @llvm.log.f32(float [[LOAD7]]) +; CHECK-NEXT: [[ADD1:%.*]] = fadd float [[LOG]], [[LOG1]] +; CHECK-NEXT: [[ADD2:%.*]] = fadd float [[ADD1]], [[LOG2]] +; CHECK-NEXT: [[ADD3:%.*]] = fadd float [[ADD2]], [[LOG3]] +; CHECK-NEXT: [[ADD4:%.*]] = fadd float [[ADD3]], [[LOG4]] +; CHECK-NEXT: [[ADD5:%.*]] = fadd float [[ADD4]], [[LOG5]] +; CHECK-NEXT: [[ADD6:%.*]] = fadd float [[ADD5]], [[LOG6]] +; CHECK-NEXT: [[ADD7:%.*]] = fadd float [[ADD6]], [[LOG7]] +; CHECK-NEXT: ret float [[ADD7]] +; +entry: + %gep1 = getelementptr inbounds float, ptr %a, i32 1 + %gep2 = getelementptr inbounds float, ptr %a, i32 2 + %gep3 = getelementptr inbounds float, ptr %a, i32 3 + %gep4 = getelementptr inbounds float, ptr %a, i32 4 + %gep5 = getelementptr inbounds float, ptr %a, i32 5 + %gep6 = getelementptr inbounds float, ptr %a, i32 6 + %gep7 = getelementptr inbounds float, ptr %a, i32 7 + %load = load float, ptr %a + %load1 = load float, ptr %gep1 + %load2 = load float, ptr %gep2 + %load3 = load float, ptr %gep3 + %load4 = load float, ptr %gep4 + %load5 = load float, ptr %gep5 + %load6 = load float, ptr %gep6 + %load7 = load float, ptr %gep7 + %log = call float @llvm.log.f32(float %load) + %log1 = call float @llvm.log.f32(float %load1) + %log2 = call float @llvm.log.f32(float %load2) + %log3 = call float @llvm.log.f32(float %load3) + %log4 = call float @llvm.log.f32(float %load4) + %log5 = call float @llvm.log.f32(float %load5) + %log6 = call float @llvm.log.f32(float %load6) + %log7 = call float @llvm.log.f32(float %load7) + %add1 = fadd float %log, %log1 + %add2 = fadd float %add1, %log2 + %add3 = fadd float %add2, %log3 + %add4 = fadd float %add3, %log4 + %add5 = fadd 
float %add4, %log5 + %add6 = fadd float %add5, %log6 + %add7 = fadd float %add6, %log7 + ret float %add7 +} + +define half @reduce_unordered_fast_half4(<4 x half> %vec4) { +; CHECK-LABEL: define half @reduce_unordered_fast_half4( +; CHECK-SAME: <4 x half> [[VEC4:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> [[VEC4]]) +; CHECK-NEXT: ret half [[TMP0]] +; +entry: + %elt0 = extractelement <4 x half> %vec4, i64 0 + %elt1 = extractelement <4 x half> %vec4, i64 1 + %elt2 = extractelement <4 x half> %vec4, i64 2 + %elt3 = extractelement <4 x half> %vec4, i64 3 + %add1 = fadd fast half %elt1, %elt0 + %add2 = fadd fast half %elt2, %elt3 + %add3 = fadd fast half %add1, %add2 + ret half %add3 +} + +define half @reduce_unordered_half4(<4 x half> %vec4) { +; CHECK-LABEL: define half @reduce_unordered_half4( +; CHECK-SAME: <4 x half> [[VEC4:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x half> [[VEC4]], <4 x half> poison, <2 x i32> <i32 1, i32 2> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x half> [[VEC4]], <4 x half> poison, <2 x i32> <i32 0, i32 3> +; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x half> [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x half> [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x half> [[TMP2]], i32 1 +; CHECK-NEXT: [[ADD3:%.*]] = fadd half [[TMP3]], [[TMP4]] +; CHECK-NEXT: ret half [[ADD3]] +; +entry: + %elt0 = extractelement <4 x half> %vec4, i64 0 + %elt1 = extractelement <4 x half> %vec4, i64 1 + %elt2 = extractelement <4 x half> %vec4, i64 2 + %elt3 = extractelement <4 x half> %vec4, i64 3 + %add1 = fadd half %elt1, %elt0 + %add2 = fadd half %elt2, %elt3 + %add3 = fadd half %add1, %add2 + ret half %add3 +} diff --git a/llvm/test/Transforms/SLPVectorizer/X86/multi-extracts-bv-combined.ll b/llvm/test/Transforms/SLPVectorizer/X86/multi-extracts-bv-combined.ll new file 
mode 100644 index 0000000..e6a166c2 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/multi-extracts-bv-combined.ll @@ -0,0 +1,38 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mcpu=cascadelake < %s | FileCheck %s + +define i32 @foo() { +; CHECK-LABEL: define i32 @foo( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[D:%.*]] = load i32, ptr null, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> <i32 0, i32 undef, i32 1, i32 0>, i32 [[D]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 1> +; CHECK-NEXT: [[TMP2:%.*]] = or <8 x i32> zeroinitializer, [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> zeroinitializer, [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 5, i32 6, i32 7> +; CHECK-NEXT: store <8 x i32> [[TMP4]], ptr getelementptr inbounds ([64 x i32], ptr null, i64 0, i64 15), align 4 +; CHECK-NEXT: ret i32 0 +; +entry: + %d = load i32, ptr null, align 4 + %0 = extractelement <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, i32 0 + %1 = extractelement <8 x i32> zeroinitializer, i32 0 + %2 = or i32 0, %d + %3 = or i32 0, %d + %4 = or i32 0, %d + %5 = add i32 0, 0 + %6 = or i32 0, %0 + %7 = or i32 0, %d + %8 = or i32 0, %d + %9 = or i32 0, %1 + store i32 %2, ptr getelementptr inbounds ([64 x i32], ptr null, i64 0, i64 22), align 8 + store i32 %3, ptr getelementptr inbounds ([64 x i32], ptr null, i64 0, i64 21), align 4 + store i32 %4, ptr getelementptr inbounds ([64 x i32], ptr null, i64 0, i64 20), align 16 + store i32 %5, ptr getelementptr inbounds ([64 x i32], ptr null, i64 0, i64 19), align 4 + store i32 %6, ptr getelementptr inbounds ([64 x i32], ptr null, i64 
0, i64 18), align 8 + store i32 %7, ptr getelementptr inbounds ([64 x i32], ptr null, i64 0, i64 17), align 4 + store i32 %8, ptr getelementptr inbounds ([64 x i32], ptr null, i64 0, i64 16), align 16 + store i32 %9, ptr getelementptr inbounds ([64 x i32], ptr null, i64 0, i64 15), align 4 + ret i32 0 +} diff --git a/llvm/test/tools/llvm-dwp/X86/cu_tu_units_manual_v4.s b/llvm/test/tools/llvm-dwp/X86/cu_tu_units_manual_v4.s index 333956d..3987671 100644 --- a/llvm/test/tools/llvm-dwp/X86/cu_tu_units_manual_v4.s +++ b/llvm/test/tools/llvm-dwp/X86/cu_tu_units_manual_v4.s @@ -4,7 +4,7 @@ # RUN: -split-dwarf-file=%t.dwo -dwarf-version=4 # RUN: llvm-dwp %t.dwo -o %t.dwp # RUN: llvm-dwarfdump -debug-info -debug-types -debug-cu-index -debug-tu-index %t.dwp | FileCheck %s -# RUN: llvm-dwarfdump -debug-info -debug-types -debug-cu-index -debug-tu-index -manaully-generate-unit-index %t.dwp | FileCheck %s +# RUN: llvm-dwarfdump -debug-info -debug-types -debug-cu-index -debug-tu-index -manually-generate-unit-index %t.dwp | FileCheck %s ## Note: In order to check whether the type unit index is generated ## there is no need to add the missing DIEs for the structure type of the type unit. 
diff --git a/llvm/test/tools/llvm-dwp/X86/cu_tu_units_manual_v5.s b/llvm/test/tools/llvm-dwp/X86/cu_tu_units_manual_v5.s index fde41f3..4dee886 100644 --- a/llvm/test/tools/llvm-dwp/X86/cu_tu_units_manual_v5.s +++ b/llvm/test/tools/llvm-dwp/X86/cu_tu_units_manual_v5.s @@ -4,7 +4,7 @@ # RUN: -split-dwarf-file=%t.dwo -dwarf-version=5 # RUN: llvm-dwp %t.dwo -o %t.dwp # RUN: llvm-dwarfdump -debug-info -debug-cu-index -debug-tu-index %t.dwp | FileCheck %s -# RUN: llvm-dwarfdump -debug-info -debug-cu-index -debug-tu-index -manaully-generate-unit-index %t.dwp | FileCheck %s +# RUN: llvm-dwarfdump -debug-info -debug-cu-index -debug-tu-index -manually-generate-unit-index %t.dwp | FileCheck %s ## Note: In order to check whether the type unit index is generated ## there is no need to add the missing DIEs for the structure type of the type unit. diff --git a/llvm/test/tools/llvm-dwp/X86/cu_tu_units_manual_v5_invalid.s b/llvm/test/tools/llvm-dwp/X86/cu_tu_units_manual_v5_invalid.s index 1f63b21..b131774 100644 --- a/llvm/test/tools/llvm-dwp/X86/cu_tu_units_manual_v5_invalid.s +++ b/llvm/test/tools/llvm-dwp/X86/cu_tu_units_manual_v5_invalid.s @@ -5,7 +5,7 @@ # RUN: -split-dwarf-file=%t.dwo -dwarf-version=5 # RUN: llvm-dwp %t.dwo -o %t.dwp # RUN: llvm-dwarfdump -debug-info -debug-cu-index -debug-tu-index \ -# RUN: -manaully-generate-unit-index %t.dwp 2>&1 | FileCheck %s +# RUN: -manually-generate-unit-index %t.dwp 2>&1 | FileCheck %s ## Note: In order to check whether the type unit index is generated ## there is no need to add the missing DIEs for the structure type of the type unit. 
diff --git a/llvm/test/tools/llvm-dwp/X86/debug_macro_v5.s b/llvm/test/tools/llvm-dwp/X86/debug_macro_v5.s index e554294..7ed77a1 100644 --- a/llvm/test/tools/llvm-dwp/X86/debug_macro_v5.s +++ b/llvm/test/tools/llvm-dwp/X86/debug_macro_v5.s @@ -3,7 +3,7 @@ # RUN: llvm-mc -triple x86_64-unknown-linux --filetype=obj --split-dwarf-file=%t.dwo -dwarf-version=5 %s -o %t.o # RUN: llvm-dwp %t.dwo -o %t.dwp 2>&1 # RUN: llvm-dwarfdump -debug-macro -debug-cu-index %t.dwp | FileCheck -check-prefix=CHECK %s -# RUN: llvm-dwarfdump -debug-macro -debug-cu-index -manaully-generate-unit-index %t.dwp | FileCheck -check-prefix=CHECK2 %s +# RUN: llvm-dwarfdump -debug-macro -debug-cu-index -manually-generate-unit-index %t.dwp | FileCheck -check-prefix=CHECK2 %s # CHECK-DAG: .debug_macro.dwo contents: # CHECK: macro header: version = 0x0005, flags = 0x00, format = DWARF32 diff --git a/llvm/test/tools/llvm-dwp/X86/type_dedup.test b/llvm/test/tools/llvm-dwp/X86/type_dedup.test index 78e50fe..9d89c62 100644 --- a/llvm/test/tools/llvm-dwp/X86/type_dedup.test +++ b/llvm/test/tools/llvm-dwp/X86/type_dedup.test @@ -1,10 +1,10 @@ RUN: llvm-dwp %p/../Inputs/type_dedup/a.dwo %p/../Inputs/type_dedup/b.dwo -o %t RUN: llvm-dwarfdump -v %t | FileCheck -check-prefix=CHECK %s -RUN: llvm-dwarfdump -v -manaully-generate-unit-index %t | FileCheck -check-prefix=CHECK2 %s +RUN: llvm-dwarfdump -v -manually-generate-unit-index %t | FileCheck -check-prefix=CHECK2 %s RUN: llvm-dwp %p/../Inputs/type_dedup/b.dwo -o %tb.dwp RUN: llvm-dwp %p/../Inputs/type_dedup/a.dwo %tb.dwp -o %t RUN: llvm-dwarfdump -v %t | FileCheck -check-prefix=CHECK %s -RUN: llvm-dwarfdump -v -manaully-generate-unit-index %t | FileCheck -check-prefix=CHECK2 %s +RUN: llvm-dwarfdump -v -manually-generate-unit-index %t | FileCheck -check-prefix=CHECK2 %s a.cpp: struct common { }; diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/zero-idioms.s b/llvm/test/tools/llvm-mca/X86/Znver4/zero-idioms.s index cc3c286..b6ebd93 100644 --- 
a/llvm/test/tools/llvm-mca/X86/Znver4/zero-idioms.s +++ b/llvm/test/tools/llvm-mca/X86/Znver4/zero-idioms.s @@ -161,13 +161,13 @@ vpxorq %zmm19, %zmm19, %zmm21 # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 139 -# CHECK-NEXT: Total Cycles: 42 +# CHECK-NEXT: Total Cycles: 40 # CHECK-NEXT: Total uOps: 139 # CHECK: Dispatch Width: 6 -# CHECK-NEXT: uOps Per Cycle: 3.31 -# CHECK-NEXT: IPC: 3.31 -# CHECK-NEXT: Block RThroughput: 25.8 +# CHECK-NEXT: uOps Per Cycle: 3.48 +# CHECK-NEXT: IPC: 3.48 +# CHECK-NEXT: Block RThroughput: 24.8 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps @@ -301,7 +301,7 @@ vpxorq %zmm19, %zmm19, %zmm21 # CHECK-NEXT: 1 1 0.25 vpxorq %xmm19, %xmm19, %xmm19 # CHECK-NEXT: 1 1 0.25 vpxord %ymm19, %ymm19, %ymm19 # CHECK-NEXT: 1 1 0.25 vpxorq %ymm19, %ymm19, %ymm19 -# CHECK-NEXT: 1 1 0.50 vpxord %zmm19, %zmm19, %zmm19 +# CHECK-NEXT: 1 0 0.17 vpxord %zmm19, %zmm19, %zmm19 # CHECK-NEXT: 1 1 0.50 vpxorq %zmm19, %zmm19, %zmm19 # CHECK-NEXT: 1 0 0.17 vxorps %xmm4, %xmm4, %xmm5 # CHECK-NEXT: 1 0 0.17 vxorpd %xmm1, %xmm1, %xmm3 @@ -315,17 +315,17 @@ vpxorq %zmm19, %zmm19, %zmm21 # CHECK-NEXT: 1 1 0.25 vpxorq %xmm19, %xmm19, %xmm21 # CHECK-NEXT: 1 1 0.25 vpxord %ymm19, %ymm19, %ymm21 # CHECK-NEXT: 1 1 0.25 vpxorq %ymm19, %ymm19, %ymm21 -# CHECK-NEXT: 1 1 0.50 vpxord %zmm19, %zmm19, %zmm21 +# CHECK-NEXT: 1 0 0.17 vpxord %zmm19, %zmm19, %zmm21 # CHECK-NEXT: 1 1 0.50 vpxorq %zmm19, %zmm19, %zmm21 # CHECK: Register File statistics: -# CHECK-NEXT: Total number of mappings created: 65 -# CHECK-NEXT: Max number of mappings used: 45 +# CHECK-NEXT: Total number of mappings created: 63 +# CHECK-NEXT: Max number of mappings used: 43 # CHECK: * Register File #1 -- Zn4FpPRF: # CHECK-NEXT: Number of physical registers: 192 -# CHECK-NEXT: Total number of mappings created: 65 -# CHECK-NEXT: Max number of mappings used: 45 +# CHECK-NEXT: Total number of mappings created: 63 +# CHECK-NEXT: Max number of mappings used: 43 # CHECK: * Register File #2 -- Zn4IntegerPRF: # 
CHECK-NEXT: Number of physical registers: 224 @@ -359,7 +359,7 @@ vpxorq %zmm19, %zmm19, %zmm21 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] -# CHECK-NEXT: - - - - - - - - 25.00 25.00 27.00 26.00 - - - - - - - - - - - +# CHECK-NEXT: - - - - - - - - 24.00 25.00 25.00 25.00 - - - - - - - - - - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: @@ -453,9 +453,9 @@ vpxorq %zmm19, %zmm19, %zmm21 # CHECK-NEXT: - - - - - - - - 1.00 - - - - - - - - - - - - - - vpandnd %xmm19, %xmm19, %xmm19 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - - - - vpandnq %xmm19, %xmm19, %xmm19 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - - - - vpandnd %ymm19, %ymm19, %ymm19 -# CHECK-NEXT: - - - - - - - - - 1.00 - - - - - - - - - - - - - vpandnq %ymm19, %ymm19, %ymm19 -# CHECK-NEXT: - - - - - - - - 2.00 - - - - - - - - - - - - - - vpandnd %zmm19, %zmm19, %zmm19 -# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - - - - - - - - vpandnq %zmm19, %zmm19, %zmm19 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - - - - vpandnq %ymm19, %ymm19, %ymm19 +# CHECK-NEXT: - - - - - - - - - 2.00 - - - - - - - - - - - - - vpandnd %zmm19, %zmm19, %zmm19 +# CHECK-NEXT: - - - - - - - - 2.00 - - - - - - - - - - - - - - vpandnq %zmm19, %zmm19, %zmm19 # CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - vandnps %xmm2, %xmm2, %xmm5 # CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - vandnpd %xmm1, %xmm1, %xmm5 # CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - vpandn %xmm3, %xmm3, %xmm5 @@ -478,174 +478,174 @@ vpxorq %zmm19, %zmm19, %zmm21 # CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - vxorpd %ymm1, %ymm1, %ymm1 # CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - - - - - - - - 
vxorps %zmm2, %zmm2, %zmm2 # CHECK-NEXT: - - - - - - - - - - 2.00 - - - - - - - - - - - - vxorpd %zmm1, %zmm1, %zmm1 -# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - - - - - - pxor %mm2, %mm2 -# CHECK-NEXT: - - - - - - - - - 1.00 - - - - - - - - - - - - - pxor %xmm2, %xmm2 +# CHECK-NEXT: - - - - - - - - - 1.00 - - - - - - - - - - - - - pxor %mm2, %mm2 +# CHECK-NEXT: - - - - - - - - 1.00 - - - - - - - - - - - - - - pxor %xmm2, %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - vpxor %xmm3, %xmm3, %xmm3 # CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - vpxor %ymm3, %ymm3, %ymm3 # CHECK-NEXT: - - - - - - - - 1.00 - - - - - - - - - - - - - - vpxord %xmm19, %xmm19, %xmm19 # CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - - - - vpxorq %xmm19, %xmm19, %xmm19 # CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - - - - - - vpxord %ymm19, %ymm19, %ymm19 # CHECK-NEXT: - - - - - - - - - 1.00 - - - - - - - - - - - - - vpxorq %ymm19, %ymm19, %ymm19 -# CHECK-NEXT: - - - - - - - - 2.00 - - - - - - - - - - - - - - vpxord %zmm19, %zmm19, %zmm19 -# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - - - - - - - - vpxorq %zmm19, %zmm19, %zmm19 +# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - vpxord %zmm19, %zmm19, %zmm19 +# CHECK-NEXT: - - - - - - - - - - 2.00 - - - - - - - - - - - - vpxorq %zmm19, %zmm19, %zmm19 # CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - vxorps %xmm4, %xmm4, %xmm5 # CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - vxorpd %xmm1, %xmm1, %xmm3 # CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - vxorps %ymm4, %ymm4, %ymm5 # CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - vxorpd %ymm1, %ymm1, %ymm3 -# CHECK-NEXT: - - - - - - - - 2.00 - - - - - - - - - - - - - - vxorps %zmm4, %zmm4, %zmm5 -# CHECK-NEXT: - - - - - - - - - - 2.00 - - - - - - - - - - - - vxorpd %zmm1, %zmm1, %zmm3 +# CHECK-NEXT: - - - - - - - - - 2.00 - - - - - - - - - - - - - vxorps %zmm4, %zmm4, %zmm5 +# CHECK-NEXT: - - - 
- - - - - 2.00 - - - - - - - - - - - - - - vxorpd %zmm1, %zmm1, %zmm3 # CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - vpxor %xmm3, %xmm3, %xmm5 # CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - vpxor %ymm3, %ymm3, %ymm5 # CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - - - - - - vpxord %xmm19, %xmm19, %xmm21 -# CHECK-NEXT: - - - - - - - - - 1.00 - - - - - - - - - - - - - vpxorq %xmm19, %xmm19, %xmm21 -# CHECK-NEXT: - - - - - - - - 1.00 - - - - - - - - - - - - - - vpxord %ymm19, %ymm19, %ymm21 -# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - - - - vpxorq %ymm19, %ymm19, %ymm21 -# CHECK-NEXT: - - - - - - - - - - 2.00 - - - - - - - - - - - - vpxord %zmm19, %zmm19, %zmm21 -# CHECK-NEXT: - - - - - - - - - 2.00 - - - - - - - - - - - - - vpxorq %zmm19, %zmm19, %zmm21 +# CHECK-NEXT: - - - - - - - - 1.00 - - - - - - - - - - - - - - vpxorq %xmm19, %xmm19, %xmm21 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - - - - vpxord %ymm19, %ymm19, %ymm21 +# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - - - - - - vpxorq %ymm19, %ymm19, %ymm21 +# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - vpxord %zmm19, %zmm19, %zmm21 +# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - - - - - - - - vpxorq %zmm19, %zmm19, %zmm21 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 01 - -# CHECK: [0,0] DR . . . . . . . .. subl %eax, %eax -# CHECK-NEXT: [0,1] DR . . . . . . . .. subq %rax, %rax -# CHECK-NEXT: [0,2] DR . . . . . . . .. xorl %eax, %eax -# CHECK-NEXT: [0,3] DR . . . . . . . .. xorq %rax, %rax -# CHECK-NEXT: [0,4] DeER . . . . . . . .. pcmpgtb %mm2, %mm2 -# CHECK-NEXT: [0,5] D=eER. . . . . . . .. pcmpgtd %mm2, %mm2 -# CHECK-NEXT: [0,6] .D=eER . . . . . . .. pcmpgtw %mm2, %mm2 -# CHECK-NEXT: [0,7] .DeE-R . . . . . . .. pcmpgtb %xmm2, %xmm2 -# CHECK-NEXT: [0,8] .DeE-R . . . . . . .. pcmpgtd %xmm2, %xmm2 -# CHECK-NEXT: [0,9] .DeE-R . . . . . . .. 
pcmpgtq %xmm2, %xmm2 -# CHECK-NEXT: [0,10] .D=eER . . . . . . .. pcmpgtw %xmm2, %xmm2 -# CHECK-NEXT: [0,11] .D---R . . . . . . .. vpcmpgtb %xmm3, %xmm3, %xmm3 -# CHECK-NEXT: [0,12] . D--R . . . . . . .. vpcmpgtd %xmm3, %xmm3, %xmm3 -# CHECK-NEXT: [0,13] . D--R . . . . . . .. vpcmpgtq %xmm3, %xmm3, %xmm3 -# CHECK-NEXT: [0,14] . D--R . . . . . . .. vpcmpgtw %xmm3, %xmm3, %xmm3 -# CHECK-NEXT: [0,15] . D---R . . . . . . .. vpcmpgtb %xmm3, %xmm3, %xmm5 -# CHECK-NEXT: [0,16] . D---R . . . . . . .. vpcmpgtd %xmm3, %xmm3, %xmm5 -# CHECK-NEXT: [0,17] . D---R . . . . . . .. vpcmpgtq %xmm3, %xmm3, %xmm5 -# CHECK-NEXT: [0,18] . D--R . . . . . . .. vpcmpgtw %xmm3, %xmm3, %xmm5 -# CHECK-NEXT: [0,19] . D--R . . . . . . .. vpcmpgtb %ymm3, %ymm3, %ymm3 -# CHECK-NEXT: [0,20] . D--R . . . . . . .. vpcmpgtd %ymm3, %ymm3, %ymm3 -# CHECK-NEXT: [0,21] . D--R . . . . . . .. vpcmpgtq %ymm3, %ymm3, %ymm3 -# CHECK-NEXT: [0,22] . D--R . . . . . . .. vpcmpgtw %ymm3, %ymm3, %ymm3 -# CHECK-NEXT: [0,23] . D--R . . . . . . .. vpcmpgtb %ymm3, %ymm3, %ymm5 -# CHECK-NEXT: [0,24] . D--R . . . . . . .. vpcmpgtd %ymm3, %ymm3, %ymm5 -# CHECK-NEXT: [0,25] . D--R . . . . . . .. vpcmpgtq %ymm3, %ymm3, %ymm5 -# CHECK-NEXT: [0,26] . D--R . . . . . . .. vpcmpgtw %ymm3, %ymm3, %ymm5 -# CHECK-NEXT: [0,27] . DeER . . . . . . .. psubb %mm2, %mm2 -# CHECK-NEXT: [0,28] . D=eER . . . . . . .. psubd %mm2, %mm2 -# CHECK-NEXT: [0,29] . D==eER. . . . . . .. psubq %mm2, %mm2 -# CHECK-NEXT: [0,30] . D==eER . . . . . .. psubw %mm2, %mm2 -# CHECK-NEXT: [0,31] . DeE--R . . . . . .. psubb %xmm2, %xmm2 -# CHECK-NEXT: [0,32] . DeE--R . . . . . .. psubd %xmm2, %xmm2 -# CHECK-NEXT: [0,33] . DeE--R . . . . . .. psubq %xmm2, %xmm2 -# CHECK-NEXT: [0,34] . D=eE-R . . . . . .. psubw %xmm2, %xmm2 -# CHECK-NEXT: [0,35] . D----R . . . . . .. vpsubb %xmm3, %xmm3, %xmm3 -# CHECK-NEXT: [0,36] . .D---R . . . . . .. vpsubd %xmm3, %xmm3, %xmm3 -# CHECK-NEXT: [0,37] . .D---R . . . . . .. vpsubq %xmm3, %xmm3, %xmm3 -# CHECK-NEXT: [0,38] . 
.D---R . . . . . .. vpsubw %xmm3, %xmm3, %xmm3 -# CHECK-NEXT: [0,39] . .D----R . . . . . .. vpsubb %ymm3, %ymm3, %ymm3 -# CHECK-NEXT: [0,40] . .D----R . . . . . .. vpsubd %ymm3, %ymm3, %ymm3 -# CHECK-NEXT: [0,41] . .D----R . . . . . .. vpsubq %ymm3, %ymm3, %ymm3 -# CHECK-NEXT: [0,42] . . D---R . . . . . .. vpsubw %ymm3, %ymm3, %ymm3 -# CHECK-NEXT: [0,43] . . D---R . . . . . .. vpsubb %xmm3, %xmm3, %xmm5 -# CHECK-NEXT: [0,44] . . D---R . . . . . .. vpsubd %xmm3, %xmm3, %xmm5 -# CHECK-NEXT: [0,45] . . D---R . . . . . .. vpsubq %xmm3, %xmm3, %xmm5 -# CHECK-NEXT: [0,46] . . D---R . . . . . .. vpsubw %xmm3, %xmm3, %xmm5 -# CHECK-NEXT: [0,47] . . D---R . . . . . .. vpsubb %ymm3, %ymm3, %ymm5 -# CHECK-NEXT: [0,48] . . D---R . . . . . .. vpsubd %ymm3, %ymm3, %ymm5 -# CHECK-NEXT: [0,49] . . D---R . . . . . .. vpsubq %ymm3, %ymm3, %ymm5 -# CHECK-NEXT: [0,50] . . D---R . . . . . .. vpsubw %ymm3, %ymm3, %ymm5 -# CHECK-NEXT: [0,51] . . DeE-R . . . . . .. vpsubb %xmm19, %xmm19, %xmm19 -# CHECK-NEXT: [0,52] . . D=eER . . . . . .. vpsubd %xmm19, %xmm19, %xmm19 -# CHECK-NEXT: [0,53] . . D==eER . . . . . .. vpsubq %xmm19, %xmm19, %xmm19 -# CHECK-NEXT: [0,54] . . D==eER. . . . . .. vpsubw %xmm19, %xmm19, %xmm19 -# CHECK-NEXT: [0,55] . . D===eER . . . . .. vpsubb %ymm19, %ymm19, %ymm19 -# CHECK-NEXT: [0,56] . . D====eER . . . . .. vpsubd %ymm19, %ymm19, %ymm19 -# CHECK-NEXT: [0,57] . . D=====eER . . . . .. vpsubq %ymm19, %ymm19, %ymm19 -# CHECK-NEXT: [0,58] . . D======eER . . . . .. vpsubw %ymm19, %ymm19, %ymm19 -# CHECK-NEXT: [0,59] . . D=======eER. . . . .. vpsubb %zmm19, %zmm19, %zmm19 -# CHECK-NEXT: [0,60] . . D=======eER . . . .. vpsubd %zmm19, %zmm19, %zmm19 -# CHECK-NEXT: [0,61] . . D========eER . . . .. vpsubq %zmm19, %zmm19, %zmm19 -# CHECK-NEXT: [0,62] . . D=========eER . . . .. vpsubw %zmm19, %zmm19, %zmm19 -# CHECK-NEXT: [0,63] . . D==========eER . . . .. vpsubb %xmm19, %xmm19, %xmm21 -# CHECK-NEXT: [0,64] . . D===========eER. . . .. 
vpsubd %xmm19, %xmm19, %xmm21 -# CHECK-NEXT: [0,65] . . D===========eER. . . .. vpsubq %xmm19, %xmm19, %xmm21 -# CHECK-NEXT: [0,66] . . .D==========eER. . . .. vpsubw %xmm19, %xmm19, %xmm21 -# CHECK-NEXT: [0,67] . . .D==========eER. . . .. vpsubb %ymm19, %ymm19, %ymm21 -# CHECK-NEXT: [0,68] . . .D===========eER . . .. vpsubd %ymm19, %ymm19, %ymm21 -# CHECK-NEXT: [0,69] . . .D===========eER . . .. vpsubq %ymm19, %ymm19, %ymm21 -# CHECK-NEXT: [0,70] . . .D===========eER . . .. vpsubw %ymm19, %ymm19, %ymm21 -# CHECK-NEXT: [0,71] . . .D===========eER . . .. vpsubb %zmm19, %zmm19, %zmm21 -# CHECK-NEXT: [0,72] . . . D===========eER . . .. vpsubd %zmm19, %zmm19, %zmm21 -# CHECK-NEXT: [0,73] . . . D===========eER . . .. vpsubq %zmm19, %zmm19, %zmm21 -# CHECK-NEXT: [0,74] . . . D===========eER . . .. vpsubw %zmm19, %zmm19, %zmm21 -# CHECK-NEXT: [0,75] . . . DeE-----------R . . .. andnps %xmm0, %xmm0 -# CHECK-NEXT: [0,76] . . . DeE-----------R . . .. andnpd %xmm1, %xmm1 -# CHECK-NEXT: [0,77] . . . D-------------R . . .. vandnps %xmm2, %xmm2, %xmm2 -# CHECK-NEXT: [0,78] . . . D------------R . . .. vandnpd %xmm1, %xmm1, %xmm1 -# CHECK-NEXT: [0,79] . . . D------------R . . .. vandnps %ymm2, %ymm2, %ymm2 -# CHECK-NEXT: [0,80] . . . D------------R . . .. vandnpd %ymm1, %ymm1, %ymm1 -# CHECK-NEXT: [0,81] . . . DeE-----------R . . .. vandnps %zmm2, %zmm2, %zmm2 -# CHECK-NEXT: [0,82] . . . DeE-----------R . . .. vandnpd %zmm1, %zmm1, %zmm1 -# CHECK-NEXT: [0,83] . . . DeE-----------R . . .. pandn %mm2, %mm2 -# CHECK-NEXT: [0,84] . . . DeE----------R . . .. pandn %xmm2, %xmm2 -# CHECK-NEXT: [0,85] . . . D------------R . . .. vpandn %xmm3, %xmm3, %xmm3 -# CHECK-NEXT: [0,86] . . . D------------R . . .. vpandn %ymm3, %ymm3, %ymm3 -# CHECK-NEXT: [0,87] . . . D==========eER . . .. vpandnd %xmm19, %xmm19, %xmm19 -# CHECK-NEXT: [0,88] . . . D===========eER . . .. vpandnq %xmm19, %xmm19, %xmm19 -# CHECK-NEXT: [0,89] . . . D============eER. . .. 
vpandnd %ymm19, %ymm19, %ymm19 -# CHECK-NEXT: [0,90] . . . D============eER . .. vpandnq %ymm19, %ymm19, %ymm19 -# CHECK-NEXT: [0,91] . . . D=============eER . .. vpandnd %zmm19, %zmm19, %zmm19 -# CHECK-NEXT: [0,92] . . . D==============eER . .. vpandnq %zmm19, %zmm19, %zmm19 -# CHECK-NEXT: [0,93] . . . D----------------R . .. vandnps %xmm2, %xmm2, %xmm5 -# CHECK-NEXT: [0,94] . . . D----------------R . .. vandnpd %xmm1, %xmm1, %xmm5 -# CHECK-NEXT: [0,95] . . . D----------------R . .. vpandn %xmm3, %xmm3, %xmm5 -# CHECK-NEXT: [0,96] . . . .D---------------R . .. vandnps %ymm2, %ymm2, %ymm5 -# CHECK-NEXT: [0,97] . . . .D---------------R . .. vandnpd %ymm1, %ymm1, %ymm5 -# CHECK-NEXT: [0,98] . . . .D---------------R . .. vpandn %ymm3, %ymm3, %ymm5 -# CHECK-NEXT: [0,99] . . . .DeE-------------R . .. vandnps %zmm2, %zmm2, %zmm5 -# CHECK-NEXT: [0,100] . . . .DeE-------------R . .. vandnpd %zmm1, %zmm1, %zmm5 -# CHECK-NEXT: [0,101] . . . .D==============eER . .. vpandnd %xmm19, %xmm19, %xmm21 -# CHECK-NEXT: [0,102] . . . . D=============eER . .. vpandnq %xmm19, %xmm19, %xmm21 -# CHECK-NEXT: [0,103] . . . . D=============eER . .. vpandnd %ymm19, %ymm19, %ymm21 -# CHECK-NEXT: [0,104] . . . . D==============eER. .. vpandnq %ymm19, %ymm19, %ymm21 -# CHECK-NEXT: [0,105] . . . . D==============eER. .. vpandnd %zmm19, %zmm19, %zmm21 -# CHECK-NEXT: [0,106] . . . . D==============eER. .. vpandnq %zmm19, %zmm19, %zmm21 -# CHECK-NEXT: [0,107] . . . . D=eE-------------R. .. xorps %xmm0, %xmm0 -# CHECK-NEXT: [0,108] . . . . DeE-------------R. .. xorpd %xmm1, %xmm1 -# CHECK-NEXT: [0,109] . . . . D---------------R. .. vxorps %xmm2, %xmm2, %xmm2 -# CHECK-NEXT: [0,110] . . . . D---------------R. .. vxorpd %xmm1, %xmm1, %xmm1 -# CHECK-NEXT: [0,111] . . . . D---------------R. .. vxorps %ymm2, %ymm2, %ymm2 -# CHECK-NEXT: [0,112] . . . . D---------------R. .. vxorpd %ymm1, %ymm1, %ymm1 -# CHECK-NEXT: [0,113] . . . . D=eE-------------R .. vxorps %zmm2, %zmm2, %zmm2 -# CHECK-NEXT: [0,114] . . . 
. DeE-------------R .. vxorpd %zmm1, %zmm1, %zmm1 -# CHECK-NEXT: [0,115] . . . . D======eE-------R .. pxor %mm2, %mm2 -# CHECK-NEXT: [0,116] . . . . D======eE-------R .. pxor %xmm2, %xmm2 -# CHECK-NEXT: [0,117] . . . . D---------------R .. vpxor %xmm3, %xmm3, %xmm3 -# CHECK-NEXT: [0,118] . . . . D---------------R .. vpxor %ymm3, %ymm3, %ymm3 -# CHECK-NEXT: [0,119] . . . . D============eE-R .. vpxord %xmm19, %xmm19, %xmm19 -# CHECK-NEXT: [0,120] . . . . D============eER .. vpxorq %xmm19, %xmm19, %xmm19 -# CHECK-NEXT: [0,121] . . . . D=============eER .. vpxord %ymm19, %ymm19, %ymm19 -# CHECK-NEXT: [0,122] . . . . D==============eER .. vpxorq %ymm19, %ymm19, %ymm19 -# CHECK-NEXT: [0,123] . . . . D===============eER .. vpxord %zmm19, %zmm19, %zmm19 -# CHECK-NEXT: [0,124] . . . . D================eER.. vpxorq %zmm19, %zmm19, %zmm19 -# CHECK-NEXT: [0,125] . . . . D------------------R.. vxorps %xmm4, %xmm4, %xmm5 -# CHECK-NEXT: [0,126] . . . . .D-----------------R.. vxorpd %xmm1, %xmm1, %xmm3 -# CHECK-NEXT: [0,127] . . . . .D-----------------R.. vxorps %ymm4, %ymm4, %ymm5 -# CHECK-NEXT: [0,128] . . . . .D-----------------R.. vxorpd %ymm1, %ymm1, %ymm3 -# CHECK-NEXT: [0,129] . . . . .D====eE-----------R.. vxorps %zmm4, %zmm4, %zmm5 -# CHECK-NEXT: [0,130] . . . . .D=====eE----------R.. vxorpd %zmm1, %zmm1, %zmm3 -# CHECK-NEXT: [0,131] . . . . .D-----------------R.. vpxor %xmm3, %xmm3, %xmm5 -# CHECK-NEXT: [0,132] . . . . . D----------------R.. vpxor %ymm3, %ymm3, %ymm5 -# CHECK-NEXT: [0,133] . . . . . D===============eER. vpxord %xmm19, %xmm19, %xmm21 -# CHECK-NEXT: [0,134] . . . . . D===============eER. vpxorq %xmm19, %xmm19, %xmm21 -# CHECK-NEXT: [0,135] . . . . . D===============eER. vpxord %ymm19, %ymm19, %ymm21 -# CHECK-NEXT: [0,136] . . . . . D================eER vpxorq %ymm19, %ymm19, %ymm21 -# CHECK-NEXT: [0,137] . . . . . D================eER vpxord %zmm19, %zmm19, %zmm21 -# CHECK-NEXT: [0,138] . . . . . 
D===============eER vpxorq %zmm19, %zmm19, %zmm21 +# CHECK-NEXT: Index 0123456789 0123456789 + +# CHECK: [0,0] DR . . . . . . . . subl %eax, %eax +# CHECK-NEXT: [0,1] DR . . . . . . . . subq %rax, %rax +# CHECK-NEXT: [0,2] DR . . . . . . . . xorl %eax, %eax +# CHECK-NEXT: [0,3] DR . . . . . . . . xorq %rax, %rax +# CHECK-NEXT: [0,4] DeER . . . . . . . . pcmpgtb %mm2, %mm2 +# CHECK-NEXT: [0,5] D=eER. . . . . . . . pcmpgtd %mm2, %mm2 +# CHECK-NEXT: [0,6] .D=eER . . . . . . . pcmpgtw %mm2, %mm2 +# CHECK-NEXT: [0,7] .DeE-R . . . . . . . pcmpgtb %xmm2, %xmm2 +# CHECK-NEXT: [0,8] .DeE-R . . . . . . . pcmpgtd %xmm2, %xmm2 +# CHECK-NEXT: [0,9] .DeE-R . . . . . . . pcmpgtq %xmm2, %xmm2 +# CHECK-NEXT: [0,10] .D=eER . . . . . . . pcmpgtw %xmm2, %xmm2 +# CHECK-NEXT: [0,11] .D---R . . . . . . . vpcmpgtb %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,12] . D--R . . . . . . . vpcmpgtd %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,13] . D--R . . . . . . . vpcmpgtq %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,14] . D--R . . . . . . . vpcmpgtw %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,15] . D---R . . . . . . . vpcmpgtb %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,16] . D---R . . . . . . . vpcmpgtd %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,17] . D---R . . . . . . . vpcmpgtq %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,18] . D--R . . . . . . . vpcmpgtw %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,19] . D--R . . . . . . . vpcmpgtb %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: [0,20] . D--R . . . . . . . vpcmpgtd %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: [0,21] . D--R . . . . . . . vpcmpgtq %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: [0,22] . D--R . . . . . . . vpcmpgtw %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: [0,23] . D--R . . . . . . . vpcmpgtb %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: [0,24] . D--R . . . . . . . vpcmpgtd %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: [0,25] . D--R . . . . . . . vpcmpgtq %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: [0,26] . D--R . . . . . . . vpcmpgtw %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: [0,27] . DeER . . . . . . . psubb %mm2, %mm2 +# CHECK-NEXT: [0,28] . D=eER . . 
. . . . . psubd %mm2, %mm2 +# CHECK-NEXT: [0,29] . D==eER. . . . . . . psubq %mm2, %mm2 +# CHECK-NEXT: [0,30] . D==eER . . . . . . psubw %mm2, %mm2 +# CHECK-NEXT: [0,31] . DeE--R . . . . . . psubb %xmm2, %xmm2 +# CHECK-NEXT: [0,32] . DeE--R . . . . . . psubd %xmm2, %xmm2 +# CHECK-NEXT: [0,33] . DeE--R . . . . . . psubq %xmm2, %xmm2 +# CHECK-NEXT: [0,34] . D=eE-R . . . . . . psubw %xmm2, %xmm2 +# CHECK-NEXT: [0,35] . D----R . . . . . . vpsubb %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,36] . .D---R . . . . . . vpsubd %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,37] . .D---R . . . . . . vpsubq %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,38] . .D---R . . . . . . vpsubw %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,39] . .D----R . . . . . . vpsubb %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: [0,40] . .D----R . . . . . . vpsubd %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: [0,41] . .D----R . . . . . . vpsubq %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: [0,42] . . D---R . . . . . . vpsubw %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: [0,43] . . D---R . . . . . . vpsubb %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,44] . . D---R . . . . . . vpsubd %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,45] . . D---R . . . . . . vpsubq %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,46] . . D---R . . . . . . vpsubw %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,47] . . D---R . . . . . . vpsubb %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: [0,48] . . D---R . . . . . . vpsubd %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: [0,49] . . D---R . . . . . . vpsubq %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: [0,50] . . D---R . . . . . . vpsubw %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: [0,51] . . DeE-R . . . . . . vpsubb %xmm19, %xmm19, %xmm19 +# CHECK-NEXT: [0,52] . . D=eER . . . . . . vpsubd %xmm19, %xmm19, %xmm19 +# CHECK-NEXT: [0,53] . . D==eER . . . . . . vpsubq %xmm19, %xmm19, %xmm19 +# CHECK-NEXT: [0,54] . . D==eER. . . . . . vpsubw %xmm19, %xmm19, %xmm19 +# CHECK-NEXT: [0,55] . . D===eER . . . . . vpsubb %ymm19, %ymm19, %ymm19 +# CHECK-NEXT: [0,56] . . D====eER . . . . . vpsubd %ymm19, %ymm19, %ymm19 +# CHECK-NEXT: [0,57] . . 
D=====eER . . . . . vpsubq %ymm19, %ymm19, %ymm19 +# CHECK-NEXT: [0,58] . . D======eER . . . . . vpsubw %ymm19, %ymm19, %ymm19 +# CHECK-NEXT: [0,59] . . D=======eER. . . . . vpsubb %zmm19, %zmm19, %zmm19 +# CHECK-NEXT: [0,60] . . D=======eER . . . . vpsubd %zmm19, %zmm19, %zmm19 +# CHECK-NEXT: [0,61] . . D========eER . . . . vpsubq %zmm19, %zmm19, %zmm19 +# CHECK-NEXT: [0,62] . . D=========eER . . . . vpsubw %zmm19, %zmm19, %zmm19 +# CHECK-NEXT: [0,63] . . D==========eER . . . . vpsubb %xmm19, %xmm19, %xmm21 +# CHECK-NEXT: [0,64] . . D===========eER. . . . vpsubd %xmm19, %xmm19, %xmm21 +# CHECK-NEXT: [0,65] . . D===========eER. . . . vpsubq %xmm19, %xmm19, %xmm21 +# CHECK-NEXT: [0,66] . . .D==========eER. . . . vpsubw %xmm19, %xmm19, %xmm21 +# CHECK-NEXT: [0,67] . . .D==========eER. . . . vpsubb %ymm19, %ymm19, %ymm21 +# CHECK-NEXT: [0,68] . . .D===========eER . . . vpsubd %ymm19, %ymm19, %ymm21 +# CHECK-NEXT: [0,69] . . .D===========eER . . . vpsubq %ymm19, %ymm19, %ymm21 +# CHECK-NEXT: [0,70] . . .D===========eER . . . vpsubw %ymm19, %ymm19, %ymm21 +# CHECK-NEXT: [0,71] . . .D===========eER . . . vpsubb %zmm19, %zmm19, %zmm21 +# CHECK-NEXT: [0,72] . . . D===========eER . . . vpsubd %zmm19, %zmm19, %zmm21 +# CHECK-NEXT: [0,73] . . . D===========eER . . . vpsubq %zmm19, %zmm19, %zmm21 +# CHECK-NEXT: [0,74] . . . D===========eER . . . vpsubw %zmm19, %zmm19, %zmm21 +# CHECK-NEXT: [0,75] . . . DeE-----------R . . . andnps %xmm0, %xmm0 +# CHECK-NEXT: [0,76] . . . DeE-----------R . . . andnpd %xmm1, %xmm1 +# CHECK-NEXT: [0,77] . . . D-------------R . . . vandnps %xmm2, %xmm2, %xmm2 +# CHECK-NEXT: [0,78] . . . D------------R . . . vandnpd %xmm1, %xmm1, %xmm1 +# CHECK-NEXT: [0,79] . . . D------------R . . . vandnps %ymm2, %ymm2, %ymm2 +# CHECK-NEXT: [0,80] . . . D------------R . . . vandnpd %ymm1, %ymm1, %ymm1 +# CHECK-NEXT: [0,81] . . . DeE-----------R . . . vandnps %zmm2, %zmm2, %zmm2 +# CHECK-NEXT: [0,82] . . . DeE-----------R . . . 
vandnpd %zmm1, %zmm1, %zmm1 +# CHECK-NEXT: [0,83] . . . DeE-----------R . . . pandn %mm2, %mm2 +# CHECK-NEXT: [0,84] . . . DeE----------R . . . pandn %xmm2, %xmm2 +# CHECK-NEXT: [0,85] . . . D------------R . . . vpandn %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,86] . . . D------------R . . . vpandn %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: [0,87] . . . D==========eER . . . vpandnd %xmm19, %xmm19, %xmm19 +# CHECK-NEXT: [0,88] . . . D===========eER . . . vpandnq %xmm19, %xmm19, %xmm19 +# CHECK-NEXT: [0,89] . . . D============eER. . . vpandnd %ymm19, %ymm19, %ymm19 +# CHECK-NEXT: [0,90] . . . D============eER . . vpandnq %ymm19, %ymm19, %ymm19 +# CHECK-NEXT: [0,91] . . . D=============eER . . vpandnd %zmm19, %zmm19, %zmm19 +# CHECK-NEXT: [0,92] . . . D==============eER . . vpandnq %zmm19, %zmm19, %zmm19 +# CHECK-NEXT: [0,93] . . . D----------------R . . vandnps %xmm2, %xmm2, %xmm5 +# CHECK-NEXT: [0,94] . . . D----------------R . . vandnpd %xmm1, %xmm1, %xmm5 +# CHECK-NEXT: [0,95] . . . D----------------R . . vpandn %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,96] . . . .D---------------R . . vandnps %ymm2, %ymm2, %ymm5 +# CHECK-NEXT: [0,97] . . . .D---------------R . . vandnpd %ymm1, %ymm1, %ymm5 +# CHECK-NEXT: [0,98] . . . .D---------------R . . vpandn %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: [0,99] . . . .DeE-------------R . . vandnps %zmm2, %zmm2, %zmm5 +# CHECK-NEXT: [0,100] . . . .DeE-------------R . . vandnpd %zmm1, %zmm1, %zmm5 +# CHECK-NEXT: [0,101] . . . .D==============eER . . vpandnd %xmm19, %xmm19, %xmm21 +# CHECK-NEXT: [0,102] . . . . D=============eER . . vpandnq %xmm19, %xmm19, %xmm21 +# CHECK-NEXT: [0,103] . . . . D==============eER. . vpandnd %ymm19, %ymm19, %ymm21 +# CHECK-NEXT: [0,104] . . . . D==============eER. . vpandnq %ymm19, %ymm19, %ymm21 +# CHECK-NEXT: [0,105] . . . . D==============eER. . vpandnd %zmm19, %zmm19, %zmm21 +# CHECK-NEXT: [0,106] . . . . D==============eER. . vpandnq %zmm19, %zmm19, %zmm21 +# CHECK-NEXT: [0,107] . . . . D=eE-------------R. . 
xorps %xmm0, %xmm0 +# CHECK-NEXT: [0,108] . . . . DeE-------------R. . xorpd %xmm1, %xmm1 +# CHECK-NEXT: [0,109] . . . . D---------------R. . vxorps %xmm2, %xmm2, %xmm2 +# CHECK-NEXT: [0,110] . . . . D---------------R. . vxorpd %xmm1, %xmm1, %xmm1 +# CHECK-NEXT: [0,111] . . . . D---------------R. . vxorps %ymm2, %ymm2, %ymm2 +# CHECK-NEXT: [0,112] . . . . D----------------R . vxorpd %ymm1, %ymm1, %ymm1 +# CHECK-NEXT: [0,113] . . . . D=eE-------------R . vxorps %zmm2, %zmm2, %zmm2 +# CHECK-NEXT: [0,114] . . . . DeE-------------R . vxorpd %zmm1, %zmm1, %zmm1 +# CHECK-NEXT: [0,115] . . . . D======eE-------R . pxor %mm2, %mm2 +# CHECK-NEXT: [0,116] . . . . D======eE-------R . pxor %xmm2, %xmm2 +# CHECK-NEXT: [0,117] . . . . D---------------R . vpxor %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,118] . . . . D---------------R . vpxor %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: [0,119] . . . . D=============eER . vpxord %xmm19, %xmm19, %xmm19 +# CHECK-NEXT: [0,120] . . . . D=============eER . vpxorq %xmm19, %xmm19, %xmm19 +# CHECK-NEXT: [0,121] . . . . D==============eER . vpxord %ymm19, %ymm19, %ymm19 +# CHECK-NEXT: [0,122] . . . . D===============eER. vpxorq %ymm19, %ymm19, %ymm19 +# CHECK-NEXT: [0,123] . . . . D-----------------R. vpxord %zmm19, %zmm19, %zmm19 +# CHECK-NEXT: [0,124] . . . . D=====eE----------R. vpxorq %zmm19, %zmm19, %zmm19 +# CHECK-NEXT: [0,125] . . . . D-----------------R. vxorps %xmm4, %xmm4, %xmm5 +# CHECK-NEXT: [0,126] . . . . .D----------------R. vxorpd %xmm1, %xmm1, %xmm3 +# CHECK-NEXT: [0,127] . . . . .D----------------R. vxorps %ymm4, %ymm4, %ymm5 +# CHECK-NEXT: [0,128] . . . . .D----------------R. vxorpd %ymm1, %ymm1, %ymm3 +# CHECK-NEXT: [0,129] . . . . .D=====eE---------R. vxorps %zmm4, %zmm4, %zmm5 +# CHECK-NEXT: [0,130] . . . . .D=====eE---------R. vxorpd %zmm1, %zmm1, %zmm3 +# CHECK-NEXT: [0,131] . . . . .D-----------------R vpxor %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,132] . . . . . D----------------R vpxor %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: [0,133] . . 
. . . D=====eE---------R vpxord %xmm19, %xmm19, %xmm21 +# CHECK-NEXT: [0,134] . . . . . D======eE--------R vpxorq %xmm19, %xmm19, %xmm21 +# CHECK-NEXT: [0,135] . . . . . D======eE--------R vpxord %ymm19, %ymm19, %ymm21 +# CHECK-NEXT: [0,136] . . . . . D======eE--------R vpxorq %ymm19, %ymm19, %ymm21 +# CHECK-NEXT: [0,137] . . . . . D----------------R vpxord %zmm19, %zmm19, %zmm21 +# CHECK-NEXT: [0,138] . . . . . D======eE-------R vpxorq %zmm19, %zmm19, %zmm21 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -757,7 +757,7 @@ vpxorq %zmm19, %zmm19, %zmm21 # CHECK-NEXT: 100. 1 1.0 1.0 13.0 vandnpd %zmm1, %zmm1, %zmm5 # CHECK-NEXT: 101. 1 15.0 0.0 0.0 vpandnd %xmm19, %xmm19, %xmm21 # CHECK-NEXT: 102. 1 14.0 0.0 0.0 vpandnq %xmm19, %xmm19, %xmm21 -# CHECK-NEXT: 103. 1 14.0 0.0 0.0 vpandnd %ymm19, %ymm19, %ymm21 +# CHECK-NEXT: 103. 1 15.0 1.0 0.0 vpandnd %ymm19, %ymm19, %ymm21 # CHECK-NEXT: 104. 1 15.0 1.0 0.0 vpandnq %ymm19, %ymm19, %ymm21 # CHECK-NEXT: 105. 1 15.0 1.0 0.0 vpandnd %zmm19, %zmm19, %zmm21 # CHECK-NEXT: 106. 1 15.0 1.0 0.0 vpandnq %zmm19, %zmm19, %zmm21 @@ -766,31 +766,31 @@ vpxorq %zmm19, %zmm19, %zmm21 # CHECK-NEXT: 109. 1 0.0 0.0 15.0 vxorps %xmm2, %xmm2, %xmm2 # CHECK-NEXT: 110. 1 0.0 0.0 15.0 vxorpd %xmm1, %xmm1, %xmm1 # CHECK-NEXT: 111. 1 0.0 0.0 15.0 vxorps %ymm2, %ymm2, %ymm2 -# CHECK-NEXT: 112. 1 0.0 0.0 15.0 vxorpd %ymm1, %ymm1, %ymm1 +# CHECK-NEXT: 112. 1 0.0 0.0 16.0 vxorpd %ymm1, %ymm1, %ymm1 # CHECK-NEXT: 113. 1 2.0 2.0 13.0 vxorps %zmm2, %zmm2, %zmm2 # CHECK-NEXT: 114. 1 1.0 1.0 13.0 vxorpd %zmm1, %zmm1, %zmm1 # CHECK-NEXT: 115. 1 7.0 7.0 7.0 pxor %mm2, %mm2 # CHECK-NEXT: 116. 1 7.0 7.0 7.0 pxor %xmm2, %xmm2 # CHECK-NEXT: 117. 1 0.0 0.0 15.0 vpxor %xmm3, %xmm3, %xmm3 # CHECK-NEXT: 118. 1 0.0 0.0 15.0 vpxor %ymm3, %ymm3, %ymm3 -# CHECK-NEXT: 119. 1 13.0 1.0 1.0 vpxord %xmm19, %xmm19, %xmm19 -# CHECK-NEXT: 120. 1 13.0 0.0 0.0 vpxorq %xmm19, %xmm19, %xmm19 -# CHECK-NEXT: 121. 
1 14.0 0.0 0.0 vpxord %ymm19, %ymm19, %ymm19 -# CHECK-NEXT: 122. 1 15.0 0.0 0.0 vpxorq %ymm19, %ymm19, %ymm19 -# CHECK-NEXT: 123. 1 16.0 0.0 0.0 vpxord %zmm19, %zmm19, %zmm19 -# CHECK-NEXT: 124. 1 17.0 0.0 0.0 vpxorq %zmm19, %zmm19, %zmm19 -# CHECK-NEXT: 125. 1 0.0 0.0 18.0 vxorps %xmm4, %xmm4, %xmm5 -# CHECK-NEXT: 126. 1 0.0 0.0 17.0 vxorpd %xmm1, %xmm1, %xmm3 -# CHECK-NEXT: 127. 1 0.0 0.0 17.0 vxorps %ymm4, %ymm4, %ymm5 -# CHECK-NEXT: 128. 1 0.0 0.0 17.0 vxorpd %ymm1, %ymm1, %ymm3 -# CHECK-NEXT: 129. 1 5.0 5.0 11.0 vxorps %zmm4, %zmm4, %zmm5 -# CHECK-NEXT: 130. 1 6.0 6.0 10.0 vxorpd %zmm1, %zmm1, %zmm3 +# CHECK-NEXT: 119. 1 14.0 2.0 0.0 vpxord %xmm19, %xmm19, %xmm19 +# CHECK-NEXT: 120. 1 14.0 0.0 0.0 vpxorq %xmm19, %xmm19, %xmm19 +# CHECK-NEXT: 121. 1 15.0 0.0 0.0 vpxord %ymm19, %ymm19, %ymm19 +# CHECK-NEXT: 122. 1 16.0 0.0 0.0 vpxorq %ymm19, %ymm19, %ymm19 +# CHECK-NEXT: 123. 1 0.0 0.0 17.0 vpxord %zmm19, %zmm19, %zmm19 +# CHECK-NEXT: 124. 1 6.0 6.0 10.0 vpxorq %zmm19, %zmm19, %zmm19 +# CHECK-NEXT: 125. 1 0.0 0.0 17.0 vxorps %xmm4, %xmm4, %xmm5 +# CHECK-NEXT: 126. 1 0.0 0.0 16.0 vxorpd %xmm1, %xmm1, %xmm3 +# CHECK-NEXT: 127. 1 0.0 0.0 16.0 vxorps %ymm4, %ymm4, %ymm5 +# CHECK-NEXT: 128. 1 0.0 0.0 16.0 vxorpd %ymm1, %ymm1, %ymm3 +# CHECK-NEXT: 129. 1 6.0 6.0 9.0 vxorps %zmm4, %zmm4, %zmm5 +# CHECK-NEXT: 130. 1 6.0 6.0 9.0 vxorpd %zmm1, %zmm1, %zmm3 # CHECK-NEXT: 131. 1 0.0 0.0 17.0 vpxor %xmm3, %xmm3, %xmm5 # CHECK-NEXT: 132. 1 0.0 0.0 16.0 vpxor %ymm3, %ymm3, %ymm5 -# CHECK-NEXT: 133. 1 16.0 0.0 0.0 vpxord %xmm19, %xmm19, %xmm21 -# CHECK-NEXT: 134. 1 16.0 0.0 0.0 vpxorq %xmm19, %xmm19, %xmm21 -# CHECK-NEXT: 135. 1 16.0 0.0 0.0 vpxord %ymm19, %ymm19, %ymm21 -# CHECK-NEXT: 136. 1 17.0 1.0 0.0 vpxorq %ymm19, %ymm19, %ymm21 -# CHECK-NEXT: 137. 1 17.0 1.0 0.0 vpxord %zmm19, %zmm19, %zmm21 -# CHECK-NEXT: 138. 1 16.0 1.0 0.0 vpxorq %zmm19, %zmm19, %zmm21 -# CHECK-NEXT: 1 4.5 0.6 4.6 <total> +# CHECK-NEXT: 133. 
1 6.0 1.0 9.0 vpxord %xmm19, %xmm19, %xmm21 +# CHECK-NEXT: 134. 1 7.0 2.0 8.0 vpxorq %xmm19, %xmm19, %xmm21 +# CHECK-NEXT: 135. 1 7.0 2.0 8.0 vpxord %ymm19, %ymm19, %ymm21 +# CHECK-NEXT: 136. 1 7.0 2.0 8.0 vpxorq %ymm19, %ymm19, %ymm21 +# CHECK-NEXT: 137. 1 0.0 0.0 16.0 vpxord %zmm19, %zmm19, %zmm21 +# CHECK-NEXT: 138. 1 7.0 3.0 7.0 vpxorq %zmm19, %zmm19, %zmm21 +# CHECK-NEXT: 1 3.9 0.7 5.1 <total> diff --git a/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp b/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp index d00cf52..6858e82 100644 --- a/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp +++ b/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp @@ -272,7 +272,7 @@ static cl::opt<bool> "expressed in bytes."), cat(DwarfDumpCategory)); static cl::opt<bool> ManuallyGenerateUnitIndex( - "manaully-generate-unit-index", + "manually-generate-unit-index", cl::desc("if the input is dwp file, parse .debug_info " "section and use it to populate " "DW_SECT_INFO contributions in cu-index. " diff --git a/llvm/tools/opt/optdriver.cpp b/llvm/tools/opt/optdriver.cpp index 1bdfa71..c5bc7b4 100644 --- a/llvm/tools/opt/optdriver.cpp +++ b/llvm/tools/opt/optdriver.cpp @@ -375,6 +375,7 @@ static bool shouldPinPassToLegacyPM(StringRef Pass) { "fix-irreducible", "expand-large-fp-convert", "callbrprepare", + "scalarizer", }; for (const auto &P : PassNamePrefix) if (Pass.starts_with(P)) diff --git a/llvm/unittests/ProfileData/PGOCtxProfReaderWriterTest.cpp b/llvm/unittests/ProfileData/PGOCtxProfReaderWriterTest.cpp index f48f4f1..a7950e1 100644 --- a/llvm/unittests/ProfileData/PGOCtxProfReaderWriterTest.cpp +++ b/llvm/unittests/ProfileData/PGOCtxProfReaderWriterTest.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/DenseSet.h" #include "llvm/Bitcode/BitcodeAnalyzer.h" #include "llvm/ProfileData/CtxInstrContextNode.h" #include "llvm/ProfileData/PGOCtxProfReader.h" diff --git a/llvm/unittests/SandboxIR/SandboxIRTest.cpp 
b/llvm/unittests/SandboxIR/SandboxIRTest.cpp index 6280963..ad5508f 100644 --- a/llvm/unittests/SandboxIR/SandboxIRTest.cpp +++ b/llvm/unittests/SandboxIR/SandboxIRTest.cpp @@ -729,6 +729,72 @@ define void @foo() { EXPECT_EQ(UndefStruct->getNumElements(), 2u); } +TEST_F(SandboxIRTest, GlobalValue) { + parseIR(C, R"IR( +declare external void @bar() +define void @foo() { + call void @bar() + ret void +} +)IR"); + Function &LLVMF = *M->getFunction("foo"); + auto *LLVMBB = &*LLVMF.begin(); + auto LLVMIt = LLVMBB->begin(); + auto *LLVMCall = cast<llvm::CallInst>(&*LLVMIt++); + auto *LLVMGV = cast<llvm::GlobalValue>(LLVMCall->getCalledOperand()); + sandboxir::Context Ctx(C); + + auto &F = *Ctx.createFunction(&LLVMF); + auto *BB = &*F.begin(); + auto It = BB->begin(); + auto *Call = cast<sandboxir::CallInst>(&*It++); + [[maybe_unused]] auto *Ret = cast<sandboxir::ReturnInst>(&*It++); + + // Check classof(), creation, getFunction(), getBasicBlock(). + auto *GV = cast<sandboxir::GlobalValue>(Call->getCalledOperand()); + // Check getAddressSpace(). + EXPECT_EQ(GV->getAddressSpace(), LLVMGV->getAddressSpace()); + // Check hasGlobalUnnamedAddr(). + EXPECT_EQ(GV->hasGlobalUnnamedAddr(), LLVMGV->hasGlobalUnnamedAddr()); + // Check hasAtLeastLocalUnnamedAddr(). + EXPECT_EQ(GV->hasAtLeastLocalUnnamedAddr(), + LLVMGV->hasAtLeastLocalUnnamedAddr()); + // Check getUnnamedAddr(). + EXPECT_EQ(GV->getUnnamedAddr(), LLVMGV->getUnnamedAddr()); + // Check setUnnamedAddr(). + auto OrigUnnamedAddr = GV->getUnnamedAddr(); + auto NewUnnamedAddr = sandboxir::GlobalValue::UnnamedAddr::Global; + EXPECT_NE(NewUnnamedAddr, OrigUnnamedAddr); + GV->setUnnamedAddr(NewUnnamedAddr); + EXPECT_EQ(GV->getUnnamedAddr(), NewUnnamedAddr); + GV->setUnnamedAddr(OrigUnnamedAddr); + EXPECT_EQ(GV->getUnnamedAddr(), OrigUnnamedAddr); + // Check getMinUnnamedAddr(). 
+ EXPECT_EQ( + sandboxir::GlobalValue::getMinUnnamedAddr(OrigUnnamedAddr, + NewUnnamedAddr), + llvm::GlobalValue::getMinUnnamedAddr(OrigUnnamedAddr, NewUnnamedAddr)); + // Check hasComdat(). + EXPECT_EQ(GV->hasComdat(), LLVMGV->hasComdat()); + // Check getVisibility(). + EXPECT_EQ(GV->getVisibility(), LLVMGV->getVisibility()); + // Check hasDefaultVisibility(). + EXPECT_EQ(GV->hasDefaultVisibility(), LLVMGV->hasDefaultVisibility()); + // Check hasHiddenVisibility(). + EXPECT_EQ(GV->hasHiddenVisibility(), LLVMGV->hasHiddenVisibility()); + // Check hasProtectedVisibility(). + EXPECT_EQ(GV->hasProtectedVisibility(), LLVMGV->hasProtectedVisibility()); + // Check setVisibility(). + auto OrigVisibility = GV->getVisibility(); + auto NewVisibility = + sandboxir::GlobalValue::VisibilityTypes::ProtectedVisibility; + EXPECT_NE(NewVisibility, OrigVisibility); + GV->setVisibility(NewVisibility); + EXPECT_EQ(GV->getVisibility(), NewVisibility); + GV->setVisibility(OrigVisibility); + EXPECT_EQ(GV->getVisibility(), OrigVisibility); +} + TEST_F(SandboxIRTest, BlockAddress) { parseIR(C, R"IR( define void @foo(ptr %ptr) { diff --git a/llvm/unittests/SandboxIR/TrackerTest.cpp b/llvm/unittests/SandboxIR/TrackerTest.cpp index a1a4117..6454c54 100644 --- a/llvm/unittests/SandboxIR/TrackerTest.cpp +++ b/llvm/unittests/SandboxIR/TrackerTest.cpp @@ -1521,6 +1521,43 @@ define void @foo(i64 %i0, i64 %i1, float %f0, float %f1) { checkCmpInst(Ctx, ICmp); } +TEST_F(TrackerTest, GlobalValueSetters) { + parseIR(C, R"IR( +define void @foo() { + call void @foo() + ret void +} +)IR"); + Function &LLVMF = *M->getFunction("foo"); + sandboxir::Context Ctx(C); + + auto &F = *Ctx.createFunction(&LLVMF); + auto *BB = &*F.begin(); + auto *Call = cast<sandboxir::CallInst>(&*BB->begin()); + + auto *GV = cast<sandboxir::GlobalValue>(Call->getCalledOperand()); + // Check setUnnamedAddr(). 
+ auto OrigUnnamedAddr = GV->getUnnamedAddr(); + auto NewUnnamedAddr = sandboxir::GlobalValue::UnnamedAddr::Global; + EXPECT_NE(NewUnnamedAddr, OrigUnnamedAddr); + Ctx.save(); + GV->setUnnamedAddr(NewUnnamedAddr); + EXPECT_EQ(GV->getUnnamedAddr(), NewUnnamedAddr); + Ctx.revert(); + EXPECT_EQ(GV->getUnnamedAddr(), OrigUnnamedAddr); + + // Check setVisibility(). + auto OrigVisibility = GV->getVisibility(); + auto NewVisibility = + sandboxir::GlobalValue::VisibilityTypes::ProtectedVisibility; + EXPECT_NE(NewVisibility, OrigVisibility); + Ctx.save(); + GV->setVisibility(NewVisibility); + EXPECT_EQ(GV->getVisibility(), NewVisibility); + Ctx.revert(); + EXPECT_EQ(GV->getVisibility(), OrigVisibility); +} + TEST_F(TrackerTest, SetVolatile) { parseIR(C, R"IR( define void @foo(ptr %arg0, i8 %val) { diff --git a/llvm/utils/gn/build/toolchain/target_flags.gni b/llvm/utils/gn/build/toolchain/target_flags.gni index af8adcd..cbfa229 100644 --- a/llvm/utils/gn/build/toolchain/target_flags.gni +++ b/llvm/utils/gn/build/toolchain/target_flags.gni @@ -54,6 +54,6 @@ if (current_os == "android") { target_flags += [ "--target=$llvm_current_triple" ] } -if (current_cpu == "x86") { +if (current_cpu == "x86" && current_os != "win") { target_flags += [ "-m32" ] } diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/BUILD.gn index d8c75a0..02c2048 100644 --- a/llvm/utils/gn/secondary/compiler-rt/lib/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/lib/BUILD.gn @@ -10,16 +10,16 @@ group("lib") { if (current_os == "linux" || current_os == "android") { deps += [ "//compiler-rt/lib/ubsan_minimal" ] } - if (current_os != "win" && current_os != "baremetal") { + if (current_os != "baremetal") { deps += [ "//compiler-rt/lib/asan", - "//compiler-rt/lib/ubsan", + "//compiler-rt/lib/profile", ] + } + if (current_os != "win" && current_os != "baremetal") { + deps += [ "//compiler-rt/lib/ubsan" ] if (current_cpu == "x64" || current_cpu == "arm64") { 
deps += [ "//compiler-rt/lib/tsan/rtl" ] } } - if (current_os != "baremetal") { - deps += [ "//compiler-rt/lib/profile" ] - } } diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/asan/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/asan/BUILD.gn index cf30875..42467c2 100644 --- a/llvm/utils/gn/secondary/compiler-rt/lib/asan/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/lib/asan/BUILD.gn @@ -60,12 +60,12 @@ if (current_toolchain == host_toolchain) { "asan_thread.h", "asan_win.cpp", ] - if (target_os != "mac" && target_os != "win") { + if (current_os != "mac" && current_os != "win") { asan_sources += [ "asan_interceptors_vfork.S" ] } config("asan_config") { cflags = [] - if (target_os != "win") { + if (current_os != "win") { cflags += [ "-ftls-model=initial-exec" ] } else { ldflags = [ "/OPT:NOICF" ] @@ -76,11 +76,11 @@ if (current_toolchain == host_toolchain) { # FIXME: add_sanitizer_rt_version_list (cf hwasan) # FIXME: need libclang_rt.asan*.a.syms? - if (target_os == "android") { + if (current_os == "android") { ldflags = [ "-Wl,-z,global" ] } - if (target_os == "mac") { + if (current_os == "mac") { # The -U flags below correspond to the add_weak_symbols() calls in CMake. 
ldflags = [ "-lc++", @@ -145,7 +145,7 @@ if (current_toolchain == host_toolchain) { configs -= [ "//llvm/utils/gn/build:llvm_code" ] configs += [ "//llvm/utils/gn/build:crt_code" ] sources = [ "asan_rtl_static.cpp" ] - if (target_os != "mac" && target_os != "win") { + if (current_os != "mac" && current_os != "win") { sources += [ "asan_rtl_x86_64.S" ] } } @@ -183,7 +183,7 @@ if (current_toolchain == host_toolchain) { } } - if (current_os != "mac") { + if (current_os != "mac" && current_os != "win") { static_library("asan_static_library") { output_dir = crt_current_out_dir output_name = "clang_rt.asan$crt_current_target_suffix" @@ -232,7 +232,8 @@ if (current_toolchain == host_toolchain) { if (current_os == "win") { static_library("asan_static_runtime_thunk") { output_dir = crt_current_out_dir - output_name = "clang_rt.asan_static_runtime_thunk$crt_current_target_suffix" + output_name = + "clang_rt.asan_static_runtime_thunk$crt_current_target_suffix" configs -= [ "//llvm/utils/gn/build:llvm_code" ] configs += [ "//llvm/utils/gn/build:crt_code" ] complete_static_lib = true @@ -277,11 +278,11 @@ if (current_toolchain == host_toolchain) { deps = [ ":asan_shared_library" ] if (current_os == "win") { deps += [ - ":asan_static_runtime_thunk", ":asan_dynamic_runtime_thunk", + ":asan_static_runtime_thunk", ] } - if (current_os != "mac") { + if (current_os != "mac" && current_os != "win") { deps += [ ":asan_cxx", ":asan_preinit", diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn index b907e66..a6d1204 100644 --- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn @@ -496,6 +496,7 @@ if (current_toolchain == default_toolchain) { "__fwd/format.h", "__fwd/fstream.h", "__fwd/functional.h", + "__fwd/get.h", "__fwd/ios.h", "__fwd/istream.h", "__fwd/mdspan.h", @@ -512,6 +513,7 @@ if (current_toolchain == default_toolchain) { "__fwd/string_view.h", "__fwd/subrange.h", 
"__fwd/tuple.h", + "__fwd/variant.h", "__fwd/vector.h", "__hash_table", "__ios/fpos.h", diff --git a/llvm/utils/gn/secondary/llvm/lib/Transforms/Coroutines/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Transforms/Coroutines/BUILD.gn index e296a7b..bed2fff 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Transforms/Coroutines/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Transforms/Coroutines/BUILD.gn @@ -18,6 +18,7 @@ static_library("Coroutines") { "CoroFrame.cpp", "CoroSplit.cpp", "Coroutines.cpp", + "MaterializationUtils.cpp", "SpillUtils.cpp", "SuspendCrossingInfo.cpp", ] diff --git a/mlir/cmake/modules/MLIRDetectPythonEnv.cmake b/mlir/cmake/modules/MLIRDetectPythonEnv.cmake index 0a486c1..c07c55b 100644 --- a/mlir/cmake/modules/MLIRDetectPythonEnv.cmake +++ b/mlir/cmake/modules/MLIRDetectPythonEnv.cmake @@ -19,7 +19,7 @@ macro(mlir_configure_python_dev_packages) set(_python_development_component Development.Module) find_package(Python3 ${LLVM_MINIMUM_PYTHON_VERSION} - COMPONENTS Interpreter ${_python_development_component} NumPy REQUIRED) + COMPONENTS Interpreter ${_python_development_component} REQUIRED) unset(_python_development_component) message(STATUS "Found python include dirs: ${Python3_INCLUDE_DIRS}") message(STATUS "Found python libraries: ${Python3_LIBRARIES}") diff --git a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp index c2785f3..f80d279 100644 --- a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp +++ b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp @@ -671,18 +671,27 @@ struct WMMAOpLowering : public ConvertOpToLLVMPattern<WMMAOp> { matchAndRewrite(WMMAOp op, WMMAOpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { Location loc = op.getLoc(); - Type outType = typeConverter->convertType(op.getDestD().getType()); + auto outType = + typeConverter->convertType<VectorType>(op.getDestD().getType()); + if (!outType) + return rewriter.notifyMatchFailure(op, "type 
conversion failed"); if (chipset.majorVersion != 11 && chipset.majorVersion != 12) return op->emitOpError("WMMA only supported on gfx11 and gfx12"); + // The WMMA operations represent vectors of bf16s as vectors of i16s, so we + // need to bitcast bfloats to i16 and then bitcast them back. + VectorType rawOutType = outType; + if (outType.getElementType().isBF16()) + rawOutType = outType.clone(rewriter.getI16Type()); + std::optional<StringRef> maybeIntrinsic = wmmaOpToIntrinsic(op, chipset); if (!maybeIntrinsic.has_value()) return op.emitOpError("no intrinsic matching WMMA on the given chipset"); OperationState loweredOp(loc, *maybeIntrinsic); - loweredOp.addTypes(outType); + loweredOp.addTypes(rawOutType); SmallVector<Value, 4> operands; wmmaPushInputOperand(rewriter, loc, typeConverter, op.getUnsignedA(), @@ -694,7 +703,12 @@ struct WMMAOpLowering : public ConvertOpToLLVMPattern<WMMAOp> { loweredOp.addOperands(operands); Operation *lowered = rewriter.create(loweredOp); - rewriter.replaceOp(op, lowered->getResults()); + + Operation *maybeCastBack = lowered; + if (rawOutType != outType) + maybeCastBack = + rewriter.create<LLVM::BitcastOp>(loc, outType, lowered->getResult(0)); + rewriter.replaceOp(op, maybeCastBack->getResults()); return success(); } @@ -1033,15 +1047,6 @@ struct ConvertAMDGPUToROCDLPass void mlir::populateAMDGPUToROCDLConversionPatterns(LLVMTypeConverter &converter, RewritePatternSet &patterns, Chipset chipset) { - converter.addConversion([](BFloat16Type t) -> Type { - return IntegerType::get(t.getContext(), 16); - }); - converter.addConversion([&converter](VectorType t) -> std::optional<Type> { - if (!t.getElementType().isBF16()) - return std::nullopt; - return converter.convertType(t.clone(IntegerType::get(t.getContext(), 16))); - }); - patterns .add<RawBufferOpLowering<RawBufferLoadOp, ROCDL::RawPtrBufferLoadOp>, RawBufferOpLowering<RawBufferStoreOp, ROCDL::RawPtrBufferStoreOp>, diff --git 
a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp index 2992671..fc3e1fc 100644 --- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp +++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp @@ -26,6 +26,7 @@ #include "mlir/Conversion/LLVMCommon/LoweringOptions.h" #include "mlir/Conversion/LLVMCommon/Pattern.h" #include "mlir/Conversion/LLVMCommon/TypeConverter.h" +#include "mlir/Conversion/MathToLLVM/MathToLLVM.h" #include "mlir/Conversion/MathToROCDL/MathToROCDL.h" #include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h" #include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h" @@ -290,6 +291,7 @@ struct LowerGpuOpsToROCDLOpsPass populateAMDGPUToROCDLConversionPatterns(converter, llvmPatterns, *maybeChipset); populateVectorToLLVMConversionPatterns(converter, llvmPatterns); + populateMathToLLVMConversionPatterns(converter, llvmPatterns); cf::populateControlFlowToLLVMConversionPatterns(converter, llvmPatterns); populateFuncToLLVMConversionPatterns(converter, llvmPatterns); populateFinalizeMemRefToLLVMConversionPatterns(converter, llvmPatterns); @@ -332,7 +334,11 @@ void mlir::configureGpuToROCDLConversionLegality(ConversionTarget &target) { target.addIllegalOp<LLVM::CosOp, LLVM::ExpOp, LLVM::Exp2Op, LLVM::FCeilOp, LLVM::FFloorOp, LLVM::FRemOp, LLVM::LogOp, LLVM::Log10Op, LLVM::Log2Op, LLVM::PowOp, LLVM::SinOp>(); - + // These ops are legal for f16 and f32 type. + target.addDynamicallyLegalOp<LLVM::ExpOp, LLVM::LogOp>([](Operation *op) { + return any_of(op->getOperandTypes(), + llvm::IsaPred<Float16Type, Float32Type>); + }); // TODO: Remove once we support replacing non-root ops. 
target.addLegalOp<gpu::YieldOp, gpu::GPUModuleOp>(); } diff --git a/mlir/lib/Dialect/Transform/Interfaces/TransformInterfaces.cpp b/mlir/lib/Dialect/Transform/Interfaces/TransformInterfaces.cpp index 5bc6d4e..91702ce 100644 --- a/mlir/lib/Dialect/Transform/Interfaces/TransformInterfaces.cpp +++ b/mlir/lib/Dialect/Transform/Interfaces/TransformInterfaces.cpp @@ -934,12 +934,10 @@ transform::TransformState::applyTransform(TransformOpInterface transform) { assert(scopeIt != regionStack.rend() && "could not find region scope for handle"); RegionScope *scope = *scopeIt; - for (Operation *user : handle.getUsers()) { - if (user != scope->currentTransform && - !happensBefore(user, scope->currentTransform)) - return false; - } - return true; + return llvm::all_of(handle.getUsers(), [&](Operation *user) { + return user == scope->currentTransform || + happensBefore(user, scope->currentTransform); + }); }; transform::ErrorCheckingTrackingListener trackingListener(*this, transform, config); diff --git a/mlir/test/CMakeLists.txt b/mlir/test/CMakeLists.txt index df95e5db..4d2d738 100644 --- a/mlir/test/CMakeLists.txt +++ b/mlir/test/CMakeLists.txt @@ -150,6 +150,10 @@ if(MLIR_ENABLE_CUDA_RUNNER) list(APPEND MLIR_TEST_DEPENDS mlir_cuda_runtime) endif() +if(MLIR_ENABLE_EXECUTION_ENGINE) + list(APPEND MLIR_TEST_DEPENDS mlir-capi-execution-engine-test) +endif() + if(MLIR_ENABLE_ROCM_RUNNER) list(APPEND MLIR_TEST_DEPENDS mlir_rocm_runtime) endif() diff --git a/mlir/test/Conversion/AMDGPUToROCDL/wmma.mlir b/mlir/test/Conversion/AMDGPUToROCDL/wmma.mlir index 1a4ef33..7b14480 100644 --- a/mlir/test/Conversion/AMDGPUToROCDL/wmma.mlir +++ b/mlir/test/Conversion/AMDGPUToROCDL/wmma.mlir @@ -15,9 +15,11 @@ func.func @mfma_to_rocdl(%arg0 : vector<16xf16>, %arg1 : vector<8xf32>, %arg2 : amdgpu.wmma %arg0 * %arg0 + %arg0 {subwordOffset = 1 : i32}: vector<16xf16>, vector<16xf16>, vector<16xf16> // CHECK: rocdl.wmma.f16.16x16x16.f16{{.*}}: (vector<16xf16>, vector<16xf16>, vector<8xf16>, i1) -> 
vector<8xf16> amdgpu.wmma %arg0 * %arg0 + %arg4 {subwordOffset = 0 : i32}: vector<16xf16>, vector<16xf16>, vector<8xf16> - // CHECK: rocdl.wmma.bf16.16x16x16.bf16{{.*}}: (vector<16xi16>, vector<16xi16>, vector<16xi16>, i1) -> vector<16xi16> + // CHECK: %[[raw_bf16x16:.+]] = rocdl.wmma.bf16.16x16x16.bf16{{.*}}: (vector<16xi16>, vector<16xi16>, vector<16xi16>, i1) -> vector<16xi16> + // CHECK-NEXT: llvm.bitcast %[[raw_bf16x16]] : vector<16xi16> to vector<16xbf16> amdgpu.wmma %arg3 * %arg3 + %arg3 {subwordOffset = 1 : i32}: vector<16xbf16>, vector<16xbf16>, vector<16xbf16> - // CHECK: rocdl.wmma.bf16.16x16x16.bf16{{.*}}: (vector<16xi16>, vector<16xi16>, vector<8xi16>, i1) -> vector<8xi16> + // CHECK: %[[raw_bf16x8:.+]] = rocdl.wmma.bf16.16x16x16.bf16{{.*}}: (vector<16xi16>, vector<16xi16>, vector<8xi16>, i1) -> vector<8xi16> + // CHECK-NEXT: llvm.bitcast %[[raw_bf16x8]] : vector<8xi16> to vector<8xbf16> amdgpu.wmma %arg3 * %arg3 + %arg5 {subwordOffset = 0 : i32}: vector<16xbf16>, vector<16xbf16>, vector<8xbf16> // CHECK: rocdl.wmma.i32.16x16x16.iu8{{.*}}: (i1, vector<4xi32>, i1, vector<4xi32>, vector<4xi32>, i1) -> vector<4xi32> amdgpu.wmma %arg6 * %arg6 + %arg7 {clamp}: vector<16xi8>, vector<16xi8>, vector<4xi32> diff --git a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir index 56b65be..eb065cb 100644 --- a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir +++ b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir @@ -132,6 +132,68 @@ gpu.module @test_module { // ----- gpu.module @test_module { + // CHECK-LABEL: func @gpu_sqrt + func.func @gpu_sqrt(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64) -> (f16, f32, f64) { + %result16 = math.sqrt %arg_f16 : f16 + // CHECK: llvm.intr.sqrt(%{{.*}}) : (f16) -> f16 + %result32 = math.sqrt %arg_f32 : f32 + // CHECK: llvm.intr.sqrt(%{{.*}}) : (f32) -> f32 + %result64 = math.sqrt %arg_f64 : f64 + // CHECK: llvm.intr.sqrt(%{{.*}}) : (f64) -> f64 + func.return %result16, %result32, 
%result64 : f16, f32, f64 + } +} + +// ----- + +gpu.module @test_module { + // CHECK-LABEL: func @gpu_fabs + func.func @gpu_fabs(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64) -> (f16, f32, f64) { + %result16 = math.absf %arg_f16 : f16 + // CHECK: llvm.intr.fabs(%{{.*}}) : (f16) -> f16 + %result32 = math.absf %arg_f32 : f32 + // CHECK: llvm.intr.fabs(%{{.*}}) : (f32) -> f32 + %result64 = math.absf %arg_f64 : f64 + // CHECK: llvm.intr.fabs(%{{.*}}) : (f64) -> f64 + func.return %result16, %result32, %result64 : f16, f32, f64 + } +} + +// ----- + +gpu.module @test_module { + // CHECK: llvm.func @__ocml_exp_f64(f64) -> f64 + // CHECK-LABEL: func @gpu_exp + func.func @gpu_exp(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64) -> (f16, f32, f64) { + %result16 = math.exp %arg_f16 : f16 + // CHECK: llvm.intr.exp(%{{.*}}) : (f16) -> f16 + %result32 = math.exp %arg_f32 : f32 + // CHECK: llvm.intr.exp(%{{.*}}) : (f32) -> f32 + %result64 = math.exp %arg_f64 : f64 + // CHECK: llvm.call @__ocml_exp_f64(%{{.*}}) : (f64) -> f64 + func.return %result16, %result32, %result64 : f16, f32, f64 + } +} + +// ----- + +gpu.module @test_module { + // CHECK: llvm.func @__ocml_log_f64(f64) -> f64 + // CHECK-LABEL: func @gpu_log + func.func @gpu_log(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64) -> (f16, f32, f64) { + %result16 = math.log %arg_f16 : f16 + // CHECK: llvm.intr.log(%{{.*}}) : (f16) -> f16 + %result32 = math.log %arg_f32 : f32 + // CHECK: llvm.intr.log(%{{.*}}) : (f32) -> f32 + %result64 = math.log %arg_f64 : f64 + // CHECK: llvm.call @__ocml_log_f64(%{{.*}}) : (f64) -> f64 + func.return %result16, %result32, %result64 : f16, f32, f64 + } +} + +// ----- + +gpu.module @test_module { // CHECK: llvm.func @__ocml_cbrt_f32(f32) -> f32 // CHECK: llvm.func @__ocml_cbrt_f64(f64) -> f64 // CHECK-LABEL: func @gpu_cbrt @@ -445,22 +507,22 @@ gpu.module @test_module { // ----- -// Test that the bf16 type is lowered away on this target. +// Test that the bf16 type is passed through to LLVM. 
gpu.module @test_module { // CHECK-LABEL: func @bf16_id func.func @bf16_id(%arg0 : bf16) -> bf16 { - // CHECK-SAME: (%[[ARG0:.+]]: i16) - // CHECK-SAME: -> i16 - // CHECK: return %[[ARG0]] : i16 + // CHECK-SAME: (%[[ARG0:.+]]: bf16) + // CHECK-SAME: -> bf16 + // CHECK: return %[[ARG0]] : bf16 func.return %arg0 : bf16 } // CHECK-LABEL: func @bf16x4_id func.func @bf16x4_id(%arg0 : vector<4xbf16>) -> vector<4xbf16> { - // CHECK-SAME: (%[[ARG0:.+]]: vector<4xi16>) - // CHECK-SAME: -> vector<4xi16> - // CHECK: return %[[ARG0]] : vector<4xi16> + // CHECK-SAME: (%[[ARG0:.+]]: vector<4xbf16>) + // CHECK-SAME: -> vector<4xbf16> + // CHECK: return %[[ARG0]] : vector<4xbf16> func.return %arg0 : vector<4xbf16> } diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index c931898..4124897 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -6004,6 +6004,7 @@ cc_library( ":LLVMCommonConversion", ":LLVMDialect", ":MathDialect", + ":MathToLLVM", ":MathToROCDL", ":MemRefDialect", ":MemRefToLLVM", |