Diffstat (limited to 'llvm')
84 files changed, 3538 insertions, 568 deletions
diff --git a/llvm/examples/CMakeLists.txt b/llvm/examples/CMakeLists.txt index 74613bd..b10a94c 100644 --- a/llvm/examples/CMakeLists.txt +++ b/llvm/examples/CMakeLists.txt @@ -8,6 +8,7 @@ add_subdirectory(ModuleMaker) add_subdirectory(OrcV2Examples) add_subdirectory(SpeculativeJIT) add_subdirectory(Bye) +add_subdirectory(OptSubcommand) if(LLVM_ENABLE_EH AND (NOT WIN32) AND (NOT "${LLVM_NATIVE_ARCH}" STREQUAL "ARM")) add_subdirectory(ExceptionDemo) diff --git a/llvm/examples/OptSubcommand/CMakeLists.txt b/llvm/examples/OptSubcommand/CMakeLists.txt new file mode 100644 index 0000000..debc948 --- /dev/null +++ b/llvm/examples/OptSubcommand/CMakeLists.txt @@ -0,0 +1,19 @@ +# Set the .td file to be processed for this target. +set(LLVM_TARGET_DEFINITIONS Opts.td) + +tablegen(LLVM Opts.inc -gen-opt-parser-defs) +add_public_tablegen_target(HelloSubTableGen) + +set(LLVM_LINK_COMPONENTS + Support + Option + ) + +add_llvm_example(OptSubcommand + llvm-hello-sub.cpp + ) + +target_include_directories(OptSubcommand + PRIVATE + ${CMAKE_CURRENT_BINARY_DIR} + ) diff --git a/llvm/examples/OptSubcommand/Opts.td b/llvm/examples/OptSubcommand/Opts.td new file mode 100644 index 0000000..7c980ee --- /dev/null +++ b/llvm/examples/OptSubcommand/Opts.td @@ -0,0 +1,18 @@ +include "llvm/Option/OptParser.td" + +def sc_foo : SubCommand<"foo", "HelpText for SubCommand foo.">; + +def sc_bar : SubCommand<"bar", "HelpText for SubCommand bar.", + "OptSubcommand bar <options>">; + +def help : Flag<["--"], "help">, + HelpText<"OptSubcommand <subcommand> <options>">; + +def version : Flag<["-"], "version">, + HelpText<"Toplevel Display the version number">; + +def uppercase : Flag<["-"], "uppercase", [sc_foo, sc_bar]>, + HelpText<"Print in uppercase">; + +def lowercase : Flag<["-"], "lowercase", [sc_foo]>, + HelpText<"Print in lowercase">; diff --git a/llvm/examples/OptSubcommand/llvm-hello-sub.cpp b/llvm/examples/OptSubcommand/llvm-hello-sub.cpp new file mode 100644 index 0000000..8071f56 --- /dev/null +++ b/llvm/examples/OptSubcommand/llvm-hello-sub.cpp @@ -0,0 +1,137 @@ +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Option/ArgList.h" +#include "llvm/Option/OptTable.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/InitLLVM.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; +using namespace llvm::opt; + +namespace { +enum ID { + OPT_INVALID = 0, +#define OPTION(PREFIXES, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, \ + VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, METAVAR, \ + VALUES, SUBCOMMANDIDS_OFFSET) \ + OPT_##ID, +#include "Opts.inc" +#undef OPTION +}; +#define OPTTABLE_STR_TABLE_CODE +#include "Opts.inc" +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "Opts.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE + +#define OPTTABLE_SUBCOMMAND_IDS_TABLE_CODE +#include "Opts.inc" +#undef OPTTABLE_SUBCOMMAND_IDS_TABLE_CODE + +#define OPTTABLE_SUBCOMMANDS_CODE +#include "Opts.inc" +#undef OPTTABLE_SUBCOMMANDS_CODE + +static constexpr OptTable::Info InfoTable[] = { +#define OPTION(...) 
LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__), +#include "Opts.inc" +#undef OPTION +}; + +class HelloSubOptTable : public GenericOptTable { +public: + HelloSubOptTable() + : GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable, + /*IgnoreCase=*/false, OptionSubCommands, + OptionSubCommandIDsTable) {} +}; +} // namespace + +int main(int argc, char **argv) { + InitLLVM X(argc, argv); + HelloSubOptTable T; + unsigned MissingArgIndex, MissingArgCount; + + auto HandleMultipleSubcommands = [](ArrayRef<StringRef> SubCommands) { + assert(SubCommands.size() > 1); + llvm::errs() << "error: more than one subcommand passed [\n"; + for (auto SC : SubCommands) + llvm::errs() << " `" << SC << "`\n"; + llvm::errs() << "]\n"; + llvm::errs() << "See --help.\n"; + exit(1); + }; + + auto HandleOtherPositionals = [](ArrayRef<StringRef> Positionals) { + assert(!Positionals.empty()); + llvm::errs() << "error: unknown positional argument(s) [\n"; + for (auto SC : Positionals) + llvm::errs() << " `" << SC << "`\n"; + llvm::errs() << "]\n"; + llvm::errs() << "See --help.\n"; + exit(1); + }; + + InputArgList Args = T.ParseArgs(ArrayRef(argv + 1, argc - 1), MissingArgIndex, + MissingArgCount); + + StringRef SubCommand = Args.getSubCommand( + T.getSubCommands(), HandleMultipleSubcommands, HandleOtherPositionals); + // Handle help. When help options is found, ignore all other options and exit + // after printing help. + + if (Args.hasArg(OPT_help)) { + T.printHelp(llvm::outs(), "llvm-hello-sub [subcommand] [options]", + "LLVM Hello SubCommand Example", false, false, Visibility(), + SubCommand); + return 0; + } + + auto HandleSubCommandArg = [&](ID OptionType) { + if (!Args.hasArg(OptionType)) + return false; + auto O = T.getOption(OptionType); + if (!O.isRegisteredSC(SubCommand)) { + llvm::errs() << "Option [" << O.getName() + << "] is not valid for SubCommand [" << SubCommand << "]\n"; + return false; + } + return true; + }; + + bool HasUnknownOptions = false; + for (const Arg *A : Args.filtered(OPT_UNKNOWN)) { + HasUnknownOptions = true; + llvm::errs() << "Unknown option `" << A->getAsString(Args) << "'\n"; + } + if (HasUnknownOptions) { + llvm::errs() << "See `OptSubcommand --help`.\n"; + return 1; + } + if (SubCommand.empty()) { + if (Args.hasArg(OPT_version)) + llvm::outs() << "LLVM Hello SubCommand Example 1.0\n"; + } else if (SubCommand == "foo") { + if (HandleSubCommandArg(OPT_uppercase)) + llvm::outs() << "FOO\n"; + else if (HandleSubCommandArg(OPT_lowercase)) + llvm::outs() << "foo\n"; + + if (HandleSubCommandArg(OPT_version)) + llvm::outs() << "LLVM Hello SubCommand foo Example 1.0\n"; + + } else if (SubCommand == "bar") { + if (HandleSubCommandArg(OPT_lowercase)) + llvm::outs() << "bar\n"; + else if (HandleSubCommandArg(OPT_uppercase)) + llvm::outs() << "BAR\n"; + + if (HandleSubCommandArg(OPT_version)) + llvm::outs() << "LLVM Hello SubCommand bar Example 1.0\n"; + } + + return 0; +} diff --git a/llvm/include/llvm/BinaryFormat/DXContainer.h b/llvm/include/llvm/BinaryFormat/DXContainer.h index 8944e736..0b56462 100644 --- a/llvm/include/llvm/BinaryFormat/DXContainer.h +++ b/llvm/include/llvm/BinaryFormat/DXContainer.h @@ -201,19 +201,9 @@ enum class RootParameterType : uint32_t { LLVM_ABI ArrayRef<EnumEntry<RootParameterType>> getRootParameterTypes(); -#define ROOT_PARAMETER(Val, Enum) \ - case Val: \ - return true; -inline bool isValidParameterType(uint32_t V) { - switch (V) { -#include "DXContainerConstants.def" - } - return false; -} +bool isValidParameterType(uint32_t V); -inline bool 
isValidRangeType(uint32_t V) { - return V <= llvm::to_underlying(dxil::ResourceClass::LastEntry); -} +bool isValidRangeType(uint32_t V); #define SHADER_VISIBILITY(Val, Enum) Enum = Val, enum class ShaderVisibility : uint32_t { @@ -222,30 +212,14 @@ enum class ShaderVisibility : uint32_t { LLVM_ABI ArrayRef<EnumEntry<ShaderVisibility>> getShaderVisibility(); -#define SHADER_VISIBILITY(Val, Enum) \ - case Val: \ - return true; -inline bool isValidShaderVisibility(uint32_t V) { - switch (V) { -#include "DXContainerConstants.def" - } - return false; -} +bool isValidShaderVisibility(uint32_t V); #define FILTER(Val, Enum) Enum = Val, enum class SamplerFilter : uint32_t { #include "DXContainerConstants.def" }; -#define FILTER(Val, Enum) \ - case Val: \ - return true; -inline bool isValidSamplerFilter(uint32_t V) { - switch (V) { -#include "DXContainerConstants.def" - } - return false; -} +bool isValidSamplerFilter(uint32_t V); LLVM_ABI ArrayRef<EnumEntry<SamplerFilter>> getSamplerFilters(); @@ -256,15 +230,7 @@ enum class TextureAddressMode : uint32_t { LLVM_ABI ArrayRef<EnumEntry<TextureAddressMode>> getTextureAddressModes(); -#define TEXTURE_ADDRESS_MODE(Val, Enum) \ - case Val: \ - return true; -inline bool isValidAddress(uint32_t V) { - switch (V) { -#include "DXContainerConstants.def" - } - return false; -} +bool isValidAddress(uint32_t V); #define COMPARISON_FUNC(Val, Enum) Enum = Val, enum class ComparisonFunc : uint32_t { @@ -273,30 +239,14 @@ enum class ComparisonFunc : uint32_t { LLVM_ABI ArrayRef<EnumEntry<ComparisonFunc>> getComparisonFuncs(); -#define COMPARISON_FUNC(Val, Enum) \ - case Val: \ - return true; -inline bool isValidComparisonFunc(uint32_t V) { - switch (V) { -#include "DXContainerConstants.def" - } - return false; -} +bool isValidComparisonFunc(uint32_t V); #define STATIC_BORDER_COLOR(Val, Enum) Enum = Val, enum class StaticBorderColor : uint32_t { #include "DXContainerConstants.def" }; -#define STATIC_BORDER_COLOR(Val, Enum) \ - case Val: \ - return true; -inline bool isValidBorderColor(uint32_t V) { - switch (V) { -#include "DXContainerConstants.def" - } - return false; -} +bool isValidBorderColor(uint32_t V); LLVM_ABI ArrayRef<EnumEntry<StaticBorderColor>> getStaticBorderColors(); diff --git a/llvm/include/llvm/CAS/OnDiskDataAllocator.h b/llvm/include/llvm/CAS/OnDiskDataAllocator.h new file mode 100644 index 0000000..2809df8 --- /dev/null +++ b/llvm/include/llvm/CAS/OnDiskDataAllocator.h @@ -0,0 +1,95 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This file declares interface for OnDiskDataAllocator, a file backed data +/// pool can be used to allocate space to store data packed in a single file. It +/// is based on MappedFileRegionArena and includes a header in the beginning to +/// provide metadata. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CAS_ONDISKDATAALLOCATOR_H +#define LLVM_CAS_ONDISKDATAALLOCATOR_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/CAS/FileOffset.h" +#include "llvm/Support/Error.h" + +namespace llvm::cas { + +/// Sink for data. Stores variable length data with 8-byte alignment. 
Does not +/// track size of data, which is assumed to known from context, or embedded. +/// Uses 0-padding but does not guarantee 0-termination. +class OnDiskDataAllocator { +public: + using ValueProxy = MutableArrayRef<char>; + + /// A pointer to data stored on disk. + class OnDiskPtr { + public: + FileOffset getOffset() const { return Offset; } + explicit operator bool() const { return bool(getOffset()); } + const ValueProxy &operator*() const { + assert(Offset && "Null dereference"); + return Value; + } + const ValueProxy *operator->() const { + assert(Offset && "Null dereference"); + return &Value; + } + + OnDiskPtr() = default; + + private: + friend class OnDiskDataAllocator; + OnDiskPtr(FileOffset Offset, ValueProxy Value) + : Offset(Offset), Value(Value) {} + FileOffset Offset; + ValueProxy Value; + }; + + /// Get the data of \p Size stored at the given \p Offset. Note the allocator + /// doesn't keep track of the allocation size, thus \p Size doesn't need to + /// match the size of allocation but needs to be smaller. + Expected<ArrayRef<char>> get(FileOffset Offset, size_t Size) const; + + /// Allocate at least \p Size with 8-byte alignment. + Expected<OnDiskPtr> allocate(size_t Size); + + /// \returns the buffer that was allocated at \p create time, with size + /// \p UserHeaderSize. + MutableArrayRef<uint8_t> getUserHeader(); + + size_t size() const; + size_t capacity() const; + + static Expected<OnDiskDataAllocator> + create(const Twine &Path, const Twine &TableName, uint64_t MaxFileSize, + std::optional<uint64_t> NewFileInitialSize, + uint32_t UserHeaderSize = 0, + function_ref<void(void *)> UserHeaderInit = nullptr); + + OnDiskDataAllocator(OnDiskDataAllocator &&RHS); + OnDiskDataAllocator &operator=(OnDiskDataAllocator &&RHS); + + // No copy. Just call \a create() again. + OnDiskDataAllocator(const OnDiskDataAllocator &) = delete; + OnDiskDataAllocator &operator=(const OnDiskDataAllocator &) = delete; + + ~OnDiskDataAllocator(); + +private: + struct ImplType; + explicit OnDiskDataAllocator(std::unique_ptr<ImplType> Impl); + std::unique_ptr<ImplType> Impl; +}; + +} // namespace llvm::cas + +#endif // LLVM_CAS_ONDISKDATAALLOCATOR_H diff --git a/llvm/include/llvm/CAS/OnDiskTrieRawHashMap.h b/llvm/include/llvm/CAS/OnDiskTrieRawHashMap.h index 5e41bf6..fbd68d0 100644 --- a/llvm/include/llvm/CAS/OnDiskTrieRawHashMap.h +++ b/llvm/include/llvm/CAS/OnDiskTrieRawHashMap.h @@ -133,38 +133,38 @@ public: bool IsValue = false; }; - class pointer; - class const_pointer : public PointerImpl<ConstValueProxy> { + class OnDiskPtr; + class ConstOnDiskPtr : public PointerImpl<ConstValueProxy> { public: - const_pointer() = default; + ConstOnDiskPtr() = default; private: - friend class pointer; + friend class OnDiskPtr; friend class OnDiskTrieRawHashMap; - using const_pointer::PointerImpl::PointerImpl; + using ConstOnDiskPtr::PointerImpl::PointerImpl; }; - class pointer : public PointerImpl<ValueProxy> { + class OnDiskPtr : public PointerImpl<ValueProxy> { public: - operator const_pointer() const { - return const_pointer(Value, getOffset(), IsValue); + operator ConstOnDiskPtr() const { + return ConstOnDiskPtr(Value, getOffset(), IsValue); } - pointer() = default; + OnDiskPtr() = default; private: friend class OnDiskTrieRawHashMap; - using pointer::PointerImpl::PointerImpl; + using OnDiskPtr::PointerImpl::PointerImpl; }; /// Find the value from hash. /// /// \returns pointer to the value if exists, otherwise returns a non-value /// pointer that evaluates to `false` when convert to boolean. 
- const_pointer find(ArrayRef<uint8_t> Hash) const; + ConstOnDiskPtr find(ArrayRef<uint8_t> Hash) const; /// Helper function to recover a pointer into the trie from file offset. - Expected<const_pointer> recoverFromFileOffset(FileOffset Offset) const; + Expected<ConstOnDiskPtr> recoverFromFileOffset(FileOffset Offset) const; using LazyInsertOnConstructCB = function_ref<void(FileOffset TentativeOffset, ValueProxy TentativeValue)>; @@ -186,11 +186,11 @@ public: /// The in-memory \a TrieRawHashMap uses LazyAtomicPointer to synchronize /// simultaneous writes, but that seems dangerous to use in a memory-mapped /// file in case a process crashes in the busy state. - Expected<pointer> insertLazy(ArrayRef<uint8_t> Hash, - LazyInsertOnConstructCB OnConstruct = nullptr, - LazyInsertOnLeakCB OnLeak = nullptr); + Expected<OnDiskPtr> insertLazy(ArrayRef<uint8_t> Hash, + LazyInsertOnConstructCB OnConstruct = nullptr, + LazyInsertOnLeakCB OnLeak = nullptr); - Expected<pointer> insert(const ConstValueProxy &Value) { + Expected<OnDiskPtr> insert(const ConstValueProxy &Value) { return insertLazy(Value.Hash, [&](FileOffset, ValueProxy Allocated) { assert(Allocated.Hash == Value.Hash); assert(Allocated.Data.size() == Value.Data.size()); diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h index 6e1bce1..7bec7e0 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h @@ -239,6 +239,9 @@ enum class OpenMPOffloadMappingFlags : uint64_t { // dynamic. // This is an OpenMP extension for the sake of OpenACC support. OMP_MAP_OMPX_HOLD = 0x2000, + // Attach pointer and pointee, after processing all other maps. + // Applicable to map-entering directives. Does not change ref-count. + OMP_MAP_ATTACH = 0x4000, /// Signal that the runtime library should use args as an array of /// descriptor_dim pointers and use args_size as dims. 
Used when we have /// non-contiguous list items in target update directive diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h index 783f8f6..041a4ce 100644 --- a/llvm/include/llvm/IR/IRBuilder.h +++ b/llvm/include/llvm/IR/IRBuilder.h @@ -1722,16 +1722,19 @@ public: return Insert(BinOp, Name); } - Value *CreateLogicalAnd(Value *Cond1, Value *Cond2, const Twine &Name = "") { + Value *CreateLogicalAnd(Value *Cond1, Value *Cond2, const Twine &Name = "", + Instruction *MDFrom = nullptr) { assert(Cond2->getType()->isIntOrIntVectorTy(1)); return CreateSelect(Cond1, Cond2, - ConstantInt::getNullValue(Cond2->getType()), Name); + ConstantInt::getNullValue(Cond2->getType()), Name, + MDFrom); } - Value *CreateLogicalOr(Value *Cond1, Value *Cond2, const Twine &Name = "") { + Value *CreateLogicalOr(Value *Cond1, Value *Cond2, const Twine &Name = "", + Instruction *MDFrom = nullptr) { assert(Cond2->getType()->isIntOrIntVectorTy(1)); return CreateSelect(Cond1, ConstantInt::getAllOnesValue(Cond2->getType()), - Cond2, Name); + Cond2, Name, MDFrom); } Value *CreateLogicalOp(Instruction::BinaryOps Opc, Value *Cond1, Value *Cond2, diff --git a/llvm/include/llvm/Option/ArgList.h b/llvm/include/llvm/Option/ArgList.h index 3e80574..db36509 100644 --- a/llvm/include/llvm/Option/ArgList.h +++ b/llvm/include/llvm/Option/ArgList.h @@ -20,6 +20,7 @@ #include "llvm/Option/OptSpecifier.h" #include "llvm/Option/Option.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/Error.h" #include <algorithm> #include <cstddef> #include <initializer_list> @@ -280,6 +281,22 @@ public: /// list. virtual unsigned getNumInputArgStrings() const = 0; + /// getSubCommand - Find subcommand from the arguments if the usage is valid. + /// + /// \param AllSubCommands - A list of all valid subcommands. + /// \param HandleMultipleSubcommands - A callback for the case where multiple + /// subcommands are present in the arguments. It gets a list of all found + /// subcommands. + /// \param HandleOtherPositionals - A callback for the case where positional + /// arguments that are not subcommands are present. + /// \return The name of the subcommand found. If no subcommand is found, + /// this returns an empty StringRef. If multiple subcommands are found, the + /// first one is returned. + StringRef getSubCommand( + ArrayRef<OptTable::SubCommand> AllSubCommands, + std::function<void(ArrayRef<StringRef>)> HandleMultipleSubcommands, + std::function<void(ArrayRef<StringRef>)> HandleOtherPositionals) const; + /// @} /// @name Argument Lookup Utilities /// @{ diff --git a/llvm/include/llvm/Option/OptParser.td b/llvm/include/llvm/Option/OptParser.td index 9fd606b..8f32fb4 100644 --- a/llvm/include/llvm/Option/OptParser.td +++ b/llvm/include/llvm/Option/OptParser.td @@ -98,7 +98,15 @@ class HelpTextVariant<list<OptionVisibility> visibilities, string text> { string Text = text; } -class Option<list<string> prefixes, string name, OptionKind kind> { +// Class definition for positional subcommands. +class SubCommand<string name, string helpText, string usage = ""> { + string Name = name; + string HelpText = helpText; + string Usage = usage; +} + +class Option<list<string> prefixes, string name, OptionKind kind, + list<SubCommand> subcommands = []> { string EnumName = ?; // Uses the def name if undefined. 
list<string> Prefixes = prefixes; string Name = name; @@ -129,26 +137,34 @@ class Option<list<string> prefixes, string name, OptionKind kind> { code ValueMerger = "mergeForwardValue"; code ValueExtractor = "extractForwardValue"; list<code> NormalizedValues = ?; + list<SubCommand> SubCommands = subcommands; } // Helpers for defining options. -class Flag<list<string> prefixes, string name> - : Option<prefixes, name, KIND_FLAG>; -class Joined<list<string> prefixes, string name> - : Option<prefixes, name, KIND_JOINED>; -class Separate<list<string> prefixes, string name> - : Option<prefixes, name, KIND_SEPARATE>; -class CommaJoined<list<string> prefixes, string name> - : Option<prefixes, name, KIND_COMMAJOINED>; -class MultiArg<list<string> prefixes, string name, int numargs> - : Option<prefixes, name, KIND_MULTIARG> { +class Flag<list<string> prefixes, string name, + list<SubCommand> subcommands = []> + : Option<prefixes, name, KIND_FLAG, subcommands>; +class Joined<list<string> prefixes, string name, + list<SubCommand> subcommands = []> + : Option<prefixes, name, KIND_JOINED, subcommands>; +class Separate<list<string> prefixes, string name, + list<SubCommand> subcommands = []> + : Option<prefixes, name, KIND_SEPARATE, subcommands>; +class CommaJoined<list<string> prefixes, string name, + list<SubCommand> subcommands = []> + : Option<prefixes, name, KIND_COMMAJOINED, subcommands>; +class MultiArg<list<string> prefixes, string name, int numargs, + list<SubCommand> subcommands = []> + : Option<prefixes, name, KIND_MULTIARG, subcommands> { int NumArgs = numargs; } -class JoinedOrSeparate<list<string> prefixes, string name> - : Option<prefixes, name, KIND_JOINED_OR_SEPARATE>; -class JoinedAndSeparate<list<string> prefixes, string name> - : Option<prefixes, name, KIND_JOINED_AND_SEPARATE>; +class JoinedOrSeparate<list<string> prefixes, string name, + list<SubCommand> subcommands = []> + : Option<prefixes, name, KIND_JOINED_OR_SEPARATE, subcommands>; +class JoinedAndSeparate<list<string> prefixes, string name, + list<SubCommand> subcommands = []> + : Option<prefixes, name, KIND_JOINED_AND_SEPARATE, subcommands>; // Mix-ins for adding optional attributes. diff --git a/llvm/include/llvm/Option/OptTable.h b/llvm/include/llvm/Option/OptTable.h index df42ee3..f641ca4 100644 --- a/llvm/include/llvm/Option/OptTable.h +++ b/llvm/include/llvm/Option/OptTable.h @@ -53,6 +53,13 @@ public: /// parts of the driver still use Option instances where convenient. class LLVM_ABI OptTable { public: + /// Represents a subcommand and its options in the option table. + struct SubCommand { + const char *Name; + const char *HelpText; + const char *Usage; + }; + /// Entry for a single option instance in the option data table. struct Info { unsigned PrefixesOffset; @@ -79,6 +86,8 @@ public: unsigned short AliasID; const char *AliasArgs; const char *Values; + // Offset into OptTable's SubCommandIDsTable. + unsigned SubCommandIDsOffset; bool hasNoPrefix() const { return PrefixesOffset == 0; } @@ -94,6 +103,21 @@ public: getNumPrefixes(PrefixesTable)); } + bool hasSubCommands() const { return SubCommandIDsOffset != 0; } + + unsigned getNumSubCommandIDs(ArrayRef<unsigned> SubCommandIDsTable) const { + // We embed the number of subcommand IDs in the value of the first offset. + return SubCommandIDsTable[SubCommandIDsOffset]; + } + + ArrayRef<unsigned> + getSubCommandIDs(ArrayRef<unsigned> SubCommandIDsTable) const { + return hasSubCommands() ? 
SubCommandIDsTable.slice( + SubCommandIDsOffset + 1, + getNumSubCommandIDs(SubCommandIDsTable)) + : ArrayRef<unsigned>(); + } + void appendPrefixes(const StringTable &StrTable, ArrayRef<StringTable::Offset> PrefixesTable, SmallVectorImpl<StringRef> &Prefixes) const { @@ -119,6 +143,22 @@ public: } }; +public: + bool isValidForSubCommand(const Info *CandidateInfo, + StringRef SubCommand) const { + assert(!SubCommand.empty() && + "This helper is only for valid registered subcommands."); + auto SCIT = + std::find_if(SubCommands.begin(), SubCommands.end(), + [&](const auto &C) { return SubCommand == C.Name; }); + assert(SCIT != SubCommands.end() && + "This helper is only for valid registered subcommands."); + auto SubCommandIDs = CandidateInfo->getSubCommandIDs(SubCommandIDsTable); + unsigned CurrentSubCommandID = SCIT - &SubCommands[0]; + return std::find(SubCommandIDs.begin(), SubCommandIDs.end(), + CurrentSubCommandID) != SubCommandIDs.end(); + } + private: // A unified string table for these options. Individual strings are stored as // null terminated C-strings at offsets within this table. @@ -134,6 +174,13 @@ private: ArrayRef<Info> OptionInfos; bool IgnoreCase; + + /// The subcommand information table. + ArrayRef<SubCommand> SubCommands; + + /// The subcommand IDs table. + ArrayRef<unsigned> SubCommandIDsTable; + bool GroupedShortOptions = false; bool DashDashParsing = false; const char *EnvVar = nullptr; @@ -168,7 +215,9 @@ protected: /// manually call \c buildPrefixChars once they are fully constructed. OptTable(const StringTable &StrTable, ArrayRef<StringTable::Offset> PrefixesTable, - ArrayRef<Info> OptionInfos, bool IgnoreCase = false); + ArrayRef<Info> OptionInfos, bool IgnoreCase = false, + ArrayRef<SubCommand> SubCommands = {}, + ArrayRef<unsigned> SubCommandIDsTable = {}); /// Build (or rebuild) the PrefixChars member. void buildPrefixChars(); @@ -179,6 +228,8 @@ public: /// Return the string table used for option names. const StringTable &getStrTable() const { return *StrTable; } + ArrayRef<SubCommand> getSubCommands() const { return SubCommands; } + /// Return the prefixes table used for option names. ArrayRef<StringTable::Offset> getPrefixesTable() const { return PrefixesTable; @@ -410,7 +461,8 @@ public: /// texts. 
void printHelp(raw_ostream &OS, const char *Usage, const char *Title, bool ShowHidden = false, bool ShowAllAliases = false, - Visibility VisibilityMask = Visibility()) const; + Visibility VisibilityMask = Visibility(), + StringRef SubCommand = {}) const; void printHelp(raw_ostream &OS, const char *Usage, const char *Title, unsigned FlagsToInclude, unsigned FlagsToExclude, @@ -418,7 +470,8 @@ public: private: void internalPrintHelp(raw_ostream &OS, const char *Usage, const char *Title, - bool ShowHidden, bool ShowAllAliases, + StringRef SubCommand, bool ShowHidden, + bool ShowAllAliases, std::function<bool(const Info &)> ExcludeOption, Visibility VisibilityMask) const; }; @@ -428,7 +481,9 @@ class GenericOptTable : public OptTable { protected: LLVM_ABI GenericOptTable(const StringTable &StrTable, ArrayRef<StringTable::Offset> PrefixesTable, - ArrayRef<Info> OptionInfos, bool IgnoreCase = false); + ArrayRef<Info> OptionInfos, bool IgnoreCase = false, + ArrayRef<SubCommand> SubCommands = {}, + ArrayRef<unsigned> SubCommandIDsTable = {}); }; class PrecomputedOptTable : public OptTable { @@ -437,8 +492,11 @@ protected: ArrayRef<StringTable::Offset> PrefixesTable, ArrayRef<Info> OptionInfos, ArrayRef<StringTable::Offset> PrefixesUnionOffsets, - bool IgnoreCase = false) - : OptTable(StrTable, PrefixesTable, OptionInfos, IgnoreCase) { + bool IgnoreCase = false, + ArrayRef<SubCommand> SubCommands = {}, + ArrayRef<unsigned> SubCommandIDsTable = {}) + : OptTable(StrTable, PrefixesTable, OptionInfos, IgnoreCase, SubCommands, + SubCommandIDsTable) { for (auto PrefixOffset : PrefixesUnionOffsets) PrefixesUnion.push_back(StrTable[PrefixOffset]); buildPrefixChars(); @@ -452,33 +510,36 @@ protected: #define LLVM_MAKE_OPT_ID_WITH_ID_PREFIX( \ ID_PREFIX, PREFIXES_OFFSET, PREFIXED_NAME_OFFSET, ID, KIND, GROUP, ALIAS, \ ALIASARGS, FLAGS, VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, \ - METAVAR, VALUES) \ + METAVAR, VALUES, SUBCOMMANDIDS_OFFSET) \ ID_PREFIX##ID #define LLVM_MAKE_OPT_ID(PREFIXES_OFFSET, PREFIXED_NAME_OFFSET, ID, KIND, \ GROUP, ALIAS, ALIASARGS, FLAGS, VISIBILITY, PARAM, \ - HELPTEXT, HELPTEXTSFORVARIANTS, METAVAR, VALUES) \ - LLVM_MAKE_OPT_ID_WITH_ID_PREFIX(OPT_, PREFIXES_OFFSET, PREFIXED_NAME_OFFSET, \ - ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, \ - VISIBILITY, PARAM, HELPTEXT, \ - HELPTEXTSFORVARIANTS, METAVAR, VALUES) + HELPTEXT, HELPTEXTSFORVARIANTS, METAVAR, VALUES, \ + SUBCOMMANDIDS_OFFSET) \ + LLVM_MAKE_OPT_ID_WITH_ID_PREFIX( \ + OPT_, PREFIXES_OFFSET, PREFIXED_NAME_OFFSET, ID, KIND, GROUP, ALIAS, \ + ALIASARGS, FLAGS, VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, \ + METAVAR, VALUES, SUBCOMMANDIDS_OFFSET) #define LLVM_CONSTRUCT_OPT_INFO_WITH_ID_PREFIX( \ ID_PREFIX, PREFIXES_OFFSET, PREFIXED_NAME_OFFSET, ID, KIND, GROUP, ALIAS, \ ALIASARGS, FLAGS, VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, \ - METAVAR, VALUES) \ + METAVAR, VALUES, SUBCOMMANDIDS_OFFSET) \ llvm::opt::OptTable::Info { \ PREFIXES_OFFSET, PREFIXED_NAME_OFFSET, HELPTEXT, HELPTEXTSFORVARIANTS, \ METAVAR, ID_PREFIX##ID, llvm::opt::Option::KIND##Class, PARAM, FLAGS, \ - VISIBILITY, ID_PREFIX##GROUP, ID_PREFIX##ALIAS, ALIASARGS, VALUES \ + VISIBILITY, ID_PREFIX##GROUP, ID_PREFIX##ALIAS, ALIASARGS, VALUES, \ + SUBCOMMANDIDS_OFFSET \ } #define LLVM_CONSTRUCT_OPT_INFO( \ PREFIXES_OFFSET, PREFIXED_NAME_OFFSET, ID, KIND, GROUP, ALIAS, ALIASARGS, \ - FLAGS, VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, METAVAR, VALUES) \ + FLAGS, VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, METAVAR, VALUES, \ + 
SUBCOMMANDIDS_OFFSET) \ LLVM_CONSTRUCT_OPT_INFO_WITH_ID_PREFIX( \ OPT_, PREFIXES_OFFSET, PREFIXED_NAME_OFFSET, ID, KIND, GROUP, ALIAS, \ ALIASARGS, FLAGS, VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, \ - METAVAR, VALUES) + METAVAR, VALUES, SUBCOMMANDIDS_OFFSET) #endif // LLVM_OPTION_OPTTABLE_H diff --git a/llvm/include/llvm/Option/Option.h b/llvm/include/llvm/Option/Option.h index 51c330a..192cb3c9 100644 --- a/llvm/include/llvm/Option/Option.h +++ b/llvm/include/llvm/Option/Option.h @@ -216,6 +216,12 @@ public: /// always be false. LLVM_ABI bool matches(OptSpecifier ID) const; + LLVM_ABI bool isRegisteredSC(StringRef SubCommand) const { + assert(Info && "Must have a valid info!"); + assert(Owner && "Must have a valid owner!"); + return Owner->isValidForSubCommand(Info, SubCommand); + } + /// Potentially accept the current argument, returning a new Arg instance, /// or 0 if the option does not accept this argument (or the argument is /// missing values). diff --git a/llvm/include/llvm/Transforms/IPO/FunctionAttrs.h b/llvm/include/llvm/Transforms/IPO/FunctionAttrs.h index 754714d..eaca0a8 100644 --- a/llvm/include/llvm/Transforms/IPO/FunctionAttrs.h +++ b/llvm/include/llvm/Transforms/IPO/FunctionAttrs.h @@ -79,6 +79,19 @@ public: LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); }; +/// Additional 'norecurse' attribute deduction during postlink LTO phase. +/// +/// This is a module pass that infers 'norecurse' attribute on functions. +/// It runs during LTO and analyzes the module's call graph to find functions +/// that are guaranteed not to call themselves, either directly or indirectly. +/// The pass uses a module-wide flag which checks if any function's address is +/// taken or any function in the module has external linkage, to safely handle +/// indirect and library function calls from current function. 
+class NoRecurseLTOInferencePass + : public PassInfoMixin<NoRecurseLTOInferencePass> { +public: + LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM); +}; } // end namespace llvm #endif // LLVM_TRANSFORMS_IPO_FUNCTIONATTRS_H diff --git a/llvm/lib/BinaryFormat/DXContainer.cpp b/llvm/lib/BinaryFormat/DXContainer.cpp index c06a3e3..b334f86 100644 --- a/llvm/lib/BinaryFormat/DXContainer.cpp +++ b/llvm/lib/BinaryFormat/DXContainer.cpp @@ -18,6 +18,70 @@ using namespace llvm; using namespace llvm::dxbc; +#define ROOT_PARAMETER(Val, Enum) \ + case Val: \ + return true; +bool llvm::dxbc::isValidParameterType(uint32_t V) { + switch (V) { +#include "llvm/BinaryFormat/DXContainerConstants.def" + } + return false; +} + +bool llvm::dxbc::isValidRangeType(uint32_t V) { + return V <= llvm::to_underlying(dxil::ResourceClass::LastEntry); +} + +#define SHADER_VISIBILITY(Val, Enum) \ + case Val: \ + return true; +bool llvm::dxbc::isValidShaderVisibility(uint32_t V) { + switch (V) { +#include "llvm/BinaryFormat/DXContainerConstants.def" + } + return false; +} + +#define FILTER(Val, Enum) \ + case Val: \ + return true; +bool llvm::dxbc::isValidSamplerFilter(uint32_t V) { + switch (V) { +#include "llvm/BinaryFormat/DXContainerConstants.def" + } + return false; +} + +#define TEXTURE_ADDRESS_MODE(Val, Enum) \ + case Val: \ + return true; +bool llvm::dxbc::isValidAddress(uint32_t V) { + switch (V) { +#include "llvm/BinaryFormat/DXContainerConstants.def" + } + return false; +} + +#define COMPARISON_FUNC(Val, Enum) \ + case Val: \ + return true; +bool llvm::dxbc::isValidComparisonFunc(uint32_t V) { + switch (V) { +#include "llvm/BinaryFormat/DXContainerConstants.def" + } + return false; +} + +#define STATIC_BORDER_COLOR(Val, Enum) \ + case Val: \ + return true; +bool llvm::dxbc::isValidBorderColor(uint32_t V) { + switch (V) { +#include "llvm/BinaryFormat/DXContainerConstants.def" + } + return false; +} + dxbc::PartType dxbc::parsePartType(StringRef S) { #define CONTAINER_PART(PartName) .Case(#PartName, PartType::PartName) return StringSwitch<dxbc::PartType>(S) diff --git a/llvm/lib/CAS/CMakeLists.txt b/llvm/lib/CAS/CMakeLists.txt index 7ae5f7e..bca39b6 100644 --- a/llvm/lib/CAS/CMakeLists.txt +++ b/llvm/lib/CAS/CMakeLists.txt @@ -7,6 +7,7 @@ add_llvm_component_library(LLVMCAS MappedFileRegionArena.cpp ObjectStore.cpp OnDiskCommon.cpp + OnDiskDataAllocator.cpp OnDiskTrieRawHashMap.cpp ADDITIONAL_HEADER_DIRS diff --git a/llvm/lib/CAS/OnDiskDataAllocator.cpp b/llvm/lib/CAS/OnDiskDataAllocator.cpp new file mode 100644 index 0000000..13bbd66 --- /dev/null +++ b/llvm/lib/CAS/OnDiskDataAllocator.cpp @@ -0,0 +1,234 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file Implements OnDiskDataAllocator. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/CAS/OnDiskDataAllocator.h" +#include "DatabaseFile.h" +#include "llvm/Config/llvm-config.h" + +using namespace llvm; +using namespace llvm::cas; +using namespace llvm::cas::ondisk; + +#if LLVM_ENABLE_ONDISK_CAS + +//===----------------------------------------------------------------------===// +// DataAllocator data structures. 
+//===----------------------------------------------------------------------===// + +namespace { +/// DataAllocator table layout: +/// - [8-bytes: Generic table header] +/// - 8-bytes: AllocatorOffset (reserved for implementing free lists) +/// - 8-bytes: Size for user data header +/// - <user data buffer> +/// +/// Record layout: +/// - <data> +class DataAllocatorHandle { +public: + static constexpr TableHandle::TableKind Kind = + TableHandle::TableKind::DataAllocator; + + struct Header { + TableHandle::Header GenericHeader; + std::atomic<int64_t> AllocatorOffset; + const uint64_t UserHeaderSize; + }; + + operator TableHandle() const { + if (!H) + return TableHandle(); + return TableHandle(*Region, H->GenericHeader); + } + + Expected<MutableArrayRef<char>> allocate(MappedFileRegionArena &Alloc, + size_t DataSize) { + assert(&Alloc.getRegion() == Region); + auto Ptr = Alloc.allocate(DataSize); + if (LLVM_UNLIKELY(!Ptr)) + return Ptr.takeError(); + return MutableArrayRef(*Ptr, DataSize); + } + + explicit operator bool() const { return H; } + const Header &getHeader() const { return *H; } + MappedFileRegion &getRegion() const { return *Region; } + + MutableArrayRef<uint8_t> getUserHeader() { + return MutableArrayRef(reinterpret_cast<uint8_t *>(H + 1), + H->UserHeaderSize); + } + + static Expected<DataAllocatorHandle> + create(MappedFileRegionArena &Alloc, StringRef Name, uint32_t UserHeaderSize); + + DataAllocatorHandle() = default; + DataAllocatorHandle(MappedFileRegion &Region, Header &H) + : Region(&Region), H(&H) {} + DataAllocatorHandle(MappedFileRegion &Region, intptr_t HeaderOffset) + : DataAllocatorHandle( + Region, *reinterpret_cast<Header *>(Region.data() + HeaderOffset)) { + } + +private: + MappedFileRegion *Region = nullptr; + Header *H = nullptr; +}; + +} // end anonymous namespace + +struct OnDiskDataAllocator::ImplType { + DatabaseFile File; + DataAllocatorHandle Store; +}; + +Expected<DataAllocatorHandle> +DataAllocatorHandle::create(MappedFileRegionArena &Alloc, StringRef Name, + uint32_t UserHeaderSize) { + // Allocate. + auto Offset = + Alloc.allocateOffset(sizeof(Header) + UserHeaderSize + Name.size() + 1); + if (LLVM_UNLIKELY(!Offset)) + return Offset.takeError(); + + // Construct the header and the name. + assert(Name.size() <= UINT16_MAX && "Expected smaller table name"); + auto *H = new (Alloc.getRegion().data() + *Offset) + Header{{TableHandle::TableKind::DataAllocator, + static_cast<uint16_t>(Name.size()), + static_cast<int32_t>(sizeof(Header) + UserHeaderSize)}, + /*AllocatorOffset=*/{0}, + /*UserHeaderSize=*/UserHeaderSize}; + // Memset UserHeader. + char *UserHeader = reinterpret_cast<char *>(H + 1); + memset(UserHeader, 0, UserHeaderSize); + // Write database file name (null-terminated). + char *NameStorage = UserHeader + UserHeaderSize; + llvm::copy(Name, NameStorage); + NameStorage[Name.size()] = 0; + return DataAllocatorHandle(Alloc.getRegion(), *H); +} + +Expected<OnDiskDataAllocator> OnDiskDataAllocator::create( + const Twine &PathTwine, const Twine &TableNameTwine, uint64_t MaxFileSize, + std::optional<uint64_t> NewFileInitialSize, uint32_t UserHeaderSize, + function_ref<void(void *)> UserHeaderInit) { + assert(!UserHeaderSize || UserHeaderInit); + SmallString<128> PathStorage; + StringRef Path = PathTwine.toStringRef(PathStorage); + SmallString<128> TableNameStorage; + StringRef TableName = TableNameTwine.toStringRef(TableNameStorage); + + // Constructor for if the file doesn't exist. 
+ auto NewDBConstructor = [&](DatabaseFile &DB) -> Error { + auto Store = + DataAllocatorHandle::create(DB.getAlloc(), TableName, UserHeaderSize); + if (LLVM_UNLIKELY(!Store)) + return Store.takeError(); + + if (auto E = DB.addTable(*Store)) + return E; + + if (UserHeaderSize) + UserHeaderInit(Store->getUserHeader().data()); + return Error::success(); + }; + + // Get or create the file. + Expected<DatabaseFile> File = + DatabaseFile::create(Path, MaxFileSize, NewDBConstructor); + if (!File) + return File.takeError(); + + // Find the table and validate it. + std::optional<TableHandle> Table = File->findTable(TableName); + if (!Table) + return createTableConfigError(std::errc::argument_out_of_domain, Path, + TableName, "table not found"); + if (Error E = checkTable("table kind", (size_t)DataAllocatorHandle::Kind, + (size_t)Table->getHeader().Kind, Path, TableName)) + return std::move(E); + auto Store = Table->cast<DataAllocatorHandle>(); + assert(Store && "Already checked the kind"); + + // Success. + OnDiskDataAllocator::ImplType Impl{DatabaseFile(std::move(*File)), Store}; + return OnDiskDataAllocator(std::make_unique<ImplType>(std::move(Impl))); +} + +Expected<OnDiskDataAllocator::OnDiskPtr> +OnDiskDataAllocator::allocate(size_t Size) { + auto Data = Impl->Store.allocate(Impl->File.getAlloc(), Size); + if (LLVM_UNLIKELY(!Data)) + return Data.takeError(); + + return OnDiskPtr(FileOffset(Data->data() - Impl->Store.getRegion().data()), + *Data); +} + +Expected<ArrayRef<char>> OnDiskDataAllocator::get(FileOffset Offset, + size_t Size) const { + assert(Offset); + assert(Impl); + if (Offset.get() + Size >= Impl->File.getAlloc().size()) + return createStringError(make_error_code(std::errc::protocol_error), + "requested size too large in allocator"); + return ArrayRef<char>{Impl->File.getRegion().data() + Offset.get(), Size}; +} + +MutableArrayRef<uint8_t> OnDiskDataAllocator::getUserHeader() { + return Impl->Store.getUserHeader(); +} + +size_t OnDiskDataAllocator::size() const { return Impl->File.size(); } +size_t OnDiskDataAllocator::capacity() const { + return Impl->File.getRegion().size(); +} + +OnDiskDataAllocator::OnDiskDataAllocator(std::unique_ptr<ImplType> Impl) + : Impl(std::move(Impl)) {} + +#else // !LLVM_ENABLE_ONDISK_CAS + +struct OnDiskDataAllocator::ImplType {}; + +Expected<OnDiskDataAllocator> OnDiskDataAllocator::create( + const Twine &Path, const Twine &TableName, uint64_t MaxFileSize, + std::optional<uint64_t> NewFileInitialSize, uint32_t UserHeaderSize, + function_ref<void(void *)> UserHeaderInit) { + return createStringError(make_error_code(std::errc::not_supported), + "OnDiskDataAllocator is not supported"); +} + +Expected<OnDiskDataAllocator::OnDiskPtr> +OnDiskDataAllocator::allocate(size_t Size) { + return createStringError(make_error_code(std::errc::not_supported), + "OnDiskDataAllocator is not supported"); +} + +Expected<ArrayRef<char>> OnDiskDataAllocator::get(FileOffset Offset, + size_t Size) const { + return createStringError(make_error_code(std::errc::not_supported), + "OnDiskDataAllocator is not supported"); +} + +MutableArrayRef<uint8_t> OnDiskDataAllocator::getUserHeader() { return {}; } + +size_t OnDiskDataAllocator::size() const { return 0; } +size_t OnDiskDataAllocator::capacity() const { return 0; } + +#endif // LLVM_ENABLE_ONDISK_CAS + +OnDiskDataAllocator::OnDiskDataAllocator(OnDiskDataAllocator &&RHS) = default; +OnDiskDataAllocator & +OnDiskDataAllocator::operator=(OnDiskDataAllocator &&RHS) = default; +OnDiskDataAllocator::~OnDiskDataAllocator() = 
default; diff --git a/llvm/lib/CAS/OnDiskTrieRawHashMap.cpp b/llvm/lib/CAS/OnDiskTrieRawHashMap.cpp index 9403893..323b21e 100644 --- a/llvm/lib/CAS/OnDiskTrieRawHashMap.cpp +++ b/llvm/lib/CAS/OnDiskTrieRawHashMap.cpp @@ -427,7 +427,7 @@ TrieRawHashMapHandle::createRecord(MappedFileRegionArena &Alloc, return Record; } -Expected<OnDiskTrieRawHashMap::const_pointer> +Expected<OnDiskTrieRawHashMap::ConstOnDiskPtr> OnDiskTrieRawHashMap::recoverFromFileOffset(FileOffset Offset) const { // Check alignment. if (!isAligned(MappedFileRegionArena::getAlign(), Offset.get())) @@ -448,17 +448,17 @@ OnDiskTrieRawHashMap::recoverFromFileOffset(FileOffset Offset) const { // Looks okay... TrieRawHashMapHandle::RecordData D = Impl->Trie.getRecord(SubtrieSlotValue::getDataOffset(Offset)); - return const_pointer(D.Proxy, D.getFileOffset()); + return ConstOnDiskPtr(D.Proxy, D.getFileOffset()); } -OnDiskTrieRawHashMap::const_pointer +OnDiskTrieRawHashMap::ConstOnDiskPtr OnDiskTrieRawHashMap::find(ArrayRef<uint8_t> Hash) const { TrieRawHashMapHandle Trie = Impl->Trie; assert(Hash.size() == Trie.getNumHashBytes() && "Invalid hash"); SubtrieHandle S = Trie.getRoot(); if (!S) - return const_pointer(); + return ConstOnDiskPtr(); TrieHashIndexGenerator IndexGen = Trie.getIndexGen(S, Hash); size_t Index = IndexGen.next(); @@ -466,13 +466,13 @@ OnDiskTrieRawHashMap::find(ArrayRef<uint8_t> Hash) const { // Try to set the content. SubtrieSlotValue V = S.load(Index); if (!V) - return const_pointer(); + return ConstOnDiskPtr(); // Check for an exact match. if (V.isData()) { TrieRawHashMapHandle::RecordData D = Trie.getRecord(V); - return D.Proxy.Hash == Hash ? const_pointer(D.Proxy, D.getFileOffset()) - : const_pointer(); + return D.Proxy.Hash == Hash ? ConstOnDiskPtr(D.Proxy, D.getFileOffset()) + : ConstOnDiskPtr(); } Index = IndexGen.next(); @@ -490,7 +490,7 @@ void SubtrieHandle::reinitialize(uint32_t StartBit, uint32_t NumBits) { H->NumBits = NumBits; } -Expected<OnDiskTrieRawHashMap::pointer> +Expected<OnDiskTrieRawHashMap::OnDiskPtr> OnDiskTrieRawHashMap::insertLazy(ArrayRef<uint8_t> Hash, LazyInsertOnConstructCB OnConstruct, LazyInsertOnLeakCB OnLeak) { @@ -523,7 +523,8 @@ OnDiskTrieRawHashMap::insertLazy(ArrayRef<uint8_t> Hash, } if (S->compare_exchange_strong(Index, Existing, NewRecord->Offset)) - return pointer(NewRecord->Proxy, NewRecord->Offset.asDataFileOffset()); + return OnDiskPtr(NewRecord->Proxy, + NewRecord->Offset.asDataFileOffset()); // Race means that Existing is no longer empty; fall through... } @@ -540,8 +541,8 @@ OnDiskTrieRawHashMap::insertLazy(ArrayRef<uint8_t> Hash, if (NewRecord && OnLeak) OnLeak(NewRecord->Offset.asDataFileOffset(), NewRecord->Proxy, ExistingRecord.Offset.asDataFileOffset(), ExistingRecord.Proxy); - return pointer(ExistingRecord.Proxy, - ExistingRecord.Offset.asDataFileOffset()); + return OnDiskPtr(ExistingRecord.Proxy, + ExistingRecord.Offset.asDataFileOffset()); } // Sink the existing content as long as the indexes match. 
@@ -1135,7 +1136,7 @@ OnDiskTrieRawHashMap::create(const Twine &PathTwine, const Twine &TrieNameTwine, "OnDiskTrieRawHashMap is not supported"); } -Expected<OnDiskTrieRawHashMap::pointer> +Expected<OnDiskTrieRawHashMap::OnDiskPtr> OnDiskTrieRawHashMap::insertLazy(ArrayRef<uint8_t> Hash, LazyInsertOnConstructCB OnConstruct, LazyInsertOnLeakCB OnLeak) { @@ -1143,15 +1144,15 @@ OnDiskTrieRawHashMap::insertLazy(ArrayRef<uint8_t> Hash, "OnDiskTrieRawHashMap is not supported"); } -Expected<OnDiskTrieRawHashMap::const_pointer> +Expected<OnDiskTrieRawHashMap::ConstOnDiskPtr> OnDiskTrieRawHashMap::recoverFromFileOffset(FileOffset Offset) const { return createStringError(make_error_code(std::errc::not_supported), "OnDiskTrieRawHashMap is not supported"); } -OnDiskTrieRawHashMap::const_pointer +OnDiskTrieRawHashMap::ConstOnDiskPtr OnDiskTrieRawHashMap::find(ArrayRef<uint8_t> Hash) const { - return const_pointer(); + return ConstOnDiskPtr(); } void OnDiskTrieRawHashMap::print( diff --git a/llvm/lib/IR/Globals.cpp b/llvm/lib/IR/Globals.cpp index 1a7a5c5..c3a472b 100644 --- a/llvm/lib/IR/Globals.cpp +++ b/llvm/lib/IR/Globals.cpp @@ -419,6 +419,7 @@ findBaseObject(const Constant *C, DenseSet<const GlobalAlias *> &Aliases, case Instruction::PtrToAddr: case Instruction::PtrToInt: case Instruction::BitCast: + case Instruction::AddrSpaceCast: case Instruction::GetElementPtr: return findBaseObject(CE->getOperand(0), Aliases, Op); default: diff --git a/llvm/lib/Option/ArgList.cpp b/llvm/lib/Option/ArgList.cpp index c4188b3b..2f4e212 100644 --- a/llvm/lib/Option/ArgList.cpp +++ b/llvm/lib/Option/ArgList.cpp @@ -14,12 +14,14 @@ #include "llvm/Config/llvm-config.h" #include "llvm/Option/Arg.h" #include "llvm/Option/OptSpecifier.h" +#include "llvm/Option/OptTable.h" #include "llvm/Option/Option.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> #include <cassert> +#include <cstddef> #include <memory> #include <string> #include <utility> @@ -202,6 +204,42 @@ void ArgList::print(raw_ostream &O) const { LLVM_DUMP_METHOD void ArgList::dump() const { print(dbgs()); } #endif +StringRef ArgList::getSubCommand( + ArrayRef<OptTable::SubCommand> AllSubCommands, + std::function<void(ArrayRef<StringRef>)> HandleMultipleSubcommands, + std::function<void(ArrayRef<StringRef>)> HandleOtherPositionals) const { + + SmallVector<StringRef, 4> SubCommands; + SmallVector<StringRef, 4> OtherPositionals; + for (const Arg *A : *this) { + if (A->getOption().getKind() != Option::InputClass) + continue; + + size_t OldSize = SubCommands.size(); + for (const OptTable::SubCommand &CMD : AllSubCommands) { + if (StringRef(CMD.Name) == A->getValue()) + SubCommands.push_back(A->getValue()); + } + + if (SubCommands.size() == OldSize) + OtherPositionals.push_back(A->getValue()); + } + + // Invoke callbacks if necessary. + if (SubCommands.size() > 1) { + HandleMultipleSubcommands(SubCommands); + return {}; + } + if (!OtherPositionals.empty()) { + HandleOtherPositionals(OtherPositionals); + return {}; + } + + if (SubCommands.size() == 1) + return SubCommands.front(); + return {}; // No valid usage of subcommand found. +} + void InputArgList::releaseMemory() { // An InputArgList always owns its arguments. 
for (Arg *A : *this) diff --git a/llvm/lib/Option/OptTable.cpp b/llvm/lib/Option/OptTable.cpp index 6d10e61..14e3b0d 100644 --- a/llvm/lib/Option/OptTable.cpp +++ b/llvm/lib/Option/OptTable.cpp @@ -79,9 +79,12 @@ OptSpecifier::OptSpecifier(const Option *Opt) : ID(Opt->getID()) {} OptTable::OptTable(const StringTable &StrTable, ArrayRef<StringTable::Offset> PrefixesTable, - ArrayRef<Info> OptionInfos, bool IgnoreCase) + ArrayRef<Info> OptionInfos, bool IgnoreCase, + ArrayRef<SubCommand> SubCommands, + ArrayRef<unsigned> SubCommandIDsTable) : StrTable(&StrTable), PrefixesTable(PrefixesTable), - OptionInfos(OptionInfos), IgnoreCase(IgnoreCase) { + OptionInfos(OptionInfos), IgnoreCase(IgnoreCase), + SubCommands(SubCommands), SubCommandIDsTable(SubCommandIDsTable) { // Explicitly zero initialize the error to work around a bug in array // value-initialization on MinGW with gcc 4.3.5. @@ -715,9 +718,10 @@ static const char *getOptionHelpGroup(const OptTable &Opts, OptSpecifier Id) { void OptTable::printHelp(raw_ostream &OS, const char *Usage, const char *Title, bool ShowHidden, bool ShowAllAliases, - Visibility VisibilityMask) const { + Visibility VisibilityMask, + StringRef SubCommand) const { return internalPrintHelp( - OS, Usage, Title, ShowHidden, ShowAllAliases, + OS, Usage, Title, SubCommand, ShowHidden, ShowAllAliases, [VisibilityMask](const Info &CandidateInfo) -> bool { return (CandidateInfo.Visibility & VisibilityMask) == 0; }, @@ -730,7 +734,7 @@ void OptTable::printHelp(raw_ostream &OS, const char *Usage, const char *Title, bool ShowHidden = !(FlagsToExclude & HelpHidden); FlagsToExclude &= ~HelpHidden; return internalPrintHelp( - OS, Usage, Title, ShowHidden, ShowAllAliases, + OS, Usage, Title, /*SubCommand=*/{}, ShowHidden, ShowAllAliases, [FlagsToInclude, FlagsToExclude](const Info &CandidateInfo) { if (FlagsToInclude && !(CandidateInfo.Flags & FlagsToInclude)) return true; @@ -742,16 +746,62 @@ void OptTable::printHelp(raw_ostream &OS, const char *Usage, const char *Title, } void OptTable::internalPrintHelp( - raw_ostream &OS, const char *Usage, const char *Title, bool ShowHidden, - bool ShowAllAliases, std::function<bool(const Info &)> ExcludeOption, + raw_ostream &OS, const char *Usage, const char *Title, StringRef SubCommand, + bool ShowHidden, bool ShowAllAliases, + std::function<bool(const Info &)> ExcludeOption, Visibility VisibilityMask) const { OS << "OVERVIEW: " << Title << "\n\n"; - OS << "USAGE: " << Usage << "\n\n"; // Render help text into a map of group-name to a list of (option, help) // pairs. std::map<std::string, std::vector<OptionInfo>> GroupedOptionHelp; + auto ActiveSubCommand = + std::find_if(SubCommands.begin(), SubCommands.end(), + [&](const auto &C) { return SubCommand == C.Name; }); + if (!SubCommand.empty()) { + assert(ActiveSubCommand != SubCommands.end() && + "Not a valid registered subcommand."); + OS << ActiveSubCommand->HelpText << "\n\n"; + if (!StringRef(ActiveSubCommand->Usage).empty()) + OS << "USAGE: " << ActiveSubCommand->Usage << "\n\n"; + } else { + OS << "USAGE: " << Usage << "\n\n"; + if (SubCommands.size() > 1) { + OS << "SUBCOMMANDS:\n\n"; + for (const auto &C : SubCommands) + OS << C.Name << " - " << C.HelpText << "\n"; + OS << "\n"; + } + } + + auto DoesOptionBelongToSubcommand = [&](const Info &CandidateInfo) { + // Retrieve the SubCommandIDs registered to the given current CandidateInfo + // Option. 
+ ArrayRef<unsigned> SubCommandIDs = + CandidateInfo.getSubCommandIDs(SubCommandIDsTable); + + // If no registered subcommands, then only global options are to be printed. + // If no valid SubCommand (empty) in commandline then print the current + // global CandidateInfo Option. + if (SubCommandIDs.empty()) + return SubCommand.empty(); + + // Handle CandidateInfo Option which has at least one registered SubCommand. + // If no valid SubCommand (empty) in commandline, this CandidateInfo option + // should not be printed. + if (SubCommand.empty()) + return false; + + // Find the ID of the valid subcommand passed in commandline (its index in + // the SubCommands table which contains all subcommands). + unsigned ActiveSubCommandID = ActiveSubCommand - &SubCommands[0]; + // Print if the ActiveSubCommandID is registered with the CandidateInfo + // Option. + return std::find(SubCommandIDs.begin(), SubCommandIDs.end(), + ActiveSubCommandID) != SubCommandIDs.end(); + }; + for (unsigned Id = 1, e = getNumOptions() + 1; Id != e; ++Id) { // FIXME: Split out option groups. if (getOptionKind(Id) == Option::GroupClass) @@ -764,6 +814,9 @@ void OptTable::internalPrintHelp( if (ExcludeOption(CandidateInfo)) continue; + if (!DoesOptionBelongToSubcommand(CandidateInfo)) + continue; + // If an alias doesn't have a help text, show a help text for the aliased // option instead. const char *HelpText = getOptionHelpText(Id, VisibilityMask); @@ -791,8 +844,11 @@ void OptTable::internalPrintHelp( GenericOptTable::GenericOptTable(const StringTable &StrTable, ArrayRef<StringTable::Offset> PrefixesTable, - ArrayRef<Info> OptionInfos, bool IgnoreCase) - : OptTable(StrTable, PrefixesTable, OptionInfos, IgnoreCase) { + ArrayRef<Info> OptionInfos, bool IgnoreCase, + ArrayRef<SubCommand> SubCommands, + ArrayRef<unsigned> SubCommandIDsTable) + : OptTable(StrTable, PrefixesTable, OptionInfos, IgnoreCase, SubCommands, + SubCommandIDsTable) { std::set<StringRef> TmpPrefixesUnion; for (auto const &Info : OptionInfos.drop_front(FirstSearchableIndex)) diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index 7069e8d..119caea 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -1960,6 +1960,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, // is fixed. MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr)); + MPM.addPass(NoRecurseLTOInferencePass()); // Stop here at -O1. 
if (Level == OptimizationLevel::O1) { // The LowerTypeTestsPass needs to run to lower type metadata and the diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index f0e7d36..88550ea 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -119,6 +119,7 @@ MODULE_PASS("metarenamer", MetaRenamerPass()) MODULE_PASS("module-inline", ModuleInlinerPass()) MODULE_PASS("name-anon-globals", NameAnonGlobalPass()) MODULE_PASS("no-op-module", NoOpModulePass()) +MODULE_PASS("norecurse-lto-inference", NoRecurseLTOInferencePass()) MODULE_PASS("nsan", NumericalStabilitySanitizerPass()) MODULE_PASS("openmp-opt", OpenMPOptPass()) MODULE_PASS("openmp-opt-postlink", diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 50a8754..479e345 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -5666,18 +5666,21 @@ InstructionCost AArch64TTIImpl::getPartialReductionCost( VectorType *AccumVectorType = VectorType::get(AccumType, VF.divideCoefficientBy(Ratio)); // We don't yet support all kinds of legalization. - auto TA = TLI->getTypeAction(AccumVectorType->getContext(), - EVT::getEVT(AccumVectorType)); - switch (TA) { + auto TC = TLI->getTypeConversion(AccumVectorType->getContext(), + EVT::getEVT(AccumVectorType)); + switch (TC.first) { default: return Invalid; case TargetLowering::TypeLegal: case TargetLowering::TypePromoteInteger: case TargetLowering::TypeSplitVector: + // The legalised type (e.g. after splitting) must be legal too. + if (TLI->getTypeAction(AccumVectorType->getContext(), TC.second) != + TargetLowering::TypeLegal) + return Invalid; break; } - // Check what kind of type-legalisation happens. std::pair<InstructionCost, MVT> AccumLT = getTypeLegalizationCost(AccumVectorType); std::pair<InstructionCost, MVT> InputLT = diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 6b3c151..1a697f7 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -1448,10 +1448,10 @@ def Feature45BitNumRecordsBufferResource : SubtargetFeature< "45-bit-num-records "The buffer resource (V#) supports 45-bit num_records" >; -def FeatureCluster : SubtargetFeature< "cluster", - "HasCluster", +def FeatureClusters : SubtargetFeature< "clusters", + "HasClusters", "true", - "Has cluster support" + "Has clusters of workgroups support" >; // Dummy feature used to disable assembler instructions. 
@@ -2120,7 +2120,7 @@ def FeatureISAVersion12_50 : FeatureSet< Feature45BitNumRecordsBufferResource, FeatureSupportsXNACK, FeatureXNACK, - FeatureCluster, + FeatureClusters, ]>; def FeatureISAVersion12_51 : FeatureSet< diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index a67a7be..d0c0822 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -1944,6 +1944,7 @@ public: void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); void cvtVINTERP(MCInst &Inst, const OperandVector &Operands); + void cvtOpSelHelper(MCInst &Inst, unsigned OpSel); bool parseDimId(unsigned &Encoding); ParseStatus parseDim(OperandVector &Operands); @@ -9239,6 +9240,33 @@ static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { MCOI::OperandConstraint::TIED_TO) == -1; } +void AMDGPUAsmParser::cvtOpSelHelper(MCInst &Inst, unsigned OpSel) { + unsigned Opc = Inst.getOpcode(); + constexpr AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1, + AMDGPU::OpName::src2}; + constexpr AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers, + AMDGPU::OpName::src1_modifiers, + AMDGPU::OpName::src2_modifiers}; + for (int J = 0; J < 3; ++J) { + int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); + if (OpIdx == -1) + // Some instructions, e.g. v_interp_p2_f16 in GFX9, have src0, src2, but + // no src1. So continue instead of break. + continue; + + int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); + uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); + + if ((OpSel & (1 << J)) != 0) + ModVal |= SISrcMods::OP_SEL_0; + // op_sel[3] is encoded in src0_modifiers. + if (ModOps[J] == AMDGPU::OpName::src0_modifiers && (OpSel & (1 << 3)) != 0) + ModVal |= SISrcMods::DST_OP_SEL; + + Inst.getOperand(ModIdx).setImm(ModVal); + } +} + void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) { OptionalImmIndexMap OptionalIdx; @@ -9275,6 +9303,16 @@ void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod)) addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); + + // Some v_interp instructions use op_sel[3] for dst. 
+ if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) { + addOptionalImmOperand(Inst, Operands, OptionalIdx, + AMDGPUOperand::ImmTyOpSel); + int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); + unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); + + cvtOpSelHelper(Inst, OpSel); + } } void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands) @@ -9310,31 +9348,10 @@ void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands) if (OpSelIdx == -1) return; - const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1, - AMDGPU::OpName::src2}; - const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers, - AMDGPU::OpName::src1_modifiers, - AMDGPU::OpName::src2_modifiers}; - unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); - - for (int J = 0; J < 3; ++J) { - int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); - if (OpIdx == -1) - break; - - int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); - uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); - - if ((OpSel & (1 << J)) != 0) - ModVal |= SISrcMods::OP_SEL_0; - if (ModOps[J] == AMDGPU::OpName::src0_modifiers && - (OpSel & (1 << 3)) != 0) - ModVal |= SISrcMods::DST_OP_SEL; - - Inst.getOperand(ModIdx).setImm(ModVal); - } + cvtOpSelHelper(Inst, OpSel); } + void AMDGPUAsmParser::cvtScaledMFMA(MCInst &Inst, const OperandVector &Operands) { OptionalImmIndexMap OptionalIdx; diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp b/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp index 7b94ea3..f291e37 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp @@ -541,7 +541,7 @@ unsigned GCNSubtarget::getMaxNumSGPRs(const Function &F) const { unsigned GCNSubtarget::getBaseMaxNumVGPRs( const Function &F, std::pair<unsigned, unsigned> NumVGPRBounds) const { - const auto &[Min, Max] = NumVGPRBounds; + const auto [Min, Max] = NumVGPRBounds; // Check if maximum number of VGPRs was explicitly requested using // "amdgpu-num-vgpr" attribute. diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 879bf5a..c2e6078 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -288,7 +288,7 @@ protected: bool Has45BitNumRecordsBufferResource = false; - bool HasCluster = false; + bool HasClusters = false; // Dummy feature to use for assembler in tablegen. bool FeatureDisable = false; @@ -1839,7 +1839,7 @@ public: } /// \returns true if the subtarget supports clusters of workgroups. - bool hasClusters() const { return HasCluster; } + bool hasClusters() const { return HasClusters; } /// \returns true if the subtarget requires a wait for xcnt before atomic /// flat/global stores & rmw. diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp index d3b5718..3563caa 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp @@ -1280,6 +1280,17 @@ void AMDGPUInstPrinter::printPackedModifier(const MCInst *MI, (ModIdx != -1) ? MI->getOperand(ModIdx).getImm() : DefaultValue; } + // Some instructions, e.g. v_interp_p2_f16 in GFX9, have src0, src2, but no + // src1. + if (NumOps == 1 && AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src2) && + !AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1)) { + Ops[NumOps++] = DefaultValue; // Set src1_modifiers to default. 
+ int Mod2Idx = + AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers); + assert(Mod2Idx != -1); + Ops[NumOps++] = MI->getOperand(Mod2Idx).getImm(); + } + const bool HasDst = (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst) != -1) || (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst) != -1); diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 3c2dd42..3115579 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -1118,12 +1118,7 @@ SIRegisterInfo::getPointerRegClass(unsigned Kind) const { const TargetRegisterClass * SIRegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const { - if (isAGPRClass(RC) && !ST.hasGFX90AInsts()) - return getEquivalentVGPRClass(RC); - if (RC == &AMDGPU::SCC_CLASSRegClass) - return getWaveMaskRegClass(); - - return RC; + return RC == &AMDGPU::SCC_CLASSRegClass ? &AMDGPU::SReg_32RegClass : RC; } static unsigned getNumSubRegsForSpillOp(const MachineInstr &MI, diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 4a2b54d..42ec8ba 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -97,6 +97,7 @@ class VOP3Interp<string OpName, VOPProfile P, list<dag> pattern = []> : VOP3_Pseudo<OpName, P, pattern> { let AsmMatchConverter = "cvtVOP3Interp"; let mayRaiseFPException = 0; + let VOP3_OPSEL = P.HasOpSel; } def VOP3_INTERP : VOPProfile<[f32, f32, i32, untyped]> { @@ -119,16 +120,17 @@ def VOP3_INTERP_MOV : VOPProfile<[f32, i32, i32, untyped]> { let HasSrc0Mods = 0; } -class getInterp16Asm <bit HasSrc2, bit HasOMod> { +class getInterp16Asm <bit HasSrc2, bit HasOMod, bit OpSel> { string src2 = !if(HasSrc2, ", $src2_modifiers", ""); string omod = !if(HasOMod, "$omod", ""); + string opsel = !if(OpSel, "$op_sel", ""); string ret = - " $vdst, $src0_modifiers, $attr$attrchan"#src2#"$high$clamp"#omod; + " $vdst, $src0_modifiers, $attr$attrchan"#src2#"$high$clamp"#omod#opsel; } class getInterp16Ins <bit HasSrc2, bit HasOMod, - Operand Src0Mod, Operand Src2Mod> { - dag ret = !if(HasSrc2, + Operand Src0Mod, Operand Src2Mod, bit OpSel> { + dag ret1 = !if(HasSrc2, !if(HasOMod, (ins Src0Mod:$src0_modifiers, VRegSrc_32:$src0, InterpAttr:$attr, InterpAttrChan:$attrchan, @@ -143,19 +145,22 @@ class getInterp16Ins <bit HasSrc2, bit HasOMod, InterpAttr:$attr, InterpAttrChan:$attrchan, highmod:$high, Clamp0:$clamp, omod0:$omod) ); + dag ret2 = !if(OpSel, (ins op_sel0:$op_sel), (ins)); + dag ret = !con(ret1, ret2); } -class VOP3_INTERP16 <list<ValueType> ArgVT> : VOPProfile<ArgVT> { +class VOP3_INTERP16 <list<ValueType> ArgVT, bit OpSel = 0> : VOPProfile<ArgVT> { let IsSingle = 1; let HasOMod = !ne(DstVT.Value, f16.Value); let HasHigh = 1; + let HasOpSel = OpSel; let Src0Mod = FPVRegInputMods; let Src2Mod = FPVRegInputMods; let Outs64 = (outs DstRC.RegClass:$vdst); - let Ins64 = getInterp16Ins<HasSrc2, HasOMod, Src0Mod, Src2Mod>.ret; - let Asm64 = getInterp16Asm<HasSrc2, HasOMod>.ret; + let Ins64 = getInterp16Ins<HasSrc2, HasOMod, Src0Mod, Src2Mod, OpSel>.ret; + let Asm64 = getInterp16Asm<HasSrc2, HasOMod, OpSel>.ret; } //===----------------------------------------------------------------------===// @@ -480,7 +485,7 @@ let SubtargetPredicate = isGFX9Plus in { defm V_MAD_U16_gfx9 : VOP3Inst_t16 <"v_mad_u16_gfx9", VOP_I16_I16_I16_I16>; defm V_MAD_I16_gfx9 : VOP3Inst_t16 <"v_mad_i16_gfx9", VOP_I16_I16_I16_I16>; let OtherPredicates = [isNotGFX90APlus] in 
-def V_INTERP_P2_F16_gfx9 : VOP3Interp <"v_interp_p2_f16_gfx9", VOP3_INTERP16<[f16, f32, i32, f32]>>; +def V_INTERP_P2_F16_opsel : VOP3Interp <"v_interp_p2_f16_opsel", VOP3_INTERP16<[f16, f32, i32, f32], /*OpSel*/ 1>>; } // End SubtargetPredicate = isGFX9Plus // This predicate should only apply to the selection pattern. The @@ -2676,6 +2681,14 @@ multiclass VOP3Interp_F16_Real_gfx9<bits<10> op, string OpName, string AsmName> } } +multiclass VOP3Interp_F16_OpSel_Real_gfx9<bits<10> op, string OpName, string AsmName> { + def _gfx9 : VOP3_Real<!cast<VOP3_Pseudo>(OpName), SIEncodingFamily.GFX9>, + VOP3Interp_OpSel_gfx9 <op, !cast<VOP3_Pseudo>(OpName).Pfl> { + VOP3_Pseudo ps = !cast<VOP3_Pseudo>(OpName); + let AsmString = AsmName # ps.AsmOperands; + } +} + multiclass VOP3_Real_gfx9<bits<10> op, string AsmName> { def _gfx9 : VOP3_Real<!cast<VOP_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX9>, VOP3e_vi <op, !cast<VOP_Pseudo>(NAME#"_e64").Pfl> { @@ -2788,7 +2801,7 @@ defm V_MAD_U16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x204, "v_mad_u16">; defm V_MAD_I16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x205, "v_mad_i16">; defm V_FMA_F16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x206, "v_fma_f16">; defm V_DIV_FIXUP_F16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x207, "v_div_fixup_f16">; -defm V_INTERP_P2_F16_gfx9 : VOP3Interp_F16_Real_gfx9 <0x277, "V_INTERP_P2_F16_gfx9", "v_interp_p2_f16">; +defm V_INTERP_P2_F16_opsel : VOP3Interp_F16_OpSel_Real_gfx9 <0x277, "V_INTERP_P2_F16_opsel", "v_interp_p2_f16">; defm V_ADD_I32 : VOP3_Real_vi <0x29c>; defm V_SUB_I32 : VOP3_Real_vi <0x29d>; diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td index 631f0f3..8325c62 100644 --- a/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -419,6 +419,13 @@ class VOP3a_ScaleSel_gfx1250<bits<10> op, VOPProfile p> : VOP3e_gfx11_gfx12<op, let Inst{14-11} = scale_sel; } +class VOP3Interp_OpSel_gfx9<bits<10> op, VOPProfile p> : VOP3Interp_vi<op, p> { + let Inst{11} = src0_modifiers{2}; + // There's no src1 + let Inst{13} = src2_modifiers{2}; + let Inst{14} = !if(p.HasDst, src0_modifiers{3}, 0); +} + class VOP3Interp_gfx10<bits<10> op, VOPProfile p> : VOP3e_gfx10<op, p> { bits<6> attr; bits<2> attrchan; diff --git a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index 1fc475d..561a9c5 100644 --- a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -349,32 +349,30 @@ public: bool isImm() const override { return Kind == Immediate || Kind == Expression; } - bool isU1Imm() const { return Kind == Immediate && isUInt<1>(getImm()); } - bool isU2Imm() const { return Kind == Immediate && isUInt<2>(getImm()); } - bool isU3Imm() const { return Kind == Immediate && isUInt<3>(getImm()); } - bool isU4Imm() const { return Kind == Immediate && isUInt<4>(getImm()); } - bool isU5Imm() const { return Kind == Immediate && isUInt<5>(getImm()); } - bool isS5Imm() const { return Kind == Immediate && isInt<5>(getImm()); } - bool isU6Imm() const { return Kind == Immediate && isUInt<6>(getImm()); } - bool isU6ImmX2() const { return Kind == Immediate && - isUInt<6>(getImm()) && - (getImm() & 1) == 0; } - bool isU7Imm() const { return Kind == Immediate && isUInt<7>(getImm()); } - bool isU7ImmX4() const { return Kind == Immediate && - isUInt<7>(getImm()) && - (getImm() & 3) == 0; } - bool isU8Imm() const { return Kind == Immediate && isUInt<8>(getImm()); } - bool isU8ImmX8() const { return Kind == 
Immediate && - isUInt<8>(getImm()) && - (getImm() & 7) == 0; } - - bool isU10Imm() const { return Kind == Immediate && isUInt<10>(getImm()); } - bool isU12Imm() const { return Kind == Immediate && isUInt<12>(getImm()); } + + template <uint64_t N> bool isUImm() const { + return Kind == Immediate && isUInt<N>(getImm()); + } + template <uint64_t N> bool isSImm() const { + return Kind == Immediate && isInt<N>(getImm()); + } + bool isU6ImmX2() const { return isUImm<6>() && (getImm() & 1) == 0; } + bool isU7ImmX4() const { return isUImm<7>() && (getImm() & 3) == 0; } + bool isU8ImmX8() const { return isUImm<8>() && (getImm() & 7) == 0; } + bool isU16Imm() const { return isExtImm<16>(/*Signed*/ false, 1); } bool isS16Imm() const { return isExtImm<16>(/*Signed*/ true, 1); } bool isS16ImmX4() const { return isExtImm<16>(/*Signed*/ true, 4); } bool isS16ImmX16() const { return isExtImm<16>(/*Signed*/ true, 16); } bool isS17Imm() const { return isExtImm<17>(/*Signed*/ true, 1); } + bool isS34Imm() const { + // Once the PC-Rel ABI is finalized, evaluate whether a 34-bit + // ContextImmediate is needed. + return Kind == Expression || isSImm<34>(); + } + bool isS34ImmX16() const { + return Kind == Expression || (isSImm<34>() && (getImm() & 15) == 0); + } bool isHashImmX8() const { // The Hash Imm form is used for instructions that check or store a hash. @@ -384,16 +382,6 @@ public: (getImm() & 7) == 0); } - bool isS34ImmX16() const { - return Kind == Expression || - (Kind == Immediate && isInt<34>(getImm()) && (getImm() & 15) == 0); - } - bool isS34Imm() const { - // Once the PC-Rel ABI is finalized, evaluate whether a 34-bit - // ContextImmediate is needed. - return Kind == Expression || (Kind == Immediate && isInt<34>(getImm())); - } - bool isTLSReg() const { return Kind == TLSRegister; } bool isDirectBr() const { if (Kind == Expression) @@ -1637,7 +1625,7 @@ bool PPCAsmParser::parseInstruction(ParseInstructionInfo &Info, StringRef Name, if (Operands.size() != 5) return false; PPCOperand &EHOp = (PPCOperand &)*Operands[4]; - if (EHOp.isU1Imm() && EHOp.getImm() == 0) + if (EHOp.isUImm<1>() && EHOp.getImm() == 0) Operands.pop_back(); } @@ -1817,7 +1805,7 @@ unsigned PPCAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp, } PPCOperand &Op = static_cast<PPCOperand &>(AsmOp); - if (Op.isU3Imm() && Op.getImm() == ImmVal) + if (Op.isUImm<3>() && Op.getImm() == ImmVal) return Match_Success; return Match_InvalidOperand; diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index 48c31c9..81d8e94 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -206,45 +206,24 @@ PPCMCCodeEmitter::getVSRpEvenEncoding(const MCInst &MI, unsigned OpNo, return RegBits; } -unsigned PPCMCCodeEmitter::getImm16Encoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpNo); - if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI); - - // Add a fixup for the immediate field. - addFixup(Fixups, IsLittleEndian ? 
0 : 2, MO.getExpr(), PPC::fixup_ppc_half16); - return 0; -} - -uint64_t PPCMCCodeEmitter::getImm34Encoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI, - MCFixupKind Fixup) const { +template <MCFixupKind Fixup> +uint64_t PPCMCCodeEmitter::getImmEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpNo); assert(!MO.isReg() && "Not expecting a register for this operand."); if (MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI); + uint32_t Offset = 0; + if (Fixup == PPC::fixup_ppc_half16) + Offset = IsLittleEndian ? 0 : 2; + // Add a fixup for the immediate field. - addFixup(Fixups, 0, MO.getExpr(), Fixup); + addFixup(Fixups, Offset, MO.getExpr(), Fixup); return 0; } -uint64_t -PPCMCCodeEmitter::getImm34EncodingNoPCRel(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - return getImm34Encoding(MI, OpNo, Fixups, STI, PPC::fixup_ppc_imm34); -} - -uint64_t -PPCMCCodeEmitter::getImm34EncodingPCRel(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - return getImm34Encoding(MI, OpNo, Fixups, STI, PPC::fixup_ppc_pcrel34); -} - unsigned PPCMCCodeEmitter::getDispRIEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const { diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h index b574557..3356513 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h @@ -47,19 +47,10 @@ public: unsigned getAbsCondBrEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const; - unsigned getImm16Encoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - uint64_t getImm34Encoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI, - MCFixupKind Fixup) const; - uint64_t getImm34EncodingNoPCRel(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - uint64_t getImm34EncodingPCRel(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; + template <MCFixupKind Fixup> + uint64_t getImmEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; unsigned getDispRIEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const; diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td index 60efa4c..fdca5ebc 100644 --- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td @@ -14,30 +14,6 @@ //===----------------------------------------------------------------------===// // 64-bit operands. 
// -def s16imm64 : Operand<i64> { - let PrintMethod = "printS16ImmOperand"; - let EncoderMethod = "getImm16Encoding"; - let ParserMatchClass = PPCS16ImmAsmOperand; - let DecoderMethod = "decodeSImmOperand<16>"; - let OperandType = "OPERAND_IMMEDIATE"; -} -def u16imm64 : Operand<i64> { - let PrintMethod = "printU16ImmOperand"; - let EncoderMethod = "getImm16Encoding"; - let ParserMatchClass = PPCU16ImmAsmOperand; - let DecoderMethod = "decodeUImmOperand<16>"; - let OperandType = "OPERAND_IMMEDIATE"; -} -def s17imm64 : Operand<i64> { - // This operand type is used for addis/lis to allow the assembler parser - // to accept immediates in the range -65536..65535 for compatibility with - // the GNU assembler. The operand is treated as 16-bit otherwise. - let PrintMethod = "printS16ImmOperand"; - let EncoderMethod = "getImm16Encoding"; - let ParserMatchClass = PPCS17ImmAsmOperand; - let DecoderMethod = "decodeSImmOperand<16>"; - let OperandType = "OPERAND_IMMEDIATE"; -} def tocentry : Operand<iPTR> { let MIOperandInfo = (ops i64imm:$imm); } diff --git a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td index c616db4..23d6d88 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td @@ -30,6 +30,11 @@ // Altivec transformation functions and pattern fragments. // +// fneg is not legal, and desugared as an xor. +def desugared_fneg : PatFrag<(ops node:$x), (v4f32 (bitconvert (xor (bitconvert $x), + (int_ppc_altivec_vslw (bitconvert (v16i8 immAllOnesV)), + (bitconvert (v16i8 immAllOnesV))))))>; + def vpkuhum_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), 0, *CurDAG); @@ -467,11 +472,12 @@ def VMADDFP : VAForm_1<46, (outs vrrc:$RT), (ins vrrc:$RA, vrrc:$RC, vrrc:$RB), [(set v4f32:$RT, (fma v4f32:$RA, v4f32:$RC, v4f32:$RB))]>; -// FIXME: The fma+fneg pattern won't match because fneg is not legal. +// fneg is not legal, hence we have to match on the desugared version. def VNMSUBFP: VAForm_1<47, (outs vrrc:$RT), (ins vrrc:$RA, vrrc:$RC, vrrc:$RB), "vnmsubfp $RT, $RA, $RC, $RB", IIC_VecFP, - [(set v4f32:$RT, (fneg (fma v4f32:$RA, v4f32:$RC, - (fneg v4f32:$RB))))]>; + [(set v4f32:$RT, (desugared_fneg (fma v4f32:$RA, v4f32:$RC, + (desugared_fneg v4f32:$RB))))]>; + let hasSideEffects = 1 in { def VMHADDSHS : VA1a_Int_Ty<32, "vmhaddshs", int_ppc_altivec_vmhaddshs, v8i16>; def VMHRADDSHS : VA1a_Int_Ty<33, "vmhraddshs", int_ppc_altivec_vmhraddshs, @@ -892,6 +898,13 @@ def : Pat<(mul v8i16:$vA, v8i16:$vB), (VMLADDUHM $vA, $vB, (v8i16(V_SET0H)))>; // Add def : Pat<(add (mul v8i16:$vA, v8i16:$vB), v8i16:$vC), (VMLADDUHM $vA, $vB, $vC)>; + +// Fused negated multiply-subtract +def : Pat<(v4f32 (desugared_fneg + (int_ppc_altivec_vmaddfp v4f32:$RA, v4f32:$RC, + (desugared_fneg v4f32:$RB)))), + (VNMSUBFP $RA, $RC, $RB)>; + // Saturating adds/subtracts. 
def : Pat<(v16i8 (saddsat v16i8:$vA, v16i8:$vB)), (v16i8 (VADDSBS $vA, $vB))>; def : Pat<(v16i8 (uaddsat v16i8:$vA, v16i8:$vB)), (v16i8 (VADDUBS $vA, $vB))>; diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td index 6d8c122..65d0484 100644 --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td @@ -615,7 +615,8 @@ def spe4rc : RegisterOperand<GPRC> { } def PPCU1ImmAsmOperand : AsmOperandClass { - let Name = "U1Imm"; let PredicateMethod = "isU1Imm"; + let Name = "U1Imm"; + let PredicateMethod = "isUImm<1>"; let RenderMethod = "addImmOperands"; } def u1imm : Operand<i32> { @@ -626,7 +627,8 @@ def u1imm : Operand<i32> { } def PPCU2ImmAsmOperand : AsmOperandClass { - let Name = "U2Imm"; let PredicateMethod = "isU2Imm"; + let Name = "U2Imm"; + let PredicateMethod = "isUImm<2>"; let RenderMethod = "addImmOperands"; } def u2imm : Operand<i32> { @@ -647,7 +649,8 @@ def atimm : Operand<i32> { } def PPCU3ImmAsmOperand : AsmOperandClass { - let Name = "U3Imm"; let PredicateMethod = "isU3Imm"; + let Name = "U3Imm"; + let PredicateMethod = "isUImm<3>"; let RenderMethod = "addImmOperands"; } def u3imm : Operand<i32> { @@ -658,7 +661,8 @@ def u3imm : Operand<i32> { } def PPCU4ImmAsmOperand : AsmOperandClass { - let Name = "U4Imm"; let PredicateMethod = "isU4Imm"; + let Name = "U4Imm"; + let PredicateMethod = "isUImm<4>"; let RenderMethod = "addImmOperands"; } def u4imm : Operand<i32> { @@ -668,7 +672,8 @@ def u4imm : Operand<i32> { let OperandType = "OPERAND_IMMEDIATE"; } def PPCS5ImmAsmOperand : AsmOperandClass { - let Name = "S5Imm"; let PredicateMethod = "isS5Imm"; + let Name = "S5Imm"; + let PredicateMethod = "isSImm<5>"; let RenderMethod = "addImmOperands"; } def s5imm : Operand<i32> { @@ -678,7 +683,8 @@ def s5imm : Operand<i32> { let OperandType = "OPERAND_IMMEDIATE"; } def PPCU5ImmAsmOperand : AsmOperandClass { - let Name = "U5Imm"; let PredicateMethod = "isU5Imm"; + let Name = "U5Imm"; + let PredicateMethod = "isUImm<5>"; let RenderMethod = "addImmOperands"; } def u5imm : Operand<i32> { @@ -688,7 +694,8 @@ def u5imm : Operand<i32> { let OperandType = "OPERAND_IMMEDIATE"; } def PPCU6ImmAsmOperand : AsmOperandClass { - let Name = "U6Imm"; let PredicateMethod = "isU6Imm"; + let Name = "U6Imm"; + let PredicateMethod = "isUImm<6>"; let RenderMethod = "addImmOperands"; } def u6imm : Operand<i32> { @@ -698,7 +705,8 @@ def u6imm : Operand<i32> { let OperandType = "OPERAND_IMMEDIATE"; } def PPCU7ImmAsmOperand : AsmOperandClass { - let Name = "U7Imm"; let PredicateMethod = "isU7Imm"; + let Name = "U7Imm"; + let PredicateMethod = "isUImm<7>"; let RenderMethod = "addImmOperands"; } def u7imm : Operand<i32> { @@ -708,7 +716,8 @@ def u7imm : Operand<i32> { let OperandType = "OPERAND_IMMEDIATE"; } def PPCU8ImmAsmOperand : AsmOperandClass { - let Name = "U8Imm"; let PredicateMethod = "isU8Imm"; + let Name = "U8Imm"; + let PredicateMethod = "isUImm<8>"; let RenderMethod = "addImmOperands"; } def u8imm : Operand<i32> { @@ -718,7 +727,8 @@ def u8imm : Operand<i32> { let OperandType = "OPERAND_IMMEDIATE"; } def PPCU10ImmAsmOperand : AsmOperandClass { - let Name = "U10Imm"; let PredicateMethod = "isU10Imm"; + let Name = "U10Imm"; + let PredicateMethod = "isUImm<10>"; let RenderMethod = "addImmOperands"; } def u10imm : Operand<i32> { @@ -728,7 +738,8 @@ def u10imm : Operand<i32> { let OperandType = "OPERAND_IMMEDIATE"; } def PPCU12ImmAsmOperand : AsmOperandClass { - let Name = "U12Imm"; let PredicateMethod = "isU12Imm"; + let 
Name = "U12Imm"; + let PredicateMethod = "isUImm<12>"; let RenderMethod = "addImmOperands"; } def u12imm : Operand<i32> { @@ -743,7 +754,14 @@ def PPCS16ImmAsmOperand : AsmOperandClass { } def s16imm : Operand<i32> { let PrintMethod = "printS16ImmOperand"; - let EncoderMethod = "getImm16Encoding"; + let EncoderMethod = "getImmEncoding<PPC::fixup_ppc_half16>"; + let ParserMatchClass = PPCS16ImmAsmOperand; + let DecoderMethod = "decodeSImmOperand<16>"; + let OperandType = "OPERAND_IMMEDIATE"; +} +def s16imm64 : Operand<i64> { + let PrintMethod = "printS16ImmOperand"; + let EncoderMethod = "getImmEncoding<PPC::fixup_ppc_half16>"; let ParserMatchClass = PPCS16ImmAsmOperand; let DecoderMethod = "decodeSImmOperand<16>"; let OperandType = "OPERAND_IMMEDIATE"; @@ -754,7 +772,14 @@ def PPCU16ImmAsmOperand : AsmOperandClass { } def u16imm : Operand<i32> { let PrintMethod = "printU16ImmOperand"; - let EncoderMethod = "getImm16Encoding"; + let EncoderMethod = "getImmEncoding<PPC::fixup_ppc_half16>"; + let ParserMatchClass = PPCU16ImmAsmOperand; + let DecoderMethod = "decodeUImmOperand<16>"; + let OperandType = "OPERAND_IMMEDIATE"; +} +def u16imm64 : Operand<i64> { + let PrintMethod = "printU16ImmOperand"; + let EncoderMethod = "getImmEncoding<PPC::fixup_ppc_half16>"; let ParserMatchClass = PPCU16ImmAsmOperand; let DecoderMethod = "decodeUImmOperand<16>"; let OperandType = "OPERAND_IMMEDIATE"; @@ -768,7 +793,17 @@ def s17imm : Operand<i32> { // to accept immediates in the range -65536..65535 for compatibility with // the GNU assembler. The operand is treated as 16-bit otherwise. let PrintMethod = "printS16ImmOperand"; - let EncoderMethod = "getImm16Encoding"; + let EncoderMethod = "getImmEncoding<PPC::fixup_ppc_half16>"; + let ParserMatchClass = PPCS17ImmAsmOperand; + let DecoderMethod = "decodeSImmOperand<16>"; + let OperandType = "OPERAND_IMMEDIATE"; +} +def s17imm64 : Operand<i64> { + // This operand type is used for addis/lis to allow the assembler parser + // to accept immediates in the range -65536..65535 for compatibility with + // the GNU assembler. The operand is treated as 16-bit otherwise. + let PrintMethod = "printS16ImmOperand"; + let EncoderMethod = "getImmEncoding<PPC::fixup_ppc_half16>"; let ParserMatchClass = PPCS17ImmAsmOperand; let DecoderMethod = "decodeSImmOperand<16>"; let OperandType = "OPERAND_IMMEDIATE"; @@ -780,14 +815,14 @@ def PPCS34ImmAsmOperand : AsmOperandClass { } def s34imm : Operand<i64> { let PrintMethod = "printS34ImmOperand"; - let EncoderMethod = "getImm34EncodingNoPCRel"; + let EncoderMethod = "getImmEncoding<PPC::fixup_ppc_imm34>"; let ParserMatchClass = PPCS34ImmAsmOperand; let DecoderMethod = "decodeSImmOperand<34>"; let OperandType = "OPERAND_IMMEDIATE"; } def s34imm_pcrel : Operand<i64> { let PrintMethod = "printS34ImmOperand"; - let EncoderMethod = "getImm34EncodingPCRel"; + let EncoderMethod = "getImmEncoding<PPC::fixup_ppc_pcrel34>"; let ParserMatchClass = PPCS34ImmAsmOperand; let DecoderMethod = "decodeSImmOperand<34>"; let OperandType = "OPERAND_IMMEDIATE"; diff --git a/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp index 597dd12..9f9ae2f 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp @@ -324,6 +324,10 @@ RISCVRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OpdsMapping[0] = GPRValueMapping; + // Atomics always use GPR destinations. Don't refine any further. 
+ if (cast<GLoad>(MI).isAtomic()) + break; + // Use FPR64 for s64 loads on rv32. if (GPRSize == 32 && Size.getFixedValue() == 64) { assert(MF.getSubtarget<RISCVSubtarget>().hasStdExtD()); @@ -358,6 +362,10 @@ RISCVRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OpdsMapping[0] = GPRValueMapping; + // Atomics always use GPR sources. Don't refine any further. + if (cast<GStore>(MI).isAtomic()) + break; + // Use FPR64 for s64 stores on rv32. if (GPRSize == 32 && Size.getFixedValue() == 64) { assert(MF.getSubtarget<RISCVSubtarget>().hasStdExtD()); diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td index a02de31..27cf057 100644 --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -1421,7 +1421,7 @@ def HasVendorXMIPSCMov : Predicate<"Subtarget->hasVendorXMIPSCMov()">, AssemblerPredicate<(all_of FeatureVendorXMIPSCMov), "'Xmipscmov' ('mips.ccmov' instruction)">; -def UseCCMovInsn : Predicate<"Subtarget->useCCMovInsn()">; +def UseMIPSCCMovInsn : Predicate<"Subtarget->useMIPSCCMovInsn()">; def FeatureVendorXMIPSLSP : RISCVExtension<1, 0, "MIPS optimization for hardware load-store bonding">; diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index dcce2d2..b624076 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -434,7 +434,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::ABS, MVT::i32, Custom); } - if (!Subtarget.useCCMovInsn() && !Subtarget.hasVendorXTHeadCondMov()) + if (!Subtarget.useMIPSCCMovInsn() && !Subtarget.hasVendorXTHeadCondMov()) setOperationAction(ISD::SELECT, XLenVT, Custom); if (Subtarget.hasVendorXqcia() && !Subtarget.is64Bit()) { diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXMips.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXMips.td index 115ab38e..0b5bee1 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXMips.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXMips.td @@ -175,7 +175,7 @@ def MIPS_CCMOV : RVInstR4<0b11, 0b011, OPC_CUSTOM_0, (outs GPR:$rd), Sched<[]>; } -let Predicates = [UseCCMovInsn] in { +let Predicates = [UseMIPSCCMovInsn] in { def : Pat<(select (riscv_setne (XLenVT GPR:$rs2)), (XLenVT GPR:$rs1), (XLenVT GPR:$rs3)), (MIPS_CCMOV GPR:$rs1, GPR:$rs2, GPR:$rs3)>; diff --git a/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp index c81a20b..115a96e 100644 --- a/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp +++ b/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp @@ -92,7 +92,7 @@ bool RISCVLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { if (skipFunction(Fn.getFunction())) return false; const RISCVSubtarget &Subtarget = Fn.getSubtarget<RISCVSubtarget>(); - if (!Subtarget.useLoadStorePairs()) + if (!Subtarget.useMIPSLoadStorePairs()) return false; bool MadeChange = false; diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp index e35ffaf..715ac4c 100644 --- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp @@ -65,9 +65,9 @@ static cl::opt<bool> UseMIPSLoadStorePairsOpt( cl::desc("Enable the load/store pair optimization pass"), cl::init(false), cl::Hidden); -static cl::opt<bool> UseCCMovInsn("use-riscv-ccmov", - cl::desc("Use 'mips.ccmov' instruction"), - cl::init(true), cl::Hidden); +static cl::opt<bool> UseMIPSCCMovInsn("use-riscv-mips-ccmov", + cl::desc("Use 'mips.ccmov' 
instruction"), + cl::init(true), cl::Hidden); void RISCVSubtarget::anchor() {} @@ -246,10 +246,10 @@ void RISCVSubtarget::overridePostRASchedPolicy( } } -bool RISCVSubtarget::useLoadStorePairs() const { +bool RISCVSubtarget::useMIPSLoadStorePairs() const { return UseMIPSLoadStorePairsOpt && HasVendorXMIPSLSP; } -bool RISCVSubtarget::useCCMovInsn() const { - return UseCCMovInsn && HasVendorXMIPSCMov; +bool RISCVSubtarget::useMIPSCCMovInsn() const { + return UseMIPSCCMovInsn && HasVendorXMIPSCMov; } diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h index 7dffa63..6acf799 100644 --- a/llvm/lib/Target/RISCV/RISCVSubtarget.h +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h @@ -227,8 +227,8 @@ public: unsigned getXLen() const { return is64Bit() ? 64 : 32; } - bool useLoadStorePairs() const; - bool useCCMovInsn() const; + bool useMIPSLoadStorePairs() const; + bool useMIPSCCMovInsn() const; unsigned getFLen() const { if (HasStdExtD) return 64; diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp index 9f2e075..e16c8f0 100644 --- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp @@ -2811,9 +2811,7 @@ bool SPIRVEmitIntrinsics::runOnFunction(Function &Func) { GetElementPtrInst *NewGEP = simplifyZeroLengthArrayGepInst(Ref); if (NewGEP) { Ref->replaceAllUsesWith(NewGEP); - if (isInstructionTriviallyDead(Ref)) - DeadInsts.insert(Ref); - + DeadInsts.insert(Ref); Ref = NewGEP; } if (Type *GepTy = getGEPType(Ref)) diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp index b906690..62a3c88 100644 --- a/llvm/lib/TargetParser/TargetParser.cpp +++ b/llvm/lib/TargetParser/TargetParser.cpp @@ -444,7 +444,7 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T, Features["atomic-fmin-fmax-global-f32"] = true; Features["atomic-fmin-fmax-global-f64"] = true; Features["wavefrontsize32"] = true; - Features["cluster"] = true; + Features["clusters"] = true; break; case GK_GFX1201: case GK_GFX1200: diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp index 8d9a0e7..50130da 100644 --- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp @@ -2067,6 +2067,36 @@ static void inferAttrsFromFunctionBodies(const SCCNodeSet &SCCNodes, AI.run(SCCNodes, Changed); } +// Determines if the function 'F' can be marked 'norecurse'. +// It returns true if any call within 'F' could lead to a recursive +// call back to 'F', and false otherwise. +// The 'AnyFunctionsAddressIsTaken' parameter is a module-wide flag +// that is true if any function's address is taken, or if any function +// has external linkage. This is used to determine the safety of +// external/library calls. +static bool mayHaveRecursiveCallee(Function &F, + bool AnyFunctionsAddressIsTaken = true) { + for (const auto &BB : F) { + for (const auto &I : BB.instructionsWithoutDebug()) { + if (const auto *CB = dyn_cast<CallBase>(&I)) { + const Function *Callee = CB->getCalledFunction(); + if (!Callee || Callee == &F) + return true; + + if (Callee->doesNotRecurse()) + continue; + + if (!AnyFunctionsAddressIsTaken || + (Callee->isDeclaration() && + Callee->hasFnAttribute(Attribute::NoCallback))) + continue; + return true; + } + } + } + return false; +} + static void addNoRecurseAttrs(const SCCNodeSet &SCCNodes, SmallPtrSet<Function *, 8> &Changed) { // Try and identify functions that do not recurse. 
@@ -2078,28 +2108,14 @@ static void addNoRecurseAttrs(const SCCNodeSet &SCCNodes, Function *F = *SCCNodes.begin(); if (!F || !F->hasExactDefinition() || F->doesNotRecurse()) return; - - // If all of the calls in F are identifiable and are to norecurse functions, F - // is norecurse. This check also detects self-recursion as F is not currently - // marked norecurse, so any called from F to F will not be marked norecurse. - for (auto &BB : *F) - for (auto &I : BB.instructionsWithoutDebug()) - if (auto *CB = dyn_cast<CallBase>(&I)) { - Function *Callee = CB->getCalledFunction(); - if (!Callee || Callee == F || - (!Callee->doesNotRecurse() && - !(Callee->isDeclaration() && - Callee->hasFnAttribute(Attribute::NoCallback)))) - // Function calls a potentially recursive function. - return; - } - - // Every call was to a non-recursive function other than this function, and - // we have no indirect recursion as the SCC size is one. This function cannot - // recurse. - F->setDoesNotRecurse(); - ++NumNoRecurse; - Changed.insert(F); + if (!mayHaveRecursiveCallee(*F)) { + // Every call was to a non-recursive function other than this function, and + // we have no indirect recursion as the SCC size is one. This function + // cannot recurse. + F->setDoesNotRecurse(); + ++NumNoRecurse; + Changed.insert(F); + } } // Set the noreturn function attribute if possible. @@ -2429,3 +2445,62 @@ ReversePostOrderFunctionAttrsPass::run(Module &M, ModuleAnalysisManager &AM) { PA.preserve<LazyCallGraphAnalysis>(); return PA; } + +PreservedAnalyses NoRecurseLTOInferencePass::run(Module &M, + ModuleAnalysisManager &MAM) { + + // Check if any function in the whole program has its address taken or has + // potentially external linkage. + // We use this information when inferring norecurse attribute: If there is + // no function whose address is taken and all functions have internal + // linkage, there is no path for a callback to any user function. + bool AnyFunctionsAddressIsTaken = false; + for (Function &F : M) { + if (F.isDeclaration() || F.doesNotRecurse()) + continue; + if (!F.hasLocalLinkage() || F.hasAddressTaken()) { + AnyFunctionsAddressIsTaken = true; + break; + } + } + + // Run norecurse inference on all RefSCCs in the LazyCallGraph for this + // module. + bool Changed = false; + LazyCallGraph &CG = MAM.getResult<LazyCallGraphAnalysis>(M); + CG.buildRefSCCs(); + + for (LazyCallGraph::RefSCC &RC : CG.postorder_ref_sccs()) { + // Skip any RefSCC that is part of a call cycle. A RefSCC containing more + // than one SCC indicates a recursive relationship involving indirect calls. + if (RC.size() > 1) + continue; + + // RefSCC contains a single-SCC. SCC size > 1 indicates mutually recursive + // functions. Ex: foo1 -> foo2 -> foo3 -> foo1. + LazyCallGraph::SCC &S = *RC.begin(); + if (S.size() > 1) + continue; + + // Get the single function from this SCC. + Function &F = S.begin()->getFunction(); + if (!F.hasExactDefinition() || F.doesNotRecurse()) + continue; + + // If the analysis confirms that this function has no recursive calls + // (either direct, indirect, or through external linkages), + // we can safely apply the norecurse attribute. 
+ if (!mayHaveRecursiveCallee(F, AnyFunctionsAddressIsTaken)) { + F.setDoesNotRecurse(); + ++NumNoRecurse; + Changed = true; + } + } + + PreservedAnalyses PA; + if (Changed) + PA.preserve<LazyCallGraphAnalysis>(); + else + PA = PreservedAnalyses::all(); + return PA; +} diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 8f60e50..8c8fc69 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -3356,7 +3356,10 @@ Instruction *InstCombinerImpl::foldSelectOfBools(SelectInst &SI) { impliesPoisonOrCond(FalseVal, B, /*Expected=*/false)) { // (A || B) || C --> A || (B | C) return replaceInstUsesWith( - SI, Builder.CreateLogicalOr(A, Builder.CreateOr(B, FalseVal))); + SI, Builder.CreateLogicalOr(A, Builder.CreateOr(B, FalseVal), "", + ProfcheckDisableMetadataFixes + ? nullptr + : cast<SelectInst>(CondVal))); } // (A && B) || (C && B) --> (A || C) && B @@ -3398,7 +3401,10 @@ Instruction *InstCombinerImpl::foldSelectOfBools(SelectInst &SI) { impliesPoisonOrCond(TrueVal, B, /*Expected=*/true)) { // (A && B) && C --> A && (B & C) return replaceInstUsesWith( - SI, Builder.CreateLogicalAnd(A, Builder.CreateAnd(B, TrueVal))); + SI, Builder.CreateLogicalAnd(A, Builder.CreateAnd(B, TrueVal), "", + ProfcheckDisableMetadataFixes + ? nullptr + : cast<SelectInst>(CondVal))); } // (A || B) && (C || B) --> (A && C) || B diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 148bfa8..b8cfe3a 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -4895,9 +4895,8 @@ bool SimplifyCFGOpt::simplifyTerminatorOnSelect(Instruction *OldTerm, // We found both of the successors we were looking for. // Create a conditional branch sharing the condition of the select. BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB); - if (TrueWeight != FalseWeight) - setBranchWeights(*NewBI, {TrueWeight, FalseWeight}, - /*IsExpected=*/false, /*ElideAllZero=*/true); + setBranchWeights(*NewBI, {TrueWeight, FalseWeight}, + /*IsExpected=*/false, /*ElideAllZero=*/true); } } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) { // Neither of the selected blocks were successors, so this @@ -4982,9 +4981,15 @@ bool SimplifyCFGOpt::simplifyIndirectBrOnSelect(IndirectBrInst *IBI, BasicBlock *TrueBB = TBA->getBasicBlock(); BasicBlock *FalseBB = FBA->getBasicBlock(); + // The select's profile becomes the profile of the conditional branch that + // replaces the indirect branch. + SmallVector<uint32_t> SelectBranchWeights(2); + if (!ProfcheckDisableMetadataFixes) + extractBranchWeights(*SI, SelectBranchWeights); // Perform the actual simplification. 
- return simplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB, 0, - 0); + return simplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB, + SelectBranchWeights[0], + SelectBranchWeights[1]); } /// This is called when we find an icmp instruction @@ -7952,19 +7957,27 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) { BasicBlock *BB = IBI->getParent(); bool Changed = false; + SmallVector<uint32_t> BranchWeights; + const bool HasBranchWeights = !ProfcheckDisableMetadataFixes && + extractBranchWeights(*IBI, BranchWeights); + + DenseMap<const BasicBlock *, uint64_t> TargetWeight; + if (HasBranchWeights) + for (size_t I = 0, E = IBI->getNumDestinations(); I < E; ++I) + TargetWeight[IBI->getDestination(I)] += BranchWeights[I]; // Eliminate redundant destinations. SmallPtrSet<Value *, 8> Succs; SmallSetVector<BasicBlock *, 8> RemovedSuccs; - for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) { - BasicBlock *Dest = IBI->getDestination(i); + for (unsigned I = 0, E = IBI->getNumDestinations(); I != E; ++I) { + BasicBlock *Dest = IBI->getDestination(I); if (!Dest->hasAddressTaken() || !Succs.insert(Dest).second) { if (!Dest->hasAddressTaken()) RemovedSuccs.insert(Dest); Dest->removePredecessor(BB); - IBI->removeDestination(i); - --i; - --e; + IBI->removeDestination(I); + --I; + --E; Changed = true; } } @@ -7990,7 +8003,12 @@ bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) { eraseTerminatorAndDCECond(IBI); return true; } - + if (HasBranchWeights) { + SmallVector<uint64_t> NewBranchWeights(IBI->getNumDestinations()); + for (size_t I = 0, E = IBI->getNumDestinations(); I < E; ++I) + NewBranchWeights[I] += TargetWeight.find(IBI->getDestination(I))->second; + setFittedBranchWeights(*IBI, NewBranchWeights, /*IsExpected=*/false); + } if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) { if (simplifyIndirectBrOnSelect(IBI, SI)) return requestResimplify(); diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index e434e73..d393a9c 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8393,11 +8393,11 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes( R->setOperand(1, WideIV->getStepValue()); } - VPlanTransforms::runPass( - VPlanTransforms::addExitUsersForFirstOrderRecurrences, *Plan, Range); + // TODO: We can't call runPass on these transforms yet, due to verifier + // failures. + VPlanTransforms::addExitUsersForFirstOrderRecurrences(*Plan, Range); DenseMap<VPValue *, VPValue *> IVEndValues; - VPlanTransforms::runPass(VPlanTransforms::addScalarResumePhis, *Plan, - RecipeBuilder, IVEndValues); + VPlanTransforms::addScalarResumePhis(*Plan, RecipeBuilder, IVEndValues); // --------------------------------------------------------------------------- // Transform initial VPlan: Apply previously taken decisions, in order, to @@ -8508,8 +8508,9 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) { DenseMap<VPValue *, VPValue *> IVEndValues; // TODO: IVEndValues are not used yet in the native path, to optimize exit // values. - VPlanTransforms::runPass(VPlanTransforms::addScalarResumePhis, *Plan, - RecipeBuilder, IVEndValues); + // TODO: We can't call runPass on the transform yet, due to verifier + // failures. 
+ VPlanTransforms::addScalarResumePhis(*Plan, RecipeBuilder, IVEndValues); assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid"); return Plan; diff --git a/llvm/test/Bitcode/thinlto-alias-addrspacecast.ll b/llvm/test/Bitcode/thinlto-alias-addrspacecast.ll new file mode 100644 index 0000000..fe4f05e --- /dev/null +++ b/llvm/test/Bitcode/thinlto-alias-addrspacecast.ll @@ -0,0 +1,7 @@ +; RUN: opt -module-summary < %s | llvm-dis | FileCheck %s + +@__oclc_ABI_version = linkonce_odr hidden addrspace(4) constant i32 500, align 4 +@_ZL20__oclc_ABI_version__ = internal alias i32, addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr) + +; CHECK: ^1 = gv: (name: "__oclc_ABI_version", summaries: (variable: (module: ^0, flags: {{.*}}))) +; CHECK: ^2 = gv: (name: "_ZL20__oclc_ABI_version__", summaries: (alias: (module: ^0, flags: {{.*}}, aliasee: ^1))) diff --git a/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll index 9e24023..ebbeab9 100644 --- a/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll +++ b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll @@ -146,9 +146,9 @@ define void @no_free_vgprs_at_agpr_to_agpr_copy(float %v0, float %v1) #0 { ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; copy ; GFX908-NEXT: ;;#ASMEND -; GFX908-NEXT: v_accvgpr_read_b32 v32, a2 +; GFX908-NEXT: v_accvgpr_read_b32 v39, a2 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a3, v32 +; GFX908-NEXT: v_accvgpr_write_b32 a3, v39 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; use a3 v[0:31] ; GFX908-NEXT: ;;#ASMEND @@ -437,9 +437,9 @@ define void @v32_asm_def_use(float %v0, float %v1) #4 { ; GFX908-NEXT: ; copy ; GFX908-NEXT: ;;#ASMEND ; GFX908-NEXT: s_nop 7 -; GFX908-NEXT: v_accvgpr_read_b32 v33, a2 +; GFX908-NEXT: v_accvgpr_read_b32 v35, a2 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a3, v33 +; GFX908-NEXT: v_accvgpr_write_b32 a3, v35 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; use a3 v[0:31] ; GFX908-NEXT: ;;#ASMEND @@ -1045,9 +1045,9 @@ define void @no_free_vgprs_at_sgpr_to_agpr_copy(float %v0, float %v1) #0 { ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; copy ; GFX908-NEXT: ;;#ASMEND -; GFX908-NEXT: v_accvgpr_read_b32 v32, a2 +; GFX908-NEXT: v_accvgpr_read_b32 v39, a2 ; GFX908-NEXT: s_nop 1 -; GFX908-NEXT: v_accvgpr_write_b32 a3, v32 +; GFX908-NEXT: v_accvgpr_write_b32 a3, v39 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; use a3 v[0:31] ; GFX908-NEXT: ;;#ASMEND diff --git a/llvm/test/CodeGen/AMDGPU/agpr-copy-propagation.mir b/llvm/test/CodeGen/AMDGPU/agpr-copy-propagation.mir index a42cf43..7e82382d 100644 --- a/llvm/test/CodeGen/AMDGPU/agpr-copy-propagation.mir +++ b/llvm/test/CodeGen/AMDGPU/agpr-copy-propagation.mir @@ -40,8 +40,8 @@ body: | ; GFX908: liveins: $agpr0 ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: renamable $vgpr0 = COPY renamable $agpr0, implicit $exec - ; GFX908-NEXT: renamable $agpr1 = COPY renamable $vgpr0, implicit $exec - ; GFX908-NEXT: renamable $agpr2 = COPY renamable $vgpr0, implicit $exec + ; GFX908-NEXT: renamable $agpr1 = COPY $agpr0, implicit $exec + ; GFX908-NEXT: renamable $agpr2 = COPY $agpr0, implicit $exec ; GFX908-NEXT: S_ENDPGM 0, implicit $vgpr0, implicit $agpr1, implicit $agpr2 ; ; GFX90A-LABEL: name: do_not_propagate_agpr_to_agpr diff --git a/llvm/test/CodeGen/AMDGPU/elf-header-flags-sramecc.ll b/llvm/test/CodeGen/AMDGPU/elf-header-flags-sramecc.ll index c4479b3..e3bc516 100644 --- a/llvm/test/CodeGen/AMDGPU/elf-header-flags-sramecc.ll +++ b/llvm/test/CodeGen/AMDGPU/elf-header-flags-sramecc.ll 
@@ -15,6 +15,9 @@ ; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx950 < %s | llvm-readobj --file-header - | FileCheck --check-prefix=SRAM-ECC-GFX950 %s ; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx950 -mattr=+sramecc < %s | llvm-readobj --file-header - | FileCheck --check-prefix=SRAM-ECC-GFX950 %s +; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx1250 < %s | llvm-readobj --file-header - | FileCheck --check-prefix=SRAM-ECC-GFX1250 %s +; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx1250 -mattr=+sramecc < %s | llvm-readobj --file-header - | FileCheck --check-prefix=SRAM-ECC-GFX1250 %s + ; NO-SRAM-ECC-GFX906: Flags [ ; NO-SRAM-ECC-GFX906-NEXT: EF_AMDGPU_FEATURE_XNACK_V3 (0x100) ; NO-SRAM-ECC-GFX906-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX906 (0x2F) @@ -52,6 +55,11 @@ ; SRAM-ECC-GFX950: EF_AMDGPU_MACH_AMDGCN_GFX950 (0x4F) ; SRAM-ECC-GFX950: ] +; SRAM-ECC-GFX1250: Flags [ +; SRAM-ECC-GFX1250: EF_AMDGPU_FEATURE_SRAMECC_V3 (0x200) +; SRAM-ECC-GFX1250: EF_AMDGPU_MACH_AMDGCN_GFX1250 (0x49) +; SRAM-ECC-GFX1250: ] + define amdgpu_kernel void @elf_header() { ret void } diff --git a/llvm/test/CodeGen/AMDGPU/mfma-no-register-aliasing.ll b/llvm/test/CodeGen/AMDGPU/mfma-no-register-aliasing.ll index 51cd564..f46116e 100644 --- a/llvm/test/CodeGen/AMDGPU/mfma-no-register-aliasing.ll +++ b/llvm/test/CodeGen/AMDGPU/mfma-no-register-aliasing.ll @@ -95,66 +95,66 @@ define amdgpu_kernel void @test_mfma_f32_32x32x1f32(ptr addrspace(1) %arg) #0 { ; GREEDY908-NEXT: v_mfma_f32_32x32x1f32 a[32:63], v3, v0, a[0:31] ; GREEDY908-NEXT: s_nop 15 ; GREEDY908-NEXT: s_nop 1 -; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a32 -; GREEDY908-NEXT: v_accvgpr_read_b32 v5, a61 -; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a60 -; GREEDY908-NEXT: v_accvgpr_write_b32 a2, v1 -; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a33 -; GREEDY908-NEXT: v_accvgpr_read_b32 v7, a59 -; GREEDY908-NEXT: v_accvgpr_read_b32 v8, a58 -; GREEDY908-NEXT: v_accvgpr_write_b32 a3, v1 +; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a32 +; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a33 ; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a34 -; GREEDY908-NEXT: v_accvgpr_read_b32 v9, a57 -; GREEDY908-NEXT: v_accvgpr_read_b32 v10, a56 +; GREEDY908-NEXT: v_accvgpr_write_b32 a2, v2 +; GREEDY908-NEXT: v_accvgpr_write_b32 a3, v6 ; GREEDY908-NEXT: v_accvgpr_write_b32 a4, v1 -; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a35 -; GREEDY908-NEXT: v_accvgpr_read_b32 v11, a55 -; GREEDY908-NEXT: v_accvgpr_read_b32 v12, a54 -; GREEDY908-NEXT: v_accvgpr_write_b32 a5, v1 -; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a36 -; GREEDY908-NEXT: v_accvgpr_read_b32 v13, a53 -; GREEDY908-NEXT: v_accvgpr_read_b32 v14, a52 -; GREEDY908-NEXT: v_accvgpr_write_b32 a6, v1 +; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a35 +; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a36 ; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a37 -; GREEDY908-NEXT: v_accvgpr_read_b32 v15, a51 -; GREEDY908-NEXT: v_accvgpr_read_b32 v16, a50 +; GREEDY908-NEXT: v_accvgpr_write_b32 a5, v2 +; GREEDY908-NEXT: v_accvgpr_write_b32 a6, v6 ; GREEDY908-NEXT: v_accvgpr_write_b32 a7, v1 -; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a38 -; GREEDY908-NEXT: v_accvgpr_read_b32 v17, a49 -; GREEDY908-NEXT: v_accvgpr_read_b32 v18, a48 -; GREEDY908-NEXT: v_accvgpr_write_b32 a8, v1 -; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a39 -; GREEDY908-NEXT: v_accvgpr_read_b32 v19, a47 -; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a46 -; GREEDY908-NEXT: v_accvgpr_write_b32 a9, v1 +; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a38 +; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a39 ; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a40 -; 
GREEDY908-NEXT: v_accvgpr_write_b32 a16, v2 -; GREEDY908-NEXT: v_accvgpr_write_b32 a17, v19 +; GREEDY908-NEXT: v_accvgpr_write_b32 a8, v2 +; GREEDY908-NEXT: v_accvgpr_write_b32 a9, v6 ; GREEDY908-NEXT: v_accvgpr_write_b32 a10, v1 -; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a41 -; GREEDY908-NEXT: v_accvgpr_write_b32 a18, v18 -; GREEDY908-NEXT: v_accvgpr_write_b32 a19, v17 -; GREEDY908-NEXT: v_accvgpr_write_b32 a11, v1 -; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a42 -; GREEDY908-NEXT: v_accvgpr_write_b32 a20, v16 -; GREEDY908-NEXT: v_accvgpr_write_b32 a21, v15 -; GREEDY908-NEXT: v_accvgpr_write_b32 a12, v1 +; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a41 +; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a42 ; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a43 -; GREEDY908-NEXT: v_accvgpr_write_b32 a22, v14 -; GREEDY908-NEXT: v_accvgpr_write_b32 a23, v13 +; GREEDY908-NEXT: v_accvgpr_write_b32 a11, v2 +; GREEDY908-NEXT: v_accvgpr_write_b32 a12, v6 ; GREEDY908-NEXT: v_accvgpr_write_b32 a13, v1 -; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a44 -; GREEDY908-NEXT: v_accvgpr_write_b32 a24, v12 -; GREEDY908-NEXT: v_accvgpr_write_b32 a25, v11 -; GREEDY908-NEXT: v_accvgpr_write_b32 a14, v1 -; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a45 -; GREEDY908-NEXT: v_accvgpr_write_b32 a26, v10 -; GREEDY908-NEXT: v_accvgpr_write_b32 a27, v9 -; GREEDY908-NEXT: v_accvgpr_write_b32 a15, v1 -; GREEDY908-NEXT: v_accvgpr_write_b32 a28, v8 -; GREEDY908-NEXT: v_accvgpr_write_b32 a29, v7 +; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a44 +; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a45 +; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a46 +; GREEDY908-NEXT: v_accvgpr_write_b32 a14, v2 +; GREEDY908-NEXT: v_accvgpr_write_b32 a15, v6 +; GREEDY908-NEXT: v_accvgpr_write_b32 a16, v1 +; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a47 +; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a48 +; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a49 +; GREEDY908-NEXT: v_accvgpr_write_b32 a17, v2 +; GREEDY908-NEXT: v_accvgpr_write_b32 a18, v6 +; GREEDY908-NEXT: v_accvgpr_write_b32 a19, v1 +; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a50 +; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a51 +; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a52 +; GREEDY908-NEXT: v_accvgpr_write_b32 a20, v2 +; GREEDY908-NEXT: v_accvgpr_write_b32 a21, v6 +; GREEDY908-NEXT: v_accvgpr_write_b32 a22, v1 +; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a53 +; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a54 +; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a55 +; GREEDY908-NEXT: v_accvgpr_write_b32 a23, v2 +; GREEDY908-NEXT: v_accvgpr_write_b32 a24, v6 +; GREEDY908-NEXT: v_accvgpr_write_b32 a25, v1 +; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a56 +; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a57 +; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a58 +; GREEDY908-NEXT: v_accvgpr_write_b32 a26, v2 +; GREEDY908-NEXT: v_accvgpr_write_b32 a27, v6 +; GREEDY908-NEXT: v_accvgpr_write_b32 a28, v1 +; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a59 +; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a60 +; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a61 +; GREEDY908-NEXT: v_accvgpr_write_b32 a29, v2 ; GREEDY908-NEXT: v_accvgpr_write_b32 a30, v6 -; GREEDY908-NEXT: v_accvgpr_write_b32 a31, v5 +; GREEDY908-NEXT: v_accvgpr_write_b32 a31, v1 ; GREEDY908-NEXT: s_nop 0 ; GREEDY908-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v3, v0, a[0:31] ; GREEDY908-NEXT: s_nop 15 @@ -667,11 +667,11 @@ define amdgpu_kernel void @test_mfma_f32_16x16x1f32(ptr addrspace(1) %arg) #0 { ; GREEDY908-NEXT: v_mfma_f32_16x16x1f32 a[18:33], v0, v1, a[18:33] ; GREEDY908-NEXT: v_mfma_f32_16x16x1f32 a[2:17], v0, v1, a[18:33] ; GREEDY908-NEXT: s_nop 8 +; 
GREEDY908-NEXT: v_accvgpr_read_b32 v5, a18 ; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a19 -; GREEDY908-NEXT: v_accvgpr_read_b32 v3, a18 ; GREEDY908-NEXT: s_nop 0 +; GREEDY908-NEXT: v_accvgpr_write_b32 a0, v5 ; GREEDY908-NEXT: v_accvgpr_write_b32 a1, v2 -; GREEDY908-NEXT: v_accvgpr_write_b32 a0, v3 ; GREEDY908-NEXT: s_nop 0 ; GREEDY908-NEXT: v_mfma_f32_16x16x1f32 a[0:15], v0, v1, a[0:15] ; GREEDY908-NEXT: s_nop 9 diff --git a/llvm/test/CodeGen/AMDGPU/no-fold-accvgpr-mov.ll b/llvm/test/CodeGen/AMDGPU/no-fold-accvgpr-mov.ll index cf244f0..be1788c 100644 --- a/llvm/test/CodeGen/AMDGPU/no-fold-accvgpr-mov.ll +++ b/llvm/test/CodeGen/AMDGPU/no-fold-accvgpr-mov.ll @@ -54,19 +54,20 @@ define amdgpu_kernel void @matmul_kernel(i32 %a0, i32 %a1) { ; GFX908-NEXT: s_branch .LBB0_2 ; GFX908-NEXT: .LBB0_1: ; %bb2 ; GFX908-NEXT: ; in Loop: Header=BB0_2 Depth=1 +; GFX908-NEXT: s_nop 6 +; GFX908-NEXT: v_accvgpr_read_b32 v3, a2 ; GFX908-NEXT: s_or_b32 s4, s3, 1 ; GFX908-NEXT: s_ashr_i32 s5, s3, 31 ; GFX908-NEXT: s_mov_b32 s3, s2 ; GFX908-NEXT: v_mov_b32_e32 v1, s2 -; GFX908-NEXT: s_nop 2 -; GFX908-NEXT: v_accvgpr_read_b32 v0, a2 ; GFX908-NEXT: v_mov_b32_e32 v2, s3 +; GFX908-NEXT: v_accvgpr_write_b32 a0, v3 ; GFX908-NEXT: v_accvgpr_read_b32 v4, a1 ; GFX908-NEXT: v_accvgpr_read_b32 v3, a1 -; GFX908-NEXT: v_accvgpr_write_b32 a0, v0 +; GFX908-NEXT: s_and_b32 s3, s5, s4 ; GFX908-NEXT: v_accvgpr_write_b32 a2, v4 ; GFX908-NEXT: v_accvgpr_write_b32 a3, v3 -; GFX908-NEXT: s_and_b32 s3, s5, s4 +; GFX908-NEXT: s_nop 0 ; GFX908-NEXT: v_mfma_f32_16x16x16f16 a[2:5], v[1:2], v[1:2], a[0:3] ; GFX908-NEXT: s_cbranch_execz .LBB0_4 ; GFX908-NEXT: .LBB0_2: ; %bb diff --git a/llvm/test/CodeGen/PowerPC/vec-nmsub.ll b/llvm/test/CodeGen/PowerPC/vec-nmsub.ll new file mode 100644 index 0000000..8f4ac972 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/vec-nmsub.ll @@ -0,0 +1,36 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -verify-machineinstrs < %s -mcpu=pwr5 -mtriple=ppc32-- -mattr=+altivec | FileCheck %s + +define dso_local <4 x float> @intrinsic(<4 x float> noundef %a, <4 x float> noundef %b, <4 x float> noundef %c) local_unnamed_addr { +; CHECK-LABEL: intrinsic: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vnmsubfp 2, 2, 3, 4 +; CHECK-NEXT: blr +entry: + %0 = tail call <4 x float> @llvm.ppc.altivec.vnmsubfp(<4 x float> %a, <4 x float> %b, <4 x float> %c) + ret <4 x float> %0 +} + +define <4 x float> @manual_llvm_fma(<4 x float> %a, <4 x float> %b, <4 x float> %c) unnamed_addr { +; CHECK-LABEL: manual_llvm_fma: +; CHECK: # %bb.0: # %start +; CHECK-NEXT: vnmsubfp 2, 2, 3, 4 +; CHECK-NEXT: blr +start: + %0 = fneg <4 x float> %c + %1 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %0) + %2 = fneg <4 x float> %1 + ret <4 x float> %2 +} + +define dso_local <4 x float> @manual_vmaddfp(<4 x float> noundef %a, <4 x float> noundef %b, <4 x float> noundef %c) local_unnamed_addr { +; CHECK-LABEL: manual_vmaddfp: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vnmsubfp 2, 2, 3, 4 +; CHECK-NEXT: blr +entry: + %fneg.i3 = fneg <4 x float> %c + %0 = tail call <4 x float> @llvm.ppc.altivec.vmaddfp(<4 x float> %a, <4 x float> %b, <4 x float> %fneg.i3) + %fneg.i = fneg <4 x float> %0 + ret <4 x float> %fneg.i +} diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/atomic-load-store-fp.ll b/llvm/test/CodeGen/RISCV/GlobalISel/atomic-load-store-fp.ll new file mode 100644 index 0000000..4ad2d2c --- /dev/null +++ 
b/llvm/test/CodeGen/RISCV/GlobalISel/atomic-load-store-fp.ll @@ -0,0 +1,950 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -global-isel -mattr=+d -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV32I %s +; RUN: llc -mtriple=riscv32 -global-isel -mattr=+d,+a,+no-trailing-seq-cst-fence \ +; RUN: -verify-machineinstrs < %s | FileCheck -check-prefixes=RV32IA,RV32IA-WMO %s +; RUN: llc -mtriple=riscv32 -global-isel -mattr=+d,+a,+ztso,+no-trailing-seq-cst-fence \ +; RUN: -verify-machineinstrs < %s | FileCheck -check-prefixes=RV32IA,RV32IA-TSO %s +; RUN: llc -mtriple=riscv64 -global-isel -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV64I %s +; RUN: llc -mtriple=riscv64 -global-isel -mattr=+d,+a,+no-trailing-seq-cst-fence \ +; RUN: -verify-machineinstrs < %s | FileCheck -check-prefixes=RV64IA,RV64IA-WMO %s +; RUN: llc -mtriple=riscv64 -global-isel -mattr=+d,+a,+ztso,+no-trailing-seq-cst-fence \ +; RUN: -verify-machineinstrs < %s | FileCheck -check-prefixes=RV64IA,RV64IA-TSO %s + + +; RUN: llc -mtriple=riscv32 -global-isel -mattr=+d,+a -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-WMO-TRAILING-FENCE %s +; RUN: llc -mtriple=riscv32 -global-isel -mattr=+d,+a,+ztso -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-TSO-TRAILING-FENCE %s + +; RUN: llc -mtriple=riscv64 -global-isel -mattr=+d,+a -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-WMO-TRAILING-FENCE %s +; RUN: llc -mtriple=riscv64 -global-isel -mattr=+d,+a,+ztso -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-TSO-TRAILING-FENCE %s + + +define float @atomic_load_f32_unordered(ptr %a) nounwind { +; RV32I-LABEL: atomic_load_f32_unordered: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: call __atomic_load_4 +; RV32I-NEXT: fmv.w.x fa0, a0 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_load_f32_unordered: +; RV32IA: # %bb.0: +; RV32IA-NEXT: lw a0, 0(a0) +; RV32IA-NEXT: fmv.w.x fa0, a0 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_load_f32_unordered: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 0 +; RV64I-NEXT: call __atomic_load_4 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomic_load_f32_unordered: +; RV64IA: # %bb.0: +; RV64IA-NEXT: lw a0, 0(a0) +; RV64IA-NEXT: fmv.w.x fa0, a0 +; RV64IA-NEXT: ret + %1 = load atomic float, ptr %a unordered, align 4 + ret float %1 +} + +define float @atomic_load_f32_monotonic(ptr %a) nounwind { +; RV32I-LABEL: atomic_load_f32_monotonic: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: call __atomic_load_4 +; RV32I-NEXT: fmv.w.x fa0, a0 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_load_f32_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: lw a0, 0(a0) +; RV32IA-NEXT: fmv.w.x fa0, a0 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_load_f32_monotonic: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 0 +; RV64I-NEXT: call __atomic_load_4 
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomic_load_f32_monotonic: +; RV64IA: # %bb.0: +; RV64IA-NEXT: lw a0, 0(a0) +; RV64IA-NEXT: fmv.w.x fa0, a0 +; RV64IA-NEXT: ret + %1 = load atomic float, ptr %a monotonic, align 4 + ret float %1 +} + +define float @atomic_load_f32_acquire(ptr %a) nounwind { +; RV32I-LABEL: atomic_load_f32_acquire: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 2 +; RV32I-NEXT: call __atomic_load_4 +; RV32I-NEXT: fmv.w.x fa0, a0 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-WMO-LABEL: atomic_load_f32_acquire: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: lw a0, 0(a0) +; RV32IA-WMO-NEXT: fence r, rw +; RV32IA-WMO-NEXT: fmv.w.x fa0, a0 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomic_load_f32_acquire: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: lw a0, 0(a0) +; RV32IA-TSO-NEXT: fmv.w.x fa0, a0 +; RV32IA-TSO-NEXT: ret +; +; RV64I-LABEL: atomic_load_f32_acquire: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 2 +; RV64I-NEXT: call __atomic_load_4 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: atomic_load_f32_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: lw a0, 0(a0) +; RV64IA-WMO-NEXT: fence r, rw +; RV64IA-WMO-NEXT: fmv.w.x fa0, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_load_f32_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: lw a0, 0(a0) +; RV64IA-TSO-NEXT: fmv.w.x fa0, a0 +; RV64IA-TSO-NEXT: ret +; +; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_f32_acquire: +; RV32IA-WMO-TRAILING-FENCE: # %bb.0: +; RV32IA-WMO-TRAILING-FENCE-NEXT: lw a0, 0(a0) +; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw +; RV32IA-WMO-TRAILING-FENCE-NEXT: fmv.w.x fa0, a0 +; RV32IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_f32_acquire: +; RV32IA-TSO-TRAILING-FENCE: # %bb.0: +; RV32IA-TSO-TRAILING-FENCE-NEXT: lw a0, 0(a0) +; RV32IA-TSO-TRAILING-FENCE-NEXT: fmv.w.x fa0, a0 +; RV32IA-TSO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_f32_acquire: +; RV64IA-WMO-TRAILING-FENCE: # %bb.0: +; RV64IA-WMO-TRAILING-FENCE-NEXT: lw a0, 0(a0) +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw +; RV64IA-WMO-TRAILING-FENCE-NEXT: fmv.w.x fa0, a0 +; RV64IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_f32_acquire: +; RV64IA-TSO-TRAILING-FENCE: # %bb.0: +; RV64IA-TSO-TRAILING-FENCE-NEXT: lw a0, 0(a0) +; RV64IA-TSO-TRAILING-FENCE-NEXT: fmv.w.x fa0, a0 +; RV64IA-TSO-TRAILING-FENCE-NEXT: ret + %1 = load atomic float, ptr %a acquire, align 4 + ret float %1 +} + +define float @atomic_load_f32_seq_cst(ptr %a) nounwind { +; RV32I-LABEL: atomic_load_f32_seq_cst: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 5 +; RV32I-NEXT: call __atomic_load_4 +; RV32I-NEXT: fmv.w.x fa0, a0 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-WMO-LABEL: atomic_load_f32_seq_cst: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: fence rw, rw +; RV32IA-WMO-NEXT: lw a0, 0(a0) +; RV32IA-WMO-NEXT: fence r, rw +; RV32IA-WMO-NEXT: fmv.w.x fa0, a0 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomic_load_f32_seq_cst: +; 
RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: fence rw, rw +; RV32IA-TSO-NEXT: lw a0, 0(a0) +; RV32IA-TSO-NEXT: fmv.w.x fa0, a0 +; RV32IA-TSO-NEXT: ret +; +; RV64I-LABEL: atomic_load_f32_seq_cst: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 5 +; RV64I-NEXT: call __atomic_load_4 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: atomic_load_f32_seq_cst: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: fence rw, rw +; RV64IA-WMO-NEXT: lw a0, 0(a0) +; RV64IA-WMO-NEXT: fence r, rw +; RV64IA-WMO-NEXT: fmv.w.x fa0, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_load_f32_seq_cst: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: fence rw, rw +; RV64IA-TSO-NEXT: lw a0, 0(a0) +; RV64IA-TSO-NEXT: fmv.w.x fa0, a0 +; RV64IA-TSO-NEXT: ret +; +; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_f32_seq_cst: +; RV32IA-WMO-TRAILING-FENCE: # %bb.0: +; RV32IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw +; RV32IA-WMO-TRAILING-FENCE-NEXT: lw a0, 0(a0) +; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw +; RV32IA-WMO-TRAILING-FENCE-NEXT: fmv.w.x fa0, a0 +; RV32IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_f32_seq_cst: +; RV32IA-TSO-TRAILING-FENCE: # %bb.0: +; RV32IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw +; RV32IA-TSO-TRAILING-FENCE-NEXT: lw a0, 0(a0) +; RV32IA-TSO-TRAILING-FENCE-NEXT: fmv.w.x fa0, a0 +; RV32IA-TSO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_f32_seq_cst: +; RV64IA-WMO-TRAILING-FENCE: # %bb.0: +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw +; RV64IA-WMO-TRAILING-FENCE-NEXT: lw a0, 0(a0) +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw +; RV64IA-WMO-TRAILING-FENCE-NEXT: fmv.w.x fa0, a0 +; RV64IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_f32_seq_cst: +; RV64IA-TSO-TRAILING-FENCE: # %bb.0: +; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw +; RV64IA-TSO-TRAILING-FENCE-NEXT: lw a0, 0(a0) +; RV64IA-TSO-TRAILING-FENCE-NEXT: fmv.w.x fa0, a0 +; RV64IA-TSO-TRAILING-FENCE-NEXT: ret + %1 = load atomic float, ptr %a seq_cst, align 4 + ret float %1 +} + +define double @atomic_load_f64_unordered(ptr %a) nounwind { +; RV32I-LABEL: atomic_load_f64_unordered: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: call __atomic_load_8 +; RV32I-NEXT: sw a0, 0(sp) +; RV32I-NEXT: sw a1, 4(sp) +; RV32I-NEXT: fld fa0, 0(sp) +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_load_f64_unordered: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -16 +; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-NEXT: li a1, 0 +; RV32IA-NEXT: call __atomic_load_8 +; RV32IA-NEXT: sw a0, 0(sp) +; RV32IA-NEXT: sw a1, 4(sp) +; RV32IA-NEXT: fld fa0, 0(sp) +; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 16 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_load_f64_unordered: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 0 +; RV64I-NEXT: call __atomic_load_8 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomic_load_f64_unordered: +; RV64IA: # %bb.0: +; RV64IA-NEXT: ld a0, 0(a0) +; RV64IA-NEXT: fmv.d.x fa0, a0 +; RV64IA-NEXT: ret + %1 = load atomic double, ptr %a 
unordered, align 8 + ret double %1 +} + +define double @atomic_load_f64_monotonic(ptr %a) nounwind { +; RV32I-LABEL: atomic_load_f64_monotonic: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: call __atomic_load_8 +; RV32I-NEXT: sw a0, 0(sp) +; RV32I-NEXT: sw a1, 4(sp) +; RV32I-NEXT: fld fa0, 0(sp) +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_load_f64_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -16 +; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-NEXT: li a1, 0 +; RV32IA-NEXT: call __atomic_load_8 +; RV32IA-NEXT: sw a0, 0(sp) +; RV32IA-NEXT: sw a1, 4(sp) +; RV32IA-NEXT: fld fa0, 0(sp) +; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 16 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_load_f64_monotonic: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 0 +; RV64I-NEXT: call __atomic_load_8 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomic_load_f64_monotonic: +; RV64IA: # %bb.0: +; RV64IA-NEXT: ld a0, 0(a0) +; RV64IA-NEXT: fmv.d.x fa0, a0 +; RV64IA-NEXT: ret + %1 = load atomic double, ptr %a monotonic, align 8 + ret double %1 +} + +define double @atomic_load_f64_acquire(ptr %a) nounwind { +; RV32I-LABEL: atomic_load_f64_acquire: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 2 +; RV32I-NEXT: call __atomic_load_8 +; RV32I-NEXT: sw a0, 0(sp) +; RV32I-NEXT: sw a1, 4(sp) +; RV32I-NEXT: fld fa0, 0(sp) +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_load_f64_acquire: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -16 +; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-NEXT: li a1, 2 +; RV32IA-NEXT: call __atomic_load_8 +; RV32IA-NEXT: sw a0, 0(sp) +; RV32IA-NEXT: sw a1, 4(sp) +; RV32IA-NEXT: fld fa0, 0(sp) +; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 16 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_load_f64_acquire: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 2 +; RV64I-NEXT: call __atomic_load_8 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: atomic_load_f64_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: ld a0, 0(a0) +; RV64IA-WMO-NEXT: fence r, rw +; RV64IA-WMO-NEXT: fmv.d.x fa0, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_load_f64_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: ld a0, 0(a0) +; RV64IA-TSO-NEXT: fmv.d.x fa0, a0 +; RV64IA-TSO-NEXT: ret +; +; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_f64_acquire: +; RV64IA-WMO-TRAILING-FENCE: # %bb.0: +; RV64IA-WMO-TRAILING-FENCE-NEXT: ld a0, 0(a0) +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw +; RV64IA-WMO-TRAILING-FENCE-NEXT: fmv.d.x fa0, a0 +; RV64IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_f64_acquire: +; RV64IA-TSO-TRAILING-FENCE: # %bb.0: +; RV64IA-TSO-TRAILING-FENCE-NEXT: ld a0, 0(a0) +; RV64IA-TSO-TRAILING-FENCE-NEXT: fmv.d.x fa0, a0 +; RV64IA-TSO-TRAILING-FENCE-NEXT: ret + %1 = load atomic double, ptr %a acquire, align 8 + ret double %1 
+} + +define double @atomic_load_f64_seq_cst(ptr %a) nounwind { +; RV32I-LABEL: atomic_load_f64_seq_cst: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 5 +; RV32I-NEXT: call __atomic_load_8 +; RV32I-NEXT: sw a0, 0(sp) +; RV32I-NEXT: sw a1, 4(sp) +; RV32I-NEXT: fld fa0, 0(sp) +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_load_f64_seq_cst: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -16 +; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-NEXT: li a1, 5 +; RV32IA-NEXT: call __atomic_load_8 +; RV32IA-NEXT: sw a0, 0(sp) +; RV32IA-NEXT: sw a1, 4(sp) +; RV32IA-NEXT: fld fa0, 0(sp) +; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 16 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_load_f64_seq_cst: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 5 +; RV64I-NEXT: call __atomic_load_8 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: atomic_load_f64_seq_cst: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: fence rw, rw +; RV64IA-WMO-NEXT: ld a0, 0(a0) +; RV64IA-WMO-NEXT: fence r, rw +; RV64IA-WMO-NEXT: fmv.d.x fa0, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_load_f64_seq_cst: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: fence rw, rw +; RV64IA-TSO-NEXT: ld a0, 0(a0) +; RV64IA-TSO-NEXT: fmv.d.x fa0, a0 +; RV64IA-TSO-NEXT: ret +; +; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_f64_seq_cst: +; RV64IA-WMO-TRAILING-FENCE: # %bb.0: +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw +; RV64IA-WMO-TRAILING-FENCE-NEXT: ld a0, 0(a0) +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw +; RV64IA-WMO-TRAILING-FENCE-NEXT: fmv.d.x fa0, a0 +; RV64IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_f64_seq_cst: +; RV64IA-TSO-TRAILING-FENCE: # %bb.0: +; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw +; RV64IA-TSO-TRAILING-FENCE-NEXT: ld a0, 0(a0) +; RV64IA-TSO-TRAILING-FENCE-NEXT: fmv.d.x fa0, a0 +; RV64IA-TSO-TRAILING-FENCE-NEXT: ret + %1 = load atomic double, ptr %a seq_cst, align 8 + ret double %1 +} + +define void @atomic_store_f32_unordered(ptr %a, float %b) nounwind { +; RV32I-LABEL: atomic_store_f32_unordered: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: fmv.x.w a1, fa0 +; RV32I-NEXT: li a2, 0 +; RV32I-NEXT: call __atomic_store_4 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_store_f32_unordered: +; RV32IA: # %bb.0: +; RV32IA-NEXT: fmv.x.w a1, fa0 +; RV32IA-NEXT: sw a1, 0(a0) +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_store_f32_unordered: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a2, 0 +; RV64I-NEXT: call __atomic_store_4 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomic_store_f32_unordered: +; RV64IA: # %bb.0: +; RV64IA-NEXT: fmv.x.w a1, fa0 +; RV64IA-NEXT: sw a1, 0(a0) +; RV64IA-NEXT: ret + store atomic float %b, ptr %a unordered, align 4 + ret void +} + +define void @atomic_store_f32_monotonic(ptr %a, float %b) nounwind { +; RV32I-LABEL: atomic_store_f32_monotonic: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; 
RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: fmv.x.w a1, fa0 +; RV32I-NEXT: li a2, 0 +; RV32I-NEXT: call __atomic_store_4 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_store_f32_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: fmv.x.w a1, fa0 +; RV32IA-NEXT: sw a1, 0(a0) +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_store_f32_monotonic: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a2, 0 +; RV64I-NEXT: call __atomic_store_4 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomic_store_f32_monotonic: +; RV64IA: # %bb.0: +; RV64IA-NEXT: fmv.x.w a1, fa0 +; RV64IA-NEXT: sw a1, 0(a0) +; RV64IA-NEXT: ret + store atomic float %b, ptr %a monotonic, align 4 + ret void +} + +define void @atomic_store_f32_release(ptr %a, float %b) nounwind { +; RV32I-LABEL: atomic_store_f32_release: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a2, 3 +; RV32I-NEXT: fmv.x.w a1, fa0 +; RV32I-NEXT: call __atomic_store_4 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-WMO-LABEL: atomic_store_f32_release: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: fence rw, w +; RV32IA-WMO-NEXT: fmv.x.w a1, fa0 +; RV32IA-WMO-NEXT: sw a1, 0(a0) +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomic_store_f32_release: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: fmv.x.w a1, fa0 +; RV32IA-TSO-NEXT: sw a1, 0(a0) +; RV32IA-TSO-NEXT: ret +; +; RV64I-LABEL: atomic_store_f32_release: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a2, 3 +; RV64I-NEXT: call __atomic_store_4 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: atomic_store_f32_release: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: fence rw, w +; RV64IA-WMO-NEXT: fmv.x.w a1, fa0 +; RV64IA-WMO-NEXT: sw a1, 0(a0) +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_store_f32_release: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: fmv.x.w a1, fa0 +; RV64IA-TSO-NEXT: sw a1, 0(a0) +; RV64IA-TSO-NEXT: ret +; +; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_store_f32_release: +; RV32IA-WMO-TRAILING-FENCE: # %bb.0: +; RV32IA-WMO-TRAILING-FENCE-NEXT: fence rw, w +; RV32IA-WMO-TRAILING-FENCE-NEXT: fmv.x.w a1, fa0 +; RV32IA-WMO-TRAILING-FENCE-NEXT: sw a1, 0(a0) +; RV32IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_store_f32_release: +; RV32IA-TSO-TRAILING-FENCE: # %bb.0: +; RV32IA-TSO-TRAILING-FENCE-NEXT: fmv.x.w a1, fa0 +; RV32IA-TSO-TRAILING-FENCE-NEXT: sw a1, 0(a0) +; RV32IA-TSO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_store_f32_release: +; RV64IA-WMO-TRAILING-FENCE: # %bb.0: +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, w +; RV64IA-WMO-TRAILING-FENCE-NEXT: fmv.x.w a1, fa0 +; RV64IA-WMO-TRAILING-FENCE-NEXT: sw a1, 0(a0) +; RV64IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_store_f32_release: +; RV64IA-TSO-TRAILING-FENCE: # %bb.0: +; RV64IA-TSO-TRAILING-FENCE-NEXT: fmv.x.w a1, fa0 +; RV64IA-TSO-TRAILING-FENCE-NEXT: sw a1, 0(a0) +; RV64IA-TSO-TRAILING-FENCE-NEXT: ret + store atomic float %b, ptr %a release, align 4 + ret void +} + +define void @atomic_store_f32_seq_cst(ptr %a, float %b) 
nounwind { +; RV32I-LABEL: atomic_store_f32_seq_cst: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a2, 5 +; RV32I-NEXT: fmv.x.w a1, fa0 +; RV32I-NEXT: call __atomic_store_4 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-WMO-LABEL: atomic_store_f32_seq_cst: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: fence rw, w +; RV32IA-WMO-NEXT: fmv.x.w a1, fa0 +; RV32IA-WMO-NEXT: sw a1, 0(a0) +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomic_store_f32_seq_cst: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: fmv.x.w a1, fa0 +; RV32IA-TSO-NEXT: sw a1, 0(a0) +; RV32IA-TSO-NEXT: fence rw, rw +; RV32IA-TSO-NEXT: ret +; +; RV64I-LABEL: atomic_store_f32_seq_cst: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a2, 5 +; RV64I-NEXT: call __atomic_store_4 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: atomic_store_f32_seq_cst: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: fence rw, w +; RV64IA-WMO-NEXT: fmv.x.w a1, fa0 +; RV64IA-WMO-NEXT: sw a1, 0(a0) +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_store_f32_seq_cst: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: fmv.x.w a1, fa0 +; RV64IA-TSO-NEXT: sw a1, 0(a0) +; RV64IA-TSO-NEXT: fence rw, rw +; RV64IA-TSO-NEXT: ret +; +; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_store_f32_seq_cst: +; RV32IA-WMO-TRAILING-FENCE: # %bb.0: +; RV32IA-WMO-TRAILING-FENCE-NEXT: fence rw, w +; RV32IA-WMO-TRAILING-FENCE-NEXT: fmv.x.w a1, fa0 +; RV32IA-WMO-TRAILING-FENCE-NEXT: sw a1, 0(a0) +; RV32IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw +; RV32IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_store_f32_seq_cst: +; RV32IA-TSO-TRAILING-FENCE: # %bb.0: +; RV32IA-TSO-TRAILING-FENCE-NEXT: fmv.x.w a1, fa0 +; RV32IA-TSO-TRAILING-FENCE-NEXT: sw a1, 0(a0) +; RV32IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw +; RV32IA-TSO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_store_f32_seq_cst: +; RV64IA-WMO-TRAILING-FENCE: # %bb.0: +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, w +; RV64IA-WMO-TRAILING-FENCE-NEXT: fmv.x.w a1, fa0 +; RV64IA-WMO-TRAILING-FENCE-NEXT: sw a1, 0(a0) +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw +; RV64IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_store_f32_seq_cst: +; RV64IA-TSO-TRAILING-FENCE: # %bb.0: +; RV64IA-TSO-TRAILING-FENCE-NEXT: fmv.x.w a1, fa0 +; RV64IA-TSO-TRAILING-FENCE-NEXT: sw a1, 0(a0) +; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw +; RV64IA-TSO-TRAILING-FENCE-NEXT: ret + store atomic float %b, ptr %a seq_cst, align 4 + ret void +} + +define void @atomic_store_f64_unordered(ptr %a, double %b) nounwind { +; RV32I-LABEL: atomic_store_f64_unordered: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: fsd fa0, 0(sp) +; RV32I-NEXT: lw a1, 0(sp) +; RV32I-NEXT: lw a2, 4(sp) +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: call __atomic_store_8 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_store_f64_unordered: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -16 +; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-NEXT: fsd fa0, 0(sp) +; RV32IA-NEXT: lw a1, 0(sp) +; RV32IA-NEXT: lw a2, 4(sp) +; RV32IA-NEXT: li a3, 0 +; RV32IA-NEXT: call __atomic_store_8 +; 
RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 16 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_store_f64_unordered: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a2, 0 +; RV64I-NEXT: call __atomic_store_8 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomic_store_f64_unordered: +; RV64IA: # %bb.0: +; RV64IA-NEXT: fmv.x.d a1, fa0 +; RV64IA-NEXT: sd a1, 0(a0) +; RV64IA-NEXT: ret + store atomic double %b, ptr %a unordered, align 8 + ret void +} + +define void @atomic_store_f64_monotonic(ptr %a, double %b) nounwind { +; RV32I-LABEL: atomic_store_f64_monotonic: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: fsd fa0, 0(sp) +; RV32I-NEXT: lw a1, 0(sp) +; RV32I-NEXT: lw a2, 4(sp) +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: call __atomic_store_8 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_store_f64_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -16 +; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-NEXT: fsd fa0, 0(sp) +; RV32IA-NEXT: lw a1, 0(sp) +; RV32IA-NEXT: lw a2, 4(sp) +; RV32IA-NEXT: li a3, 0 +; RV32IA-NEXT: call __atomic_store_8 +; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 16 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_store_f64_monotonic: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a2, 0 +; RV64I-NEXT: call __atomic_store_8 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomic_store_f64_monotonic: +; RV64IA: # %bb.0: +; RV64IA-NEXT: fmv.x.d a1, fa0 +; RV64IA-NEXT: sd a1, 0(a0) +; RV64IA-NEXT: ret + store atomic double %b, ptr %a monotonic, align 8 + ret void +} + +define void @atomic_store_f64_release(ptr %a, double %b) nounwind { +; RV32I-LABEL: atomic_store_f64_release: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: fsd fa0, 0(sp) +; RV32I-NEXT: lw a1, 0(sp) +; RV32I-NEXT: lw a2, 4(sp) +; RV32I-NEXT: li a3, 3 +; RV32I-NEXT: call __atomic_store_8 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_store_f64_release: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -16 +; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-NEXT: fsd fa0, 0(sp) +; RV32IA-NEXT: lw a1, 0(sp) +; RV32IA-NEXT: lw a2, 4(sp) +; RV32IA-NEXT: li a3, 3 +; RV32IA-NEXT: call __atomic_store_8 +; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 16 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_store_f64_release: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a2, 3 +; RV64I-NEXT: call __atomic_store_8 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: atomic_store_f64_release: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: fence rw, w +; RV64IA-WMO-NEXT: fmv.x.d a1, fa0 +; RV64IA-WMO-NEXT: sd a1, 0(a0) +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_store_f64_release: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: fmv.x.d a1, fa0 +; RV64IA-TSO-NEXT: sd a1, 0(a0) +; 
RV64IA-TSO-NEXT: ret +; +; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_store_f64_release: +; RV64IA-WMO-TRAILING-FENCE: # %bb.0: +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, w +; RV64IA-WMO-TRAILING-FENCE-NEXT: fmv.x.d a1, fa0 +; RV64IA-WMO-TRAILING-FENCE-NEXT: sd a1, 0(a0) +; RV64IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_store_f64_release: +; RV64IA-TSO-TRAILING-FENCE: # %bb.0: +; RV64IA-TSO-TRAILING-FENCE-NEXT: fmv.x.d a1, fa0 +; RV64IA-TSO-TRAILING-FENCE-NEXT: sd a1, 0(a0) +; RV64IA-TSO-TRAILING-FENCE-NEXT: ret + store atomic double %b, ptr %a release, align 8 + ret void +} + +define void @atomic_store_f64_seq_cst(ptr %a, double %b) nounwind { +; RV32I-LABEL: atomic_store_f64_seq_cst: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: fsd fa0, 0(sp) +; RV32I-NEXT: lw a1, 0(sp) +; RV32I-NEXT: lw a2, 4(sp) +; RV32I-NEXT: li a3, 5 +; RV32I-NEXT: call __atomic_store_8 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_store_f64_seq_cst: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -16 +; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-NEXT: fsd fa0, 0(sp) +; RV32IA-NEXT: lw a1, 0(sp) +; RV32IA-NEXT: lw a2, 4(sp) +; RV32IA-NEXT: li a3, 5 +; RV32IA-NEXT: call __atomic_store_8 +; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 16 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_store_f64_seq_cst: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a2, 5 +; RV64I-NEXT: call __atomic_store_8 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: atomic_store_f64_seq_cst: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: fence rw, w +; RV64IA-WMO-NEXT: fmv.x.d a1, fa0 +; RV64IA-WMO-NEXT: sd a1, 0(a0) +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_store_f64_seq_cst: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: fmv.x.d a1, fa0 +; RV64IA-TSO-NEXT: sd a1, 0(a0) +; RV64IA-TSO-NEXT: fence rw, rw +; RV64IA-TSO-NEXT: ret +; +; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_store_f64_seq_cst: +; RV64IA-WMO-TRAILING-FENCE: # %bb.0: +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, w +; RV64IA-WMO-TRAILING-FENCE-NEXT: fmv.x.d a1, fa0 +; RV64IA-WMO-TRAILING-FENCE-NEXT: sd a1, 0(a0) +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw +; RV64IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_store_f64_seq_cst: +; RV64IA-TSO-TRAILING-FENCE: # %bb.0: +; RV64IA-TSO-TRAILING-FENCE-NEXT: fmv.x.d a1, fa0 +; RV64IA-TSO-TRAILING-FENCE-NEXT: sd a1, 0(a0) +; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw +; RV64IA-TSO-TRAILING-FENCE-NEXT: ret + store atomic double %b, ptr %a seq_cst, align 8 + ret void +} diff --git a/llvm/test/MC/AMDGPU/vop3-gfx9.s b/llvm/test/MC/AMDGPU/vop3-gfx9.s index f98f33a..50a7433 100644 --- a/llvm/test/MC/AMDGPU/vop3-gfx9.s +++ b/llvm/test/MC/AMDGPU/vop3-gfx9.s @@ -566,6 +566,141 @@ v_interp_p2_f16 v5, v2, attr0.x, v3 clamp // NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU // VI: v_interp_p2_f16 v5, v2, attr0.x, v3 clamp ; encoding: [0x05,0x80,0x76,0xd2,0x00,0x04,0x0e,0x04] +v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[0,0,0] +// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 ; encoding: [0x05,0x00,0x77,0xd2,0x00,0x04,0x0e,0x04] +// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU 
+// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand. + +v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[0,0,1] +// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x77,0xd2,0x00,0x04,0x0e,0x04] +// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand. + +v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[0,1,0] +// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 ; encoding: [0x05,0x00,0x77,0xd2,0x00,0x04,0x0e,0x04] +// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand. + +v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[0,1,1] +// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x77,0xd2,0x00,0x04,0x0e,0x04] +// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand. + +v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,0,0] +// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x77,0xd2,0x00,0x04,0x0e,0x04] +// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand. + +v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,0,1] +// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,0,1,0] ; encoding: [0x05,0x28,0x77,0xd2,0x00,0x04,0x0e,0x04] +// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand. + +v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[0,1,1] +// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x77,0xd2,0x00,0x04,0x0e,0x04] +// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand. + +v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,0,0] +// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x77,0xd2,0x00,0x04,0x0e,0x04] +// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand. + +v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,0,1] +// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,0,1,0] ; encoding: [0x05,0x28,0x77,0xd2,0x00,0x04,0x0e,0x04] +// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand. + +v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,1,0] +// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x77,0xd2,0x00,0x04,0x0e,0x04] +// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand. + +v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,1,1] +// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,0,1,0] ; encoding: [0x05,0x28,0x77,0xd2,0x00,0x04,0x0e,0x04] +// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand. + +v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[0,0,0,0] +// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 ; encoding: [0x05,0x00,0x77,0xd2,0x00,0x04,0x0e,0x04] +// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand. 
+ +v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[0,0,0,1] +// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0x77,0xd2,0x00,0x04,0x0e,0x04] +// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand. + +v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[0,0,1,0] +// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x77,0xd2,0x00,0x04,0x0e,0x04] +// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand. + +v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[0,0,1,1] +// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[0,0,1,1] ; encoding: [0x05,0x60,0x77,0xd2,0x00,0x04,0x0e,0x04] +// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand. + +v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[0,1,0,0] +// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 ; encoding: [0x05,0x00,0x77,0xd2,0x00,0x04,0x0e,0x04] +// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand. + +v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[0,1,0,1] +// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0x77,0xd2,0x00,0x04,0x0e,0x04] +// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand. + +v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[0,1,1,0] +// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x77,0xd2,0x00,0x04,0x0e,0x04] +// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand. + +v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[0,1,1,1] +// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[0,0,1,1] ; encoding: [0x05,0x60,0x77,0xd2,0x00,0x04,0x0e,0x04] +// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand. + +v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,0,0,0] +// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x77,0xd2,0x00,0x04,0x0e,0x04] +// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand. + +v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,0,0,1] +// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,0,0,1] ; encoding: [0x05,0x48,0x77,0xd2,0x00,0x04,0x0e,0x04] +// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand. + +v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,0,1,0] +// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,0,1,0] ; encoding: [0x05,0x28,0x77,0xd2,0x00,0x04,0x0e,0x04] +// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand. + +v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,0,1,1] +// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,0,1,1] ; encoding: [0x05,0x68,0x77,0xd2,0x00,0x04,0x0e,0x04] +// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand. 
+ +v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,1,0,0] +// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x77,0xd2,0x00,0x04,0x0e,0x04] +// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand. + +v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,1,0,1] +// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,0,0,1] ; encoding: [0x05,0x48,0x77,0xd2,0x00,0x04,0x0e,0x04] +// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand. + +v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,1,1,0] +// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,0,1,0] ; encoding: [0x05,0x28,0x77,0xd2,0x00,0x04,0x0e,0x04] +// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand. + +v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,1,1,1] +// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,0,1,1] ; encoding: [0x05,0x68,0x77,0xd2,0x00,0x04,0x0e,0x04] +// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand. + v_interp_p2_legacy_f16 v5, v2, attr31.x, v3 // GFX9: v_interp_p2_legacy_f16 v5, v2, attr31.x, v3 ; encoding: [0x05,0x00,0x76,0xd2,0x1f,0x04,0x0e,0x04] // NOGCN: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop3.txt index 802d6368..60f058d 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop3.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop3.txt @@ -19311,6 +19311,27 @@ # CHECK: v_interp_p2_f16 v5, v2, attr0.x, v3 clamp ; encoding: [0x05,0x80,0x77,0xd2,0x00,0x04,0x0e,0x04] 0x05,0x80,0x77,0xd2,0x00,0x04,0x0e,0x04 +# CHECK: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0x77,0xd2,0x00,0x04,0x0e,0x04] +0x05,0x40,0x77,0xd2,0x00,0x04,0x0e,0x04 + +# CHECK: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x77,0xd2,0x00,0x04,0x0e,0x04] +0x05,0x20,0x77,0xd2,0x00,0x04,0x0e,0x04 + +# CHECK: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[0,0,1,1] ; encoding: [0x05,0x60,0x77,0xd2,0x00,0x04,0x0e,0x04] +0x05,0x60,0x77,0xd2,0x00,0x04,0x0e,0x04 + +# CHECK: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x77,0xd2,0x00,0x04,0x0e,0x04] +0x05,0x08,0x77,0xd2,0x00,0x04,0x0e,0x04 + +# CHECK: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,0,0,1] ; encoding: [0x05,0x48,0x77,0xd2,0x00,0x04,0x0e,0x04] +0x05,0x48,0x77,0xd2,0x00,0x04,0x0e,0x04 + +# CHECK: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,0,1,0] ; encoding: [0x05,0x28,0x77,0xd2,0x00,0x04,0x0e,0x04] +0x05,0x28,0x77,0xd2,0x00,0x04,0x0e,0x04 + +# CHECK: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,0,1,1] ; encoding: [0x05,0x68,0x77,0xd2,0x00,0x04,0x0e,0x04] +0x05,0x68,0x77,0xd2,0x00,0x04,0x0e,0x04 + # CHECK: v_add_f64 v[5:6], v[1:2], v[2:3] ; encoding: [0x05,0x00,0x80,0xd2,0x01,0x05,0x02,0x00] 0x05,0x00,0x80,0xd2,0x01,0x05,0x02,0x00 diff --git a/llvm/test/Other/new-pm-lto-defaults.ll b/llvm/test/Other/new-pm-lto-defaults.ll index 3aea0f2..f595dfe 100644 --- a/llvm/test/Other/new-pm-lto-defaults.ll +++ b/llvm/test/Other/new-pm-lto-defaults.ll @@ -67,6 +67,7 @@ ; CHECK-O1-NEXT: Running analysis: TargetLibraryAnalysis ; CHECK-O-NEXT: Running pass: GlobalSplitPass ; CHECK-O-NEXT: Running pass: 
WholeProgramDevirtPass +; CHECK-O-NEXT: Running pass: NoRecurseLTOInferencePass ; CHECK-O23SZ-NEXT: Running pass: CoroEarlyPass ; CHECK-O1-NEXT: Running pass: LowerTypeTestsPass ; CHECK-O23SZ-NEXT: Running pass: GlobalOptPass diff --git a/llvm/test/Transforms/FunctionAttrs/norecurse_libfunc_address_taken.ll b/llvm/test/Transforms/FunctionAttrs/norecurse_libfunc_address_taken.ll new file mode 100644 index 0000000..bcdf75b --- /dev/null +++ b/llvm/test/Transforms/FunctionAttrs/norecurse_libfunc_address_taken.ll @@ -0,0 +1,40 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5 +; RUN: opt < %s -passes=norecurse-lto-inference -S | FileCheck %s + +; This test includes a call to a library function which is not marked as +; NoCallback. Function bob() does not have internal linkage and hence prevents +; norecurse from being added. + +@.str = private unnamed_addr constant [12 x i8] c"Hello World\00", align 1 + +;. +; CHECK: @.str = private unnamed_addr constant [12 x i8] c"Hello World\00", align 1 +;. +define dso_local void @bob() { +; CHECK-LABEL: define dso_local void @bob() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CALL:%.*]] = tail call i32 (ptr, ...) @printf(ptr nonnull dereferenceable(1) @.str) +; CHECK-NEXT: ret void +; +entry: + %call = tail call i32 (ptr, ...) @printf(ptr nonnull dereferenceable(1) @.str) + ret void +} + +declare i32 @printf(ptr readonly captures(none), ...) + +define dso_local i32 @main() norecurse { +; CHECK: Function Attrs: norecurse +; CHECK-LABEL: define dso_local i32 @main( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: tail call void @bob() +; CHECK-NEXT: ret i32 0 +; +entry: + tail call void @bob() + ret i32 0 +} +;. +; CHECK: attributes #[[ATTR0]] = { norecurse } +;. diff --git a/llvm/test/Transforms/FunctionAttrs/norecurse_libfunc_no_address_taken.ll b/llvm/test/Transforms/FunctionAttrs/norecurse_libfunc_no_address_taken.ll new file mode 100644 index 0000000..a03b4ca --- /dev/null +++ b/llvm/test/Transforms/FunctionAttrs/norecurse_libfunc_no_address_taken.ll @@ -0,0 +1,45 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5 +; RUN: opt < %s -passes=norecurse-lto-inference -S | FileCheck %s + +; This test includes a call to a library function which is not marked as +; NoCallback. All functions except main() are internal and main is marked +; norecurse, so as not to block norecurse from being added to bob(). + +@.str = private unnamed_addr constant [12 x i8] c"Hello World\00", align 1 + +; Function Attrs: nofree noinline nounwind uwtable +;. +; CHECK: @.str = private unnamed_addr constant [12 x i8] c"Hello World\00", align 1 +;. +define internal void @bob() { +; CHECK: Function Attrs: norecurse +; CHECK-LABEL: define internal void @bob( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CALL:%.*]] = tail call i32 (ptr, ...) @printf(ptr nonnull dereferenceable(1) @.str) +; CHECK-NEXT: ret void +; +entry: + %call = tail call i32 (ptr, ...) @printf(ptr nonnull dereferenceable(1) @.str) + ret void +} + +; Function Attrs: nofree nounwind +declare i32 @printf(ptr readonly captures(none), ...)
+ +; Function Attrs: nofree norecurse nounwind uwtable +define dso_local i32 @main() norecurse { +; CHECK: Function Attrs: norecurse +; CHECK-LABEL: define dso_local i32 @main( +; CHECK-SAME: ) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: tail call void @bob() +; CHECK-NEXT: ret i32 0 +; +entry: + tail call void @bob() + ret i32 0 +} +;. +; CHECK: attributes #[[ATTR0]] = { norecurse } +;. diff --git a/llvm/test/Transforms/FunctionAttrs/norecurse_lto.ll b/llvm/test/Transforms/FunctionAttrs/norecurse_lto.ll new file mode 100644 index 0000000..5be707b --- /dev/null +++ b/llvm/test/Transforms/FunctionAttrs/norecurse_lto.ll @@ -0,0 +1,69 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5 +; RUN: opt < %s -passes=norecurse-lto-inference -S | FileCheck %s + +; This test includes a call graph which has a recursive function (foo2) which +; calls a non-recursive internal function (foo3) satisfying the norecurse +; attribute criteria. + + +define internal void @foo3() { +; CHECK: Function Attrs: norecurse +; CHECK-LABEL: define internal void @foo3( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: ret void +; + ret void +} + +define internal i32 @foo2(i32 %accum, i32 %n) { +; CHECK-LABEL: define internal i32 @foo2( +; CHECK-SAME: i32 [[ACCUM:%.*]], i32 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[N]], 0 +; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[RECURSE:.*]] +; CHECK: [[RECURSE]]: +; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[N]], 1 +; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[ACCUM]], [[SUB]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 @foo2(i32 [[MUL]], i32 [[SUB]]) +; CHECK-NEXT: call void @foo3() +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[ACCUM]], %[[ENTRY]] ], [ [[CALL]], %[[RECURSE]] ] +; CHECK-NEXT: ret i32 [[RES]] +; +entry: + %cmp = icmp eq i32 %n, 0 + br i1 %cmp, label %exit, label %recurse + +recurse: + %sub = sub i32 %n, 1 + %mul = mul i32 %accum, %sub + %call = call i32 @foo2(i32 %mul, i32 %sub) + call void @foo3() + br label %exit + +exit: + %res = phi i32 [ %accum, %entry ], [ %call, %recurse ] + ret i32 %res +} + +define internal i32 @foo1() { +; CHECK-LABEL: define internal i32 @foo1() { +; CHECK-NEXT: [[RES:%.*]] = call i32 @foo2(i32 1, i32 5) +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @foo2(i32 1, i32 5) + ret i32 %res +} + +define dso_local i32 @main() { +; CHECK-LABEL: define dso_local i32 @main() { +; CHECK-NEXT: [[RES:%.*]] = call i32 @foo1() +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @foo1() + ret i32 %res +} +;. +; CHECK: attributes #[[ATTR0]] = { norecurse } +;. diff --git a/llvm/test/Transforms/FunctionAttrs/norecurse_multi_scc_indirect_recursion.ll b/llvm/test/Transforms/FunctionAttrs/norecurse_multi_scc_indirect_recursion.ll new file mode 100644 index 0000000..e351f60 --- /dev/null +++ b/llvm/test/Transforms/FunctionAttrs/norecurse_multi_scc_indirect_recursion.ll @@ -0,0 +1,141 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5 +; RUN: opt < %s -passes=norecurse-lto-inference -S | FileCheck %s + +; This test includes a call graph with multiple SCCs. The purpose of this is +; to check that norecurse is not added when a function is part of a +; non-singular SCC.
+; There are three different SCCs in this test: +; SCC#1: f1, foo, bar, foo1, bar1 +; SCC#2: bar2, bar3, bar4 +; SCC#3: baz, fun +; None of these functions should be marked as norecurse + +define internal void @bar1() { +; CHECK-LABEL: define internal void @bar1() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: tail call void @f1() +; CHECK-NEXT: ret void +; +entry: + tail call void @f1() + ret void +} + +define internal void @f1() { +; CHECK-LABEL: define internal void @f1() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: tail call void @foo() +; CHECK-NEXT: tail call void @bar2() +; CHECK-NEXT: tail call void @baz() +; CHECK-NEXT: ret void +; +entry: + tail call void @foo() + tail call void @bar2() + tail call void @baz() + ret void +} + +define dso_local i32 @main() norecurse { +; CHECK: Function Attrs: norecurse +; CHECK-LABEL: define dso_local i32 @main( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: tail call void @f1() +; CHECK-NEXT: ret i32 0 +; +entry: + tail call void @f1() + ret i32 0 +} + +define internal void @foo1() { +; CHECK-LABEL: define internal void @foo1() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: tail call void @bar1() +; CHECK-NEXT: ret void +; +entry: + tail call void @bar1() + ret void +} + +define internal void @bar() { +; CHECK-LABEL: define internal void @bar() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: tail call void @foo1() +; CHECK-NEXT: ret void +; +entry: + tail call void @foo1() + ret void +} + +define internal void @foo() { +; CHECK-LABEL: define internal void @foo() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: tail call void @bar() +; CHECK-NEXT: ret void +; +entry: + tail call void @bar() + ret void +} + +define internal void @bar4() { +; CHECK-LABEL: define internal void @bar4() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: tail call void @bar2() +; CHECK-NEXT: ret void +; +entry: + tail call void @bar2() + ret void +} + +define internal void @bar2() { +; CHECK-LABEL: define internal void @bar2() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: tail call void @bar3() +; CHECK-NEXT: ret void +; +entry: + tail call void @bar3() + ret void +} + +define internal void @bar3() { +; CHECK-LABEL: define internal void @bar3() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: tail call void @bar4() +; CHECK-NEXT: ret void +; +entry: + tail call void @bar4() + ret void +} + +define internal void @fun() { +; CHECK-LABEL: define internal void @fun() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: tail call void @baz() +; CHECK-NEXT: ret void +; +entry: + tail call void @baz() + ret void +} + +define internal void @baz() { +; CHECK-LABEL: define internal void @baz() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: tail call void @fun() +; CHECK-NEXT: ret void +; +entry: + tail call void @fun() + ret void +} +;. +; CHECK: attributes #[[ATTR0]] = { norecurse } +;. diff --git a/llvm/test/Transforms/FunctionAttrs/norecurse_multi_scc_indirect_recursion1.ll b/llvm/test/Transforms/FunctionAttrs/norecurse_multi_scc_indirect_recursion1.ll new file mode 100644 index 0000000..cd94037 --- /dev/null +++ b/llvm/test/Transforms/FunctionAttrs/norecurse_multi_scc_indirect_recursion1.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5 +; RUN: opt < %s -passes=norecurse-lto-inference -S | FileCheck %s + +; This test includes a call graph with multiple SCCs. 
The purpose of this is +; to check that norecurse is added to a function which calls a function which +; is indirectly recursive but is not itself part of the recursive chain. +; There are two SCCs in this test: +; SCC#1: bar2, bar3, bar4 +; SCC#2: baz, fun +; f1() calls bar2 and baz, both of which are part of some indirect recursive +; chain, but neither calls back into f1(), and hence f1() can be marked as +; norecurse. + +define dso_local i32 @main() norecurse { +; CHECK: Function Attrs: norecurse +; CHECK-LABEL: define dso_local i32 @main( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: tail call void @f1() +; CHECK-NEXT: ret i32 0 +; +entry: + tail call void @f1() + ret i32 0 +} + +define internal void @f1() { +; CHECK: Function Attrs: norecurse +; CHECK-LABEL: define internal void @f1( +; CHECK-SAME: ) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: tail call void @bar2() +; CHECK-NEXT: tail call void @baz() +; CHECK-NEXT: ret void +; +entry: + tail call void @bar2() + tail call void @baz() + ret void +} + +define internal void @bar4() { +; CHECK-LABEL: define internal void @bar4() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: tail call void @bar2() +; CHECK-NEXT: ret void +; +entry: + tail call void @bar2() + ret void +} + +define internal void @bar2() { +; CHECK-LABEL: define internal void @bar2() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: tail call void @bar3() +; CHECK-NEXT: ret void +; +entry: + tail call void @bar3() + ret void +} + +define internal void @bar3() { +; CHECK-LABEL: define internal void @bar3() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: tail call void @bar4() +; CHECK-NEXT: ret void +; +entry: + tail call void @bar4() + ret void +} + +define internal void @fun() { +; CHECK-LABEL: define internal void @fun() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: tail call void @baz() +; CHECK-NEXT: ret void +; +entry: + tail call void @baz() + ret void +} + +define internal void @baz() { +; CHECK-LABEL: define internal void @baz() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: tail call void @fun() +; CHECK-NEXT: ret void +; +entry: + tail call void @fun() + ret void +} +;. +; CHECK: attributes #[[ATTR0]] = { norecurse } +;. diff --git a/llvm/test/Transforms/FunctionAttrs/norecurse_multinode_refscc.ll b/llvm/test/Transforms/FunctionAttrs/norecurse_multinode_refscc.ll new file mode 100644 index 0000000..8b81a90 --- /dev/null +++ b/llvm/test/Transforms/FunctionAttrs/norecurse_multinode_refscc.ll @@ -0,0 +1,41 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5 +; RUN: opt -passes=norecurse-lto-inference -S %s | FileCheck %s + +; This is a negative test which results in a RefSCC with size > 1.
+; RefSCC : [(f2), (f1)] +; --- SCC A (f1) --- size() = 1 +define internal void @f1() { +; CHECK-LABEL: define internal void @f1() { +; CHECK-NEXT: call void @f2() +; CHECK-NEXT: ret void +; + call void @f2() + ret void +} + +; --- SCC B (f2) --- size() = 1 +; f2 indirectly calls f1 using locally allocated function pointer +define internal void @f2() { +; CHECK-LABEL: define internal void @f2() { +; CHECK-NEXT: [[FP:%.*]] = alloca ptr, align 8 +; CHECK-NEXT: store ptr @f1, ptr [[FP]], align 8 +; CHECK-NEXT: [[TMP:%.*]] = load ptr, ptr [[FP]], align 8 +; CHECK-NEXT: call void [[TMP]]() +; CHECK-NEXT: ret void +; + %fp = alloca void ()* + store void ()* @f1, void ()** %fp + %tmp = load void ()*, void ()** %fp + call void %tmp() + ret void +} + +define i32 @main() { +; CHECK-LABEL: define i32 @main() { +; CHECK-NEXT: call void @f1() +; CHECK-NEXT: ret i32 0 +; + call void @f1() + ret i32 0 +} + diff --git a/llvm/test/Transforms/FunctionAttrs/norecurse_self_recursive_callee.ll b/llvm/test/Transforms/FunctionAttrs/norecurse_self_recursive_callee.ll new file mode 100644 index 0000000..461e5df --- /dev/null +++ b/llvm/test/Transforms/FunctionAttrs/norecurse_self_recursive_callee.ll @@ -0,0 +1,88 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5 +; RUN: opt < %s -passes=norecurse-lto-inference -S | FileCheck %s + +; This test includes a call graph with a self recursive function. +; The purpose of this is to check that norecurse is added to functions +; which have a self-recursive function in the call-chain. +; The call-chain in this test is as follows +; main -> bob -> callee1 -> callee2 +; where callee2 is self recursive. + +@x = dso_local global i32 4, align 4 +@y = dso_local global i32 2, align 4 + +;. +; CHECK: @x = dso_local global i32 4, align 4 +; CHECK: @y = dso_local global i32 2, align 4 +;. 
+define internal void @callee2() { +; CHECK: Function Attrs: norecurse +; CHECK-LABEL: define internal void @callee2( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @y, align 4 +; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 +; CHECK-NEXT: store volatile i32 [[INC]], ptr @y, align 4 +; CHECK-NEXT: ret void +; +entry: + %0 = load volatile i32, ptr @y, align 4 + %inc = add nsw i32 %0, 1 + store volatile i32 %inc, ptr @y, align 4 + ret void +} + +define internal void @callee1(i32 %x) { +; CHECK-LABEL: define internal void @callee1( +; CHECK-SAME: i32 [[X:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[X]], 0 +; CHECK-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] +; CHECK: [[IF_THEN]]: +; CHECK-NEXT: tail call void @callee1(i32 [[X]]) +; CHECK-NEXT: br label %[[IF_END]] +; CHECK: [[IF_END]]: +; CHECK-NEXT: tail call void @callee2() +; CHECK-NEXT: ret void +; +entry: + %cmp = icmp sgt i32 %x, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void @callee1(i32 %x) + br label %if.end + +if.end: ; preds = %if.then, %entry + tail call void @callee2() + ret void +} + +define internal void @bob() { +; CHECK-LABEL: define internal void @bob() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @x, align 4 +; CHECK-NEXT: tail call void @callee2(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; +entry: + %0 = load volatile i32, ptr @x, align 4 + tail call void @callee2(i32 %0) + ret void +} + +define dso_local i32 @main() norecurse { +; CHECK: Function Attrs: norecurse +; CHECK-LABEL: define dso_local i32 @main( +; CHECK-SAME: ) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: tail call void @bob() +; CHECK-NEXT: ret i32 0 +; +entry: + tail call void @bob() + ret i32 0 +} +;. +; CHECK: attributes #[[ATTR0]] = { norecurse } +;. 
diff --git a/llvm/test/Transforms/InstCombine/select-safe-bool-transforms.ll b/llvm/test/Transforms/InstCombine/select-safe-bool-transforms.ll index 9de9150..8b0a5ca 100644 --- a/llvm/test/Transforms/InstCombine/select-safe-bool-transforms.ll +++ b/llvm/test/Transforms/InstCombine/select-safe-bool-transforms.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals ; RUN: opt < %s -passes=instcombine -S | FileCheck %s ; TODO: All of these should be optimized to less than or equal to a single @@ -7,13 +7,13 @@ ; --- (A op B) op' A / (B op A) op' A --- ; (A land B) land A -define i1 @land_land_left1(i1 %A, i1 %B) { +define i1 @land_land_left1(i1 %A, i1 %B) !prof !0 { ; CHECK-LABEL: @land_land_left1( -; CHECK-NEXT: [[C:%.*]] = select i1 [[A:%.*]], i1 [[B:%.*]], i1 false +; CHECK-NEXT: [[C:%.*]] = select i1 [[A:%.*]], i1 [[B:%.*]], i1 false, !prof [[PROF1:![0-9]+]] ; CHECK-NEXT: ret i1 [[C]] ; - %c = select i1 %A, i1 %B, i1 false - %res = select i1 %c, i1 %A, i1 false + %c = select i1 %A, i1 %B, i1 false, !prof !1 + %res = select i1 %c, i1 %A, i1 false, !prof !2 ret i1 %res } define i1 @land_land_left2(i1 %A, i1 %B) { @@ -157,13 +157,13 @@ define i1 @lor_band_left2(i1 %A, i1 %B) { } ; (A lor B) lor A -define i1 @lor_lor_left1(i1 %A, i1 %B) { +define i1 @lor_lor_left1(i1 %A, i1 %B) !prof !0 { ; CHECK-LABEL: @lor_lor_left1( -; CHECK-NEXT: [[C:%.*]] = select i1 [[A:%.*]], i1 true, i1 [[B:%.*]] +; CHECK-NEXT: [[C:%.*]] = select i1 [[A:%.*]], i1 true, i1 [[B:%.*]], !prof [[PROF1]] ; CHECK-NEXT: ret i1 [[C]] ; - %c = select i1 %A, i1 true, i1 %B - %res = select i1 %c, i1 true, i1 %A + %c = select i1 %A, i1 true, i1 %B, !prof !1 + %res = select i1 %c, i1 true, i1 %A, !prof !2 ret i1 %res } define i1 @lor_lor_left2(i1 %A, i1 %B) { @@ -506,3 +506,12 @@ define <2 x i1> @PR50500_falseval(<2 x i1> %a, <2 x i1> %b) { %r = select <2 x i1> %a, <2 x i1> %b, <2 x i1> %s ret <2 x i1> %r } + +!0 = !{!"function_entry_count", i64 1000} +!1 = !{!"branch_weights", i32 2, i32 3} +!2 = !{!"branch_weights", i32 5, i32 7} + +;. +; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i64 1000} +; CHECK: [[PROF1]] = !{!"branch_weights", i32 2, i32 3} +;. 
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr162009.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr162009.ll new file mode 100644 index 0000000..6095b24 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr162009.ll @@ -0,0 +1,79 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -enable-epilogue-vectorization=false -S < %s | FileCheck %s --check-prefixes=CHECK-NO-PARTIAL-REDUCTION + +target triple = "aarch64" + +define i128 @add_reduc_i32_i128_unsupported(ptr %a, ptr %b) "target-features"="+dotprod" { +; CHECK-NO-PARTIAL-REDUCTION-LABEL: define i128 @add_reduc_i32_i128_unsupported( +; CHECK-NO-PARTIAL-REDUCTION-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[ENTRY:.*:]] +; CHECK-NO-PARTIAL-REDUCTION-NEXT: br label %[[VECTOR_PH:.*]] +; CHECK-NO-PARTIAL-REDUCTION: [[VECTOR_PH]]: +; CHECK-NO-PARTIAL-REDUCTION-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-NO-PARTIAL-REDUCTION: [[VECTOR_BODY]]: +; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[VEC_PHI:%.*]] = phi <4 x i128> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]] +; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 1 +; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[TMP1:%.*]] = zext <4 x i32> [[WIDE_LOAD]] to <4 x i64> +; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[B]], i64 [[INDEX]] +; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP2]], align 1 +; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[TMP3:%.*]] = zext <4 x i32> [[WIDE_LOAD1]] to <4 x i64> +; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[TMP4:%.*]] = mul nuw <4 x i64> [[TMP1]], [[TMP3]] +; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[TMP5:%.*]] = zext <4 x i64> [[TMP4]] to <4 x i128> +; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[TMP7]] = add <4 x i128> [[VEC_PHI]], [[TMP5]] +; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4024 +; CHECK-NO-PARTIAL-REDUCTION-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NO-PARTIAL-REDUCTION: [[MIDDLE_BLOCK]]: +; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[TMP8:%.*]] = call i128 @llvm.vector.reduce.add.v4i128(<4 x i128> [[TMP7]]) +; CHECK-NO-PARTIAL-REDUCTION-NEXT: br label %[[SCALAR_PH:.*]] +; CHECK-NO-PARTIAL-REDUCTION: [[SCALAR_PH]]: +; CHECK-NO-PARTIAL-REDUCTION-NEXT: br label %[[FOR_BODY:.*]] +; CHECK-NO-PARTIAL-REDUCTION: [[FOR_BODY]]: +; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[IV:%.*]] = phi i64 [ 4024, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] +; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[ACCUM:%.*]] = phi i128 [ [[TMP8]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[FOR_BODY]] ] +; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]] +; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[LOAD_A:%.*]] = load i32, ptr [[GEP_A]], align 1 +; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[EXT_A:%.*]] = zext i32 [[LOAD_A]] to i64 +; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[GEP_B:%.*]] = getelementptr i32, ptr [[B]], i64 [[IV]] +; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[LOAD_B:%.*]] = load i32, ptr [[GEP_B]], align 1 +; 
CHECK-NO-PARTIAL-REDUCTION-NEXT: [[EXT_B:%.*]] = zext i32 [[LOAD_B]] to i64 +; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[MUL:%.*]] = mul nuw i64 [[EXT_A]], [[EXT_B]] +; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[MUL_ZEXT:%.*]] = zext i64 [[MUL]] to i128 +; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[ADD]] = add i128 [[ACCUM]], [[MUL_ZEXT]] +; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 4025 +; CHECK-NO-PARTIAL-REDUCTION-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_EXIT:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NO-PARTIAL-REDUCTION: [[FOR_EXIT]]: +; CHECK-NO-PARTIAL-REDUCTION-NEXT: [[ADD_LCSSA:%.*]] = phi i128 [ [[ADD]], %[[FOR_BODY]] ] +; CHECK-NO-PARTIAL-REDUCTION-NEXT: ret i128 [[ADD_LCSSA]] +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %accum = phi i128 [ 0, %entry ], [ %add, %for.body ] + %gep.a = getelementptr i32, ptr %a, i64 %iv + %load.a = load i32, ptr %gep.a, align 1 + %ext.a = zext i32 %load.a to i64 + %gep.b = getelementptr i32, ptr %b, i64 %iv + %load.b = load i32, ptr %gep.b, align 1 + %ext.b = zext i32 %load.b to i64 + %mul = mul nuw i64 %ext.a, %ext.b + %mul.zext = zext i64 %mul to i128 + %add = add i128 %accum, %mul.zext + %iv.next = add i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 4025 + br i1 %exitcond.not, label %for.exit, label %for.body + +for.exit: + ret i128 %add +} +;. +; CHECK-NO-PARTIAL-REDUCTION: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; CHECK-NO-PARTIAL-REDUCTION: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK-NO-PARTIAL-REDUCTION: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK-NO-PARTIAL-REDUCTION: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} +;. diff --git a/llvm/test/Transforms/SimplifyCFG/indirectbr.ll b/llvm/test/Transforms/SimplifyCFG/indirectbr.ll index 87d8b39..2fa36b0 100644 --- a/llvm/test/Transforms/SimplifyCFG/indirectbr.ll +++ b/llvm/test/Transforms/SimplifyCFG/indirectbr.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals ; RUN: opt -S -passes=simplifycfg -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck %s ; SimplifyCFG should eliminate redundant indirectbr edges. @@ -8,7 +8,11 @@ declare void @A() declare void @B(i32) declare void @C() -define void @indbrtest0(ptr %P, ptr %Q) { +;. +; CHECK: @anchor = constant [13 x ptr] [ptr blockaddress(@indbrtest3, %L1), ptr blockaddress(@indbrtest3, %L2), ptr inttoptr (i32 1 to ptr), ptr blockaddress(@indbrtest4, %L1), ptr inttoptr (i32 1 to ptr), ptr inttoptr (i32 1 to ptr), ptr inttoptr (i32 1 to ptr), ptr inttoptr (i32 1 to ptr), ptr inttoptr (i32 1 to ptr), ptr inttoptr (i32 1 to ptr), ptr inttoptr (i32 1 to ptr), ptr inttoptr (i32 1 to ptr), ptr inttoptr (i32 1 to ptr)] +; CHECK: @xblkx.bbs = internal unnamed_addr constant [9 x ptr] [ptr blockaddress(@indbrtest7, %xlab4x), ptr blockaddress(@indbrtest7, %xlab4x), ptr blockaddress(@indbrtest7, %v2j), ptr blockaddress(@indbrtest7, %xlab4x), ptr blockaddress(@indbrtest7, %xlab4x), ptr blockaddress(@indbrtest7, %xlab4x), ptr blockaddress(@indbrtest7, %xlab4x), ptr blockaddress(@indbrtest7, %xlab4x), ptr blockaddress(@indbrtest7, %v2j)] +;. 
+define void @indbrtest0(ptr %P, ptr %Q) !prof !0 { ; CHECK-LABEL: @indbrtest0( ; CHECK-NEXT: entry: ; CHECK-NEXT: store ptr blockaddress(@indbrtest0, [[BB0:%.*]]), ptr [[P:%.*]], align 8 @@ -16,7 +20,7 @@ define void @indbrtest0(ptr %P, ptr %Q) { ; CHECK-NEXT: store ptr blockaddress(@indbrtest0, [[BB2:%.*]]), ptr [[P]], align 8 ; CHECK-NEXT: call void @foo() ; CHECK-NEXT: [[T:%.*]] = load ptr, ptr [[Q:%.*]], align 8 -; CHECK-NEXT: indirectbr ptr [[T]], [label [[BB0]], label [[BB1]], label %BB2] +; CHECK-NEXT: indirectbr ptr [[T]], [label [[BB0]], label [[BB1]], label %BB2], !prof [[PROF1:![0-9]+]] ; CHECK: BB0: ; CHECK-NEXT: call void @A() ; CHECK-NEXT: br label [[BB1]] @@ -36,7 +40,7 @@ entry: store ptr blockaddress(@indbrtest0, %BB2), ptr %P call void @foo() %t = load ptr, ptr %Q - indirectbr ptr %t, [label %BB0, label %BB1, label %BB2, label %BB0, label %BB1, label %BB2] + indirectbr ptr %t, [label %BB0, label %BB1, label %BB2, label %BB0, label %BB1, label %BB2], !prof !1 BB0: call void @A() br label %BB1 @@ -103,10 +107,10 @@ BB0: ; SimplifyCFG should turn the indirectbr into a conditional branch on the ; condition of the select. -define void @indbrtest3(i1 %cond, ptr %address) nounwind { +define void @indbrtest3(i1 %cond, ptr %address) nounwind !prof !0 { ; CHECK-LABEL: @indbrtest3( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 [[COND:%.*]], label [[L1:%.*]], label [[L2:%.*]] +; CHECK-NEXT: br i1 [[COND:%.*]], label [[L1:%.*]], label [[L2:%.*]], !prof [[PROF2:![0-9]+]] ; CHECK: common.ret: ; CHECK-NEXT: ret void ; CHECK: L1: @@ -117,8 +121,8 @@ define void @indbrtest3(i1 %cond, ptr %address) nounwind { ; CHECK-NEXT: br label [[COMMON_RET]] ; entry: - %indirect.goto.dest = select i1 %cond, ptr blockaddress(@indbrtest3, %L1), ptr blockaddress(@indbrtest3, %L2) - indirectbr ptr %indirect.goto.dest, [label %L1, label %L2, label %L3] + %indirect.goto.dest = select i1 %cond, ptr blockaddress(@indbrtest3, %L1), ptr blockaddress(@indbrtest3, %L2), !prof !2 + indirectbr ptr %indirect.goto.dest, [label %L1, label %L2, label %L3], !prof !3 L1: call void @A() @@ -385,3 +389,15 @@ declare i32 @xfunc5x() declare i8 @xfunc7x() declare i32 @xselectorx() declare i32 @xactionx() + +!0 = !{!"function_entry_count", i32 10} +!1 = !{!"branch_weights", i32 3, i32 5, i32 7, i32 11, i32 13, i32 17} +!2 = !{!"branch_weights", i32 3, i32 5} +!3 = !{!"branch_weights", i32 3, i32 5, i32 7} +;. +; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind } +;. +; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i32 10} +; CHECK: [[PROF1]] = !{!"branch_weights", i32 14, i32 18, i32 24} +; CHECK: [[PROF2]] = !{!"branch_weights", i32 3, i32 5} +;. diff --git a/llvm/test/tools/llvm-exegesis/AArch64/no-aliasing-ld-str.s b/llvm/test/tools/llvm-exegesis/AArch64/no-aliasing-ld-str.s index 65e1203..c8a5746 100644 --- a/llvm/test/tools/llvm-exegesis/AArch64/no-aliasing-ld-str.s +++ b/llvm/test/tools/llvm-exegesis/AArch64/no-aliasing-ld-str.s @@ -1,4 +1,6 @@ REQUIRES: aarch64-registered-target +// Flakey on SVE buildbots, disabled pending invesgitation. 
+UNSUPPORTED: target={{.*}} RUN: llvm-exegesis -mtriple=aarch64 -mcpu=neoverse-v2 -mode=latency --dump-object-to-disk=%d --opcode-name=FMOVWSr --benchmark-phase=assemble-measured-code 2>&1 RUN: llvm-objdump -d %d > %t.s diff --git a/llvm/unittests/CAS/CMakeLists.txt b/llvm/unittests/CAS/CMakeLists.txt index 0f8fcb9..ee40e6c 100644 --- a/llvm/unittests/CAS/CMakeLists.txt +++ b/llvm/unittests/CAS/CMakeLists.txt @@ -8,6 +8,7 @@ add_llvm_unittest(CASTests ActionCacheTest.cpp CASTestConfig.cpp ObjectStoreTest.cpp + OnDiskDataAllocatorTest.cpp OnDiskTrieRawHashMapTest.cpp ProgramTest.cpp ) diff --git a/llvm/unittests/CAS/OnDiskDataAllocatorTest.cpp b/llvm/unittests/CAS/OnDiskDataAllocatorTest.cpp new file mode 100644 index 0000000..966fa03 --- /dev/null +++ b/llvm/unittests/CAS/OnDiskDataAllocatorTest.cpp @@ -0,0 +1,66 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/CAS/OnDiskDataAllocator.h" +#include "llvm/CAS/MappedFileRegionArena.h" +#include "llvm/Config/llvm-config.h" +#include "llvm/Support/Alignment.h" +#include "llvm/Testing/Support/Error.h" +#include "llvm/Testing/Support/SupportHelpers.h" + +#if LLVM_ENABLE_ONDISK_CAS + +using namespace llvm; +using namespace llvm::cas; + +TEST(OnDiskDataAllocatorTest, Allocate) { + unittest::TempDir Temp("data-allocator", /*Unique=*/true); + constexpr size_t MB = 1024u * 1024u; + + std::optional<OnDiskDataAllocator> Allocator; + ASSERT_THAT_ERROR(OnDiskDataAllocator::create( + Temp.path("allocator"), "data", /*MaxFileSize=*/MB, + /*NewFileInitialSize=*/std::nullopt) + .moveInto(Allocator), + Succeeded()); + + // Allocate. + { + for (size_t Size = 1; Size < 16; ++Size) { + OnDiskDataAllocator::OnDiskPtr P; + ASSERT_THAT_ERROR(Allocator->allocate(Size).moveInto(P), Succeeded()); + EXPECT_TRUE( + isAligned(MappedFileRegionArena::getAlign(), P.getOffset().get())); + } + } + + // Out of space. + { + OnDiskDataAllocator::OnDiskPtr P; + ASSERT_THAT_ERROR(Allocator->allocate(MB).moveInto(P), Failed()); + } + + // Check size and capacity. + { + ASSERT_EQ(Allocator->capacity(), MB); + ASSERT_LE(Allocator->size(), MB); + } + + // Get. + { + OnDiskDataAllocator::OnDiskPtr P; + ASSERT_THAT_ERROR(Allocator->allocate(32).moveInto(P), Succeeded()); + ArrayRef<char> Data; + ASSERT_THAT_ERROR(Allocator->get(P.getOffset(), 16).moveInto(Data), + Succeeded()); + ASSERT_THAT_ERROR(Allocator->get(P.getOffset(), 1025).moveInto(Data), + Failed()); + } +} + +#endif // LLVM_ENABLE_ONDISK_CAS diff --git a/llvm/unittests/CAS/OnDiskTrieRawHashMapTest.cpp b/llvm/unittests/CAS/OnDiskTrieRawHashMapTest.cpp index 7bedfe4..6034c70 100644 --- a/llvm/unittests/CAS/OnDiskTrieRawHashMapTest.cpp +++ b/llvm/unittests/CAS/OnDiskTrieRawHashMapTest.cpp @@ -71,7 +71,7 @@ TEST_P(OnDiskTrieRawHashMapTestFixture, General) { std::optional<FileOffset> Offset; std::optional<MutableArrayRef<char>> Data; { - std::optional<OnDiskTrieRawHashMap::pointer> Insertion; + std::optional<OnDiskTrieRawHashMap::OnDiskPtr> Insertion; ASSERT_THAT_ERROR(Trie1->insert({Hash0, Data0v1}).moveInto(Insertion), Succeeded()); EXPECT_EQ(Hash0, (*Insertion)->Hash); @@ -128,7 +128,7 @@ TEST_P(OnDiskTrieRawHashMapTestFixture, General) { // Recover from an offset. 
{ - OnDiskTrieRawHashMap::const_pointer Recovered; + OnDiskTrieRawHashMap::ConstOnDiskPtr Recovered; ASSERT_THAT_ERROR(Trie1->recoverFromFileOffset(*Offset).moveInto(Recovered), Succeeded()); ASSERT_TRUE(Recovered); @@ -140,14 +140,14 @@ TEST_P(OnDiskTrieRawHashMapTestFixture, General) { // Recover from a bad offset. { FileOffset BadOffset(1); - OnDiskTrieRawHashMap::const_pointer Recovered; + OnDiskTrieRawHashMap::ConstOnDiskPtr Recovered; ASSERT_THAT_ERROR( Trie1->recoverFromFileOffset(BadOffset).moveInto(Recovered), Failed()); } // Insert another thing. { - std::optional<OnDiskTrieRawHashMap::pointer> Insertion; + std::optional<OnDiskTrieRawHashMap::OnDiskPtr> Insertion; ASSERT_THAT_ERROR(Trie1->insert({Hash1, Data1}).moveInto(Insertion), Succeeded()); EXPECT_EQ(Hash1, (*Insertion)->Hash); @@ -210,7 +210,7 @@ TEST(OnDiskTrieRawHashMapTest, OutOfSpace) { auto Hash0 = ArrayRef(Hash0Bytes); constexpr StringLiteral Data0v1Bytes = "data0.v1"; ArrayRef<char> Data0v1 = ArrayRef(Data0v1Bytes.data(), Data0v1Bytes.size()); - std::optional<OnDiskTrieRawHashMap::pointer> Insertion; + std::optional<OnDiskTrieRawHashMap::OnDiskPtr> Insertion; ASSERT_THAT_ERROR(Trie->insert({Hash0, Data0v1}).moveInto(Insertion), Failed()); } diff --git a/llvm/unittests/Option/CMakeLists.txt b/llvm/unittests/Option/CMakeLists.txt index 7be4300..5fefb5e 100644 --- a/llvm/unittests/Option/CMakeLists.txt +++ b/llvm/unittests/Option/CMakeLists.txt @@ -4,11 +4,15 @@ set(LLVM_LINK_COMPONENTS ) set(LLVM_TARGET_DEFINITIONS Opts.td) - tablegen(LLVM Opts.inc -gen-opt-parser-defs) + +set(LLVM_TARGET_DEFINITIONS SubCommandOpts.td) +tablegen(LLVM SubCommandOpts.inc -gen-opt-parser-defs) + add_public_tablegen_target(OptsTestTableGen) add_llvm_unittest(OptionTests OptionParsingTest.cpp OptionMarshallingTest.cpp + OptionSubCommandsTest.cpp ) diff --git a/llvm/unittests/Option/OptionMarshallingTest.cpp b/llvm/unittests/Option/OptionMarshallingTest.cpp index 005144b..15917cc 100644 --- a/llvm/unittests/Option/OptionMarshallingTest.cpp +++ b/llvm/unittests/Option/OptionMarshallingTest.cpp @@ -29,8 +29,9 @@ static const OptionWithMarshallingInfo MarshallingTable[] = { #define OPTION_WITH_MARSHALLING( \ PREFIX_TYPE, PREFIXED_NAME_OFFSET, ID, KIND, GROUP, ALIAS, ALIASARGS, \ FLAGS, VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, METAVAR, VALUES, \ - SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, DEFAULT_VALUE, IMPLIED_CHECK, \ - IMPLIED_VALUE, NORMALIZER, DENORMALIZER, MERGER, EXTRACTOR, TABLE_INDEX) \ + SUBCOMMANDIDS_OFFSET, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, DEFAULT_VALUE, \ + IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, MERGER, EXTRACTOR, \ + TABLE_INDEX) \ {PREFIXED_NAME_OFFSET, #KEYPATH, #IMPLIED_CHECK, #IMPLIED_VALUE}, #include "Opts.inc" #undef OPTION_WITH_MARSHALLING diff --git a/llvm/unittests/Option/OptionSubCommandsTest.cpp b/llvm/unittests/Option/OptionSubCommandsTest.cpp new file mode 100644 index 0000000..e31a326 --- /dev/null +++ b/llvm/unittests/Option/OptionSubCommandsTest.cpp @@ -0,0 +1,252 @@ +//===- unittest/Support/OptionParsingTest.cpp - OptTable tests ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/STLExtras.h" +#include "llvm/Option/Arg.h" +#include "llvm/Option/ArgList.h" +#include "llvm/Option/OptTable.h" +#include "llvm/Option/Option.h" +#include "llvm/Support/raw_ostream.h" +#include "gtest/gtest.h" + +using namespace llvm; +using namespace llvm::opt; + +#if defined(__clang__) +#pragma clang diagnostic ignored "-Wdeprecated-declarations" +#endif + +namespace { +enum ID { + OPT_INVALID = 0, +#define OPTION(PREFIXES, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, \ + VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, METAVAR, \ + VALUES, SUBCOMMANDIDS_OFFSET) \ + OPT_##ID, +#include "SubCommandOpts.inc" +#undef OPTION +}; +#define OPTTABLE_STR_TABLE_CODE +#include "SubCommandOpts.inc" +#undef OPTTABLE_STR_TABLE_CODE + +#define OPTTABLE_PREFIXES_TABLE_CODE +#include "SubCommandOpts.inc" +#undef OPTTABLE_PREFIXES_TABLE_CODE + +#define OPTTABLE_SUBCOMMAND_IDS_TABLE_CODE +#include "SubCommandOpts.inc" +#undef OPTTABLE_SUBCOMMAND_IDS_TABLE_CODE + +#define OPTTABLE_SUBCOMMANDS_CODE +#include "SubCommandOpts.inc" +#undef OPTTABLE_SUBCOMMANDS_CODE + +static constexpr OptTable::Info InfoTable[] = { +#define OPTION(...) LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__), +#include "SubCommandOpts.inc" +#undef OPTION +}; + +class TestOptSubCommandTable : public GenericOptTable { +public: + TestOptSubCommandTable(bool IgnoreCase = false) + : GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable, + /*IgnoreCase=*/false, OptionSubCommands, + OptionSubCommandIDsTable) {} +}; + +// Test fixture +template <typename T> class OptSubCommandTableTest : public ::testing::Test {}; + +// Test both precomputed and computed OptTables with the same suite of tests. 
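+// (At the moment only the generic TestOptSubCommandTable is instantiated.)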
+using OptSubCommandTableTestTypes = ::testing::Types<TestOptSubCommandTable>; + +TYPED_TEST_SUITE(OptSubCommandTableTest, OptSubCommandTableTestTypes, ); + +TYPED_TEST(OptSubCommandTableTest, SubCommandParsing) { + TypeParam T; + unsigned MAI, MAC; + + std::string ErrMsg; + raw_string_ostream RSO1(ErrMsg); + + auto HandleMultipleSubcommands = [&](ArrayRef<StringRef> SubCommands) { + ErrMsg.clear(); + RSO1 << "Multiple subcommands passed\n"; + for (auto SC : SubCommands) + RSO1 << "\n" << SC; + }; + + auto HandleOtherPositionals = [&](ArrayRef<StringRef> Positionals) { + ErrMsg.clear(); + RSO1 << "Unregistered positionals passed\n"; + for (auto SC : Positionals) + RSO1 << "\n" << SC; + }; + + { + // Test case 1: Toplevel option, no subcommand + const char *Args[] = {"-version"}; + InputArgList AL = T.ParseArgs(Args, MAI, MAC); + EXPECT_TRUE(AL.hasArg(OPT_version)); + StringRef SC = AL.getSubCommand( + T.getSubCommands(), HandleMultipleSubcommands, HandleOtherPositionals); + EXPECT_TRUE(SC.empty()); + EXPECT_FALSE(AL.hasArg(OPT_uppercase)); + EXPECT_FALSE(AL.hasArg(OPT_lowercase)); + } + + { + // Test case 2: Subcommand 'foo' with its valid options + const char *Args[] = {"foo", "-uppercase"}; + InputArgList AL = T.ParseArgs(Args, MAI, MAC); + StringRef SC = AL.getSubCommand( + T.getSubCommands(), HandleMultipleSubcommands, HandleOtherPositionals); + EXPECT_EQ(SC, "foo"); + EXPECT_TRUE(AL.hasArg(OPT_uppercase)); + EXPECT_FALSE(AL.hasArg(OPT_lowercase)); + EXPECT_FALSE(AL.hasArg(OPT_version)); + EXPECT_EQ(std::string::npos, ErrMsg.find("Multiple subcommands passed")) + << "Did not expect error message as this is a valid use case."; + EXPECT_EQ(std::string::npos, ErrMsg.find("Unregistered positionals passed")) + << "Did not expect error message as this is a valid use case."; + } + + { + // Test case 3: Check valid use of subcommand which follows a valid + // subcommand option. + const char *Args[] = {"-uppercase", "foo"}; + InputArgList AL = T.ParseArgs(Args, MAI, MAC); + StringRef SC = AL.getSubCommand( + T.getSubCommands(), HandleMultipleSubcommands, HandleOtherPositionals); + EXPECT_EQ(SC, "foo"); + EXPECT_TRUE(AL.hasArg(OPT_uppercase)); + EXPECT_FALSE(AL.hasArg(OPT_lowercase)); + EXPECT_FALSE(AL.hasArg(OPT_version)); + EXPECT_EQ(std::string::npos, ErrMsg.find("Multiple subcommands passed")) + << "Did not expect error message as this is a valid use case."; + EXPECT_EQ(std::string::npos, ErrMsg.find("Unregistered positionals passed")) + << "Did not expect error message as this is a valid use case."; + } + + { + // Test case 4: Check invalid use of passing multiple subcommands. + const char *Args[] = {"-uppercase", "foo", "bar"}; + InputArgList AL = T.ParseArgs(Args, MAI, MAC); + StringRef SC = AL.getSubCommand( + T.getSubCommands(), HandleMultipleSubcommands, HandleOtherPositionals); + // No valid subcommand should be returned as this is an invalid invocation. + EXPECT_TRUE(SC.empty()); + // Expect the multiple subcommands error message. + EXPECT_NE(std::string::npos, ErrMsg.find("Multiple subcommands passed")); + EXPECT_NE(std::string::npos, ErrMsg.find("foo")); + EXPECT_NE(std::string::npos, ErrMsg.find("bar")); + EXPECT_EQ(std::string::npos, ErrMsg.find("Unregistered positionals passed")) + << "Did not expect error message as this is a valid use case."; + } + + { + // Test case 5: Check invalid use of passing unregistered subcommands. 
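+ // "foobar" does not name a registered subcommand, so it should be reported
+ // through the unregistered-positionals handler.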
+ const char *Args[] = {"foobar"}; + InputArgList AL = T.ParseArgs(Args, MAI, MAC); + StringRef SC = AL.getSubCommand( + T.getSubCommands(), HandleMultipleSubcommands, HandleOtherPositionals); + // No valid subcommand should be returned as this is an invalid invocation. + EXPECT_TRUE(SC.empty()); + // Expect the unregistered subcommands error message. + EXPECT_NE(std::string::npos, + ErrMsg.find("Unregistered positionals passed")); + EXPECT_NE(std::string::npos, ErrMsg.find("foobar")); + } + + { + // Test case 6: Check invalid use of a valid subcommand which follows a + // valid subcommand option but the option is not registered with the given + // subcommand. + const char *Args[] = {"-lowercase", "bar"}; + InputArgList AL = T.ParseArgs(Args, MAI, MAC); + StringRef SC = AL.getSubCommand( + T.getSubCommands(), HandleMultipleSubcommands, HandleOtherPositionals); + auto HandleSubCommandArg = [&](ID OptionType) { + if (!AL.hasArg(OptionType)) + return false; + auto O = T.getOption(OptionType); + if (!O.isRegisteredSC(SC)) { + ErrMsg.clear(); + RSO1 << "Option [" << O.getName() << "] is not valid for SubCommand [" + << SC << "]\n"; + return false; + } + return true; + }; + EXPECT_EQ(SC, "bar"); // valid subcommand + EXPECT_TRUE(AL.hasArg(OPT_lowercase)); // valid option + EXPECT_FALSE(HandleSubCommandArg(OPT_lowercase)); + EXPECT_NE( + std::string::npos, + ErrMsg.find("Option [lowercase] is not valid for SubCommand [bar]")); + } +} + +TYPED_TEST(OptSubCommandTableTest, SubCommandHelp) { + TypeParam T; + std::string Help; + raw_string_ostream RSO(Help); + + // Toplevel help + T.printHelp(RSO, "Test Usage String", "OverviewString"); + EXPECT_NE(std::string::npos, Help.find("OVERVIEW:")); + EXPECT_NE(std::string::npos, Help.find("OverviewString")); + EXPECT_NE(std::string::npos, Help.find("USAGE:")); + EXPECT_NE(std::string::npos, Help.find("Test Usage String")); + EXPECT_NE(std::string::npos, Help.find("SUBCOMMANDS:")); + EXPECT_NE(std::string::npos, Help.find("foo")); + EXPECT_NE(std::string::npos, Help.find("bar")); + EXPECT_NE(std::string::npos, Help.find("HelpText for SubCommand foo.")); + EXPECT_NE(std::string::npos, Help.find("HelpText for SubCommand bar.")); + EXPECT_NE(std::string::npos, Help.find("OPTIONS:")); + EXPECT_NE(std::string::npos, Help.find("--help")); + EXPECT_NE(std::string::npos, Help.find("-version")); + // uppercase is not a global option and should not be shown. + EXPECT_EQ(std::string::npos, Help.find("-uppercase")); + + // Help for subcommand foo + Help.clear(); + StringRef SC1 = "foo"; + T.printHelp(RSO, "Test Usage String", "OverviewString", false, false, + Visibility(), SC1); + EXPECT_NE(std::string::npos, Help.find("OVERVIEW:")); + EXPECT_NE(std::string::npos, Help.find("OverviewString")); + // SubCommand "foo" definition for tablegen has NO dedicated usage string so + // not expected to see USAGE. 
+ EXPECT_EQ(std::string::npos, Help.find("USAGE:")); + EXPECT_NE(std::string::npos, Help.find("HelpText for SubCommand foo.")); + EXPECT_NE(std::string::npos, Help.find("-uppercase")); + EXPECT_NE(std::string::npos, Help.find("-lowercase")); + EXPECT_EQ(std::string::npos, Help.find("-version")); + EXPECT_EQ(std::string::npos, Help.find("SUBCOMMANDS:")); + + // Help for subcommand bar + Help.clear(); + StringRef SC2 = "bar"; + T.printHelp(RSO, "Test Usage String", "OverviewString", false, false, + Visibility(), SC2); + EXPECT_NE(std::string::npos, Help.find("OVERVIEW:")); + EXPECT_NE(std::string::npos, Help.find("OverviewString")); + // SubCommand "bar" definition for tablegen has a dedicated usage string. + EXPECT_NE(std::string::npos, Help.find("USAGE:")); + EXPECT_NE(std::string::npos, Help.find("Subcommand bar <options>")); + EXPECT_NE(std::string::npos, Help.find("HelpText for SubCommand bar.")); + EXPECT_NE(std::string::npos, Help.find("-uppercase")); + // lowercase is not an option for bar and should not be shown. + EXPECT_EQ(std::string::npos, Help.find("-lowercase")); + // version is a global option and should not be shown. + EXPECT_EQ(std::string::npos, Help.find("-version")); +} +} // end anonymous namespace diff --git a/llvm/unittests/Option/SubCommandOpts.td b/llvm/unittests/Option/SubCommandOpts.td new file mode 100644 index 0000000..b9750da --- /dev/null +++ b/llvm/unittests/Option/SubCommandOpts.td @@ -0,0 +1,16 @@ +include "llvm/Option/OptParser.td" + +def sc_foo : SubCommand<"foo", "HelpText for SubCommand foo.">; + +def sc_bar : SubCommand<"bar", "HelpText for SubCommand bar.", + "Subcommand bar <options>">; + +def help : Flag<["--"], "help">, HelpText<"Subcommand <subcommand> <options>">; + +def version : Flag<["-"], "version">, HelpText<"Display the version number">; + +def uppercase : Flag<["-"], "uppercase", [sc_foo, sc_bar]>, + HelpText<"Print in uppercase">; + +def lowercase : Flag<["-"], "lowercase", [sc_foo]>, + HelpText<"Print in lowercase">; diff --git a/llvm/utils/TableGen/OptionParserEmitter.cpp b/llvm/utils/TableGen/OptionParserEmitter.cpp index a470fbb..48ae1a0 100644 --- a/llvm/utils/TableGen/OptionParserEmitter.cpp +++ b/llvm/utils/TableGen/OptionParserEmitter.cpp @@ -9,8 +9,10 @@ #include "Common/OptEmitter.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" +#include "llvm/Option/OptTable.h" #include "llvm/Support/InterleavedRange.h" #include "llvm/Support/raw_ostream.h" #include "llvm/TableGen/Record.h" @@ -258,6 +260,9 @@ static void emitOptionParser(const RecordKeeper &Records, raw_ostream &OS) { std::vector<const Record *> Opts = Records.getAllDerivedDefinitions("Option"); llvm::sort(Opts, IsOptionRecordsLess); + std::vector<const Record *> SubCommands = + Records.getAllDerivedDefinitions("SubCommand"); + emitSourceFileHeader("Option Parsing Definitions", OS); // Generate prefix groups. @@ -271,6 +276,35 @@ static void emitOptionParser(const RecordKeeper &Records, raw_ostream &OS) { Prefixes.try_emplace(PrefixKey, 0); } + // Generate sub command groups. 
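+ // Each option's list of SubCommands is folded into a key so that options
+ // sharing the same set of subcommands also share a single offset into the
+ // emitted subcommand IDs table.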
+ typedef SmallVector<StringRef, 2> SubCommandKeyT; + typedef std::map<SubCommandKeyT, unsigned> SubCommandIDsT; + SubCommandIDsT SubCommandIDs; + + auto PrintSubCommandIdsOffset = [&SubCommandIDs, &OS](const Record &R) { + if (R.getValue("SubCommands") != nullptr) { + std::vector<const Record *> SubCommands = + R.getValueAsListOfDefs("SubCommands"); + SubCommandKeyT SubCommandKey; + for (const auto &SubCommand : SubCommands) + SubCommandKey.push_back(SubCommand->getName()); + OS << SubCommandIDs[SubCommandKey]; + } else { + // The option SubCommandIDsOffset (for default top level toolname is 0). + OS << " 0"; + } + }; + + SubCommandIDs.try_emplace(SubCommandKeyT(), 0); + for (const Record &R : llvm::make_pointee_range(Opts)) { + std::vector<const Record *> RSubCommands = + R.getValueAsListOfDefs("SubCommands"); + SubCommandKeyT SubCommandKey; + for (const auto &SubCommand : RSubCommands) + SubCommandKey.push_back(SubCommand->getName()); + SubCommandIDs.try_emplace(SubCommandKey, 0); + } + DenseSet<StringRef> PrefixesUnionSet; for (const auto &[Prefix, _] : Prefixes) PrefixesUnionSet.insert_range(Prefix); @@ -323,6 +357,40 @@ static void emitOptionParser(const RecordKeeper &Records, raw_ostream &OS) { OS << "\n};\n"; OS << "#endif // OPTTABLE_PREFIXES_TABLE_CODE\n\n"; + // Dump subcommand IDs. + OS << "/////////"; + OS << "// SubCommand IDs\n\n"; + OS << "#ifdef OPTTABLE_SUBCOMMAND_IDS_TABLE_CODE\n"; + OS << "static constexpr unsigned OptionSubCommandIDsTable[] = {\n"; + { + // Ensure the first subcommand set is always empty. + assert(!SubCommandIDs.empty() && + "We should always emit an empty set of subcommands"); + assert(SubCommandIDs.begin()->first.empty() && + "First subcommand set should always be empty"); + llvm::ListSeparator Sep(",\n"); + unsigned CurIndex = 0; + for (auto &[SubCommand, SubCommandIndex] : SubCommandIDs) { + // First emit the number of subcommand strings in this list of + // subcommands. + OS << Sep << " " << SubCommand.size() << " /* subcommands */"; + SubCommandIndex = CurIndex; + assert((CurIndex == 0 || !SubCommand.empty()) && + "Only first subcommand set should be empty!"); + for (const auto &SubCommandKey : SubCommand) { + auto It = std::find_if( + SubCommands.begin(), SubCommands.end(), + [&](const Record *R) { return R->getName() == SubCommandKey; }); + assert(It != SubCommands.end() && "SubCommand not found"); + OS << ", " << std::distance(SubCommands.begin(), It) << " /* '" + << SubCommandKey << "' */"; + } + CurIndex += SubCommand.size() + 1; + } + } + OS << "\n};\n"; + OS << "#endif // OPTTABLE_SUBCOMMAND_IDS_TABLE_CODE\n\n"; + // Dump prefixes union. OS << "/////////\n"; OS << "// Prefix Union\n\n"; @@ -400,7 +468,12 @@ static void emitOptionParser(const RecordKeeper &Records, raw_ostream &OS) { OS << ", nullptr"; // The option Values (unused for groups). - OS << ", nullptr)\n"; + OS << ", nullptr"; + + // The option SubCommandIDsOffset. + OS << ", "; + PrintSubCommandIdsOffset(R); + OS << ")\n"; } OS << "\n"; @@ -527,6 +600,10 @@ static void emitOptionParser(const RecordKeeper &Records, raw_ostream &OS) { OS << getOptionName(R) << "_Values"; else OS << "nullptr"; + + // The option SubCommandIDsOffset. 
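+ // (Offset 0 refers to the leading empty entry of OptionSubCommandIDsTable,
+ // i.e. the option is not tied to any named subcommand.)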
+ OS << ", "; + PrintSubCommandIdsOffset(R); }; auto IsMarshallingOption = [](const Record &R) { @@ -595,6 +672,19 @@ static void emitOptionParser(const RecordKeeper &Records, raw_ostream &OS) { OS << "#endif // SIMPLE_ENUM_VALUE_TABLE\n"; OS << "\n"; + OS << "/////////\n"; + OS << "\n// SubCommands\n\n"; + OS << "#ifdef OPTTABLE_SUBCOMMANDS_CODE\n"; + OS << "static constexpr llvm::opt::OptTable::SubCommand OptionSubCommands[] " + "= " + "{\n"; + for (const Record *SubCommand : SubCommands) { + OS << " { \"" << SubCommand->getValueAsString("Name") << "\", "; + OS << "\"" << SubCommand->getValueAsString("HelpText") << "\", "; + OS << "\"" << SubCommand->getValueAsString("Usage") << "\" },\n"; + } + OS << "};\n"; + OS << "#endif // OPTTABLE_SUBCOMMANDS_CODE\n\n"; OS << "\n"; } diff --git a/llvm/utils/gn/secondary/llvm/lib/CAS/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/CAS/BUILD.gn index c37f43c..b4edd8d 100644 --- a/llvm/utils/gn/secondary/llvm/lib/CAS/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/CAS/BUILD.gn @@ -9,6 +9,7 @@ static_library("CAS") { "MappedFileRegionArena.cpp", "ObjectStore.cpp", "OnDiskCommon.cpp", + "OnDiskDataAllocator.cpp", "OnDiskTrieRawHashMap.cpp", ] } diff --git a/llvm/utils/gn/secondary/llvm/unittests/CAS/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/CAS/BUILD.gn index ccb447f..52a64be 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/CAS/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/CAS/BUILD.gn @@ -10,6 +10,7 @@ unittest("CASTests") { "ActionCacheTest.cpp", "CASTestConfig.cpp", "ObjectStoreTest.cpp", + "OnDiskDataAllocatorTest.cpp", "OnDiskTrieRawHashMapTest.cpp", "ProgramTest.cpp", ] diff --git a/llvm/utils/profcheck-xfail.txt b/llvm/utils/profcheck-xfail.txt index 53187c8..bbc8f59 100644 --- a/llvm/utils/profcheck-xfail.txt +++ b/llvm/utils/profcheck-xfail.txt @@ -1,11 +1,8 @@ Analysis/LoopAccessAnalysis/memcheck-ni.ll Analysis/MemorySSA/pr116227.ll -Analysis/MemorySSA/pr40038.ll Analysis/MemorySSA/pr43641.ll Analysis/MemorySSA/pr46574.ll Analysis/MemorySSA/update-remove-dead-blocks.ll -Analysis/StackSafetyAnalysis/ipa.ll -Analysis/ValueTracking/known-power-of-two-urem.ll Bitcode/fcmp-fast.ll Bitcode/flags.ll CodeGen/AArch64/cgdata-merge-local.ll @@ -70,16 +67,11 @@ CodeGen/AMDGPU/si-annotate-nested-control-flows.ll CodeGen/AMDGPU/simple-indirect-call-2.ll CodeGen/ARM/loopvectorize_pr33804.ll CodeGen/ARM/sjljeh-swifterror.ll -CodeGen/BPF/adjust-opt-icmp1.ll -CodeGen/BPF/adjust-opt-icmp2.ll -CodeGen/BPF/adjust-opt-icmp5.ll -CodeGen/BPF/adjust-opt-icmp6.ll CodeGen/Hexagon/autohvx/interleave.ll CodeGen/Hexagon/loop-idiom/hexagon-memmove1.ll CodeGen/Hexagon/loop-idiom/hexagon-memmove2.ll CodeGen/Hexagon/loop-idiom/memmove-rt-check.ll CodeGen/NVPTX/lower-ctor-dtor.ll -CodeGen/PowerPC/P10-stack-alignment.ll CodeGen/RISCV/zmmul.ll CodeGen/SPIRV/hlsl-resources/UniqueImplicitBindingNumber.ll CodeGen/WebAssembly/memory-interleave.ll @@ -87,11 +79,8 @@ CodeGen/X86/masked_gather_scatter.ll CodeGen/X86/nocfivalue.ll DebugInfo/AArch64/ir-outliner.ll DebugInfo/assignment-tracking/X86/hotcoldsplit.ll -DebugInfo/debugify-each.ll DebugInfo/Generic/block-asan.ll DebugInfo/KeyInstructions/Generic/loop-unswitch.ll -DebugInfo/KeyInstructions/Generic/simplifycfg-branch-fold.ll -DebugInfo/simplify-cfg-preserve-dbg-values.ll DebugInfo/X86/asan_debug_info.ll Instrumentation/AddressSanitizer/aarch64be.ll Instrumentation/AddressSanitizer/adaptive_global_redzones.ll @@ -532,13 +521,9 @@ Instrumentation/TypeSanitizer/nosanitize.ll 
Instrumentation/TypeSanitizer/sanitize-no-tbaa.ll Instrumentation/TypeSanitizer/swifterror.ll LTO/X86/diagnostic-handler-remarks-with-hotness.ll -Other/ChangePrinters/DotCfg/print-changed-dot-cfg.ll -Other/opt-bisect-print-ir-path.ll Other/optimization-remarks-auto.ll -Other/printer.ll Other/X86/debugcounter-partiallyinlinelibcalls.ll tools/llvm-objcopy/ELF/auto-remove-add-symtab-shndx.test -tools/not/disable-symbolization.test tools/UpdateTestChecks/update_analyze_test_checks/loop-access-analysis.test tools/UpdateTestChecks/update_analyze_test_checks/loop-distribute.test tools/UpdateTestChecks/update_test_checks/argument_name_reuse.test @@ -563,14 +548,10 @@ tools/UpdateTestChecks/update_test_checks/stable_ir_values_funcs.test tools/UpdateTestChecks/update_test_checks/stable_ir_values.test tools/UpdateTestChecks/update_test_checks/tbaa-semantics-checks.test tools/UpdateTestChecks/update_test_checks/various_ir_values_dbgrecords.test -Transforms/AggressiveInstCombine/inline-strcmp-debugloc.ll Transforms/AggressiveInstCombine/lower-table-based-cttz-basics.ll Transforms/AggressiveInstCombine/lower-table-based-cttz-dereferencing-pointer.ll Transforms/AggressiveInstCombine/lower-table-based-cttz-non-argument-value.ll Transforms/AggressiveInstCombine/lower-table-based-cttz-zero-element.ll -Transforms/AggressiveInstCombine/memchr.ll -Transforms/AggressiveInstCombine/strncmp-1.ll -Transforms/AggressiveInstCombine/strncmp-2.ll Transforms/AggressiveInstCombine/trunc_select_cmp.ll Transforms/AggressiveInstCombine/trunc_select.ll Transforms/AtomicExpand/AArch64/atomicrmw-fp.ll @@ -608,7 +589,6 @@ Transforms/AtomicExpand/AMDGPU/expand-cmpxchg-flat-maybe-private.ll Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll Transforms/AtomicExpand/ARM/atomic-expansion-v8.ll Transforms/AtomicExpand/ARM/atomicrmw-fp.ll -Transforms/AtomicExpand/ARM/cmpxchg-weak.ll Transforms/AtomicExpand/Hexagon/atomicrmw-fp.ll Transforms/AtomicExpand/LoongArch/atomicrmw-fp.ll Transforms/AtomicExpand/Mips/atomicrmw-fp.ll @@ -688,7 +668,6 @@ Transforms/CodeGenPrepare/NVPTX/bypass-slow-div-not-exact.ll Transforms/CodeGenPrepare/NVPTX/bypass-slow-div-special-cases.ll Transforms/CodeGenPrepare/X86/vec-shift-inseltpoison.ll Transforms/CodeGenPrepare/X86/vec-shift.ll -Transforms/Coroutines/coro-alloca-outside-frame.ll Transforms/Coroutines/coro-await-suspend-lower-invoke.ll Transforms/Coroutines/coro-await-suspend-lower.ll Transforms/Coroutines/coro-byval-param.ll @@ -829,21 +808,17 @@ Transforms/HotColdSplit/unwind.ll Transforms/HotColdSplit/update-split-loop-metadata.ll Transforms/IndirectBrExpand/basic.ll Transforms/IndVarSimplify/debugloc-rem-subst.ll -Transforms/IndVarSimplify/eliminate-backedge.ll Transforms/IndVarSimplify/eliminate-rem.ll Transforms/IndVarSimplify/invalidate-modified-lcssa-phi.ll Transforms/IndVarSimplify/pr45835.ll Transforms/IndVarSimplify/preserving-debugloc-rem-div.ll -Transforms/Inline/optimization-remarks-hotness-threshold.ll Transforms/InstCombine/2004-09-20-BadLoadCombine.ll Transforms/InstCombine/2005-04-07-UDivSelectCrash.ll -Transforms/InstCombine/2011-02-14-InfLoop.ll Transforms/InstCombine/AArch64/sve-intrinsic-sel.ll Transforms/InstCombine/AArch64/sve-intrinsic-simplify-binop.ll Transforms/InstCombine/AArch64/sve-intrinsic-simplify-shift.ll Transforms/InstCombine/add-mask.ll Transforms/InstCombine/add-shl-mul-umax.ll -Transforms/InstCombine/add-shl-sdiv-to-srem.ll Transforms/InstCombine/AMDGPU/addrspacecast.ll Transforms/InstCombine/and2.ll Transforms/InstCombine/and-fcmp.ll @@ -853,13 +828,10 @@ 
Transforms/InstCombine/and-or-icmps.ll Transforms/InstCombine/and-or-implied-cond-not.ll Transforms/InstCombine/apint-div1.ll Transforms/InstCombine/apint-div2.ll -Transforms/InstCombine/apint-rem1.ll -Transforms/InstCombine/apint-rem2.ll Transforms/InstCombine/ashr-demand.ll Transforms/InstCombine/atomic.ll Transforms/InstCombine/binop-cast.ll Transforms/InstCombine/binop-select-cast-of-select-cond.ll -Transforms/InstCombine/binop-select.ll Transforms/InstCombine/bit-checks.ll Transforms/InstCombine/bitreverse.ll Transforms/InstCombine/branch.ll @@ -931,30 +903,23 @@ Transforms/InstCombine/not.ll Transforms/InstCombine/or-bitmask.ll Transforms/InstCombine/or-fcmp.ll Transforms/InstCombine/or.ll -Transforms/InstCombine/phi-select-constant.ll Transforms/InstCombine/pow-1.ll Transforms/InstCombine/pow-3.ll Transforms/InstCombine/pow-sqrt.ll Transforms/InstCombine/pr24354.ll -Transforms/InstCombine/pr35515.ll -Transforms/InstCombine/ptrtoint-nullgep.ll Transforms/InstCombine/pull-conditional-binop-through-shift.ll Transforms/InstCombine/rem.ll Transforms/InstCombine/sdiv-canonicalize.ll Transforms/InstCombine/sdiv-guard.ll -Transforms/InstCombine/select-and-cmp.ll Transforms/InstCombine/select-and-or.ll -Transforms/InstCombine/select_arithmetic.ll Transforms/InstCombine/select-bitext.ll Transforms/InstCombine/select-cmp-br.ll Transforms/InstCombine/select-cmp.ll Transforms/InstCombine/select-factorize.ll Transforms/InstCombine/select_frexp.ll -Transforms/InstCombine/select-icmp-and.ll Transforms/InstCombine/select.ll Transforms/InstCombine/select-min-max.ll Transforms/InstCombine/select-of-symmetric-selects.ll -Transforms/InstCombine/select-or-cmp.ll Transforms/InstCombine/select-safe-bool-transforms.ll Transforms/InstCombine/select-safe-impliedcond-transforms.ll Transforms/InstCombine/select-safe-transforms.ll @@ -974,11 +939,8 @@ Transforms/InstCombine/strlen-1.ll Transforms/InstCombine/strrchr-3.ll Transforms/InstCombine/sub-ashr-and-to-icmp-select.ll Transforms/InstCombine/sub-ashr-or-to-icmp-select.ll -Transforms/InstCombine/sub.ll Transforms/InstCombine/sub-xor-cmp.ll Transforms/InstCombine/truncating-saturate.ll -Transforms/InstCombine/trunc-inseltpoison.ll -Transforms/InstCombine/trunc.ll Transforms/InstCombine/unordered-fcmp-select.ll Transforms/InstCombine/urem-via-cmp-select.ll Transforms/InstCombine/vec_sext.ll @@ -990,7 +952,6 @@ Transforms/InstCombine/X86/x86-avx512-inseltpoison.ll Transforms/InstCombine/X86/x86-avx512.ll Transforms/InstCombine/xor-and-or.ll Transforms/InstCombine/xor-ashr.ll -Transforms/InstCombine/xor.ll Transforms/InstCombine/zext-bool-add-sub.ll Transforms/InstCombine/zext-or-icmp.ll Transforms/IRCE/add-metadata-pre-post-loops.ll @@ -1126,12 +1087,8 @@ Transforms/LoopDistribute/pointer-phi-in-loop.ll Transforms/LoopDistribute/scev-inserted-runtime-check.ll Transforms/LoopDistribute/symbolic-stride.ll Transforms/LoopFlatten/loop-flatten-version.ll -Transforms/LoopFlatten/widen-iv2.ll -Transforms/LoopFlatten/widen-iv.ll Transforms/LoopIdiom/AArch64/byte-compare-index.ll Transforms/LoopIdiom/AArch64/find-first-byte.ll -Transforms/LoopIdiom/memset-runtime-32bit.ll -Transforms/LoopIdiom/memset-runtime-64bit.ll Transforms/LoopIdiom/RISCV/byte-compare-index.ll Transforms/LoopIdiom/X86/arithmetic-right-shift-until-zero.ll Transforms/LoopIdiom/X86/left-shift-until-bittest.ll @@ -1155,10 +1112,6 @@ Transforms/LoopSimplifyCFG/live_block_marking.ll Transforms/LoopSimplifyCFG/mssa_update.ll Transforms/LoopSimplifyCFG/pr117537.ll 
Transforms/LoopSimplifyCFG/update_parents.ll -Transforms/LoopSimplify/pr26682.ll -Transforms/LoopSimplify/preserve-llvm-loop-metadata.ll -Transforms/LoopUnroll/AArch64/apple-unrolling-multi-exit.ll -Transforms/LoopUnroll/AArch64/unrolling-multi-exit.ll Transforms/LoopUnroll/peel-last-iteration-expansion-cost.ll Transforms/LoopUnroll/peel-last-iteration-with-guards.ll Transforms/LoopUnroll/peel-last-iteration-with-variable-trip-count.ll @@ -1301,7 +1254,6 @@ Transforms/PGOProfile/chr-lifetimes.ll Transforms/PGOProfile/chr.ll Transforms/PGOProfile/chr-poison.ll Transforms/PGOProfile/comdat.ll -Transforms/PGOProfile/cspgo_profile_summary.ll Transforms/PGOProfile/memop_profile_funclet_wasm.ll Transforms/PGOProfile/profcheck-select.ll Transforms/PGOProfile/prof-verify.ll @@ -1310,25 +1262,18 @@ Transforms/PGOProfile/X86/macho.ll Transforms/PhaseOrdering/AArch64/constraint-elimination-placement.ll Transforms/PhaseOrdering/AArch64/globals-aa-required-for-vectorization.ll Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll -Transforms/PhaseOrdering/AArch64/loopflatten.ll -Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll Transforms/PhaseOrdering/AArch64/predicated-reduction.ll Transforms/PhaseOrdering/AArch64/quant_4x4.ll Transforms/PhaseOrdering/ARM/arm_mean_q7.ll Transforms/PhaseOrdering/lower-table-based-cttz.ll -Transforms/PhaseOrdering/pr44461-br-to-switch-rotate.ll -Transforms/PhaseOrdering/simplifycfg-switch-lowering-vs-correlatedpropagation.ll Transforms/PhaseOrdering/vector-select.ll Transforms/PhaseOrdering/X86/blendv-select.ll Transforms/PhaseOrdering/X86/merge-functions2.ll Transforms/PhaseOrdering/X86/merge-functions3.ll Transforms/PhaseOrdering/X86/merge-functions.ll -Transforms/PhaseOrdering/X86/pr48844-br-to-switch-vectorization.ll Transforms/PhaseOrdering/X86/pr52078.ll Transforms/PhaseOrdering/X86/pr67803.ll Transforms/PhaseOrdering/X86/preserve-access-group.ll -Transforms/PhaseOrdering/X86/simplifycfg-late.ll -Transforms/PhaseOrdering/X86/vdiv.ll Transforms/PhaseOrdering/X86/vector-reductions.ll Transforms/PreISelIntrinsicLowering/AArch64/expand-exp.ll Transforms/PreISelIntrinsicLowering/AArch64/expand-log.ll @@ -1338,13 +1283,8 @@ Transforms/PreISelIntrinsicLowering/RISCV/memset-pattern.ll Transforms/PreISelIntrinsicLowering/X86/memcpy-inline-non-constant-len.ll Transforms/PreISelIntrinsicLowering/X86/memset-inline-non-constant-len.ll Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll -Transforms/Reassociate/basictest.ll -Transforms/SampleProfile/pseudo-probe-dangle.ll -Transforms/SampleProfile/pseudo-probe-emit.ll -Transforms/SampleProfile/pseudo-probe-profile.ll Transforms/SampleProfile/pseudo-probe-profile-mismatch-thinlto.ll Transforms/SampleProfile/remarks-hotness.ll -Transforms/SampleProfile/remarks.ll Transforms/SandboxVectorizer/special_opcodes.ll Transforms/ScalarizeMaskedMemIntrin/AArch64/expand-masked-load.ll Transforms/ScalarizeMaskedMemIntrin/AArch64/expand-masked-store.ll @@ -1387,63 +1327,6 @@ Transforms/SimpleLoopUnswitch/pr60736.ll Transforms/SimpleLoopUnswitch/trivial-unswitch-freeze-individual-conditions.ll Transforms/SimpleLoopUnswitch/trivial-unswitch.ll Transforms/SimpleLoopUnswitch/trivial-unswitch-logical-and-or.ll -Transforms/SimplifyCFG/2006-12-08-Ptr-ICmp-Branch.ll -Transforms/SimplifyCFG/2008-10-03-SpeculativelyExecuteBeforePHI.ll -Transforms/SimplifyCFG/annotations.ll -Transforms/SimplifyCFG/ARM/branch-fold-threshold.ll -Transforms/SimplifyCFG/ARM/phi-eliminate.ll 
-Transforms/SimplifyCFG/ARM/select-trunc-i64.ll -Transforms/SimplifyCFG/ARM/switch-to-lookup-table.ll -Transforms/SimplifyCFG/basictest.ll -Transforms/SimplifyCFG/branch-cond-dont-merge.ll -Transforms/SimplifyCFG/branch-fold-dbg.ll -Transforms/SimplifyCFG/branch-fold.ll -Transforms/SimplifyCFG/branch-fold-multiple.ll -Transforms/SimplifyCFG/branch-fold-threshold.ll -Transforms/SimplifyCFG/branch-nested.ll -Transforms/SimplifyCFG/clamp.ll -Transforms/SimplifyCFG/common-code-hoisting.ll -Transforms/SimplifyCFG/common-dest-folding.ll -Transforms/SimplifyCFG/extract-cost.ll -Transforms/SimplifyCFG/fold-branch-to-common-dest-free-cost.ll -Transforms/SimplifyCFG/fold-branch-to-common-dest.ll -Transforms/SimplifyCFG/fold-branch-to-common-dest-two-preds-cost.ll -Transforms/SimplifyCFG/fold-debug-location.ll -Transforms/SimplifyCFG/Hexagon/switch-to-lookup-table.ll -Transforms/SimplifyCFG/hoist-dbgvalue.ll -Transforms/SimplifyCFG/indirectbr.ll -Transforms/SimplifyCFG/merge-cond-stores-2.ll -Transforms/SimplifyCFG/merge-cond-stores.ll -Transforms/SimplifyCFG/multiple-phis.ll -Transforms/SimplifyCFG/PhiBlockMerge.ll -Transforms/SimplifyCFG/pr48641.ll -Transforms/SimplifyCFG/preserve-store-alignment.ll -Transforms/SimplifyCFG/rangereduce.ll -Transforms/SimplifyCFG/RISCV/select-trunc-i64.ll -Transforms/SimplifyCFG/RISCV/switch_to_lookup_table-rv32.ll -Transforms/SimplifyCFG/RISCV/switch_to_lookup_table-rv64.ll -Transforms/SimplifyCFG/safe-abs.ll -Transforms/SimplifyCFG/SimplifyEqualityComparisonWithOnlyPredecessor-domtree-preservation-edgecase.ll -Transforms/SimplifyCFG/speculate-blocks.ll -Transforms/SimplifyCFG/speculate-derefable-load.ll -Transforms/SimplifyCFG/switch_create-custom-dl.ll -Transforms/SimplifyCFG/switch_create.ll -Transforms/SimplifyCFG/switch-dup-bbs.ll -Transforms/SimplifyCFG/switch_mask.ll -Transforms/SimplifyCFG/switch_msan.ll -Transforms/SimplifyCFG/switch-on-const-select.ll -Transforms/SimplifyCFG/switchToSelect-domtree-preservation-edgecase.ll -Transforms/SimplifyCFG/switch-to-select-multiple-edge-per-block-phi.ll -Transforms/SimplifyCFG/switch-to-select-two-case.ll -Transforms/SimplifyCFG/switch-transformations-no-lut.ll -Transforms/SimplifyCFG/wc-widen-block.ll -Transforms/SimplifyCFG/X86/disable-lookup-table.ll -Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll -Transforms/SimplifyCFG/X86/SpeculativeExec.ll -Transforms/SimplifyCFG/X86/switch-to-lookup-globals.ll -Transforms/SimplifyCFG/X86/switch-to-lookup-large-types.ll -Transforms/SimplifyCFG/X86/switch_to_lookup_table_big.ll -Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll Transforms/SLPVectorizer/AArch64/gather-root.ll Transforms/SLPVectorizer/AArch64/horizontal.ll Transforms/SLPVectorizer/AArch64/loadi8.ll @@ -1471,7 +1354,6 @@ Transforms/SLPVectorizer/reduction-gather-non-scheduled-extracts.ll Transforms/SLPVectorizer/reorder-node.ll Transforms/SLPVectorizer/reused-buildvector-matching-vectorized-node.ll Transforms/SLPVectorizer/revec.ll -Transforms/SLPVectorizer/RISCV/long-gep-chains.ll Transforms/SLPVectorizer/RISCV/remarks_cmp_sel_min_max.ll Transforms/SLPVectorizer/RISCV/remarks-insert-into-small-vector.ll Transforms/SLPVectorizer/RISCV/reordered-interleaved-loads.ll @@ -1556,4 +1438,3 @@ Transforms/Util/libcalls-opt-remarks.ll Transforms/Util/lowerswitch.ll Transforms/VectorCombine/AArch64/shuffletoidentity.ll Transforms/VectorCombine/X86/shuffle-of-selects.ll -Transforms/WholeProgramDevirt/unique-retval-same-vtable.ll |