diff options
author | Chandler Carruth <chandlerc@gmail.com> | 2024-12-14 09:09:47 +0000 |
---|---|---|
committer | Chandler Carruth <chandlerc@gmail.com> | 2025-02-04 18:04:57 +0000 |
commit | cd269fee05a0f78fb53b65f701b4e06e9ddab424 (patch) | |
tree | 2a248752b2d85d3ae6bd73a7cf5091499f236006 /clang/lib/Basic/Builtins.cpp | |
parent | f308af757d72412d0d1429f43d93dedcc87c49f0 (diff) | |
download | llvm-cd269fee05a0f78fb53b65f701b4e06e9ddab424.zip llvm-cd269fee05a0f78fb53b65f701b4e06e9ddab424.tar.gz llvm-cd269fee05a0f78fb53b65f701b4e06e9ddab424.tar.bz2 |
[StrTable] Switch Clang builtins to use string tables
This both reapplies #118734, the initial attempt at this, and updates it
significantly.
First, it uses the newly added `StringTable` abstraction for string
tables, and simplifies the construction to build the string table and
info arrays separately. This should reduce any `constexpr` compile time
memory or CPU cost of the original PR while significantly improving the
APIs throughout.
It also restructures the builtins to support sharding across several
independent tables. This accomplishes three improvements over the
original PR:
1) It improves the APIs used significantly.
2) When builtins are defined from different sources (like SVE vs MVE in
AArch64), this allows each of them to build their own string table
independently rather than having to merge the string tables and info
structures.
3) It allows each shard to factor out a common prefix, often cutting the
size of the strings needed for the builtins by a factor of two.
The second point is important both because it allows different mechanisms
of construction (for example a `.def` file and a tablegen'ed `.inc` file,
or different tablegen'ed `.inc` files) and because it simply reduces the
sizes of these tables, which is valuable given how large they are in some
cases. The third builds on that size reduction.
Initially, we use this new sharding rather than merging tables in
AArch64, LoongArch, RISCV, and X86. Mostly this helps ensure the system
works, as without further changes these still push scaling limits.
Subsequent commits will more deeply leverage the new structure,
including using the prefix capabilities, which cannot be easily factored
out here and require deep changes to the targets.
Diffstat (limited to 'clang/lib/Basic/Builtins.cpp')
-rw-r--r-- | clang/lib/Basic/Builtins.cpp | 184 |
1 files changed, 131 insertions, 53 deletions
diff --git a/clang/lib/Basic/Builtins.cpp b/clang/lib/Basic/Builtins.cpp index 5881837..e5b0ff5 100644 --- a/clang/lib/Basic/Builtins.cpp +++ b/clang/lib/Basic/Builtins.cpp @@ -29,54 +29,124 @@ const char *HeaderDesc::getName() const { llvm_unreachable("Unknown HeaderDesc::HeaderID enum"); } -static constexpr Builtin::Info BuiltinInfo[] = { - {"not a builtin function", nullptr, nullptr, nullptr, HeaderDesc::NO_HEADER, - ALL_LANGUAGES}, -#define BUILTIN(ID, TYPE, ATTRS) \ - {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, -#define LANGBUILTIN(ID, TYPE, ATTRS, LANGS) \ - {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, LANGS}, -#define LIBBUILTIN(ID, TYPE, ATTRS, HEADER, LANGS) \ - {#ID, TYPE, ATTRS, nullptr, HeaderDesc::HEADER, LANGS}, +static constexpr llvm::StringTable BuiltinStrings = + CLANG_BUILTIN_STR_TABLE_START + // We inject a non-builtin string into the table. + CLANG_BUILTIN_STR_TABLE("not a builtin function", "", "") +#define BUILTIN CLANG_BUILTIN_STR_TABLE #include "clang/Basic/Builtins.inc" -}; + ; +static_assert(BuiltinStrings.size() < 100'000); + +static constexpr auto BuiltinInfos = + Builtin::MakeInfos<Builtin::FirstTSBuiltin>( + {CLANG_BUILTIN_ENTRY("not a builtin function", "", "") +#define BUILTIN CLANG_BUILTIN_ENTRY +#define LANGBUILTIN CLANG_LANGBUILTIN_ENTRY +#define LIBBUILTIN CLANG_LIBBUILTIN_ENTRY +#include "clang/Basic/Builtins.inc" + }); -const Builtin::Info &Builtin::Context::getRecord(unsigned ID) const { - if (ID < Builtin::FirstTSBuiltin) - return BuiltinInfo[ID]; - assert(((ID - Builtin::FirstTSBuiltin) < - (TSRecords.size() + AuxTSRecords.size())) && +std::pair<const Builtin::InfosShard &, const Builtin::Info &> +Builtin::Context::getShardAndInfo(unsigned ID) const { + assert((ID < (Builtin::FirstTSBuiltin + NumTargetBuiltins + + NumAuxTargetBuiltins)) && "Invalid builtin ID!"); - if (isAuxBuiltinID(ID)) - return AuxTSRecords[getAuxBuiltinID(ID) - Builtin::FirstTSBuiltin]; - return TSRecords[ID - 
Builtin::FirstTSBuiltin]; + + ArrayRef<InfosShard> Shards = BuiltinShards; + if (isAuxBuiltinID(ID)) { + Shards = AuxTargetShards; + ID = getAuxBuiltinID(ID) - Builtin::FirstTSBuiltin; + } else if (ID >= Builtin::FirstTSBuiltin) { + Shards = TargetShards; + ID -= Builtin::FirstTSBuiltin; + } + + // Loop over the shards to find the one matching this ID. We don't expect to + // have many shards and so its better to search linearly than with a binary + // search. + for (const auto &Shard : Shards) { + if (ID < Shard.Infos.size()) { + return {Shard, Shard.Infos[ID]}; + } + + ID -= Shard.Infos.size(); + } + llvm_unreachable("Invalid target builtin shard structure!"); +} + +std::string Builtin::Info::getName(const Builtin::InfosShard &Shard) const { + return (Twine(Shard.NamePrefix) + (*Shard.Strings)[Offsets.Name]).str(); } +/// Return the identifier name for the specified builtin, +/// e.g. "__builtin_abs". +std::string Builtin::Context::getName(unsigned ID) const { + const auto &[Shard, I] = getShardAndInfo(ID); + return I.getName(Shard); +} + +std::string Builtin::Context::getQuotedName(unsigned ID) const { + const auto &[Shard, I] = getShardAndInfo(ID); + return (Twine("'") + Shard.NamePrefix + (*Shard.Strings)[I.Offsets.Name] + + "'") + .str(); +} + +const char *Builtin::Context::getTypeString(unsigned ID) const { + const auto &[Shard, I] = getShardAndInfo(ID); + return (*Shard.Strings)[I.Offsets.Type].data(); +} + +const char *Builtin::Context::getAttributesString(unsigned ID) const { + const auto &[Shard, I] = getShardAndInfo(ID); + return (*Shard.Strings)[I.Offsets.Attributes].data(); +} + +const char *Builtin::Context::getRequiredFeatures(unsigned ID) const { + const auto &[Shard, I] = getShardAndInfo(ID); + return (*Shard.Strings)[I.Offsets.Features].data(); +} + +Builtin::Context::Context() : BuiltinShards{{&BuiltinStrings, BuiltinInfos}} {} + void Builtin::Context::InitializeTarget(const TargetInfo &Target, const TargetInfo *AuxTarget) { - 
assert(TSRecords.empty() && "Already initialized target?"); - TSRecords = Target.getTargetBuiltins(); - if (AuxTarget) - AuxTSRecords = AuxTarget->getTargetBuiltins(); + assert(TargetShards.empty() && "Already initialized target?"); + assert(NumTargetBuiltins == 0 && "Already initialized target?"); + TargetShards = Target.getTargetBuiltins(); + for (const auto &Shard : TargetShards) + NumTargetBuiltins += Shard.Infos.size(); + if (AuxTarget) { + AuxTargetShards = AuxTarget->getTargetBuiltins(); + for (const auto &Shard : AuxTargetShards) + NumAuxTargetBuiltins += Shard.Infos.size(); + } } bool Builtin::Context::isBuiltinFunc(llvm::StringRef FuncName) { bool InStdNamespace = FuncName.consume_front("std-"); - for (unsigned i = Builtin::NotBuiltin + 1; i != Builtin::FirstTSBuiltin; - ++i) { - if (FuncName == BuiltinInfo[i].Name && - (bool)strchr(BuiltinInfo[i].Attributes, 'z') == InStdNamespace) - return strchr(BuiltinInfo[i].Attributes, 'f') != nullptr; - } + for (const auto &Shard : {InfosShard{&BuiltinStrings, BuiltinInfos}}) + if (llvm::StringRef FuncNameSuffix = FuncName; + FuncNameSuffix.consume_front(Shard.NamePrefix)) + for (const auto &I : Shard.Infos) + if (FuncNameSuffix == (*Shard.Strings)[I.Offsets.Name] && + (bool)strchr((*Shard.Strings)[I.Offsets.Attributes].data(), 'z') == + InStdNamespace) + return strchr((*Shard.Strings)[I.Offsets.Attributes].data(), 'f') != + nullptr; return false; } /// Is this builtin supported according to the given language options? 
-static bool builtinIsSupported(const Builtin::Info &BuiltinInfo, +static bool builtinIsSupported(const llvm::StringTable &Strings, + const Builtin::Info &BuiltinInfo, const LangOptions &LangOpts) { + auto AttributesStr = Strings[BuiltinInfo.Offsets.Attributes]; + /* Builtins Unsupported */ - if (LangOpts.NoBuiltin && strchr(BuiltinInfo.Attributes, 'f') != nullptr) + if (LangOpts.NoBuiltin && strchr(AttributesStr.data(), 'f') != nullptr) return false; /* CorBuiltins Unsupported */ if (!LangOpts.Coroutines && (BuiltinInfo.Langs & COR_LANG)) @@ -123,7 +193,7 @@ static bool builtinIsSupported(const Builtin::Info &BuiltinInfo, if (!LangOpts.CPlusPlus && BuiltinInfo.Langs == CXX_LANG) return false; /* consteval Unsupported */ - if (!LangOpts.CPlusPlus20 && strchr(BuiltinInfo.Attributes, 'G') != nullptr) + if (!LangOpts.CPlusPlus20 && strchr(AttributesStr.data(), 'G') != nullptr) return false; return true; } @@ -132,22 +202,34 @@ static bool builtinIsSupported(const Builtin::Info &BuiltinInfo, /// appropriate builtin ID # and mark any non-portable builtin identifiers as /// such. void Builtin::Context::initializeBuiltins(IdentifierTable &Table, - const LangOptions& LangOpts) { - // Step #1: mark all target-independent builtins with their ID's. - for (unsigned i = Builtin::NotBuiltin + 1; i != Builtin::FirstTSBuiltin; ++i) - if (builtinIsSupported(BuiltinInfo[i], LangOpts)) { - Table.get(BuiltinInfo[i].Name).setBuiltinID(i); - } - - // Step #2: Register target-specific builtins. - for (unsigned i = 0, e = TSRecords.size(); i != e; ++i) - if (builtinIsSupported(TSRecords[i], LangOpts)) - Table.get(TSRecords[i].Name).setBuiltinID(i + Builtin::FirstTSBuiltin); + const LangOptions &LangOpts) { + { + unsigned ID = 0; + // Step #1: mark all target-independent builtins with their ID's. + for (const auto &Shard : BuiltinShards) + for (const auto &I : Shard.Infos) { + // If this is a real builtin (ID != 0) and is supported, add it. 
+ if (ID != 0 && builtinIsSupported(*Shard.Strings, I, LangOpts)) + Table.get(I.getName(Shard)).setBuiltinID(ID); + ++ID; + } + assert(ID == FirstTSBuiltin && "Should have added all non-target IDs!"); + + // Step #2: Register target-specific builtins. + for (const auto &Shard : TargetShards) + for (const auto &I : Shard.Infos) { + if (builtinIsSupported(*Shard.Strings, I, LangOpts)) + Table.get(I.getName(Shard)).setBuiltinID(ID); + ++ID; + } - // Step #3: Register target-specific builtins for AuxTarget. - for (unsigned i = 0, e = AuxTSRecords.size(); i != e; ++i) - Table.get(AuxTSRecords[i].Name) - .setBuiltinID(i + Builtin::FirstTSBuiltin + TSRecords.size()); + // Step #3: Register target-specific builtins for AuxTarget. + for (const auto &Shard : AuxTargetShards) + for (const auto &I : Shard.Infos) { + Table.get(I.getName(Shard)).setBuiltinID(ID); + ++ID; + } + } // Step #4: Unregister any builtins specified by -fno-builtin-foo. for (llvm::StringRef Name : LangOpts.NoBuiltinFuncs) { @@ -163,12 +245,8 @@ void Builtin::Context::initializeBuiltins(IdentifierTable &Table, } } -std::string Builtin::Context::getQuotedName(unsigned ID) const { - return (llvm::Twine("'") + getName(ID) + "'").str(); -} - unsigned Builtin::Context::getRequiredVectorWidth(unsigned ID) const { - const char *WidthPos = ::strchr(getRecord(ID).Attributes, 'V'); + const char *WidthPos = ::strchr(getAttributesString(ID), 'V'); if (!WidthPos) return 0; @@ -191,7 +269,7 @@ bool Builtin::Context::isLike(unsigned ID, unsigned &FormatIdx, assert(::toupper(Fmt[0]) == Fmt[1] && "Format string is not in the form \"xX\""); - const char *Like = ::strpbrk(getRecord(ID).Attributes, Fmt); + const char *Like = ::strpbrk(getAttributesString(ID), Fmt); if (!Like) return false; @@ -218,7 +296,7 @@ bool Builtin::Context::isScanfLike(unsigned ID, unsigned &FormatIdx, bool Builtin::Context::performsCallback(unsigned ID, SmallVectorImpl<int> &Encoding) const { - const char *CalleePos = 
::strchr(getRecord(ID).Attributes, 'C'); + const char *CalleePos = ::strchr(getAttributesString(ID), 'C'); if (!CalleePos) return false; |