aboutsummaryrefslogtreecommitdiff
path: root/clang/lib/Basic/Builtins.cpp
diff options
context:
space:
mode:
authorChandler Carruth <chandlerc@gmail.com>2024-12-14 09:09:47 +0000
committerChandler Carruth <chandlerc@gmail.com>2025-02-04 18:04:57 +0000
commitcd269fee05a0f78fb53b65f701b4e06e9ddab424 (patch)
tree2a248752b2d85d3ae6bd73a7cf5091499f236006 /clang/lib/Basic/Builtins.cpp
parentf308af757d72412d0d1429f43d93dedcc87c49f0 (diff)
downloadllvm-cd269fee05a0f78fb53b65f701b4e06e9ddab424.zip
llvm-cd269fee05a0f78fb53b65f701b4e06e9ddab424.tar.gz
llvm-cd269fee05a0f78fb53b65f701b4e06e9ddab424.tar.bz2
[StrTable] Switch Clang builtins to use string tables
This both reapplies #118734, the initial attempt at this, and updates it significantly. First, it uses the newly added `StringTable` abstraction for string tables, and simplifies the construction to build the string table and info arrays separately. This should reduce any `constexpr` compile time memory or CPU cost of the original PR while significantly improving the APIs throughout. It also restructures the builtins to support sharding across several independent tables. This accomplishes three improvements over the original PR: 1) It improves the APIs used significantly. 2) When builtins are defined from different sources (like SVE vs MVE in AArch64), this allows each of them to build their own string table independently rather than having to merge the string tables and info structures. 3) It allows each shard to factor out a common prefix, often cutting the size of the strings needed for the builtins by a factor of two. The second point is important because it allows different mechanisms of construction (for example a `.def` file and a tablegen'ed `.inc` file, or different tablegen'ed `.inc` files); it also simply reduces the sizes of these tables, which is valuable given how large they are in some cases. The third builds on that size reduction. Initially, we use this new sharding rather than merging tables in AArch64, LoongArch, RISCV, and X86. Mostly this helps ensure the system works, as without further changes these still push scaling limits. Subsequent commits will more deeply leverage the new structure, including using the prefix capabilities, which cannot be easily factored out here and require deep changes to the targets.
Diffstat (limited to 'clang/lib/Basic/Builtins.cpp')
-rw-r--r--clang/lib/Basic/Builtins.cpp184
1 files changed, 131 insertions, 53 deletions
diff --git a/clang/lib/Basic/Builtins.cpp b/clang/lib/Basic/Builtins.cpp
index 5881837..e5b0ff5 100644
--- a/clang/lib/Basic/Builtins.cpp
+++ b/clang/lib/Basic/Builtins.cpp
@@ -29,54 +29,124 @@ const char *HeaderDesc::getName() const {
llvm_unreachable("Unknown HeaderDesc::HeaderID enum");
}
-static constexpr Builtin::Info BuiltinInfo[] = {
- {"not a builtin function", nullptr, nullptr, nullptr, HeaderDesc::NO_HEADER,
- ALL_LANGUAGES},
-#define BUILTIN(ID, TYPE, ATTRS) \
- {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
-#define LANGBUILTIN(ID, TYPE, ATTRS, LANGS) \
- {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, LANGS},
-#define LIBBUILTIN(ID, TYPE, ATTRS, HEADER, LANGS) \
- {#ID, TYPE, ATTRS, nullptr, HeaderDesc::HEADER, LANGS},
+static constexpr llvm::StringTable BuiltinStrings =
+ CLANG_BUILTIN_STR_TABLE_START
+ // We inject a non-builtin string into the table.
+ CLANG_BUILTIN_STR_TABLE("not a builtin function", "", "")
+#define BUILTIN CLANG_BUILTIN_STR_TABLE
#include "clang/Basic/Builtins.inc"
-};
+ ;
+static_assert(BuiltinStrings.size() < 100'000);
+
+static constexpr auto BuiltinInfos =
+ Builtin::MakeInfos<Builtin::FirstTSBuiltin>(
+ {CLANG_BUILTIN_ENTRY("not a builtin function", "", "")
+#define BUILTIN CLANG_BUILTIN_ENTRY
+#define LANGBUILTIN CLANG_LANGBUILTIN_ENTRY
+#define LIBBUILTIN CLANG_LIBBUILTIN_ENTRY
+#include "clang/Basic/Builtins.inc"
+ });
-const Builtin::Info &Builtin::Context::getRecord(unsigned ID) const {
- if (ID < Builtin::FirstTSBuiltin)
- return BuiltinInfo[ID];
- assert(((ID - Builtin::FirstTSBuiltin) <
- (TSRecords.size() + AuxTSRecords.size())) &&
+std::pair<const Builtin::InfosShard &, const Builtin::Info &>
+Builtin::Context::getShardAndInfo(unsigned ID) const {
+ assert((ID < (Builtin::FirstTSBuiltin + NumTargetBuiltins +
+ NumAuxTargetBuiltins)) &&
"Invalid builtin ID!");
- if (isAuxBuiltinID(ID))
- return AuxTSRecords[getAuxBuiltinID(ID) - Builtin::FirstTSBuiltin];
- return TSRecords[ID - Builtin::FirstTSBuiltin];
+
+ ArrayRef<InfosShard> Shards = BuiltinShards;
+ if (isAuxBuiltinID(ID)) {
+ Shards = AuxTargetShards;
+ ID = getAuxBuiltinID(ID) - Builtin::FirstTSBuiltin;
+ } else if (ID >= Builtin::FirstTSBuiltin) {
+ Shards = TargetShards;
+ ID -= Builtin::FirstTSBuiltin;
+ }
+
+ // Loop over the shards to find the one matching this ID. We don't expect to
+ // have many shards and so it's better to search linearly than with a binary
+ // search.
+ for (const auto &Shard : Shards) {
+ if (ID < Shard.Infos.size()) {
+ return {Shard, Shard.Infos[ID]};
+ }
+
+ ID -= Shard.Infos.size();
+ }
+ llvm_unreachable("Invalid target builtin shard structure!");
+}
+
+std::string Builtin::Info::getName(const Builtin::InfosShard &Shard) const {
+ return (Twine(Shard.NamePrefix) + (*Shard.Strings)[Offsets.Name]).str();
}
+/// Return the identifier name for the specified builtin,
+/// e.g. "__builtin_abs".
+std::string Builtin::Context::getName(unsigned ID) const {
+ const auto &[Shard, I] = getShardAndInfo(ID);
+ return I.getName(Shard);
+}
+
+std::string Builtin::Context::getQuotedName(unsigned ID) const {
+ const auto &[Shard, I] = getShardAndInfo(ID);
+ return (Twine("'") + Shard.NamePrefix + (*Shard.Strings)[I.Offsets.Name] +
+ "'")
+ .str();
+}
+
+const char *Builtin::Context::getTypeString(unsigned ID) const {
+ const auto &[Shard, I] = getShardAndInfo(ID);
+ return (*Shard.Strings)[I.Offsets.Type].data();
+}
+
+const char *Builtin::Context::getAttributesString(unsigned ID) const {
+ const auto &[Shard, I] = getShardAndInfo(ID);
+ return (*Shard.Strings)[I.Offsets.Attributes].data();
+}
+
+const char *Builtin::Context::getRequiredFeatures(unsigned ID) const {
+ const auto &[Shard, I] = getShardAndInfo(ID);
+ return (*Shard.Strings)[I.Offsets.Features].data();
+}
+
+Builtin::Context::Context() : BuiltinShards{{&BuiltinStrings, BuiltinInfos}} {}
+
void Builtin::Context::InitializeTarget(const TargetInfo &Target,
const TargetInfo *AuxTarget) {
- assert(TSRecords.empty() && "Already initialized target?");
- TSRecords = Target.getTargetBuiltins();
- if (AuxTarget)
- AuxTSRecords = AuxTarget->getTargetBuiltins();
+ assert(TargetShards.empty() && "Already initialized target?");
+ assert(NumTargetBuiltins == 0 && "Already initialized target?");
+ TargetShards = Target.getTargetBuiltins();
+ for (const auto &Shard : TargetShards)
+ NumTargetBuiltins += Shard.Infos.size();
+ if (AuxTarget) {
+ AuxTargetShards = AuxTarget->getTargetBuiltins();
+ for (const auto &Shard : AuxTargetShards)
+ NumAuxTargetBuiltins += Shard.Infos.size();
+ }
}
bool Builtin::Context::isBuiltinFunc(llvm::StringRef FuncName) {
bool InStdNamespace = FuncName.consume_front("std-");
- for (unsigned i = Builtin::NotBuiltin + 1; i != Builtin::FirstTSBuiltin;
- ++i) {
- if (FuncName == BuiltinInfo[i].Name &&
- (bool)strchr(BuiltinInfo[i].Attributes, 'z') == InStdNamespace)
- return strchr(BuiltinInfo[i].Attributes, 'f') != nullptr;
- }
+ for (const auto &Shard : {InfosShard{&BuiltinStrings, BuiltinInfos}})
+ if (llvm::StringRef FuncNameSuffix = FuncName;
+ FuncNameSuffix.consume_front(Shard.NamePrefix))
+ for (const auto &I : Shard.Infos)
+ if (FuncNameSuffix == (*Shard.Strings)[I.Offsets.Name] &&
+ (bool)strchr((*Shard.Strings)[I.Offsets.Attributes].data(), 'z') ==
+ InStdNamespace)
+ return strchr((*Shard.Strings)[I.Offsets.Attributes].data(), 'f') !=
+ nullptr;
return false;
}
/// Is this builtin supported according to the given language options?
-static bool builtinIsSupported(const Builtin::Info &BuiltinInfo,
+static bool builtinIsSupported(const llvm::StringTable &Strings,
+ const Builtin::Info &BuiltinInfo,
const LangOptions &LangOpts) {
+ auto AttributesStr = Strings[BuiltinInfo.Offsets.Attributes];
+
/* Builtins Unsupported */
- if (LangOpts.NoBuiltin && strchr(BuiltinInfo.Attributes, 'f') != nullptr)
+ if (LangOpts.NoBuiltin && strchr(AttributesStr.data(), 'f') != nullptr)
return false;
/* CorBuiltins Unsupported */
if (!LangOpts.Coroutines && (BuiltinInfo.Langs & COR_LANG))
@@ -123,7 +193,7 @@ static bool builtinIsSupported(const Builtin::Info &BuiltinInfo,
if (!LangOpts.CPlusPlus && BuiltinInfo.Langs == CXX_LANG)
return false;
/* consteval Unsupported */
- if (!LangOpts.CPlusPlus20 && strchr(BuiltinInfo.Attributes, 'G') != nullptr)
+ if (!LangOpts.CPlusPlus20 && strchr(AttributesStr.data(), 'G') != nullptr)
return false;
return true;
}
@@ -132,22 +202,34 @@ static bool builtinIsSupported(const Builtin::Info &BuiltinInfo,
/// appropriate builtin ID # and mark any non-portable builtin identifiers as
/// such.
void Builtin::Context::initializeBuiltins(IdentifierTable &Table,
- const LangOptions& LangOpts) {
- // Step #1: mark all target-independent builtins with their ID's.
- for (unsigned i = Builtin::NotBuiltin + 1; i != Builtin::FirstTSBuiltin; ++i)
- if (builtinIsSupported(BuiltinInfo[i], LangOpts)) {
- Table.get(BuiltinInfo[i].Name).setBuiltinID(i);
- }
-
- // Step #2: Register target-specific builtins.
- for (unsigned i = 0, e = TSRecords.size(); i != e; ++i)
- if (builtinIsSupported(TSRecords[i], LangOpts))
- Table.get(TSRecords[i].Name).setBuiltinID(i + Builtin::FirstTSBuiltin);
+ const LangOptions &LangOpts) {
+ {
+ unsigned ID = 0;
+ // Step #1: mark all target-independent builtins with their ID's.
+ for (const auto &Shard : BuiltinShards)
+ for (const auto &I : Shard.Infos) {
+ // If this is a real builtin (ID != 0) and is supported, add it.
+ if (ID != 0 && builtinIsSupported(*Shard.Strings, I, LangOpts))
+ Table.get(I.getName(Shard)).setBuiltinID(ID);
+ ++ID;
+ }
+ assert(ID == FirstTSBuiltin && "Should have added all non-target IDs!");
+
+ // Step #2: Register target-specific builtins.
+ for (const auto &Shard : TargetShards)
+ for (const auto &I : Shard.Infos) {
+ if (builtinIsSupported(*Shard.Strings, I, LangOpts))
+ Table.get(I.getName(Shard)).setBuiltinID(ID);
+ ++ID;
+ }
- // Step #3: Register target-specific builtins for AuxTarget.
- for (unsigned i = 0, e = AuxTSRecords.size(); i != e; ++i)
- Table.get(AuxTSRecords[i].Name)
- .setBuiltinID(i + Builtin::FirstTSBuiltin + TSRecords.size());
+ // Step #3: Register target-specific builtins for AuxTarget.
+ for (const auto &Shard : AuxTargetShards)
+ for (const auto &I : Shard.Infos) {
+ Table.get(I.getName(Shard)).setBuiltinID(ID);
+ ++ID;
+ }
+ }
// Step #4: Unregister any builtins specified by -fno-builtin-foo.
for (llvm::StringRef Name : LangOpts.NoBuiltinFuncs) {
@@ -163,12 +245,8 @@ void Builtin::Context::initializeBuiltins(IdentifierTable &Table,
}
}
-std::string Builtin::Context::getQuotedName(unsigned ID) const {
- return (llvm::Twine("'") + getName(ID) + "'").str();
-}
-
unsigned Builtin::Context::getRequiredVectorWidth(unsigned ID) const {
- const char *WidthPos = ::strchr(getRecord(ID).Attributes, 'V');
+ const char *WidthPos = ::strchr(getAttributesString(ID), 'V');
if (!WidthPos)
return 0;
@@ -191,7 +269,7 @@ bool Builtin::Context::isLike(unsigned ID, unsigned &FormatIdx,
assert(::toupper(Fmt[0]) == Fmt[1] &&
"Format string is not in the form \"xX\"");
- const char *Like = ::strpbrk(getRecord(ID).Attributes, Fmt);
+ const char *Like = ::strpbrk(getAttributesString(ID), Fmt);
if (!Like)
return false;
@@ -218,7 +296,7 @@ bool Builtin::Context::isScanfLike(unsigned ID, unsigned &FormatIdx,
bool Builtin::Context::performsCallback(unsigned ID,
SmallVectorImpl<int> &Encoding) const {
- const char *CalleePos = ::strchr(getRecord(ID).Attributes, 'C');
+ const char *CalleePos = ::strchr(getAttributesString(ID), 'C');
if (!CalleePos)
return false;