diff options
author | Reid Kleckner <rnk@google.com> | 2025-04-13 17:58:53 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-04-13 17:58:53 +0200 |
commit | 0a27c4e318e778b520306a9e2102e03023cfaa33 (patch) | |
tree | 14915c7f218ff0a17f49c184c677322054f69f6f | |
parent | 30ae47eeefaeb2c78ae7f234621b8bb0444b7844 (diff) | |
download | llvm-0a27c4e318e778b520306a9e2102e03023cfaa33.zip llvm-0a27c4e318e778b520306a9e2102e03023cfaa33.tar.gz llvm-0a27c4e318e778b520306a9e2102e03023cfaa33.tar.bz2 |
[StrTable] Use string literal emission for intrinsics on non-MSVC platforms (#124856)
This mainly transitions the LLVM intrinsic string table from character-array
emission to string-literal emission. I confirmed locally that the generated
table is now emitted as string literals.
I moved the guts of StringToOffsetTable to a cpp file so I could move
the `EmitLongStrLiterals` cl::opt global to a non-vague linkage home in
the `TableGen` library. Because the header no longer includes
FormatVariadic.h (that include moved to the cpp file), I had to add the
include directly to the files that had been relying on it transitively.
-rw-r--r-- | llvm/cmake/modules/TableGen.cmake | 4 | ||||
-rw-r--r-- | llvm/include/llvm/TableGen/Main.h | 5 | ||||
-rw-r--r-- | llvm/include/llvm/TableGen/StringToOffsetTable.h | 104 | ||||
-rw-r--r-- | llvm/lib/TableGen/CMakeLists.txt | 1 | ||||
-rw-r--r-- | llvm/lib/TableGen/Main.cpp | 9 | ||||
-rw-r--r-- | llvm/lib/TableGen/StringToOffsetTable.cpp | 120 | ||||
-rw-r--r-- | llvm/utils/TableGen/AsmMatcherEmitter.cpp | 1 | ||||
-rw-r--r-- | llvm/utils/TableGen/Basic/SequenceToOffsetTable.h | 3 | ||||
-rw-r--r-- | llvm/utils/TableGen/Basic/TableGen.cpp | 9 | ||||
-rw-r--r-- | llvm/utils/TableGen/SDNodeInfoEmitter.cpp | 1 | ||||
-rw-r--r-- | llvm/utils/gn/secondary/llvm/lib/TableGen/BUILD.gn | 1 |
11 files changed, 145 insertions, 113 deletions
diff --git a/llvm/cmake/modules/TableGen.cmake b/llvm/cmake/modules/TableGen.cmake index ffcc718..bf914c3 100644 --- a/llvm/cmake/modules/TableGen.cmake +++ b/llvm/cmake/modules/TableGen.cmake @@ -68,7 +68,9 @@ function(tablegen project ofn) # char literals, instead. If we're cross-compiling, then conservatively assume # that the source might be consumed by MSVC. # [1] https://docs.microsoft.com/en-us/cpp/cpp/compiler-limits?view=vs-2017 - if (MSVC AND project STREQUAL LLVM) + # Don't pass this flag to mlir-src-sharder, since it doesn't support the + # flag, and it doesn't need it. + if (MSVC AND NOT "${project}" STREQUAL "MLIR_SRC_SHARDER") list(APPEND LLVM_TABLEGEN_FLAGS "--long-string-literals=0") endif() if (CMAKE_GENERATOR MATCHES "Visual Studio") diff --git a/llvm/include/llvm/TableGen/Main.h b/llvm/include/llvm/TableGen/Main.h index e8c60e2..5f68be1 100644 --- a/llvm/include/llvm/TableGen/Main.h +++ b/llvm/include/llvm/TableGen/Main.h @@ -13,6 +13,7 @@ #ifndef LLVM_TABLEGEN_MAIN_H #define LLVM_TABLEGEN_MAIN_H +#include "llvm/Support/CommandLine.h" #include <functional> namespace llvm { @@ -27,6 +28,10 @@ using TableGenMainFn = bool(raw_ostream &OS, const RecordKeeper &Records); int TableGenMain(const char *argv0, std::function<TableGenMainFn> MainFn = nullptr); +/// Controls emitting large character arrays as strings or character arrays. +/// Typically set to false when building with MSVC. 
+extern cl::opt<bool> EmitLongStrLiterals; + } // end namespace llvm #endif // LLVM_TABLEGEN_MAIN_H diff --git a/llvm/include/llvm/TableGen/StringToOffsetTable.h b/llvm/include/llvm/TableGen/StringToOffsetTable.h index e716411..2179564 100644 --- a/llvm/include/llvm/TableGen/StringToOffsetTable.h +++ b/llvm/include/llvm/TableGen/StringToOffsetTable.h @@ -12,8 +12,6 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringMap.h" -#include "llvm/Support/FormatVariadic.h" -#include "llvm/Support/raw_ostream.h" #include <optional> namespace llvm { @@ -36,17 +34,7 @@ public: bool empty() const { return StringOffset.empty(); } size_t size() const { return AggregateString.size(); } - unsigned GetOrAddStringOffset(StringRef Str, bool appendZero = true) { - auto [II, Inserted] = StringOffset.insert({Str, size()}); - if (Inserted) { - // Add the string to the aggregate if this is the first time found. - AggregateString.append(Str.begin(), Str.end()); - if (appendZero) - AggregateString += '\0'; - } - - return II->second; - } + unsigned GetOrAddStringOffset(StringRef Str, bool appendZero = true); // Returns the offset of `Str` in the table if its preset, else return // std::nullopt. @@ -69,96 +57,10 @@ public: // `static` and `constexpr`. Both `Name` and (`Name` + "Storage") must be // valid identifiers to declare. void EmitStringTableDef(raw_ostream &OS, const Twine &Name, - const Twine &Indent = "") const { - OS << formatv(R"( -#ifdef __GNUC__ -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Woverlength-strings" -#endif -{0}static constexpr char {1}Storage[] = )", - Indent, Name); - - // MSVC silently miscompiles string literals longer than 64k in some - // circumstances. When the string table is longer, emit it as an array of - // character literals. - bool UseChars = AggregateString.size() > (64 * 1024); - OS << (UseChars ? "{\n" : "\n"); - - llvm::ListSeparator LineSep(UseChars ? 
",\n" : "\n"); - llvm::SmallVector<StringRef> Strings(split(AggregateString, '\0')); - // We should always have an empty string at the start, and because these are - // null terminators rather than separators, we'll have one at the end as - // well. Skip the end one. - assert(Strings.front().empty() && "Expected empty initial string!"); - assert(Strings.back().empty() && - "Expected empty string at the end due to terminators!"); - Strings.pop_back(); - for (StringRef Str : Strings) { - OS << LineSep << Indent << " "; - // If we can, just emit this as a string literal to be concatenated. - if (!UseChars) { - OS << "\""; - OS.write_escaped(Str); - OS << "\\0\""; - continue; - } - - llvm::ListSeparator CharSep(", "); - for (char C : Str) { - OS << CharSep << "'"; - OS.write_escaped(StringRef(&C, 1)); - OS << "'"; - } - OS << CharSep << "'\\0'"; - } - OS << LineSep << Indent << (UseChars ? "};" : " ;"); - - OS << formatv(R"( -#ifdef __GNUC__ -#pragma GCC diagnostic pop -#endif - -{0}static constexpr llvm::StringTable {1} = -{0} {1}Storage; -)", - Indent, Name); - } + const Twine &Indent = "") const; // Emit the string as one single string. - void EmitString(raw_ostream &O) const { - // Escape the string. - SmallString<256> EscapedStr; - raw_svector_ostream(EscapedStr).write_escaped(AggregateString); - - O << " \""; - unsigned CharsPrinted = 0; - for (unsigned i = 0, e = EscapedStr.size(); i != e; ++i) { - if (CharsPrinted > 70) { - O << "\"\n \""; - CharsPrinted = 0; - } - O << EscapedStr[i]; - ++CharsPrinted; - - // Print escape sequences all together. 
- if (EscapedStr[i] != '\\') - continue; - - assert(i + 1 < EscapedStr.size() && "Incomplete escape sequence!"); - if (isDigit(EscapedStr[i + 1])) { - assert(isDigit(EscapedStr[i + 2]) && isDigit(EscapedStr[i + 3]) && - "Expected 3 digit octal escape!"); - O << EscapedStr[++i]; - O << EscapedStr[++i]; - O << EscapedStr[++i]; - CharsPrinted += 3; - } else { - O << EscapedStr[++i]; - ++CharsPrinted; - } - } - O << "\""; - } + void EmitString(raw_ostream &O) const; }; } // end namespace llvm diff --git a/llvm/lib/TableGen/CMakeLists.txt b/llvm/lib/TableGen/CMakeLists.txt index 84815c7..0f9284c 100644 --- a/llvm/lib/TableGen/CMakeLists.txt +++ b/llvm/lib/TableGen/CMakeLists.txt @@ -7,6 +7,7 @@ add_llvm_component_library(LLVMTableGen Record.cpp SetTheory.cpp StringMatcher.cpp + StringToOffsetTable.cpp TableGenBackend.cpp TableGenBackendSkeleton.cpp TGLexer.cpp diff --git a/llvm/lib/TableGen/Main.cpp b/llvm/lib/TableGen/Main.cpp index 35600bf..ea71621 100644 --- a/llvm/lib/TableGen/Main.cpp +++ b/llvm/lib/TableGen/Main.cpp @@ -64,6 +64,15 @@ WriteIfChanged("write-if-changed", cl::desc("Only write output if it changed")); static cl::opt<bool> TimePhases("time-phases", cl::desc("Time phases of parser and backend")); +namespace llvm { +cl::opt<bool> EmitLongStrLiterals( + "long-string-literals", + cl::desc("when emitting large string tables, prefer string literals over " + "comma-separated char literals. 
This can be a readability and " + "compile-time performance win, but upsets some compilers"), + cl::Hidden, cl::init(true)); +} // end namespace llvm + static cl::opt<bool> NoWarnOnUnusedTemplateArgs( "no-warn-on-unused-template-args", cl::desc("Disable unused template argument warnings.")); diff --git a/llvm/lib/TableGen/StringToOffsetTable.cpp b/llvm/lib/TableGen/StringToOffsetTable.cpp new file mode 100644 index 0000000..d73b574 --- /dev/null +++ b/llvm/lib/TableGen/StringToOffsetTable.cpp @@ -0,0 +1,120 @@ +//===- StringToOffsetTable.cpp - Emit a big concatenated string -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/TableGen/StringToOffsetTable.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/TableGen/Main.h" + +using namespace llvm; + +unsigned StringToOffsetTable::GetOrAddStringOffset(StringRef Str, + bool appendZero) { + auto [II, Inserted] = StringOffset.insert({Str, size()}); + if (Inserted) { + // Add the string to the aggregate if this is the first time found. + AggregateString.append(Str.begin(), Str.end()); + if (appendZero) + AggregateString += '\0'; + } + + return II->second; +} + +void StringToOffsetTable::EmitStringTableDef(raw_ostream &OS, const Twine &Name, + const Twine &Indent) const { + OS << formatv(R"( +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Woverlength-strings" +#endif +{0}static constexpr char {1}Storage[] = )", + Indent, Name); + + // MSVC silently miscompiles string literals longer than 64k in some + // circumstances. The build system sets EmitLongStrLiterals to false when it + // detects that it is targetting MSVC. 
When that option is false and the + // string table is longer than 64k, emit it as an array of character + // literals. + bool UseChars = !EmitLongStrLiterals && AggregateString.size() > (64 * 1024); + OS << (UseChars ? "{\n" : "\n"); + + llvm::ListSeparator LineSep(UseChars ? ",\n" : "\n"); + llvm::SmallVector<StringRef> Strings(split(AggregateString, '\0')); + // We should always have an empty string at the start, and because these are + // null terminators rather than separators, we'll have one at the end as + // well. Skip the end one. + assert(Strings.front().empty() && "Expected empty initial string!"); + assert(Strings.back().empty() && + "Expected empty string at the end due to terminators!"); + Strings.pop_back(); + for (StringRef Str : Strings) { + OS << LineSep << Indent << " "; + // If we can, just emit this as a string literal to be concatenated. + if (!UseChars) { + OS << "\""; + OS.write_escaped(Str); + OS << "\\0\""; + continue; + } + + llvm::ListSeparator CharSep(", "); + for (char C : Str) { + OS << CharSep << "'"; + OS.write_escaped(StringRef(&C, 1)); + OS << "'"; + } + OS << CharSep << "'\\0'"; + } + OS << LineSep << Indent << (UseChars ? "};" : " ;"); + + OS << formatv(R"( +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif + +{0}static constexpr llvm::StringTable {1} = +{0} {1}Storage; +)", + Indent, Name); +} + +void StringToOffsetTable::EmitString(raw_ostream &O) const { + // Escape the string. + SmallString<256> EscapedStr; + raw_svector_ostream(EscapedStr).write_escaped(AggregateString); + + O << " \""; + unsigned CharsPrinted = 0; + for (unsigned i = 0, e = EscapedStr.size(); i != e; ++i) { + if (CharsPrinted > 70) { + O << "\"\n \""; + CharsPrinted = 0; + } + O << EscapedStr[i]; + ++CharsPrinted; + + // Print escape sequences all together. 
+ if (EscapedStr[i] != '\\') + continue; + + assert(i + 1 < EscapedStr.size() && "Incomplete escape sequence!"); + if (isDigit(EscapedStr[i + 1])) { + assert(isDigit(EscapedStr[i + 2]) && isDigit(EscapedStr[i + 3]) && + "Expected 3 digit octal escape!"); + O << EscapedStr[++i]; + O << EscapedStr[++i]; + O << EscapedStr[++i]; + CharsPrinted += 3; + } else { + O << EscapedStr[++i]; + ++CharsPrinted; + } + } + O << "\""; +} diff --git a/llvm/utils/TableGen/AsmMatcherEmitter.cpp b/llvm/utils/TableGen/AsmMatcherEmitter.cpp index 24822c8..c954163 100644 --- a/llvm/utils/TableGen/AsmMatcherEmitter.cpp +++ b/llvm/utils/TableGen/AsmMatcherEmitter.cpp @@ -110,6 +110,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormatVariadic.h" #include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" #include "llvm/TableGen/StringMatcher.h" diff --git a/llvm/utils/TableGen/Basic/SequenceToOffsetTable.h b/llvm/utils/TableGen/Basic/SequenceToOffsetTable.h index 35a9abd..8da6fbe 100644 --- a/llvm/utils/TableGen/Basic/SequenceToOffsetTable.h +++ b/llvm/utils/TableGen/Basic/SequenceToOffsetTable.h @@ -15,15 +15,14 @@ #define LLVM_UTILS_TABLEGEN_BASIC_SEQUENCETOOFFSETTABLE_H #include "llvm/ADT/StringExtras.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/TableGen/Main.h" #include <algorithm> #include <cassert> #include <functional> #include <map> namespace llvm { -extern cl::opt<bool> EmitLongStrLiterals; inline void printChar(raw_ostream &OS, char C) { unsigned char UC(C); diff --git a/llvm/utils/TableGen/Basic/TableGen.cpp b/llvm/utils/TableGen/Basic/TableGen.cpp index 80ac93f..edb7791 100644 --- a/llvm/utils/TableGen/Basic/TableGen.cpp +++ b/llvm/utils/TableGen/Basic/TableGen.cpp @@ -26,15 +26,6 @@ using namespace llvm; -namespace llvm { -cl::opt<bool> EmitLongStrLiterals( - "long-string-literals", - cl::desc("when emitting large string tables, 
prefer string literals over " - "comma-separated char literals. This can be a readability and " - "compile-time performance win, but upsets some compilers"), - cl::Hidden, cl::init(true)); -} // end namespace llvm - static cl::OptionCategory PrintEnumsCat("Options for -print-enums"); static cl::opt<std::string> Class("class", cl::desc("Print Enum list for this class"), diff --git a/llvm/utils/TableGen/SDNodeInfoEmitter.cpp b/llvm/utils/TableGen/SDNodeInfoEmitter.cpp index 63ee0de..64f03da 100644 --- a/llvm/utils/TableGen/SDNodeInfoEmitter.cpp +++ b/llvm/utils/TableGen/SDNodeInfoEmitter.cpp @@ -9,6 +9,7 @@ #include "Basic/SequenceToOffsetTable.h" #include "Common/CodeGenDAGPatterns.h" // For SDNodeInfo. #include "llvm/Support/CommandLine.h" +#include "llvm/Support/FormatVariadic.h" #include "llvm/TableGen/Error.h" #include "llvm/TableGen/StringToOffsetTable.h" #include "llvm/TableGen/TableGenBackend.h" diff --git a/llvm/utils/gn/secondary/llvm/lib/TableGen/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/TableGen/BUILD.gn index d90df7b..b40fdf1 100644 --- a/llvm/utils/gn/secondary/llvm/lib/TableGen/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/TableGen/BUILD.gn @@ -10,6 +10,7 @@ static_library("TableGen") { "Record.cpp", "SetTheory.cpp", "StringMatcher.cpp", + "StringToOffsetTable.cpp", "TGLexer.cpp", "TGParser.cpp", "TGTimer.cpp", |