diff options
author | Rahul Joshi <rjoshi@nvidia.com> | 2024-09-04 14:46:48 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-09-04 14:46:48 -0700 |
commit | dcf0160bd61d150e7b94067fcd991b466a361b08 (patch) | |
tree | c4f1706420e268d68157e75f6a362a34928f97f3 | |
parent | dd754cd262222bcb489038ac791e4278d90697f0 (diff) | |
download | llvm-dcf0160bd61d150e7b94067fcd991b466a361b08.zip llvm-dcf0160bd61d150e7b94067fcd991b466a361b08.tar.gz llvm-dcf0160bd61d150e7b94067fcd991b466a361b08.tar.bz2 |
[TableGen] Optimize intrinsic info type signature encoding (#106809)
Change the "fixed encoding" table used for encoding intrinsic
type signature to use 16-bit encoding as opposed to 32-bit.
This results in both space and time improvements. For space,
the total static storage size (in bytes) of this info reduces by about a third:
- Current = 14193*4 (Fixed table) + 16058 + 3 (Long Table) = 72833
- New size = 14193*2 (Fixed table) + 19879 + 3 (Long Table) = 48268.
- Reduction = 33.7% (24565 of 72833 bytes; the old size is 50.9% larger than the new one)
For time, with the added benchmark, we see a 7.3% speedup in the
`GetIntrinsicInfoTableEntries` benchmark. The actual output of the
benchmark is included in the GitHub pull request.
-rw-r--r-- | llvm/benchmarks/CMakeLists.txt | 1 | ||||
-rw-r--r-- | llvm/benchmarks/GetIntrinsicInfoTableEntriesBM.cpp | 30 | ||||
-rw-r--r-- | llvm/lib/IR/Function.cpp | 14 | ||||
-rw-r--r-- | llvm/utils/TableGen/IntrinsicEmitter.cpp | 80 |
4 files changed, 87 insertions, 38 deletions
diff --git a/llvm/benchmarks/CMakeLists.txt b/llvm/benchmarks/CMakeLists.txt index e3366e6..aa0cb77 100644 --- a/llvm/benchmarks/CMakeLists.txt +++ b/llvm/benchmarks/CMakeLists.txt @@ -6,3 +6,4 @@ add_benchmark(DummyYAML DummyYAML.cpp PARTIAL_SOURCES_INTENDED) add_benchmark(xxhash xxhash.cpp PARTIAL_SOURCES_INTENDED) add_benchmark(GetIntrinsicForClangBuiltin GetIntrinsicForClangBuiltin.cpp PARTIAL_SOURCES_INTENDED) add_benchmark(FormatVariadicBM FormatVariadicBM.cpp PARTIAL_SOURCES_INTENDED) +add_benchmark(GetIntrinsicInfoTableEntriesBM GetIntrinsicInfoTableEntriesBM.cpp PARTIAL_SOURCES_INTENDED) diff --git a/llvm/benchmarks/GetIntrinsicInfoTableEntriesBM.cpp b/llvm/benchmarks/GetIntrinsicInfoTableEntriesBM.cpp new file mode 100644 index 0000000..7f3bd3b --- /dev/null +++ b/llvm/benchmarks/GetIntrinsicInfoTableEntriesBM.cpp @@ -0,0 +1,30 @@ +//===- GetIntrinsicInfoTableEntries.cpp - IIT signature benchmark ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "benchmark/benchmark.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Intrinsics.h"
+
+using namespace llvm;
+using namespace Intrinsic;
+
+static void BM_GetIntrinsicInfoTableEntries(benchmark::State &state) {
+ SmallVector<IITDescriptor> Table;
+ for (auto _ : state) {
+ for (ID ID = 1; ID < num_intrinsics; ++ID) {
+ // Clear the shared vector before each lookup so it does not keep growing;
+ // after the first iteration this should not cause additional allocations.
+ Table.clear();
+ getIntrinsicInfoTableEntries(ID, Table);
+ }
+ }
+}
+
+BENCHMARK(BM_GetIntrinsicInfoTableEntries);
+
+BENCHMARK_MAIN();
diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp index 69520fd..afef893 100644 --- a/llvm/lib/IR/Function.cpp +++ b/llvm/lib/IR/Function.cpp @@ -1381,22 +1381,24 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos, void Intrinsic::getIntrinsicInfoTableEntries(ID id, SmallVectorImpl<IITDescriptor> &T){ + static_assert(sizeof(IIT_Table[0]) == 2, + "Expect 16-bit entries in IIT_Table"); // Check to see if the intrinsic's type was expressible by the table. - unsigned TableVal = IIT_Table[id-1]; + uint16_t TableVal = IIT_Table[id - 1]; // Decode the TableVal into an array of IITValues. - SmallVector<unsigned char, 8> IITValues; + SmallVector<unsigned char> IITValues; ArrayRef<unsigned char> IITEntries; unsigned NextElt = 0; - if ((TableVal >> 31) != 0) { + if (TableVal >> 15) { // This is an offset into the IIT_LongEncodingTable. IITEntries = IIT_LongEncodingTable; // Strip sentinel bit. - NextElt = (TableVal << 1) >> 1; + NextElt = TableVal & 0x7fff; } else { - // Decode the TableVal into an array of IITValues. If the entry was encoded - // into a single word in the table itself, decode it now. + // If the entry was encoded into a single word in the table itself, decode + // it from an array of nibbles to an array of bytes. do { IITValues.push_back(TableVal & 0xF); TableVal >>= 4; diff --git a/llvm/utils/TableGen/IntrinsicEmitter.cpp b/llvm/utils/TableGen/IntrinsicEmitter.cpp index 09eb1ed..0f4d7bf 100644 --- a/llvm/utils/TableGen/IntrinsicEmitter.cpp +++ b/llvm/utils/TableGen/IntrinsicEmitter.cpp @@ -282,11 +282,37 @@ static TypeSigTy ComputeTypeSignature(const CodeGenIntrinsic &Int) { return TypeSig; } +// Pack the type signature into 32-bit fixed encoding word. 
+static std::optional<uint32_t> encodePacked(const TypeSigTy &TypeSig) { + if (TypeSig.size() > 8) + return std::nullopt; + + uint32_t Result = 0; + for (unsigned char C : reverse(TypeSig)) { + if (C > 15) + return std::nullopt; + Result = (Result << 4) | C; + } + return Result; +} + void IntrinsicEmitter::EmitGenerator(const CodeGenIntrinsicTable &Ints, raw_ostream &OS) { - // If we can compute a 32-bit fixed encoding for this intrinsic, do so and + // Note: the code below can be switched to use 32-bit fixed encoding by + // flipping the flag below. + constexpr bool Use16BitFixedEncoding = true; + using FixedEncodingTy = + std::conditional_t<Use16BitFixedEncoding, uint16_t, uint32_t>; + constexpr unsigned FixedEncodingBits = sizeof(FixedEncodingTy) * CHAR_BIT; + // Mask with all bits 1 except the most significant bit. + const unsigned Mask = (1U << (FixedEncodingBits - 1)) - 1; + const unsigned MSBPostion = FixedEncodingBits - 1; + StringRef FixedEncodingTypeName = + Use16BitFixedEncoding ? "uint16_t" : "uint32_t"; + + // If we can compute a 16/32-bit fixed encoding for this intrinsic, do so and // capture it in this vector, otherwise store a ~0U. - std::vector<unsigned> FixedEncodings; + std::vector<FixedEncodingTy> FixedEncodings; SequenceToOffsetTable<TypeSigTy> LongEncodingTable; FixedEncodings.reserve(Ints.size()); @@ -296,69 +322,59 @@ void IntrinsicEmitter::EmitGenerator(const CodeGenIntrinsicTable &Ints, // Get the signature for the intrinsic. TypeSigTy TypeSig = ComputeTypeSignature(Int); - // Check to see if we can encode it into a 32-bit word. We can only encode - // 8 nibbles into a 32-bit word. - if (TypeSig.size() <= 8) { - // Attempt to pack elements of TypeSig into a 32-bit word, starting from - // the most significant nibble. 
- unsigned Result = 0; - bool Failed = false; - for (unsigned char C : reverse(TypeSig)) { - if (C > 15) { - Failed = true; - break; - } - Result = (Result << 4) | C; - } - - // If this could be encoded into a 31-bit word, return it. - if (!Failed && (Result >> 31) == 0) { - FixedEncodings.push_back(Result); - continue; - } + // Check to see if we can encode it into a 16/32 bit word. + std::optional<uint32_t> Result = encodePacked(TypeSig); + if (Result && (*Result & Mask) == Result) { + FixedEncodings.push_back(static_cast<FixedEncodingTy>(*Result)); + continue; } - // Otherwise, we're going to unique the sequence into the - // LongEncodingTable, and use its offset in the 32-bit table instead. LongEncodingTable.add(TypeSig); // This is a placehold that we'll replace after the table is laid out. - FixedEncodings.push_back(~0U); + FixedEncodings.push_back(static_cast<FixedEncodingTy>(~0U)); } LongEncodingTable.layout(); - OS << R"(// Global intrinsic function declaration type table. + OS << formatv(R"(// Global intrinsic function declaration type table. #ifdef GET_INTRINSIC_GENERATOR_GLOBAL -static constexpr unsigned IIT_Table[] = { - )"; +static constexpr {0} IIT_Table[] = {{ + )", + FixedEncodingTypeName); + unsigned MaxOffset = 0; for (auto [Idx, FixedEncoding, Int] : enumerate(FixedEncodings, Ints)) { if ((Idx & 7) == 7) OS << "\n "; // If the entry fit in the table, just emit it. - if (FixedEncoding != ~0U) { + if ((FixedEncoding & Mask) == FixedEncoding) { OS << "0x" << Twine::utohexstr(FixedEncoding) << ", "; continue; } TypeSigTy TypeSig = ComputeTypeSignature(Int); + unsigned Offset = LongEncodingTable.get(TypeSig); + MaxOffset = std::max(MaxOffset, Offset); // Otherwise, emit the offset into the long encoding table. We emit it this // way so that it is easier to read the offset in the .def file. 
- OS << "(1U<<31) | " << LongEncodingTable.get(TypeSig) << ", "; + OS << formatv("(1U<<{0}) | {1}, ", MSBPostion, Offset); } OS << "0\n};\n\n"; + // verify that all offsets will fit in 16/32 bits. + if ((MaxOffset & Mask) != MaxOffset) + PrintFatalError("Offset of long encoding table exceeds encoding bits"); + // Emit the shared table of register lists. OS << "static constexpr unsigned char IIT_LongEncodingTable[] = {\n"; if (!LongEncodingTable.empty()) LongEncodingTable.emit( OS, [](raw_ostream &OS, unsigned char C) { OS << (unsigned)C; }); - OS << " 255\n};\n\n"; - + OS << " 255\n};\n"; OS << "#endif\n\n"; // End of GET_INTRINSIC_GENERATOR_GLOBAL } |