diff options
| author | Kyungwoo Lee <kyulee@meta.com> | 2024-09-09 19:38:05 -0700 |
|---|---|---|
| committer | Kyungwoo Lee <kyulee@meta.com> | 2024-10-27 00:13:00 -0700 |
| commit | c7913f9fff736da4cc6a78a17e41dc539bc75e8a (patch) | |
| tree | b23e15800d3e8119b4385dfe95896fccba57623e | |
| parent | 8e10ed3b27b0f0098782171bb38387e86536be5f (diff) | |
| download | llvm-upstream/users/kyulee-com/cgdata.tar.gz llvm-upstream/users/kyulee-com/cgdata.tar.bz2 llvm-upstream/users/kyulee-com/cgdata.zip | |
[CGData][llvm-cgdata] Support for stable function map (upstream/users/kyulee-com/cgdata)
This introduces a new cgdata format for stable function maps.
The raw data is embedded in the __llvm_merge section at compile time.
This data can be read and merged into an indexed cgdata file using the llvm-cgdata tool. Consequently, the tool can now handle outlined hash trees, stable function maps, or both, as the two are orthogonal.
21 files changed, 577 insertions, 87 deletions
diff --git a/lld/test/MachO/cgdata-generate.s b/lld/test/MachO/cgdata-generate.s index 174df39d666c..f942ae07f64e 100644 --- a/lld/test/MachO/cgdata-generate.s +++ b/lld/test/MachO/cgdata-generate.s @@ -3,12 +3,12 @@ # RUN: rm -rf %t; split-file %s %t -# Synthesize raw cgdata without the header (24 byte) from the indexed cgdata. +# Synthesize raw cgdata without the header (32 byte) from the indexed cgdata. # RUN: llvm-cgdata --convert --format binary %t/raw-1.cgtext -o %t/raw-1.cgdata -# RUN: od -t x1 -j 24 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ][ ]*/ /g; s/^[ ]*//; s/[ ]*$//; s/[ ]/,0x/g; s/^/0x/' > %t/raw-1-bytes.txt +# RUN: od -t x1 -j 32 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ][ ]*/ /g; s/^[ ]*//; s/[ ]*$//; s/[ ]/,0x/g; s/^/0x/' > %t/raw-1-bytes.txt # RUN: sed "s/<RAW_BYTES>/$(cat %t/raw-1-bytes.txt)/g" %t/merge-template.s > %t/merge-1.s # RUN: llvm-cgdata --convert --format binary %t/raw-2.cgtext -o %t/raw-2.cgdata -# RUN: od -t x1 -j 24 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ][ ]*/ /g; s/^[ ]*//; s/[ ]*$//; s/[ ]/,0x/g; s/^/0x/' > %t/raw-2-bytes.txt +# RUN: od -t x1 -j 32 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ][ ]*/ /g; s/^[ ]*//; s/[ ]*$//; s/[ ]/,0x/g; s/^/0x/' > %t/raw-2-bytes.txt # RUN: sed "s/<RAW_BYTES>/$(cat %t/raw-2-bytes.txt)/g" %t/merge-template.s > %t/merge-2.s # RUN: llvm-mc -filetype obj -triple arm64-apple-darwin %t/merge-1.s -o %t/merge-1.o diff --git a/llvm/docs/CommandGuide/llvm-cgdata.rst b/llvm/docs/CommandGuide/llvm-cgdata.rst index f592e1508844..0670decd087e 100644 --- a/llvm/docs/CommandGuide/llvm-cgdata.rst +++ b/llvm/docs/CommandGuide/llvm-cgdata.rst @@ -11,15 +11,13 @@ SYNOPSIS DESCRIPTION ----------- -The :program:llvm-cgdata utility parses raw codegen data embedded -in compiled binary files and merges them into a single .cgdata file. -It can also inspect and manipulate .cgdata files. 
-Currently, the tool supports saving and restoring outlined hash trees, -enabling global function outlining across modules, allowing for more -efficient function outlining in subsequent compilations. -The design is extensible, allowing for the incorporation of additional -codegen summaries and optimization techniques, such as global function -merging, in the future. +The :program:llvm-cgdata utility parses raw codegen data embedded in compiled +binary files and merges them into a single .cgdata file. It can also inspect +and manipulate .cgdata files. Currently, the tool supports saving and restoring +outlined hash trees and stable function maps, allowing for more efficient +function outlining and function merging across modules in subsequent +compilations. The design is extensible, allowing for the incorporation of +additional codegen summaries and optimization techniques. COMMANDS -------- diff --git a/llvm/include/llvm/CGData/CodeGenData.h b/llvm/include/llvm/CGData/CodeGenData.h index 53550beeae1f..5d7c74725cce 100644 --- a/llvm/include/llvm/CGData/CodeGenData.h +++ b/llvm/include/llvm/CGData/CodeGenData.h @@ -19,6 +19,7 @@ #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/CGData/OutlinedHashTree.h" #include "llvm/CGData/OutlinedHashTreeRecord.h" +#include "llvm/CGData/StableFunctionMapRecord.h" #include "llvm/IR/Module.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/Caching.h" @@ -41,7 +42,9 @@ enum class CGDataKind { Unknown = 0x0, // A function outlining info. FunctionOutlinedHashTree = 0x1, - LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/FunctionOutlinedHashTree) + // A function merging info. + StableFunctionMergingMap = 0x2, + LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/StableFunctionMergingMap) }; const std::error_category &cgdata_category(); @@ -108,6 +111,8 @@ enum CGDataMode { class CodeGenData { /// Global outlined hash tree that has oulined hash sequences across modules. 
std::unique_ptr<OutlinedHashTree> PublishedHashTree; + /// Global stable function map that has stable function info across modules. + std::unique_ptr<StableFunctionMap> PublishedStableFunctionMap; /// This flag is set when -fcodegen-data-generate is passed. /// Or, it can be mutated with -fcodegen-data-thinlto-two-rounds. @@ -131,6 +136,9 @@ public: bool hasOutlinedHashTree() { return PublishedHashTree && !PublishedHashTree->empty(); } + bool hasStableFunctionMap() { + return PublishedStableFunctionMap && !PublishedStableFunctionMap->empty(); + } /// Returns the outlined hash tree. This can be globally used in a read-only /// manner. @@ -147,6 +155,12 @@ public: // Ensure we disable emitCGData as we do not want to read and write both. EmitCGData = false; } + void + publishStableFunctionMap(std::unique_ptr<StableFunctionMap> FunctionMap) { + PublishedStableFunctionMap = std::move(FunctionMap); + // Ensure we disable emitCGData as we do not want to read and write both. + EmitCGData = false; + } }; namespace cgdata { @@ -166,6 +180,11 @@ publishOutlinedHashTree(std::unique_ptr<OutlinedHashTree> HashTree) { CodeGenData::getInstance().publishOutlinedHashTree(std::move(HashTree)); } +inline void +publishStableFunctionMap(std::unique_ptr<StableFunctionMap> FunctionMap) { + CodeGenData::getInstance().publishStableFunctionMap(std::move(FunctionMap)); +} + struct StreamCacheData { /// Backing buffer for serialized data stream. SmallVector<SmallString<0>> Outputs; @@ -249,6 +268,8 @@ enum CGDataVersion { // Version 1 is the first version. This version supports the outlined // hash tree. Version1 = 1, + // Version 2 supports the stable function merging map. 
+ Version2 = 2, CurrentVersion = CG_DATA_INDEX_VERSION }; const uint64_t Version = CGDataVersion::CurrentVersion; @@ -258,6 +279,7 @@ struct Header { uint32_t Version; uint32_t DataKind; uint64_t OutlinedHashTreeOffset; + uint64_t StableFunctionMapOffset; // New fields should only be added at the end to ensure that the size // computation is correct. The methods below need to be updated to ensure that diff --git a/llvm/include/llvm/CGData/CodeGenData.inc b/llvm/include/llvm/CGData/CodeGenData.inc index 08ec14ea051a..e0ae7a51024d 100644 --- a/llvm/include/llvm/CGData/CodeGenData.inc +++ b/llvm/include/llvm/CGData/CodeGenData.inc @@ -20,6 +20,8 @@ #define CG_DATA_DEFINED CG_DATA_SECT_ENTRY(CG_outline, CG_DATA_QUOTE(CG_DATA_OUTLINE_COMMON), CG_DATA_OUTLINE_COFF, "__DATA,") +CG_DATA_SECT_ENTRY(CG_merge, CG_DATA_QUOTE(CG_DATA_MERGE_COMMON), + CG_DATA_MERGE_COFF, "__DATA,") #undef CG_DATA_SECT_ENTRY #endif @@ -27,20 +29,24 @@ CG_DATA_SECT_ENTRY(CG_outline, CG_DATA_QUOTE(CG_DATA_OUTLINE_COMMON), /* section name strings common to all targets other than WIN32 */ #define CG_DATA_OUTLINE_COMMON __llvm_outline +#define CG_DATA_MERGE_COMMON __llvm_merge /* Since cg data sections are not allocated, we don't need to * access them at runtime. */ #define CG_DATA_OUTLINE_COFF ".loutline" +#define CG_DATA_MERGE_COFF ".lmerge" #ifdef _WIN32 /* Runtime section names and name strings. */ -#define CG_DATA_SECT_NAME CG_DATA_OUTLINE_COFF +#define CG_DATA_OUTLINE_SECT_NAME CG_DATA_OUTLINE_COFF +#define CG_DATA_MERGE_SECT_NAME CG_DATA_MERGE_COFF #else /* Runtime section names and name strings. */ -#define CG_DATA_SECT_NAME CG_DATA_QUOTE(CG_DATA_OUTLINE_COMMON) +#define CG_DATA_OUTLINE_SECT_NAME CG_DATA_QUOTE(CG_DATA_OUTLINE_COMMON) +#define CG_DATA_MERGE_SECT_NAME CG_DATA_QUOTE(CG_DATA_MERGE_COMMON) #endif /* Indexed codegen data format version (start from 1). 
*/ -#define CG_DATA_INDEX_VERSION 1 +#define CG_DATA_INDEX_VERSION 2 diff --git a/llvm/include/llvm/CGData/CodeGenDataReader.h b/llvm/include/llvm/CGData/CodeGenDataReader.h index 7e4882df2116..085dd6dd747c 100644 --- a/llvm/include/llvm/CGData/CodeGenDataReader.h +++ b/llvm/include/llvm/CGData/CodeGenDataReader.h @@ -15,6 +15,7 @@ #include "llvm/CGData/CodeGenData.h" #include "llvm/CGData/OutlinedHashTreeRecord.h" +#include "llvm/CGData/StableFunctionMapRecord.h" #include "llvm/Support/LineIterator.h" #include "llvm/Support/VirtualFileSystem.h" @@ -36,10 +37,15 @@ public: virtual CGDataKind getDataKind() const = 0; /// Return true if the data has an outlined hash tree. virtual bool hasOutlinedHashTree() const = 0; + /// Return true if the data has a stable function map. + virtual bool hasStableFunctionMap() const = 0; /// Return the outlined hash tree that is released from the reader. std::unique_ptr<OutlinedHashTree> releaseOutlinedHashTree() { return std::move(HashTreeRecord.HashTree); } + std::unique_ptr<StableFunctionMap> releaseStableFunctionMap() { + return std::move(FunctionMapRecord.FunctionMap); + } /// Factory method to create an appropriately typed reader for the given /// codegen data file path and file system. @@ -56,15 +62,21 @@ public: /// is used by `llvm-cgdata --merge` or ThinLTO's two-codegen rounds. /// Optionally, \p CombinedHash can be used to compuate the combined hash of /// the merged data. - static Error mergeFromObjectFile(const object::ObjectFile *Obj, - OutlinedHashTreeRecord &GlobalOutlineRecord, - stable_hash *CombinedHash = nullptr); + static Error + mergeFromObjectFile(const object::ObjectFile *Obj, + OutlinedHashTreeRecord &GlobalOutlineRecord, + StableFunctionMapRecord &GlobalFunctionMapRecord, + stable_hash *CombinedHash = nullptr); protected: /// The outlined hash tree that has been read. When it's released by /// releaseOutlinedHashTree(), it's no longer valid. 
OutlinedHashTreeRecord HashTreeRecord; + /// The stable function map that has been read. When it's released by + // releaseStableFunctionMap(), it's no longer valid. + StableFunctionMapRecord FunctionMapRecord; + /// Set the current error and return same. Error error(cgdata_error Err, const std::string &ErrMsg = "") { LastError = Err; @@ -115,6 +127,11 @@ public: return Header.DataKind & static_cast<uint32_t>(CGDataKind::FunctionOutlinedHashTree); } + /// Return true if the header indicates the data has a stable function map. + bool hasStableFunctionMap() const override { + return Header.DataKind & + static_cast<uint32_t>(CGDataKind::StableFunctionMergingMap); + } }; /// This format is a simple text format that's suitable for test data. @@ -150,6 +167,12 @@ public: return static_cast<uint32_t>(DataKind) & static_cast<uint32_t>(CGDataKind::FunctionOutlinedHashTree); } + /// Return true if the header indicates the data has a stable function map. + /// This does not mean that the data is still available. + bool hasStableFunctionMap() const override { + return static_cast<uint32_t>(DataKind) & + static_cast<uint32_t>(CGDataKind::StableFunctionMergingMap); + } }; } // end namespace llvm diff --git a/llvm/include/llvm/CGData/CodeGenDataWriter.h b/llvm/include/llvm/CGData/CodeGenDataWriter.h index 5cb8377b1d07..1c4247608999 100644 --- a/llvm/include/llvm/CGData/CodeGenDataWriter.h +++ b/llvm/include/llvm/CGData/CodeGenDataWriter.h @@ -15,6 +15,7 @@ #include "llvm/CGData/CodeGenData.h" #include "llvm/CGData/OutlinedHashTreeRecord.h" +#include "llvm/CGData/StableFunctionMapRecord.h" #include "llvm/Support/EndianStream.h" #include "llvm/Support/Error.h" @@ -57,6 +58,9 @@ class CodeGenDataWriter { /// The outlined hash tree to be written. OutlinedHashTreeRecord HashTreeRecord; + /// The stable function map to be written. + StableFunctionMapRecord FunctionMapRecord; + /// A bit mask describing the kind of the codegen data. 
CGDataKind DataKind = CGDataKind::Unknown; @@ -64,9 +68,12 @@ public: CodeGenDataWriter() = default; ~CodeGenDataWriter() = default; - /// Add the outlined hash tree record. The input Record is released. + /// Add the outlined hash tree record. The input hash tree is released. void addRecord(OutlinedHashTreeRecord &Record); + /// Add the stable function map record. The input function map is released. + void addRecord(StableFunctionMapRecord &Record); + /// Write the codegen data to \c OS Error write(raw_fd_ostream &OS); @@ -81,11 +88,19 @@ public: return static_cast<uint32_t>(DataKind) & static_cast<uint32_t>(CGDataKind::FunctionOutlinedHashTree); } + /// Return true if the header indicates the data has a stable function map. + bool hasStableFunctionMap() const { + return static_cast<uint32_t>(DataKind) & + static_cast<uint32_t>(CGDataKind::StableFunctionMergingMap); + } private: /// The offset of the outlined hash tree in the file. uint64_t OutlinedHashTreeOffset; + /// The offset of the stable function map in the file. 
+ uint64_t StableFunctionMapOffset; + /// Write the codegen data header to \c COS Error writeHeader(CGDataOStream &COS); diff --git a/llvm/lib/CGData/CodeGenData.cpp b/llvm/lib/CGData/CodeGenData.cpp index 2a3a74c8bc37..88dcdfd1f931 100644 --- a/llvm/lib/CGData/CodeGenData.cpp +++ b/llvm/lib/CGData/CodeGenData.cpp @@ -14,6 +14,7 @@ #include "llvm/Bitcode/BitcodeWriter.h" #include "llvm/CGData/CodeGenDataReader.h" #include "llvm/CGData/OutlinedHashTreeRecord.h" +#include "llvm/CGData/StableFunctionMapRecord.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/Caching.h" #include "llvm/Support/CommandLine.h" @@ -163,6 +164,8 @@ CodeGenData &CodeGenData::getInstance() { auto Reader = ReaderOrErr->get(); if (Reader->hasOutlinedHashTree()) Instance->publishOutlinedHashTree(Reader->releaseOutlinedHashTree()); + if (Reader->hasStableFunctionMap()) + Instance->publishStableFunctionMap(Reader->releaseStableFunctionMap()); } }); return *Instance; @@ -185,18 +188,14 @@ Expected<Header> Header::readFromBuffer(const unsigned char *Curr) { return make_error<CGDataError>(cgdata_error::unsupported_version); H.DataKind = endian::readNext<uint32_t, endianness::little, unaligned>(Curr); - switch (H.Version) { - // When a new field is added to the header add a case statement here to - // compute the size as offset of the new field + size of the new field. This - // relies on the field being added to the end of the list. 
- static_assert(IndexedCGData::CGDataVersion::CurrentVersion == Version1, - "Please update the size computation below if a new field has " - "been added to the header, if not add a case statement to " - "fall through to the latest version."); - case 1ull: - H.OutlinedHashTreeOffset = + static_assert(IndexedCGData::CGDataVersion::CurrentVersion == Version2, + "Please update the offset computation below if a new field has " + "been added to the header."); + H.OutlinedHashTreeOffset = + endian::readNext<uint64_t, endianness::little, unaligned>(Curr); + if (H.Version >= 2) + H.StableFunctionMapOffset = endian::readNext<uint64_t, endianness::little, unaligned>(Curr); - } return H; } @@ -257,6 +256,7 @@ std::unique_ptr<Module> loadModuleForTwoRounds(BitcodeModule &OrigModule, Expected<stable_hash> mergeCodeGenData(ArrayRef<StringRef> ObjFiles) { OutlinedHashTreeRecord GlobalOutlineRecord; + StableFunctionMapRecord GlobalStableFunctionMapRecord; stable_hash CombinedHash = 0; for (auto File : ObjFiles) { if (File.empty()) @@ -270,12 +270,18 @@ Expected<stable_hash> mergeCodeGenData(ArrayRef<StringRef> ObjFiles) { std::unique_ptr<object::ObjectFile> &Obj = BinOrErr.get(); if (auto E = CodeGenDataReader::mergeFromObjectFile( - Obj.get(), GlobalOutlineRecord, &CombinedHash)) + Obj.get(), GlobalOutlineRecord, GlobalStableFunctionMapRecord, + &CombinedHash)) return E; } + GlobalStableFunctionMapRecord.finalize(); + if (!GlobalOutlineRecord.empty()) cgdata::publishOutlinedHashTree(std::move(GlobalOutlineRecord.HashTree)); + if (!GlobalStableFunctionMapRecord.empty()) + cgdata::publishStableFunctionMap( + std::move(GlobalStableFunctionMapRecord.FunctionMap)); return CombinedHash; } diff --git a/llvm/lib/CGData/CodeGenDataReader.cpp b/llvm/lib/CGData/CodeGenDataReader.cpp index 2f2481ea60f8..ebeb4ae36f99 100644 --- a/llvm/lib/CGData/CodeGenDataReader.cpp +++ b/llvm/lib/CGData/CodeGenDataReader.cpp @@ -32,10 +32,40 @@ setupMemoryBuffer(const Twine &Filename, vfs::FileSystem &FS) { 
Error CodeGenDataReader::mergeFromObjectFile( const object::ObjectFile *Obj, OutlinedHashTreeRecord &GlobalOutlineRecord, + StableFunctionMapRecord &GlobalFunctionMapRecord, stable_hash *CombinedHash) { Triple TT = Obj->makeTriple(); auto CGOutLineName = getCodeGenDataSectionName(CG_outline, TT.getObjectFormat(), false); + auto CGMergeName = + getCodeGenDataSectionName(CG_merge, TT.getObjectFormat(), false); + + auto processSectionContents = [&](const StringRef &Name, + const StringRef &Contents) { + if (Name != CGOutLineName && Name != CGMergeName) + return; + if (CombinedHash) + *CombinedHash = stable_hash_combine(*CombinedHash, xxh3_64bits(Contents)); + auto *Data = reinterpret_cast<const unsigned char *>(Contents.data()); + auto *EndData = Data + Contents.size(); + // In case dealing with an executable that has concatenated cgdata, + // we want to merge them into a single cgdata. + // Although it's not a typical workflow, we support this scenario + // by looping over all data in the sections. 
+ if (Name == CGOutLineName) { + while (Data != EndData) { + OutlinedHashTreeRecord LocalOutlineRecord; + LocalOutlineRecord.deserialize(Data); + GlobalOutlineRecord.merge(LocalOutlineRecord); + } + } else if (Name == CGMergeName) { + while (Data != EndData) { + StableFunctionMapRecord LocalFunctionMapRecord; + LocalFunctionMapRecord.deserialize(Data); + GlobalFunctionMapRecord.merge(LocalFunctionMapRecord); + } + } + }; for (auto &Section : Obj->sections()) { Expected<StringRef> NameOrErr = Section.getName(); @@ -44,23 +74,7 @@ Error CodeGenDataReader::mergeFromObjectFile( Expected<StringRef> ContentsOrErr = Section.getContents(); if (!ContentsOrErr) return ContentsOrErr.takeError(); - auto *Data = reinterpret_cast<const unsigned char *>(ContentsOrErr->data()); - auto *EndData = Data + ContentsOrErr->size(); - - if (*NameOrErr == CGOutLineName) { - if (CombinedHash) - *CombinedHash = - stable_hash_combine(*CombinedHash, xxh3_64bits(*ContentsOrErr)); - // In case dealing with an executable that has concatenated cgdata, - // we want to merge them into a single cgdata. - // Although it's not a typical workflow, we support this scenario. - while (Data != EndData) { - OutlinedHashTreeRecord LocalOutlineRecord; - LocalOutlineRecord.deserialize(Data); - GlobalOutlineRecord.merge(LocalOutlineRecord); - } - } - // TODO: Add support for other cgdata sections. + processSectionContents(*NameOrErr, *ContentsOrErr); } return Error::success(); @@ -69,7 +83,8 @@ Error CodeGenDataReader::mergeFromObjectFile( Error IndexedCodeGenDataReader::read() { using namespace support; - // The smallest header with the version 1 is 24 bytes + // The smallest header with the version 1 is 24 bytes. + // Do not update this value even with the new version of the header. 
const unsigned MinHeaderSize = 24; if (DataBuffer->getBufferSize() < MinHeaderSize) return error(cgdata_error::bad_header); @@ -87,6 +102,12 @@ Error IndexedCodeGenDataReader::read() { return error(cgdata_error::eof); HashTreeRecord.deserialize(Ptr); } + if (hasStableFunctionMap()) { + const unsigned char *Ptr = Start + Header.StableFunctionMapOffset; + if (Ptr >= End) + return error(cgdata_error::eof); + FunctionMapRecord.deserialize(Ptr); + } return success(); } @@ -152,6 +173,8 @@ Error TextCodeGenDataReader::read() { StringRef Str = Line->drop_front().rtrim(); if (Str.equals_insensitive("outlined_hash_tree")) DataKind |= CGDataKind::FunctionOutlinedHashTree; + else if (Str.equals_insensitive("stable_function_map")) + DataKind |= CGDataKind::StableFunctionMergingMap; else return error(cgdata_error::bad_header); } @@ -170,8 +193,8 @@ Error TextCodeGenDataReader::read() { yaml::Input YOS(StringRef(Pos, Size)); if (hasOutlinedHashTree()) HashTreeRecord.deserializeYAML(YOS); - - // TODO: Add more yaml cgdata in order + if (hasStableFunctionMap()) + FunctionMapRecord.deserializeYAML(YOS); return Error::success(); } diff --git a/llvm/lib/CGData/CodeGenDataWriter.cpp b/llvm/lib/CGData/CodeGenDataWriter.cpp index 5f638be0fefe..3a392036198a 100644 --- a/llvm/lib/CGData/CodeGenDataWriter.cpp +++ b/llvm/lib/CGData/CodeGenDataWriter.cpp @@ -52,6 +52,13 @@ void CodeGenDataWriter::addRecord(OutlinedHashTreeRecord &Record) { DataKind |= CGDataKind::FunctionOutlinedHashTree; } +void CodeGenDataWriter::addRecord(StableFunctionMapRecord &Record) { + assert(Record.FunctionMap && "empty function map in the record"); + FunctionMapRecord.FunctionMap = std::move(Record.FunctionMap); + + DataKind |= CGDataKind::StableFunctionMergingMap; +} + Error CodeGenDataWriter::write(raw_fd_ostream &OS) { CGDataOStream COS(OS); return writeImpl(COS); @@ -68,8 +75,11 @@ Error CodeGenDataWriter::writeHeader(CGDataOStream &COS) { if (static_cast<bool>(DataKind & CGDataKind::FunctionOutlinedHashTree)) 
Header.DataKind |= static_cast<uint32_t>(CGDataKind::FunctionOutlinedHashTree); - + if (static_cast<bool>(DataKind & CGDataKind::StableFunctionMergingMap)) + Header.DataKind |= + static_cast<uint32_t>(CGDataKind::StableFunctionMergingMap); Header.OutlinedHashTreeOffset = 0; + Header.StableFunctionMapOffset = 0; // Only write up to the CGDataKind. We need to remember the offset of the // remaining fields to allow back-patching later. @@ -83,6 +93,12 @@ Error CodeGenDataWriter::writeHeader(CGDataOStream &COS) { // Reserve the space for OutlinedHashTreeOffset field. COS.write(0); + // Save the location of Header.StableFunctionMapOffset field in \c COS. + StableFunctionMapOffset = COS.tell(); + + // Reserve the space for StableFunctionMapOffset field. + COS.write(0); + return Error::success(); } @@ -93,10 +109,14 @@ Error CodeGenDataWriter::writeImpl(CGDataOStream &COS) { uint64_t OutlinedHashTreeFieldStart = COS.tell(); if (hasOutlinedHashTree()) HashTreeRecord.serialize(COS.OS); + uint64_t StableFunctionMapFieldStart = COS.tell(); + if (hasStableFunctionMap()) + FunctionMapRecord.serialize(COS.OS); // Back patch the offsets. 
CGDataPatchItem PatchItems[] = { - {OutlinedHashTreeOffset, &OutlinedHashTreeFieldStart, 1}}; + {OutlinedHashTreeOffset, &OutlinedHashTreeFieldStart, 1}, + {StableFunctionMapOffset, &StableFunctionMapFieldStart, 1}}; COS.patch(PatchItems); return Error::success(); @@ -106,6 +126,9 @@ Error CodeGenDataWriter::writeHeaderText(raw_fd_ostream &OS) { if (hasOutlinedHashTree()) OS << "# Outlined stable hash tree\n:outlined_hash_tree\n"; + if (hasStableFunctionMap()) + OS << "# Stable function map\n:stable_function_map\n"; + // TODO: Add more data types in this header return Error::success(); @@ -119,6 +142,9 @@ Error CodeGenDataWriter::writeText(raw_fd_ostream &OS) { if (hasOutlinedHashTree()) HashTreeRecord.serializeYAML(YOS); + if (hasStableFunctionMap()) + FunctionMapRecord.serializeYAML(YOS); + // TODO: Write more yaml cgdata in order return Error::success(); diff --git a/llvm/test/tools/llvm-cgdata/empty.test b/llvm/test/tools/llvm-cgdata/empty.test index 70d5ea4b8006..bea78d512a6d 100644 --- a/llvm/test/tools/llvm-cgdata/empty.test +++ b/llvm/test/tools/llvm-cgdata/empty.test @@ -16,7 +16,7 @@ RUN: llvm-cgdata --show %t_emptyheader.cgdata | count 0 # The version number appears when asked, as it's in the header RUN: llvm-cgdata --show --cgdata-version %t_emptyheader.cgdata | FileCheck %s --check-prefix=VERSION -VERSION: Version: 1 +VERSION: Version: 2 # When converting a binary file (w/ the header only) to a text file, it's an empty file as the text format does not have an explicit header. 
RUN: llvm-cgdata --convert %t_emptyheader.cgdata --format text | count 0 @@ -27,9 +27,11 @@ RUN: llvm-cgdata --convert %t_emptyheader.cgdata --format text | count 0 # uint32_t Version; # uint32_t DataKind; # uint64_t OutlinedHashTreeOffset; +# uint64_t StableFunctionMapOffset; # } RUN: printf '\xffcgdata\x81' > %t_header.cgdata -RUN: printf '\x01\x00\x00\x00' >> %t_header.cgdata +RUN: printf '\x02\x00\x00\x00' >> %t_header.cgdata RUN: printf '\x00\x00\x00\x00' >> %t_header.cgdata -RUN: printf '\x18\x00\x00\x00\x00\x00\x00\x00' >> %t_header.cgdata +RUN: printf '\x20\x00\x00\x00\x00\x00\x00\x00' >> %t_header.cgdata +RUN: printf '\x20\x00\x00\x00\x00\x00\x00\x00' >> %t_header.cgdata RUN: diff %t_header.cgdata %t_emptyheader.cgdata diff --git a/llvm/test/tools/llvm-cgdata/error.test b/llvm/test/tools/llvm-cgdata/error.test index c992174505c1..2caa3aef4039 100644 --- a/llvm/test/tools/llvm-cgdata/error.test +++ b/llvm/test/tools/llvm-cgdata/error.test @@ -6,6 +6,7 @@ # uint32_t Version; # uint32_t DataKind; # uint64_t OutlinedHashTreeOffset; +# uint64_t StableFunctionMapOffset; # } RUN: touch %t_empty.cgdata RUN: not llvm-cgdata --show %t_empty.cgdata 2>&1 | FileCheck %s --check-prefix=EMPTY @@ -21,18 +22,20 @@ RUN: printf '\xffcgdata\x81' > %t_corrupt.cgdata RUN: not llvm-cgdata --show %t_corrupt.cgdata 2>&1 | FileCheck %s --check-prefix=CORRUPT CORRUPT: {{.}}cgdata: invalid codegen data (file header is corrupt) -# The current version 1 while the header says 2. +# The current version 2 while the header says 3. 
RUN: printf '\xffcgdata\x81' > %t_version.cgdata -RUN: printf '\x02\x00\x00\x00' >> %t_version.cgdata +RUN: printf '\x03\x00\x00\x00' >> %t_version.cgdata RUN: printf '\x00\x00\x00\x00' >> %t_version.cgdata -RUN: printf '\x18\x00\x00\x00\x00\x00\x00\x00' >> %t_version.cgdata +RUN: printf '\x20\x00\x00\x00\x00\x00\x00\x00' >> %t_version.cgdata +RUN: printf '\x20\x00\x00\x00\x00\x00\x00\x00' >> %t_version.cgdata RUN: not llvm-cgdata --show %t_version.cgdata 2>&1 | FileCheck %s --check-prefix=BAD_VERSION BAD_VERSION: {{.}}cgdata: unsupported codegen data version # Header says an outlined hash tree, but the file ends after the header. RUN: printf '\xffcgdata\x81' > %t_eof.cgdata +RUN: printf '\x02\x00\x00\x00' >> %t_eof.cgdata RUN: printf '\x01\x00\x00\x00' >> %t_eof.cgdata -RUN: printf '\x01\x00\x00\x00' >> %t_eof.cgdata -RUN: printf '\x18\x00\x00\x00\x00\x00\x00\x00' >> %t_eof.cgdata +RUN: printf '\x20\x00\x00\x00\x00\x00\x00\x00' >> %t_eof.cgdata +RUN: printf '\x20\x00\x00\x00\x00\x00\x00\x00' >> %t_eof.cgdata RUN: not llvm-cgdata --show %t_eof.cgdata 2>&1 | FileCheck %s --check-prefix=EOF EOF: {{.}}cgdata: end of File diff --git a/llvm/test/tools/llvm-cgdata/merge-combined-funcmap-hashtree.test b/llvm/test/tools/llvm-cgdata/merge-combined-funcmap-hashtree.test new file mode 100644 index 000000000000..b9bf067d3771 --- /dev/null +++ b/llvm/test/tools/llvm-cgdata/merge-combined-funcmap-hashtree.test @@ -0,0 +1,66 @@ +# REQUIRES: shell, aarch64-registered-target +# UNSUPPORTED: system-windows + +# Test merge a single object file having both __llvm_outline and __llvm_merge into a cgdata. +# Effectively, this test combines merge-hashtree.test and merge-funcmap.test. + +RUN: split-file %s %t + +# Synthesize raw hashtree bytes without the header (32 byte) from the indexed cgdata. 
+RUN: llvm-cgdata --convert --format binary %t/raw-hashtree.cgtext -o %t/raw-hashtree.cgdata +RUN: od -t x1 -j 32 -An %t/raw-hashtree.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-hashtree-bytes.txt + +# Synthesize raw funcmap bytes without the header (32 byte) from the indexed cgdata. +RUN: llvm-cgdata --convert --format binary %t/raw-funcmap.cgtext -o %t/raw-funcmap.cgdata +RUN: od -t x1 -j 32 -An %t/raw-funcmap.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-funcmap-bytes.txt + +# Synthesize a bitcode file by creating two sections for the hash tree and the function map, respectively. +RUN: sed "s/<RAW_1_BYTES>/$(cat %t/raw-hashtree-bytes.txt)/g" %t/merge-both-template.ll > %t/merge-both-hashtree-template.ll +RUN: sed "s/<RAW_2_BYTES>/$(cat %t/raw-funcmap-bytes.txt)/g" %t/merge-both-hashtree-template.ll > %t/merge-both-hashtree-funcmap.ll + +RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-both-hashtree-funcmap.ll -o %t/merge-both-hashtree-funcmap.o + +# Merge an object file having cgdata (__llvm_outline and __llvm_merge) +RUN: llvm-cgdata -m %t/merge-both-hashtree-funcmap.o -o %t/merge-both-hashtree-funcmap.cgdata +RUN: llvm-cgdata -s %t/merge-both-hashtree-funcmap.cgdata | FileCheck %s + +CHECK: Outlined hash tree: +CHECK-NEXT: Total Node Count: 3 +CHECK-NEXT: Terminal Node Count: 1 +CHECK-NEXT: Depth: 2 +CHECK-NEXT: Stable function map: +CHECK-NEXT: Unique hash Count: 1 +CHECK-NEXT: Total function Count: 1 +CHECK-NEXT: Mergeable function Count: 0 + +;--- raw-hashtree.cgtext +:outlined_hash_tree +0: + Hash: 0x0 + Terminals: 0 + SuccessorIds: [ 1 ] +1: + Hash: 0x1 + Terminals: 0 + SuccessorIds: [ 2 ] +2: + Hash: 0x2 + Terminals: 4 + SuccessorIds: [ ] +... + +;--- raw-funcmap.cgtext +:stable_function_map +- Hash: 1 + FunctionName: Func1 + ModuleName: Mod1 + InstCount: 2 + IndexOperandHashes: + - InstIndex: 0 + OpndIndex: 1 + OpndHash: 3 +... 
+ +;--- merge-both-template.ll +@.data1 = private unnamed_addr constant [72 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_outline" +@.data2 = private unnamed_addr constant [60 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_merge" diff --git a/llvm/test/tools/llvm-cgdata/merge-funcmap-archive.test b/llvm/test/tools/llvm-cgdata/merge-funcmap-archive.test new file mode 100644 index 000000000000..f643c8d92073 --- /dev/null +++ b/llvm/test/tools/llvm-cgdata/merge-funcmap-archive.test @@ -0,0 +1,83 @@ +# REQUIRES: shell, aarch64-registered-target +# UNSUPPORTED: system-windows + +# Merge an archive that has two object files having cgdata (__llvm_merge) + +RUN: split-file %s %t + +# Synthesize raw cgdata without the header (32 byte) from the indexed cgdata. +RUN: llvm-cgdata --convert --format binary %t/raw-1.cgtext -o %t/raw-1.cgdata +RUN: od -t x1 -j 32 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-1-bytes.txt +RUN: sed "s/<RAW_1_BYTES>/$(cat %t/raw-1-bytes.txt)/g" %t/merge-1-template.ll > %t/merge-1.ll +RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-1.ll -o %t/merge-1.o + +# Synthesize raw cgdata without the header (32 byte) from the indexed cgdata. +RUN: llvm-cgdata --convert --format binary %t/raw-2.cgtext -o %t/raw-2.cgdata +RUN: od -t x1 -j 32 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-2-bytes.txt +RUN: sed "s/<RAW_2_BYTES>/$(cat %t/raw-2-bytes.txt)/g" %t/merge-2-template.ll > %t/merge-2.ll +RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-2.ll -o %t/merge-2.o + +# Make an archive from two object files +RUN: llvm-ar rcs %t/merge-archive.a %t/merge-1.o %t/merge-2.o + +# Merge the archive into the codegen data file. 
+RUN: llvm-cgdata --merge %t/merge-archive.a -o %t/merge-archive.cgdata +RUN: llvm-cgdata --show %t/merge-archive.cgdata | FileCheck %s + +RUN: llvm-cgdata --show %t/merge-archive.cgdata| FileCheck %s +CHECK: Stable function map: +CHECK-NEXT: Unique hash Count: 1 +CHECK-NEXT: Total function Count: 2 +CHECK-NEXT: Mergeable function Count: 2 + +RUN: llvm-cgdata --convert %t/merge-archive.cgdata| FileCheck %s --check-prefix=MAP +MAP: # Stable function map +MAP-NEXT: :stable_function_map +MAP-NEXT: --- +MAP-NEXT: - Hash: 1 +MAP-NEXT: FunctionName: Func1 +MAP-NEXT: ModuleName: Mod1 +MAP-NEXT: InstCount: 2 +MAP-NEXT: IndexOperandHashes: +MAP-NEXT: - InstIndex: 0 +MAP-NEXT: OpndIndex: 1 +MAP-NEXT: OpndHash: 3 +MAP-NEXT: - Hash: 1 +MAP-NEXT: FunctionName: Func2 +MAP-NEXT: ModuleName: Mod1 +MAP-NEXT: InstCount: 2 +MAP-NEXT: IndexOperandHashes: +MAP-NEXT: - InstIndex: 0 +MAP-NEXT: OpndIndex: 1 +MAP-NEXT: OpndHash: 4 +MAP-NEXT: ... + +;--- raw-1.cgtext +:stable_function_map +- Hash: 1 + FunctionName: Func2 + ModuleName: Mod1 + InstCount: 2 + IndexOperandHashes: + - InstIndex: 0 + OpndIndex: 1 + OpndHash: 4 +... + +;--- merge-1-template.ll +@.data = private unnamed_addr constant [60 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_merge" + +;--- raw-2.cgtext +:stable_function_map +- Hash: 1 + FunctionName: Func1 + ModuleName: Mod1 + InstCount: 2 + IndexOperandHashes: + - InstIndex: 0 + OpndIndex: 1 + OpndHash: 3 +... 
+ +;--- merge-2-template.ll +@.data = private unnamed_addr constant [60 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_merge" diff --git a/llvm/test/tools/llvm-cgdata/merge-funcmap-concat.test b/llvm/test/tools/llvm-cgdata/merge-funcmap-concat.test new file mode 100644 index 000000000000..c8acf1f3916e --- /dev/null +++ b/llvm/test/tools/llvm-cgdata/merge-funcmap-concat.test @@ -0,0 +1,78 @@ +# REQUIRES: shell, aarch64-registered-target +# UNSUPPORTED: system-windows + +# Merge a binary file (e.g., a linked executable) having concatenated cgdata (__llvm_merge) + +RUN: split-file %s %t + +# Synthesize two sets of raw cgdata without the header (32 byte) from the indexed cgdata. +# Concatenate them in merge-concat.ll +RUN: llvm-cgdata --convert --format binary %t/raw-1.cgtext -o %t/raw-1.cgdata +RUN: od -t x1 -j 32 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-1-bytes.txt +RUN: sed "s/<RAW_1_BYTES>/$(cat %t/raw-1-bytes.txt)/g" %t/merge-concat-template.ll > %t/merge-concat-template-2.ll +RUN: llvm-cgdata --convert --format binary %t/raw-2.cgtext -o %t/raw-2.cgdata +RUN: od -t x1 -j 32 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-2-bytes.txt +RUN: sed "s/<RAW_2_BYTES>/$(cat %t/raw-2-bytes.txt)/g" %t/merge-concat-template-2.ll > %t/merge-concat.ll + +RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-concat.ll -o %t/merge-concat.o +RUN: llvm-cgdata --merge %t/merge-concat.o -o %t/merge-concat.cgdata +RUN: llvm-cgdata --show %t/merge-concat.cgdata | FileCheck %s + +CHECK: Stable function map: +CHECK-NEXT: Unique hash Count: 1 +CHECK-NEXT: Total function Count: 2 +CHECK-NEXT: Mergeable function Count: 2 + +RUN: llvm-cgdata --convert %t/merge-concat.cgdata| FileCheck %s --check-prefix=MAP +MAP: # Stable function map +MAP-NEXT: :stable_function_map +MAP-NEXT: --- +MAP-NEXT: - Hash: 1 +MAP-NEXT: FunctionName: Func1 +MAP-NEXT: ModuleName: Mod1 +MAP-NEXT: InstCount: 2 +MAP-NEXT: 
IndexOperandHashes: +MAP-NEXT: - InstIndex: 0 +MAP-NEXT: OpndIndex: 1 +MAP-NEXT: OpndHash: 3 +MAP-NEXT: - Hash: 1 +MAP-NEXT: FunctionName: Func2 +MAP-NEXT: ModuleName: Mod1 +MAP-NEXT: InstCount: 2 +MAP-NEXT: IndexOperandHashes: +MAP-NEXT: - InstIndex: 0 +MAP-NEXT: OpndIndex: 1 +MAP-NEXT: OpndHash: 4 +MAP-NEXT: ... + +;--- raw-1.cgtext +:stable_function_map +- Hash: 1 + FunctionName: Func2 + ModuleName: Mod1 + InstCount: 2 + IndexOperandHashes: + - InstIndex: 0 + OpndIndex: 1 + OpndHash: 4 +... + +;--- raw-2.cgtext +:stable_function_map +- Hash: 1 + FunctionName: Func1 + ModuleName: Mod1 + InstCount: 2 + IndexOperandHashes: + - InstIndex: 0 + OpndIndex: 1 + OpndHash: 3 +... + +;--- merge-concat-template.ll + +; In a linked executable (as opposed to an object file), cgdata in __llvm_merge might be concatenated. +; Although this is not a typical workflow, we simply support this case to parse cgdata that is concatenated. +; In other words, the following two stable function maps are encoded back-to-back in a binary format. +@.data1 = private unnamed_addr constant [60 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_merge" +@.data2 = private unnamed_addr constant [60 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_merge" diff --git a/llvm/test/tools/llvm-cgdata/merge-funcmap-double.test b/llvm/test/tools/llvm-cgdata/merge-funcmap-double.test new file mode 100644 index 000000000000..3ae67f062f82 --- /dev/null +++ b/llvm/test/tools/llvm-cgdata/merge-funcmap-double.test @@ -0,0 +1,79 @@ +# REQUIRES: shell, aarch64-registered-target +# UNSUPPORTED: system-windows + +# Merge two object files having cgdata (__llvm_merge) + +RUN: split-file %s %t + +# Synthesize raw cgdata without the header (32 byte) from the indexed cgdata. 
+RUN: llvm-cgdata --convert --format binary %t/raw-1.cgtext -o %t/raw-1.cgdata +RUN: od -t x1 -j 32 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-1-bytes.txt +RUN: sed "s/<RAW_1_BYTES>/$(cat %t/raw-1-bytes.txt)/g" %t/merge-1-template.ll > %t/merge-1.ll +RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-1.ll -o %t/merge-1.o + +# Synthesize raw cgdata without the header (32 byte) from the indexed cgdata. +RUN: llvm-cgdata --convert --format binary %t/raw-2.cgtext -o %t/raw-2.cgdata +RUN: od -t x1 -j 32 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-2-bytes.txt +RUN: sed "s/<RAW_2_BYTES>/$(cat %t/raw-2-bytes.txt)/g" %t/merge-2-template.ll > %t/merge-2.ll +RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-2.ll -o %t/merge-2.o + +# Merge two object files into the codegen data file. +RUN: llvm-cgdata --merge %t/merge-1.o %t/merge-2.o -o %t/merge.cgdata + +RUN: llvm-cgdata --show %t/merge.cgdata | FileCheck %s +CHECK: Stable function map: +CHECK-NEXT: Unique hash Count: 1 +CHECK-NEXT: Total function Count: 2 +CHECK-NEXT: Mergeable function Count: 2 + +RUN: llvm-cgdata --convert %t/merge.cgdata | FileCheck %s --check-prefix=MAP +MAP: # Stable function map +MAP-NEXT: :stable_function_map +MAP-NEXT: --- +MAP-NEXT: - Hash: 1 +MAP-NEXT: FunctionName: Func1 +MAP-NEXT: ModuleName: Mod1 +MAP-NEXT: InstCount: 2 +MAP-NEXT: IndexOperandHashes: +MAP-NEXT: - InstIndex: 0 +MAP-NEXT: OpndIndex: 1 +MAP-NEXT: OpndHash: 3 +MAP-NEXT: - Hash: 1 +MAP-NEXT: FunctionName: Func2 +MAP-NEXT: ModuleName: Mod1 +MAP-NEXT: InstCount: 2 +MAP-NEXT: IndexOperandHashes: +MAP-NEXT: - InstIndex: 0 +MAP-NEXT: OpndIndex: 1 +MAP-NEXT: OpndHash: 4 +MAP-NEXT: ... + +;--- raw-1.cgtext +:stable_function_map +- Hash: 1 + FunctionName: Func2 + ModuleName: Mod1 + InstCount: 2 + IndexOperandHashes: + - InstIndex: 0 + OpndIndex: 1 + OpndHash: 4 +... 
+ +;--- merge-1-template.ll +@.data = private unnamed_addr constant [60 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_merge" + +;--- raw-2.cgtext +:stable_function_map +- Hash: 1 + FunctionName: Func1 + ModuleName: Mod1 + InstCount: 2 + IndexOperandHashes: + - InstIndex: 0 + OpndIndex: 1 + OpndHash: 3 +... + +;--- merge-2-template.ll +@.data = private unnamed_addr constant [60 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_merge" diff --git a/llvm/test/tools/llvm-cgdata/merge-funcmap-single.test b/llvm/test/tools/llvm-cgdata/merge-funcmap-single.test new file mode 100644 index 000000000000..6a4e635f6386 --- /dev/null +++ b/llvm/test/tools/llvm-cgdata/merge-funcmap-single.test @@ -0,0 +1,36 @@ +# REQUIRES: shell, aarch64-registered-target +# UNSUPPORTED: system-windows + +# Test merge a single object file into a cgdata + +RUN: split-file %s %t + +# Synthesize raw cgdata without the header (32 byte) from the indexed cgdata. +RUN: llvm-cgdata --convert --format binary %t/raw-single.cgtext -o %t/raw-single.cgdata +RUN: od -t x1 -j 32 -An %t/raw-single.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-single-bytes.txt + +RUN: sed "s/<RAW_1_BYTES>/$(cat %t/raw-single-bytes.txt)/g" %t/merge-single-template.ll > %t/merge-single.ll +RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-single.ll -o %t/merge-single.o + +# Merge an object file having cgdata (__llvm_merge) +RUN: llvm-cgdata -m %t/merge-single.o -o %t/merge-single.cgdata +RUN: llvm-cgdata -s %t/merge-single.cgdata | FileCheck %s +CHECK: Stable function map: +CHECK-NEXT: Unique hash Count: 1 +CHECK-NEXT: Total function Count: 1 +CHECK-NEXT: Mergeable function Count: 0 + +;--- raw-single.cgtext +:stable_function_map +- Hash: 1 + FunctionName: Func1 + ModuleName: Mod1 + InstCount: 2 + IndexOperandHashes: + - InstIndex: 0 + OpndIndex: 1 + OpndHash: 3 +... 
+ +;--- merge-single-template.ll +@.data = private unnamed_addr constant [60 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_merge" diff --git a/llvm/test/tools/llvm-cgdata/merge-archive.test b/llvm/test/tools/llvm-cgdata/merge-hashtree-archive.test index 03eb9106b545..ee6345247c5b 100644 --- a/llvm/test/tools/llvm-cgdata/merge-archive.test +++ b/llvm/test/tools/llvm-cgdata/merge-hashtree-archive.test @@ -5,15 +5,15 @@ RUN: split-file %s %t -# Synthesize raw cgdata without the header (24 byte) from the indexed cgdata. +# Synthesize raw cgdata without the header (32 byte) from the indexed cgdata. RUN: llvm-cgdata --convert --format binary %t/raw-1.cgtext -o %t/raw-1.cgdata -RUN: od -t x1 -j 24 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-1-bytes.txt +RUN: od -t x1 -j 32 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-1-bytes.txt RUN: sed "s/<RAW_1_BYTES>/$(cat %t/raw-1-bytes.txt)/g" %t/merge-1-template.ll > %t/merge-1.ll RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-1.ll -o %t/merge-1.o -# Synthesize raw cgdata without the header (24 byte) from the indexed cgdata. +# Synthesize raw cgdata without the header (32 byte) from the indexed cgdata. 
RUN: llvm-cgdata --convert --format binary %t/raw-2.cgtext -o %t/raw-2.cgdata -RUN: od -t x1 -j 24 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-2-bytes.txt +RUN: od -t x1 -j 32 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-2-bytes.txt RUN: sed "s/<RAW_2_BYTES>/$(cat %t/raw-2-bytes.txt)/g" %t/merge-2-template.ll > %t/merge-2.ll RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-2.ll -o %t/merge-2.o diff --git a/llvm/test/tools/llvm-cgdata/merge-concat.test b/llvm/test/tools/llvm-cgdata/merge-hashtree-concat.test index ac0e7a6e29e8..5a3ece05a3f9 100644 --- a/llvm/test/tools/llvm-cgdata/merge-concat.test +++ b/llvm/test/tools/llvm-cgdata/merge-hashtree-concat.test @@ -5,13 +5,13 @@ RUN: split-file %s %t -# Synthesize two sets of raw cgdata without the header (24 byte) from the indexed cgdata. +# Synthesize two sets of raw cgdata without the header (32 byte) from the indexed cgdata. # Concatenate them in merge-concat.ll RUN: llvm-cgdata --convert --format binary %t/raw-1.cgtext -o %t/raw-1.cgdata -RUN: od -t x1 -j 24 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-1-bytes.txt +RUN: od -t x1 -j 32 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-1-bytes.txt RUN: sed "s/<RAW_1_BYTES>/$(cat %t/raw-1-bytes.txt)/g" %t/merge-concat-template.ll > %t/merge-concat-template-2.ll RUN: llvm-cgdata --convert --format binary %t/raw-2.cgtext -o %t/raw-2.cgdata -RUN: od -t x1 -j 24 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-2-bytes.txt +RUN: od -t x1 -j 32 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-2-bytes.txt RUN: sed "s/<RAW_2_BYTES>/$(cat %t/raw-2-bytes.txt)/g" %t/merge-concat-template-2.ll > %t/merge-concat.ll RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-concat.ll -o %t/merge-concat.o diff --git 
a/llvm/test/tools/llvm-cgdata/merge-double.test b/llvm/test/tools/llvm-cgdata/merge-hashtree-double.test index 1ae806429101..044a8649cf4a 100644 --- a/llvm/test/tools/llvm-cgdata/merge-double.test +++ b/llvm/test/tools/llvm-cgdata/merge-hashtree-double.test @@ -5,15 +5,15 @@ RUN: split-file %s %t -# Synthesize raw cgdata without the header (24 byte) from the indexed cgdata. +# Synthesize raw cgdata without the header (32 byte) from the indexed cgdata. RUN: llvm-cgdata --convert --format binary %t/raw-1.cgtext -o %t/raw-1.cgdata -RUN: od -t x1 -j 24 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-1-bytes.txt +RUN: od -t x1 -j 32 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-1-bytes.txt RUN: sed "s/<RAW_1_BYTES>/$(cat %t/raw-1-bytes.txt)/g" %t/merge-1-template.ll > %t/merge-1.ll RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-1.ll -o %t/merge-1.o -# Synthesize raw cgdata without the header (24 byte) from the indexed cgdata. +# Synthesize raw cgdata without the header (32 byte) from the indexed cgdata. 
RUN: llvm-cgdata --convert --format binary %t/raw-2.cgtext -o %t/raw-2.cgdata -RUN: od -t x1 -j 24 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-2-bytes.txt +RUN: od -t x1 -j 32 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-2-bytes.txt RUN: sed "s/<RAW_2_BYTES>/$(cat %t/raw-2-bytes.txt)/g" %t/merge-2-template.ll > %t/merge-2.ll RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-2.ll -o %t/merge-2.o diff --git a/llvm/test/tools/llvm-cgdata/merge-single.test b/llvm/test/tools/llvm-cgdata/merge-hashtree-single.test index 47e3cb3f4f50..829c63f0f17a 100644 --- a/llvm/test/tools/llvm-cgdata/merge-single.test +++ b/llvm/test/tools/llvm-cgdata/merge-hashtree-single.test @@ -11,9 +11,9 @@ RUN: llvm-cgdata --merge %t/merge-empty.o --output %t/merge-empty.cgdata # No summary appear with the header only cgdata. RUN: llvm-cgdata --show %t/merge-empty.cgdata | count 0 -# Synthesize raw cgdata without the header (24 byte) from the indexed cgdata. +# Synthesize raw cgdata without the header (32 byte) from the indexed cgdata. 
RUN: llvm-cgdata --convert --format binary %t/raw-single.cgtext -o %t/raw-single.cgdata -RUN: od -t x1 -j 24 -An %t/raw-single.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-single-bytes.txt +RUN: od -t x1 -j 32 -An %t/raw-single.cgdata | tr -d '\n\r\t' | sed 's/[ ]*$//' | sed 's/[ ][ ]*/\\\\/g' > %t/raw-single-bytes.txt RUN: sed "s/<RAW_1_BYTES>/$(cat %t/raw-single-bytes.txt)/g" %t/merge-single-template.ll > %t/merge-single.ll RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-single.ll -o %t/merge-single.o diff --git a/llvm/tools/llvm-cgdata/llvm-cgdata.cpp b/llvm/tools/llvm-cgdata/llvm-cgdata.cpp index 483f46626312..0931cad4bcb7 100644 --- a/llvm/tools/llvm-cgdata/llvm-cgdata.cpp +++ b/llvm/tools/llvm-cgdata/llvm-cgdata.cpp @@ -80,8 +80,6 @@ static CGDataAction Action; static std::optional<CGDataFormat> OutputFormat; static std::vector<std::string> InputFilenames; -// TODO: Add a doc, https://llvm.org/docs/CommandGuide/llvm-cgdata.html - static void exitWithError(Twine Message, std::string Whence = "", std::string Hint = "") { WithColor::error(); @@ -128,6 +126,10 @@ static int convert_main(int argc, const char *argv[]) { OutlinedHashTreeRecord Record(Reader->releaseOutlinedHashTree()); Writer.addRecord(Record); } + if (Reader->hasStableFunctionMap()) { + StableFunctionMapRecord Record(Reader->releaseStableFunctionMap()); + Writer.addRecord(Record); + } if (OutputFormat == CGDataFormat::Text) { if (Error E = Writer.writeText(OS)) @@ -141,10 +143,12 @@ static int convert_main(int argc, const char *argv[]) { } static bool handleBuffer(StringRef Filename, MemoryBufferRef Buffer, - OutlinedHashTreeRecord &GlobalOutlineRecord); + OutlinedHashTreeRecord &GlobalOutlineRecord, + StableFunctionMapRecord &GlobalFunctionMapRecord); static bool handleArchive(StringRef Filename, Archive &Arch, - OutlinedHashTreeRecord &GlobalOutlineRecord) { + OutlinedHashTreeRecord &GlobalOutlineRecord, + StableFunctionMapRecord 
&GlobalFunctionMapRecord) { bool Result = true; Error Err = Error::success(); for (const auto &Child : Arch.children(Err)) { @@ -155,7 +159,8 @@ static bool handleArchive(StringRef Filename, Archive &Arch, if (Error E = NameOrErr.takeError()) exitWithError(std::move(E), Filename); std::string Name = (Filename + "(" + NameOrErr.get() + ")").str(); - Result &= handleBuffer(Name, BuffOrErr.get(), GlobalOutlineRecord); + Result &= handleBuffer(Name, BuffOrErr.get(), GlobalOutlineRecord, + GlobalFunctionMapRecord); } if (Err) exitWithError(std::move(Err), Filename); @@ -163,7 +168,8 @@ static bool handleArchive(StringRef Filename, Archive &Arch, } static bool handleBuffer(StringRef Filename, MemoryBufferRef Buffer, - OutlinedHashTreeRecord &GlobalOutlineRecord) { + OutlinedHashTreeRecord &GlobalOutlineRecord, + StableFunctionMapRecord &GlobalFunctionMapRecord) { Expected<std::unique_ptr<object::Binary>> BinOrErr = object::createBinary(Buffer); if (Error E = BinOrErr.takeError()) @@ -171,11 +177,12 @@ static bool handleBuffer(StringRef Filename, MemoryBufferRef Buffer, bool Result = true; if (auto *Obj = dyn_cast<ObjectFile>(BinOrErr->get())) { - if (Error E = - CodeGenDataReader::mergeFromObjectFile(Obj, GlobalOutlineRecord)) + if (Error E = CodeGenDataReader::mergeFromObjectFile( + Obj, GlobalOutlineRecord, GlobalFunctionMapRecord)) exitWithError(std::move(E), Filename); } else if (auto *Arch = dyn_cast<Archive>(BinOrErr->get())) { - Result &= handleArchive(Filename, *Arch, GlobalOutlineRecord); + Result &= handleArchive(Filename, *Arch, GlobalOutlineRecord, + GlobalFunctionMapRecord); } else { // TODO: Support for the MachO universal binary format. 
errs() << "Error: unsupported binary file: " << Filename << "\n"; @@ -186,26 +193,34 @@ static bool handleBuffer(StringRef Filename, MemoryBufferRef Buffer, } static bool handleFile(StringRef Filename, - OutlinedHashTreeRecord &GlobalOutlineRecord) { + OutlinedHashTreeRecord &GlobalOutlineRecord, + StableFunctionMapRecord &GlobalFunctionMapRecord) { ErrorOr<std::unique_ptr<MemoryBuffer>> BuffOrErr = MemoryBuffer::getFileOrSTDIN(Filename); if (std::error_code EC = BuffOrErr.getError()) exitWithErrorCode(EC, Filename); - return handleBuffer(Filename, *BuffOrErr.get(), GlobalOutlineRecord); + return handleBuffer(Filename, *BuffOrErr.get(), GlobalOutlineRecord, + GlobalFunctionMapRecord); } static int merge_main(int argc, const char *argv[]) { bool Result = true; OutlinedHashTreeRecord GlobalOutlineRecord; + StableFunctionMapRecord GlobalFunctionMapRecord; for (auto &Filename : InputFilenames) - Result &= handleFile(Filename, GlobalOutlineRecord); + Result &= + handleFile(Filename, GlobalOutlineRecord, GlobalFunctionMapRecord); if (!Result) exitWithError("failed to merge codegen data files."); + GlobalFunctionMapRecord.finalize(); + CodeGenDataWriter Writer; if (!GlobalOutlineRecord.empty()) Writer.addRecord(GlobalOutlineRecord); + if (!GlobalFunctionMapRecord.empty()) + Writer.addRecord(GlobalFunctionMapRecord); std::error_code EC; raw_fd_ostream OS(OutputFilename, EC, @@ -249,6 +264,15 @@ static int show_main(int argc, const char *argv[]) { << "\n"; OS << " Depth: " << Tree->depth() << "\n"; } + if (Reader->hasStableFunctionMap()) { + auto Map = Reader->releaseStableFunctionMap(); + OS << "Stable function map:\n"; + OS << " Unique hash Count: " << Map->size() << "\n"; + OS << " Total function Count: " + << Map->size(StableFunctionMap::TotalFunctionCount) << "\n"; + OS << " Mergeable function Count: " + << Map->size(StableFunctionMap::MergeableFunctionCount) << "\n"; + } return 0; } |
