diff options
author | Fangrui Song <i@maskray.me> | 2024-05-01 11:40:46 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-05-01 11:40:46 -0700 |
commit | 6d44a1ef55b559e59d725b07ffe1da988b4e5f1c (patch) | |
tree | 790791b46d1f180f30665e432a174ff233c8f475 /lld/ELF | |
parent | 91fef0013f2668d1dc0623ede21cf4048d9a733e (diff) | |
download | llvm-6d44a1ef55b559e59d725b07ffe1da988b4e5f1c.zip llvm-6d44a1ef55b559e59d725b07ffe1da988b4e5f1c.tar.gz llvm-6d44a1ef55b559e59d725b07ffe1da988b4e5f1c.tar.bz2 |
[ELF] Adjust --compress-sections to support compression level
zstd excels at scaling from low-ratio-very-fast to
high-ratio-pretty-slow. Some users prioritize speed and fast disk reads,
while others focus on achieving the highest compression ratio
possible, similar to traditional high-ratio codecs like LZMA.
Add an optional `level` to `--compress-sections` (#84855) to cater to
these diverse needs. While we initially aimed for a one-size-fits-all
approach, this no longer seems to work.
(https://richg42.blogspot.com/2015/11/the-lossless-decompression-pareto.html)
When --compress-debug-sections is specified together with
--compress-sections, make --compress-sections take precedence, since it is
usually the more specific option.
Remove the level distinction between -O/-O1 and -O2 for
--compress-debug-sections=zlib for a more consistent user experience.
Pull Request: https://github.com/llvm/llvm-project/pull/90567
Diffstat (limited to 'lld/ELF')
-rw-r--r-- | lld/ELF/Config.h | 3 | ||||
-rw-r--r-- | lld/ELF/Driver.cpp | 12 | ||||
-rw-r--r-- | lld/ELF/Options.td | 5 | ||||
-rw-r--r-- | lld/ELF/OutputSections.cpp | 22 |
4 files changed, 26 insertions, 16 deletions
diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h index 33bfa42..c55b547 100644 --- a/lld/ELF/Config.h +++ b/lld/ELF/Config.h @@ -224,7 +224,8 @@ struct Config { bool checkSections; bool checkDynamicRelocs; std::optional<llvm::DebugCompressionType> compressDebugSections; - llvm::SmallVector<std::pair<llvm::GlobPattern, llvm::DebugCompressionType>, 0> + llvm::SmallVector< + std::tuple<llvm::GlobPattern, llvm::DebugCompressionType, unsigned>, 0> compressSections; bool cref; llvm::SmallVector<std::pair<llvm::GlobPattern, uint64_t>, 0> diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index a5b47f0..b29e1e1 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -1533,9 +1533,17 @@ static void readConfigs(opt::InputArgList &args) { ": parse error, not 'section-glob=[none|zlib|zstd]'"); continue; } - auto type = getCompressionType(fields[1], arg->getSpelling()); + auto [typeStr, levelStr] = fields[1].split(':'); + auto type = getCompressionType(typeStr, arg->getSpelling()); + unsigned level = 0; + if (fields[1].size() != typeStr.size() && + !llvm::to_integer(levelStr, level)) { + error(arg->getSpelling() + + ": expected a non-negative integer compression level, but got '" + + levelStr + "'"); + } if (Expected<GlobPattern> pat = GlobPattern::create(fields[0])) { - config->compressSections.emplace_back(std::move(*pat), type); + config->compressSections.emplace_back(std::move(*pat), type, level); } else { error(arg->getSpelling() + ": " + toString(pat.takeError())); continue; diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td index 72eaf15..73a4f96 100644 --- a/lld/ELF/Options.td +++ b/lld/ELF/Options.td @@ -68,8 +68,9 @@ defm compress_debug_sections: MetaVarName<"[none,zlib,zstd]">; defm compress_sections: EEq<"compress-sections", - "Compress non-SHF_ALLOC output sections matching <section-glob>">, - MetaVarName<"<section-glob>=[none|zlib|zstd]">; + "Compress output sections that match the glob and do not have the SHF_ALLOC flag." 
+ "The compression level is <level> (if specified) or a default speed-focused level">, + MetaVarName<"<section-glob>={none,zlib,zstd}[:level]">; defm defsym: Eq<"defsym", "Define a symbol alias">, MetaVarName<"<symbol>=<value>">; diff --git a/lld/ELF/OutputSections.cpp b/lld/ELF/OutputSections.cpp index 1b09e5b..2dbbff0 100644 --- a/lld/ELF/OutputSections.cpp +++ b/lld/ELF/OutputSections.cpp @@ -339,12 +339,13 @@ template <class ELFT> void OutputSection::maybeCompress() { (void)sizeof(Elf_Chdr); DebugCompressionType ctype = DebugCompressionType::None; - for (auto &[glob, t] : config->compressSections) - if (glob.match(name)) - ctype = t; + unsigned level = 0; // default compression level if (!(flags & SHF_ALLOC) && config->compressDebugSections && name.starts_with(".debug_") && size) ctype = *config->compressDebugSections; + for (auto &[glob, t, l] : config->compressSections) + if (glob.match(name)) + std::tie(ctype, level) = {t, l}; if (ctype == DebugCompressionType::None) return; if (flags & SHF_ALLOC) { @@ -376,13 +377,14 @@ template <class ELFT> void OutputSection::maybeCompress() { auto shardsOut = std::make_unique<SmallVector<uint8_t, 0>[]>(numShards); #if LLVM_ENABLE_ZSTD - // Use ZSTD's streaming compression API which permits parallel workers working - // on the stream. See http://facebook.github.io/zstd/zstd_manual.html - // "Streaming compression - HowTo". + // Use ZSTD's streaming compression API. See + // http://facebook.github.io/zstd/zstd_manual.html "Streaming compression - + // HowTo". 
if (ctype == DebugCompressionType::Zstd) { parallelFor(0, numShards, [&](size_t i) { SmallVector<uint8_t, 0> out; ZSTD_CCtx *cctx = ZSTD_createCCtx(); + ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, level); ZSTD_inBuffer zib = {shardsIn[i].data(), shardsIn[i].size(), 0}; ZSTD_outBuffer zob = {nullptr, 0, 0}; size_t size; @@ -410,12 +412,10 @@ template <class ELFT> void OutputSection::maybeCompress() { #if LLVM_ENABLE_ZLIB // We chose 1 (Z_BEST_SPEED) as the default compression level because it is - // the fastest. If -O2 is given, we use level 6 to compress debug info more by - // ~15%. We found that level 7 to 9 doesn't make much difference (~1% more - // compression) while they take significant amount of time (~2x), so level 6 - // seems enough. + // fast and provides decent compression ratios. if (ctype == DebugCompressionType::Zlib) { - const int level = config->optimize >= 2 ? 6 : Z_BEST_SPEED; + if (!level) + level = Z_BEST_SPEED; // Compress shards and compute Alder-32 checksums. Use Z_SYNC_FLUSH for all // shards but the last to flush the output to a byte boundary to be |