about summary refs log tree commit diff
path: root/lld/ELF
diff options
context:
space:
mode:
authorFangrui Song <i@maskray.me>2024-05-01 11:40:46 -0700
committerGitHub <noreply@github.com>2024-05-01 11:40:46 -0700
commit6d44a1ef55b559e59d725b07ffe1da988b4e5f1c (patch)
tree790791b46d1f180f30665e432a174ff233c8f475 /lld/ELF
parent91fef0013f2668d1dc0623ede21cf4048d9a733e (diff)
downloadllvm-6d44a1ef55b559e59d725b07ffe1da988b4e5f1c.zip
llvm-6d44a1ef55b559e59d725b07ffe1da988b4e5f1c.tar.gz
llvm-6d44a1ef55b559e59d725b07ffe1da988b4e5f1c.tar.bz2
[ELF] Adjust --compress-sections to support compression level
zstd excels at scaling from low-ratio-very-fast to high-ratio-pretty-slow. Some users prioritize speed and prefer disk read speed, while others focus on achieving the highest compression ratio possible, similar to traditional high-ratio codecs like LZMA. Add an optional `level` to `--compress-sections` (#84855) to cater to these diverse needs. While we initially aimed for a one-size-fits-all approach, this no longer seems to work. (https://richg42.blogspot.com/2015/11/the-lossless-decompression-pareto.html) When --compress-debug-sections is used together, make --compress-sections take precedence since --compress-sections is usually more specific. Remove the level distinction between -O/-O1 and -O2 for --compress-debug-sections=zlib for a more consistent user experience. Pull Request: https://github.com/llvm/llvm-project/pull/90567
Diffstat (limited to 'lld/ELF')
-rw-r--r--lld/ELF/Config.h3
-rw-r--r--lld/ELF/Driver.cpp12
-rw-r--r--lld/ELF/Options.td5
-rw-r--r--lld/ELF/OutputSections.cpp22
4 files changed, 26 insertions(+), 16 deletions(-)
diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h
index 33bfa42..c55b547 100644
--- a/lld/ELF/Config.h
+++ b/lld/ELF/Config.h
@@ -224,7 +224,8 @@ struct Config {
bool checkSections;
bool checkDynamicRelocs;
std::optional<llvm::DebugCompressionType> compressDebugSections;
- llvm::SmallVector<std::pair<llvm::GlobPattern, llvm::DebugCompressionType>, 0>
+ llvm::SmallVector<
+ std::tuple<llvm::GlobPattern, llvm::DebugCompressionType, unsigned>, 0>
compressSections;
bool cref;
llvm::SmallVector<std::pair<llvm::GlobPattern, uint64_t>, 0>
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index a5b47f0..b29e1e1 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -1533,9 +1533,17 @@ static void readConfigs(opt::InputArgList &args) {
": parse error, not 'section-glob=[none|zlib|zstd]'");
continue;
}
- auto type = getCompressionType(fields[1], arg->getSpelling());
+ auto [typeStr, levelStr] = fields[1].split(':');
+ auto type = getCompressionType(typeStr, arg->getSpelling());
+ unsigned level = 0;
+ if (fields[1].size() != typeStr.size() &&
+ !llvm::to_integer(levelStr, level)) {
+ error(arg->getSpelling() +
+ ": expected a non-negative integer compression level, but got '" +
+ levelStr + "'");
+ }
if (Expected<GlobPattern> pat = GlobPattern::create(fields[0])) {
- config->compressSections.emplace_back(std::move(*pat), type);
+ config->compressSections.emplace_back(std::move(*pat), type, level);
} else {
error(arg->getSpelling() + ": " + toString(pat.takeError()));
continue;
diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td
index 72eaf15..73a4f96 100644
--- a/lld/ELF/Options.td
+++ b/lld/ELF/Options.td
@@ -68,8 +68,9 @@ defm compress_debug_sections:
MetaVarName<"[none,zlib,zstd]">;
defm compress_sections: EEq<"compress-sections",
- "Compress non-SHF_ALLOC output sections matching <section-glob>">,
- MetaVarName<"<section-glob>=[none|zlib|zstd]">;
+ "Compress output sections that match the glob and do not have the SHF_ALLOC flag."
+ "The compression level is <level> (if specified) or a default speed-focused level">,
+ MetaVarName<"<section-glob>={none,zlib,zstd}[:level]">;
defm defsym: Eq<"defsym", "Define a symbol alias">, MetaVarName<"<symbol>=<value>">;
diff --git a/lld/ELF/OutputSections.cpp b/lld/ELF/OutputSections.cpp
index 1b09e5b..2dbbff0 100644
--- a/lld/ELF/OutputSections.cpp
+++ b/lld/ELF/OutputSections.cpp
@@ -339,12 +339,13 @@ template <class ELFT> void OutputSection::maybeCompress() {
(void)sizeof(Elf_Chdr);
DebugCompressionType ctype = DebugCompressionType::None;
- for (auto &[glob, t] : config->compressSections)
- if (glob.match(name))
- ctype = t;
+ unsigned level = 0; // default compression level
if (!(flags & SHF_ALLOC) && config->compressDebugSections &&
name.starts_with(".debug_") && size)
ctype = *config->compressDebugSections;
+ for (auto &[glob, t, l] : config->compressSections)
+ if (glob.match(name))
+ std::tie(ctype, level) = {t, l};
if (ctype == DebugCompressionType::None)
return;
if (flags & SHF_ALLOC) {
@@ -376,13 +377,14 @@ template <class ELFT> void OutputSection::maybeCompress() {
auto shardsOut = std::make_unique<SmallVector<uint8_t, 0>[]>(numShards);
#if LLVM_ENABLE_ZSTD
- // Use ZSTD's streaming compression API which permits parallel workers working
- // on the stream. See http://facebook.github.io/zstd/zstd_manual.html
- // "Streaming compression - HowTo".
+ // Use ZSTD's streaming compression API. See
+ // http://facebook.github.io/zstd/zstd_manual.html "Streaming compression -
+ // HowTo".
if (ctype == DebugCompressionType::Zstd) {
parallelFor(0, numShards, [&](size_t i) {
SmallVector<uint8_t, 0> out;
ZSTD_CCtx *cctx = ZSTD_createCCtx();
+ ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, level);
ZSTD_inBuffer zib = {shardsIn[i].data(), shardsIn[i].size(), 0};
ZSTD_outBuffer zob = {nullptr, 0, 0};
size_t size;
@@ -410,12 +412,10 @@ template <class ELFT> void OutputSection::maybeCompress() {
#if LLVM_ENABLE_ZLIB
// We chose 1 (Z_BEST_SPEED) as the default compression level because it is
- // the fastest. If -O2 is given, we use level 6 to compress debug info more by
- // ~15%. We found that level 7 to 9 doesn't make much difference (~1% more
- // compression) while they take significant amount of time (~2x), so level 6
- // seems enough.
+ // fast and provides decent compression ratios.
if (ctype == DebugCompressionType::Zlib) {
- const int level = config->optimize >= 2 ? 6 : Z_BEST_SPEED;
+ if (!level)
+ level = Z_BEST_SPEED;
// Compress shards and compute Alder-32 checksums. Use Z_SYNC_FLUSH for all
// shards but the last to flush the output to a byte boundary to be