aboutsummaryrefslogtreecommitdiff
path: root/lld
diff options
context:
space:
mode:
authorFangrui Song <i@maskray.me>2022-07-15 10:08:08 -0700
committerFangrui Song <i@maskray.me>2022-07-15 10:08:08 -0700
commitf77b77e8dbeb26764349a71b35f21b701adeee88 (patch)
treefc5f9c4c5d0921ca5612bd063f9daf2323774a74 /lld
parentbc08c3cb7f8e797fee14e96eedd3dc358608ada3 (diff)
downloadllvm-f77b77e8dbeb26764349a71b35f21b701adeee88.zip
llvm-f77b77e8dbeb26764349a71b35f21b701adeee88.tar.gz
llvm-f77b77e8dbeb26764349a71b35f21b701adeee88.tar.bz2
[ELF][RISCV] Relax local-exec TLS model
In -mrelax mode, GCC/Clang may generate a local-exec TLS code sequence like: ``` # R_RISCV_TPREL_HI20, R_RISCV_RELAX lui rd, %tprel_hi(x) # R_RISCV_TPREL_ADD, R_RISCV_RELAX add rd, rd, tp, %tprel_add(x) # (R_RISCV_TPREL_LO12_I || R_RISCV_TPREL_LO12_S), R_RISCV_RELAX addi rd, rd, %tprel_lo(x) || sw rs, %tprel_lo(x)(rd) ``` Note: st_value(x) for TLS should be in the range [0,p_memsz(PT_TLS)). When st_value(x) < 2048 (i.e. hi20(x) == 0), the linker can relax the code sequence to: ``` addi rd, tp, st_value(x) || sw rs, st_value(x)(tp) ``` Differential Revision: https://reviews.llvm.org/D129425
Diffstat (limited to 'lld')
-rw-r--r--lld/ELF/Arch/RISCV.cpp82
-rw-r--r--lld/test/ELF/riscv-tls-le.s81
2 files changed, 129 insertions, 34 deletions
diff --git a/lld/ELF/Arch/RISCV.cpp b/lld/ELF/Arch/RISCV.cpp
index c09bb2e..8fca1a6 100644
--- a/lld/ELF/Arch/RISCV.cpp
+++ b/lld/ELF/Arch/RISCV.cpp
@@ -57,6 +57,7 @@ enum Op {
enum Reg {
X_RA = 1,
+ X_TP = 4,
X_T0 = 5,
X_T1 = 6,
X_T2 = 7,
@@ -76,6 +77,19 @@ static uint32_t utype(uint32_t op, uint32_t rd, uint32_t imm) {
return op | (rd << 7) | (imm << 12);
}
+// Extract bits v[begin:end], where range is inclusive, and begin must be < 63.
+static uint32_t extractBits(uint64_t v, uint32_t begin, uint32_t end) {
+ return (v & ((1ULL << (begin + 1)) - 1)) >> end;
+}
+
+static uint32_t setLO12_I(uint32_t insn, uint32_t imm) {
+ return (insn & 0xfffff) | (imm << 20);
+}
+static uint32_t setLO12_S(uint32_t insn, uint32_t imm) {
+ return (insn & 0x1fff07f) | (extractBits(imm, 11, 5) << 25) |
+ (extractBits(imm, 4, 0) << 7);
+}
+
RISCV::RISCV() {
copyRel = R_RISCV_COPY;
pltRel = R_RISCV_JUMP_SLOT;
@@ -270,10 +284,9 @@ RelExpr RISCV::getRelExpr(const RelType type, const Symbol &s,
case R_RISCV_TPREL_LO12_I:
case R_RISCV_TPREL_LO12_S:
return R_TPREL;
- case R_RISCV_TPREL_ADD:
- return R_NONE;
case R_RISCV_ALIGN:
return R_RELAX_HINT;
+ case R_RISCV_TPREL_ADD:
case R_RISCV_RELAX:
return config->relax ? R_RELAX_HINT : R_NONE;
default:
@@ -283,11 +296,6 @@ RelExpr RISCV::getRelExpr(const RelType type, const Symbol &s,
}
}
-// Extract bits V[Begin:End], where range is inclusive, and Begin must be < 63.
-static uint32_t extractBits(uint64_t v, uint32_t begin, uint32_t end) {
- return (v & ((1ULL << (begin + 1)) - 1)) >> end;
-}
-
void RISCV::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
const unsigned bits = config->wordsize * 8;
@@ -404,7 +412,7 @@ void RISCV::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
case R_RISCV_LO12_I: {
uint64_t hi = (val + 0x800) >> 12;
uint64_t lo = val - (hi << 12);
- write32le(loc, (read32le(loc) & 0xFFFFF) | ((lo & 0xFFF) << 20));
+ write32le(loc, setLO12_I(read32le(loc), lo & 0xfff));
return;
}
@@ -413,9 +421,7 @@ void RISCV::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
case R_RISCV_LO12_S: {
uint64_t hi = (val + 0x800) >> 12;
uint64_t lo = val - (hi << 12);
- uint32_t imm11_5 = extractBits(lo, 11, 5) << 25;
- uint32_t imm4_0 = extractBits(lo, 4, 0) << 7;
- write32le(loc, (read32le(loc) & 0x1FFF07F) | imm11_5 | imm4_0);
+ write32le(loc, setLO12_S(read32le(loc), lo));
return;
}
@@ -567,6 +573,35 @@ static void relaxCall(const InputSection &sec, size_t i, uint64_t loc,
}
}
+// Relax local-exec TLS when hi20 is zero.
+static void relaxTlsLe(const InputSection &sec, size_t i, uint64_t loc,
+ Relocation &r, uint32_t &remove) {
+ uint64_t val = r.sym->getVA(r.addend);
+ if (hi20(val) != 0)
+ return;
+ uint32_t insn = read32le(sec.rawData.data() + r.offset);
+ switch (r.type) {
+ case R_RISCV_TPREL_HI20:
+ case R_RISCV_TPREL_ADD:
+ // Remove lui rd, %tprel_hi(x) and add rd, rd, tp, %tprel_add(x).
+ sec.relaxAux->relocTypes[i] = R_RISCV_RELAX;
+ remove = 4;
+ break;
+ case R_RISCV_TPREL_LO12_I:
+ // addi rd, rd, %tprel_lo(x) => addi rd, tp, st_value(x)
+ sec.relaxAux->relocTypes[i] = R_RISCV_32;
+ insn = (insn & ~(31 << 15)) | (X_TP << 15);
+ sec.relaxAux->writes.push_back(setLO12_I(insn, val));
+ break;
+ case R_RISCV_TPREL_LO12_S:
+ // sw rs, %tprel_lo(x)(rd) => sw rs, st_value(x)(tp)
+ sec.relaxAux->relocTypes[i] = R_RISCV_32;
+ insn = (insn & ~(31 << 15)) | (X_TP << 15);
+ sec.relaxAux->writes.push_back(setLO12_S(insn, val));
+ break;
+ }
+}
+
static bool relax(InputSection &sec) {
const uint64_t secAddr = sec.getVA();
auto &aux = *sec.relaxAux;
@@ -612,6 +647,14 @@ static bool relax(InputSection &sec) {
sec.relocations[i + 1].type == R_RISCV_RELAX)
relaxCall(sec, i, loc, r, remove);
break;
+ case R_RISCV_TPREL_HI20:
+ case R_RISCV_TPREL_ADD:
+ case R_RISCV_TPREL_LO12_I:
+ case R_RISCV_TPREL_LO12_S:
+ if (i + 1 != sec.relocations.size() &&
+ sec.relocations[i + 1].type == R_RISCV_RELAX)
+ relaxTlsLe(sec, i, loc, r, remove);
+ break;
}
// For all anchors whose offsets are <= r.offset, they are preceded by
@@ -697,7 +740,7 @@ void elf::riscvFinalizeRelax(int passes) {
for (size_t i = 0, e = rels.size(); i != e; ++i) {
uint32_t remove = aux.relocDeltas[i] - delta;
delta = aux.relocDeltas[i];
- if (remove == 0)
+ if (remove == 0 && aux.relocTypes[i] == R_RISCV_NONE)
continue;
// Copy from last location to the current relocated location.
@@ -723,15 +766,24 @@ void elf::riscvFinalizeRelax(int passes) {
}
}
} else if (RelType newType = aux.relocTypes[i]) {
- const uint32_t insn = aux.writes[writesIdx++];
switch (newType) {
+ case R_RISCV_RELAX:
+ // Used by relaxTlsLe to indicate the relocation is ignored.
+ break;
case R_RISCV_RVC_JUMP:
skip = 2;
- write16le(p, insn);
+ write16le(p, aux.writes[writesIdx++]);
break;
case R_RISCV_JAL:
skip = 4;
- write32le(p, insn);
+ write32le(p, aux.writes[writesIdx++]);
+ break;
+ case R_RISCV_32:
+ // Used by relaxTlsLe to write a uint32_t then suppress the handling
+ // in relocateAlloc.
+ skip = 4;
+ write32le(p, aux.writes[writesIdx++]);
+ aux.relocTypes[i] = R_RISCV_NONE;
break;
default:
llvm_unreachable("unsupported type");
diff --git a/lld/test/ELF/riscv-tls-le.s b/lld/test/ELF/riscv-tls-le.s
index 96a10e9..752c126 100644
--- a/lld/test/ELF/riscv-tls-le.s
+++ b/lld/test/ELF/riscv-tls-le.s
@@ -1,48 +1,91 @@
# REQUIRES: riscv
+## Additionally test that (a) -no-pie/-pie have the same behavior
+## (b) --no-relax/--relax have the same behavior when R_RISCV_RELAX is suppressed.
# RUN: llvm-mc -filetype=obj -triple=riscv32 %s -o %t.32.o
-# RUN: ld.lld %t.32.o -o %t.32
+# RUN: ld.lld --relax %t.32.o -o %t.32
# RUN: llvm-nm -p %t.32 | FileCheck --check-prefixes=NM %s
# RUN: llvm-objdump -d --no-show-raw-insn %t.32 | FileCheck --check-prefixes=LE %s
-# RUN: ld.lld -pie %t.32.o -o %t.32
+# RUN: ld.lld -pie --no-relax %t.32.o -o %t.32
# RUN: llvm-objdump -d --no-show-raw-insn %t.32 | FileCheck --check-prefixes=LE %s
-# RUN: llvm-mc -filetype=obj -triple=riscv64 %s -o %t.64.o
-# RUN: ld.lld %t.64.o -o %t.64
+# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+relax %s -o %t.64.o
+# RUN: ld.lld --no-relax %t.64.o -o %t.64
# RUN: llvm-objdump -d --no-show-raw-insn %t.64 | FileCheck --check-prefixes=LE %s
-# RUN: ld.lld -pie %t.64.o -o %t.64
+# RUN: ld.lld -pie --no-relax %t.64.o -o %t.64
# RUN: llvm-objdump -d --no-show-raw-insn %t.64 | FileCheck --check-prefixes=LE %s
+# RUN: ld.lld %t.64.o -o %t.64.relax
+# RUN: llvm-objdump -d --no-show-raw-insn %t.64.relax | FileCheck --check-prefixes=LE-RELAX %s
# RUN: not ld.lld -shared %t.32.o -o /dev/null 2>&1 | FileCheck %s --check-prefix=ERR --implicit-check-not=error:
# ERR: error: relocation R_RISCV_TPREL_HI20 against .LANCHOR0 cannot be used with -shared
# ERR: error: relocation R_RISCV_TPREL_LO12_I against .LANCHOR0 cannot be used with -shared
# ERR: error: relocation R_RISCV_TPREL_HI20 against a cannot be used with -shared
+# ERR: error: relocation R_RISCV_TPREL_LO12_I against a cannot be used with -shared
+# ERR: error: relocation R_RISCV_TPREL_HI20 against a cannot be used with -shared
+# ERR: error: relocation R_RISCV_TPREL_LO12_S against a cannot be used with -shared
+# ERR: error: relocation R_RISCV_TPREL_HI20 against a cannot be used with -shared
# ERR: error: relocation R_RISCV_TPREL_LO12_S against a cannot be used with -shared
# NM: {{0*}}00000008 b .LANCHOR0
-# NM: {{0*}}0000000c B a
+# NM: {{0*}}00000800 B a
## .LANCHOR0@tprel = 8
## a@tprel = 0x800
-# LE: lui a5, 0
-# LE-NEXT: add a5, a5, tp
-# LE-NEXT: addi a5, a5, 8
-# LE-NEXT: lui a5, 0
-# LE-NEXT: add a5, a5, tp
-# LE-NEXT: sw a0, 12(a5)
+# LE: lui a1, 0
+# LE-NEXT: add a1, a1, tp
+# LE-NEXT: addi a1, a1, 8
+# LE-NEXT: lui a2, 0
+# LE-NEXT: add a2, a2, tp
+# LE-NEXT: addi a2, a2, 2044
+# LE-NEXT: lui a3, 0
+# LE-NEXT: addi a0, a0, 1
+# LE-NEXT: add a3, a3, tp
+# LE-NEXT: addi a0, a0, 2
+# LE-NEXT: sw a0, 2044(a3)
+# LE-NEXT: lui a4, 1
+# LE-NEXT: add a4, a4, tp
+# LE-NEXT: sw a0, -2048(a4)
+# LE-EMPTY:
+
+# LE-RELAX: <.text>:
+# LE-RELAX-NEXT: addi a1, tp, 8
+# LE-RELAX-NEXT: addi a2, tp, 2044
+# LE-RELAX-NEXT: addi a0, a0, 1
+# LE-RELAX-NEXT: addi a0, a0, 2
+# LE-RELAX-NEXT: sw a0, 2044(tp)
+# LE-RELAX-NEXT: lui a4, 1
+# LE-RELAX-NEXT: add a4, a4, tp
+# LE-RELAX-NEXT: sw a0, -2048(a4)
+# LE-RELAX-EMPTY:
-lui a5, %tprel_hi(.LANCHOR0)
-add a5, a5, tp, %tprel_add(.LANCHOR0)
-addi a5, a5, %tprel_lo(.LANCHOR0)
+lui a1, %tprel_hi(.LANCHOR0)
+add a1, a1, tp, %tprel_add(.LANCHOR0)
+addi a1, a1, %tprel_lo(.LANCHOR0)
-lui a5, %tprel_hi(a)
-add a5, a5, tp, %tprel_add(a)
-sw a0, %tprel_lo(a)(a5)
+## hi20(a-4) = hi20(0x7fc) = 0. relaxable
+lui a2, %tprel_hi(a-4)
+add a2, a2, tp, %tprel_add(a-4)
+addi a2, a2, %tprel_lo(a-4)
+
+## hi20(a-4) = hi20(0x7fc) = 0. relaxable
+## Test non-adjacent instructions.
+lui a3, %tprel_hi(a-4)
+addi a0, a0, 1
+add a3, a3, tp, %tprel_add(a-4)
+addi a0, a0, 2
+sw a0, %tprel_lo(a-4)(a3)
+
+## hi20(a) = hi20(0x800) = 1. not relaxable
+lui a4, %tprel_hi(a)
+add a4, a4, tp, %tprel_add(a)
+sw a0, %tprel_lo(a)(a4)
.section .tbss
.space 8
.LANCHOR0:
-.zero 4
+.space 0x800-8
.globl a
a:
+.zero 4