aboutsummaryrefslogtreecommitdiff
path: root/llvm
diff options
context:
space:
mode:
author Daniel Thornburgh <dthorn@google.com> 2023-05-17 10:57:54 -0700
committer Daniel Thornburgh <dthorn@google.com> 2023-09-19 14:14:27 -0700
commit c649f29c24c9fc1502d8d53e0c96c3d24b31de1a (patch)
tree af683d6eb12a8ff58f0b79cdcb0bab7932c847a4 /llvm
parent 86b32c4b55b51dec1abb15680e8566f36fb7dbd9 (diff)
downloadllvm-c649f29c24c9fc1502d8d53e0c96c3d24b31de1a.zip
llvm-c649f29c24c9fc1502d8d53e0c96c3d24b31de1a.tar.gz
llvm-c649f29c24c9fc1502d8d53e0c96c3d24b31de1a.tar.bz2
[llvm-nm] Add --line-numbers flag
This parallels the binutils/BSD flag of the same name. Debugging information is loaded to print line number information for symbols. Defined symbols are symbolized by their section addresses, and undefined symbols by their first text reloc with line info. Differential Revision: https://reviews.llvm.org/D150987
Diffstat (limited to 'llvm')
-rw-r--r-- llvm/docs/CommandGuide/llvm-nm.rst 6
-rw-r--r-- llvm/docs/ReleaseNotes.rst 3
-rw-r--r-- llvm/test/tools/llvm-nm/X86/line-numbers.test 240
-rw-r--r-- llvm/tools/llvm-nm/CMakeLists.txt 1
-rw-r--r-- llvm/tools/llvm-nm/Opts.td 2
-rw-r--r-- llvm/tools/llvm-nm/llvm-nm.cpp 92
6 files changed, 339 insertions, 5 deletions
diff --git a/llvm/docs/CommandGuide/llvm-nm.rst b/llvm/docs/CommandGuide/llvm-nm.rst
index 4b1290a..7067bb0 100644
--- a/llvm/docs/CommandGuide/llvm-nm.rst
+++ b/llvm/docs/CommandGuide/llvm-nm.rst
@@ -190,6 +190,12 @@ OPTIONS
Print just the symbol names. Alias for ``--format=just-symbols``.
+.. option:: --line-numbers, -l
+
+ Use debugging information to print the filenames and line numbers where
+ symbols are defined. Undefined symbols have the location of their first
+ relocation printed instead.
+
.. option:: -m
Use Darwin format. Alias for ``--format=darwin``.
diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index 352420e..660bb4e 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -177,6 +177,9 @@ Changes to the LLVM tools
* llvm-readelf now supports ``--extra-sym-info`` (``-X``) to display extra
information (section name) when showing symbols.
+* ``llvm-nm`` now supports the ``--line-numbers`` (``-l``) option to use
+ debugging information to print symbols' filenames and line numbers.
+
Changes to LLDB
---------------------------------
diff --git a/llvm/test/tools/llvm-nm/X86/line-numbers.test b/llvm/test/tools/llvm-nm/X86/line-numbers.test
new file mode 100644
index 0000000..4b9817a
--- /dev/null
+++ b/llvm/test/tools/llvm-nm/X86/line-numbers.test
@@ -0,0 +1,240 @@
+## Check that printing line numbers isn't attempted for files like bitcode,
+## which have symbols but limited/no section or debug info.
+# RUN: llvm-nm --line-numbers %p/Inputs/test.IRobj-x86_64 | FileCheck %s --check-prefix=BITCODE --match-full-lines --implicit-check-not={{.}}
+# BITCODE: ---------------- S _global_const
+# BITCODE-NEXT: ---------------- D _global_data
+# BITCODE-NEXT: ---------------- T _global_func
+# BITCODE-NEXT: ---------------- S _hidden_const
+# BITCODE-NEXT: ---------------- D _hidden_data
+# BITCODE-NEXT: ---------------- T _hidden_func
+# BITCODE-NEXT: ---------------- s _static_const
+# BITCODE-NEXT: ---------------- d _static_data
+# BITCODE-NEXT: ---------------- t _static_func
+
+## Check that various symbol types can use debug information if available to
+## print line numbers, and if unavailable, don't print anything erroneous. The
+## specific cases checked are given by the symbol names below. Other test cases
+## place requirements on the contents of the whole file, so they are kept out
+## of main.o.
+# RUN: rm -rf %t
+# RUN: split-file %s %t
+# RUN: llvm-mc -g --filetype=obj %t/main.s -o %t/main.o
+# RUN: llvm-nm -l %t/main.o | FileCheck %s --match-full-lines --implicit-check-not={{.}}
+# RUN: llvm-nm --line-numbers %t/main.o | FileCheck %s --match-full-lines --implicit-check-not={{.}}
+
+# CHECK: 0000000000001234 a absolute_symbol
+# CHECK-NEXT: 0000000000000000 d data_no_dwarf
+# CHECK-NEXT: 0000000000000000 T defined_global_function [[FILENAME:.*main.s]]:4
+# CHECK-NEXT: 0000000000000001 t defined_local_function [[FILENAME]]:7
+# CHECK-NEXT: 0000000000000000 t function_no_dwarf
+# CHECK-NEXT: U undef1 [[FILENAME]]:12
+# CHECK-NEXT: U undef2 [[FILENAME]]:14
+# CHECK-NEXT: U undef_no_reloc
+# CHECK-NEXT: 0000000000000002 t undefined_references [[FILENAME]]:12
+
+## Check that in the absence of DWARF in the whole object, no line number
+## information is printed.
+# RUN: llvm-mc --filetype=obj %t/main.s -o %t/no-dwarf.o
+# RUN: llvm-nm -l %t/no-dwarf.o | FileCheck %s --check-prefix=NO-DWARF --match-full-lines --implicit-check-not={{.}}
+
+# NO-DWARF: 0000000000001234 a absolute_symbol
+# NO-DWARF-NEXT: 0000000000000000 d data_no_dwarf
+# NO-DWARF-NEXT: 0000000000000000 T defined_global_function
+# NO-DWARF-NEXT: 0000000000000001 t defined_local_function
+# NO-DWARF-NEXT: 0000000000000000 t function_no_dwarf
+# NO-DWARF-NEXT: U undef1
+# NO-DWARF-NEXT: U undef2
+# NO-DWARF-NEXT: U undef_no_reloc
+# NO-DWARF-NEXT: 0000000000000002 t undefined_references
+
+## Check that printing line numbers for undefined symbols is not attempted in
+## the absence of any relocation section.
+# RUN: llvm-mc --filetype=obj %t/undef-no-reloc-sections.s -o %t/undef-no-reloc-sections.o
+# RUN: llvm-nm --line-numbers %t/undef-no-reloc-sections.o | FileCheck %s --check-prefix=UNDEF-NO-RELOC-SECTIONS --match-full-lines --implicit-check-not={{.}}
+
+# UNDEF-NO-RELOC-SECTIONS: U undef
+
+## Check that printing line numbers for undefined symbols does not use
+## relocations for non-text sections. This is broken out of main.s to ensure
+## that the data relocation for undef comes first.
+# RUN: llvm-mc -g --filetype=obj %t/undef-data-reloc.s -o %t/undef-data-reloc.o
+# RUN: llvm-nm --line-numbers %t/undef-data-reloc.o | FileCheck %s --check-prefix=UNDEF-DATA-RELOC --match-full-lines --implicit-check-not={{.}}
+
+# UNDEF-DATA-RELOC: 0000000000000000 r data_reloc
+# UNDEF-DATA-RELOC-NEXT: U undef
+
+## Check that line numbers can be printed for data definitions. These are broken
+## out of main.s since their DWARF cannot be generated with llvm-mc -g.
+# RUN: llvm-mc -g --filetype=obj %t/data-dwarf.s -o %t/data-dwarf.o
+# RUN: llvm-nm --line-numbers %t/data-dwarf.o | FileCheck %s --check-prefix=DATA-DWARF --match-full-lines --implicit-check-not={{.}}
+
+# DATA-DWARF: 0000000000000000 D defined_data /tmp/tmp.c:1
+
+#--- main.s
+.text
+.globl defined_global_function
+defined_global_function:
+ ret
+
+defined_local_function:
+ ret
+
+absolute_symbol = 0x1234
+
+undefined_references:
+ nop
+ .long undef1
+ nop
+ .long undef2
+ ret
+
+# Note: llvm-mc -g produces no DWARF for data.
+.data
+data_no_dwarf:
+ .byte 0
+
+.globl undef_no_reloc
+
+# Note: llvm-mc -g does not produce DWARF for non-SHF_ALLOC sections.
+.section no_alloc_text,"x",@progbits
+function_no_dwarf:
+ ret
+
+#--- undef-no-reloc-sections.s
+.globl undef
+
+#--- undef-data-reloc.s
+.globl undef
+.rodata
+data_reloc:
+ .long undef
+
+#--- data-dwarf.s
+# char defined_data = 42
+ .text
+ .file "tmp.c"
+ .file 0 "/tmp" "/tmp/tmp.c" md5 0x39602a53b15a32d6a622ca86936e88d7
+ .file 1 "tmp.c" md5 0x39602a53b15a32d6a622ca86936e88d7
+ .type defined_data,@object # @defined_data
+ .data
+ .globl defined_data
+defined_data:
+ .byte 42 # 0x2a
+ .size defined_data, 1
+
+ .section .debug_abbrev,"",@progbits
+ .byte 1 # Abbreviation Code
+ .byte 17 # DW_TAG_compile_unit
+ .byte 1 # DW_CHILDREN_yes
+ .byte 37 # DW_AT_producer
+ .byte 37 # DW_FORM_strx1
+ .byte 19 # DW_AT_language
+ .byte 5 # DW_FORM_data2
+ .byte 3 # DW_AT_name
+ .byte 37 # DW_FORM_strx1
+ .byte 114 # DW_AT_str_offsets_base
+ .byte 23 # DW_FORM_sec_offset
+ .byte 16 # DW_AT_stmt_list
+ .byte 23 # DW_FORM_sec_offset
+ .byte 27 # DW_AT_comp_dir
+ .byte 37 # DW_FORM_strx1
+ .byte 115 # DW_AT_addr_base
+ .byte 23 # DW_FORM_sec_offset
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 2 # Abbreviation Code
+ .byte 52 # DW_TAG_variable
+ .byte 0 # DW_CHILDREN_no
+ .byte 3 # DW_AT_name
+ .byte 37 # DW_FORM_strx1
+ .byte 73 # DW_AT_type
+ .byte 19 # DW_FORM_ref4
+ .byte 63 # DW_AT_external
+ .byte 25 # DW_FORM_flag_present
+ .byte 58 # DW_AT_decl_file
+ .byte 11 # DW_FORM_data1
+ .byte 59 # DW_AT_decl_line
+ .byte 11 # DW_FORM_data1
+ .byte 2 # DW_AT_location
+ .byte 24 # DW_FORM_exprloc
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 3 # Abbreviation Code
+ .byte 36 # DW_TAG_base_type
+ .byte 0 # DW_CHILDREN_no
+ .byte 3 # DW_AT_name
+ .byte 37 # DW_FORM_strx1
+ .byte 62 # DW_AT_encoding
+ .byte 11 # DW_FORM_data1
+ .byte 11 # DW_AT_byte_size
+ .byte 11 # DW_FORM_data1
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 0 # EOM(3)
+ .section .debug_info,"",@progbits
+.Lcu_begin0:
+ .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
+.Ldebug_info_start0:
+ .short 5 # DWARF version number
+ .byte 1 # DWARF Unit Type
+ .byte 8 # Address Size (in bytes)
+ .long .debug_abbrev # Offset Into Abbrev. Section
+ .byte 1 # Abbrev [1] 0xc:0x22 DW_TAG_compile_unit
+ .byte 0 # DW_AT_producer
+ .short 12 # DW_AT_language
+ .byte 1 # DW_AT_name
+ .long .Lstr_offsets_base0 # DW_AT_str_offsets_base
+ .long .Lline_table_start0 # DW_AT_stmt_list
+ .byte 2 # DW_AT_comp_dir
+ .long .Laddr_table_base0 # DW_AT_addr_base
+ .byte 2 # Abbrev [2] 0x1e:0xb DW_TAG_variable
+ .byte 3 # DW_AT_name
+ .long 41 # DW_AT_type
+ # DW_AT_external
+ .byte 1 # DW_AT_decl_file
+ .byte 1 # DW_AT_decl_line
+ .byte 2 # DW_AT_location
+ .byte 161
+ .byte 0
+ .byte 3 # Abbrev [3] 0x29:0x4 DW_TAG_base_type
+ .byte 4 # DW_AT_name
+ .byte 6 # DW_AT_encoding
+ .byte 1 # DW_AT_byte_size
+ .byte 0 # End Of Children Mark
+.Ldebug_info_end0:
+ .section .debug_str_offsets,"",@progbits
+ .long 24 # Length of String Offsets Set
+ .short 5
+ .short 0
+.Lstr_offsets_base0:
+ .section .debug_str,"MS",@progbits,1
+.Linfo_string0:
+ .asciz "Debian clang version 14.0.6" # string offset=0
+.Linfo_string1:
+ .asciz "/tmp/tmp.c" # string offset=28
+.Linfo_string2:
+ .asciz "/tmp" # string offset=39
+.Linfo_string3:
+ .asciz "defined_data" # string offset=44
+.Linfo_string4:
+ .asciz "char" # string offset=57
+ .section .debug_str_offsets,"",@progbits
+ .long .Linfo_string0
+ .long .Linfo_string1
+ .long .Linfo_string2
+ .long .Linfo_string3
+ .long .Linfo_string4
+ .section .debug_addr,"",@progbits
+ .long .Ldebug_addr_end0-.Ldebug_addr_start0 # Length of contribution
+.Ldebug_addr_start0:
+ .short 5 # DWARF version number
+ .byte 8 # Address size
+ .byte 0 # Segment selector size
+.Laddr_table_base0:
+ .quad defined_data
+.Ldebug_addr_end0:
+ .ident "Debian clang version 14.0.6"
+ .section ".note.GNU-stack","",@progbits
+ .addrsig
+ .section .debug_line,"",@progbits
+.Lline_table_start0:
diff --git a/llvm/tools/llvm-nm/CMakeLists.txt b/llvm/tools/llvm-nm/CMakeLists.txt
index ec04f1e..5191e13 100644
--- a/llvm/tools/llvm-nm/CMakeLists.txt
+++ b/llvm/tools/llvm-nm/CMakeLists.txt
@@ -8,6 +8,7 @@ set(LLVM_LINK_COMPONENTS
Object
Option
Support
+ Symbolize
TargetParser
TextAPI
)
diff --git a/llvm/tools/llvm-nm/Opts.td b/llvm/tools/llvm-nm/Opts.td
index 60ac13426..04d9f5d 100644
--- a/llvm/tools/llvm-nm/Opts.td
+++ b/llvm/tools/llvm-nm/Opts.td
@@ -22,6 +22,7 @@ def export_symbols : FF<"export-symbols", "Export symbol list for all inputs">;
def extern_only : FF<"extern-only", "Show only external symbols">;
defm format : Eq<"format", "Specify output format: bsd (default), posix, sysv, darwin, just-symbols">, MetaVarName<"<format>">;
def help : FF<"help", "Display this help">;
+def line_numbers : FF<"line-numbers", "Use debugging information to print symbols' filenames and line numbers">;
def no_llvm_bc : FF<"no-llvm-bc", "Disable LLVM bitcode reader">;
def no_sort : FF<"no-sort", "Show symbols in order encountered">;
def no_weak : FF<"no-weak", "Show only non-weak symbols">;
@@ -67,6 +68,7 @@ def : JoinedOrSeparate<["-"], "f">, HelpText<"Alias for --format">, Alias<format
def : F<"h", "Alias for --help">, Alias<help>;
def : F<"g", "Alias for --extern-only">, Alias<extern_only>;
def : F<"j", "Alias for --format=just-symbols">, Alias<format_EQ>, AliasArgs<["just-symbols"]>;
+def : F<"l", "Alias for --line-numbers">, Alias<line_numbers>;
def : F<"m", "Alias for --format=darwin">, Alias<format_EQ>, AliasArgs<["darwin"]>;
def : F<"M", "Deprecated alias for --print-armap">, Alias<print_armap>, Flags<[HelpHidden]>;
def : F<"n", "Alias for --numeric-sort">, Alias<numeric_sort>;
diff --git a/llvm/tools/llvm-nm/llvm-nm.cpp b/llvm/tools/llvm-nm/llvm-nm.cpp
index 9a9e8bd..051fa3e 100644
--- a/llvm/tools/llvm-nm/llvm-nm.cpp
+++ b/llvm/tools/llvm-nm/llvm-nm.cpp
@@ -19,6 +19,7 @@
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/BinaryFormat/XCOFF.h"
+#include "llvm/DebugInfo/Symbolize/Symbolize.h"
#include "llvm/Demangle/Demangle.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
@@ -97,6 +98,7 @@ static bool Demangle;
static bool DynamicSyms;
static bool ExportSymbols;
static bool ExternalOnly;
+static bool LineNumbers;
static OutputFormatTy OutputFormat;
static bool NoLLVMBitcode;
static bool NoSort;
@@ -551,8 +553,6 @@ static void darwinPrintSymbol(SymbolicFile &Obj, const NMSymbol &S,
}
}
}
-
- outs() << "\n";
}
// Table that maps Darwin's Mach-O stab constants to strings to allow printing.
@@ -689,9 +689,88 @@ static void printExportSymbolList(const std::vector<NMSymbol> &SymbolList) {
}
}
+/// Prints a tab followed by "<file>:<line>" for \p S to outs() when debug
+/// information yields a location; prints nothing otherwise.
+///
+/// How the location is found depends on the symbol's type character:
+///  - 'U' (undefined): the first relocation against the symbol, within a
+///    relocation section that applies to a text section, for which the
+///    symbolizer reports a file name.
+///  - 't' / 'T': the symbol's own address, symbolized as code.
+///  - anything else: the symbol's own address, symbolized as data, using the
+///    declaration file and line.
+///
+/// Symbols that do not belong to an ObjectFile are skipped. Failures from the
+/// object file or the symbolizer are reported through error() and suppress
+/// the output for this symbol.
+///
+/// \param Symbolizer symbolizer used for all code and data lookups.
+/// \param S the symbol whose location should be printed.
+static void printLineNumbers(symbolize::LLVMSymbolizer &Symbolizer,
+                             const NMSymbol &S) {
+  const auto *Obj = dyn_cast<ObjectFile>(S.Sym.getObject());
+  if (!Obj)
+    return;
+  const SymbolRef Sym(S.Sym);
+
+  // Report failures instead of using cantFail(), which would abort the tool
+  // on malformed input.
+  Expected<section_iterator> SecOrErr = Sym.getSection();
+  if (!SecOrErr) {
+    error(SecOrErr.takeError(), Obj->getFileName());
+    return;
+  }
+  Expected<uint64_t> AddrOrErr = Sym.getAddress();
+  if (!AddrOrErr) {
+    error(AddrOrErr.takeError(), Obj->getFileName());
+    return;
+  }
+
+  uint64_t SectionIndex = object::SectionedAddress::UndefSection;
+  if (*SecOrErr != Obj->section_end())
+    SectionIndex = (*SecOrErr)->getIndex();
+  object::SectionedAddress Address = {*AddrOrErr, SectionIndex};
+
+  std::string FileName;
+  uint32_t Line = 0;
+  switch (S.TypeChar) {
+  // For undefined symbols, find the first relocation for that symbol with a
+  // line number.
+  case 'U': {
+    for (const SectionRef RelocsSec : Obj->sections()) {
+      if (RelocsSec.relocations().empty())
+        continue;
+      Expected<section_iterator> RelocatedOrErr =
+          RelocsSec.getRelocatedSection();
+      if (!RelocatedOrErr) {
+        error(RelocatedOrErr.takeError(), Obj->getFileName());
+        return;
+      }
+      // getRelocatedSection() may yield section_end(); it must not be
+      // dereferenced.
+      if (*RelocatedOrErr == Obj->section_end())
+        continue;
+      const SectionRef TextSec = **RelocatedOrErr;
+      if (!TextSec.isText())
+        continue;
+      for (const RelocationRef R : RelocsSec.relocations()) {
+        if (R.getSymbol() != Sym)
+          continue;
+        // The relocation site lives in TextSec, so symbolize it against that
+        // section. The undefined symbol's own section index is always
+        // UndefSection, which is ambiguous when several text sections of a
+        // relocatable object share the same address.
+        Expected<DILineInfo> ResOrErr = Symbolizer.symbolizeCode(
+            *Obj, {TextSec.getAddress() + R.getOffset(), TextSec.getIndex()});
+        if (!ResOrErr) {
+          error(ResOrErr.takeError(), Obj->getFileName());
+          return;
+        }
+        // No line info for this relocation; a later one may still have it.
+        if (ResOrErr->FileName == DILineInfo::BadString)
+          continue;
+        FileName = std::move(ResOrErr->FileName);
+        Line = ResOrErr->Line;
+        break;
+      }
+      if (!FileName.empty())
+        break;
+    }
+    if (FileName.empty())
+      return;
+    break;
+  }
+  case 't':
+  case 'T': {
+    Expected<DILineInfo> ResOrErr = Symbolizer.symbolizeCode(*Obj, Address);
+    if (!ResOrErr) {
+      error(ResOrErr.takeError(), Obj->getFileName());
+      return;
+    }
+    if (ResOrErr->FileName == DILineInfo::BadString)
+      return;
+    FileName = std::move(ResOrErr->FileName);
+    Line = ResOrErr->Line;
+    break;
+  }
+  default: {
+    Expected<DIGlobal> ResOrErr = Symbolizer.symbolizeData(*Obj, Address);
+    if (!ResOrErr) {
+      error(ResOrErr.takeError(), Obj->getFileName());
+      return;
+    }
+    if (ResOrErr->DeclFile.empty())
+      return;
+    FileName = std::move(ResOrErr->DeclFile);
+    Line = ResOrErr->DeclLine;
+    break;
+  }
+  }
+  outs() << '\t' << FileName << ':' << Line;
+}
+
+
static void printSymbolList(SymbolicFile &Obj,
std::vector<NMSymbol> &SymbolList, bool printName,
StringRef ArchiveName, StringRef ArchitectureName) {
+ std::optional<symbolize::LLVMSymbolizer> Symbolizer;
+ if (LineNumbers)
+ Symbolizer.emplace();
+
if (!PrintFileName) {
if ((OutputFormat == bsd || OutputFormat == posix ||
OutputFormat == just_symbols) &&
@@ -798,7 +877,7 @@ static void printSymbolList(SymbolicFile &Obj,
printFormat);
} else if (OutputFormat == posix) {
outs() << Name << " " << S.TypeChar << " " << SymbolAddrStr << " "
- << (MachO ? "0" : SymbolSizeStr) << "\n";
+ << (MachO ? "0" : SymbolSizeStr);
} else if (OutputFormat == bsd || (OutputFormat == darwin && !MachO)) {
if (PrintAddress)
outs() << SymbolAddrStr << ' ';
@@ -819,12 +898,14 @@ static void printSymbolList(SymbolicFile &Obj,
} else
outs() << S.IndirectName << ")";
}
- outs() << "\n";
} else if (OutputFormat == sysv) {
outs() << left_justify(Name, 20) << "|" << SymbolAddrStr << "| "
<< S.TypeChar << " |" << right_justify(S.TypeName, 18) << "|"
- << SymbolSizeStr << "| |" << S.SectionName << "\n";
+ << SymbolSizeStr << "| |" << S.SectionName;
}
+ if (LineNumbers)
+ printLineNumbers(*Symbolizer, S);
+ outs() << '\n';
}
SymbolList.clear();
@@ -2415,6 +2496,7 @@ int llvm_nm_main(int argc, char **argv, const llvm::ToolContext &) {
else
error("--format value should be one of: bsd, posix, sysv, darwin, "
"just-symbols");
+ LineNumbers = Args.hasArg(OPT_line_numbers);
NoLLVMBitcode = Args.hasArg(OPT_no_llvm_bc);
NoSort = Args.hasArg(OPT_no_sort);
NoWeakSymbols = Args.hasArg(OPT_no_weak);