1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
|
//===- DwarfTransformer.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/DIContext.h"
#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ThreadPool.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/DebugInfo/GSYM/DwarfTransformer.h"
#include "llvm/DebugInfo/GSYM/FunctionInfo.h"
#include "llvm/DebugInfo/GSYM/GsymCreator.h"
#include "llvm/DebugInfo/GSYM/GsymReader.h"
#include "llvm/DebugInfo/GSYM/InlineInfo.h"
#include "llvm/DebugInfo/GSYM/OutputAggregator.h"
#include <optional>
using namespace llvm;
using namespace gsym;
/// Per compile unit state that is shared by the routines converting DWARF
/// into GSYM.
struct llvm::gsym::CUInfo {
  /// Line table the DWARF context returned for the unit; may be null.
  const DWARFDebugLine::LineTable *LineTable;
  /// Compilation directory reported by the unit.
  const char *CompDir;
  /// Indexed by DWARF file index. Holds the matching GSYM file index, or
  /// UINT32_MAX while the entry has not been converted yet.
  std::vector<uint32_t> FileCache;
  /// Value of DW_AT_language on the unit DIE, zero when it is missing.
  uint64_t Language = 0;
  /// Size in bytes of an address in this unit.
  uint8_t AddrSize = 0;

  CUInfo(DWARFContext &DICtx, DWARFCompileUnit *CU)
      : LineTable(DICtx.getLineTableForUnit(CU)),
        CompDir(CU->getCompilationDir()) {
    // One cache slot per file name in the prologue plus one extra slot, all
    // of them starting out as "not converted".
    if (LineTable) {
      const size_t NumSlots = LineTable->Prologue.FileNames.size() + 1;
      FileCache.assign(NumSlots, UINT32_MAX);
    }
    DWARFDie UnitDie = CU->getUnitDIE();
    Language = dwarf::toUnsigned(UnitDie.find(dwarf::DW_AT_language), 0);
    AddrSize = CU->getAddressByteSize();
  }

  /// Return true if Addr is the highest address for a given compile unit. The
  /// highest address is encoded as -1, or all ones in the address. These high
  /// addresses are used by some linkers to indicate that a function has been
  /// dead stripped or didn't end up in the linked executable.
  ///
  /// Only 4 and 8 byte address sizes are recognized; any other size yields
  /// false.
  bool isHighestAddress(uint64_t Addr) const {
    switch (AddrSize) {
    case 4:
      return Addr == UINT32_MAX;
    case 8:
      return Addr == UINT64_MAX;
    default:
      return false;
    }
  }

  /// Convert a DWARF compile unit file index into a GSYM global file index.
  ///
  /// Each compile unit in DWARF has its own file table in the line table
  /// prologue. GSYM has a single large file table that applies to all files
  /// from all of the info in a GSYM file. This function converts between the
  /// two and caches any DWARF CU file index that has already been converted,
  /// so the first client that asks for a compile unit file index ends up
  /// doing the conversion and subsequent clients get the cached GSYM index.
  ///
  /// \returns std::nullopt when there is no line table or when DwarfFileIdx
  /// is outside of the cache; otherwise the GSYM file index. A file whose
  /// name cannot be retrieved from the line table is cached as index zero.
  std::optional<uint32_t> DWARFToGSYMFileIndex(GsymCreator &Gsym,
                                               uint32_t DwarfFileIdx) {
    if (!LineTable || DwarfFileIdx >= FileCache.size())
      return std::nullopt;
    uint32_t &Cached = FileCache[DwarfFileIdx];
    if (Cached == UINT32_MAX) {
      std::string Path;
      const bool Resolved = LineTable->getFileNameByIndex(
          DwarfFileIdx, CompDir,
          DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, Path);
      if (Resolved)
        Cached = Gsym.insertFile(Path);
      else
        Cached = 0;
    }
    return Cached;
  }
};
/// Find the DIE that acts as the declaration context of \a Die.
///
/// The DIEs referenced through DW_AT_specification and DW_AT_abstract_origin
/// are consulted first, in that order. If neither produces a context, the
/// direct parent is examined: namespaces, structures, unions, classes and
/// subprograms are returned as the context, lexical blocks are skipped by
/// recursing into them, and every other parent tag ends the search.
///
/// \returns the context DIE, or an invalid DWARFDie if there is none.
static DWARFDie GetParentDeclContextDIE(DWARFDie &Die) {
  const dwarf::Attribute RefAttrs[] = {dwarf::DW_AT_specification,
                                       dwarf::DW_AT_abstract_origin};
  for (dwarf::Attribute RefAttr : RefAttrs) {
    DWARFDie RefDie = Die.getAttributeValueAsReferencedDie(RefAttr);
    if (!RefDie)
      continue;
    if (DWARFDie RefParent = GetParentDeclContextDIE(RefDie))
      return RefParent;
  }

  // The parent of an inlined subroutine says where the function was inlined,
  // not which function was inlined, so it must never be followed.
  if (Die.getTag() == dwarf::DW_TAG_inlined_subroutine)
    return DWARFDie();

  DWARFDie Enclosing = Die.getParent();
  if (!Enclosing)
    return DWARFDie();

  const dwarf::Tag EnclosingTag = Enclosing.getTag();
  // Lexical blocks are transparent: keep looking further up.
  if (EnclosingTag == dwarf::DW_TAG_lexical_block)
    return GetParentDeclContextDIE(Enclosing);

  const bool IsDeclContext = EnclosingTag == dwarf::DW_TAG_namespace ||
                             EnclosingTag == dwarf::DW_TAG_structure_type ||
                             EnclosingTag == dwarf::DW_TAG_union_type ||
                             EnclosingTag == dwarf::DW_TAG_class_type ||
                             EnclosingTag == dwarf::DW_TAG_subprogram;
  if (IsDeclContext)
    return Enclosing;
  return DWARFDie();
}
/// Get the GsymCreator string table offset for the qualified name of the DIE
/// passed in.
///
/// Strings are only copied into the GsymCreator when this function had to
/// build a new string. A linkage name or short name comes straight from the
/// DWARF and is inserted without a copy; a qualified name assembled from
/// several DWARF strings lives in a temporary std::string and is therefore
/// inserted with a copy.
///
/// \returns the string table offset, or std::nullopt if the DIE has neither a
/// non-empty linkage name nor a non-empty short name.
static std::optional<uint32_t>
getQualifiedNameIndex(DWARFDie &Die, uint64_t Language, GsymCreator &Gsym) {
  // A mangled name wins whenever the DWARF has one. Linkage names that are
  // present but empty have been seen, so those are ignored.
  const char *Mangled = Die.getLinkageName();
  if (Mangled && Mangled[0] != '\0')
    return Gsym.insertString(Mangled, /* Copy */ false);

  StringRef BaseName(Die.getName(DINameKind::ShortName));
  if (BaseName.empty())
    return std::nullopt;

  // Only C++ and ObjC++ names get their parent declaration contexts
  // prepended. C is included as well because some binaries contain C++ code
  // that is marked as C.
  const bool QualifyWithParents =
      Language == dwarf::DW_LANG_C_plus_plus ||
      Language == dwarf::DW_LANG_C_plus_plus_03 ||
      Language == dwarf::DW_LANG_C_plus_plus_11 ||
      Language == dwarf::DW_LANG_C_plus_plus_14 ||
      Language == dwarf::DW_LANG_ObjC_plus_plus ||
      Language == dwarf::DW_LANG_C;
  if (!QualifyWithParents)
    return Gsym.insertString(BaseName, /* Copy */ false);

  // Some GCC optimizations create functions whose names end in .isra.<num> or
  // .part.<num>, and those names are stored in DW_AT_name rather than in
  // DW_AT_linkage_name. A name that looks like that gets no prefix.
  const bool LooksLikeGccClone =
      BaseName.starts_with("_Z") &&
      (BaseName.contains(".isra.") || BaseName.contains(".part."));
  if (LooksLikeGccClone)
    return Gsym.insertString(BaseName, /* Copy */ false);

  DWARFDie Ctx = GetParentDeclContextDIE(Die);
  // Without a parent context the short name is used as is; it lives in the
  // DWARF object file, so no copy is needed.
  if (!Ctx)
    return Gsym.insertString(BaseName, /* Copy */ false);

  std::string Qualified = BaseName.str();
  for (; Ctx; Ctx = GetParentDeclContextDIE(Ctx)) {
    StringRef CtxName(Ctx.getName(DINameKind::ShortName));
    if (CtxName.empty())
      continue;
    std::string Prefix;
    // "lambda" names are wrapped in < >. Use { } instead so the result is
    // consistent with demangled names and is not mistaken for a template.
    if (CtxName.front() == '<' && CtxName.back() == '>')
      Prefix = "{" + CtxName.substr(1, CtxName.size() - 2).str() + "}";
    else
      Prefix = CtxName.str();
    Qualified = Prefix + "::" + Qualified;
  }
  // The name was assembled in a local std::string, so it has to be copied.
  return Gsym.insertString(Qualified, /* Copy */ true);
}
/// Return true if \a Die is a DW_TAG_inlined_subroutine or has one somewhere
/// below it.
///
/// \param Die The DIE to inspect.
/// \param Depth Nesting level of \a Die relative to the DIE the search
///        started at. The children of a DW_TAG_subprogram are only searched
///        when the subprogram is at depth zero, so functions nested within
///        functions are not looked into.
static bool hasInlineInfo(DWARFDie Die, uint32_t Depth) {
  const dwarf::Tag Tag = Die.getTag();
  if (Tag == dwarf::DW_TAG_inlined_subroutine)
    return true;
  // Don't look into functions within functions.
  if (Tag == dwarf::DW_TAG_subprogram && Depth != 0)
    return false;

  const uint32_t ChildDepth = Depth + 1;
  for (DWARFDie Child : Die.children())
    if (hasInlineInfo(Child, ChildDepth))
      return true;
  return false;
}
/// Convert a vector of DWARF address ranges into an AddressRanges collection.
/// Ranges whose LowPC is not strictly below their HighPC are dropped.
static AddressRanges
ConvertDWARFRanges(const DWARFAddressRangesVector &DwarfRanges) {
  AddressRanges Converted;
  for (const DWARFAddressRange &R : DwarfRanges) {
    if (R.LowPC >= R.HighPC)
      continue;
    Converted.insert({R.LowPC, R.HighPC});
  }
  return Converted;
}
/// Recursively collect inline function information below \a Die and attach it
/// to \a Parent.
///
/// A DW_TAG_inlined_subroutine DIE becomes a new InlineInfo holding those of
/// its address ranges that are contained in Parent.Ranges. It is appended to
/// Parent.Children after its own children have been parsed. Entries without
/// any usable range, or with a DW_AT_call_file that cannot be mapped to a
/// GSYM file index, are dropped together with their children.
/// DW_TAG_subprogram and DW_TAG_lexical_block DIEs are skipped and only their
/// children are visited. DIEs with any other tag are ignored.
///
/// \param Gsym Creator that receives strings and files.
/// \param Out Aggregator that receives the diagnostics reported below.
/// \param CUI Compile unit state (language, file index mapping).
/// \param Die The DIE to process.
/// \param Depth Nesting level of \a Die below the top level function DIE.
/// \param FI Function info being built; only forwarded to recursive calls.
/// \param Parent InlineInfo that new entries are appended to.
/// \param AllParentRanges Every range of the parent, used to decide whether
///        an inline range that is outside Parent.Ranges deserves an error.
/// \param WarnIfEmpty Set to false when an empty result is expected, so the
///        caller can skip its "no valid ranges" warning.
static void parseInlineInfo(GsymCreator &Gsym, OutputAggregator &Out,
                            CUInfo &CUI, DWARFDie Die, uint32_t Depth,
                            FunctionInfo &FI, InlineInfo &Parent,
                            const AddressRanges &AllParentRanges,
                            bool &WarnIfEmpty) {
  if (!hasInlineInfo(Die, Depth))
    return;

  dwarf::Tag Tag = Die.getTag();
  if (Tag == dwarf::DW_TAG_inlined_subroutine) {
    // Create a new InlineInfo; it is appended to Parent.Children further down
    // once it is known to be usable.
    InlineInfo II;
    AddressRanges AllInlineRanges;
    Expected<DWARFAddressRangesVector> RangesOrError = Die.getAddressRanges();
    if (RangesOrError) {
      AllInlineRanges = ConvertDWARFRanges(RangesOrError.get());
      uint32_t EmptyCount = 0;
      for (const AddressRange &InlineRange : AllInlineRanges) {
        // Check for empty inline range in case inline function was outlined
        // or has no code.
        if (InlineRange.empty()) {
          ++EmptyCount;
        } else {
          if (Parent.Ranges.contains(InlineRange)) {
            II.Ranges.insert(InlineRange);
          } else {
            // Only warn if the current inline range is not within any of all
            // of the parent ranges. If we have a DW_TAG_subprogram with
            // multiple ranges we will emit a FunctionInfo for each range of
            // that function that only emits information within the current
            // range, so we only want to emit an error if the DWARF has
            // issues, not when a range currently just isn't in the range we
            // are currently parsing for.
            if (AllParentRanges.contains(InlineRange)) {
              WarnIfEmpty = false;
            } else
              Out.Report("Function DIE has uncontained address range",
                         [&](raw_ostream &OS) {
                           OS << "error: inlined function DIE at "
                              << HEX32(Die.getOffset()) << " has a range ["
                              << HEX64(InlineRange.start()) << " - "
                              << HEX64(InlineRange.end())
                              << ") that isn't contained in "
                              << "any parent address ranges, this inline range "
                                 "will be "
                                 "removed.\n";
                         });
          }
        }
      }
      // If we have all empty ranges for the inlines, then don't warn if we
      // have an empty InlineInfo at the top level as all inline functions
      // were elided.
      if (EmptyCount == AllInlineRanges.size())
        WarnIfEmpty = false;
    } else {
      // An Expected that holds an error must have that error handled before
      // it is destroyed. The failure is dropped here the same way handleDie
      // drops it; II.Ranges stays empty, so this DIE is skipped just below.
      consumeError(RangesOrError.takeError());
    }
    if (II.Ranges.empty())
      return;

    if (auto NameIndex = getQualifiedNameIndex(Die, CUI.Language, Gsym))
      II.Name = *NameIndex;
    const uint64_t DwarfFileIdx = dwarf::toUnsigned(
        Die.findRecursively(dwarf::DW_AT_call_file), UINT32_MAX);
    // The attribute value is 64 bits wide while file indexes are 32 bits
    // wide. Reject values that don't fit instead of letting them be truncated
    // into an unrelated, possibly valid, index.
    std::optional<uint32_t> OptGSymFileIdx;
    if (DwarfFileIdx <= UINT32_MAX)
      OptGSymFileIdx = CUI.DWARFToGSYMFileIndex(
          Gsym, static_cast<uint32_t>(DwarfFileIdx));
    if (OptGSymFileIdx) {
      II.CallFile = OptGSymFileIdx.value();
      II.CallLine = dwarf::toUnsigned(Die.find(dwarf::DW_AT_call_line), 0);
      // Parse all children first so they end up inside II, then append II to
      // the parent.
      for (DWARFDie ChildDie : Die.children())
        parseInlineInfo(Gsym, Out, CUI, ChildDie, Depth + 1, FI, II,
                        AllInlineRanges, WarnIfEmpty);
      Parent.Children.emplace_back(std::move(II));
    } else
      Out.Report(
          "Inlined function die has invlaid file index in DW_AT_call_file",
          [&](raw_ostream &OS) {
            OS << "error: inlined function DIE at " << HEX32(Die.getOffset())
               << " has an invalid file index " << DwarfFileIdx
               << " in its DW_AT_call_file attribute, this inline entry and "
                  "all "
               << "children will be removed.\n";
          });
    return;
  }
  if (Tag == dwarf::DW_TAG_subprogram || Tag == dwarf::DW_TAG_lexical_block) {
    // Skip this DIE and just recurse down.
    for (DWARFDie ChildDie : Die.children())
      parseInlineInfo(Gsym, Out, CUI, ChildDie, Depth + 1, FI, Parent,
                      AllParentRanges, WarnIfEmpty);
  }
}
/// Build the GSYM line table of the function described by \a FI from the
/// DWARF line table of its compile unit.
///
/// The rows for the address range of \a FI are looked up in CUI.LineTable and
/// converted into LineEntry objects that are pushed onto FI.OptLineTable. If
/// the lookup fails, a single entry is created from the DW_AT_decl_file and
/// DW_AT_decl_line attributes of \a Die, provided both can be resolved. If the
/// lookup succeeds but no entry ends up in the table, FI.OptLineTable is
/// reset to std::nullopt.
///
/// CUI.LineTable is dereferenced unconditionally, so this must only be called
/// when it is non-null.
///
/// \param Out Aggregator that receives the diagnostics reported below.
/// \param CUI Compile unit state; provides the DWARF line table and the DWARF
///        to GSYM file index mapping.
/// \param Die The DIE whose line information is converted.
/// \param Gsym Creator that file paths are inserted into.
/// \param FI Function info whose range is read and whose OptLineTable is
///        written.
static void convertFunctionLineTable(OutputAggregator &Out, CUInfo &CUI,
DWARFDie Die, GsymCreator &Gsym,
FunctionInfo &FI) {
std::vector<uint32_t> RowVector;
const uint64_t StartAddress = FI.startAddress();
const uint64_t EndAddress = FI.endAddress();
const uint64_t RangeSize = EndAddress - StartAddress;
const object::SectionedAddress SecAddress{
StartAddress, object::SectionedAddress::UndefSection};
// Attempt to retrieve DW_AT_LLVM_stmt_sequence if present.
std::optional<uint64_t> StmtSeqOffset;
if (auto StmtSeqAttr = Die.find(llvm::dwarf::DW_AT_LLVM_stmt_sequence)) {
// The `DW_AT_LLVM_stmt_sequence` attribute might be set to `UINT64_MAX`
// when it refers to an empty line sequence. In such cases, the DWARF linker
// will exclude the empty sequence from the final output and assign
// `UINT64_MAX` to the `DW_AT_LLVM_stmt_sequence` attribute.
uint64_t StmtSeqVal = dwarf::toSectionOffset(StmtSeqAttr, UINT64_MAX);
if (StmtSeqVal != UINT64_MAX)
StmtSeqOffset = StmtSeqVal;
}
if (!CUI.LineTable->lookupAddressRange(SecAddress, RangeSize, RowVector,
StmtSeqOffset)) {
// If we have a DW_TAG_subprogram but no line entries, fall back to using
// the DW_AT_decl_file and DW_AT_decl_line if we have both attributes.
std::string FilePath = Die.getDeclFile(
DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath);
if (FilePath.empty()) {
// If we had a DW_AT_decl_file, but got no file then we need to emit a
// warning.
// NOTE(review): the check below only tests for an empty path, so this is
// also reported when the DIE has no DW_AT_decl_file at all - confirm.
Out.Report("Invalid file index in DW_AT_decl_file", [&](raw_ostream &OS) {
const uint64_t DwarfFileIdx = dwarf::toUnsigned(
Die.findRecursively(dwarf::DW_AT_decl_file), UINT32_MAX);
OS << "error: function DIE at " << HEX32(Die.getOffset())
<< " has an invalid file index " << DwarfFileIdx
<< " in its DW_AT_decl_file attribute, unable to create a single "
<< "line entry from the DW_AT_decl_file/DW_AT_decl_line "
<< "attributes.\n";
});
return;
}
// Only create the single fallback entry when a declaration line exists.
if (auto Line =
dwarf::toUnsigned(Die.findRecursively({dwarf::DW_AT_decl_line}))) {
LineEntry LE(StartAddress, Gsym.insertFile(FilePath), *Line);
FI.OptLineTable = LineTable();
FI.OptLineTable->push(LE);
}
return;
}
FI.OptLineTable = LineTable();
// Last row that was pushed; reset to a default row after an end sequence.
DWARFDebugLine::Row PrevRow;
for (uint32_t RowIndex : RowVector) {
// Take file number and line/column from the row.
const DWARFDebugLine::Row &Row = CUI.LineTable->Rows[RowIndex];
std::optional<uint32_t> OptFileIdx =
CUI.DWARFToGSYMFileIndex(Gsym, Row.File);
if (!OptFileIdx) {
Out.Report(
"Invalid file index in DWARF line table", [&](raw_ostream &OS) {
OS << "error: function DIE at " << HEX32(Die.getOffset()) << " has "
<< "a line entry with invalid DWARF file index, this entry will "
<< "be removed:\n";
Row.dumpTableHeader(OS, /*Indent=*/0);
Row.dump(OS);
OS << "\n";
});
continue;
}
const uint32_t FileIdx = OptFileIdx.value();
uint64_t RowAddress = Row.Address.Address;
// Watch out for a RowAddress that is in the middle of a line table entry
// in the DWARF. If we pass an address in between two line table entries
// we will get a RowIndex for the previous valid line table row which won't
// be contained in our function. This is usually a bug in the DWARF due to
// linker problems or LTO or other DWARF re-linking so it is worth emitting
// an error, but not worth stopping the creation of the GSYM.
if (!FI.Range.contains(RowAddress)) {
if (RowAddress < FI.Range.start()) {
Out.Report("Start address lies between valid Row table entries",
[&](raw_ostream &OS) {
OS << "error: DIE has a start address whose LowPC is "
"between the "
"line table Row["
<< RowIndex << "] with address " << HEX64(RowAddress)
<< " and the next one.\n";
Die.dump(OS, 0, DIDumpOptions::getForSingleDIE());
});
// Clamp the row to the start of the function so it is still usable.
RowAddress = FI.Range.start();
} else {
// Rows at or past the end of the function range are skipped silently.
continue;
}
}
LineEntry LE(RowAddress, FileIdx, Row.Line);
if (RowIndex != RowVector[0] && Row.Address < PrevRow.Address) {
// We have seen full duplicate line tables for functions in some
// DWARF files. Watch for those here by checking whether the current
// line entry equals the first line entry we already have in
// FI.OptLineTable. If so, report a duplicate line table; otherwise
// report non-monotonic addresses. Either way, stop processing rows.
auto FirstLE = FI.OptLineTable->first();
if (FirstLE && *FirstLE == LE)
// if (Log && !Gsym.isQuiet()) { TODO <-- This looks weird
Out.Report("Duplicate line table detected", [&](raw_ostream &OS) {
OS << "warning: duplicate line table detected for DIE:\n";
Die.dump(OS, 0, DIDumpOptions::getForSingleDIE());
});
else
Out.Report("Non-monotonically increasing addresses",
[&](raw_ostream &OS) {
OS << "error: line table has addresses that do not "
<< "monotonically increase:\n";
for (uint32_t RowIndex2 : RowVector)
CUI.LineTable->Rows[RowIndex2].dump(OS);
Die.dump(OS, 0, DIDumpOptions::getForSingleDIE());
});
break;
}
// Skip multiple line entries for the same file and line.
auto LastLE = FI.OptLineTable->last();
if (LastLE && LastLE->File == FileIdx && LastLE->Line == Row.Line)
continue;
// Only push a row if it isn't an end sequence. End sequence markers are
// included for the last address in a function or the last contiguous
// address in a sequence.
if (Row.EndSequence) {
// End sequence means that the next line entry could have a lower address
// than the previous entries. So we clear the previous row so we don't
// trigger the line table error about addresses that do not monotonically
// increase.
PrevRow = DWARFDebugLine::Row();
} else {
FI.OptLineTable->push(LE);
PrevRow = Row;
}
}
// If no line table rows were added, clear the line table so we don't encode
// one in the GSYM file.
if (FI.OptLineTable->empty())
FI.OptLineTable = std::nullopt;
}
/// Convert \a Die and every DIE below it into GSYM function infos.
///
/// For a DW_TAG_subprogram with address ranges and a name, one FunctionInfo
/// is created per address range and added to the GsymCreator. Each gets its
/// line table, its inline information and, when LoadDwarfCallSites is set,
/// its call site information. All children of \a Die are then visited
/// recursively, whatever the tag of \a Die is.
///
/// \param Out Aggregator that receives the diagnostics reported below.
/// \param CUI State of the compile unit that \a Die is processed with.
/// \param Die The DIE to convert.
void DwarfTransformer::handleDie(OutputAggregator &Out, CUInfo &CUI,
DWARFDie Die) {
switch (Die.getTag()) {
case dwarf::DW_TAG_subprogram: {
Expected<DWARFAddressRangesVector> RangesOrError = Die.getAddressRanges();
if (!RangesOrError) {
// The failure is dropped on purpose; the DIE simply yields no function.
consumeError(RangesOrError.takeError());
break;
}
const DWARFAddressRangesVector &Ranges = RangesOrError.get();
if (Ranges.empty())
break;
auto NameIndex = getQualifiedNameIndex(Die, CUI.Language, Gsym);
if (!NameIndex) {
Out.Report("Function has no name", [&](raw_ostream &OS) {
OS << "error: function at " << HEX64(Die.getOffset())
<< " has no name\n ";
Die.dump(OS, 0, DIDumpOptions::getForSingleDIE());
});
break;
}
// All ranges for the subprogram DIE in case it has multiple. We need to
// pass this down into parseInlineInfo so we don't warn about inline
// ranges that are not in the current subrange of a function when they
// actually are in another subrange. We do this because when a function
// has discontiguous ranges, we create multiple function entries with only
// the info for that range contained inside of it.
AddressRanges AllSubprogramRanges = ConvertDWARFRanges(Ranges);
// Create a function_info for each range
for (const DWARFAddressRange &Range : Ranges) {
// The low PC must be less than the high PC. Many linkers don't remove
// DWARF for functions that don't get linked into the final executable.
// If both the high and low pc have relocations, linkers will often set
// the address values for both to the same value to indicate the function
// has been removed. Other linkers have been known to set the one or both
// PC values to a UINT32_MAX for 4 byte addresses and UINT64_MAX for 8
// byte addresses to indicate the function isn't valid. The check below
// tries to watch for these cases and abort if it runs into them.
// NOTE(review): this break leaves the range loop, so any remaining ranges
// of this DIE are skipped as well - confirm that is intended.
if (Range.LowPC >= Range.HighPC || CUI.isHighestAddress(Range.LowPC))
break;
// Many linkers can't remove DWARF and might set the LowPC to zero. Since
// high PC can be an offset from the low PC in more recent DWARF versions
// we need to watch for a zero'ed low pc which we do using ValidTextRanges
// below.
if (!Gsym.IsValidTextAddress(Range.LowPC)) {
// We expect zero and -1 to be invalid addresses in DWARF depending
// on the linker of the DWARF. This indicates a function was stripped
// and the debug info wasn't able to be stripped from the DWARF. If
// the LowPC isn't zero or -1, then we should emit an error.
if (Range.LowPC != 0) {
if (!Gsym.isQuiet()) {
// Unexpected invalid address, emit a warning
Out.Report("Address range starts outside executable section",
[&](raw_ostream &OS) {
OS << "warning: DIE has an address range whose "
"start address "
"is not in any executable sections ("
<< *Gsym.GetValidTextRanges()
<< ") and will not be processed:\n";
Die.dump(OS, 0, DIDumpOptions::getForSingleDIE());
});
}
}
// As above, this also stops processing the remaining ranges of the DIE.
break;
}
FunctionInfo FI;
FI.Range = {Range.LowPC, Range.HighPC};
FI.Name = *NameIndex;
// convertFunctionLineTable dereferences the line table, so only call it
// when the compile unit has one.
if (CUI.LineTable)
convertFunctionLineTable(Out, CUI, Die, Gsym, FI);
if (hasInlineInfo(Die, 0)) {
// The top level InlineInfo describes the function itself; the inlined
// functions become its children.
FI.Inline = InlineInfo();
FI.Inline->Name = *NameIndex;
FI.Inline->Ranges.insert(FI.Range);
bool WarnIfEmpty = true;
parseInlineInfo(Gsym, Out, CUI, Die, 0, FI, *FI.Inline,
AllSubprogramRanges, WarnIfEmpty);
// Make sure we at least got some valid inline info other than just
// the top level function. If we didn't then remove the inline info
// from the function info. We have seen cases where LTO tries to modify
// the DWARF for functions and it messes up the address ranges for
// the inline functions so it is no longer valid.
//
// By checking if there are any valid children on the top level inline
// information object, we will know if we got anything valid from the
// debug info.
if (FI.Inline->Children.empty()) {
if (WarnIfEmpty && !Gsym.isQuiet())
Out.Report("DIE contains inline functions with no valid ranges",
[&](raw_ostream &OS) {
OS << "warning: DIE contains inline function "
"information that has no valid ranges, removing "
"inline information:\n";
Die.dump(OS, 0, DIDumpOptions::getForSingleDIE());
});
FI.Inline = std::nullopt;
}
}
// If dwarf-callsites flag is set, parse DW_TAG_call_site DIEs.
if (LoadDwarfCallSites)
parseCallSiteInfoFromDwarf(CUI, Die, FI);
Gsym.addFunctionInfo(std::move(FI));
}
} break;
default:
break;
}
// Children are always visited, including the children of a subprogram.
for (DWARFDie ChildDie : Die.children())
handleDie(Out, CUI, ChildDie);
}
/// Collect call site information from the DW_TAG_call_site children of a
/// subprogram DIE and append it to FI.CallSites.
///
/// Only direct children of \a Die are examined. A call site is recorded when
/// its DW_AT_call_return_pc lies inside FI.Range; the return address is
/// stored as an offset from the start of the function. If the call site has a
/// DW_AT_call_origin, the name of the referenced DIE is added as the single
/// match regex of the call site: the linkage name when it is non-empty,
/// otherwise the short name when that is non-empty.
///
/// \param CUI Compile unit state; not used by this function.
/// \param Die The subprogram DIE whose call sites are parsed.
/// \param FI Function info that receives the call sites.
void DwarfTransformer::parseCallSiteInfoFromDwarf(CUInfo &CUI, DWARFDie Die,
                                                  FunctionInfo &FI) {
  // DWARF specification:
  // - DW_TAG_call_site can have DW_AT_call_return_pc for return address
  //   offset.
  // - DW_AT_call_origin might point to a DIE of the function being called.
  // For simplicity, only the return offset and, if available, the target name
  // are extracted.
  CallSiteInfoCollection CSIC;
  for (DWARFDie Child : Die.children()) {
    if (Child.getTag() != dwarf::DW_TAG_call_site)
      continue;

    CallSiteInfo CSI;
    // DW_AT_call_return_pc is an address; convert it to an offset relative to
    // the start of FI. Call sites that return outside of FI are ignored.
    auto ReturnPC =
        dwarf::toAddress(Child.findRecursively(dwarf::DW_AT_call_return_pc));
    if (!ReturnPC || !FI.Range.contains(*ReturnPC))
      continue;
    CSI.ReturnOffset = *ReturnPC - FI.startAddress();

    // Attempt to get the callee name from DW_AT_call_origin. If present, it
    // is inserted as a match regex.
    if (DWARFDie OriginDie =
            Child.getAttributeValueAsReferencedDie(dwarf::DW_AT_call_origin)) {
      // Prefer the linkage (mangled) name. Linkage names that are present but
      // empty do occur in DWARF, so fall back to the short name in that case
      // rather than recording an empty string as the pattern.
      const char *TargetName = OriginDie.getLinkageName();
      if (!TargetName || TargetName[0] == '\0')
        TargetName = OriginDie.getShortName();
      if (TargetName && TargetName[0] != '\0') {
        uint32_t TargetNameOff = Gsym.insertString(TargetName, /*Copy=*/false);
        CSI.MatchRegex.push_back(TargetNameOff);
      }
    }

    // For now, we won't attempt to deduce InternalCall/ExternalCall flags
    // from DWARF.
    CSI.Flags = CallSiteInfo::Flags::None;
    CSIC.CallSites.push_back(CSI);
  }

  if (!CSIC.CallSites.empty()) {
    if (!FI.CallSites)
      FI.CallSites = CallSiteInfoCollection();
    // Append the call sites parsed from DWARF to any that already exist.
    llvm::append_range(FI.CallSites->CallSites, CSIC.CallSites);
  }
}
/// Convert all compile units of the DWARF context into GSYM function infos.
///
/// With NumThreads == 1 every compile unit is converted on the calling
/// thread. Otherwise the DWARF is parsed up front and the compile units are
/// converted in a thread pool; each task logs into its own buffer, which is
/// merged into \a Out under a lock. Compile units without a valid unit DIE
/// are skipped in both modes.
///
/// \param NumThreads Number of threads to use for the conversion.
/// \param Out Aggregator that receives diagnostics and the final summary.
/// \returns Error::success(); no failure path returns an error.
Error DwarfTransformer::convert(uint32_t NumThreads, OutputAggregator &Out) {
  size_t NumBefore = Gsym.getNumFunctionInfos();

  // Pick the DIE to convert for a unit: the unit DIE of the DWO unit when the
  // unit is a skeleton whose DWO could be loaded, the unit's own DIE
  // otherwise.
  auto getDie = [&](DWARFUnit &DwarfUnit) -> DWARFDie {
    DWARFDie ReturnDie = DwarfUnit.getUnitDIE(false);
    if (DwarfUnit.getDWOId()) {
      DWARFUnit *DWOCU = DwarfUnit.getNonSkeletonUnitDIE(false).getDwarfUnit();
      // An invalid DIE has no unit, so guard against a null pointer and
      // treat it like a DWO that could not be retrieved.
      if (!DWOCU || !DWOCU->isDWOUnit())
        Out.Report(
            "warning: Unable to retrieve DWO .debug_info section for some "
            "object files. (Remove the --quiet flag for full output)",
            [&](raw_ostream &OS) {
              std::string DWOName = dwarf::toString(
                  DwarfUnit.getUnitDIE().find(
                      {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}),
                  "");
              OS << "warning: Unable to retrieve DWO .debug_info section for "
                 << DWOName << "\n";
            });
      else {
        ReturnDie = DWOCU->getUnitDIE(false);
      }
    }
    return ReturnDie;
  };

  if (NumThreads == 1) {
    // Parse all DWARF data from this thread, use the same string/file table
    // for everything
    for (const auto &CU : DICtx.compile_units()) {
      DWARFDie Die = getDie(*CU);
      // handleDie queries the tag of the DIE, which is not valid on an
      // invalid DIE. Skip such units, as the multi-threaded path does.
      if (!Die)
        continue;
      CUInfo CUI(DICtx, dyn_cast<DWARFCompileUnit>(CU.get()));
      handleDie(Out, CUI, Die);
    }
  } else {
    // LLVM Dwarf parser is not thread-safe and we need to parse all DWARF up
    // front before we start accessing any DIEs since there might be
    // cross compile unit references in the DWARF. If we don't do this we can
    // end up crashing.

    // We need to call getAbbreviations sequentially first so that getUnitDIE()
    // only works with its local data.
    for (const auto &CU : DICtx.compile_units())
      CU->getAbbreviations();

    // Now parse all DIEs in case we have cross compile unit references in a
    // thread pool.
    DefaultThreadPool pool(hardware_concurrency(NumThreads));
    for (const auto &CU : DICtx.compile_units())
      pool.async([&CU]() { CU->getUnitDIE(false /*CUDieOnly*/); });
    pool.wait();

    // Now convert all DWARF to GSYM in a thread pool.
    std::mutex LogMutex;
    for (const auto &CU : DICtx.compile_units()) {
      DWARFDie Die = getDie(*CU);
      if (Die) {
        CUInfo CUI(DICtx, dyn_cast<DWARFCompileUnit>(CU.get()));
        pool.async([this, CUI, &LogMutex, &Out, Die]() mutable {
          // Each task logs into its own string so the output of different
          // compile units is not interleaved.
          std::string storage;
          raw_string_ostream StrStream(storage);
          OutputAggregator ThreadOut(Out.GetOS() ? &StrStream : nullptr);
          handleDie(ThreadOut, CUI, Die);
          // Print the buffered log lines into the actual stream under a lock
          std::lock_guard<std::mutex> guard(LogMutex);
          if (Out.GetOS()) {
            Out << storage;
          }
          Out.Merge(ThreadOut);
        });
      }
    }
    pool.wait();
  }
  size_t FunctionsAddedCount = Gsym.getNumFunctionInfos() - NumBefore;
  Out << "Loaded " << FunctionsAddedCount << " functions from DWARF.\n";
  return Error::success();
}
/// Verify a GSYM file against the DWARF it was created from.
///
/// The GSYM file at \a GsymPath is opened and every address of every
/// function in it is looked up in both the GSYM file and the DWARF context.
/// Differences in the number of inline frames, the function names, the
/// source paths and the line numbers are written to \a Out; they do not make
/// the function fail.
///
/// \param GsymPath Path of the GSYM file to verify.
/// \param Out Aggregator that receives the verification messages.
/// \returns an error if the GSYM file cannot be opened, if an address or a
/// function info cannot be extracted from it, or if a lookup fails;
/// Error::success() otherwise.
llvm::Error DwarfTransformer::verify(StringRef GsymPath,
                                     OutputAggregator &Out) {
  Out << "Verifying GSYM file \"" << GsymPath << "\":\n";

  auto Gsym = GsymReader::openFile(GsymPath);
  if (!Gsym)
    return Gsym.takeError();

  auto NumAddrs = Gsym->getNumAddresses();
  DILineInfoSpecifier DLIS(
      DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath,
      DILineInfoSpecifier::FunctionNameKind::LinkageName);
  std::string gsymFilename;
  for (uint32_t I = 0; I < NumAddrs; ++I) {
    auto FuncAddr = Gsym->getAddress(I);
    if (!FuncAddr)
      return createStringError(std::errc::invalid_argument,
                               "failed to extract address[%u]", I);

    auto FI = Gsym->getFunctionInfo(*FuncAddr);
    if (!FI) {
      // The error held by FI has to be handled before FI is destroyed; it is
      // replaced by the error returned below.
      consumeError(FI.takeError());
      // The address is printed in hex to match the "0x" prefix.
      return createStringError(
          std::errc::invalid_argument,
          "failed to extract function info for address 0x%" PRIx64, *FuncAddr);
    }

    // Check every single address that the function covers.
    for (auto Addr = *FuncAddr; Addr < *FuncAddr + FI->size(); ++Addr) {
      const object::SectionedAddress SectAddr{
          Addr, object::SectionedAddress::UndefSection};
      auto LR = Gsym->lookup(Addr);
      if (!LR)
        return LR.takeError();

      auto DwarfInlineInfos =
          DICtx.getInliningInfoForAddress(SectAddr, DLIS);
      uint32_t NumDwarfInlineInfos = DwarfInlineInfos.getNumberOfFrames();
      // NOTE(review): the frame added here is not reflected in
      // NumDwarfInlineInfos, so the checks below still see zero frames -
      // confirm this is intended.
      if (NumDwarfInlineInfos == 0) {
        DwarfInlineInfos.addFrame(
            DICtx.getLineInfoForAddress(SectAddr, DLIS).value_or(DILineInfo()));
      }

      // Check for 1 entry that has no file and line info
      if (NumDwarfInlineInfos == 1 &&
          DwarfInlineInfos.getFrame(0).FileName == "<invalid>") {
        DwarfInlineInfos = DIInliningInfo();
        NumDwarfInlineInfos = 0;
      }

      // A different number of frames is reported with a dump of both sides;
      // the per frame comparison is skipped for this address.
      if (NumDwarfInlineInfos > 0 &&
          NumDwarfInlineInfos != LR->Locations.size()) {
        if (Out.GetOS()) {
          raw_ostream &Log = *Out.GetOS();
          Log << "error: address " << HEX64(Addr) << " has "
              << NumDwarfInlineInfos << " DWARF inline frames and GSYM has "
              << LR->Locations.size() << "\n";
          Log << "    " << NumDwarfInlineInfos << " DWARF frames:\n";
          for (size_t Idx = 0; Idx < NumDwarfInlineInfos; ++Idx) {
            const auto &dii = DwarfInlineInfos.getFrame(Idx);
            Log << "    [" << Idx << "]: " << dii.FunctionName << " @ "
                << dii.FileName << ':' << dii.Line << '\n';
          }
          Log << "    " << LR->Locations.size() << " GSYM frames:\n";
          for (size_t Idx = 0, count = LR->Locations.size(); Idx < count;
               ++Idx) {
            const auto &gii = LR->Locations[Idx];
            Log << "    [" << Idx << "]: " << gii.Name << " @ " << gii.Dir
                << '/' << gii.Base << ':' << gii.Line << '\n';
          }
          Gsym->dump(Log, *FI);
        }
        continue;
      }

      // Compare the frames that both sides have, one by one.
      for (size_t Idx = 0, count = LR->Locations.size(); Idx < count;
           ++Idx) {
        const auto &gii = LR->Locations[Idx];
        if (Idx < NumDwarfInlineInfos) {
          const auto &dii = DwarfInlineInfos.getFrame(Idx);
          gsymFilename = LR->getSourceFile(Idx);
          // Verify function name
          if (!StringRef(dii.FunctionName).starts_with(gii.Name))
            Out << "error: address " << HEX64(Addr) << " DWARF function \""
                << dii.FunctionName.c_str()
                << "\" doesn't match GSYM function \"" << gii.Name << "\"\n";

          // Verify source file path
          if (dii.FileName != gsymFilename)
            Out << "error: address " << HEX64(Addr) << " DWARF path \""
                << dii.FileName.c_str() << "\" doesn't match GSYM path \""
                << gsymFilename.c_str() << "\"\n";
          // Verify source file line
          if (dii.Line != gii.Line)
            Out << "error: address " << HEX64(Addr) << " DWARF line "
                << dii.Line << " != GSYM line " << gii.Line << "\n";
        }
      }
    }
  }
  return Error::success();
}
|