/* Generate CodeView debugging info from the GCC DWARF.
Copyright (C) 2023 Free Software Foundation, Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
. */
/* See gas/codeview.h in binutils for more about the constants and structs
listed below. References to Microsoft files refer to Microsoft's PDB
repository: https://github.com/microsoft/microsoft-pdb. */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "target.h"
#include "output.h"
#include "errors.h"
#include "md5.h"
#include "function.h"
#include "version.h"
#include "tree.h"
#include "langhooks.h"
#include "dwarf2out.h"
#include "dwarf2codeview.h"
#ifdef CODEVIEW_DEBUGGING_INFO
#define CV_SIGNATURE_C13 4
#define DEBUG_S_SYMBOLS 0xf1
#define DEBUG_S_LINES 0xf2
#define DEBUG_S_STRINGTABLE 0xf3
#define DEBUG_S_FILECHKSMS 0xf4
#define CHKSUM_TYPE_MD5 1
#define S_COMPILE3 0x113c
#define CV_CFL_80386 0x03
#define CV_CFL_X64 0xD0
#define CV_CFL_C 0x00
#define CV_CFL_CXX 0x01
#define LINE_LABEL "Lcvline"
#define END_FUNC_LABEL "Lcvendfunc"
#define SYMBOL_START_LABEL "Lcvsymstart"
#define SYMBOL_END_LABEL "Lcvsymend"
#define HASH_SIZE 16
struct codeview_string
{
codeview_string *next;
uint32_t offset;
char *string;
};
struct string_hasher : free_ptr_hash
{
typedef const char *compare_type;
static hashval_t hash (const codeview_string *x)
{
return htab_hash_string (x->string);
}
static bool equal (const codeview_string *x, const char *y)
{
return !strcmp (x->string, y);
}
static void mark_empty (codeview_string *x)
{
if (x->string)
{
free (x->string);
x->string = NULL;
}
}
static void remove (codeview_string *&x)
{
free (x->string);
}
};
struct codeview_source_file
{
codeview_source_file *next;
unsigned int file_num;
uint32_t string_offset;
char *filename;
uint8_t hash[HASH_SIZE];
};
struct codeview_line
{
codeview_line *next;
unsigned int line_no;
unsigned int label_num;
};
struct codeview_line_block
{
codeview_line_block *next;
uint32_t file_id;
unsigned int num_lines;
codeview_line *lines, *last_line;
};
struct codeview_function
{
codeview_function *next;
function *func;
unsigned int end_label;
codeview_line_block *blocks, *last_block;
};
static unsigned int line_label_num;
static unsigned int func_label_num;
static unsigned int sym_label_num;
static codeview_source_file *files, *last_file;
static unsigned int num_files;
static uint32_t string_offset = 1;
static hash_table *strings_htab;
static codeview_string *strings, *last_string;
static codeview_function *funcs, *last_func;
static const char* last_filename;
static uint32_t last_file_id;
/* Record new line number against the current function. */
void
codeview_source_line (unsigned int line_no, const char *filename)
{
codeview_line *l;
uint32_t file_id = last_file_id;
unsigned int label_num = ++line_label_num;
targetm.asm_out.internal_label (asm_out_file, LINE_LABEL, label_num);
if (!last_func || last_func->func != cfun)
{
codeview_function *f = (codeview_function *)
xmalloc (sizeof (codeview_function));
f->next = NULL;
f->func = cfun;
f->end_label = 0;
f->blocks = f->last_block = NULL;
if (!funcs)
funcs = f;
else
last_func->next = f;
last_func = f;
}
if (filename != last_filename)
{
codeview_source_file *sf = files;
while (sf)
{
if (!strcmp (sf->filename, filename))
{
/* 0x18 is the size of the checksum entry for each file.
0x6 bytes for the header, plus 0x10 bytes for the hash,
then padded to a multiple of 4. */
file_id = sf->file_num * 0x18;
last_filename = filename;
last_file_id = file_id;
break;
}
sf = sf->next;
}
}
if (!last_func->last_block || last_func->last_block->file_id != file_id)
{
codeview_line_block *b;
b = (codeview_line_block *) xmalloc (sizeof (codeview_line_block));
b->next = NULL;
b->file_id = file_id;
b->num_lines = 0;
b->lines = b->last_line = NULL;
if (!last_func->blocks)
last_func->blocks = b;
else
last_func->last_block->next = b;
last_func->last_block = b;
}
if (last_func->last_block->last_line
&& last_func->last_block->last_line->line_no == line_no)
return;
l = (codeview_line *) xmalloc (sizeof (codeview_line));
l->next = NULL;
l->line_no = line_no;
l->label_num = label_num;
if (!last_func->last_block->lines)
last_func->last_block->lines = l;
else
last_func->last_block->last_line->next = l;
last_func->last_block->last_line = l;
last_func->last_block->num_lines++;
}
/* Adds string to the string table, returning its offset. If already present,
this returns the offset of the existing string. */
static uint32_t
add_string (const char *string)
{
codeview_string **slot;
codeview_string *s;
size_t len;
if (!strings_htab)
strings_htab = new hash_table (10);
slot = strings_htab->find_slot_with_hash (string, htab_hash_string (string),
INSERT);
if (*slot)
return (*slot)->offset;
s = (codeview_string *) xmalloc (sizeof (codeview_string));
len = strlen (string);
s->next = NULL;
s->offset = string_offset;
string_offset += len + 1;
s->string = xstrdup (string);
if (last_string)
last_string->next = s;
else
strings = s;
last_string = s;
*slot = s;
return s->offset;
}
/* A new source file has been encountered - record the details and calculate
its hash. */
void
codeview_start_source_file (const char *filename)
{
codeview_source_file *sf;
char *path;
uint32_t string_offset;
FILE *f;
path = lrealpath (filename);
string_offset = add_string (path);
free (path);
sf = files;
while (sf)
{
if (sf->string_offset == string_offset)
return;
sf = sf->next;
}
sf = (codeview_source_file *) xmalloc (sizeof (codeview_source_file));
sf->next = NULL;
sf->file_num = num_files;
sf->string_offset = string_offset;
sf->filename = xstrdup (filename);
f = fopen (filename, "r");
if (!f)
internal_error ("could not open %s for reading", filename);
if (md5_stream (f, sf->hash))
{
fclose (f);
internal_error ("md5_stream failed");
}
fclose (f);
if (last_file)
last_file->next = sf;
else
files = sf;
last_file = sf;
num_files++;
}
/* Write out the strings table into the .debug$S section. The linker will
parse this, and handle the deduplication and hashing for all the object
files. */
static void
write_strings_table (void)
{
codeview_string *string;
fputs (integer_asm_op (4, false), asm_out_file);
fprint_whex (asm_out_file, DEBUG_S_STRINGTABLE);
putc ('\n', asm_out_file);
fputs (integer_asm_op (4, false), asm_out_file);
asm_fprintf (asm_out_file, "%LLcv_strings_end - %LLcv_strings_start\n");
asm_fprintf (asm_out_file, "%LLcv_strings_start:\n");
/* The first entry is always an empty string. */
fputs (integer_asm_op (1, false), asm_out_file);
fprint_whex (asm_out_file, 0);
putc ('\n', asm_out_file);
string = strings;
while (string)
{
ASM_OUTPUT_ASCII (asm_out_file, string->string,
strlen (string->string) + 1);
string = string->next;
}
delete strings_htab;
asm_fprintf (asm_out_file, "%LLcv_strings_end:\n");
ASM_OUTPUT_ALIGN (asm_out_file, 2);
}
/* Write out the file checksums data into the .debug$S section. */
static void
write_source_files (void)
{
fputs (integer_asm_op (4, false), asm_out_file);
fprint_whex (asm_out_file, DEBUG_S_FILECHKSMS);
putc ('\n', asm_out_file);
fputs (integer_asm_op (4, false), asm_out_file);
asm_fprintf (asm_out_file,
"%LLcv_filechksms_end - %LLcv_filechksms_start\n");
asm_fprintf (asm_out_file, "%LLcv_filechksms_start:\n");
while (files)
{
codeview_source_file *next = files->next;
/* This is struct file_checksum in binutils, or filedata in Microsoft's
dumpsym7.cpp:
struct file_checksum
{
uint32_t file_id;
uint8_t checksum_length;
uint8_t checksum_type;
} ATTRIBUTE_PACKED;
followed then by the bytes of the hash, padded to the next 4 bytes.
file_id here is actually the offset in the strings table. */
fputs (integer_asm_op (4, false), asm_out_file);
fprint_whex (asm_out_file, files->string_offset);
putc ('\n', asm_out_file);
fputs (integer_asm_op (1, false), asm_out_file);
fprint_whex (asm_out_file, HASH_SIZE);
putc ('\n', asm_out_file);
fputs (integer_asm_op (1, false), asm_out_file);
fprint_whex (asm_out_file, CHKSUM_TYPE_MD5);
putc ('\n', asm_out_file);
for (unsigned int i = 0; i < HASH_SIZE; i++)
{
fputs (integer_asm_op (1, false), asm_out_file);
fprint_whex (asm_out_file, files->hash[i]);
putc ('\n', asm_out_file);
}
ASM_OUTPUT_ALIGN (asm_out_file, 2);
free (files->filename);
free (files);
files = next;
}
asm_fprintf (asm_out_file, "%LLcv_filechksms_end:\n");
}
/* Write out the line number information for each function into the
.debug$S section. */
static void
write_line_numbers (void)
{
unsigned int func_num = 0;
while (funcs)
{
codeview_function *next = funcs->next;
unsigned int first_label_num;
fputs (integer_asm_op (4, false), asm_out_file);
fprint_whex (asm_out_file, DEBUG_S_LINES);
putc ('\n', asm_out_file);
fputs (integer_asm_op (4, false), asm_out_file);
asm_fprintf (asm_out_file, "%LLcv_lines%u_end - %LLcv_lines%u_start\n",
func_num, func_num);
asm_fprintf (asm_out_file, "%LLcv_lines%u_start:\n", func_num);
/* Output the header (struct cv_lines_header in binutils or
CV_DebugSLinesHeader_t in Microsoft's cvinfo.h):
struct cv_lines_header
{
uint32_t offset;
uint16_t section;
uint16_t flags;
uint32_t length;
};
*/
asm_fprintf (asm_out_file, "\t.secrel32\t%L" LINE_LABEL "%u\n",
funcs->blocks->lines->label_num);
asm_fprintf (asm_out_file, "\t.secidx\t%L" LINE_LABEL "%u\n",
funcs->blocks->lines->label_num);
/* flags */
fputs (integer_asm_op (2, false), asm_out_file);
fprint_whex (asm_out_file, 0);
putc ('\n', asm_out_file);
first_label_num = funcs->blocks->lines->label_num;
/* length */
fputs (integer_asm_op (4, false), asm_out_file);
asm_fprintf (asm_out_file,
"%L" END_FUNC_LABEL "%u - %L" LINE_LABEL "%u\n",
funcs->end_label, first_label_num);
while (funcs->blocks)
{
codeview_line_block *next = funcs->blocks->next;
/* Next comes the blocks, each block being a part of a function
within the same source file (struct cv_lines_block in binutils or
CV_DebugSLinesFileBlockHeader_t in Microsoft's cvinfo.h):
struct cv_lines_block
{
uint32_t file_id;
uint32_t num_lines;
uint32_t length;
};
*/
/* file ID */
fputs (integer_asm_op (4, false), asm_out_file);
fprint_whex (asm_out_file, funcs->blocks->file_id);
putc ('\n', asm_out_file);
/* number of lines */
fputs (integer_asm_op (4, false), asm_out_file);
fprint_whex (asm_out_file, funcs->blocks->num_lines);
putc ('\n', asm_out_file);
/* length of code block: (num_lines * sizeof (struct cv_line)) +
sizeof (struct cv_lines_block) */
fputs (integer_asm_op (4, false), asm_out_file);
fprint_whex (asm_out_file, (funcs->blocks->num_lines * 0x8) + 0xc);
putc ('\n', asm_out_file);
while (funcs->blocks->lines)
{
codeview_line *next = funcs->blocks->lines->next;
/* Finally comes the line number information (struct cv_line in
binutils or CV_Line_t in Microsoft's cvinfo.h):
struct cv_line
{
uint32_t offset;
uint32_t line_no;
};
Strictly speaking line_no is a bitfield: the bottom 24 bits
are the line number, and the top bit means "is a statement".
*/
fputs (integer_asm_op (4, false), asm_out_file);
asm_fprintf (asm_out_file,
"%L" LINE_LABEL "%u - %L" LINE_LABEL "%u\n",
funcs->blocks->lines->label_num, first_label_num);
fputs (integer_asm_op (4, false), asm_out_file);
fprint_whex (asm_out_file,
0x80000000
| (funcs->blocks->lines->line_no & 0xffffff));
putc ('\n', asm_out_file);
free (funcs->blocks->lines);
funcs->blocks->lines = next;
}
free (funcs->blocks);
funcs->blocks = next;
}
free (funcs);
asm_fprintf (asm_out_file, "%LLcv_lines%u_end:\n", func_num);
func_num++;
funcs = next;
}
}
/* Treat cold sections as separate functions, for the purposes of line
numbers. */
void
codeview_switch_text_section (void)
{
codeview_function *f;
if (last_func && last_func->end_label == 0)
{
unsigned int label_num = ++func_label_num;
targetm.asm_out.internal_label (asm_out_file, END_FUNC_LABEL,
label_num);
last_func->end_label = label_num;
}
f = (codeview_function *) xmalloc (sizeof (codeview_function));
f->next = NULL;
f->func = cfun;
f->end_label = 0;
f->blocks = f->last_block = NULL;
if (!funcs)
funcs = f;
else
last_func->next = f;
last_func = f;
}
/* Mark the end of the current function. */
void
codeview_end_epilogue (void)
{
if (last_func && last_func->end_label == 0)
{
unsigned int label_num = ++func_label_num;
targetm.asm_out.internal_label (asm_out_file, END_FUNC_LABEL,
label_num);
last_func->end_label = label_num;
}
}
/* Return the CodeView constant for the selected architecture. */
static uint16_t
target_processor (void)
{
if (TARGET_64BIT)
return CV_CFL_X64;
else
return CV_CFL_80386;
}
/* Return the CodeView constant for the language being used. */
static uint32_t
language_constant (void)
{
const char *language_string = lang_hooks.name;
if (startswith (language_string, "GNU C++"))
return CV_CFL_CXX;
else if (startswith (language_string, "GNU C"))
return CV_CFL_C;
return 0;
}
/* Write a S_COMPILE3 symbol, which records the details of the compiler
being used. */
static void
write_compile3_symbol (void)
{
unsigned int label_num = ++sym_label_num;
static const char compiler_name[] = "GCC ";
/* This is struct COMPILESYM3 in binutils and Microsoft's cvinfo.h:
struct COMPILESYM3
{
uint16_t length;
uint16_t type;
uint32_t flags;
uint16_t machine;
uint16_t frontend_major;
uint16_t frontend_minor;
uint16_t frontend_build;
uint16_t frontend_qfe;
uint16_t backend_major;
uint16_t backend_minor;
uint16_t backend_build;
uint16_t backend_qfe;
} ATTRIBUTE_PACKED;
*/
fputs (integer_asm_op (2, false), asm_out_file);
asm_fprintf (asm_out_file,
"%L" SYMBOL_END_LABEL "%u - %L" SYMBOL_START_LABEL "%u\n",
label_num, label_num);
targetm.asm_out.internal_label (asm_out_file, SYMBOL_START_LABEL, label_num);
fputs (integer_asm_op (2, false), asm_out_file);
fprint_whex (asm_out_file, S_COMPILE3);
putc ('\n', asm_out_file);
/* Microsoft has the flags as a bitfield, with the bottom 8 bits being the
language constant, and the reset being MSVC-specific stuff. */
fputs (integer_asm_op (4, false), asm_out_file);
fprint_whex (asm_out_file, language_constant ());
putc ('\n', asm_out_file);
fputs (integer_asm_op (2, false), asm_out_file);
fprint_whex (asm_out_file, target_processor ());
putc ('\n', asm_out_file);
/* Write 8 uint16_ts for the frontend and backend versions. As with GAS, we
zero these, as it's easier to record the version in the compiler
string. */
for (unsigned int i = 0; i < 8; i++)
{
fputs (integer_asm_op (2, false), asm_out_file);
fprint_whex (asm_out_file, 0);
putc ('\n', asm_out_file);
}
ASM_OUTPUT_ASCII (asm_out_file, compiler_name, sizeof (compiler_name) - 1);
ASM_OUTPUT_ASCII (asm_out_file, version_string, strlen (version_string) + 1);
ASM_OUTPUT_ALIGN (asm_out_file, 2);
targetm.asm_out.internal_label (asm_out_file, SYMBOL_END_LABEL, label_num);
}
/* Write the CodeView symbols into the .debug$S section. */
static void
write_codeview_symbols (void)
{
fputs (integer_asm_op (4, false), asm_out_file);
fprint_whex (asm_out_file, DEBUG_S_SYMBOLS);
putc ('\n', asm_out_file);
fputs (integer_asm_op (4, false), asm_out_file);
asm_fprintf (asm_out_file, "%LLcv_syms_end - %LLcv_syms_start\n");
asm_fprintf (asm_out_file, "%LLcv_syms_start:\n");
write_compile3_symbol ();
asm_fprintf (asm_out_file, "%LLcv_syms_end:\n");
}
/* Finish CodeView debug info emission. */
void
codeview_debug_finish (void)
{
targetm.asm_out.named_section (".debug$S", SECTION_DEBUG, NULL);
fputs (integer_asm_op (4, false), asm_out_file);
fprint_whex (asm_out_file, CV_SIGNATURE_C13);
putc ('\n', asm_out_file);
write_strings_table ();
write_source_files ();
write_line_numbers ();
write_codeview_symbols ();
}
#endif