From aff250145af6c7a8ea9332bc1306c1219f4a63db Mon Sep 17 00:00:00 2001
From: Andrew Burgess <aburgess@redhat.com>
Date: Fri, 24 Nov 2023 12:04:36 +0000
Subject: gdb: generate gdb-index identically regardless of worker thread count

It was observed that changing the number of worker threads that GDB
uses (maintenance set worker-threads NUM) would have an impact on the
layout of the generated gdb-index.

The cause seems to be how the compilation units (CUs) are distributed
between threads: symbols that appear in multiple CUs can be encountered
earlier or later depending on whether a particular CU moves between
threads.

I certainly found this behaviour was reproducible when generating an
index for GDB itself, like:

  gdb -q -nx -nh -batch \
      -eiex 'maint set worker-threads NUM' \
      -ex 'save gdb-index /tmp/'

And then setting different values for NUM will change the generated
index.

Now, the question is: does this matter?

I would like to suggest that yes, this does matter.  At Red Hat we
generate a gdb-index as part of the build process, and we would
ideally like to have reproducible builds: for the same source,
compiled with the same tool-chain, we should get the exact same output
binary.  And we do ... except for the index.

Now we could simply force GDB to only use a single worker thread when
we build the index, but I don't think the idea of reproducible builds
is that strange, so I think we should ensure that our generated
indexes are always reproducible.

To achieve this, I propose that we add an extra step when building the
gdb-index file.  After constructing the initial symbol hash table
contents, we will pull all the symbols out of the hash, sort them,
then re-insert them in sorted order.  This will ensure that the
structure of the generated hash will remain consistent (given the same
set of symbols).

I've extended the existing index-file test to check that the generated
index doesn't change if we adjust the number of worker threads used.
Given that this test is already rather slow, I've only made one change
to the worker-thread count.  Maybe this test should be changed to use
a smaller binary, which is quicker to load, and for which we could
then try many different worker thread counts.

Approved-By: Tom Tromey <tom@tromey.com>
---
 gdb/testsuite/gdb.gdb/index-file.exp | 41 ++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

(limited to 'gdb/testsuite/gdb.gdb')

diff --git a/gdb/testsuite/gdb.gdb/index-file.exp b/gdb/testsuite/gdb.gdb/index-file.exp
index c6edd28..0841592 100644
--- a/gdb/testsuite/gdb.gdb/index-file.exp
+++ b/gdb/testsuite/gdb.gdb/index-file.exp
@@ -35,6 +35,9 @@ with_timeout_factor $timeout_factor {
     clean_restart $filename
 }
 
+# Record how many worker threads GDB is using.
+set worker_threads [gdb_get_worker_threads]
+
 # Generate an index file.
 set dir1 [standard_output_file "index_1"]
 remote_exec host "mkdir -p ${dir1}"
@@ -113,3 +116,41 @@ proc check_symbol_table_usage { filename } {
 
 set index_filename_base [file tail $filename]
 check_symbol_table_usage "$dir1/${index_filename_base}.gdb-index"
+
+# If GDB is using more than 1 worker thread then halve the thread count,
+# regenerate the index, and check that we get the same index file back;
+# the index layout once varied with the thread count.  The integer check
+# skips this cleanly should the recorded count not be a plain number.
+if { [string is integer -strict $worker_threads] && $worker_threads > 1 } {
+    # Start GDB, but don't load a file yet.
+    clean_restart
+
+    # Halve the thread count; as $worker_threads > 1 this is at least 1.
+    set reduced_threads [expr {$worker_threads / 2}]
+    gdb_test_no_output "maint set worker-threads $reduced_threads"
+
+    with_timeout_factor $timeout_factor {
+	# Now load the test binary.
+	gdb_file_cmd $filename
+    }
+
+    # Generate a second index file, in its own directory.
+    set dir2 [standard_output_file "index_2"]
+    remote_exec host "mkdir -p ${dir2}"
+    with_timeout_factor $timeout_factor {
+	gdb_test_no_output "save gdb-index $dir2" \
+	    "create second gdb-index file"
+    }
+
+    # Close GDB.
+    gdb_exit
+
+    # Check the index files are identical: the status from cmp must be 0.
+    foreach suffix { gdb-index } {
+	set result \
+	    [remote_exec host \
+		 "cmp -s \"$dir1/${index_filename_base}.${suffix}\" \"$dir2/${index_filename_base}.${suffix}\""]
+	gdb_assert { [lindex $result 0] == 0 } \
+	    "$suffix files are identical"
+    }
+}
-- 
cgit v1.1