15 files changed, 6291 insertions, 3761 deletions
diff --git a/gprofng/doc/Makefile.am b/gprofng/doc/Makefile.am
index 8be0e92..ad72c26 100644
--- a/gprofng/doc/Makefile.am
+++ b/gprofng/doc/Makefile.am
@@ -19,6 +19,9 @@
 
 AUTOMAKE_OPTIONS = info-in-builddir foreign no-texinfo.tex
 
+PDFS  = gprofng.pdf
+HTMLS = gprofng.html
+
 # Options to extract the man page
 MANCONF = -Dman
 
@@ -26,24 +29,31 @@ TEXI2POD = perl $(srcdir)/../../etc/texi2pod.pl $(AM_MAKEINFOFLAGS)
 POD2MAN = pod2man --center="User Commands" \
 	--release="binutils-$(VERSION)" --section=1
 
-info_TEXINFOS       = gprofng.texi
-gprofng_TEXINFOS    = fdl.texi
+info_TEXINFOS       = gprofng_ug.texi
+gprofng_ug_TEXINFOS = fdl.texi gp-macros.texi
 TEXINFO_TEX         = .
 MAKEINFOHTML        = $(MAKEINFO) --html --no-split
 
-man_MANS = gprofng.1
+man_MANS = gprofng.1 gp-archive.1 gp-collect-app.1 gp-display-html.1 gp-display-src.1 gp-display-text.1
 
 # Build the man page from the texinfo file
 # The sed command removes the no-adjust Nroff command so that
 # the man output looks standard.
-gprofng.1: $(srcdir)/gprofng.texi
+$(man_MANS): $(srcdir)/gp-macros.texi
 	$(AM_V_GEN)touch $@
-	$(AM_V_at)-$(TEXI2POD) $(MANCONF) < $(srcdir)/gprofng.texi > gprofng.pod
-	$(AM_V_at)-($(POD2MAN) gprofng.pod | \
-	  sed -e '/^.if n .na/d' > $@.tmp && \
+	$(AM_V_at)-$(TEXI2POD) $(MANCONF) < $(srcdir)/`basename $@ .1`.texi > $@.pod
+	$(AM_V_at)-($(POD2MAN) $@.pod | sed -e '/^.if n .na/d' > $@.tmp && \
 	  mv -f $@.tmp $@) || (rm -f $@.tmp && exit 1)
-	$(AM_V_at)rm -f gprofng.pod
+	$(AM_V_at)rm -f $@.pod
+
+gprofng.1: $(srcdir)/gprofng.texi
+gp-archive.1: $(srcdir)/gp-archive.texi
+gp-collect-app.1: $(srcdir)/gp-collect-app.texi
+gp-display-html.1: $(srcdir)/gp-display-html.texi
+gp-display-src.1: $(srcdir)/gp-display-src.texi
+gp-display-text.1: $(srcdir)/gp-display-text.texi
 
 MAINTAINERCLEANFILES = gprofng.info $(man_MANS)
+EXTRA_DIST = $(man_MANS) version.texi
 
 info: $(man_MANS)
diff --git a/gprofng/doc/Makefile.in b/gprofng/doc/Makefile.in
index 3cd2068..78f8ae1 100644
--- a/gprofng/doc/Makefile.in
+++ b/gprofng/doc/Makefile.in
@@ -168,11 +168,9 @@ am__v_texidevnull_0 = > /dev/null
 am__v_texidevnull_1 = 
 INFO_DEPS = gprofng.info
 am__TEXINFO_TEX_DIR = $(srcdir)/.
-DVIS = gprofng.dvi
-PDFS = gprofng.pdf
-PSS = gprofng.ps
-HTMLS = gprofng.html
-TEXINFOS = gprofng.texi
+DVIS = gprofng_ug.dvi
+PSS = gprofng_ug.ps
+TEXINFOS = gprofng_ug.texi
 TEXI2DVI = texi2dvi
 TEXI2PDF = $(TEXI2DVI) --pdf --batch
 AM_MAKEINFOHTMLFLAGS = $(AM_MAKEINFOFLAGS)
@@ -214,7 +212,7 @@ man1dir = $(mandir)/man1
 NROFF = nroff
 MANS = $(man_MANS)
 am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
-am__DIST_COMMON = $(gprofng_TEXINFOS) $(srcdir)/Makefile.in \
+am__DIST_COMMON = $(gprofng_ug_TEXINFOS) $(srcdir)/Makefile.in \
 	$(top_srcdir)/../mkinstalldirs mdate-sh texinfo.tex
 DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
 ACLOCAL = @ACLOCAL@
@@ -364,6 +362,8 @@ top_srcdir = @top_srcdir@
 zlibdir = @zlibdir@
 zlibinc = @zlibinc@
 AUTOMAKE_OPTIONS = info-in-builddir foreign no-texinfo.tex
+PDFS = gprofng.pdf
+HTMLS = gprofng.html
 
 # Options to extract the man page
 MANCONF = -Dman
@@ -371,12 +371,13 @@ TEXI2POD = perl $(srcdir)/../../etc/texi2pod.pl $(AM_MAKEINFOFLAGS)
 POD2MAN = pod2man --center="User Commands" \
 	--release="binutils-$(VERSION)" --section=1
 
-info_TEXINFOS = gprofng.texi
-gprofng_TEXINFOS = fdl.texi
+info_TEXINFOS = gprofng_ug.texi
+gprofng_ug_TEXINFOS = fdl.texi gp-macros.texi
 TEXINFO_TEX = .
 MAKEINFOHTML = $(MAKEINFO) --html --no-split
-man_MANS = gprofng.1
+man_MANS = gprofng.1 gp-archive.1 gp-collect-app.1 gp-display-html.1 gp-display-src.1 gp-display-text.1
 MAINTAINERCLEANFILES = gprofng.info $(man_MANS)
+EXTRA_DIST = $(man_MANS) version.texi
 all: all-am
 
 .SUFFIXES:
@@ -417,7 +418,7 @@ mostlyclean-libtool:
 clean-libtool:
 	-rm -rf .libs _libs
 
-gprofng.info: gprofng.texi $(srcdir)/version.texi $(gprofng_TEXINFOS)
+gprofng.info: gprofng_ug.texi $(srcdir)/version.texi $(gprofng_ug_TEXINFOS)
 	$(AM_V_MAKEINFO)restore=: && backupdir="$(am__leading_dot)am$$$$" && \
 	rm -rf $$backupdir && mkdir $$backupdir && \
 	if ($(MAKEINFO) --version) >/dev/null 2>&1; then \
@@ -426,7 +427,7 @@ gprofng.info: gprofng.texi $(srcdir)/version.texi $(gprofng_TEXINFOS)
 	  done; \
 	else :; fi && \
 	if $(MAKEINFO) $(AM_MAKEINFOFLAGS) $(MAKEINFOFLAGS) -I $(srcdir) \
-	 -o $@ `test -f 'gprofng.texi' || echo '$(srcdir)/'`gprofng.texi; \
+	 -o $@ `test -f 'gprofng_ug.texi' || echo '$(srcdir)/'`gprofng_ug.texi; \
 	then \
 	  rc=0; \
 	else \
@@ -435,31 +436,31 @@ gprofng.info: gprofng.texi $(srcdir)/version.texi $(gprofng_TEXINFOS)
 	fi; \
 	rm -rf $$backupdir; exit $$rc
 
-gprofng.dvi: gprofng.texi $(srcdir)/version.texi $(gprofng_TEXINFOS) 
+gprofng.dvi: gprofng_ug.texi $(srcdir)/version.texi $(gprofng_ug_TEXINFOS) 
 	$(AM_V_TEXI2DVI)TEXINPUTS="$(am__TEXINFO_TEX_DIR)$(PATH_SEPARATOR)$$TEXINPUTS" \
 	MAKEINFO='$(MAKEINFO) $(AM_MAKEINFOFLAGS) $(MAKEINFOFLAGS) -I $(srcdir)' \
 	$(TEXI2DVI) $(AM_V_texinfo) --build-dir=$(@:.dvi=.t2d) -o $@ $(AM_V_texidevnull) \
-	`test -f 'gprofng.texi' || echo '$(srcdir)/'`gprofng.texi
+	`test -f 'gprofng_ug.texi' || echo '$(srcdir)/'`gprofng_ug.texi
 
-gprofng.pdf: gprofng.texi $(srcdir)/version.texi $(gprofng_TEXINFOS) 
+gprofng.pdf: gprofng_ug.texi $(srcdir)/version.texi $(gprofng_ug_TEXINFOS) 
 	$(AM_V_TEXI2PDF)TEXINPUTS="$(am__TEXINFO_TEX_DIR)$(PATH_SEPARATOR)$$TEXINPUTS" \
 	MAKEINFO='$(MAKEINFO) $(AM_MAKEINFOFLAGS) $(MAKEINFOFLAGS) -I $(srcdir)' \
 	$(TEXI2PDF) $(AM_V_texinfo) --build-dir=$(@:.pdf=.t2p) -o $@ $(AM_V_texidevnull) \
-	`test -f 'gprofng.texi' || echo '$(srcdir)/'`gprofng.texi
+	`test -f 'gprofng_ug.texi' || echo '$(srcdir)/'`gprofng_ug.texi
 
-gprofng.html: gprofng.texi $(srcdir)/version.texi $(gprofng_TEXINFOS) 
+gprofng.html: gprofng_ug.texi $(srcdir)/version.texi $(gprofng_ug_TEXINFOS) 
 	$(AM_V_MAKEINFO)rm -rf $(@:.html=.htp)
 	$(AM_V_at)if $(MAKEINFOHTML) $(AM_MAKEINFOHTMLFLAGS) $(MAKEINFOFLAGS) -I $(srcdir) \
-	 -o $(@:.html=.htp) `test -f 'gprofng.texi' || echo '$(srcdir)/'`gprofng.texi; \
+	 -o $(@:.html=.htp) `test -f 'gprofng_ug.texi' || echo '$(srcdir)/'`gprofng_ug.texi; \
 	then \
 	  rm -rf $@ && mv $(@:.html=.htp) $@; \
 	else \
 	  rm -rf $(@:.html=.htp); exit 1; \
 	fi
 $(srcdir)/version.texi: @MAINTAINER_MODE_TRUE@ $(srcdir)/stamp-vti
-$(srcdir)/stamp-vti: gprofng.texi $(top_srcdir)/configure
-	@(dir=.; test -f ./gprofng.texi || dir=$(srcdir); \
-	set `$(SHELL) $(srcdir)/mdate-sh $$dir/gprofng.texi`; \
+$(srcdir)/stamp-vti: gprofng_ug.texi $(top_srcdir)/configure
+	@(dir=.; test -f ./gprofng_ug.texi || dir=$(srcdir); \
+	set `$(SHELL) $(srcdir)/mdate-sh $$dir/gprofng_ug.texi`; \
 	echo "@set UPDATED $$1 $$2 $$3"; \
 	echo "@set UPDATED-MONTH $$2 $$3"; \
 	echo "@set EDITION $(VERSION)"; \
@@ -557,7 +558,7 @@ dist-info: $(INFO_DEPS)
 	done
 
 mostlyclean-aminfo:
-	-rm -rf gprofng.t2d gprofng.t2p
+	-rm -rf gprofng_ug.t2d gprofng_ug.t2p
 
 clean-aminfo:
 	-test -z "gprofng.dvi gprofng.pdf gprofng.ps gprofng.html" \
@@ -874,13 +875,19 @@ uninstall-man: uninstall-man1
 # Build the man page from the texinfo file
 # The sed command removes the no-adjust Nroff command so that
 # the man output looks standard.
-gprofng.1: $(srcdir)/gprofng.texi
+$(man_MANS): $(srcdir)/gp-macros.texi
 	$(AM_V_GEN)touch $@
-	$(AM_V_at)-$(TEXI2POD) $(MANCONF) < $(srcdir)/gprofng.texi > gprofng.pod
-	$(AM_V_at)-($(POD2MAN) gprofng.pod | \
-	  sed -e '/^.if n .na/d' > $@.tmp && \
+	$(AM_V_at)-$(TEXI2POD) $(MANCONF) < $(srcdir)/`basename $@ .1`.texi > $@.pod
+	$(AM_V_at)-($(POD2MAN) $@.pod | sed -e '/^.if n .na/d' > $@.tmp && \
 	  mv -f $@.tmp $@) || (rm -f $@.tmp && exit 1)
-	$(AM_V_at)rm -f gprofng.pod
+	$(AM_V_at)rm -f $@.pod
+
+gprofng.1: $(srcdir)/gprofng.texi
+gp-archive.1: $(srcdir)/gp-archive.texi
+gp-collect-app.1: $(srcdir)/gp-collect-app.texi
+gp-display-html.1: $(srcdir)/gp-display-html.texi
+gp-display-src.1: $(srcdir)/gp-display-src.texi
+gp-display-text.1: $(srcdir)/gp-display-text.texi
 
 info: $(man_MANS)
 
diff --git a/gprofng/doc/gp-archive.texi b/gprofng/doc/gp-archive.texi
new file mode 100644
index 0000000..722a954
--- /dev/null
+++ b/gprofng/doc/gp-archive.texi
@@ -0,0 +1,246 @@
+@c ----------------------------------------------------------------------------
+@c This is the Texinfo source file for the gp-archive man page.
+@c
+@c Author: Ruud van der Pas
+@c ----------------------------------------------------------------------------
+@ifset man
+\input texinfo @c -*-texinfo-*-
+@setfilename gprofng archive
+@settitle Archive gprofng experiment data
+@include gp-macros.texi
+@end ifset
+
+@c ----------------------------------------------------------------------------
+@c This is from the man-pages(7) man page
+@c
+@c "The list below shows conventional or suggested sections.  Most manual pages
+@c  should include at least the highlighted sections.  Arrange a new manual
+@c  page so that sections are placed in the order shown in the list."
+@c
+@c              NAME
+@c              SYNOPSIS
+@c              CONFIGURATION    [Normally only in Section 4]
+@c              DESCRIPTION
+@c              OPTIONS          [Normally only in Sections 1, 8]
+@c              EXIT STATUS      [Normally only in Sections 1, 8]
+@c              RETURN VALUE     [Normally only in Sections 2, 3]
+@c              ERRORS           [Typically only in Sections 2, 3]
+@c              ENVIRONMENT
+@c              FILES
+@c              VERSIONS         [Normally only in Sections 2, 3]
+@c              ATTRIBUTES       [Normally only in Sections 2, 3]
+@c              CONFORMING TO
+@c              NOTES
+@c              BUGS
+@c              EXAMPLES
+@c              AUTHORS          [Discouraged]
+@c              REPORTING BUGS   [Not used in man-pages]
+@c              COPYRIGHT        [Not used in man-pages]
+@c              SEE ALSO
+@c
+@c This is what the texi2pod.pl tool recognizes:
+@c
+@c for $sect (qw(NAME SYNOPSIS TARGET DESCRIPTION OPTIONS ENVIRONMENT FILES
+@c               BUGS NOTES FOOTNOTES SEEALSO AUTHOR COPYRIGHT)) {
+@c
+@c What is interesting is that it places "SEE ALSO" before "COPYRIGHT", which
+@c makes sense and is adhered to for the other formats.
+@c ----------------------------------------------------------------------------
+
+@c ----------------------------------------------------------------------------
+@c NAME section
+@c ----------------------------------------------------------------------------
+
+@ManPageStart{NAME}
+@c man begin NAME
+
+gprofng archive - Archive gprofng experiment data
+
+@c man end
+@ManPageEnd{}
+
+@c ----------------------------------------------------------------------------
+@c SYNOPSIS section
+@c ----------------------------------------------------------------------------
+
+@ManPageStart{SYNOPSIS}
+@c man begin SYNOPSIS
+
+@command{gprofng archive} [@var{option(s)}] @var{experiment}
+
+@c man end
+@ManPageEnd{}
+
+@c ----------------------------------------------------------------------------
+@c DESCRIPTION section
+@c ----------------------------------------------------------------------------
+
+@ManPageStart{DESCRIPTION}
+@c man begin DESCRIPTION
+
+Archive the associated application binaries and source files in a gprofng
+experiment to make it self contained and portable.
+
+By default, the binaries are archived, but the application source files
+are not archived.  Use this tool to change this and afterwards archive
+additional components.
+
+@c man end
+@ManPageEnd{}
+
+@c ----------------------------------------------------------------------------
+@c OPTIONS section
+@c ----------------------------------------------------------------------------
+
+@ManPageStart{OPTIONS}
+@c man begin OPTIONS
+
+@table @gcctabopt
+
+@item --version
+@ifclear man
+@IndexSubentry{Options, @code{--version}}
+@end ifclear
+
+Print the version number and exit.
+
+@item --help
+@ifclear man
+@IndexSubentry{Options, @code{--help}}
+@end ifclear
+
+Print usage information and exit.
+
+@c -- @item --verbose @{on|off@}
+@c -- @ifclear man
+@c -- @IndexSubentry{Options, @code{--verbose}}
+@c -- @end ifclear
+
+@c -- Enable (on) or disable (off) verbose mode; the default is @samp{off}.
+
+@item -a @{off|on|ldobjects|src|usedldobjects|usedsrc@}
+@ifclear man
+@IndexSubentry{Options, @code{-a}}
+@end ifclear
+
+Specify archiving of binaries and other files.  In addition to disabling this
+feature (off), or enabling archiving of all loadobjects and sources (on),
+the other options support a more refined selection.
+
+All of these options enable archiving, but the keyword controls what exactly
+is selected: all load objects (ldobjects), all source files (src), the
+loadobjects associated with a program counter (usedldobjects), or the source
+files associated with a program counter (usedsrc).
+The default is @samp{-a ldobjects}.
+
+@item -n
+@ifclear man
+@IndexSubentry{Options, @code{-n}}
+@end ifclear
+
+Archive the named experiment only, not any of its descendants.
+
+@item -m @var{regex}
+@ifclear man
+@IndexSubentry{Options, @code{-m}}
+@end ifclear
+
+Archive only those source, object, and debug info files whose full path name
+matches the given POSIX compliant @var{regex} regular expression.
+
+@item -q
+@ifclear man
+@IndexSubentry{Options, @code{-q}}
+@end ifclear
+
+Do not write any warnings to stderr.  Warnings are incorporated into the
+.archive file in the experiment directory.  They are shown in the output 
+of @command{gprofng display text}.
+
+@item -F
+@ifclear man
+@IndexSubentry{Options, @code{-F}}
+@end ifclear
+
+Force writing or rewriting of the archive.  This is ignored with the 
+@samp{-n} or @samp{-m} option, or if this is a subexperiment.
+
+@item -d @var{path}
+@ifclear man
+@IndexSubentry{Options, @code{-d}}
+@end ifclear
+
+The @var{path} is the absolute path to a common archive, which is a
+directory that contains archived files.  If the directory does not
+exist, then it will be created.  Files are saved in the common archive
+directory, and a symbolic link is created in the experiment archive.
+
+@end table
+
+@c man end
+@ManPageEnd{}
+
+@c ----------------------------------------------------------------------------
+@c NOTES section
+@c ----------------------------------------------------------------------------
+
+@ManPageStart{NOTES}
+@c man begin NOTES
+
+Default archiving does not occur in case the application profiled terminates
+prematurely, or if archiving is disabled when collecting the performance data.
+In such cases, this tool can be used to afterwards archive the information,
+but it has to be run on the same system where the profiling data was recorded.
+
+Some Java applications store shared objects in jar files.  By default, such
+shared objects are not automatically archived.  To archive shared objects
+contained in jar files, use the addpath directive in an .er.rc file.  The addpath
+directive should give the path to the jar file, including the jar file itself.
+The .er.rc file should be saved in the user home directory or parent of the
+experiment directory.
+
+@c man end
+@ManPageEnd{}
+
+@c ----------------------------------------------------------------------------
+@c SEEALSO section
+@c ----------------------------------------------------------------------------
+
+@ManPageStart{SEEALSO}
+@c man begin SEEALSO
+
+gprofng(1), gp-collect-app(1), gp-display-html(1), gp-display-src(1), gp-display-text(1)
+
+The user guide for gprofng is maintained as a Texinfo manual.  If the info
+and gprofng programs are correctly installed, the command
+@command{info gprofng} should give access to this document.
+
+@c man end
+@ManPageEnd{}
+
+@c ----------------------------------------------------------------------------
+@c COPYRIGHT section
+@c ----------------------------------------------------------------------------
+
+@ManPageStart{COPYRIGHT}
+@c man begin COPYRIGHT
+
+Copyright @copyright{} 2022-2023 Free Software Foundation, Inc.
+
+Permission is granted to copy, distribute and/or modify this document
+under the terms of the GNU Free Documentation License, Version 1.3
+or any later version published by the Free Software Foundation;
+with no Invariant Sections, with no Front-Cover Texts, and with no
+Back-Cover Texts.  A copy of the license is included in the
+section entitled ``GNU Free Documentation License''.
+
+@c man end
+@ManPageEnd{}
+
+@c ----------------------------------------------------------------------------
+@c If this text is used for a man page, exit.  Otherwise we need to continue.
+@c ----------------------------------------------------------------------------
+
+@ifset man
+@bye
+@end ifset
diff --git a/gprofng/doc/gp-collect-app.texi b/gprofng/doc/gp-collect-app.texi
new file mode 100644
index 0000000..7e81f85
--- /dev/null
+++ b/gprofng/doc/gp-collect-app.texi
@@ -0,0 +1,380 @@
+@c ----------------------------------------------------------------------------
+@c This is the Texinfo source file for the gp-collect-app man page.
+@c
+@c Author: Ruud van der Pas
+@c ----------------------------------------------------------------------------
+@ifset man
+\input texinfo @c -*-texinfo-*-
+@setfilename gprofng collect app
+@settitle Collect performance data for the target application
+@include gp-macros.texi
+@end ifset
+
+@c ----------------------------------------------------------------------------
+@c This is from the man-pages(7) man page
+@c
+@c "The list below shows conventional or suggested sections.  Most manual pages
+@c  should include at least the highlighted sections.  Arrange a new manual
+@c  page so that sections are placed in the order shown in the list."
+@c
+@c              NAME
+@c              SYNOPSIS
+@c              CONFIGURATION    [Normally only in Section 4]
+@c              DESCRIPTION
+@c              OPTIONS          [Normally only in Sections 1, 8]
+@c              EXIT STATUS      [Normally only in Sections 1, 8]
+@c              RETURN VALUE     [Normally only in Sections 2, 3]
+@c              ERRORS           [Typically only in Sections 2, 3]
+@c              ENVIRONMENT
+@c              FILES
+@c              VERSIONS         [Normally only in Sections 2, 3]
+@c              ATTRIBUTES       [Normally only in Sections 2, 3]
+@c              CONFORMING TO
+@c              NOTES
+@c              BUGS
+@c              EXAMPLES
+@c              AUTHORS          [Discouraged]
+@c              REPORTING BUGS   [Not used in man-pages]
+@c              COPYRIGHT        [Not used in man-pages]
+@c              SEE ALSO
+@c
+@c This is what the texi2pod.pl tool recognizes:
+@c
+@c for $sect (qw(NAME SYNOPSIS TARGET DESCRIPTION OPTIONS ENVIRONMENT FILES
+@c               BUGS NOTES FOOTNOTES SEEALSO AUTHOR COPYRIGHT)) {
+@c
+@c What is interesting is that it places "SEE ALSO" before "COPYRIGHT", which
+@c makes sense and is adhered to for the other formats.
+@c ----------------------------------------------------------------------------
+
+@c ----------------------------------------------------------------------------
+@c NAME section
+@c ----------------------------------------------------------------------------
+
+@ManPageStart{NAME}
+@c man begin NAME
+
+gprofng collect app - Collect performance data for the target program
+
+@c man end
+@ManPageEnd{}
+
+@c ----------------------------------------------------------------------------
+@c SYNOPSIS section
+@c ----------------------------------------------------------------------------
+
+@ManPageStart{SYNOPSIS}
+@c man begin SYNOPSIS
+
+@command{gprofng collect app} [@var{option(s)}] @var{target} [@var{option(s)}]
+
+@c man end
+@ManPageEnd{}
+
+@c ----------------------------------------------------------------------------
+@c DESCRIPTION section
+@c ----------------------------------------------------------------------------
+
+@ManPageStart{DESCRIPTION}
+@c man begin DESCRIPTION
+
+Collect performance data on the target program.  In addition to Program Counter
+(PC) sampling, hardware event counters and various tracing options are supported.
+
+For example, this command collects performance data for an executable called
+@samp{a.out} and stores the data collected in an experiment directory with
+the name @samp{example.er}.
+
+@smallexample
+$ gprofng collect app -o example.er ./a.out
+@end smallexample
+
+@c man end
+@ManPageEnd{}
+
+@c ----------------------------------------------------------------------------
+@c OPTIONS section
+@c ----------------------------------------------------------------------------
+
+@ManPageStart{OPTIONS}
+@c man begin OPTIONS
+
+@table @gcctabopt
+
+@item --version
+@ifclear man
+@IndexSubentry{Options, @code{--version}}
+@end ifclear
+
+Print the version number and exit.
+
+@item --help
+@ifclear man
+@IndexSubentry{Options, @code{--help}}
+@end ifclear
+
+Print usage information and exit.
+
+@c -- @item --verbose @{on|off@}
+@c -- @ifclear man
+@c -- @IndexSubentry{Options, @code{--verbose}}
+@c -- @end ifclear
+
+@c -- Enable (on) or disable (off) verbose mode; the default is @samp{off}.
+
+@item -p @{off|on|lo|hi|@var{<value>}@}
+@ifclear man
+@IndexSubentry{Options, @code{-p}}
+@end ifclear
+
+Disable (off) or enable (on) clock-profiling using a default sampling
+granularity, or enable clock-profiling implicitly by setting the sampling
+granularity (lo, hi, or a specific value in ms). By default, clock profiling
+is enabled (@samp{-p on}).
+
+@item -h @var{@{<ctr_def>...,<ctr_n_def>@}}
+@ifclear man
+@IndexSubentry{Options, @code{-h}}
+@end ifclear
+Enable hardware event counter profiling and select the counter(s).
+To see the supported counters on this system, use the @samp{-h} option
+without other arguments.
+
+@item -o @var{<exp_name>}
+@ifclear man
+@IndexSubentry{Options, @code{-o}}
+@end ifclear
+
+Specify the name for the experiment directory.  The name has to end with
+@samp{.er} and may contain an absolute path (e.g. @file{/tmp/experiment.er}).
+
+@item -O @var{<exp_name>}
+@ifclear man
+@IndexSubentry{Options, @code{-O}}
+@end ifclear
+
+This is the same as the @samp{-o} option, but unlike that option, silently
+overwrites an existing experiment directory with the same name.
+
+@item -C @var{<comment_string>}
+@ifclear man
+@IndexSubentry{Options, @code{-C}}
+@end ifclear
+
+Add up to 10 comment strings to the experiment.  These comments appear in the
+notes section of the header and can be retrieved with the
+@command{gprofng display text} command using the @samp{-header} option.
+
+@item -j @{on|off|@var{<path>}@}
+@ifclear man
+@IndexSubentry{Options, @code{-j}}
+@end ifclear
+
+Controls Java profiling when the target is a JVM machine. The allowed values of
+this option are: enable (on), disable (off) Java profiling when the target
+program is a JVM, or set @samp{<path>} to a non-default JVM.
+The default is @samp{-j on}.
+
+@table @gcctabopt
+
+@item on
+Record profiling data for the JVM machine, and recognize methods compiled by
+the Java HotSpot virtual machine.  Also record Java call stacks.  The default
+is @samp{-j on}.
+
+@item off
+Does not record Java profiling data.  Profiling data for native call stacks is
+still recorded.
+
+@item @var{<path>}
+Records profiling data for the JVM, and use the JVM as installed in @var{<path>}.
+
+@end table
+
+@item -J @var{<jvm-options>}
+@ifclear man
+@IndexSubentry{Options, @code{-J}}
+@end ifclear
+
+Specifies additional options to be passed to the JVM used.  The
+@var{jvm-options} list must be enclosed in quotation marks if it contains more
+than one option. The items in the list need to be separated by spaces or tabs.
+Each item is passed as a separate option to the JVM.  Note that this option
+implies @samp{-j on}.
+
+@item -t @var{<duration>}[m|s]
+@ifclear man
+@IndexSubentry{Options, @code{-t}}
+@end ifclear
+
+Collects data for the specified duration.  The duration can be a single number,
+optionally followed by either @samp{m} to specify minutes, or @samp{s} to
+specify seconds, which is the default.
+
+The duration can also be two numbers separated by a minus (-) sign.  If a single
+number is given, data is collected from the start of the run until the given
+time. If two numbers are given, data is collected from the first time to the
+second.  If the second time is zero, data is collected until the end of the
+run. If two non-zero numbers are given, the first must be less than the second.
+
+@item -n
+@ifclear man
+@IndexSubentry{Options, @code{-n}}
+@end ifclear
+
+This is used for a dry run.  Several run-time settings are displayed, but the
+target is not executed and no performance data is collected.
+
+@item -F @{off|on|=@var{regex}@}
+@ifclear man
+@IndexSubentry{Options, @code{-F}}
+@end ifclear
+
+Control whether descendant processes should have their data recorded.
+To disable/enable this feature, use @samp{off}/@samp{on}.  Use
+@samp{=}@var{regex} to record data on those processes whose executable name
+matches the regular expression.  Only the basename of the executable is used,
+not the full path.  If spaces or characters interpreted by the shell are used,
+enclose the @var{regex} in single quotes.  The default is @samp{-F on}.
+
+@item -a @{off|on|ldobjects|src|usedldobjects|usedsrc@}
+@ifclear man
+@IndexSubentry{Options, @code{-a}}
+@end ifclear
+
+Specify archiving of binaries and other files.  In addition to disabling this
+feature (off), or enabling archiving of all loadobjects and sources (on),
+the other options support a more refined selection.
+
+All of these options enable archiving, but the keyword controls what exactly
+is selected: all load objects (ldobjects), all source files (src), the
+loadobjects associated with a program counter (usedldobjects), or the source
+files associated with a program counter (usedsrc).
+The default is @samp{-a ldobjects}.
+
+@item -S @{off|on|@var{<seconds>}@}
+@ifclear man
+@IndexSubentry{Options, @code{-S}}
+@end ifclear
+
+Disable (off), or enable (on) periodic sampling of process-wide resource
+utilization. By default, sampling occurs every second. Use the @var{<seconds>}
+option to change this.  The default is @samp{-S on}.
+
+@item -y @var{<signal>}[,r]
+@ifclear man
+@IndexSubentry{Options, @code{-y}}
+@end ifclear
+
+Controls recording of data with the signal named @var{<signal>}, referred to
+as the pause-resume signal. Whenever the given signal is delivered to the
+process, switch between paused (no data is recorded) and resumed (data is
+recorded) states.
+
+By default, data collection begins in the paused state. If the optional
+@samp{r} is given, data collection begins in the resumed state, that is,
+data is recorded immediately.
+
+SIGUSR1 or SIGUSR2 are recommended for this use, but any signal that is
+not used by the target can be used.
+
+@item -l @var{<signal>}
+@ifclear man
+@IndexSubentry{Options, @code{-l}}
+@end ifclear
+
+Specify a signal that will trigger a sample of process-wide resource utilization.
+When the named @var{<signal>} is delivered to the process, a sample is recorded.
+
+The signal can be specified using the full name, without the initial
+letters @code{SIG}, or the signal number.  Note that the @command{kill}
+command can be used to deliver a signal.
+
+If both the @samp{-l} and @samp{-y} options are used, the signal must be
+different.
+
+@item -s @var{<option>}[,@var{<API>}]
+@ifclear man
+@IndexSubentry{Options, @code{-s}}
+@end ifclear
+
+Enable synchronization wait tracing, where @var{<option>} is used to define the
+specifics of the tracing (on, off, @var{<threshold>}, or all).  The API is 
+selected through the setting for @var{<API>}: @samp{n} selects native/Pthreads,
+@samp{j} selects Java, and @samp{nj} selects both.  The default is @samp{-s off}.
+
+@item -H @{off|on@}
+@ifclear man
+@IndexSubentry{Options, @code{-H}}
+@end ifclear
+
+Disable (off), or enable (on) heap tracing.  The default is @samp{-H off}.
+
+@item -i @{off|on@}
+@ifclear man
+@IndexSubentry{Options, @code{-i}}
+@end ifclear
+
+Disable (off), or enable (on) I/O tracing.  The default is @samp{-i off}.
+
+@end table
+
+@c man end
+@ManPageEnd{}
+
+@c ----------------------------------------------------------------------------
+@c NOTES section
+@c ----------------------------------------------------------------------------
+
+@ManPageStart{NOTES}
+@c man begin NOTES
+
+Any executable in the ELF (Executable and Linkable Format) object format can
+be used for profiling with gprofng.  If debug information is available,
+gprofng can provide more details, but this is not a requirement.
+
+@c man end
+@ManPageEnd{}
+
+@c ----------------------------------------------------------------------------
+@c SEEALSO section
+@c ----------------------------------------------------------------------------
+
+@ManPageStart{SEEALSO}
+@c man begin SEEALSO
+
+gprofng(1), gp-archive(1), gp-display-html(1), gp-display-src(1), gp-display-text(1)
+
+The user guide for gprofng is maintained as a Texinfo manual.  If the
+@command{info} and @command{gprofng} programs are correctly installed, the
+command @command{info gprofng} should give access to this document.
+
+@c man end
+@ManPageEnd{}
+
+@c ----------------------------------------------------------------------------
+@c COPYRIGHT section
+@c ----------------------------------------------------------------------------
+
+@ManPageStart{COPYRIGHT}
+@c man begin COPYRIGHT
+
+Copyright @copyright{} 2022-2023 Free Software Foundation, Inc.
+
+Permission is granted to copy, distribute and/or modify this document
+under the terms of the GNU Free Documentation License, Version 1.3
+or any later version published by the Free Software Foundation;
+with no Invariant Sections, with no Front-Cover Texts, and with no
+Back-Cover Texts.  A copy of the license is included in the
+section entitled ``GNU Free Documentation License''.
+
+@c man end
+@ManPageEnd{}
+
+@c ----------------------------------------------------------------------------
+@c If this text is used for a man page, exit.  Otherwise we need to continue.
+@c ----------------------------------------------------------------------------
+
+@ifset man
+@bye
+@end ifset
diff --git a/gprofng/doc/gp-display-html.texi b/gprofng/doc/gp-display-html.texi
new file mode 100644
index 0000000..de09c34
--- /dev/null
+++ b/gprofng/doc/gp-display-html.texi
@@ -0,0 +1,252 @@
+@c ----------------------------------------------------------------------------
+@c This is the Texinfo source file for the gp-display-html man page.
+@c
+@c Author: Ruud van der Pas
+@c ----------------------------------------------------------------------------
+@ifset man
+\input texinfo @c -*-texinfo-*-
+@setfilename gprofng display html
+@settitle Generate an HTML based directory structure to browse the profiles
+@include gp-macros.texi
+@end ifset
+
+@c ----------------------------------------------------------------------------
+@c This is from the man-pages(7) man page
+@c
+@c "The list below shows conventional or suggested sections.  Most manual pages
+@c  should include at least the highlighted sections.  Arrange a new manual
+@c  page so that sections are placed in the order shown in the list."
+@c
+@c              NAME
+@c              SYNOPSIS
+@c              CONFIGURATION    [Normally only in Section 4]
+@c              DESCRIPTION
+@c              OPTIONS          [Normally only in Sections 1, 8]
+@c              EXIT STATUS      [Normally only in Sections 1, 8]
+@c              RETURN VALUE     [Normally only in Sections 2, 3]
+@c              ERRORS           [Typically only in Sections 2, 3]
+@c              ENVIRONMENT
+@c              FILES
+@c              VERSIONS         [Normally only in Sections 2, 3]
+@c              ATTRIBUTES       [Normally only in Sections 2, 3]
+@c              CONFORMING TO
+@c              NOTES
+@c              BUGS
+@c              EXAMPLES
+@c              AUTHORS          [Discouraged]
+@c              REPORTING BUGS   [Not used in man-pages]
+@c              COPYRIGHT        [Not used in man-pages]
+@c              SEE ALSO
+@c
+@c This is what the texi2pod.pl tool recognizes:
+@c
+@c for $sect (qw(NAME SYNOPSIS TARGET DESCRIPTION OPTIONS ENVIRONMENT FILES
+@c               BUGS NOTES FOOTNOTES SEEALSO AUTHOR COPYRIGHT)) {
+@c
+@c What is interesting is that it places "SEE ALSO" before "COPYRIGHT", which
+@c makes sense and is adhered to for the other formats.
+@c ----------------------------------------------------------------------------
+
+@c ----------------------------------------------------------------------------
+@c NAME section
+@c ----------------------------------------------------------------------------
+
+@ManPageStart{NAME}
+@c man begin NAME
+
+gprofng display html - Generate an HTML based directory structure to browse the profiles
+
+@c man end
+@ManPageEnd{}
+
+@c ----------------------------------------------------------------------------
+@c SYNOPSIS section
+@c ----------------------------------------------------------------------------
+
+@ManPageStart{SYNOPSIS}
+@c man begin SYNOPSIS
+
+@command{gprofng display html} [@var{option(s)}] @var{experiment(s)}
+
+@c man end
+@ManPageEnd{}
+
+@c ----------------------------------------------------------------------------
+@c DESCRIPTION section
+@c ----------------------------------------------------------------------------
+
+@ManPageStart{DESCRIPTION}
+@c man begin DESCRIPTION
+
+Process one or more experiments to generate a directory containing the
+@file{index.html} file that may be used to browse the experiment data.
+
+@c man end
+@ManPageEnd{}
+
+@c ----------------------------------------------------------------------------
+@c OPTIONS section
+@c ----------------------------------------------------------------------------
+
+@ManPageStart{OPTIONS}
+@c man begin OPTIONS
+
+@table @gcctabopt
+
+@item --version
+@ifclear man
+@IndexSubentry{Options, @code{--version}}
+@end ifclear
+
+Print the version number and exit.
+
+@item --help
+@ifclear man
+@IndexSubentry{Options, @code{--help}}
+@end ifclear
+
+Print usage information and exit.
+
+@item --verbose @{on|off@}
+@ifclear man
+@IndexSubentry{Options, @code{--verbose}}
+@end ifclear
+
+Enable (@samp{on}) or disable (@samp{off}) verbose mode.
+The default is @samp{off}.
+
+@item --debug @{on|s|m|l|xl|off@}
+@item -d      @{on|s|m|l|xl|off@}
+@ifclear man
+@IndexSubentry{Options, @code{-d}}
+@IndexSubentry{Options, @code{--debug}}
+@end ifclear
+
+Control the printing of run time information to assist with troubleshooting,
+or further development of this tool.  The keyword is case insensitive.
+A setting of @samp{on} gives a modest amount of information. The keywords
+@samp{s}, @samp{m}, @samp{l}, and @samp{xl} give an increasing amount of
+information, while @samp{off} disables the printing of debug information.
+This is also the default.
+
+Note that currently @samp{on}, @samp{s}, @samp{m}, and @samp{l} are
+equivalent.  This is expected to change in future updates.
+
+@item --highlight-percentage @var{value}
+@item -hp @var{value}
+@ifclear man
+@IndexSubentry{Options, @code{--highlight-percentage}}
+@IndexSubentry{Options, @code{-hp}}
+@end ifclear
+
+Set a percentage value in the interval [0,100] to select and color code source
+lines, as well as instructions, that are within this percentage of the maximum
+metric value(s).  The default is 90 (%).
+
+A value of zero (@samp{-hp 0}) disables this feature.
+
+@item --output @var{dirname}
+@item -o       @var{dirname}
+@ifclear man
+@IndexSubentry{Options, @code{--output}}
+@IndexSubentry{Options, @code{-o}}
+@end ifclear
+
+Use @var{dirname} as the directory name to store the HTML files in.
+The default name is @samp{display.<n>.html} with @var{<n>} the first
+positive integer number not in use.  An existing directory with the
+same name is not overwritten.
+
+@item --overwrite @var{dirname}
+@item -O          @var{dirname}
+@ifclear man
+@IndexSubentry{Options, @code{--overwrite}}
+@IndexSubentry{Options, @code{-O}}
+@end ifclear
+
+Use @var{dirname} as the directory name to store the HTML files in.
+An existing directory with the same name is overwritten.
+
+@item --quiet @{on|off@}
+@item -q      @{on|off@}
+@ifclear man
+@IndexSubentry{Options, @code{--quiet}}
+@IndexSubentry{Options, @code{-q}}
+@end ifclear
+
+Control the display of all warning, debug and verbose messages.
+If set to @samp{on}, the settings for verbose, warnings and debug are ignored.
+By default the quiet mode is disabled (@samp{-q off}).
+
+@item --warnings @{on|off@}
+@item -w         @{on|off@}
+@ifclear man
+@IndexSubentry{Options, @code{--warnings}}
+@IndexSubentry{Options, @code{-w}}
+@end ifclear
+
+Enable (@samp{on}), or disable (@samp{off}) run time warning messages from
+the tool. By default these are enabled.
+
+@end table
+
+@c man end
+@ManPageEnd{}
+
+@c ----------------------------------------------------------------------------
+@c NOTES section
+@c ----------------------------------------------------------------------------
+
+@ManPageStart{NOTES}
+@c man begin NOTES
+
+When setting a directory name for the HTML files to be stored in, make sure that
+umask is set to the correct access permissions.
+
+Regardless of the setting for the warning messages, any warnings are accessible
+through the main @file{index.html} page.
+
+@c man end
+@ManPageEnd{}
+
+@c ----------------------------------------------------------------------------
+@c SEEALSO section
+@c ----------------------------------------------------------------------------
+
+@ManPageStart{SEEALSO}
+@c man begin SEEALSO
+
+gprofng(1), gp-archive(1), gp-collect-app(1), gp-display-src(1), gp-display-text(1)
+
+The user guide for gprofng is maintained as a Texinfo manual.  If the
+@command{info} and @command{gprofng} programs are correctly installed, the
+command @command{info gprofng} should give access to this document.
+
+@c man end
+@ManPageEnd{}
+
+@c ----------------------------------------------------------------------------
+@c COPYRIGHT section
+@c ----------------------------------------------------------------------------
+
+@ManPageStart{COPYRIGHT}
+@c man begin COPYRIGHT
+
+Copyright @copyright{} 2022-2023 Free Software Foundation, Inc.
+
+Permission is granted to copy, distribute and/or modify this document
+under the terms of the GNU Free Documentation License, Version 1.3
+or any later version published by the Free Software Foundation;
+with no Invariant Sections, with no Front-Cover Texts, and with no
+Back-Cover Texts.  A copy of the license is included in the
+section entitled ``GNU Free Documentation License''.
+
+@c man end
+@ManPageEnd{}
+
+@c ----------------------------------------------------------------------------
+@c If this text is used for a man page, exit.  Otherwise we need to continue.
+@c ----------------------------------------------------------------------------
+
+@ifset man
+@bye
+@end ifset
diff --git a/gprofng/doc/gp-display-src.texi b/gprofng/doc/gp-display-src.texi
new file mode 100644
index 0000000..6b32a99
--- /dev/null
+++ b/gprofng/doc/gp-display-src.texi
@@ -0,0 +1,246 @@
+@c ----------------------------------------------------------------------------
+@c This is the Texinfo source file for the gp-display-src man page.
+@c
+@c Author: Ruud van der Pas
+@c ----------------------------------------------------------------------------
+@ifset man
+\input texinfo @c -*-texinfo-*-
+@setfilename gprofng display src
+@settitle Display the source code, optionally interleaved with the disassembly of the target object
+@include gp-macros.texi
+@end ifset
+
+@c ----------------------------------------------------------------------------
+@c This is from the man-pages(7) man page
+@c
+@c "The list below shows conventional or suggested sections.  Most manual pages
+@c  should include at least the highlighted sections.  Arrange a new manual
+@c  page so that sections are placed in the order shown in the list."
+@c
+@c              NAME
+@c              SYNOPSIS
+@c              CONFIGURATION    [Normally only in Section 4]
+@c              DESCRIPTION
+@c              OPTIONS          [Normally only in Sections 1, 8]
+@c              EXIT STATUS      [Normally only in Sections 1, 8]
+@c              RETURN VALUE     [Normally only in Sections 2, 3]
+@c              ERRORS           [Typically only in Sections 2, 3]
+@c              ENVIRONMENT
+@c              FILES
+@c              VERSIONS         [Normally only in Sections 2, 3]
+@c              ATTRIBUTES       [Normally only in Sections 2, 3]
+@c              CONFORMING TO
+@c              NOTES
+@c              BUGS
+@c              EXAMPLES
+@c              AUTHORS          [Discouraged]
+@c              REPORTING BUGS   [Not used in man-pages]
+@c              COPYRIGHT        [Not used in man-pages]
+@c              SEE ALSO
+@c
+@c This is what the texi2pod.pl tool recognizes:
+@c
+@c for $sect (qw(NAME SYNOPSIS TARGET DESCRIPTION OPTIONS ENVIRONMENT FILES
+@c               BUGS NOTES FOOTNOTES SEEALSO AUTHOR COPYRIGHT)) {
+@c
+@c What is interesting is that it places "SEE ALSO" before "COPYRIGHT", which
+@c makes sense and is adhered to for the other formats.
+@c ----------------------------------------------------------------------------
+
+@c ----------------------------------------------------------------------------
+@c NAME section
+@c ----------------------------------------------------------------------------
+
+@ManPageStart{NAME}
+@c man begin NAME
+
+gprofng display src - Display the source code, optionally interleaved with the disassembly of the target object
+
+@c man end
+@ManPageEnd{}
+
+@c ----------------------------------------------------------------------------
+@c SYNOPSIS section
+@c ----------------------------------------------------------------------------
+
+@ManPageStart{SYNOPSIS}
+@c man begin SYNOPSIS
+
+@command{gprofng display src} [@var{option(s)}] @var{target_file}
+
+@c man end
+@ManPageEnd{}
+
+@c ----------------------------------------------------------------------------
+@c DESCRIPTION section
+@c ----------------------------------------------------------------------------
+
+@ManPageStart{DESCRIPTION}
+@c man begin DESCRIPTION
+
+Display the source code listing, or source code interleaved with disassembly code,
+as extracted from the target file (an executable, shared object, object file, or a
+Java .class file).
+
+For example, this command displays the source code and disassembly listing for a
+function called @samp{mxv_core} that is part of object file @samp{mxv.o}:
+
+@smallexample
+$ gprofng display src -disasm mxv_core mxv.o
+@end smallexample
+
+To list the source code and disassembly for all the functions in this file,
+use the following command:
+
+@smallexample
+$ gprofng display src -disasm all -1 mxv.o
+@end smallexample
+
+The @var{target_file} is the name of an executable, a shared object, an object
+file (.o), or a Java .class file.
+
+If no options are given, the source code listing of the @var{target_file}
+is shown.  This is equivalent to @samp{-source all -1}.  If this information
+is not available, a message to this effect is printed.
+
+@c man end
+@ManPageEnd{}
+
+@c ----------------------------------------------------------------------------
+@c OPTIONS section
+@c ----------------------------------------------------------------------------
+
+@ManPageStart{OPTIONS}
+@c man begin OPTIONS
+
+@table @gcctabopt
+
+@item --version
+@ifclear man
+@IndexSubentry{Options, @code{--version}}
+@end ifclear
+
+Print the version number and exit.
+
+@item --help
+@ifclear man
+@IndexSubentry{Options, @code{--help}}
+@end ifclear
+
+Print usage information and exit.
+
+@c -- @item --verbose @{on|off@}
+@c -- @ifclear man
+@c -- @IndexSubentry{Options, @code{--verbose}}
+@c -- @end ifclear
+
+@c -- Enable (on) or disable (off) verbose mode; the default is @samp{off}.
+
+@item -functions
+@ifclear man
+@IndexSubentry{Options,  @code{-functions}}
+@IndexSubentry{Commands, @code{functions}}
+@end ifclear
+List all the functions from the given object.
+
+@item -source @var{item} @var{tag}
+@ifclear man
+@IndexSubentry{Options, @code{-source}}
+@IndexSubentry{Commands, @code{source}}
+@end ifclear
+Show the source code for @var{item} in @var{target_file}.  The @var{tag}
+is used to differentiate in case there are multiple occurrences with the same
+name.
+See the @samp{NOTES} section for the definition of @var{item} and @var{tag}. 
+
+@item -disasm @var{item} @var{tag}
+@ifclear man
+@IndexSubentry{Options,  @code{-disasm}}
+@IndexSubentry{Commands, @code{disasm}}
+@end ifclear
+Include the disassembly in the source listing. The default listing does not
+include the disassembly. If the source code is not available, show a listing
+of the disassembly only.
+See the @samp{NOTES} section for the definition of @var{item} and @var{tag}. 
+
+@item -outfile @var{filename}
+@ifclear man
+@IndexSubentry{Options,  @code{-outfile}}
+@IndexSubentry{Commands, @code{outfile}}
+@end ifclear
+Write results to file @var{filename}.  A dash (-) writes to stdout. This is also
+the default. Note that this option only affects those options included to the
+right of this option.
+
+@end table
+
+@c man end
+@ManPageEnd{}
+
+@c ----------------------------------------------------------------------------
+@c NOTES section
+@c ----------------------------------------------------------------------------
+
+@ManPageStart{NOTES}
+@c man begin NOTES
+
+Use @var{item} to specify the name of a function, or of a source or object
+file that was used to build the executable, or shared object.
+
+The @var{tag} is an index used to determine which item is being referred
+to when multiple functions have the same name. It is required, but will
+be ignored if not necessary to resolve the function.
+
+The @var{item} may also be specified in the form @samp{function`file`}, in
+which case the source or disassembly of the named function in the source
+context of the named file will be used.
+
+The special @var{item} and @var{tag} combination @samp{all -1} is used to
+indicate generating the source, or disassembly, for all functions in the
+@var{target_file}.
+
+@c man end
+@ManPageEnd{}
+
+@c ----------------------------------------------------------------------------
+@c SEEALSO section
+@c ----------------------------------------------------------------------------
+
+@ManPageStart{SEEALSO}
+@c man begin SEEALSO
+
+gprofng(1), gp-archive(1), gp-collect-app(1), gp-display-html(1), gp-display-text(1)
+
+The user guide for gprofng is maintained as a Texinfo manual.  If the
+@command{info} and @command{gprofng} programs are correctly installed, the
+command @command{info gprofng} should give access to this document.
+
+@c man end
+@ManPageEnd{}
+
+@c ----------------------------------------------------------------------------
+@c COPYRIGHT section
+@c ----------------------------------------------------------------------------
+
+@ManPageStart{COPYRIGHT}
+@c man begin COPYRIGHT
+
+Copyright @copyright{} 2022-2023 Free Software Foundation, Inc.
+
+Permission is granted to copy, distribute and/or modify this document
+under the terms of the GNU Free Documentation License, Version 1.3
+or any later version published by the Free Software Foundation;
+with no Invariant Sections, with no Front-Cover Texts, and with no
+Back-Cover Texts.  A copy of the license is included in the
+section entitled ``GNU Free Documentation License''.
+
+@c man end
+@ManPageEnd{}
+
+@c ----------------------------------------------------------------------------
+@c If this text is used for a man page, exit.  Otherwise we need to continue.
+@c ----------------------------------------------------------------------------
+
+@ifset man
+@bye
+@end ifset
diff --git a/gprofng/doc/gp-display-text.texi b/gprofng/doc/gp-display-text.texi
new file mode 100644
index 0000000..993f9f0
--- /dev/null
+++ b/gprofng/doc/gp-display-text.texi
@@ -0,0 +1,437 @@
+@c ----------------------------------------------------------------------------
+@c This is the Texinfo source file for the gp-display-text man page.
+@c
+@c Author: Ruud van der Pas
+@c ----------------------------------------------------------------------------
+@ifset man
+\input texinfo @c -*-texinfo-*-
+@setfilename gprofng display text
+@settitle Display the performance data in plain text format
+@include gp-macros.texi
+@end ifset
+
+@c ----------------------------------------------------------------------------
+@c This is from the man-pages(7) man page
+@c
+@c "The list below shows conventional or suggested sections.  Most manual pages
+@c  should include at least the highlighted sections.  Arrange a new manual
+@c  page so that sections are placed in the order shown in the list."
+@c
+@c              NAME
+@c              SYNOPSIS
+@c              CONFIGURATION    [Normally only in Section 4]
+@c              DESCRIPTION
+@c              OPTIONS          [Normally only in Sections 1, 8]
+@c              EXIT STATUS      [Normally only in Sections 1, 8]
+@c              RETURN VALUE     [Normally only in Sections 2, 3]
+@c              ERRORS           [Typically only in Sections 2, 3]
+@c              ENVIRONMENT
+@c              FILES
+@c              VERSIONS         [Normally only in Sections 2, 3]
+@c              ATTRIBUTES       [Normally only in Sections 2, 3]
+@c              CONFORMING TO
+@c              NOTES
+@c              BUGS
+@c              EXAMPLES
+@c              AUTHORS          [Discouraged]
+@c              REPORTING BUGS   [Not used in man-pages]
+@c              COPYRIGHT        [Not used in man-pages]
+@c              SEE ALSO
+@c
+@c This is what the texi2pod.pl tool recognizes:
+@c
+@c for $sect (qw(NAME SYNOPSIS TARGET DESCRIPTION OPTIONS ENVIRONMENT FILES
+@c               BUGS NOTES FOOTNOTES SEEALSO AUTHOR COPYRIGHT)) {
+@c
+@c What is interesting is that it places "SEE ALSO" before "COPYRIGHT", which
+@c makes sense and is adhered to for the other formats.
+@c ----------------------------------------------------------------------------
+
+@c ----------------------------------------------------------------------------
+@c NAME section
+@c ----------------------------------------------------------------------------
+
+@ManPageStart{NAME}
+@c man begin NAME
+
+gprofng display text - Display the performance data in plain text format
+
+@c man end
+@ManPageEnd{}
+
+@c ----------------------------------------------------------------------------
+@c SYNOPSIS section
+@c ----------------------------------------------------------------------------
+
+@ManPageStart{SYNOPSIS}
+@c man begin SYNOPSIS
+
+@command{gprofng display text} [@var{option(s)}] [@var{commands}]
+[-script @var{script-file}] @var{experiment(s)}
+
+@c man end
+@ManPageEnd{}
+
+@c ----------------------------------------------------------------------------
+@c DESCRIPTION section
+@c ----------------------------------------------------------------------------
+
+@ManPageStart{DESCRIPTION}
+@c man begin DESCRIPTION
+
+Print a plain text version of the various displays supported by gprofng.
+
+The input consists of one or more experiment directories.  Through commands,
+the user controls the output.
+
+There is a rich set of commands to control the display of the data. The
+@samp{NOTES} section lists the most common ones. The gprofng user guide
+lists all the commands supported.
+
+Commands specified on the command line need to be prepended with the dash ('-')
+symbol.
+
+In this example, a function overview will be shown, followed by the source
+code listing of function @samp{my-func}, annotated with the
+performance metrics that have been recorded during the data collection
+and stored in experiment directory @samp{my-exp.er}:
+
+@smallexample
+$ gprofng display text -functions -source my-func my-exp.er
+@end smallexample
+
+Instead of, or in addition to, specifying these commands on the command line,
+commands may also be included in a file called the @var{script-file}.
+
+Note that the commands are processed and interpreted from left to right,
+@emph{so the order matters}.
+
+If this tool is invoked without options, commands, or a script file, it
+starts in interpreter mode. The user can then issue the commands interactively.
+The session is terminated with the @command{exit} command in the interpreter.
+
+@c man end
+@ManPageEnd{}
+
+@c ----------------------------------------------------------------------------
+@c OPTIONS section
+@c ----------------------------------------------------------------------------
+
+@ManPageStart{OPTIONS}
+@c man begin OPTIONS
+
+@table @gcctabopt
+
+@item --version
+@ifclear man
+@IndexSubentry{Options, @code{--version}}
+@end ifclear
+
+Print the version number and exit.
+
+@item --help
+@ifclear man
+@IndexSubentry{Options, @code{--help}}
+@end ifclear
+
+Print usage information and exit.
+
+@c -- @item --verbose @{on|off@}
+@c -- @ifclear man
+@c -- @IndexSubentry{Options, @code{--verbose}}
+@c -- @end ifclear
+
+@c -- Enable (on) or disable (off) verbose mode; the default is @samp{off}.
+
+@item -script @var{script-file}
+@ifclear man
+@IndexSubentry{Options,  @code{-script}}
+@IndexSubentry{Commands, @code{script}}
+@end ifclear
+
+Execute the commands stored in the script file.  This feature may be combined
+with commands specified at the command line.
+
+@end table
+
+@c man end
+@ManPageEnd{}
+
+@c ----------------------------------------------------------------------------
+@c NOTES section
+@c ----------------------------------------------------------------------------
+
+@ManPageStart{NOTES}
+@c man begin NOTES
+
+Many commands are supported. Below, the more common ones are listed in
+mostly alphabetical order, because sometimes it is more logical to
+swap the order of two entries.
+
+@ifset man
+There are many more commands. These are documented in the user guide.
+@end ifset
+
+@table @code
+
+@item callers-callees
+@ifclear man
+@IndexSubentry{Options,  @code{-callers-callees}}
+@IndexSubentry{Commands, @code{callers-callees}}
+@end ifclear
+In a callers-callees panel, it is shown which function(s) call the target
+function (the @emph{callers}) and what functions it is calling (the
+@emph{callees}).
+This command prints the callers-callees panel for each of the functions,
+in the order specified by the function sort metric.
+
+@item calltree
+@ifclear man
+@IndexSubentry{Options,  @code{-calltree}}
+@IndexSubentry{Commands, @code{calltree}}
+@end ifclear
+Display the dynamic call graph from the experiment, showing the hierarchical
+metrics at each level.
+
+@item compare @{on | off | delta | ratio@}
+@ifclear man
+@IndexSubentry{Options,  @code{-compare}}
+@IndexSubentry{Commands, @code{compare}}
+@end ifclear
+By default, the results for multiple experiments are aggregated. This
+command changes this to enable the comparison of experiments for certain
+views (e.g. the function view).  The first experiment specified is defined
+to be the reference.  The following options are supported:
+
+@table @code
+
+@item on
+For each experiment specified on the command line, print the values for
+the metrics that have been activated for the experiment.
+
+@item off
+Disable the comparison of experiments.  This is the default.
+
+@item delta
+Print the values for the reference experiment.  The results for the other
+experiments are shown as a delta relative to the reference (current-reference).
+
+@item ratio
+Print the values for the reference experiment.  The results for the other
+experiments are shown as a ratio relative to the reference (current/reference).
+
+@end table
+
+@item disasm @var{function-name}
+@ifclear man
+@IndexSubentry{Options,  @code{-disasm}}
+@IndexSubentry{Commands, @code{disasm}}
+@end ifclear
+List the source code and instructions for the function specified. The 
+instructions are annotated with the metrics used.
+
+@item fsingle @var{function-name} [@samp{n}]
+@ifclear man
+@IndexSubentry{Options,  @code{-fsingle}}
+@IndexSubentry{Commands, @code{fsingle}}
+@end ifclear
+Write a summary panel for the specified function.  The optional parameter
+@var{n} is needed for those cases where several functions have the same name.
+
+@item fsummary
+@ifclear man
+@IndexSubentry{Options,  @code{-fsummary}}
+@IndexSubentry{Commands, @code{fsummary}}
+@end ifclear
+Write a summary panel for each function in the function list.
+
+@item functions
+@ifclear man
+@IndexSubentry{Options,  @code{-functions}}
+@IndexSubentry{Commands, @code{functions}}
+@end ifclear
+Display a list of all functions executed.  For each function the used metrics
+(e.g. the CPU time) are shown.
+
+@item header
+@ifclear man
+@IndexSubentry{Options,  @code{-header}}
+@IndexSubentry{Commands, @code{header}}
+@end ifclear
+Shows several operational characteristics of the experiment(s) specified
+on the command line.
+
+@item limit @var{n}
+@ifclear man
+@IndexSubentry{Options,  @code{-limit}}
+@IndexSubentry{Commands, @code{limit}}
+@end ifclear
+Limit the output to @var{n} lines.
+
+@item lines
+@ifclear man
+@IndexSubentry{Options,  @code{-lines}}
+@IndexSubentry{Commands, @code{lines}}
+@end ifclear
+Write a list of source lines and their metrics, ordered by the current
+sort metric. 
+
+@item metric_list
+@ifclear man
+@IndexSubentry{Options,  @code{-metric_list}}
+@IndexSubentry{Commands, @code{metric_list}}
+@end ifclear
+Display the currently selected metrics in the function view and a list
+of all the metrics available for the target experiment(s).
+
+@item metrics @var{metric-spec}
+@ifclear man
+@IndexSubentry{Options,  @code{-metrics}}
+@IndexSubentry{Commands, @code{metrics}}
+@end ifclear
+Define the metrics to be displayed in the function and callers-callees
+overviews.
+
+The @var{metric-spec} can either be the keyword @samp{default}
+to restore the default metrics selection, or a colon separated list
+with metrics.
+
+The gprofng user guide has more details on how to define metrics.
+
+@item name @{short | long | mangled@}[:@{soname | nosoname@}]
+@ifclear man
+@IndexSubentry{Options,  @code{-name}}
+@IndexSubentry{Commands, @code{name}}
+@end ifclear
+Specify whether to use the short, long, or mangled form of function names.
+Optionally, the load object that the function is part of can be included in
+the output by adding the @emph{soname} keyword.  It can also be omitted
+(@emph{nosoname}), which is the default.
+
+Whether there is an actual difference between these types of names depends
+on the language.
+
+Note that there should be no (white)space to the left and right of the 
+colon (@samp{:}).
+
+@item overview
+@ifclear man
+@IndexSubentry{Options,  @code{-overview}}
+@IndexSubentry{Commands, @code{overview}}
+@end ifclear
+Shows a summary of the recorded performance data for the experiment(s)
+specified on the command line.
+
+@item pcs
+@ifclear man
+@IndexSubentry{Options,  @code{-pcs}}
+@IndexSubentry{Commands, @code{pcs}}
+@end ifclear
+Write a list of program counters (PCs) and their metrics, ordered by
+the current sort metric. 
+
+@item sort @var{metric-spec}
+@ifclear man
+@IndexSubentry{Options,  @code{-sort}}
+@IndexSubentry{Commands, @code{sort}}
+@end ifclear
+Sort the function list on the @var{metric-spec} given. 
+
+@IndexSubentry{Sort, Reverse order}
+The data can be sorted in reverse order by prepending the metric definition
+with a minus (@samp{-}) sign.
+
+@noindent
+For example @command{sort -e.totalcpu}.
+
+@IndexSubentry{Sort, Reset to default}
+A default metric for the sort operation has been defined and since this is
+a persistent command, this default can be restored with @code{default} as
+the key (@command{sort default}).
+
+@item source @var{function-name}
+@ifclear man
+@IndexSubentry{Options,  @code{-source}}
+@IndexSubentry{Commands, @code{source}}
+@end ifclear
+List the source code for the function specified, annotated with the metrics
+used.
+
+@item viewmode @{user | expert | machine@}
+@ifclear man
+@IndexSubentry{Options,  @code{-viewmode}}
+@IndexSubentry{Commands, @code{viewmode}}
+@end ifclear
+This command is only relevant for Java programs.  For all other languages
+supported, the viewmode setting has no effect.
+
+The following options are supported:
+
+@table @code
+
+@item user
+Show the Java call stacks for Java threads, but do not show housekeeping
+threads.  The function view includes a function called @samp{<JVM-System>}.
+This represents the aggregated time from non-Java threads.
+In case the JVM software does not report a Java call stack, time is reported
+against the function @samp{<no Java callstack recorded>}.
+
+@item expert
+Show the Java call stacks for Java threads when the user Java code is executed,
+and machine call stacks when JVM code is executed, or when the JVM software
+does not report a Java call stack.  Show the machine call stacks for
+housekeeping threads.
+
+@item machine
+Show the actual native call stacks for all threads.  This is the view mode
+for C, C++, and Fortran.
+
+@end table
+
+@end table
+
+@c man end
+@ManPageEnd{}
+
+@c ----------------------------------------------------------------------------
+@c SEEALSO section
+@c ----------------------------------------------------------------------------
+
+@ManPageStart{SEEALSO}
+@c man begin SEEALSO
+
+gprofng(1), gp-archive(1), gp-collect-app(1), gp-display-html(1), gp-display-src(1)
+
+The user guide for gprofng is maintained as a Texinfo manual.  If the
+@command{info} and @command{gprofng} programs are correctly installed, the
+command @command{info gprofng} should give access to this document.
+
+@c man end
+@ManPageEnd{}
+
+@c ----------------------------------------------------------------------------
+@c COPYRIGHT section
+@c ----------------------------------------------------------------------------
+
+@ManPageStart{COPYRIGHT}
+@c man begin COPYRIGHT
+
+Copyright @copyright{} 2022-2023 Free Software Foundation, Inc.
+
+Permission is granted to copy, distribute and/or modify this document
+under the terms of the GNU Free Documentation License, Version 1.3
+or any later version published by the Free Software Foundation;
+with no Invariant Sections, with no Front-Cover Texts, and with no
+Back-Cover Texts.  A copy of the license is included in the
+section entitled ``GNU Free Documentation License''.
+
+@c man end
+@ManPageEnd{}
+
+@c ----------------------------------------------------------------------------
+@c If this text is used for a man page, exit.  Otherwise we need to continue.
+@c ----------------------------------------------------------------------------
+
+@ifset man
+@bye
+@end ifset
diff --git a/gprofng/doc/gp-macros.texi b/gprofng/doc/gp-macros.texi
new file mode 100644
index 0000000..f4bd423
--- /dev/null
+++ b/gprofng/doc/gp-macros.texi
@@ -0,0 +1,72 @@
+@c -- Macro definitions -------------------------------------------------------
+@c
+@c Since only letters can be used, we use capitalization to distinguish
+@c different words.
+@c ----------------------------------------------------------------------------
+@macro CollectApp{}
+@command{gprofng collect app}
+@end macro
+
+@macro DisplayHTML{}
+@command{gprofng display html}
+@end macro
+
+@macro DisplayText{}
+@command{gprofng display text}
+@end macro
+
+@macro DisplaySRC{}
+@command{gprofng display src}
+@end macro
+
+@macro Archive{}
+@command{gprofng archive}
+@end macro
+
+@macro Driver{}
+@command{gprofng}
+@end macro
+
+@macro ProductName{}
+gprofng
+@end macro
+
+@macro ToolName{}
+@command{gprofng}
+@end macro
+
+@macro IndexSubentry{label, string}
+@c -- @cindex \label\ @subentry \string\
+@cindex \label\, \string\
+@end macro
+
+@macro vspace {lines}
+@sp \lines\
+@end macro
+
+@c -- For some reason ending this macro with @noindent does not work out well.
+
+@macro OptionHeader {lines, option, description}
+@sp \lines\
+@noindent
+@code{\option\} @ @emph{\description\}
+@c -- @sp 1
+@end macro
+
+@macro gcctabopt{body}
+@code{\body\}
+@end macro
+
+@macro ManPageStart{headername}
+@ifclear man
+@sp 1
+@noindent @b{\headername\}
+@indentedblock
+@end ifclear
+@end macro
+
+@macro ManPageEnd{}
+@ifclear man
+@end indentedblock
+@end ifclear
+@end macro
diff --git a/gprofng/doc/gprofng.texi b/gprofng/doc/gprofng.texi
index 1a2c84b..d038a47 100644
--- a/gprofng/doc/gprofng.texi
+++ b/gprofng/doc/gprofng.texi
@@ -1,3568 +1,308 @@
-\input texinfo @c -*-texinfo-*-
-
-@c for $sect (qw(NAME SYNOPSIS TARGET DESCRIPTION OPTIONS ENVIRONMENT FILES
-@c               BUGS NOTES FOOTNOTES SEEALSO AUTHOR COPYRIGHT)) {
-
 @c ----------------------------------------------------------------------------
-@c This is the Texinfo source file for the GPROFNG manual.
+@c This is the Texinfo source file for the gprofng man page.
 @c
 @c Author: Ruud van der Pas
 @c ----------------------------------------------------------------------------
+@ifset man
+\input texinfo @c -*-texinfo-*-
+@setfilename gprofng
+@settitle The next generation GNU application profiling tool
+@include gp-macros.texi
+@end ifset
 
-@c %**start of header
-
-@setfilename gprofng.info
-@settitle GNU gprofng
-
-@c -- Set the indent for the @example command to 1 space, not 5 ---------------
-@exampleindent 1
-
-@c %**end of header
-
-@c -- Start a new chapter on a new, odd numbered, page ------------------------
-@setchapternewpage odd
-
-@c -- Merge all index entries into the Concepts Index -------------------------
-@syncodeindex fn cp
-@syncodeindex ky cp
-@syncodeindex pg cp
-@syncodeindex vr cp
+@c @ManPageStart{NAME}
+@c @ManPageStart{SYNOPSIS}
+@c @ManPageStart{DESCRIPTION}
+@c @ManPageStart{OPTIONS}
+@c @ManPageStart{NOTES}
+@c @ManPageStart{SEEALSO}
+@c @ManPageStart{COPYRIGHT}
 
-@c -- Macro definitions -------------------------------------------------------
+@c ----------------------------------------------------------------------------
+@c This is from the man-pages(7) man page
+@c
+@c "The list below shows conventional or suggested sections.  Most manual pages
+@c  should include at least the highlighted sections.  Arrange a new manual
+@c  page so that sections are placed in the order shown in the list."
+@c
+@c              NAME
+@c              SYNOPSIS
+@c              CONFIGURATION    [Normally only in Section 4]
+@c              DESCRIPTION
+@c              OPTIONS          [Normally only in Sections 1, 8]
+@c              EXIT STATUS      [Normally only in Sections 1, 8]
+@c              RETURN VALUE     [Normally only in Sections 2, 3]
+@c              ERRORS           [Typically only in Sections 2, 3]
+@c              ENVIRONMENT
+@c              FILES
+@c              VERSIONS         [Normally only in Sections 2, 3]
+@c              ATTRIBUTES       [Normally only in Sections 2, 3]
+@c              CONFORMING TO
+@c              NOTES
+@c              BUGS
+@c              EXAMPLES
+@c              AUTHORS          [Discouraged]
+@c              REPORTING BUGS   [Not used in man-pages]
+@c              COPYRIGHT        [Not used in man-pages]
+@c              SEE ALSO
+@c
+@c This is what the texi2pod.pl tool recognizes:
+@c
+@c for $sect (qw(NAME SYNOPSIS TARGET DESCRIPTION OPTIONS ENVIRONMENT FILES
+@c               BUGS NOTES FOOTNOTES SEEALSO AUTHOR COPYRIGHT)) {
 @c
-@c Since only letters can be used, we use capitalization to distinguish
-@c different words.
+@c What is interesting is that it places "SEE ALSO" before "COPYRIGHT", which
+@c makes sense and is adhered to for the other formats.
 @c ----------------------------------------------------------------------------
-@macro CollectApp{}
-@command{gprofng collect app}
-@end macro
-
-@macro DisplayHTML{}
-@command{gprofng display html}
-@end macro
-
-@macro DisplayText{}
-@command{gprofng display text}
-@end macro
-
-@macro Driver{}
-@command{gprofng}
-@end macro
-
-@macro ProductName{}
-gprofng
-@end macro
-
-@macro ToolName{}
-@command{gprofng}
-@end macro
-
-@macro IndexSubentry{label, string}
-@c -- @cindex \label\ @subentry \string\
-@cindex \label\, \string\
-@end macro
-
-@macro gcctabopt{body}
-@code{\body\}
-@end macro
-
-@c -- Get the version information ---------------------------------------------
-@include version.texi
-
-@c -- Entry for the Info dir structure ----------------------------------------
-@ifnottex
-@dircategory Software development
-@direntry
-* gprofng: (gprofng).                    The next generation profiling tool for Linux
-@end direntry
-@end ifnottex
-
-@c -- Copyright stuff ---------------------------------------------------------
-@copying
-This document is the manual for @ProductName{}, last updated @value{UPDATED}.
 
-Copyright @copyright{} 2022-2023 Free Software Foundation, Inc.
-
-@c -- @quotation
-Permission is granted to copy, distribute and/or modify this document
-under the terms of the GNU Free Documentation License,
-Version 1.3 or any later version published by the Free Software
-Foundation; with no Invariant Sections, with no Front-Cover texts,
-and with no Back-Cover Texts.  A copy of the license is included in the
-section entitled ``GNU Free Documentation License.''
-
-@c -- @end quotation
-@end copying
-
-@finalout
-@smallbook
-
-@c -- Define the title page ---------------------------------------------------
-@titlepage
-@title GNU gprofng
-@subtitle The next generation profiling tool for Linux
-@subtitle version @value{VERSION} (last updated @value{UPDATED})
-@author Ruud van der Pas
-@page
-@vskip 0pt plus 1filll
-@insertcopying
-
-@c man begin COPYRIGHT
+@c ----------------------------------------------------------------------------
+@c NAME section
+@c ----------------------------------------------------------------------------
 
-Copyright @copyright{} 2022-2023 Free Software Foundation, Inc.
+@ManPageStart{NAME}
+@c man begin NAME
 
-Permission is granted to copy, distribute and/or modify this document
-under the terms of the GNU Free Documentation License, Version 1.3
-or any later version published by the Free Software Foundation;
-with no Invariant Sections, with no Front-Cover Texts, and with no
-Back-Cover Texts.  A copy of the license is included in the
-section entitled ``GNU Free Documentation License''.
+gprofng - The driver for the gprofng application profiling tool
 
 @c man end
+@ManPageEnd{}
 
-@end titlepage
-
-@c -- Generate the Table of Contents ------------------------------------------
-@contents
-
-@c -- The Top node ------------------------------------------------------------
-@c Should contain a short summary, copying permissions and a master menu.
 @c ----------------------------------------------------------------------------
-@ifnottex
-@node Top
-@top  GNU Gprofng
-
-@insertcopying
-@end ifnottex
-
-@ifinfo
-@c -- The menu entries --------------------------------------------------------
-
-@menu
-* Introduction::           About this manual.
-* Overview::               A brief overview of @ProductName{}.
-* A Mini Tutorial::        A short tutorial covering the key features.
-* Terminology::            Various concepts and some terminology explained.
-* Other Document Formats:: How to create this document in other formats.
-* Index::                  The index.
-
-@detailmenu
-
---- The Detailed Node Listing ---
-
-Introduction
-
-Overview
-
-* Main Features::                     A high level overview.
-* Sampling versus Tracing::           The pros and cons of sampling versus tracing.
-* Steps Needed to Create a Profile::  How to create a profile.
-
-A Mini Tutorial
-
-* Getting Started::                 The basics of profiling with @ProductName().
-* Support for Multithreading::      Commands specific to multithreaded applications.
-* Viewing Multiple Experiments::    Analyze multiple experiments.
-* Profile Hardware Event Counters:: How to use hardware event counters.
-* Java Profiling::                  How to profile a Java application.
-
-Terminology
+@c SYNOPSIS section
+@c ----------------------------------------------------------------------------
 
-* The Program Counter::                    What is a Program Counter?
-* Inclusive and Exclusive Metrics::        An explanation of inclusive and exclusive metrics.
-* Metric Definitions::                     Definitions associated with metrics.
-* The Viewmode::                           Select the way call stacks are presented.
-* The Selection List::                     How to define a selection.
-* Load Objects and Functions::             The components in an application.
-* The Concept of a CPU in @ProductName{}:: The definition of a CPU.
-* Hardware Event Counters Explained::      What are event counters?
-* apath::                                  Our generic definition of a path.
+@ManPageStart{SYNOPSIS}
+@c man begin SYNOPSIS
 
-@c -- Index
+@command{gprofng} [@var{option(s)}] @var{action} [@var{qualifier}] [@var{option(s)}] @var{target} [@var{option(s)}]
 
-@end detailmenu
-@end menu
-@end ifinfo
+@c man end
+@ManPageEnd{}
 
-@ifset man
+@c ----------------------------------------------------------------------------
+@c DESCRIPTION section
+@c ----------------------------------------------------------------------------
 
-@c man title gprofng the driver for the gprofng tool suite
+@ManPageStart{DESCRIPTION}
+@c man begin DESCRIPTION
 
-@c man begin SYNOPSIS
-gprofng [OPTION(S)] ACTION [@b{QUALIFIER}] [ARGUMENTS] TARGET
-@c man end
+This is the driver for the gprofng tool suite to gather and analyze performance
+data.
 
-@c man begin DESCRIPTION
-This is the driver for the GPROFNG tools suite to gather and analyze performance data.
+The driver executes the @var{action} specified. An example of an action is
+@samp{collect} to collect performance data. Depending on the action, a
+@var{qualifier} may be needed to further define the command.
+The last item is the @var{target} that the command applies to.
 
-The driver executes the action specified. An example of an action is @code{collect}
-to collect performance data. Depending on the action, a qualifier may be needed to
-define the command. Several qualifiers support options. The last item on the command
-is the target the command applies to.
+There are three places where options are supported.  The driver supports
+options.  These can be found below.  The @var{action}, possibly in combination
+with the @var{qualifier}, also supports options. A description of these can be
+found in the man page for the command.  Any options needed to execute the
+target command should follow the target name.
 
-For example, to collect performance data for an application called @code{a.out} and
-store the results in experiment directory @code{mydata.er}, the following command may
-be used:
+For example, to collect performance data for an application called
+@command{a.out} and store the results in experiment directory @samp{mydata.er},
+the following command may be used:
 
 @smallexample
-$ gprofng collect app -o mydata.er a.out
+$ gprofng collect app -o mydata.er a.out -t 2
 @end smallexample
 
-In this example, the action is @code{collect}, the qualifier is @code{app}, the single
-argument is @code{-o mydata.er} and the target is @code{a.out}.
+In this example, the action is @samp{collect}, the qualifier is @samp{app}, the single
+argument to the command is @code{-o mydata.er} and the target is @command{a.out}.
+The target command is invoked with the @samp{-t 2} option.
 
 If gprofng is executed without any additional option, action, or target, a usage
 overview is printed.
 
 @c man end
+@ManPageEnd{}
 
+@c ----------------------------------------------------------------------------
+@c OPTIONS section
+@c ----------------------------------------------------------------------------
+
+@ManPageStart{OPTIONS}
 @c man begin OPTIONS
 
 @table @gcctabopt
 
 @item @var{--version}
-print the version number and exit.
+@ifclear man
+@IndexSubentry{Options, @code{--version}}
+@end ifclear
+Print the version number and exit.
 
 @item @var{--help}
-print usage information and exit.
+@ifclear man
+@IndexSubentry{Options, @code{--help}}
+@end ifclear
+Print usage information and exit.
 
 @end table
 
 @c man end
+@ManPageEnd{}
 
-@c man begin NOTES
-
-The gprofng driver supports the following commands.
-
-@c The man pages for the commands below can be viewed using the command name with "gprofng" replaced by "gp" and the spaces replaced by a dash ("-"). For example the man page
-@c        name for "gprofng collect app" is "gp-collect-app".
-
-Collect performance data:
-
-@table @code
-
-@item gprofng collect app
-collect application performance data.
-
-@end table
-
-Display the performance results:
-
-@table @code
-
-@item gprofng display text
-display the performance data in ASCII format.
-
-@item gprofng display html
-generate an HTML file from one or more experiments.
-
-@end table
-
-Miscellaneous commands:
-
-@table @code
-
-@item gprofng display src
-display source or disassembly with compiler annotations.
-
-@item gprofng archive
-include binaries and source code in an experiment directory.
-
-@end table
-
-It is also possible to invoke the lower level commands directly, but since
-these are subject to change, in particular the options, we recommend to
-use the driver.
-
-@c man end
+@c -----------------------------------------------------------------------------
+@c ENVIRONMENT SECTION
+@c -----------------------------------------------------------------------------
 
+@ManPageStart{ENVIRONMENT}
 @c man begin ENVIRONMENT
+
 The following environment variables are supported:
 
-@table @code
+@table @samp
 
 @item @env{GPROFNG_MAX_CALL_STACK_DEPTH}
-set the depth of the call stack (default is 256).
+@cindex Environment variables
+Set the depth of the call stack (default is 256).
 
 @item @env{GPROFNG_USE_JAVA_OPTIONS}
-may be set when profiling a C/C++ application that uses dlopen() to execute Java code.
+@cindex Environment variables
+May be set when profiling a C/C++ application that uses dlopen() to execute
+Java code.
 
-@item @env{GPROFNG_SSH_REMOTE_DISPLAY}
-use this variable to define the ssh command executed by the remote display tool.
+@c -- deferred @item @env{GPROFNG_SSH_REMOTE_DISPLAY}
+@c -- deferred Use this variable to define the ssh command executed by the remote display tool.
 
-@item @env{GPROFNG_SKIP_VALIDATION}
-set this variable to disable checking hardware, system, and Java versions.
+@c -- deferred @item @env{GPROFNG_SKIP_VALIDATION}
+@c -- deferred Set this variable to disable checking hardware, system, and Java versions.
 
 @item @env{GPROFNG_ALLOW_CORE_DUMP}
-set this variable to allow a core file to be generated; otherwise an error report is created on /tmp.
+@cindex Environment variables
+Set this variable to allow a core file to be generated; otherwise an error
+report is created in /tmp.
 
 @item @env{GPROFNG_ARCHIVE}
-use this variable to define the settings for automatic archiving upon experiment recording completion.
+@cindex Environment variables
+Use this variable to define the settings for automatic archiving upon experiment
+recording completion.
 
 @item @env{GPROFNG_ARCHIVE_COMMON_DIR}
-set this variable to the location of the common archive.
+@cindex Environment variables
+Set this variable to the location of the common archive.
 
 @item @env{GPROFNG_JAVA_MAX_CALL_STACK_DEPTH}
-set the depth of the Java call stack; the default is 256; set to 0 to disable capturing of call stacks.
+@cindex Environment variables
+Set the depth of the Java call stack; the default is 256; set to 0 to disable
+capturing of call stacks.
 
 @item @env{GPROFNG_JAVA_NATIVE_MAX_CALL_STACK_DEPTH}
-set the depth of the Java native call stack; the default is 256; set to 0 to disable capturing of call stacks (JNI and assembly call stacks are not captured).
+@cindex Environment variables
+Set the depth of the Java native call stack; the default is 256; set to 0 to
+disable capturing of call stacks (JNI and assembly call stacks are not
+captured).
 
 @end table
 
 @c man end
+@ManPageEnd{}
 
-@c man begin SEEALSO
-The man pages for the various gprofng commands are not available yet, but
-the @option{--help} option supported on each of the commands lists the options
-and provides more information.
-
-For example this displays the options supported on the @command{gprofng collect app}
-command:
-
-@smallexample
-$ gprofng collect app --help
-@end smallexample
-
-The user guide is available as an Info entry for @file{gprofng}.
-@c man end
-
-@end ifset
-
-@c man begin DESCRIPTION
-@c man end
-
-@c -- A new node --------------------------------------------------------------
-@node    Introduction
-@chapter Introduction
-@c ----------------------------------------------------------------------------
-The @ProductName{} tool is the next generation profiler for Linux. It consists 
-of various commands to generate and display profile information.
-
-This manual starts with a tutorial how to create and interpret a profile. This
-part is highly practical and has the goal to get users up to speed as quickly
-as possible. As soon as possible, we would like to show you how to get your
-first profile on your screen.
-
-This is followed by more examples, covering many of the features. At the
-end of this tutorial, you should feel confident enough to tackle the more
-complex tasks.
-
-In a future update a more formal reference manual will be included as well.
-Since even in this tutorial we use certain terminology, we have included a
-chapter with descriptions at the end. In case you encounter unfamiliar 
-wordings or terminology, please check this chapter.
-
-One word of caution. In several cases we had to somewhat tweak the screen
-output in order to make it fit. This is why the output may look somewhat
-different when you try things yourself.
-
-For now, we wish you a smooth profiling experience with @ProductName{} and 
-good luck tackling performance bottlenecks.
-
-@c -- A new node --------------------------------------------------------------
-@c cccccc @node    A Brief Overview of @ProductName{}
-@node    Overview
-@chapter A Brief Overview of @ProductName{}
-@c ----------------------------------------------------------------------------
-
-@menu
-* Main Features::                     A high level overview.
-* Sampling versus Tracing::           The pros and cons of sampling versus tracing.
-* Steps Needed to Create a Profile::  How to create a profile.
-@end menu
-
-Before we cover this tool in quite some detail, we start with a brief overview
-of what it is, and the main features. Since we know that many of you would 
-like to get started rightaway, already in this first chapter we explain the
-basics of profiling with @ToolName{}.
-
-@c ----------------------------------------------------------------------------
-@c TBD Review this text. Probably be more specific on the gcc releases and
-@c processor specifics.
-@c ----------------------------------------------------------------------------
-
-@c -- A new node --------------------------------------------------------------
-@node    Main Features
-@section Main Features
-@c ----------------------------------------------------------------------------
-
-@noindent
-These are the main features of the @ProductName{} tool:
-
-@itemize @bullet
-
-@item
-Profiling is supported for an application written in C, C++, Java, or Scala.
-
-@c TBD Java: up to 1.8 full support, support other than for modules 
-
-@item
-Shared libraries are supported. The information is presented at the instruction
-level.
-
-@item
-The following multithreading programming models are supported: Pthreads,
-OpenMP, and Java threads.
-
-@item
-This tool works with unmodified production level executables. There is no need to 
-recompile the code, but if the @code{-g} option has been used when building
-the application, source line level information is available.
-
-@item
-The focus is on support for code generated with the @code{gcc} compiler, but 
-there is some limited support for the @code{icc} compiler as well. Future
-improvements and enhancements will focus on @code{gcc} though.
-
-@item
-Processors from Intel, AMD, and Arm are supported, but the level of support
-depends on the architectural details. In particular, hardware event counters
-may not be supported.
-
-@item 
-Several views into the data are supported. For example, a function overview
-where the time is spent, but also a source line, disassembly, call tree and 
-a caller-callees overview are available.
-
-@item
-Through filters, the user can zoom in on an area of interest.
-
-@item
-Two or more profiles can be aggregated, or used in a comparison. This comparison 
-can be obtained at the function, source line, and disassembly level.
-
-@item
-Through a scripting language, and customization of the metrics shown,
-the generation and creation of a profile can be fully automated and provide 
-tailored output.
-
-@end itemize
-
-@c -- A new node --------------------------------------------------------------
-@node    Sampling versus Tracing
-@section Sampling versus Tracing
-@c ----------------------------------------------------------------------------
-
-A key difference with some other profiling tools is that the main data 
-collection command @CollectApp{} mostly uses 
-@cindex Program Counter sampling
-@cindex PC sampling
-Program Counter (PC) sampling
-under the hood. 
-
-With @emph{sampling}, the executable is stopped at regular intervals. Each time
-it is halted, key information is gathered and stored. This includes the Program
-Counter that keeps track of where the execution is. Hence the name.
-
-Together with operational
-data, this information is stored in the experiment directory and can be
-viewed in the second phase.
-
-For example, the PC information is used to derive where the program was when
-it was halted. Since the sampling interval is known, it is relatively easy to 
-derive how much time was spent in the various parts of the program.
-
-The opposite technique is generally referred to as @emph{tracing}. With
-tracing, the target is instrumented with specific calls that collect the
-requested information.
-
-These are some of the pros and cons of PC sampling verus tracing:
-
-@itemize
-
-@item
-Since there is no need to recompile, existing executables can be used
-and the profile measures the behaviour of exactly the same executable that is
-used in production runs.
-
-With sampling, one inherently profiles a different executable because
-the calls to the instrumentation library may affect the compiler optimizations 
-and run time behaviour. 
-
-@item
-With sampling, there are very few restrictions on what can be profiled and even without
-access to the source code, a basic profile can be made.
-
-@item
-A downside of sampling is that, depending on the sampling frequency, small 
-functions may be missed or not captured accurately. Although this is rare, 
-this may happen and is the reason why the user has control over the sampling rate.
-
-@item
-While tracing produces precise information, sampling is statistical in nature.
-As a result, small variations may occur across seemingly identical runs. We
-have not observed more than a few percent deviation though. Especially if 
-the target job executed for a sufficiently long time.
-
-@item
-With sampling, it is not possible to get an accurate count how often
-functions are called.
-
-@end itemize
-
-@c -- A new node --------------------------------------------------------------
-@node    Steps Needed to Create a Profile
-@section Steps Needed to Create a Profile
-@c ----------------------------------------------------------------------------
-
-Creating a profile takes two steps. First the profile data needs to be 
-generated. This is followed by a viewing step to create a report from the
-information that has been gathered.
-
-Every @ProductName{} command starts with @ToolName{}, the name of the driver. This is followed
-by a keyword to define the high level functionality. Depending on this
-keyword, a third qualifier may be needed to further narrow down the request. 
-This combination is then followed by options that are specific to the functionality
-desired.
-
-The command to gather, or ``collect'', the performance data is called 
-@CollectApp{}. Aside from numerous options, this command takes the name
-of the target executable as an input parameter.
-
-Upon completion of the run, the performance data can be
-found in the newly created 
-@cindex Experiment directory
-experiment directory.
-
-Unless explicitly specified otherwise, a default
-name for this directory is chosen. The name is @code{test.<n>.er} where
-@code{n} is the first integer number not in use yet for such a name.
-
-For example, the first time @CollectApp{} is invoked, an experiment
-directory with the name @code{test.1.er} is created.
-
-Upon a subsequent invocation of @CollectApp{} in the same directory,
-an experiment directory with the name @code{test.2.er} will be created, 
-and so forth.
-
-Note that @CollectApp{} supports an option to explicitly name the experiment directory.
-Outside of the restriction that the name of this directory has to end
-with @code{.er}, any valid directory name can be used for this.
-
-Now that we have the performance data, the next step is to display it.
-
-@pindex @DisplayText{}
-The most commonly used command to view the performance information is 
-@DisplayText{}. This is a very extensive and customizable tool that 
-produces the information in ASCII format. 
-
-@pindex @DisplayHTML{}
-Another option is to use @DisplayHTML{}. This tool generates a directory with 
-files in html format. These can be viewed in a browser, allowing for easy 
-navigation through the profile data.
-
-@c -- A new node --------------------------------------------------------------
-@node    A Mini Tutorial 
-@chapter A Mini Tutorial 
-@c ----------------------------------------------------------------------------
-
-In this chapter we present and discuss the main functionality of @ToolName{}.
-This will be a practical approach, using an example code to generate profile
-data and show how to get various performance reports. 
-
-@menu
-* Getting Started::                 The basics of profiling with @ProductName().
-* Support for Multithreading::      Commands specific to multithreaded applications.
-* Viewing Multiple Experiments::    Analyze multiple experiments.
-* Profile Hardware Event Counters:: How to use hardware event counters.
-* Java Profiling::                  How to profile a Java application.
-@end menu
-
-@c -- A new node --------------------------------------------------------------
-@node    Getting Started
-@section Getting Started
-@c ----------------------------------------------------------------------------
-
-The information presented here provides a good and common basis for many 
-profiling tasks, but there are more features that you may want to leverage.
-
-These are covered in subsequent sections in this chapter.
-
-@menu
-* The Example Program::                        A description of the example program used.
-* A First Profile::                            How to get the first profile.
-* The Source Code View::                       Display the metrics in the source code.
-* The Disassembly View::                       Display the metrics at the instruction level.
-* Display and Define the Metrics::             An example how to customize the metrics.
-* A First Customization of the Output::        An example how to customize the output.
-* Name the Experiment Directory::              Change the name of the experiment directory.
-* Control the Number of Lines in the Output::  Change the number of lines in the tables.
-* Sorting the Performance Data::               How to set the metric to sort by.
-* Scripting::                                  Use a script to execute the commands.
-* A More Elaborate Example::                   An example of customization.
-* The Call Tree::                              Display the dynamic call tree.
-* More Information on the Experiment::         How to get additional statistics.
-* Control the Sampling Frequency::             How to control the sampling granularity.
-* Information on Load Objects::                How to get more information on load objects.
-@end menu
-
-@c -- A new node --------------------------------------------------------------
-@node       The Example Program
-@subsection The Example Program
-@c ----------------------------------------------------------------------------
-
-Throughout this guide we use the same example C code that implements the 
-multiplication of a vector of length @math{n} by an @math{m} by @math{n}
-matrix. The result is stored in a vector of length @math{m}. 
-@cindex Pthreads
-@cindex Posix Threads
-The algorithm has been parallelized using Posix Threads, or Pthreads for short.
-
-The code was built using the @code{gcc} compiler and the name of the executable 
-is
-@cindex mxv-pthreads.exe
-mxv-pthreads.exe.
-
-The matrix sizes can be set through the @code{-m} and @code{-n} options. The
-number of threads is set with the @code{-t} option. To increase the duration
-of the run, the multiplication is executed repeatedly. 
-
-This is an example that multiplies a @math{3000} by @math{2000} matrix with
-a vector of length @math{2000} using @math{2} threads:
-
-@smallexample
-@verbatim
-$ ./mxv-pthreads.exe -m 3000 -n 2000 -t 2
-mxv: error check passed - rows = 3000 columns = 2000 threads = 2
-$
-@end verbatim
-@end smallexample
-
-The program performs an internal check to verify the results are correct.
-The result of this check is printed, followed by the matrix sizes and the 
-number of threads used.
-
-@c -- A new node --------------------------------------------------------------
-@node       A First Profile
-@subsection A First Profile
-@c ----------------------------------------------------------------------------
-
-The first step is to collect the performance data. It is important to remember
-that much more information is gathered than may be shown by default. Often a
-single data collection run is sufficient to get a lot of insight.
-
-The @CollectApp{} command is used for the data collection. Nothing needs to be
-changed in the way the application is executed. The only difference is that it
-is now run under control of the tool, as shown below:
-
-@cartouche
-@smallexample
-$ gprofng collect app ./mxv.pthreads.exe -m 3000 -n 2000 -t 1
-@end smallexample
-@end cartouche
-
-This command produces the following output:
-
-@smallexample
-@verbatim
-Creating experiment database test.1.er (Process ID: 2416504) ...
-mxv: error check passed - rows = 3000 columns = 2000 threads = 1
-@end verbatim
-@end smallexample
-
-We see the message that a directory with the name @code{test.1.er} 
-has been created. 
-The application then completes as usual and we have our first experiment 
-directory that can be analyzed.
-
-The tool we use for this is called @DisplayText{}. It takes the name of
-the experiment directory as an argument.
-
-@cindex Interpreter mode 
-If invoked this way, the tool starts in the interactive @emph{interpreter} mode.
-While in this environment, commands can be given and the tool responds. This is
-illustrated below:
-
-@smallexample
-@verbatim
-$ gprofng display text test.1.er
-Warning: History and command editing is not supported on this system.
-(gp-display-text) quit
-$
-@end verbatim
-@end smallexample
-
-@cindex Command line mode 
-While useful in certain cases, we prefer to use this tool in command line mode,
-by specifying the commands to be issued when invoking the tool. The way to do
-this is to prepend the command with a hyphen (@code{-}) if used on the command
-line.
-
-For example,
-@IndexSubentry{Commands, @code{functions}}
-with the @code{functions} command we request a list of the functions that 
-have been executed and their respective CPU times:
-
-@cartouche
-@smallexample
-$ gprofng display text -functions test.1.er
-@end smallexample
-@end cartouche
-
-@smallexample
-@verbatim
-$ gprofng display text -functions test.1.er
-Functions sorted by metric: Exclusive Total CPU Time
-
-Excl.     Incl.      Name
-Total     Total
-CPU sec.  CPU sec.
-2.272     2.272      <Total>
-2.160     2.160      mxv_core
-0.047     0.103      init_data
-0.030     0.043      erand48_r
-0.013     0.013      __drand48_iterate
-0.013     0.056      drand48
-0.008     0.010      _int_malloc
-0.001     0.001      brk
-0.001     0.002      sysmalloc
-0.        0.001      __default_morecore
-0.        0.113      __libc_start_main
-0.        0.010      allocate_data
-0.        2.160      collector_root
-0.        2.160      driver_mxv
-0.        0.113      main
-0.        0.010      malloc
-0.        0.001      sbrk
-@end verbatim
-@end smallexample
-
-As easy and simple as these steps are, we do have a first profile of our program!
-There are three columns. The first two contain the 
-@cindex Total CPU time
-@emph{Total CPU Time}, 
-which 
-is the sum of the user and system time. @xref{Inclusive and Exclusive Metrics}
-for an explanation of ``exclusive'' and ``inclusive'' times.
-
-The first line echoes the metric that is used to sort the output. By default, this
-is the exclusive CPU time, but the sort metric can be changed by the user.
-
-We then see three columns with the exclusive and inclusive CPU times, plus the
-name of the function.
-
-@IndexSubentry{Miscellaneous, @code{<Total>}}
-The function with the name @code{<Total>} is not a user function, but is introduced
-by @ToolName{} and is used to display the accumulated metric values. In this case,
-we see that the total CPU time of this job was @code{2.272} seconds.
-
-With @code{2.160} seconds, function @code{mxv_core} is the most time 
-consuming function. It is also a leaf function.
-
-The next function in the list is @code{init_data}. Although the CPU time spent in
-this part is negligible, this is an interesting entry because the inclusive CPU
-time of @code{0.103} seconds is higher than the exclusive CPU time of @code{0.047}
-seconds. Clearly it is calling another function,
-or even more than one function. 
-@xref{The Call Tree} for the details how to get more information on this.
-
-The function @code{collector_root} does not look familiar. It is one of the internal
-functions used by @CollectApp{} and can be ignored. While the inclusive time is high,
-the exclusive time is zero. This means it doesn't contribute to the performance.
-
-The question is how we know where this function originates from? There is a very useful
-command to get more details on a function. @xref{Information on Load Objects}.
-
-@c -- A new node --------------------------------------------------------------
-@node       The Source Code View
-@subsection The Source Code View
-@c ----------------------------------------------------------------------------
-
-In general, you would like to focus the tuning efforts on the most time
-consuming part(s) of the program. In this case that is easy, since 2.160
-seconds on a total of 2.272 seconds is spent in function @code{mxv_core}. 
-That is 95% of the total and it is time to dig deeper and look
-@cindex Source level timings
-at the time distribution at the source code level.
-
-@IndexSubentry{Commands, @code{source}}
-The @code{source} command is used to accomplish this. It takes the name of the
-function, not the source filename, as an argument. This is demonstrated
-below, where the @DisplayText{} command is used to show the annotated
-source listing of function @code{mxv_core}.
-
-Please note that the source code has to be compiled with the @code{-g}
-option in order for the source code feature to work. Otherwise the
-location can not be determined.
-
-@cartouche
-@smallexample
-$ gprofng display text -source mxv_core test.1.er
-@end smallexample
-@end cartouche
-
-The slightly modified output is as follows:
-
-@smallexample
-@verbatim
-Source file: <apath>/mxv.c
-Object file: mxv-pthreads.exe (found as test.1.er/archives/...)
-Load Object: mxv-pthreads.exe (found as test.1.er/archives/...)
-
-   Excl.     Incl.
-   Total     Total
-   CPU sec.  CPU sec.
-
-   <lines deleted>
-                               <Function: mxv_core>
-   0.        0.             32. void __attribute__ ((noinline)) 
-                                mxv_core (
-                                uint64_t row_index_start, 
-                                uint64_t row_index_end,
-                                uint64_t m, uint64_t n, 
-                                double **restrict A,
-                                double *restrict b, 
-                                double *restrict c)
-   0.        0.             33. {
-   0.        0.             34.    for (uint64_t i=row_index_start; 
-                                        i<=row_index_end; i++) {
-   0.        0.             35.       double row_sum = 0.0;
-## 1.687     1.687          36.       for (int64_t j=0; j<n; j++)
-   0.473     0.473          37.          row_sum += A[i][j]*b[j];
-   0.        0.             38.       c[i] = row_sum;
-                            39.    }
-   0.        0.             40. }
-@end verbatim
-@end smallexample
-
-The first three lines provide information on the location of the source file,
-the object file and the load object (@xref{Load Objects and Functions}).
-
-Function @code{mxv_core} is part of a source file that has other functions
-as well. These functions will be shown, but without timing information. They
-have been removed in the output shown above.
-
-This is followed by the annotated source code listing. The selected metrics 
-are shown first, followed by a source line number, and the source code.
-@IndexSubentry{Miscellaneous ,@code{##}}
-The most time consuming line(s) are marked with the @code{##} symbol. In
-this way they are easier to find.
-
-What we see is that all of the time is spent in lines 36-37. 
-
-@IndexSubentry{Commands, @code{lines}}
-A related command sometimes comes handy as well. It is called @code{lines}
-and displays a list of the source lines and their metrics, ordered according
-to the current sort metric (@xref{Sorting the Performance Data}).
-
-Below the command and the output. For lay-out reasons, only the top 10 is 
-shown here and the last part of the text on some lines has been replaced
-by dots.
-
-@cartouche
-@smallexample
-$ gprofng display text -lines test.1.er
-@end smallexample
-@end cartouche
-
-@smallexample
-@verbatim
-Lines sorted by metric: Exclusive Total CPU Time
-
-Excl.     Incl.  Name
-Total     Total
-CPU sec.  CPU sec.
-2.272     2.272  <Total>
-1.687     1.687  mxv_core, line 36 in "mxv.c"
-0.473     0.473  mxv_core, line 37 in "mxv.c"
-0.032     0.088  init_data, line 72 in "manage_data.c"
-0.030     0.043  <Function: erand48_r, instructions without line numbers>
-0.013     0.013  <Function: __drand48_iterate, instructions without ...>
-0.013     0.056  <Function: drand48, instructions without line numbers>
-0.012     0.012  init_data, line 77 in "manage_data.c"
-0.008     0.010  <Function: _int_malloc, instructions without ...>
-0.003     0.003  init_data, line 71 in "manage_data.c"
-@end verbatim
-@end smallexample
-
-What this overview immediately highlights is that the next most time consuming
-source line takes 0.032 seconds only. With an inclusive time of 0.088 seconds,
-it is also clear that this branch of the code does not impact the performance.
-
-@c -- A new node --------------------------------------------------------------
-@node       The Disassembly View
-@subsection The Disassembly View
-@c ----------------------------------------------------------------------------
-
-The source view is very useful to obtain more insight where the time is spent,
-but sometimes this is not sufficient. This is when the disassembly view comes
-in. It is activated with the 
-@IndexSubentry{Commands, @code{disasm}}
-@code{disasm} 
-command and as with the source view, it displays an annotated listing. In this
-@cindex Instruction level timings
-case it shows the instructions with the metrics, interleaved with the
-source lines. The
-instructions have a reference in square brackets (@code{[} and @code{]})
-to the source line they correspond to.
-
-@noindent
-This is what we get for our example:
-
-@cartouche
-@smallexample
-$ gprofng display text -disasm mxv_core test.1.er
-@end smallexample
-@end cartouche
-
-@smallexample
-@verbatim
-Source file: <apath>/mxv.c
-Object file: mxv-pthreads.exe (found as test.1.er/archives/...)
-Load Object: mxv-pthreads.exe (found as test.1.er/archives/...)
-
-   Excl.     Incl.
-   Total     Total
-   CPU sec.  CPU sec.
-
-   <lines deleted>
-                        32. void __attribute__ ((noinline)) 
-                            mxv_core (
-                            uint64_t row_index_start, 
-                            uint64_t row_index_end,
-                            uint64_t m, uint64_t n, 
-                            double **restrict A,
-                            double *restrict b, 
-                            double *restrict c)
-                        33. {
-                            <Function: mxv_core>
-   0.        0.             [33]   4021ba:  mov    0x8(%rsp),%r10
-                        34.    for (uint64_t i=row_index_start;
-                                    i<=row_index_end; i++) {
-   0.        0.             [34]   4021bf:  cmp    %rsi,%rdi
-   0.        0.             [34]   4021c2:  jbe    0x37
-   0.        0.             [34]   4021c4:  ret
-                        35.        double row_sum = 0.0;
-                        36.        for (int64_t j=0; j<n; j++)
-                        37.           row_sum += A[i][j]*b[j];
-   0.        0.             [37]   4021c5:  mov    (%r8,%rdi,8),%rdx
-   0.        0.             [36]   4021c9:  mov    $0x0,%eax
-   0.        0.             [35]   4021ce:  pxor   %xmm1,%xmm1
-   0.002     0.002          [37]   4021d2:  movsd  (%rdx,%rax,8),%xmm0
-   0.096     0.096          [37]   4021d7:  mulsd  (%r9,%rax,8),%xmm0
-   0.375     0.375          [37]   4021dd:  addsd  %xmm0,%xmm1
-## 1.683     1.683          [36]   4021e1:  add    $0x1,%rax
-   0.004     0.004          [36]   4021e5:  cmp    %rax,%rcx
-   0.        0.             [36]   4021e8:  jne    0xffffffffffffffea
-                        38.        c[i] = row_sum;
-   0.        0.             [38]   4021ea:  movsd  %xmm1,(%r10,%rdi,8)
-   0.        0.             [34]   4021f0:  add    $0x1,%rdi
-   0.        0.             [34]   4021f4:  cmp    %rdi,%rsi
-   0.        0.             [34]   4021f7:  jb     0xd
-   0.        0.             [35]   4021f9:  pxor   %xmm1,%xmm1
-   0.        0.             [36]   4021fd:  test   %rcx,%rcx
-   0.        0.             [36]   402200:  jne    0xffffffffffffffc5
-   0.        0.             [36]   402202:  jmp    0xffffffffffffffe8
-                        39.    }
-                        40. }
-   0.        0.             [40]   402204:  ret
-@end verbatim
-@end smallexample
-
-For each instruction, the timing values are given and we can exactly which ones
-are the most expensive. As with the source level view, the most expensive 
-instructions are market with the @code{##} symbol.
-
-As illustrated below and similar to the @code{lines} command, we can get 
-an overview of the instructions executed by using the 
-@IndexSubentry{Commands, @code{pcs}}
-@code{pcs} 
-command. 
-
-@noindent
-Below the command and the output, which again has been restricted
-to 10 lines:
-
-@cartouche
-@smallexample
-$ gprofng display text -pcs test.1.er
-@end smallexample
-@end cartouche
-
-@smallexample
-@verbatim
-PCs sorted by metric: Exclusive Total CPU Time
-
-Excl.     Incl.      Name
-Total     Total
-CPU sec.  CPU sec.
-2.272     2.272  <Total>
-1.683     1.683  mxv_core + 0x00000027, line 36 in "mxv.c"
-0.375     0.375  mxv_core + 0x00000023, line 37 in "mxv.c"
-0.096     0.096  mxv_core + 0x0000001D, line 37 in "mxv.c"
-0.027     0.027  init_data + 0x000000BD, line 72 in "manage_data.c"
-0.012     0.012  init_data + 0x00000117, line 77 in "manage_data.c"
-0.008     0.008  _int_malloc + 0x00000A45
-0.007     0.007  erand48_r + 0x00000062
-0.006     0.006  drand48 + 0x00000000
-0.005     0.005  __drand48_iterate + 0x00000005
-@end verbatim
-@end smallexample
-
-@c -- A new node --------------------------------------------------------------
-@node       Display and Define the Metrics
-@subsection Display and Define the Metrics
-@c ----------------------------------------------------------------------------
-
-The default metrics shown by @DisplayText{} are useful, but there is more
-recorded than displayed. We can customize the values shown by defining the 
-metrics ourselves.
-
-@IndexSubentry{Commands, @code{metric_list}}
-There are two commands related to changing the metrics shown: @code{metric_list}
-and 
-@IndexSubentry{Commands, @code{metrics}}
-@code{metrics}.
-
-The first command shows the metrics in use, plus all the metrics that have 
-been stored as part of the experiment. The second command may be used to
-define the metric list.
-
-In our example we get the following values for the metrics:
-
-@IndexSubentry{Commands, @code{metric_list}}
-@cartouche
-@smallexample
-$ gprofng display text -metric_list test.1.er
-@end smallexample
-@end cartouche
-
-@smallexample
-@verbatim
-Current metrics: e.totalcpu:i.totalcpu:name
-Current Sort Metric: Exclusive Total CPU Time ( e.totalcpu )
-Available metrics:
-   Exclusive Total CPU Time: e.%totalcpu
-   Inclusive Total CPU Time: i.%totalcpu
-                       Size: size
-                 PC Address: address
-                       Name: name
-@end verbatim
-@end smallexample
-
-This shows the metrics currently in use, the metric that is used to sort
-the data and all the metrics that have been recorded, but are not necessarily
-shown.
-
-@cindex Default metrics
-In this case, the default metrics are set to the exclusive and inclusive
-total CPU times, plus the name of the function, or load object.
-
-@IndexSubentry{Commands, @code{metrics}}
-The @code{metrics} command is used to define the metrics that need to be
-displayed. 
-
-For example, to display the exclusive total CPU time, both as a number and a
-percentage, use the following metric definition: @code{e.%totalcpu}
-
-Since the metrics can be tailored for different views, there is a way
-to reset them to the default. This is done through the special keyword
-@code{default}.
-
-@c -- A new node --------------------------------------------------------------
-@node    A First Customization of the Output
-@subsection A First Customization of the Output
-@c ----------------------------------------------------------------------------
-
-With the information just given, we can customize the function overview. 
-For sake of the example, we would like to display the name of the function
-first, followed by the exclusive CPU time, given as an absolute number and 
-a percentage.
-
-Note that the commands are parsed in order of appearance. This is why we
-need to define the metrics @emph{before} requesting the function overview:
-
-@cartouche
-@smallexample
-$ gprofng display text -metrics name:e.%totalcpu -functions test.1.er
-@end smallexample
-@end cartouche
-
-@smallexample
-@verbatim
-Current metrics: name:e.%totalcpu
-Current Sort Metric: Exclusive Total CPU Time ( e.%totalcpu )
-Functions sorted by metric: Exclusive Total CPU Time
-
-Name                Excl. Total
-                    CPU
-                     sec.      %
- <Total>            2.272 100.00
- mxv_core           2.160  95.04
- init_data          0.047   2.06
- erand48_r          0.030   1.32
- __drand48_iterate  0.013   0.57
- drand48            0.013   0.57
- _int_malloc        0.008   0.35
- brk                0.001   0.04
- sysmalloc          0.001   0.04
- __default_morecore 0.      0.
- __libc_start_main  0.      0.
- allocate_data      0.      0.
- collector_root     0.      0.
- driver_mxv         0.      0.
- main               0.      0.
- malloc             0.      0.
- sbrk               0.      0.
-@end verbatim
-@end smallexample
-
-This was a first and simple example how to customize the output. Note that we
-did not rerun our profiling job and merely modified the display settings.
-Below we will show other and also more advanced examples of customization.
-
-
-@c -- A new node --------------------------------------------------------------
-@node    Name the Experiment Directory
-@subsection Name the Experiment Directory
-@c ----------------------------------------------------------------------------
-
-When using @CollectApp{}, the default names for experiments work fine, but
-they are quite generic. It is often more convenient to select a more 
-descriptive name. For example, one that reflects conditions for the experiment 
-conducted.
-
-For this, the mutually exclusive @code{-o} and @code{-O} options come in handy. 
-Both may be used to provide a name for the experiment directory, but the
-behaviour of @CollectApp{} is different.
-
-With the 
-@IndexSubentry{Options, @code{-o}}
-@code{-o} 
-option, an existing experiment directory is not overwritten. You either
-need to explicitly remove an existing directory first, or use a name that is not
-in use yet.
-
-This is in contrast with the behaviour for the
- @IndexSubentry{Options, @code{-O}}
-@code{-O} 
-option. Any existing (experiment) directory with the same name is silently 
-overwritten.
-
-Be aware that the name of the experiment directory has to end with @code{.er}.
-
-@c -- A new node --------------------------------------------------------------
-@node    Control the Number of Lines in the Output
-@subsection Control the Number of Lines in the Output
-@c ----------------------------------------------------------------------------
-
-@IndexSubentry{Commands, @code{limit}}
-The @code{limit <n>} command can be used to control the number of lines printed
-in various overviews, including the function view, but it also takes effect
-for other display commands, like @code{lines}.
-
-The argument @code{<n>} should be a positive integer number. It sets the number
-of lines in the function view. A value of zero resets the limit to the default.
-
-Be aware that the pseudo-function @code{<Total>} counts as a regular function.
-For example @code{limit 10} displays nine user level functions.
-
-@c -- A new node --------------------------------------------------------------
-@node    Sorting the Performance Data
-@subsection Sorting the Performance Data
-@c ----------------------------------------------------------------------------
-
-@IndexSubentry{Commands, @code{sort}}
-The @code{sort <key>} command sets the key to be used when sorting the 
-performance data.
-
-The key is a valid metric definition, but the
-@cindex Visibility field
-visibility field 
-(@xref{Metric Definitions})
-in the metric
-definition is ignored since this does not affect the outcome of the sorting
-operation.
-For example if we set the sort key to @code{e.totalcpu}, the values
-will be sorted in descending order with respect to the exclusive total
-CPU time.
-
-The data can be sorted in reverse order by prepending the metric definition
-with a minus (@code{-}) sign. For example @code{sort -e.totalcpu}.
-
-A default metric for the sort operation has been defined and since this is 
-a persistent command, this default can be restored with @code{default} as 
-the key.
-
-@c -- A new node --------------------------------------------------------------
-@node    Scripting
-@subsection Scripting
 @c ----------------------------------------------------------------------------
-
-As is probably clear by now, the list with commands for @DisplayText{} can be
-very long. This is tedious and also error prone. Luckily, there is an easier and 
-more elegant way to control the behaviour of this tool.
-
-@IndexSubentry{Commands, @code{script}}
-Through the @code{script} command, the name of a file with commands can be
-passed in. These commands are parsed and executed as if they appeared on
-the command line in the same order as encountered in the file. The commands
-in this script file can actually be mixed with commands on the command line.
-
-The difference between the commands in the script file and those used on the
-command line is that the latter require a leading dash (@code{-}) symbol.
-
-Comment lines are supported. They need to start with the @code{#} symbol.
-
-@c -- A new node --------------------------------------------------------------
-@node       A More Elaborate Example
-@subsection A More Elaborate Example
+@c NOTES section
 @c ----------------------------------------------------------------------------
 
-With the information presented so far, we can customize our data
-gathering and display commands.
-
-As an example, to reflect the name of the algorithm and the number of threads 
-that were used in the experiment, we select @code{mxv.1.thr.er} 
-as the name of the experiment directory.
-All we then need to 
-do is to add the 
- @IndexSubentry{Options, @code{-O}}
-@code{-O} 
-option followed by this name on the command line when running @CollectApp{}:
-
-@cartouche
-@smallexample
-$ exe=mxv-pthreads.exe
-$ m=3000
-$ n=2000
-$ gprofng collect app -O mxv.1.thr.er ./$exe -m $m -n $n -t 1
-@end smallexample
-@end cartouche
-
-The commands to generate the profile are put into a file that we simply call
-@code{my-script}:
-
-@smallexample
-@verbatim
-$ cat my-script
-# This is my first gprofng script
-# Set the metrics
-metrics i.%totalcpu:e.%totalcpu:name
-# Use the exclusive time to sort
-sort e.totalcpu
-# Limit the function list to 5 lines
-limit 5
-# Show the function list
-functions
-@end verbatim
-@end smallexample
-
-This script file is then specified as input to the @DisplayText{} command 
-that is used to display the performance information stored in 
-@code{mxv.1.thr.er}:
-
-@cartouche
-@smallexample
-$ gprofng display text -script my-script mxv.1.thr.er
-@end smallexample
-@end cartouche
-
-The command above produces the following output:
-
-@smallexample
-@verbatim
-# This is my first gprofng script
-# Set the metrics
-Current metrics: i.%totalcpu:e.%totalcpu:name
-Current Sort Metric: Exclusive Total CPU Time ( e.%totalcpu )
-# Use the exclusive time to sort
-Current Sort Metric: Exclusive Total CPU Time ( e.%totalcpu )
-# Limit the function list to 5 lines
-Print limit set to 5
-# Show the function list
-Functions sorted by metric: Exclusive Total CPU Time
-
-Incl. Total   Excl. Total    Name
-CPU           CPU
- sec.      %   sec.      %
-2.272 100.00  2.272 100.00   <Total>
-2.159  95.00  2.159  95.00   mxv_core
-0.102   4.48  0.054   2.37   init_data
-0.035   1.54  0.025   1.10   erand48_r
-0.048   2.11  0.013   0.57   drand48
-@end verbatim
-@end smallexample
-
-In the first part of the output, our comment lines in the script file are 
-shown. These are interleaved with an acknowledgement message for the commands.
-
-This is followed by a profile consisting of 5 lines only. For both metrics,
-the percentages plus the timings are given. The numbers are sorted with respect 
-to the exclusive total CPU time.
-
-It is now immediately clear that function @code{mxv_core} is responsbile for
-95% of the CPU time and @code{init_data} takes 4.5% only.
-
-This is also where we see sampling in action. Although this is exactly the
-same job we profiled before, the timings are somewhat different, but the
-differences are very small.
-
-@c -- A new node --------------------------------------------------------------
-@node       The Call Tree
-@subsection The Call Tree
-@c ----------------------------------------------------------------------------
-
-The call tree shows the dynamic hierarchy of the application by displaying the
-functions executed and their parent. It helps to find the most expensive path
-in the program.
-
-@IndexSubentry{Commands, @code{calltree}}
-This feature is enabled through the @code{calltree} command. This is how to get
-this tree for our current experiment:
-
-@cartouche
-@smallexample
-$ gprofng display text -calltree mxv.1.thr.er
-@end smallexample
-@end cartouche
-
-This displays the following structure:
-
-@smallexample
-@verbatim
-Functions Call Tree. Metric: Attributed Total CPU Time
-
-Attr.      Name
-Total
-CPU sec.
-2.272      +-<Total>
-2.159        +-collector_root
-2.159        |  +-driver_mxv
-2.159        |    +-mxv_core
-0.114        +-__libc_start_main
-0.114          +-main
-0.102            +-init_data
-0.048            |  +-drand48
-0.035            |    +-erand48_r
-0.010            |      +-__drand48_iterate
-0.011            +-allocate_data
-0.011            |  +-malloc
-0.011            |    +-_int_malloc
-0.001            |      +-sysmalloc
-0.001            +-check_results
-0.001              +-malloc
-0.001                +-_int_malloc
-@end verbatim
-@end smallexample
-
-At first sight this may not be what you expected and some explanation is in
-place.
-
-@c ----------------------------------------------------------------------------
-@c TBD: Revise this text when we have user and machine mode.
-@c ----------------------------------------------------------------------------
-First of all, function @code{collector_root} is internal to @ToolName{} and
-should be hidden to the user. This is part of a planned future enhancement.
-
-Recall that the @code{objects} and @code{fsingle} commands are very useful
-to find out more about load objects in general, but also to help identify
-an unknown entry in the function overview. @xref{Load Objects and Functions}.
-
-Another thing to note is that there are two main branches. The one under
-@code{collector_root} and the second one under @code{__libc_start_main}.
-This reflects the fact that we are executing a parallel program. Even though
-we only used one thread for this run, this is still executed in a separate
-path.
-
-The main, sequential part of the program is displayed under @code{main} and
-shows the functions called and the time they took.
-
-There are two things worth noting for the call tree feature:
-
-@itemize
-
-@item
-This is a dynamic tree and since sampling is used, it most likely looks
-slighlty different across seemingly identical profile runs. In case the
-run times are short, it is worth considering to use a high resolution
-through the 
-@IndexSubentry{Options, @code{-p}}
-@code{-p} 
-option. For example to use @code{-p hi} to increase the sampling rate.
-
-@item
-In case hardware event counters have been enabled 
-(@xref{Profile Hardware Event Counters}), these values are also displayed
-in the call tree view.
-
-@end itemize
-
-@c -- A new node --------------------------------------------------------------
-@node       More Information on the Experiment
-@subsection More Information on the Experiment
-@c ----------------------------------------------------------------------------
-
-The experiment directory not only contains performance related data. Several
-system characteristics, the actually command executed, and some global 
-performance statistics can be displayed.
-
-@IndexSubentry{Commands, @code{header}}
-The @code{header} command displays information about the experiment(s).
-For example, this is the command to extract this data from for our experiment 
-directory:
-
-@cartouche
-@smallexample
-$ gprofng display text -header mxv.1.thr.er
-@end smallexample
-@end cartouche
-
-The above command prints the following information. Note that some of the
-lay-out and the information has been modified. The textual changes are 
-marked with the @code{<} and @code{>} symbols.
-
-@smallexample
-@verbatim
-Experiment: mxv.1.thr.er
-No errors
-No warnings
-Archive command `gp-archive -n -a on 
-         --outfile <exp_dir>/archive.log <exp_dir>'
-
-Target command (64-bit): './mxv-pthreads.exe -m 3000 -n 2000 -t 1'
-Process pid 30591, ppid 30589, pgrp 30551, sid 30468
-Current working directory: <cwd>
-Collector version: `2.36.50'; experiment version 12.4 (64-bit)
-Host `<hostname>', OS `Linux <version>', page size 4096, 
-     architecture `x86_64'
-  16 CPUs, clock speed 1995 MHz.
-  Memory: 30871514 pages @  4096 = 120591 MB.
-Data collection parameters:
-  Clock-profiling, interval = 997 microsecs.
-  Periodic sampling, 1 secs.
-  Follow descendant processes from: fork|exec|combo
-
-Experiment started <date and time>
-
-Experiment Ended: 2.293162658
-Data Collection Duration: 2.293162658
-@end verbatim
-@end smallexample
-
-The output above may assist in troubleshooting, or to verify some of the
-operational conditions and we recommand to include this command when 
-generating a profile.
-
-@IndexSubentry{Options, @code{-C}}
-Related to this command there is a useful option to record your own comment(s) in 
-an experiment.
-To this end, use the @code{-C} option on the @CollectApp{} tool to
-specify a comment string. Up to ten comment lines can be included. 
-These comments are displayed with the @code{header} command on
-the @DisplayText{} tool.
-
-@IndexSubentry{Commands, @code{overview}}
-The @code{overview} command displays information on the experiment(s) and also
-shows a summary of the values for the metric(s) used. This is an example how to
-use it on our newly created experiment directory:
-
-@cartouche
-@smallexample
-$ gprofng display text -overview mxv.1.thr.er
-@end smallexample
-@end cartouche
-
-@smallexample
-@verbatim
-Experiment(s):
-
-Experiment      :mxv.1.thr.er
-  Target        : './mxv-pthreads.exe -m 3000 -n 2000 -t 1'
-  Host          : <hostname> (<ISA>, Linux <version>)
-  Start Time    : <date and time>
-  Duration      : 2.293 Seconds
-
-Metrics:
-
-  Experiment Duration (Seconds): [2.293]
-  Clock Profiling
-    [X]Total CPU Time - totalcpu (Seconds): [*2.272]
-
-Notes: '*' indicates hot metrics, '[X]' indicates currently enabled 
-       metrics.
-       The metrics command can be used to change selections. The 
-       metric_list command lists all available metrics.
-@end verbatim
-@end smallexample
-
-This command provides a dashboard overview that helps to easily identify
-where the time is spent and in case hardware event counters are used, it
-shows their total values.
-
-@c -- A new node --------------------------------------------------------------
-@node       Control the Sampling Frequency
-@subsection Control the Sampling Frequency
-@c ----------------------------------------------------------------------------
-
-So far we did not talk about the frequency of the sampling process, but in
-some cases it is useful to change the default of 10 milliseconds.
-
-The advantage of increasing the sampling frequency is that functions that
-do not take much time per invocation are more accurately captured. The
-downside is that more data is gathered. This has an impact on the overhead
-of the collection process and more disk space is required. 
-
-In general this is not an immediate concern, but with heavily threaded
-applications that run for an extended period of time, increasing the 
-frequency may have a more noticeable impact.
-
-@IndexSubentry{Options, @code{-p}}
-The @code{-p} option on the @CollectApp{} tool is used to enable or disable
-clock based profiling, or to explicitly set the sampling rate. 
-@cindex Sampling interval
-This option takes one of the following keywords:
-
-@table @code
-
-@item off
-Disable clock based profiling.
-
-@item on
-Enable clock based profiling with a per thread sampling interval of 10 ms. This is the default.
-
-@item lo
-Enable clock based profiling with a per thread sampling interval of 100 ms.
-
-@item hi
-Enable clock based profiling with a per thread sampling interval of 1 ms.
-
-@item <value>
-Enable clock based profiling with a per thread sampling interval of <value>. 
-
-@end table 
-
-One may wonder why there is an option to disable clock based profiling. This
-is because by default, it is enabled when conducting hardware event counter
-experiments (@xref{Profile Hardware Event Counters}).
-With the @code{-p off} option, this can be disabled.
-
-If an explicit value is set for the sampling, the number can be an integer or a 
-floating-point number.
-A  suffix of @code{u} for microseconds, or @code{m} for milliseconds is supported. 
-If no suffix is used, the value is assumed to be in milliseconds.
-
-If the value is smaller than the clock profiling minimum, a warning message is issued
-and it is set to the minimum.
-In case it is not a multiple of the clock profiling resolution, it is silently rounded 
-down to the nearest multiple of the clock resolution. 
-
-If the value exceeds the clock profiling maximum, is negative, or zero, an error is 
-reported.
-
-@IndexSubentry{Commands, @code{header}}
-Note that the @code{header} command echoes the sampling rate used.
-
-@c -- A new node --------------------------------------------------------------
-@node    Information on Load Objects
-@subsection Information on Load Objects
-@c ----------------------------------------------------------------------------
-
-It may happen that the function list contains a function that is not known to 
-the user. This can easily happen with library functions for example.
-Luckily there are three commands that come in handy then. 
-
-@IndexSubentry{Commands, @code{objects}}
-@IndexSubentry{Commands, @code{fsingle}}
-@IndexSubentry{Commands, @code{fsummary}}
-These commands are @code{objects}, @code{fsingle}, and @code{fsummary}. 
-They provide details on
-@cindex Load objects
-load objects (@xref{Load Objects and Functions}).
-
-The @code{objects} command lists all load objects that have been referenced 
-during the performance experiment.
-Below we show the command and the result for our profile job. Like before, 
-the (long) path names in the output have been shortened and replaced by the 
-@IndexSubentry{Miscellaneous, @code{<apath>}}
-@code{<apath>} symbol that represents an absolute directory path.
-
-@cartouche
-@smallexample
-$ gprofng display text -objects mxv.1.thr.er
-@end smallexample
-@end cartouche
-
-The output includes the name and path of the target executable:
-
-@smallexample
-@verbatim
- <Unknown> (<Unknown>)
- <mxv-pthreads.exe> (<apath>/mxv-pthreads.exe)
- <librt-2.17.so> (/usr/lib64/librt-2.17.so)
- <libdl-2.17.so> (/usr/lib64/libdl-2.17.so)
- <libbfd-2.36.50.20210505.so> (<apath>/libbfd-2.36.50 <etc>)
- <libopcodes-2.36.50.20210505.so> (<apath>/libopcodes-2. <etc>)
- <libc-2.17.so> (/usr/lib64/libc-2.17.so)
- <libpthread-2.17.so> (/usr/lib64/libpthread-2.17.so)
- <libm-2.17.so> (/usr/lib64/libm-2.17.so)
- <libgp-collector.so> (<apath>/libgp-collector.so)
- <ld-2.17.so> (/usr/lib64/ld-2.17.so)
- <DYNAMIC_FUNCTIONS> (DYNAMIC_FUNCTIONS)
-@end verbatim
-@end smallexample
-
-@IndexSubentry{Commands, @code{fsingle}}
-The @code{fsingle} command may be used to get more details on a specific entry 
-in the function view, say. For example, the command below provides additional
-information on the @code{collector_root} function shown in the function overview.
-
-@cartouche
-@smallexample
-$ gprofng display text -fsingle collector_root mxv.1.thr.er
-@end smallexample
-@end cartouche
-
-Below the output from this command. It has been somewhat modified to match the
-display requirements.
-
-@smallexample
-@verbatim
-collector_root
-  Exclusive Total CPU Time: 0.    (  0. %)
-  Inclusive Total CPU Time: 2.159 ( 95.0%)
-            Size:   401
-      PC Address: 10:0x0001db60
-     Source File: <apath>/dispatcher.c
-     Object File: mxv.1.thr.er/archives/libgp-collector.so_HpzZ6wMR-3b
-     Load Object: <apath>/libgp-collector.so
-    Mangled Name:
-         Aliases:
-@end verbatim
-@end smallexample
-
-In this table we not only see how much time was spent in this function, we
-also see where it originates from. In addition to this, the size and start
-address are given as well. If the source code location is known it is also 
-shown here.
-
-@IndexSubentry{Commands, @code{fsummary}}
-The related @code{fsummary} command displays the same information as 
-@code{fsingle}, but for all functions in the function overview, 
-including @code{<Total>}:
-
-@cartouche
-@smallexample
-$ gprofng display text -fsummary mxv.1.thr.er
-@end smallexample
-@end cartouche
-
-@smallexample
-@verbatim
-Functions sorted by metric: Exclusive Total CPU Time
-
-<Total>
-  Exclusive Total CPU Time: 2.272 (100.0%)
-  Inclusive Total CPU Time: 2.272 (100.0%)
-            Size:     0
-      PC Address: 1:0x00000000
-     Source File: (unknown)
-     Object File: (unknown)
-     Load Object: <Total>
-    Mangled Name:
-         Aliases:
-
-mxv_core
-  Exclusive Total CPU Time: 2.159 ( 95.0%)
-  Inclusive Total CPU Time: 2.159 ( 95.0%)
-            Size:    75
-      PC Address: 2:0x000021ba
-     Source File: <apath>/mxv.c
-     Object File: mxv.1.thr.er/archives/mxv-pthreads.exe_hRxWdccbJPc
-     Load Object: <apath>/mxv-pthreads.exe
-    Mangled Name:
-         Aliases:
-
-          ... etc ...
-@end verbatim
-@end smallexample
-
-@c -- A new node --------------------------------------------------------------
-@node    Support for Multithreading
-@section Support for Multithreading
-@c ----------------------------------------------------------------------------
-
-In this chapter we introduce and discuss the support for multithreading. As
-is shown below, nothing needs to be changed when collecting the performance 
-data.
-
-The difference is that additional commands are available to get more 
-information on the parallel environment, plus that several filters allow
-the user to zoom in on specific threads.
-
-@c -- A new node --------------------------------------------------------------
-@node       Creating a Multithreading Experiment
-@subsection Creating a Multithreading Experiment
-@c ----------------------------------------------------------------------------
-
-We demonstrate the support for multithreading using the same code and settings
-as before, but this time we use 2 threads:
-
-@cartouche
-@smallexample
-$ exe=mxv-pthreads.exe
-$ m=3000
-$ n=2000
-$ gprofng collect app -O mxv.2.thr.er ./$exe -m $m -n $n -t 2
-@end smallexample
-@end cartouche
-
-First of all, note that we did not change anything, other than setting the 
-number of threads to 2. Nothing special is needed to profile a multithreaded
-job when using @ToolName{}.
-
-The same is true when displaying the performance results. The same commands
-that we used before work unmodified. For example, this is all that is needed to 
-get a function overview:
-
-@cartouche
-@smallexample
-$ gprofng display text -limit 10 -functions mxv.2.thr.er
-@end smallexample
-@end cartouche
-
-This produces the following familiar looking output:
-
-@smallexample
-@verbatim
-Print limit set to 10
-Functions sorted by metric: Exclusive Total CPU Time
-
-Excl.     Incl.      Name
-Total     Total
-CPU sec.  CPU sec.
-2.268     2.268      <Total>
-2.155     2.155      mxv_core
-0.044     0.103      init_data
-0.030     0.046      erand48_r
-0.016     0.016      __drand48_iterate
-0.013     0.059      drand48
-0.008     0.011      _int_malloc
-0.003     0.003      brk
-0.        0.003      __default_morecore
-0.        0.114      __libc_start_main
-@end verbatim
-@end smallexample
-
-@c -- A new node --------------------------------------------------------------
-@node       Commands Specific to Multithreading
-@subsection Commands Specific to Multithreading
-@c ----------------------------------------------------------------------------
-
-The function overview shown above shows the results aggregated over all the 
-threads. The interesting new element is that we can also look at the 
-performance data for the individual threads.
-
-@IndexSubentry{Commands, @code{thread_list}}
-The @code{thread_list} command displays how many threads have been used:
-
-@cartouche
-@smallexample
-$ gprofng display text -thread_list mxv.2.thr.er
-@end smallexample
-@end cartouche
-
-This produces the following output, showing that three threads have
-been used:
-
-@smallexample
-@verbatim
-Exp Sel Total
-=== === =====
-  1 all     3
-@end verbatim
-@end smallexample
-
-The output confirms there is one experiment and that by default all
-threads are selected.
-
-It may seem surprising to see three threads here, since we used the 
-@code{-t 2} option, but it is common for a Pthreads program to use one 
-additional thread. This is typically the thread that runs from start to 
-finish and handles the sequential portions of the code, as well as takes
-care of managing the threads. 
-
-It is no different in our example code. At some point, the main thread 
-creates and activates the two threads that perform the multiplication 
-of the matrix with the vector. Upon completion of this computation,
-the main thread continues.
-
-@IndexSubentry{Commands, @code{threads}}
-The @code{threads} command is simple, yet very powerful. It shows the
-total value of the metrics for each thread. To make it easier to 
-interpret the data, we modify the metrics to include percentages:
-
-@cartouche
-@smallexample
-$ gprofng display text -metrics e.%totalcpu -threads mxv.2.thr.er
-@end smallexample
-@end cartouche
-
-The command above produces the following overview:
-
-@smallexample
-@verbatim
-Current metrics: e.%totalcpu:name
-Current Sort Metric: Exclusive Total CPU Time ( e.%totalcpu )
-Objects sorted by metric: Exclusive Total CPU Time
-
-Excl. Total    Name
-CPU
- sec.      %
-2.258 100.00   <Total>
-1.075  47.59   Process 1, Thread 3
-1.070  47.37   Process 1, Thread 2
-0.114   5.03   Process 1, Thread 1
-@end verbatim
-@end smallexample
-
-The first line gives the total CPU time accumulated over the threads
-selected. This is followed by the metric value(s) for each thread.
-
-From this it is clear that the main thread is responsible for 5% of
-the total CPU time, while the other two threads take 47% each.
-
-This view is ideally suited to verify if there are any load balancing
-issues and also to find the most time consuming thread(s).
-
-@IndexSubentry{Filters, Thread selection}
-While useful, often more information than this is needed. This is
-@IndexSubentry{Commands, @code{thread_select}}
-where the thread selection filter comes in. Through the @code{thread_select}
-command, one or more threads may be selected 
-(@xref{The Selection List} how to define the selection list).
-
-Since it is most common to use this command in a script, we do so as
-well here. Below the script we are using:
-
-@cartouche
-@smallexample
-# Define the metrics
-metrics e.%totalcpu
-# Limit the output to 10 lines
-limit 10
-# Get the function overview for thread 1
-thread_select 1
-functions
-# Get the function overview for thread 2
-thread_select 2
-functions
-# Get the function overview for thread 3
-thread_select 3
-functions
-@end smallexample
-@end cartouche
-
-The definition of the metrics and the output limiter has been shown and
-explained before and will be ignored. The new command we focus on is 
-@IndexSubentry{Commands, @code{thread_select}}
-@code{thread_select}.
-
-This command takes a list (@xref{The Selection List}) to select specific
-threads. In this case we simply use the individual thread numbers that we
-obtained with the @code{thread_list} command earlier.
-
-This restricts the output of the @code{functions} command to the thread
-number(s) specified. This means that the script above shows which 
-function(s) each thread executes and how much CPU time they consumed.
-Both the timings and their percentages are given.
-
-This is the relevant part of the output for the first thread:
-
-@smallexample
-@verbatim
-# Get the function overview for thread 1
-Exp Sel Total
-=== === =====
-  1 1       3
-Functions sorted by metric: Exclusive Total CPU Time
-
-Excl. Total    Name
-CPU
- sec.      %
-0.114 100.00   <Total>
-0.051  44.74   init_data
-0.028  24.56   erand48_r
-0.017  14.91   __drand48_iterate
-0.010   8.77   _int_malloc
-0.008   7.02   drand48
-0.      0.     __libc_start_main
-0.      0.     allocate_data
-0.      0.     main
-0.      0.     malloc
-@end verbatim
-@end smallexample
-
-As usual, the comment lines are echoed. This is followed by a confirmation
-of our selection. We see that indeed thread 1 has been selected. What is
-displayed next is the function overview for this particular thread. Due to
-the @code{limit 10} command, there are ten entries in this list.
-
-Below are the overviews for threads 2 and 3 respectively. We see that all
-of the CPU time is spent in function @code{mxv_core} and that this time
-is approximately the same for both threads.
-
-@smallexample
-@verbatim
-# Get the function overview for thread 2
-Exp Sel Total
-=== === =====
-  1 2       3
-Functions sorted by metric: Exclusive Total CPU Time
-
-Excl. Total    Name
-CPU
- sec.      %
-1.072 100.00   <Total>
-1.072 100.00   mxv_core
-0.      0.     collector_root
-0.      0.     driver_mxv
-
-# Get the function overview for thread 3
-Exp Sel Total
-=== === =====
-  1 3       3
-Functions sorted by metric: Exclusive Total CPU Time
-
-Excl. Total    Name
-CPU
- sec.      %
-1.076 100.00   <Total>
-1.076 100.00   mxv_core
-0.      0.     collector_root
-0.      0.     driver_mxv
-@end verbatim
-@end smallexample
-
-When analyzing the performance of a multithreaded application, it is sometimes
-useful to know whether threads have mostly executed on the same core, say, or
-if they have wandered across multiple cores. This sort of stickiness is usually 
-referred to as
-@cindex Thread affinity
-@emph{thread affinity}.
-
-Similar to the commands for the threads, there are several commands related 
-to the usage of the cores, or @emph{CPUs} as they are called in @ToolName{}
-(@xref{The Concept of a CPU in @ProductName{}}).
-
-In order to have some more interesting data to look at, we created a new
-experiment, this time using 8 threads:
-
-@cartouche
-@smallexample
-$ exe=mxv-pthreads.exe
-$ m=3000
-$ n=2000
-$ gprofng collect app -O mxv.8.thr.er ./$exe -m $m -n $n -t 8
-@end smallexample
-@end cartouche
-
-@IndexSubentry{Commands, @code{cpu_list}}
-Similar to the @code{thread_list} command, the @code{cpu_list} command 
-displays how many CPUs have been used. 
-@IndexSubentry{Commands, @code{cpus}}
-The equivalent of the @code{threads} command is the @code{cpus}
-command, which shows the CPU numbers that were used and how much time was 
-spent on each of them. Both are demonstrated below.
-
-@cartouche
-@smallexample
-$ gprofng display text -metrics e.%totalcpu -cpu_list -cpus mxv.8.thr.er
-@end smallexample
-@end cartouche
-
-This command produces the following output:
-
-@smallexample
-@verbatim
-Current metrics: e.%totalcpu:name
-Current Sort Metric: Exclusive Total CPU Time ( e.%totalcpu )
-Exp Sel Total
-=== === =====
-  1 all    10
-Objects sorted by metric: Exclusive Total CPU Time
-
-Excl. Total    Name
-CPU
- sec.      %
-2.310 100.00   <Total>
-0.286  12.39   CPU 7
-0.284  12.30   CPU 13
-0.282  12.21   CPU 5
-0.280  12.13   CPU 14
-0.266  11.52   CPU 9
-0.265  11.48   CPU 2
-0.264  11.44   CPU 11
-0.194   8.42   CPU 0
-0.114   4.92   CPU 1
-0.074   3.19   CPU 15
-@end verbatim
-@end smallexample
-
-@c ----------------------------------------------------------------------------
-@c TBD - Ruud
-@c I'd like to improve this and have a way to see where a thread has executed.
-@c ----------------------------------------------------------------------------
-
-What we see in this table is that a total of 10 CPUs have been used. This is
-followed by a list with all the CPU numbers that have been used during the 
-run. For each CPU it is shown how much time was spent on it.
-
-While the table with thread times shown earlier may point at a load imbalance
-in the application, this overview has a different purpose.
-
-For example, we see that 10 CPUs have been used, but we know that the 
-application uses 9 threads only.
-This means that at least one thread has executed on more than one CPU. In 
-itself this is not something to worry about, but warrants a deeper 
-investigation.
-
-Honesty dictates that next we performed a pre-analysis to find out 
-which thread(s) have been running on more than one CPU. We found this 
-to be thread 7. It has executed on CPUs 0 and 15.
-
-With this knowledge, we wrote the script shown below. It zooms in on
-the behaviour of thread 7.
-
-@cartouche
-@smallexample
-# Define the metrics
-metrics e.%totalcpu
-# Limit the output to 10 lines
-limit 10
-functions
-# Get the function overview for CPU 0
-cpu_select 0
-functions
-# Get the function overview for CPU 15
-cpu_select 15
-functions
-@end smallexample
-@end cartouche
-
-From the earlier shown threads overview, we know that thread 7 has
-used @code{0.268} seconds of CPU time.
-
-By selecting CPUs 0 and 15, respectively, we get the following
-function overviews:
-
-@smallexample
-@verbatim
-# Get the function overview for CPU 0
-Exp Sel Total
-=== === =====
-  1 0      10
-Functions sorted by metric: Exclusive Total CPU Time
-
-Excl. Total    Name
-CPU
- sec.      %
-0.194 100.00   <Total>
-0.194 100.00   mxv_core
-0.      0.     collector_root
-0.      0.     driver_mxv
-
-# Get the function overview for CPU 15
-Exp Sel Total
-=== === =====
-  1 15     10
-Functions sorted by metric: Exclusive Total CPU Time
-
-Excl. Total    Name
-CPU
- sec.      %
-0.074 100.00   <Total>
-0.074 100.00   mxv_core
-0.      0.     collector_root
-0.      0.     driver_mxv
-@end verbatim
-@end smallexample
-
-This shows that thread 7 spent @code{0.194} seconds on CPU 0 and 
-@code{0.074} seconds on CPU 15.
-
-@c -- A new node --------------------------------------------------------------
-@node    Viewing Multiple Experiments
-@section Viewing Multiple Experiments
-@c ----------------------------------------------------------------------------
-
-One thing we did not cover so far is that @ToolName{} fully supports the analysis
-of multiple experiments. The @DisplayText{} tool accepts a list of experiments.
-The data can either be aggregated across the experiments, or used in a 
-comparison.
-
-Mention @code{experiment_list}
-
-@c -- A new node --------------------------------------------------------------
-@node    Aggregation of Experiments
-@subsection Aggregation of Experiments
-@c ----------------------------------------------------------------------------
-
-By default, the data for multiple experiments is aggregated and the display
-commands show these combined results.
-
-For example, we can aggregate the data for our single and dual thread
-experiments. Below is the script we used for this:
-
-@cartouche
-@smallexample
-# Define the metrics
-metrics e.%totalcpu
-# Limit the output to 10 lines
-limit 10
-# Get the list with experiments
-experiment_list
-# Get the function overview
-functions
-@end smallexample
-@end cartouche
-
-@IndexSubentry{Commands, @code{experiment_list}}
-With the exception of the @code{experiment_list} command, all commands
-used have been discussed earlier.
-
-The @code{experiment_list} command provides a list of the experiments
-that have been loaded. This is used to verify we are looking at the
-experiments we intend to aggregate.
-
-@cartouche
-@smallexample
-$ gprofng display text -script my-script-agg mxv.1.thr.er mxv.2.thr.er
-@end smallexample
-@end cartouche
-
-With the command above, we get the following output:
-
-@smallexample
-@verbatim
-# Define the metrics
-Current metrics: e.%totalcpu:name
-Current Sort Metric: Exclusive Total CPU Time ( e.%totalcpu )
-# Limit the output to 10 lines
-Print limit set to 10
-# Get the list with experiments
-ID Sel   PID Experiment
-== === ===== ============
- 1 yes 30591 mxv.1.thr.er
- 2 yes 11629 mxv.2.thr.er
-# Get the function overview
-Functions sorted by metric: Exclusive Total CPU Time
-
-Excl. Total    Name
-CPU
- sec.      %
-4.533 100.00   <Total>
-4.306  94.99   mxv_core
-0.105   2.31   init_data
-0.053   1.17   erand48_r
-0.027   0.59   __drand48_iterate
-0.021   0.46   _int_malloc
-0.021   0.46   drand48
-0.001   0.02   sysmalloc
-0.      0.     __libc_start_main
-0.      0.     allocate_data
-@end verbatim
-@end smallexample
-
-The first five lines should look familiar. The five lines following, echo
-the comment line in the script and show the overview of the experiments.
-This confirms two experiments have been loaded and that both are active.
-
-This is followed by the function overview. The timings have been summed
-up and the percentages are adjusted accordingly. For example, the total
-accumulated time is indeed 2.272 + 2.261 = 4.533 seconds.
-
-@c -- A new node --------------------------------------------------------------
-@node       Comparison of Experiments
-@subsection Comparison of Experiments
-@c ----------------------------------------------------------------------------
-
-The support for multiple experiments really shines in comparison mode. This
-feature is enabled through the command 
-@IndexSubentry{Commands, @code{compare on/off}}
-@code{compare on} 
-and is disabled
-by setting 
-@code{compare off}.
-
-@cindex Compare experiments
-In comparison mode, the data for the various experiments is shown side by
-side, as illustrated below where we compare the results for the multithreaded
-experiments using one and two threads respectively:
-
-@cartouche
-@smallexample
-$ gprofng display text -compare on -functions mxv.1.thr.er mxv.2.thr.er
-@end smallexample
-@end cartouche
-
-@noindent
-This produces the following output:
-
-@smallexample
-@verbatim
-Functions sorted by metric: Exclusive Total CPU Time
-
-mxv.1.thr.er  mxv.2.thr.er  mxv.1.thr.er  mxv.2.thr.er
-Excl. Total   Excl. Total   Incl. Total   Incl. Total    Name
-CPU           CPU           CPU           CPU
- sec.          sec.          sec.          sec.
-2.272         2.261         2.272         2.261          <Total>
-2.159         2.148         2.159         2.148          mxv_core
-0.054         0.051         0.102         0.104          init_data
-0.025         0.028         0.035         0.045          erand48_r
-0.013         0.008         0.048         0.053          drand48
-0.011         0.010         0.012         0.010          _int_malloc
-0.010         0.017         0.010         0.017          __drand48_iterate
-0.001         0.            0.001         0.             sysmalloc
-0.            0.            0.114         0.114          __libc_start_main
-0.            0.            0.011         0.010          allocate_data
-0.            0.            0.001         0.             check_results
-0.            0.            2.159         2.148          collector_root
-0.            0.            2.159         2.148          driver_mxv
-0.            0.            0.114         0.114          main
-0.            0.            0.012         0.010          malloc
-@end verbatim
-@end smallexample
-
-This table is already helpful to more easily compare (two) profiles, but 
-there is more that we can do here. 
-
-By default, in comparison mode, all measured values are shown. Often 
-profiling is about comparing performance data. It is therefore
-more useful to look at differences, or ratios, using one experiment as 
-a reference. 
-
-The values shown are relative to this reference. For example if a ratio
-is below one, it means the reference value was higher. 
-
-@IndexSubentry{Commands, @code{compare on/off}}
-This feature is supported on the @code{compare} command. In addition to @code{on},
-or @code{off}, this command also supports 
-@IndexSubentry{Commands, @code{compare delta}}
-@code{delta}, or 
-@IndexSubentry{Commands, @code{compare ratio}}
-@code{ratio}.
-
-Usage of one of these two keywords enables the comparison feature and shows
-either the difference, or the ratio, relative to the reference data.
-
-In the example below, we use the same two experiments used in the comparison
-above, but as before, the number of lines is restricted to 10 and we focus on 
-the exclusive timings plus percentages. For the comparison part we are 
-interested in the differences.
-
-This is the script that produces such an overview:
-
-@cartouche
-@smallexample
-# Define the metrics
-metrics e.%totalcpu
-# Limit the output to 10 lines
-limit 10
-# Set the comparison mode to differences
-compare delta
-# Get the function overview
-functions
-@end smallexample
-@end cartouche
-
-Assuming this script file is called @code{my-script-comp}, this is how we
-get the table displayed on our screen:
-
-@cartouche
-@smallexample
-$ gprofng display text -script my-script-comp mxv.1.thr.er mxv.2.thr.er
-@end smallexample
-@end cartouche
-
-Leaving out some of the printed lines that we have seen before, we get
-the following table:
-
-@smallexample
-@verbatim
-mxv.1.thr.er  mxv.2.thr.er
-Excl. Total   Excl. Total     Name
-CPU           CPU
- sec.      %   delta      %
-2.272 100.00  -0.011 100.00   <Total>
-2.159  95.00  -0.011  94.97   mxv_core
-0.054   2.37  -0.003   2.25   init_data
-0.025   1.10  +0.003   1.23   erand48_r
-0.013   0.57  -0.005   0.35   drand48
-0.011   0.48  -0.001   0.44   _int_malloc
-0.010   0.44  +0.007   0.75   __drand48_iterate
-0.001   0.04  -0.001   0.     sysmalloc
-0.      0.    +0.      0.     __libc_start_main
-0.      0.    +0.      0.     allocate_data
-@end verbatim
-@end smallexample
-
-It is now easy to see that the CPU times for the most time consuming
-functions in this code are practically the same. 
-
-While in this case we used the delta as a comparison, the ratio can be selected in the same way with @code{compare ratio}.
-
-Note that the comparison feature is supported at the function, source, and 
-disassembly level. There is no practical limit on the number of experiments
-that can be used in a comparison.
-
-
-
-@c -- A new node --------------------------------------------------------------
-@node    Profile Hardware Event Counters
-@section Profile Hardware Event Counters
-@c ----------------------------------------------------------------------------
-
-Many processors provide a set of hardware event counters and @ToolName{}
-provides support for this feature.
-@xref{Hardware Event Counters Explained} for those readers that are not 
-familiar with such counters and like to learn more.
-
-In this section we explain how to get the details on the event counter
-support for the processor used in the experiment(s), and show several
-examples.
-
-@c -- A new node --------------------------------------------------------------
-@node       Getting Information on the Counters Supported
-@subsection Getting Information on the Counters Supported
-@c ----------------------------------------------------------------------------
-
-The first step is to check if the processor used for the experiments is 
-supported by @ToolName{}.
-
-@IndexSubentry{Options, @code{-h}}
-The @code{-h} option on @CollectApp{} will show the event counter
-information:
-
-@cartouche
-@smallexample
-$ gprofng collect app -h
-@end smallexample
-@end cartouche
-
-In case the counters are supported, a list with the events is printed. 
-Otherwise, a warning message will be issued. 
-
-For example, below we show this command and the output on an Intel Xeon 
-Platinum 8167M (aka ``Skylake'') processor. The output has been split
-into several sections and each section is commented upon separately.
-
-@smallexample
-@verbatim
-Run "gprofng collect app --help" for a usage message.
-
-Specifying HW counters on `Intel Arch PerfMon v2 on Family 6 Model 85' 
-(cpuver=2499):
-
-  -h {auto|lo|on|hi}
-	turn on default set of HW counters at the specified rate
-  -h <ctr_def> [-h <ctr_def>]...
-  -h <ctr_def>[,<ctr_def>]...
-	specify HW counter profiling for up to 4 HW counters
-@end verbatim
-@end smallexample
+@ManPageStart{NOTES}
+@c man begin NOTES
 
-The first line shows how to get a usage overview. This is followed by
-some information on the target processor.
+The gprofng driver supports the following commands.
+@vspace{1}
 
-The next five lines explain in what ways the @code{-h} option can be 
-used to define the events to be monitored.
+@c The man pages for the commands below can be viewed using the command name with "gprofng" replaced by "gp" and the spaces replaced by a dash ("-"). For example the man page
+@c        name for "gprofng collect app" is "gp-collect-app".
 
-The first version shown above enables a default set of counters. This
-default depends on the processor this command is executed on. The
-keyword following the @code{-h} option defines the sampling rate:
+@i{Collect performance data:}
 
 @table @code
 
-@item auto
-Match the sample rate used by clock profiling. If the latter is disabled,
-use a per thread sampling rate of approximately 100 samples per second.
-This setting is the default and preferred.
-
-@item on
-Use a per thread sampling rate of approximately 100 samples per second.
-
-@item lo
-Use a per thread sampling rate of approximately 10 samples per second.
-
-@item hi
-Use a per thread sampling rate of approximately 1000 samples per second.
-
-@end table 
-
-The second and third variant define the events to be monitored. Note
-that the number of simultaneous events supported is printed. In this
-case we can monitor four events in a single profiling job.
-
-It is a matter of preference whether you like to use the @code{-h}
-option for each event, or use it once, followed by a comma separated
-list.
-
-There is one slight catch though. The counter definition below has a
-mandatory comma (@code{,}) between the event and the rate. While a
-default can be used for the rate, the comma cannot be omitted. 
-This may result in a somewhat awkward counter definition in case
-the default sampling rate is used.
-
-For example, the following two commands are equivalent. Note
-the double comma in the second command. This is not a typo.
-
-@cartouche
-@smallexample
-$ gprofng collect app -h cycles -h insts ... 
-$ gprofng collect app -h cycles,,insts ... 
-@end smallexample
-@end cartouche
-
-In the first command this comma is not needed, because a 
-comma (``@code{,}'') immediately followed by white space may 
-be omitted.
-
-This is why we prefer this syntax and in the remainder will
-use the first version of this command.
-
-@IndexSubentry{Hardware event counters, counter definition}
-The counter definition takes an event name, plus optionally one or
-more attributes, followed by a comma, and optionally the sampling rate.
-The output section below shows the formal definition.
+@item gprofng collect app
+Collect application performance data.
 
-@cartouche
-@smallexample
-  <ctr_def> == <ctr>[[~<attr>=<val>]...],[<rate>]
-@end smallexample
-@end cartouche
+@end table
 
-The printed help then explains this syntax. Below we have summarized
-and expanded this output:
+@i{Display the performance results:}
 
 @table @code
 
-@item <ctr>
-The counter name must be selected from the available counters listed
-as part of the output printed with the @code{-h} option.
-On most systems, if a counter is not listed, it may still be specified 
-by its numeric value.
+@item gprofng display text
+Display the performance data in ASCII format.
 
-@item ~<attr>=<val>
-This is an optional attribute that depends on the processor. The list
-of supported attributes is printed in the output. Examples of 
-attributes are ``user'', or ``system''. The value can given in decimal
-or hexadecimal format.
-Multiple attributes may be specified, and each must be preceded 
-by a ~.
+@item gprofng display html
+Generate an HTML file from one or more experiments.
 
-@item <rate>
+@end table
 
-The sampling rate is one of the following:
+@i{Miscellaneous commands:}
 
 @table @code
 
-@item auto
-This is the default and matches the rate used by clock profiling.
-If clock profiling is disabled, use @code{on}.
-
-@item on
-Set the per thread maximum sampling rate to ~100 samples/second
-
-@item lo
-Set the per thread maximum sampling rate to ~10 samples/second
-
-@item hi
-Set the per thread maximum sampling rate to ~1000 samples/second
-
-@item <interval>
-Define the sampling interval. 
-@xref{Control the Sampling Frequency} how to define this.
+@item gprofng display src
+Display source or disassembly with compiler annotations.
 
-@end table
+@item gprofng archive
+Include binaries and source code in an experiment directory.
 
 @end table
 
-After the section with the formal definition of events and counters, a
-processor specific list is displayed. This part starts with an overview
-of the default set of counters and the aliased names supported 
-@emph{on this specific processor}.
-
-@smallexample
-@verbatim
-Default set of HW counters:
-
-    -h cycles,,insts,,llm
-
-Aliases for most useful HW counters:
-
- alias    raw name                   type units regs description
-
- cycles   unhalted-core-cycles   CPU-cycles 0123 CPU Cycles
- insts    instruction-retired        events 0123 Instructions Executed
- llm      llc-misses                 events 0123 Last-Level Cache Misses
- br_msp   branch-misses-retired      events 0123 Branch Mispredict
- br_ins   branch-instruction-retired events 0123 Branch Instructions
-@end verbatim
-@end smallexample
-
-The definitions given above may or may not be available on other processors,
-but we try to maximize the overlap across alias sets.
-
-The table above shows the default set of counters defined for this processor,
-and the aliases. For each alias the full ``raw'' name is given, plus the
-unit of the number returned by the counter (CPU cycles, or a raw count), 
-the hardware counter the event is allowed to be mapped onto, and a short 
-description.
-
-The last part of the output contains all the events that can be monitored:
-
-@smallexample
-@verbatim
-Raw HW counters:
-
-    name                                type      units regs description
-
-    unhalted-core-cycles                     CPU-cycles 0123
-    unhalted-reference-cycles                    events 0123
-    instruction-retired                          events 0123
-    llc-reference                                events 0123
-    llc-misses                                   events 0123
-    branch-instruction-retired                   events 0123
-    branch-misses-retired                        events 0123
-    ld_blocks.store_forward                      events 0123
-    ld_blocks.no_sr                              events 0123
-    ld_blocks_partial.address_alias              events 0123
-    dtlb_load_misses.miss_causes_a_walk          events 0123
-    dtlb_load_misses.walk_completed_4k           events 0123
-
-    <many lines deleted>
-
-    l2_lines_out.silent                          events 0123
-    l2_lines_out.non_silent                      events 0123
-    l2_lines_out.useless_hwpf                    events 0123
-    sq_misc.split_lock                           events 0123
-
-See Chapter 19 of the "Intel 64 and IA-32 Architectures Software
-Developer's Manual Volume 3B: System Programming Guide"
-@end verbatim
-@end smallexample
-
-As can be seen, these names are not always easy to correlate to a specific
-event of interest. The processor manual should provide more clarity on this.
-
-@c -- A new node --------------------------------------------------------------
-@node       Examples Using Hardware Event Counters
-@subsection Examples Using Hardware Event Counters
-@c ----------------------------------------------------------------------------
-
-The previous section may give the impression that these counters are hard to
-use, but as we will show now, in practice it is quite simple.
-
-With the information from the @code{-h} option, we can easily set up our first 
-event counter experiment.
-
-We start by using the default set of counters defined for our processor and we
-use 2 threads:
-
-@cartouche
-@smallexample
-$ exe=mxv-pthreads.exe
-$ m=3000
-$ n=2000
-$ exp=mxv.hwc.def.2.thr.er
-$ gprofng collect app -O $exp -h auto ./$exe -m $m -n $n -t 2
-@end smallexample
-@end cartouche
-
-@IndexSubentry{Options, @code{-h}}
-@IndexSubentry{Hardware event counters, @code{auto} option}
-The new option here is @code{-h auto}. The @code{auto} keyword enables 
-hardware event counter profiling and selects the default set of counters 
-defined for this processor.
-
-As before, we can display the information, but there is one practical hurdle
-to take. Unless we like to view all metrics recorded, we would need to know
-the names of the events that have been enabled. This is tedious and also not
-portable in case we would like to repeat this experiment on another processor.
-
-@IndexSubentry{Hardware event counters, @code{hwc} metric}
-This is where the special @code{hwc} metric comes very handy. It 
-automatically expands to the active set of events used.
-
-With this, it is very easy to display the event counter values. Note that
-although the regular clock based profiling was enabled, we only want to see 
-the counter values. We also request to see the percentages and limit the
-output to the first 5 lines:
-
-@cartouche
-@smallexample
-$ exp=mxv.hwc.def.2.thr.er
-$ gprofng display text -metrics e.%hwc -limit 5 -functions $exp
-@end smallexample
-@end cartouche
-
-@smallexample
-@verbatim
-Current metrics: e.%cycles:e+%insts:e+%llm:name
-Current Sort Metric: Exclusive CPU Cycles ( e.%cycles )
-Print limit set to 5
-Functions sorted by metric: Exclusive CPU Cycles
-
-Excl. CPU     Excl. Instructions  Excl. Last-Level   Name
-Cycles        Executed            Cache Misses
- sec.      %                  %                 %
-2.691 100.00  7906475309 100.00   122658983 100.00   <Total>
-2.598  96.54  7432724378  94.01   121745696  99.26   mxv_core
-0.035   1.31   188860269   2.39       70084   0.06   erand48_r
-0.026   0.95    73623396   0.93      763116   0.62   init_data
-0.018   0.66    76824434   0.97       40040   0.03   drand48
-@end verbatim
-@end smallexample
-
-As we have seen before, the first few lines echo the settings.
-This includes a list with the hardware event counters used by
-default.
-
-The table that follows makes it very easy to get an overview where the 
-time is spent and how many of the target events have occurred.
-
-As before, we can drill down deeper and see the same metrics at the source
-line and instruction level. Other than using @code{hwc} in the metrics
-definitions, nothing has changed compared to the previous examples:
-
-@cartouche
-@smallexample
-$ exp=mxv.hwc.def.2.thr.er
-$ gprofng display text -metrics e.hwc -source mxv_core $exp
-@end smallexample
-@end cartouche
-
-This is the relevant part of the output. Since the lines get very long,
-we have somewhat modified the lay-out:
-
-@smallexample
-@verbatim
-   Excl. CPU Excl.        Excl.
-   Cycles    Instructions Last-Level
-    sec.     Executed     Cache Misses
-                                         <Function: mxv_core>
-   0.                 0          0   32. void __attribute__ ((noinline)) 
-                                         mxv_core(...)
-   0.                 0          0   33. {
-   0.                 0          0   34.   for (uint64_t i=...) {
-   0.                 0          0   35.     double row_sum = 0.0;
-## 1.872     7291879319   88150571   36.     for (int64_t j=0; j<n; j++)
-   0.725      140845059   33595125   37.        row_sum += A[i][j]*b[j];
-   0.                 0          0   38.     c[i] = row_sum;
-                                     39.    }
-   0.                 0          0   40. }
-@end verbatim
-@end smallexample
-
-In a smiliar way we can display the event counter values at the instruction
-level. Again we have modified the lay-out due to page width limitations:
-
-@cartouche
-@smallexample
-$ exp=mxv.hwc.def.2.thr.er
-$ gprofng display text -metrics e.hwc -disasm mxv_core $exp
-@end smallexample
-@end cartouche
-
-@smallexample
-@verbatim
-   Excl. CPU Excl.        Excl.
-   Cycles    Instructions Last-Level
-    sec.     Executed     Cache Misses
-                                                <Function: mxv_core>
-   0.                 0          0  [33] 4021ba: mov   0x8(%rsp),%r10
-                                    34.   for (uint64_t i=...) {
-   0.                 0          0  [34] 4021bf: cmp   %rsi,%rdi
-   0.                 0          0  [34] 4021c2: jbe   0x37
-   0.                 0          0  [34] 4021c4: ret
-                                    35.       double row_sum = 0.0;
-                                    36.       for (int64_t j=0; j<n; j++)
-                                    37.         row_sum += A[i][j]*b[j];
-   0.                 0          0  [37] 4021c5: mov   (%r8,%rdi,8),%rdx
-   0.                 0          0  [36] 4021c9: mov   $0x0,%eax
-   0.                 0          0  [35] 4021ce: pxor  %xmm1,%xmm1
-   0.002       12804230     321394  [37] 4021d2: movsd (%rdx,%rax,8),%xmm0
-   0.141       60819025    3866677  [37] 4021d7: mulsd (%r9,%rax,8),%xmm0
-   0.582       67221804   29407054  [37] 4021dd: addsd %xmm0,%xmm1
-## 1.871     7279075109   87989870  [36] 4021e1: add   $0x1,%rax
-   0.002       12804210      80351  [36] 4021e5: cmp   %rax,%rcx
-   0.                 0          0  [36] 4021e8: jne   0xffffffffffffffea
-                                    38.       c[i] = row_sum;
-   0.                 0          0  [38] 4021ea: movsd %xmm1,(%r10,%rdi,8)
-   0.                 0          0  [34] 4021f0: add   $0x1,%rdi
-   0.                 0          0  [34] 4021f4: cmp   %rdi,%rsi
-   0.                 0          0  [34] 4021f7: jb    0xd
-   0.                 0          0  [35] 4021f9: pxor  %xmm1,%xmm1
-   0.                 0          0  [36] 4021fd: test  %rcx,%rcx
-   0.                 0      80350  [36] 402200: jne   0xffffffffffffffc5
-   0.                 0          0  [36] 402202: jmp   0xffffffffffffffe8
-                                    39.   }
-                                    40. }
-   0.                 0          0  [40]  402204:  ret
-@end verbatim
-@end smallexample
-
-So far we have used the default settings for the event counters. It is
-quite straightforward to select specific counters. For sake of the
-example, let's assume we would like to count how many branch instructions
-and retired memory load instructions that missed in the L1 cache have been
-executed. We also want to count these events with a high resolution.
-
-This is the command to do so:
-
-@cartouche
-@smallexample
-$ exe=mxv-pthreads.exe
-$ m=3000
-$ n=2000
-$ exp=mxv.hwc.sel.2.thr.er
-$ hwc1=br_ins,hi
-$ hwc2=mem_load_retired.l1_miss,hi
-$ gprofng collect app -O $exp -h $hwc1 -h $hwc2 $exe -m $m -n $n -t 2
-@end smallexample
-@end cartouche
-
-As before, we get a table with the event counts. Due to the very
-long name for the second counter, we have somewhat modified the
-output.
-
-@cartouche
-@smallexample
-$ gprofng display text -limit 10 -functions mxv.hwc.sel.2.thr.er
-@end smallexample
-@end cartouche
-
-@smallexample
-@verbatim
-Functions sorted by metric: Exclusive Total CPU Time
-Excl.     Incl.     Excl. Branch  Excl.                 Name
-Total     Total     Instructions  mem_load_retired.l1_miss
-CPU sec.  CPU sec.                Events
-2.597     2.597     1305305319    4021340               <Total>
-2.481     2.481     1233233242    3982327               mxv_core
-0.040     0.107       19019012       9003               init_data
-0.028     0.052       23023048      15006               erand48_r
-0.024     0.024       19019008       9004               __drand48_iterate
-0.015     0.067       11011009       2998               drand48
-0.008     0.010              0       3002               _int_malloc
-0.001     0.001              0          0               brk
-0.001     0.002              0          0               sysmalloc
-0.        0.001              0          0               __default_morecore
-@end verbatim
-@end smallexample
-
-@IndexSubentry{Commands, @code{compare ratio}}
-When using event counters, the values could be very large and it is not easy
-to compare the numbers. As we will show next, the @code{ratio} feature is
-very useful when comparing such profiles.
-
-To demonstrate this, we have set up another event counter experiment where
-we would like to compare the number of last level cache miss and the number
-of branch instructions executed when using a single thread, or two threads.
-
-These are the commands used to generate the experiment directories:
-
-@cartouche
-@smallexample
-$ exe=./mxv-pthreads.exe
-$ m=3000
-$ n=2000
-$ exp1=mxv.hwc.comp.1.thr.er
-$ exp2=mxv.hwc.comp.2.thr.er
-$ gprofng collect app -O $exp1 -h llm -h br_ins $exe -m $m -n $n -t 1
-$ gprofng collect app -O $exp2 -h llm -h br_ins $exe -m $m -n $n -t 2
-@end smallexample
-@end cartouche
-
-The following script has been used to get the tables. Due to lay-out
-restrictions, we have to create two tables, one for each counter.
-
-@cartouche
-@smallexample
-# Limit the output to 5 lines
-limit 5
-# Define the metrics
-metrics name:e.llm
-# Set the comparison to ratio
-compare ratio
-functions
-# Define the metrics
-metrics name:e.br_ins
-# Set the comparison to ratio
-compare ratio
-functions
-@end smallexample
-@end cartouche
-
-Note that we print the name of the function first, followed by the counter 
-data.
-The new element is that we set the comparison mode to @code{ratio}. This
-divides the data in a column by its counterpart in the reference experiment.
-
-This is the command using this script and the two experiment directories as 
-input:
-
-@cartouche
-@smallexample
-$ gprofng display text -script my-script-comp-counters \
-  mxv.hwc.comp.1.thr.er \
-  mxv.hwc.comp.2.thr.er
-@end smallexample
-@end cartouche
-
-By design, we get two tables, one for each counter:
-
-@smallexample
-@verbatim
-Functions sorted by metric: Exclusive Last-Level Cache Misses
-
-                              mxv.hwc.comp.1.thr.er  mxv.hwc.comp.2.thr.er
-Name                          Excl. Last-Level       Excl. Last-Level
-                              Cache Misses           Cache Misses
-                                                         ratio
- <Total>                      122709276              x   0.788
- mxv_core                     121796001              x   0.787
- init_data                       723064              x   1.055
- erand48_r                       100111              x   0.500
- drand48                          60065              x   1.167
-
-Functions sorted by metric: Exclusive Branch Instructions
-
-                              mxv.hwc.comp.1.thr.er  mxv.hwc.comp.2.thr.er
-Name                          Excl. Branch           Excl. Branch
-                              Instructions           Instructions
-                                                       ratio
- <Total>                      1307307316             x 0.997
- mxv_core                     1235235239             x 0.997
- erand48_r                      23023033             x 0.957
- drand48                        20020009             x 0.600
- __drand48_iterate              17017028             x 0.882
-@end verbatim
-@end smallexample
-
-A ratio less than one in the second column, means that this counter
-value was smaller than the value from the reference experiment shown
-in the first column.
-
-This kind of presentation of the results makes it much easier to 
-quickly interpret the data.
-
-We conclude this section with thread-level event counter overviews,
-but before we go into this, there is an important metric we need to
-mention.
-
-@IndexSubentry{Hardware event counters, IPC}
-In case it is known how many instructions and CPU cycles have been executed,
-the value for the IPC (``Instructions Per Clockycle'') can be computed. 
-@xref{Hardware Event Counters Explained}.
-This is a derived metric that gives an indication how well the processor
-is utilized. The inverse of the IPC is called CPI.
-
-The @DisplayText{} command automatically computes the IPC and CPI values
-if an experiment contains the event counter values for the instructions
-and CPU cycles executed. These are part of the metric list and can be
-displayed, just like any other metric.
-
-@IndexSubentry{Commands, @code{metric_list}}
-This can be verified through the @code{metric_list} command. If we go
-back to our earlier experiment with the default event counters, we get
-the following result.
-
-@cartouche
-@smallexample
-$ gprofng display text -metric_list mxv.hwc.def.2.thr.er
-@end smallexample
-@end cartouche
-
-@smallexample
-@verbatim
-Current metrics: e.totalcpu:i.totalcpu:e.cycles:e+insts:e+llm:name
-Current Sort Metric: Exclusive Total CPU Time ( e.totalcpu )
-Available metrics:
-         Exclusive Total CPU Time: e.%totalcpu
-         Inclusive Total CPU Time: i.%totalcpu
-             Exclusive CPU Cycles: e.+%cycles
-             Inclusive CPU Cycles: i.+%cycles
-  Exclusive Instructions Executed: e+%insts
-  Inclusive Instructions Executed: i+%insts
-Exclusive Last-Level Cache Misses: e+%llm
-Inclusive Last-Level Cache Misses: i+%llm
- Exclusive Instructions Per Cycle: e+IPC
- Inclusive Instructions Per Cycle: i+IPC
- Exclusive Cycles Per Instruction: e+CPI
- Inclusive Cycles Per Instruction: i+CPI
-                             Size: size
-                       PC Address: address
-                             Name: name
-@end verbatim
-@end smallexample
-
-Among the other metrics, we see the new metrics for the IPC and CPI 
-listed.
-
-In the script below, we use this information and add the IPC and CPI 
-to the metrics to be displayed. We also use a the thread filter to 
-display these values for the individual threads.
-
-This is the complete script we have used. Other than a different selection
-of the metrics, there are no new features.
-
-@cartouche
-@smallexample
-# Define the metrics
-metrics e.insts:e.%cycles:e.IPC:e.CPI
-# Sort with respect to cycles
-sort e.cycles
-# Limit the output to 5 lines
-limit 5
-# Get the function overview for all threads
-functions
-# Get the function overview for thread 1
-thread_select 1
-functions
-# Get the function overview for thread 2
-thread_select 2
-functions
-# Get the function overview for thread 3
-thread_select 3
-functions
-@end smallexample
-@end cartouche
-
-In the metrics definition on the second line, we explicitly request the 
-counter values for the instructions (@code{e.insts}) and CPU cycles 
-(@code{e.cycles}) executed. These names can be found in output from the
-@code{metric_list} commad above.
-In addition to these metrics, we also request the IPC and CPI to be shown.
-
-As before, we used the @code{limit} command to control the number of 
-functions displayed. We then request an overview for all the threads,
-followed by three sets of two commands to select a thread and display the 
-function overview.
-
-The script above is used as follows:
-
-@cartouche
-@smallexample
-$ gprofng display text -script my-script-ipc mxv.hwc.def.2.thr.er
-@end smallexample
-@end cartouche
-
-This script produces four tables. We list them separately below,
-and have left out the additional output.
-
-The first table shows the accumulated values across the three
-threads that have been active.
-
-@smallexample
-@verbatim
-Functions sorted by metric: Exclusive CPU Cycles
-
-Excl.         Excl. CPU     Excl.  Excl.   Name
-Instructions  Cycles        IPC    CPI
-Executed       sec.      %
-7906475309    2.691 100.00  1.473  0.679   <Total>
-7432724378    2.598  96.54  1.434  0.697   mxv_core
- 188860269    0.035   1.31  2.682  0.373   erand48_r
-  73623396    0.026   0.95  1.438  0.696   init_data
-  76824434    0.018   0.66  2.182  0.458   drand48
-@end verbatim
-@end smallexample
-
-This shows that IPC of this program is completely dominated
-by function @code{mxv_core}. It has a fairly low IPC value
-of 1.43.
-
-The next table is for thread 1 and shows the values for the 
-main thread.
-
-@smallexample
-@verbatim
-Exp Sel Total
-=== === =====
-  1 1       3
-Functions sorted by metric: Exclusive CPU Cycles
-
-Excl.         Excl. CPU     Excl.  Excl.   Name
-Instructions  Cycles        IPC    CPI
-Executed       sec.      %
-473750931     0.093 100.00  2.552  0.392   <Total>
-188860269     0.035  37.93  2.682  0.373   erand48_r
- 73623396     0.026  27.59  1.438  0.696   init_data
- 76824434     0.018  18.97  2.182  0.458   drand48
-134442832     0.013  13.79  5.250  0.190   __drand48_iterate
-@end verbatim
-@end smallexample
-
-Although this thread hardly uses any CPU cycles, the overall IPC 
-of 2.55 is not all that bad.
-
-Last, we show the tables for threads 2 and 3:
-
-@smallexample
-@verbatim
-Exp Sel Total
-=== === =====
-  1 2       3
-Functions sorted by metric: Exclusive CPU Cycles
-
-Excl.         Excl. CPU     Excl.  Excl.   Name
-Instructions  Cycles        IPC    CPI
-Executed       sec.      %
-3716362189    1.298 100.00  1.435  0.697   <Total>
-3716362189    1.298 100.00  1.435  0.697   mxv_core
-         0    0.      0.    0.     0.      collector_root
-         0    0.      0.    0.     0.      driver_mxv
-
-Exp Sel Total
-=== === =====
-  1 3       3
-Functions sorted by metric: Exclusive CPU Cycles
-
-Excl.         Excl. CPU     Excl.  Excl.   Name
-Instructions  Cycles        IPC    CPI
-Executed       sec.      %
-3716362189    1.300 100.00  1.433  0.698   <Total>
-3716362189    1.300 100.00  1.433  0.698   mxv_core
-         0    0.      0.    0.     0.      collector_root
-         0    0.      0.    0.     0.      driver_mxv
-@end verbatim
-@end smallexample
-
-It is seen that both execute the same number of instructions and
-take about the same number of CPU cycles. As a result, the IPC is
-the same for both threads.
+It is also possible to invoke the lower-level commands directly, but since
+these are subject to change, in particular the options, we recommend
+using the driver.
 
-@c -- A new node --------------------------------------------------------------
-@c TBD @node    Additional Features
-@c TBD @section Additional Features
-@c ----------------------------------------------------------------------------
+@c man end
+@ManPageEnd{}
 
-@c -- A new node --------------------------------------------------------------
-@c TBD @node    More Filtering Capabilities
-@c TBD @subsection More Filtering Capabilities
 @c ----------------------------------------------------------------------------
-
-@c TBD Cover @code{samples} and @code{seconds}
-
-@c -- A new node --------------------------------------------------------------
-@node    Java Profiling 
-@section Java Profiling
+@c SEEALSO section
 @c ----------------------------------------------------------------------------
 
-@IndexSubentry{Java profiling, @code{-j on/off}}
-The @CollectApp{} command supports Java profiling. The @code{-j on} option
-can be used for this, but since this feature is enabled by default, there is 
-no need to set this explicitly. Java profiling may be disabled through the 
-@code{-j off} option.
+@ManPageStart{SEEALSO}
+@c man begin SEEALSO
 
-The program is compiled as usual and the experiment directory is created 
-similar to what we have seen before. The only difference with a C/C++
-application is that the program has to be explicitly executed by java.
+gp-archive(1), gp-collect-app(1), gp-display-html(1), gp-display-src(1),
+gp-display-text(1)
 
-For example, this is how to generate the experiment data for a Java
-program that has the source code stored in file @code{Pi.java}:
+Each gprofng command also supports the @option{--help} option. This lists the
+options and a short description for each option.
 
-@cartouche
-@smallexample
-$ javac Pi.java
-$ gprofng collect app -j on -O pi.demo.er java Pi < pi.in
-@end smallexample
-@end cartouche
-
-Regarding which java is selected to generate the data, @ToolName{} 
-first looks for the JDK in the path set in either the 
-@IndexSubentry{Java profiling, @code{JDK_HOME}}
-@code{JDK_HOME} environment variable, or in the
-@IndexSubentry{Java profiling, @code{JAVA_PATH}}
-@code{JAVA_PATH} environment variable. If neither of these variables is 
-set, it checks for a JDK in the search path (set in the PATH
-environment variable). If there is no JDK in this path, it checks for 
-the java executable in @code{/usr/java/bin/java}.
-
-In case additional options need to be passed on to the JVM, the 
-@IndexSubentry{Java profiling, @code{-J <string>}}
-@code{-J <string>} option can be used. The string with the
-option(s) has to be delimited by quotation marks in case
-there is more than one argument.
-
-The @DisplayText{} command may be used to view the performance data. There is
-no need for any special options and the same commands as previously discussed
-are supported.
-
-@IndexSubentry{Commands, @code{viewmode}}
-@IndexSubentry{Java profiling, different view modes}
-The @code{viewmode} command 
-@xref{The Viewmode}
-is very useful to examine the call stacks. 
-
-For example, this is how one can see the native call stacks. For
-lay-out purposes we have restricted the list to the first five entries:
-
-@cartouche
-@smallexample
-$ gprofng display text -limit 5 -viewmode machine -calltree pi.demo.er 
-@end smallexample
-@end cartouche
+For example, this displays the options supported by the
+@command{gprofng collect app} command:
 
 @smallexample
-@verbatim
-Print limit set to 5
-Viewmode set to machine
-Functions Call Tree. Metric: Attributed Total CPU Time
-
-Attr.      Name
-Total
-CPU sec.
-1.381      +-<Total>
-1.171        +-Pi.calculatePi(double)
-0.110        +-collector_root
-0.110        |  +-JavaMain
-0.070        |    +-jni_CallStaticVoidMethod
-@end verbatim
+$ gprofng collect app --help
 @end smallexample
 
-@noindent
-Note that the selection of the viewmode is echoed in the output.
-
-@c -- A new node --------------------------------------------------------------
-@c TBD @node Summary of Options and Commands
-@c TBD @chapter Summary of Options and Commands
-@c ----------------------------------------------------------------------------
-
-@c -- A new node --------------------------------------------------------------
-@node    Terminology
-@chapter Terminology
-
-Throughout this manual, certain terminology specific to profiling tools, 
-or @ToolName{}, or even to this document only, is used. In this chapter we 
-explain this terminology in detail.
-
-@menu
-* The Program Counter::                    What is a Program Counter?
-* Inclusive and Exclusive Metrics::        An explanation of inclusive and exclusive metrics.
-* Metric Definitions::                     Definitions associated with metrics.
-* The Viewmode::                           Select the way call stacks are presented.
-* The Selection List::                     How to define a selection.
-* Load Objects and Functions::             The components in an application.
-* The Concept of a CPU in @ProductName{}:: The definition of a CPU.
-* Hardware Event Counters Explained::      What are event counters?
-* apath::                                  Our generic definition of a path.
-@end menu
-
-@c ----------------------------------------------------------------------------
-@node    The Program Counter
-@section The Program Counter
-@c ----------------------------------------------------------------------------
-
-@cindex PC
-@cindex Program Counter
-The @emph{Program Counter}, or PC for short, keeps track where program execution is.
-The address of the next instruction to be executed is stored in a special
-purpose register in the processor, or core.
-
-@cindex Instruction pointer
-The PC is sometimes also referred to as the @emph{instruction pointer}, but
-we will use Program Counter or PC throughout this document.
-
-@c ----------------------------------------------------------------------------
-@node    Inclusive and Exclusive Metrics
-@section Inclusive and Exclusive Metrics
-@c ----------------------------------------------------------------------------
-
-In the remainder, these two concepts occur quite often and for lack of a better
-place, they are explained here.
-
-@cindex Inclusive metric
-The @emph{inclusive} value for a metric includes all values that are part of
-the dynamic extent of the target function. For example if function @code{A}
-calls functions @code{B} and @code{C}, the inclusive CPU time for @code{A} 
-includes the CPU time spent in @code{B} and @code{C}.
-
-@cindex Exclusive metric
-In contrast with this, the @emph{exclusive} value for a metric is computed
-by excluding the metric values used by other functions called. In our imaginary
-example, the exclusive CPU time for function @code{A} is the time spent outside
-calling functions @code{B} and @code{C}.
-
-@cindex Leaf function
-In case of a @emph{leaf function}, the inclusive and exclusive values for the 
-metric are the same since by definition, it is not calling any other 
-function(s).
-
-Why do we use these two different values? The inclusive metric shows the most
-expensive path, in terms of this metric, in the application. For example, if
-the metric is cache misses, the function with the highest inclusive metric
-tells you where most of the cache misses come from.
-
-Within this branch of the application, the exclusive metric points to the
-functions that contribute and help to identify which part(s) to consider
-for further analysis.
-
-@c ----------------------------------------------------------------------------
-@node    Metric Definitions
-@section Metric Definitions
-@c ----------------------------------------------------------------------------
-The metrics to be shown are highly customizable. In this section we explain 
-the definitions associated with metrics.
-
-@IndexSubentry{Commands, @code{metrics}}
-The @code{metrics} command takes a colon (:) separated list with special
-keywords. This keyword consists of the following three fields: 
-@code{<flavor>}@code{<visibility>}@code{<metric_name>}.
-
-@cindex Flavor field
-@cindex Visibility field
-@cindex Metric name field
-The @emph{<flavor>} field is either an @code{e} for ``exclusive'', or @code{i}
-for ``inclusive''. The @code{<metric_name>} field is the name of the metric
-request. The @emph{<visibility>} field consists of one ore more characters
-from the following table:
-
-@table @code
-
-@item .
-Show the metric as time. This applies to timing metrics and hardware event counters
-that measure cycles. Interpret as @code{+} for other metrics.
-
-@item %
-Show the metric as a percentage of the total value for this metric.
-
-@item +
-Show the metric as an absolute value. For hardware event counters this is
-the event count. Interpret as @code{.} for timing metrics.
-
-@item |
-Do not show any metric value. Cannot be used with other visibility characters.
-
-@end table 
-
-@c ----------------------------------------------------------------------------
-@node    The Viewmode
-@section The Viewmode
-
-@cindex Viewmode
-@IndexSubentry{Commands, @code{viewmode}}
-
-There are different ways to view a call stack in Java. In @ToolName{}, this
-is called the @emph{viewmode} and the setting is controlled through a command
-with the same name.
-
-The @code{viewmode} command takes one of the following keywords:
-
-@table @code
-
-@item user
-This is the default and shows the Java call stacks for Java threads.
-No call stacks for any housekeeping threads are shown. The function 
-list contains a function 
-@IndexSubentry{Java profiling, @code{<JVM-System>}}
-@code{<JVM-System>} that represents the aggregated time from non-Java
-threads.
-When the JVM software does not report a Java call stack, time is reported
-against the function 
-@IndexSubentry{Java profiling, @code{<no Java callstack recorded>}}
-@code{<no Java callstack recorded>}.
-
-
-@item expert
-Show the Java call stacks for Java threads when the Java code from the
-user is executed and machine call stacks when JVM code is executed, or 
-when the JVM software does not report a Java call stack. 
-Show the machine call stacks for housekeeping threads.
-
-@item machine
-Show the actual native call stacks for all threads.
-
-@end table
-
-@c ----------------------------------------------------------------------------
-@c ----------------------------------------------------------------------------
-@node    The Selection List
-@section The Selection List
-@c ----------------------------------------------------------------------------
-
-@cindex Selection list
-@cindex List specification
-Several commands allow the user to specify a subset of a list. For example,
-to select specific threads from all the threads that have been used when 
-conducting the experiment(s). 
-
-Such a selection list (or ``list'' in the remainder of this section) can be a 
-single number, a contiguous range of numbers with the start and end numbers 
-separated by a hyphen (@code{-}), a comma-separated list of numbers and 
-ranges, or the @code{all} keyword. Lists must not contain spaces. 
+The user guide for gprofng is maintained as a Texinfo manual.  If the
+@command{info} and @command{gprofng} programs are correctly installed, the
+command @command{info gprofng} should give access to this document.
 
-Each list can optionally be preceded by an experiment list with a similar 
-format, separated from the list by a colon (:). 
-If no experiment list is included, the list applies to all experiments.
-
-Multiple lists can be concatenated by separating the individual lists 
-by a plus sign.
-
-These are some examples of various filters using a list:
-
-@table @code
-
-@item thread_select 1
-Select thread 1 from all experiments.
-
-@item thread_select all:1
-Select thread 1 from all experiments.
-
-@item thread_select 1:1+2:2
-Select thread 1 from experiment 1 and thread 2 from experiment 2.
-
-@item cpu_select all:1,3,5
-Selects cores 1, 3, and 5 from all experiments.
-
-@item cpu_select 1,2:all
-Select all cores from experiments 1 and 2, as listed by the @code{by exp_list} command.
-
-@end table
-
-@c ----------------------------------------------------------------------------
-@node    Load Objects and Functions
-@section Load Objects and Functions
-@c ----------------------------------------------------------------------------
-
-An application consists of various components. The source code files are 
-compiled into object files. These are then glued together at link time to form
-the executable. 
-During execution, the program may also dynamically load objects.
-
-@cindex Load object 
-A @emph{load object} is defined to be an executable, or shared object. A shared
-library is an example of a load object in @ToolName{}.
-
-Each load object, contains a text section with the instructions generated by the 
-compiler, a data section for data, and various symbol tables.
-All load objects must contain an 
-@cindex ELF
-ELF 
-symbol table, which gives the names and addresses of all the globally known 
-functions in that object. 
-
-Load objects compiled with the -g option contain additional symbolic information 
-that can augment the ELF symbol table and provide information about functions that 
-are not global, additional information about object modules from which the functions 
-came, and line number information relating addresses to source lines.
-
-The term
-@cindex Function
-@emph{function}
-is used to describe a set of instructions that represent a high-level operation 
-described in the source code. The term also covers methods as used in C++ and in
-the Java programming language. 
-
-In the @ToolName{} context, functions are provided in source code format. 
-Normally their names appear in the symbol table representing a set of addresses. 
-@cindex Program Counter
-@cindex PC
-If the Program Counter (PC) is within that set, the program is executing within that function.
-
-In principle, any address within the text segment of a load object can be mapped to a 
-function. Exactly the same mapping is used for the leaf PC and all the other PCs on the 
-call stack. 
-
-Most of the functions correspond directly to the source model of the program, but 
-there are exceptions. This topic is however outside of the scope of this guide.
+@c man end
+@ManPageEnd{}
 
 @c ----------------------------------------------------------------------------
-@node    The Concept of a CPU in @ProductName{}
-@section The Concept of a CPU in @ProductName{}
+@c COPYRIGHT section
 @c ----------------------------------------------------------------------------
 
-@cindex CPU
-In @ProductName{}, there is the concept of a CPU. Admittedly, this is not the
-best word to describe what is meant here and may be replaced in the future.
-
-The word CPU is used in many of the displays.
-In the context of @ProductName{}, it is meant to denote a part of the 
-processor that is capable of executing instructions and with its own state, 
-like the program counter.
+@ManPageStart{COPYRIGHT}
+@c man begin COPYRIGHT
 
-For example, on a contemporary processor, a CPU could be a core. In case
-hardware threads are supported within a core, it could be one of those
-hardware threads.
+Copyright @copyright{} 2022-2023 Free Software Foundation, Inc.
 
-@c ----------------------------------------------------------------------------
-@node    Hardware Event Counters Explained
-@section Hardware Event Counters Explained
-@c ----------------------------------------------------------------------------
+Permission is granted to copy, distribute and/or modify this document
+under the terms of the GNU Free Documentation License, Version 1.3
+or any later version published by the Free Software Foundation;
+with no Invariant Sections, with no Front-Cover Texts, and with no
+Back-Cover Texts.  A copy of the license is included in the
+section entitled ``GNU Free Documentation License''.
 
-@IndexSubentry{Hardware event counters, description}
-For quite a number of years now, many microprocessors have supported hardware 
-event counters. 
-
-On the hardware side, this means that in the processor there are one or more 
-registers dedicated to count certain activities, or ``events''.
-Examples of such events are the number of instructions executed, or the number 
-of cache misses at level 2 in the memory hierarchy.
-
-While there is a limited set of such registers, the user can map events onto
-them. In case more than one register is available, this allows for the 
-simultaenous measurement of various events.
-
-A simple, yet powerful, example is to simultaneously count the number of CPU 
-cycles and the number of instructions excuted. These two numbers can then be
-used to compute the 
-@cindex IPC
-@emph{IPC} value. IPC stands for ``Instructions Per Clockcycle'' and each processor 
-has a maximum. For example, if this maximum number is 2, it means the 
-processor is capable of executing two instructions every clock cycle.
-
-Whether this is actually achieved, depends on several factors, including the
-instruction characteristics.
-However, in case the IPC value is well below this maximum in a time critical 
-part of the application and this cannot be easily explained, further 
-investigation is probably warranted.
-
-@cindex CPI
-A related metric is called @emph{CPI}, or ``Clockcycles Per Instruction''.
-It is the inverse of the CPI and can be compared against the theoretical
-value(s) of the target instruction(s). A significant difference may point
-at a bottleneck.
-
-One thing to keep in mind is that the value returned by a counter can either
-be the number of times the event occured, or a CPU cycle count. In case of 
-the latter it is possible to convert this number to time.
-
-@IndexSubentry{Hardware event counters, variable CPU frequency}
-This is often easier to interpret than a simple count, but there is one
-caveat to keep in mind. The CPU frequency may not have been constant while
-the experimen was recorded and this impacts the time reported.
-
-These event counters, or ``counters'' for short, provide great insight into
-what happens deep inside the processor. In case higher level information does
-not provide the insight needed, the counters provide the information to get 
-to the bottom of a performance problem.
-
-There are some things to consider though. 
-
-@itemize @bullet
-
-@item
-The event definitions and names vary across processors and it may even happen 
-that some events change with an update.
-Unfortunately and this is luckily rare, there are sometimes bugs causing the 
-wrong count to be returned.
-
-@IndexSubentry{Hardware event counters, alias name}
-In @ToolName{}, some of the processor specific event names have an alias 
-name. For example @code{insts} measures the instructions executed. 
-These aliases not only makes it easier to identify the functionality, but also 
-provide portability of certain events across processors.
-
-@item
-Another complexity is that there are typically many events one can monitor.
-There may up to hundreds of events available and it could require several
-experiments to zoom in on the root cause of a performance problem.
-
-@item
-There may be restrictions regarding the mapping of event(s) onto the 
-counters. For example, certain events may be restricted to specific 
-counters only. As a result, one may have to conduct additional experiments
-to cover all the events of interest.
-
-@item
-The names of the events may also not be easy to interpret. In such cases,
-the description can be found in the architecture manual for the processor.
-
-@end itemize
-
-Despite these drawbacks, hardware event counters are extremely useful and
-may even turn out to be indispensable.
+@c man end
+@ManPageEnd{}
 
 @c ----------------------------------------------------------------------------
-@node    apath
-@section What is <apath>?
-@c ----------------------------------------------------------------------------
-
-In most cases, @ToolName{} shows the absolute pathnames of directories. These
-tend to be rather long, causing display issues in this document. 
-
-Instead of wrapping these long pathnames over multiple lines, we decided to 
-represent them by the @code{<apath>} symbol, which stands for ``an absolute 
-pathname''.
-
-Note that different occurrences of @code{<apath>} may represent different
-absolute pathnames.
-
-@c -- A new node --------------------------------------------------------------
-@node    Other Document Formats
-@chapter Other Document Formats
+@c If this text is used for a man page, exit.  Otherwise we need to continue.
 @c ----------------------------------------------------------------------------
 
-This document is written in Texinfo and the source text is made available as
-part of the binutils distribution. The file name is @code{gprofng.texi} and
-can be found in subdirectory @code{doc} under directory @code{gprofng} in the 
-top level directory.
-
-This file can be used to generate the document in the @code{info}, @code{html}, 
-and @code{pdf} formats.
-The default installation procedure creates a file in the @code{info} format and 
-stores it in the documentation section of binutils.
-
-The probably easiest way to generate a different format from this Texinfo 
-document is to go to the distribution directory that was created when the 
-tools were built.
-This is either the default distribution directory, or the one that has been set
-with the @code{--prefix} option as part of the @code{configure} command.
-In this example we symbolize this location with @code{<dist>}.
-
-The make file called @code{Makefile} in directory @code{<dist>/gprofng/doc}
-supports several commands to generate this document in different formats. 
-We recommend to use these commands.
-
-They create the file(s) and install it in the documentation directory of binutils,
-which is @code{<dist>/share/doc} in case @code{html} or @code{pdf} is selected and
-@code{<dist>/share/info} for the file in the @code{info} format.
-
-To generate this document in the requested format and install it in the documentation 
-directory, the commands below should be executed. In this notation, @code{<format>} 
-is one of @code{info}, @code{html}, or @code{pdf}:
-
-@smallexample
-@verbatim
-$ cd <dist>/gprofng/doc
-$ make install-<format>
-@end verbatim
-@end smallexample
-
-@noindent
-Some things to note:
-
-@itemize
-
-@item
-For the @code{pdf} file to be generated, the 
-@cindex TeX
-TeX document formatting software is required and the relevant commmands need
-to be included in the search path. An example of a popular TeX implementation 
-is @emph{TexLive}. It is beyond the scope of this document to go into the
-details of installing and using TeX, but it is well documented elsewhere.
-
-@item
-Instead of generating a single file in the @code{html} format, it is also 
-possible to create a directory with individual files for the various chapters. 
-To do so, remove the use of @code{--no-split} in variable @code{MAKEINFOHTML}
-in the make file in the @code{doc} directory.
-
-@item
-The make file also supports commands to only generate the file in the desired
-format and not move them to the documentation directory. This is
-accomplished through the @code{make <format>} command.
-
-@end itemize
-
-@ifnothtml
-@node       Index
-@unnumbered Index
-@printindex cp
-@end ifnothtml
-
+@ifset man
 @bye
+@end ifset
diff --git a/gprofng/doc/gprofng_ug.texi b/gprofng/doc/gprofng_ug.texi
new file mode 100644
index 0000000..1fe95c7
--- /dev/null
+++ b/gprofng/doc/gprofng_ug.texi
@@ -0,0 +1,4396 @@
+\input texinfo @c -*-texinfo-*-
+
+@c ----------------------------------------------------------------------------
+@c This is the Texinfo source file for the GPROFNG manual.  This manual
+@c includes the man pages for the various tools.
+@c
+@c Author: Ruud van der Pas
+@c ----------------------------------------------------------------------------
+
+@c %**start of header
+
+@setfilename gprofng.info
+@settitle GNU gprofng
+
+@c -- Set the indent for the @example command to 1 space, not 5 ---------------
+@exampleindent 1
+
+@paragraphindent 3
+
+@c %**end of header
+
+@c -- Start a new chapter on a new, odd numbered, page ------------------------
+@setchapternewpage odd
+
+@c -- Merge all index entries into the Concepts Index -------------------------
+@syncodeindex fn cp
+@syncodeindex ky cp
+@syncodeindex pg cp
+@syncodeindex vr cp
+
+@c -- Macros specific to gprofng ----------------------------------------------
+@include gp-macros.texi
+
+@c -- Get the version information ---------------------------------------------
+@include version.texi
+
+@c -- Entry for the Info dir structure ----------------------------------------
+@ifnottex
+@dircategory Software development
+@direntry
+* gprofng: (gprofng).                    The next generation profiling tool for Linux
+@end direntry
+@end ifnottex
+
+@c -- Copyright stuff ---------------------------------------------------------
+@copying
+This document is the manual for @ProductName{}, last updated @value{UPDATED}.
+
+Copyright @copyright{} 2022-2023 Free Software Foundation, Inc.
+
+@c -- @quotation
+Permission is granted to copy, distribute and/or modify this document
+under the terms of the GNU Free Documentation License,
+Version 1.3 or any later version published by the Free Software
+Foundation; with no Invariant Sections, with no Front-Cover Texts,
+and with no Back-Cover Texts.  A copy of the license is included in the
+section entitled ``GNU Free Documentation License.''
+
+@c -- @end quotation
+@end copying
+
+@finalout
+@smallbook
+
+@c -- Define the title page ---------------------------------------------------
+@titlepage
+@title GNU gprofng
+@subtitle The next generation profiling tool for Linux
+@subtitle version @value{VERSION} (last updated @value{UPDATED})
+@author Ruud van der Pas
+@page
+@vskip 0pt plus 1filll
+@insertcopying
+@end titlepage
+
+@c -- Generate the Table of Contents ------------------------------------------
+@contents
+
+@c -- The Top node ------------------------------------------------------------
+@c Should contain a short summary, copying permissions and a master menu.
+@c ----------------------------------------------------------------------------
+@ifnottex
+@node Top
+@top  GNU Gprofng
+
+@insertcopying
+@end ifnottex
+
+@ifinfo
+@c -- The menu entries --------------------------------------------------------
+
+@c * Display Source Code::             Display the source code and disassembly.
+@c * Archive Experiment Data::         Archive an experiment.
+
+@menu
+* Introduction::                    About this manual.
+* Overview::                        A brief overview of @ProductName{}.
+* A Mini Tutorial::                 A short tutorial covering the key features.
+* The gprofng Tools::               An overview of the tools supported.
+* Performance Data Collection::     Record the performance information.
+* View the Performance Information:: Different ways to view the data.
+* Terminology::                     Concepts and terminology explained.
+* Other Document Formats::          Create this document in other formats.
+* The gprofng Man Pages::           The gprofng man pages.
+* Index::                           The index.
+
+@detailmenu
+
+--- The Detailed Node Listing ---
+
+Introduction
+
+Overview
+
+* Main Features::                     A high level overview.
+* Sampling versus Tracing::           The pros and cons of sampling versus tracing.
+* Steps Needed to Create a Profile::  How to create a profile.
+
+A Mini Tutorial
+
+* Getting Started::                 The basics of profiling with @ProductName{}.
+* Support for Multithreading::      Commands specific to multithreaded applications.
+* View Multiple Experiments::       Analyze multiple experiments simultaneously.
+* Profile Hardware Event Counters:: How to use hardware event counters.
+* Java Profiling::                  How to profile a Java application.
+
+The gprofng Tools
+
+* Tools Overview::                            A brief description of the tools.
+* The gprofng.rc file with default settings:: Customize the settings.
+* Filters::                                   Filters.
+* Supported Environment Variables::           The supported environment variables.
+
+Terminology
+
+* The Program Counter::                    What is a Program Counter?
+* Inclusive and Exclusive Metrics::        An explanation of inclusive and exclusive metrics.
+* Metric Definitions::                     Definitions associated with metrics.
+* The Viewmode::                           Select the way call stacks are presented.
+* The Selection List::                     How to define a selection.
+* Load Objects and Functions::             The components in an application.
+* The Concept of a CPU in @ProductName{}:: The definition of a CPU.
+* Hardware Event Counters Explained::      What are event counters?
+* apath::                                  Our generic definition of a path.
+
+The gprofng Man Pages
+
+* gprofng collect app::                    The man page for gprofng collect app.
+* gprofng display text::                   The man page for gprofng display text.
+* gprofng display src::                    The man page for gprofng display src.
+* gprofng display html::                   The man page for gprofng display html.
+* gprofng archive::                        The man page for gprofng archive.
+
+@c -- Index
+
+@end detailmenu
+@end menu
+@end ifinfo
+
+@c -- A new node --------------------------------------------------------------
+@node    Introduction
+@chapter Introduction
+@c ----------------------------------------------------------------------------
+The @ProductName{} tool is the next generation profiler for Linux.  It consists
+of various commands to generate and display profile information.
+
+This manual starts with a tutorial on how to create and interpret a profile.  This
+part is highly practical and has the goal to get users up to speed as quickly
+as possible.  As soon as possible, we would like to show you how to get your
+first profile on your screen.
+
+This is followed by more examples, covering many of the features.  At the
+end of this tutorial, you should feel confident enough to tackle the more
+complex tasks.
+
+In a future update a more formal reference manual will be included as well.
+Since even in this tutorial we use certain terminology, we have included a
+chapter with descriptions at the end.  In case you encounter unfamiliar
+wordings or terminology, please check this chapter.
+
+One word of caution.  In several cases we had to somewhat tweak the screen
+output in order to make it fit.  This is why the output may look somewhat
+different when you try things yourself.
+
+For now, we wish you a smooth profiling experience with @ProductName{} and
+good luck tackling performance bottlenecks.
+
+@c -- A new node --------------------------------------------------------------
+@c cccccc @node    A Brief Overview of @ProductName{}
+@node    Overview
+@chapter A Brief Overview of @ProductName{}
+@c ----------------------------------------------------------------------------
+
+@menu
+* Main Features::                     A high level overview.
+* Sampling versus Tracing::           The pros and cons of sampling versus tracing.
+* Steps Needed to Create a Profile::  How to create a profile.
+@end menu
+
+Before we cover this tool in quite some detail, we start with a brief overview
+of what it is, and the main features.  Since we know that many of you would
+like to get started right away, already in this first chapter we explain the
+basics of profiling with @ToolName{}.
+
+@c ----------------------------------------------------------------------------
+@c TBD Review this text.  Probably be more specific on the gcc releases and
+@c processor specifics.
+@c ----------------------------------------------------------------------------
+
+@c -- A new node --------------------------------------------------------------
+@node    Main Features
+@section Main Features
+@c ----------------------------------------------------------------------------
+
+@noindent
+These are the main features of the @ProductName{} tool:
+
+@itemize @bullet
+
+@item
+Profiling is supported for an application written in C, C++, Java, or Scala.
+
+@c TBD Java: up to 1.8 full support, support other than for modules
+
+@item
+Shared libraries are supported.  The information is presented at the instruction
+level.
+
+@item
+The following multithreading programming models are supported: Pthreads,
+OpenMP, and Java threads.
+
+@item
+This tool works with unmodified production level executables.  There is no need to
+recompile the code, but if the @samp{-g} option has been used when building
+the application, source line level information is available.
+
+@item
+The focus is on support for code generated with the @command{gcc} compiler, but
+there is some limited support for the @command{icc} compiler as well.  Future
+improvements and enhancements will focus on @command{gcc} though.
+
+@item
+Processors from Intel, AMD, and Arm are supported, but the level of support
+depends on the architectural details.  In particular, hardware event counters
+may not be supported.  If this is the case, all views not related to these
+counters still ought to work though.
+
+@item
+Several views into the data are supported.  For example, a function overview
+where the time is spent, but also a source line, disassembly, call tree and
+a caller-callees overview are available.
+
+@item
+Through filters, the user can zoom in on an area of interest.
+
+@item
+Two or more profiles can be aggregated, or used in a comparison.  This comparison
+can be obtained at the function, source line, and disassembly level.
+
+@item
+Through a simple scripting language, and customization of the metrics shown,
+the generation and creation of a profile can be fully automated and provide
+tailored output.
+
+@end itemize
+
+@c -- A new node --------------------------------------------------------------
+@node    Sampling versus Tracing
+@section Sampling versus Tracing
+@c ----------------------------------------------------------------------------
+
+A key difference with some other profiling tools is that the main data
+collection command @CollectApp{} mostly uses
+@cindex Program Counter sampling
+@cindex PC sampling
+Program Counter (PC) sampling
+under the hood.
+
+With @emph{sampling}, the executable is interrupted at regular intervals.  Each
+time it is halted, key information is gathered and stored.  This includes the
+Program Counter that keeps track of where the execution is.  Hence the name.
+
+Together with operational data, this information is stored in the experiment
+directory and can be viewed in the second phase.
+
+For example, the PC information is used to derive where the program was when
+it was halted.  Since the sampling interval is known, it is relatively easy to
+derive how much time was spent in the various parts of the program.
+
+The opposite technique is generally referred to as @emph{tracing}.  With
+tracing, the target is instrumented with specific calls that collect the
+requested information.
+
+These are some of the pros and cons of PC sampling versus tracing:
+
+@itemize
+
+@item
+Since there is no need to recompile, existing executables can be used
+and the profile measures the behaviour of exactly the same executable that is
+used in production runs.
+
+With tracing, one inherently profiles a different executable, because
+the calls to the instrumentation library may affect the compiler optimizations
+and run time behaviour.
+
+@item
+With sampling, there are very few restrictions on what can be profiled and even without
+access to the source code, a basic profile can be made.
+
+@item
+A downside of sampling is that, depending on the sampling frequency, small
+functions may be missed or not captured accurately.  Although this is rare,
+this may happen and is the reason why the user has control over the sampling rate.
+
+@item
+While tracing produces precise information, sampling is statistical in nature.
+As a result, small variations may occur across seemingly identical runs.  We
+have not observed more than a few percent deviation though, especially if
+the target job executed for a sufficiently long time.
+
+@item
+With sampling, it is not possible to get an accurate count of how often
+functions are called.
+
+@end itemize
+
+@c -- A new node --------------------------------------------------------------
+@node    Steps Needed to Create a Profile
+@section Steps Needed to Create a Profile
+@c ----------------------------------------------------------------------------
+
+Creating a profile takes two steps.  First the profile data needs to be
+generated.  This is followed by a viewing step to create a report from the
+information that has been gathered.
+
+Every @ProductName{} command starts with @ToolName{}, the name of the driver.
+This is followed by a keyword to define the high level functionality.  Depending
+on this keyword, a third qualifier may be needed to further narrow down the request.
+This combination is then followed by options that are specific to the functionality
+desired.
+
+The command to gather, or ``collect'', the performance data is called
+@CollectApp{}.  Aside from numerous options, this command takes the name
+of the target executable as an input parameter.
+
+Upon completion of the run, the performance data can be
+found in the newly created
+@cindex Experiment directory
+experiment directory.
+
+Unless explicitly specified otherwise, a default
+name for this directory is chosen.  The name is @file{test.<n>.er} where
+@var{<n>} is the first integer number not in use yet for such a name.
+
+For example, the first time @CollectApp{} is invoked, an experiment
+directory with the name @file{test.1.er} is created.
+Upon a subsequent invocation of @CollectApp{} in the same directory,
+an experiment directory with the name @file{test.2.er} will be created,
+and so forth.
+
+Note that @CollectApp{} supports an option to explicitly name the experiment
+directory.
+Aside from the restriction that the name of this directory has to end
+with @samp{.er}, any valid directory name can be used for this.
+
+Now that we have the performance data, the next step is to display it.
+
+@IndexSubentry{@code{gprofng}, @code{display text}}
+The most commonly used command to view the performance information is
+@DisplayText{}.   This is a very extensive and customizable tool that
+produces the information in ASCII format.
+
+@IndexSubentry{@code{gprofng}, @code{display html}}
+Another option is to use @DisplayHTML{}.  This tool generates a directory with
+files in html format.  These can be viewed in a browser, allowing for easy
+navigation through the profile data.
+
+@c -- A new node --------------------------------------------------------------
+@node    A Mini Tutorial
+@chapter A Mini Tutorial
+@c ----------------------------------------------------------------------------
+
+In this chapter we present and discuss the main functionality of @ToolName{}.
+This will be a practical approach, using an example code to generate profile
+data and show how to get various performance reports.
+
+@menu
+* Getting Started::                 The basics of profiling with @ProductName{}.
+* Support for Multithreading::      Commands specific to multithreaded applications.
+* View Multiple Experiments::       Analyze multiple experiments simultaneously.
+* Profile Hardware Event Counters:: How to use hardware event counters.
+* Java Profiling::                  How to profile a Java application.
+@end menu
+
+@c -- A new node --------------------------------------------------------------
+@node    Getting Started
+@section Getting Started
+@c ----------------------------------------------------------------------------
+
+The information presented here provides a good and common basis for many
+profiling tasks, but there are more features that you may want to leverage.
+
+These are covered in subsequent sections in this chapter.
+
+@menu
+* The Example Program::                        A description of the example program used.
+* A First Profile::                            How to get the first profile.
+* The Source Code View::                       Display the metrics in the source code.
+* The Disassembly View::                       Display the metrics at the instruction level.
+* Display and Define the Metrics::             An example how to customize the metrics.
+* Customization of the Output::                An example how to customize the output.
+* Name the Experiment Directory::              Change the name of the experiment directory.
+* Control the Number of Lines in the Output::  Change the number of lines in the tables.
+* Sorting the Performance Data::               How to set the metric to sort by.
+* Scripting::                                  Use a script to execute the commands.
+* A More Elaborate Example::                   An example of customization.
+* The Call Tree::                              Display the dynamic call tree.
+* More Information on the Experiment::         How to get additional statistics.
+* Control the Sampling Frequency::             How to control the sampling granularity.
+* Information on Load Objects::                How to get more information on load objects.
+@end menu
+
+@c -- A new node --------------------------------------------------------------
+@node       The Example Program
+@subsection The Example Program
+@c ----------------------------------------------------------------------------
+
+Throughout this guide we use the same example C code that implements the
+multiplication of a vector of length @math{n} by an @math{m} by @math{n}
+matrix.  The result is stored in a vector of length @math{m}.
+@cindex Pthreads
+@cindex Posix Threads
+The algorithm has been parallelized using Posix Threads, or Pthreads for short.
+
+The code was built using the @code{gcc} compiler and the name of the executable
+is
+@cindex mxv-pthreads
+@command{mxv-pthreads}.
+
+The matrix sizes can be set through the @code{-m} and @code{-n} options.  The
+number of threads is set with the @code{-t} option.  These are additional threads
+that are used in the multiplication. To increase the duration of the run, the
+computations are executed repeatedly.
+
+This is an example that multiplies a @math{8000} by @math{4000} matrix with
+a vector of length @math{4000}.  Although this is a multithreaded application,
+initially we will be using @math{1} thread.  Later on we will show examples
+using multiple threads.
+
+@smallexample
+@verbatim
+$ ./mxv-pthreads -m 8000 -n 4000 -t 1
+mxv: error check passed - rows = 8000 columns = 4000 threads = 1
+$
+@end verbatim
+@end smallexample
+
+The program performs an internal check to verify that the computed results
+are correct.  The result of this check is printed, as well as the matrix
+sizes and the number of threads used.
+
+@c -- A new node --------------------------------------------------------------
+@node       A First Profile
+@subsection A First Profile
+@c ----------------------------------------------------------------------------
+
+The first step is to collect the performance data.  It is important to remember
+that much more information is gathered than may be shown by default.  Often a
+single data collection run is sufficient to get a lot of insight.
+
+The @CollectApp{} command is used for the data collection.  Nothing needs to be
+changed in the way the application is executed.  The only difference is that it
+is now run under control of the tool, as shown below:
+
+@cartouche
+@smallexample
+$ gprofng collect app ./mxv-pthreads -m 8000 -n 4000 -t 1
+@end smallexample
+@end cartouche
+
+@noindent
+This produces the following output:
+
+@smallexample
+@verbatim
+Creating experiment directory test.1.er (Process ID: 2749878) ...
+mxv: error check passed - rows = 8000 columns = 4000 threads = 1
+@end verbatim
+@end smallexample
+
+We see a message that an experiment directory with the name @file{test.1.er}
+has been created.  The process id is also echoed. The application completes
+as usual and we have our first experiment directory that can be analyzed.
+
+The tool we use for this is called @DisplayText{}.  It takes the name of
+the experiment directory as an argument.
+
+@cindex Interpreter mode
+If invoked this way, the tool starts in the interactive @emph{interpreter} mode.
+While in this environment, commands can be given and the tool responds.  This is
+illustrated below:
+
+@smallexample
+@verbatim
+$ gprofng display text test.1.er
+Warning: History and command editing is not supported on this system.
+(gp-display-text) quit
+$
+@end verbatim
+@end smallexample
+
+@cindex Command line mode
+While useful in certain cases, we prefer to use this tool in command line mode
+by specifying the commands to be issued when invoking the tool.  The way to do
+this is to prepend the command(s) with a hyphen (@samp{-}) if used on the
+command line.
+
+Since this makes the commands appear to be options, they are also sometimes
+referred to as such, but technically they are commands.  This is the
+terminology we will use in this user guide, but for convenience the commands
+are also listed as options in the index.
+
+For example,
+@IndexSubentry{Options,  @code{-functions}}
+@IndexSubentry{Commands, @code{functions}}
+below we use the @command{functions} command to request a list of the functions
+that have been executed, plus their respective CPU times:
+
+@cartouche
+@smallexample
+$ gprofng display text -functions test.1.er
+@end smallexample
+@end cartouche
+
+@smallexample
+@verbatim
+$ gprofng display text -functions test.1.er
+
+Functions sorted by metric: Exclusive Total CPU Time
+
+Excl. Total   Incl. Total    Name
+CPU           CPU
+ sec.      %   sec.      %
+9.367 100.00  9.367 100.00   <Total>
+8.926  95.30  8.926  95.30   mxv_core
+0.210   2.24  0.420   4.49   init_data
+0.080   0.85  0.210   2.24   drand48
+0.070   0.75  0.130   1.39   erand48_r
+0.060   0.64  0.060   0.64   __drand48_iterate
+0.010   0.11  0.020   0.21   _int_malloc
+0.010   0.11  0.010   0.11   sysmalloc
+0.      0.    8.926  95.30   <static>@0x47960 (<libgp-collector.so>)
+0.      0.    0.440   4.70   __libc_start_main
+0.      0.    0.020   0.21   allocate_data
+0.      0.    8.926  95.30   driver_mxv
+0.      0.    0.440   4.70   main
+0.      0.    0.020   0.21   malloc
+0.      0.    8.926  95.30   start_thread
+@end verbatim
+@end smallexample
+
+As easy and simple as these steps are, we do have a first profile of our program!
+
+There are five columns.  The first four contain the
+@cindex Total CPU time
+``Total CPU Time'', which
+is the sum of the user and system time.  See @ref{Inclusive and Exclusive Metrics}
+for an explanation of ``exclusive'' and ``inclusive'' times.
+
+The first line echoes the metric that is used to sort the output.  By default,
+this is the exclusive CPU time, but through the @command{sort} command, the sort
+metric can be changed by the user.
+
+Next, there are four columns with the exclusive and inclusive CPU times and the
+respective percentages.  This is followed by the name of the function.
+
+@IndexSubentry{Miscellaneous, @code{<Total>}}
+The function with the name @code{<Total>} is not a user function. It is a 
+pseudo function introduced by @ToolName{}.  It is used to display the
+accumulated measured metric values.  In this example, we see that the total
+CPU time of this job was 9.367 seconds and it is scaled to 100%.  All
+other percentages in the same column are relative to this number.
+
+@c -- If the metric is derived, for example the @code{IPC}, the value shown under
+@c -- @code{<Total>} is based upon the total values of the that are metrics used to
+@c -- compute the derived metric.
+@c -- @IndexSubentry{Hardware event counters, IPC}
+
+With 8.926 seconds, function @code{mxv_core} takes 95.30% of the
+total time and is by far the most time consuming function.  
+The exclusive and inclusive metrics are identical, which means that it is a
+leaf function not calling any other functions.
+
+The next function in the list is @code{init_data}.  Although with 4.49%,
+the CPU time spent in this part is modest, this is an interesting entry because
+the inclusive CPU time of 0.420 seconds is twice the exclusive CPU time
+of 0.210 seconds.  Clearly this function is calling another function,
+or even more than one function and collectively this takes 0.210 seconds.
+Below we show the call tree feature that provides more details on the call
+structure of the application.
+
+The function @code{<static>@@0x47960 (<libgp-collector.so>)} looks odd and
+is certainly not familiar.  It is one of the internal functions used by
+@CollectApp{} and can be ignored.  Also, while the inclusive time is high,
+the exclusive time is zero.  This means it doesn't contribute to the
+performance.
+
+The question is how we know where this function originates from.  There are
+several commands to dig deeper and get more details on a function.
+@xref{Information on Load Objects}.
+
+@c -- A new node --------------------------------------------------------------
+@node       The Source Code View
+@subsection The Source Code View
+@c ----------------------------------------------------------------------------
+
+In general, the tuning efforts are best focused on the most time consuming
+part(s) of an application.  In this case that is easy, since over 95% of
+the total CPU time is spent in function @code{mxv_core}.
+It is now time to dig deeper and look
+@cindex Source level metrics
+at the metrics distribution at the source code level.  Since we measured
+CPU times, these are the metrics shown.
+
+@IndexSubentry{Options,  @code{-source}}
+@IndexSubentry{Commands, @code{source}}
+The @code{source} command is used to accomplish this.  It takes the name of the
+function, not the source filename, as an argument.  This is demonstrated
+below, where the @DisplayText{} command is used to show the annotated
+source listing of function @code{mxv_core}.
+
+Be aware that when using the @command{gcc} compiler, the source code has to
+be compiled with the @code{-g} option in order for the source code feature
+to work.  Otherwise the location(s) cannot be determined.  For other compilers
+we recommend checking the documentation for such an option.
+
+Below the command to display the source code of a function is shown.  Since at
+this point we are primarily interested in the timings only, we use the
+@code{metrics} command to request the exclusive and inclusive total CPU
+timings only.  See @ref{Display and Define the Metrics} for more information
+on how to define the metrics to be displayed.
+
+@cartouche
+@smallexample
+$ gprofng display text -metrics ei.totalcpu -source mxv_core test.1.er
+@end smallexample
+@end cartouche
+
+The output is shown below.  It has been somewhat modified to fit the formatting
+constraints and reduce the number of lines.
+
+@smallexample
+@verbatim
+Current metrics: e.totalcpu:i.totalcpu:name
+Current Sort Metric: Exclusive Total CPU Time ( e.totalcpu )
+Source file: <apath>/mxv.c
+Object file: mxv-pthreads (found as test.1.er/archives/...)
+Load Object: mxv-pthreads (found as test.1.er/archives/...)
+
+   Excl.     Incl.
+   Total     Total
+   CPU sec.  CPU sec.
+
+   <lines deleted>
+                                <Function: mxv_core>
+                            43. void __attribute__ ((noinline)) 
+                                     mxv_core (int64_t row_index_start,
+                            44.                int64_t row_index_end,
+                            45.                int64_t m,
+                            46.                int64_t n,
+                            47.                double **restrict A,
+                            48.                double *restrict b,
+                            49.                double *restrict c)
+                            50. {
+   0.        0.             50. {
+   0.        0.             51.   for (int64_t i=row_index_start;
+                                                 i<=row_index_end; i++)
+                            52.     {
+   0.        0.             53.       double row_sum = 0.0;
+## 4.613     4.613          54.       for (int64_t j=0; j<n; j++)
+## 4.313     4.313          55.         row_sum += A[i][j] * b[j];
+   0.        0.             56.       c[i] = row_sum;
+                            57.     }
+   0.        0.             58. }
+@end verbatim
+@end smallexample
+
+The first line echoes the metrics that have been selected.  The second line
+is not very meaningful when looking at the source code listing, but it shows
+the metric that is used to sort the data.
+
+The next three lines provide information on the location of the source file,
+the object file and the load object (@pxref{Load Objects and Functions}).
+
+Function @code{mxv_core} is part of a source file that has other functions
+as well.  These functions will be shown with the values for the metrics, but
+for lay-out purposes they have been removed in the output shown above.
+
+The header is followed by the annotated source code listing.  The selected
+metrics are shown first, followed by a source line number, and the source code.
+@IndexSubentry{Miscellaneous, @code{##}}
+The most time consuming line(s) are marked with the @code{##} symbol.  In
+this way they are easier to identify and find with a search.
+
+What we see is that all of the time is spent in lines 54-55.
+
+@IndexSubentry{Options,  @code{-lines}}
+@IndexSubentry{Commands, @code{lines}}
+A related command sometimes comes in handy as well.  It is called @code{lines}
+and displays a list of the source lines and their metrics, ordered according
+to the current sort metric (@pxref{Sorting the Performance Data}).
+
+Below are the command and the output.  For lay-out reasons, only the top 10 lines
+are shown here and the last part of the text on some lines has been replaced
+by dots.  The full text is @samp{instructions without line numbers} and
+means that the line number information for that function was not found.
+
+@cartouche
+@smallexample
+$ gprofng display text -lines test.1.er
+@end smallexample
+@end cartouche
+
+@smallexample
+@verbatim
+Lines sorted by metric: Exclusive Total CPU Time
+
+Excl. Total   Incl. Total    Name
+CPU           CPU
+ sec.      %   sec.      %
+9.367 100.00  9.367 100.00   <Total>
+4.613  49.25  4.613  49.25   mxv_core, line 54 in "mxv.c"
+4.313  46.05  4.313  46.05   mxv_core, line 55 in "mxv.c"
+0.160   1.71  0.370   3.95   init_data, line 118 in "manage_data.c"
+0.080   0.85  0.210   2.24   <Function: drand48, instructions ...>
+0.070   0.75  0.130   1.39   <Function: erand48_r, instructions ...>
+0.060   0.64  0.060   0.64   <Function: __drand48_iterate, ...>
+0.040   0.43  0.040   0.43   init_data, line 124 in "manage_data.c"
+0.010   0.11  0.020   0.21   <Function: _int_malloc, instructions ...>
+0.010   0.11  0.010   0.11   <Function: sysmalloc, instructions ...>
+@end verbatim
+@end smallexample
+
+What this overview immediately highlights is that the third most time consuming
+source line takes 0.370 seconds only.  This means that the inclusive time is
+only 3.95% and clearly this branch of the code hardly impacts the performance.
+
+@c -- A new node --------------------------------------------------------------
+@node       The Disassembly View
+@subsection The Disassembly View
+@c ----------------------------------------------------------------------------
+
+The source view is very useful to obtain more insight where the time is spent,
+but sometimes this is not sufficient.  The disassembly view provides more
+details since it shows the metrics at the instruction level.
+
+This view is displayed with the
+@IndexSubentry{Options,  @code{-disasm}}
+@IndexSubentry{Commands, @code{disasm}}
+@command{disasm}
+command and as with the source view, it displays an annotated listing.  In this
+@cindex Instruction level metrics
+case it shows the instructions with the metrics, interleaved with the
+source lines.  The
+instructions have a reference in square brackets (@code{[} and @code{]})
+to the source line they correspond to.
+
+@noindent
+We again focus on the timings only and set the metrics accordingly:
+
+@cartouche
+@smallexample
+$ gprofng display text -metrics ei.totalcpu -disasm mxv_core test.1.er
+@end smallexample
+@end cartouche
+
+@smallexample
+@verbatim
+Current metrics: e.totalcpu:i.totalcpu:name
+Current Sort Metric: Exclusive Total CPU Time ( e.totalcpu )
+Source file: <apath>/src/mxv.c
+Object file: mxv-pthreads (found as test.1.er/archives/...)
+Load Object: mxv-pthreads (found as test.1.er/archives/...)
+
+   Excl.     Incl.
+   Total     Total
+   CPU sec.  CPU sec.
+
+   <lines deleted>
+                      43. void __attribute__ ((noinline)) 
+                               mxv_core (int64_t row_index_start,
+                      44.                int64_t row_index_end,
+                      45.                int64_t m,
+                      46.                int64_t n,
+                      47.                double **restrict A,
+                      48.                double *restrict b,
+                      49.                double *restrict c)
+                      50. {
+                          <Function: mxv_core>
+   0.        0.           [50]   401d56:  mov    0x8(%rsp),%r10
+                      51.   for (int64_t i=row_index_start;
+                                           i<=row_index_end; i++)
+   0.        0.           [51]   401d5b:  cmp    %rsi,%rdi
+   0.        0.           [51]   401d5e:  jg     0x47
+   0.        0.           [51]   401d60:  add    $0x1,%rsi
+   0.        0.           [51]   401d64:  jmp    0x36
+                      52.     {
+                      53.       double row_sum = 0.0;
+                      54.       for (int64_t j=0; j<n; j++)
+                      55.         row_sum += A[i][j] * b[j];
+   0.        0.           [55]   401d66:  mov    (%r8,%rdi,8),%rdx
+   0.        0.           [54]   401d6a:  mov    $0x0,%eax
+   0.        0.           [53]   401d6f:  pxor   %xmm1,%xmm1
+   0.110     0.110        [55]   401d73:  movsd  (%rdx,%rax,8),%xmm0
+   1.921     1.921        [55]   401d78:  mulsd  (%r9,%rax,8),%xmm0
+   2.282     2.282        [55]   401d7e:  addsd  %xmm0,%xmm1
+## 4.613     4.613        [54]   401d82:  add    $0x1,%rax
+   0.        0.           [54]   401d86:  cmp    %rax,%rcx
+   0.        0.           [54]   401d89:  jne    0xffffffffffffffea
+                      56.       c[i] = row_sum;
+   0.        0.           [56]   401d8b:  movsd  %xmm1,(%r10,%rdi,8)
+   0.        0.           [51]   401d91:  add    $0x1,%rdi
+   0.        0.           [51]   401d95:  cmp    %rsi,%rdi
+   0.        0.           [51]   401d98:  je     0xd
+   0.        0.           [53]   401d9a:  pxor   %xmm1,%xmm1
+   0.        0.           [54]   401d9e:  test   %rcx,%rcx
+   0.        0.           [54]   401da1:  jg     0xffffffffffffffc5
+   0.        0.           [54]   401da3:  jmp    0xffffffffffffffe8
+                      57.     }
+                      58. }
+   0.        0.           [58]   401da5:  ret
+@end verbatim
+@end smallexample
+
+For each instruction, the timing values are given and we can immediately 
+identify the most expensive instructions.  As with the source level view,
+these are marked with the @code{##} symbol.
+
+It comes as no surprise that the time consuming instructions originate from
+the source code at lines 54-55.
+One thing to note is that the source line numbers no longer appear in
+sequential order. 
+This is because the compiler has re-ordered the instructions as part of
+the code optimizations it has performed. 
+
+As illustrated below and similar to the @command{lines} command, we can get
+an overview of the instructions executed by using the
+@IndexSubentry{Options,  @code{-pcs}}
+@IndexSubentry{Commands, @code{pcs}}
+@command{pcs}
+command.
+
+@noindent
+Below are the command and the output, which again has been restricted
+to 10 lines.  As before, some lines have been shortened for lay-out
+purposes.
+
+@cartouche
+@smallexample
+$ gprofng display text -pcs test.1.er
+@end smallexample
+@end cartouche
+
+@smallexample
+@verbatim
+PCs sorted by metric: Exclusive Total CPU Time
+
+Excl. Total   Incl. Total    Name
+CPU           CPU
+ sec.      %   sec.      %
+9.367 100.00  9.367 100.00   <Total>
+4.613  49.25  4.613  49.25   mxv_core + 0x0000002C, line 54 in "mxv.c"
+2.282  24.36  2.282  24.36   mxv_core + 0x00000028, line 55 in "mxv.c"
+1.921  20.51  1.921  20.51   mxv_core + 0x00000022, line 55 in "mxv.c"
+0.150   1.60  0.150   1.60   init_data + 0x000000AC, line 118 in ...
+0.110   1.18  0.110   1.18   mxv_core + 0x0000001D, line 55 in "mxv.c"
+0.040   0.43  0.040   0.43   drand48 + 0x00000022
+0.040   0.43  0.040   0.43   init_data + 0x000000F1, line 124 in ...
+0.030   0.32  0.030   0.32   __drand48_iterate + 0x0000001E
+0.020   0.21  0.020   0.21   __drand48_iterate + 0x00000038
+@end verbatim
+@end smallexample
+
+@noindent
+What we see is that the top three instructions take 94% of the total CPU time
+and any optimizations should focus on this part of the code.
+
+@c -- A new node --------------------------------------------------------------
+@node       Display and Define the Metrics
+@subsection Display and Define the Metrics
+@c ----------------------------------------------------------------------------
+
+The metrics shown by @DisplayText{} are useful, but there is more recorded
+than displayed by default.  We can customize the values shown by defining the
+metrics ourselves.
+
+There are two commands related to changing the metrics shown:
+@IndexSubentry{Options,  @code{-metric_list}}
+@IndexSubentry{Commands, @code{metric_list}}
+@command{metric_list} and
+@IndexSubentry{Options,  @code{-metrics}}
+@IndexSubentry{Commands, @code{metrics}}
+@command{metrics}.
+
+The first command shows the currently selected metrics, plus all the metrics
+that have been stored as part of the experiment.  The second command may be
+used to define the metric list.
+
+@noindent
+This is the way to get the information about the metrics:
+
+@IndexSubentry{Options,  @code{-metric_list}}
+@IndexSubentry{Commands, @code{metric_list}}
+@cartouche
+@smallexample
+$ gprofng display text -metric_list test.1.er
+@end smallexample
+@end cartouche
+
+@noindent
+This is the output:
+
+@smallexample
+@verbatim
+Current metrics: e.%totalcpu:i.%totalcpu:name
+Current Sort Metric: Exclusive Total CPU Time ( e.%totalcpu )
+Available metrics:
+Exclusive Total CPU Time: e.%totalcpu
+Inclusive Total CPU Time: i.%totalcpu
+                    Size: size
+              PC Address: address
+                    Name: name
+@end verbatim
+@end smallexample
+
+This shows the metrics that are currently used, the metric that is used to sort
+the data and all the metrics that have been recorded, but are not necessarily
+shown.
+
+@cindex Default metrics
+In this case, the current metrics are set to the exclusive and inclusive
+total CPU times, the respective percentages, and the name of the function,
+or load object.
+
+@IndexSubentry{Options,  @code{-metrics}}
+@IndexSubentry{Commands, @code{metrics}}
+The @code{metrics} command is used to define the metrics that need to be
+displayed.
+
+For example, to swap the exclusive and inclusive metrics, use the following
+metric definition: @code{i.%totalcpu:e.%totalcpu}.
+
+Since the metrics can be tailored for different views, there is also a way
+to reset them to the default.  This is done through the special keyword
+@code{default} for the metrics definition (@command{-metrics default}).
+@IndexSubentry{Metrics, Reset to default}
+
+@c -- A new node --------------------------------------------------------------
+@node       Customization of the Output
+@subsection Customization of the Output
+@c ----------------------------------------------------------------------------
+
+With the information just given, the function overview can be customized.
+For the sake of the example, we would like to display the name of the function
+first, followed only by the exclusive CPU time, given as an absolute number
+and a percentage.
+
+Note that the commands are parsed in order of appearance.  This is why we
+need to define the metrics @emph{before} requesting the function overview:
+
+@cartouche
+@smallexample
+$ gprofng display text -metrics name:e.%totalcpu -functions test.1.er
+@end smallexample
+@end cartouche
+
+@smallexample
+@verbatim
+Current metrics: name:e.%totalcpu
+Current Sort Metric: Exclusive Total CPU Time ( e.%totalcpu )
+Functions sorted by metric: Exclusive Total CPU Time
+
+Name                                     Excl. Total
+                                         CPU
+                                          sec.      %
+ <Total>                                 9.367 100.00
+ mxv_core                                8.926  95.30
+ init_data                               0.210   2.24
+ drand48                                 0.080   0.85
+ erand48_r                               0.070   0.75
+ __drand48_iterate                       0.060   0.64
+ _int_malloc                             0.010   0.11
+ sysmalloc                               0.010   0.11
+ <static>@0x47960 (<libgp-collector.so>) 0.      0.
+ __libc_start_main                       0.      0.
+ allocate_data                           0.      0.
+ driver_mxv                              0.      0.
+ main                                    0.      0.
+ malloc                                  0.      0.
+ start_thread                            0.      0.
+@end verbatim
+@end smallexample
+
+This was a first and simple example of how to customize the output.  Note that we
+did not rerun our profiling job and merely modified the display settings.
+Below we will show other and also more advanced examples of customization.
+
+@c -- A new node --------------------------------------------------------------
+@node       Name the Experiment Directory
+@subsection Name the Experiment Directory
+@c ----------------------------------------------------------------------------
+
+When using @CollectApp{}, the default names for experiments work fine, but
+they are quite generic.  It is often more convenient to select a more
+descriptive name.  For example, one that reflects conditions for the experiment
+conducted, like the number of threads used.
+
+For this, the mutually exclusive @code{-o} and @code{-O} options come in handy.
+Both may be used to provide a name for the experiment directory, but the
+behaviour of @CollectApp{} is different.
+
+With the
+@IndexSubentry{Options, @code{-o}}
+@samp{-o}
+option, an existing experiment directory is not overwritten.  Any directory
+with the same name either needs to be renamed, moved, or removed, before the
+experiment can be conducted.
+
+This is in contrast with the behaviour for the
+@IndexSubentry{Options, @code{-O}}
+@samp{-O}
+option.  Any existing directory with the same name is silently overwritten.
+
+Be aware that the name of the experiment directory has to end with @file{.er}.
+
+@c -- A new node --------------------------------------------------------------
+@node       Control the Number of Lines in the Output
+@subsection Control the Number of Lines in the Output
+@c ----------------------------------------------------------------------------
+
+@IndexSubentry{Options,  @code{-limit}}
+@IndexSubentry{Commands, @code{limit}}
+The @command{limit} @var{<n>} command can be used to control the number of lines
+printed in various views. For example it impacts the function view, but also
+takes effect for other display commands, like @command{lines}.
+
+The argument @var{<n>} should be a positive integer number.  It sets the number
+of lines in the (function) view.  A value of zero resets the limit to the
+default.
+
+Be aware that the pseudo-function @code{<Total>} counts as a regular function.
+For example @command{limit 10} displays nine user level functions.
+
+@c -- A new node --------------------------------------------------------------
+@node       Sorting the Performance Data
+@subsection Sorting the Performance Data
+@c ----------------------------------------------------------------------------
+
+@IndexSubentry{Options,  @code{-sort}}
+@IndexSubentry{Commands, @code{sort}}
+The @command{sort} @var{<key>} command sets the key to be used when sorting the
+performance data.
+
+The key is a valid metric definition, but the
+@IndexSubentry{Metrics, Visibility field}
+visibility field
+(@pxref{Metric Definitions})
+in the metric
+definition is ignored, since this does not affect the outcome of the sorting
+operation.
+For example if the sort key is set to @code{e.totalcpu}, the values
+will be sorted in descending order with respect to the exclusive total
+CPU time.
+
+@IndexSubentry{Sort, Reverse order}
+The data can be sorted in reverse order by prepending the metric definition
+with a minus (@samp{-}) sign.  For example @command{sort -e.totalcpu}.
+
+@IndexSubentry{Sort, Reset to default}
+A default metric for the sort operation has been defined and since this is
+a persistent command, this default can be restored with @code{default} as
+the key (@command{sort default}).
+
+@c -- A new node --------------------------------------------------------------
+@node       Scripting
+@subsection Scripting
+@c ----------------------------------------------------------------------------
+
+@cindex Script files
+The list with commands for @DisplayText{} can be very long.  This is tedious
+and also error prone.  Luckily, there is an easier and elegant way to control
+the output of this tool.
+
+@IndexSubentry{Options,  @code{-script}}
+@IndexSubentry{Commands, @code{script}}
+Through the @command{script} command, the name of a file with commands can be
+passed in.  These commands are parsed and executed as if they appeared on
+the command line in the same order as encountered in the file.  The commands
+in this script file can actually be mixed with commands on the command line
+and multiple script files may be used.
+The difference between the commands in the script file and those used on the
+command line is that the latter require a leading dash (@samp{-}) symbol.
+
+Comment lines in a script file are supported.  They need to start with the
+@samp{#} symbol.
+
+@c -- A new node --------------------------------------------------------------
+@node       A More Elaborate Example
+@subsection A More Elaborate Example
+@c ----------------------------------------------------------------------------
+
+With the information presented so far, we can customize our data
+gathering and display commands.
+
+As an example, we would like to use @file{mxv.1.thr.er} as the name for the
+experiment directory.  In this way, the name of the algorithm and the
+number of threads that were used are included in the name.
+We also don't mind overwriting an existing
+experiment directory with the same name.
+
+All that needs to be done is to use the
+@IndexSubentry{Options, @code{-O}}
+@samp{-O}
+option, followed by the directory name of choice when running @CollectApp{}:
+
+@cartouche
+@smallexample
+$ exe=mxv-pthreads
+$ m=8000
+$ n=4000
+$ gprofng collect app -O mxv.1.thr.er ./$exe -m $m -n $n -t 1
+@end smallexample
+@end cartouche
+
+Since we want to customize the profile and prefer to keep the command line
+short, the commands to generate the profile are put into a file with the
+name @file{my-script}:
+
+@smallexample
+@verbatim
+$ cat my-script
+# This is my first gprofng script
+# Set the metrics
+metrics i.%totalcpu:e.%totalcpu:name
+# Use the exclusive time to sort
+sort e.totalcpu
+# Limit the function list to 5 lines
+limit 5
+# Show the function list
+functions
+@end verbatim
+@end smallexample
+
+This script file is specified as input to the @DisplayText{} command
+that is used to display the performance information stored in experiment
+directory @file{mxv.1.thr.er}:
+
+@cartouche
+@smallexample
+$ gprofng display text -script my-script mxv.1.thr.er
+@end smallexample
+@end cartouche
+
+This command produces the following output:
+
+@smallexample
+@verbatim
+# This is my first gprofng script
+# Set the metrics
+Current metrics: i.%totalcpu:e.%totalcpu:name
+Current Sort Metric: Exclusive Total CPU Time ( e.%totalcpu )
+# Use the exclusive time to sort
+Current Sort Metric: Exclusive Total CPU Time ( e.%totalcpu )
+# Limit the function list to 5 lines
+Print limit set to 5
+# Show the function list
+Functions sorted by metric: Exclusive Total CPU Time
+
+Incl. Total   Excl. Total    Name
+CPU           CPU
+ sec.      %   sec.      %
+9.703 100.00  9.703 100.00   <Total>
+9.226  95.09  9.226  95.09   mxv_core
+0.455   4.69  0.210   2.17   init_data
+0.169   1.75  0.123   1.26   erand48_r
+0.244   2.52  0.075   0.77   drand48
+@end verbatim
+@end smallexample
+
+In the first part of the output the comment lines in the script file are
+echoed.  These are interleaved with an acknowledgement message for the commands.
+
+This is followed by a profile consisting of 5 lines only.  For both metrics,
+the percentages plus the timings are given.  The numbers are sorted with respect
+to the exclusive total CPU time.  Although this is the default, for
+demonstration purposes we use the @command{sort} command to explicitly define
+the metric for the sort.
+
+While we executed the same job as before and only changed the name of the
+experiment directory, the results are somewhat different.  This is sampling
+in action.  The numbers are not all that different though.
+It is seen that function @code{mxv_core} is responsible for
+95% of the CPU time and @code{init_data} takes 4.5% only.
+
+@c -- A new node --------------------------------------------------------------
+@node       The Call Tree
+@subsection The Call Tree
+@c ----------------------------------------------------------------------------
+
+The call tree shows the dynamic structure of the application by displaying the
+functions executed and their parent.  The CPU time attributed to each function
+is shown as well. This view helps to find the most expensive
+execution path in the program.
+
+@IndexSubentry{Options,  @code{-calltree}}
+@IndexSubentry{Commands, @code{calltree}}
+This feature is enabled through the @command{calltree} command.  For example,
+this is how to get the call tree for our current experiment:
+
+@cartouche
+@smallexample
+$ gprofng display text -calltree mxv.1.thr.er
+@end smallexample
+@end cartouche
+
+This displays the following structure:
+
+@smallexample
+@verbatim
+Functions Call Tree. Metric: Attributed Total CPU Time
+
+Attr. Total    Name
+CPU
+ sec.      %
+9.703 100.00   +-<Total>
+9.226  95.09     +-start_thread
+9.226  95.09     |  +-<static>@0x47960 (<libgp-collector.so>)
+9.226  95.09     |    +-driver_mxv
+9.226  95.09     |      +-mxv_core
+0.477   4.91     +-__libc_start_main
+0.477   4.91       +-main
+0.455   4.69         +-init_data
+0.244   2.52         |  +-drand48
+0.169   1.75         |    +-erand48_r
+0.047   0.48         |      +-__drand48_iterate
+0.021   0.22         +-allocate_data
+0.021   0.22         |  +-malloc
+0.021   0.22         |    +-_int_malloc
+0.006   0.06         |      +-sysmalloc
+0.003   0.03         |        +-__default_morecore
+0.003   0.03         |          +-sbrk
+0.003   0.03         |            +-brk
+0.001   0.01         +-pthread_create
+0.001   0.01           +-__pthread_create_2_1
+@end verbatim
+@end smallexample
+
+At first sight this may not be what is expected and some explanation is in
+order.
+
+@c ----------------------------------------------------------------------------
+@c TBD: Revise this text when we have user and machine mode.
+@c ----------------------------------------------------------------------------
+The top function is the pseudo-function @code{<Total>} that we have seen
+before.  It is introduced and shown here to provide the total value of the
+metric(s).
+
+We also see function @code{<static>@@0x47960} in the call tree and apparently
+it is from @code{libgp-collector.so}, a library that is internal to
+@ToolName{}.
+The @code{<static>} marker, followed by the program counter, is shown if the
+name of the function cannot be found.  This function is part of the
+implementation of the data collection process and should be hidden from the
+user.  This is part of a planned future enhancement.
+
+In general, if a view has a function that does not appear to be part of the
+user code, or seems odd anyhow, the @command{objects} and @command{fsingle}
+@IndexSubentry{Options,  @code{-objects}}
+@IndexSubentry{Commands, @code{objects}}
+@IndexSubentry{Options,  @code{-fsingle}}
+@IndexSubentry{Commands, @code{fsingle}}
+commands are very useful
+to find out more about load objects in general, but also to help identify
+an unknown entry in the function overview. @xref{Load Objects and Functions}.
+
+Another thing to note is that there are two main branches.  The one under
+@code{<static>@@0x47960} and the second one under @code{__libc_start_main}.
+This reflects the fact that this is a multithreaded program and the
+threaded part shows up as a separate branch in the call tree.
+
+The way to interpret this structure is as follows.  The program starts
+under control of @code{__libc_start_main}.  This executes the main program
+called @code{main}, which at the top level executes functions
+@code{init_data}, @code{allocate_data}, and @code{pthread_create}.
+The latter function creates and executes the additional thread(s).
+
+For this multithreaded part of the code, we need to look at the branch
+under function @code{start_thread} that calls the driver code for the
+matrix-vector multiplication (@code{driver_mxv}), which executes the function
+that performs the actual multiplication (@code{mxv_core}).
+
+There are two things worth noting for the call tree feature:
+
+@itemize
+
+@item
+This is a dynamic tree and since sampling is used, it most likely looks
+slightly different across seemingly identical profile runs.  In case the
+run times are short, it is worth considering the use of a high resolution
+through the
+@IndexSubentry{Options, @code{-p}}
+@samp{-p}
+option.  For example use @samp{-p hi} to increase the sampling rate.
+
+@item
+In case hardware event counters have been enabled
+(@pxref{Profile Hardware Event Counters}), these values are also displayed
+in the call tree view.
+
+@end itemize
+
+@c -- A new node --------------------------------------------------------------
+@node       More Information on the Experiment
+@subsection More Information on the Experiment
+@c ----------------------------------------------------------------------------
+
+The experiment directory not only contains performance related data.  Several
+system characteristics, the profiling command executed, plus some global
+performance statistics are stored and can be displayed.
+
+@IndexSubentry{Options,  @code{-header}}
+@IndexSubentry{Commands, @code{header}}
+The @command{header} command displays information about the experiment(s).
+For example, this command is used to extract this data from our
+experiment directory:
+
+@cartouche
+@smallexample
+$ gprofng display text -header mxv.1.thr.er
+@end smallexample
+@end cartouche
+
+The above command prints the following information.  Note that some of the
+layout and the information has been modified.  Directory paths have been
+replaced by @code{<apath>}, for example.  Textual changes are
+marked with the @samp{<} and @samp{>} symbols.
+
+@smallexample
+@verbatim
+Experiment: mxv.1.thr.er
+No errors
+No warnings
+Archive command ` /usr/bin/gp-archive -n -a on --outfile
+                     <apath>/archive.log <apath>/mxv.1.thr.er'
+
+Target command (64-bit): './mxv-pthreads -m 8000 -n 4000 -t 1'
+Process pid 2750071, ppid 2750069, pgrp 2749860, sid 2742080
+Current working directory: <apath>
+Collector version: `2.40.00'; experiment version 12.4 (64-bit)
+Host `<the-host-name>', OS `Linux <version>', page size 4096,
+                     architecture `x86_64'
+  4 CPUs, clock speed 2294 MHz.
+  Memory: 3506491 pages @  4096 = 13697 MB.
+Data collection parameters:
+  Clock-profiling, interval = 997 microsecs.
+  Periodic sampling, 1 secs.
+  Follow descendant processes from: fork|exec|combo
+
+Experiment started <date and time>
+
+Experiment Ended: 9.801216173
+Data Collection Duration: 9.801216173
+@end verbatim
+@end smallexample
+
+The output above may assist in troubleshooting, or to verify some of the
+operational conditions and we recommend to include this command when
+generating a profile.
+
+@IndexSubentry{Options, @code{-C}}
+Related to this command there is a useful option to record comment(s) in
+an experiment.
+To this end, use the @samp{-C} option on the @CollectApp{} tool to
+specify a comment string.  Up to ten comment lines can be included.
+These comments are displayed with the @command{header} command on
+the @DisplayText{} tool.
+
+@IndexSubentry{Options,  @code{-overview}}
+@IndexSubentry{Commands, @code{overview}}
+The @command{overview} command displays information on the experiment(s) and
+also shows a summary of the values for the metric(s) used.  This is an example
+of how to use it on the newly created experiment directory:
+
+@cartouche
+@smallexample
+$ gprofng display text -overview mxv.1.thr.er
+@end smallexample
+@end cartouche
+
+@smallexample
+@verbatim
+Experiment(s):
+
+Experiment      :mxv.1.thr.er
+  Target        : './mxv-pthreads -m 8000 -n 4000 -t 1'
+  Host          : <hostname> (<ISA>, Linux <version>)
+  Start Time    : <date and time>
+  Duration      : 9.801 Seconds
+
+Metrics:
+
+  Experiment Duration (Seconds): [9.801]
+  Clock Profiling
+    [X]Total CPU Time - totalcpu (Seconds): [*9.703]
+
+Notes: '*' indicates hot metrics, '[X]' indicates currently enabled
+       metrics.
+       The metrics command can be used to change selections. The
+       metric_list command lists all available metrics.
+@end verbatim
+@end smallexample
+
+This command provides a dashboard overview that helps to easily identify
+where the time is spent and in case hardware event counters are used, it
+shows their total values.
+
+@c -- A new node --------------------------------------------------------------
+@node       Control the Sampling Frequency
+@subsection Control the Sampling Frequency
+@c ----------------------------------------------------------------------------
+
+@cindex Sampling frequency
+So far we did not go into details on the frequency of the sampling process,
+but in some cases it is useful to change the default of 10 milliseconds.
+
+The advantage of increasing the sampling frequency is that functions that
+do not take much time per invocation are more accurately captured.  The
+downside is that more data is gathered.  This has an impact on the overhead
+of the collection process and more disk space is required.
+
+In general this is not an immediate concern, but with heavily threaded
+applications that run for an extended period of time, increasing the
+frequency may have a more noticeable impact.
+
+@IndexSubentry{Options, @code{-p}}
+The @code{-p} option on the @CollectApp{} tool is used to enable or disable
+clock based profiling, or to explicitly set the sampling rate.
+@cindex Sampling interval
+This option takes one of the following keywords:
+
+@table @code
+
+@item off
+Disable clock based profiling.
+
+@item on
+Enable clock based profiling with a per thread sampling interval of 10 ms.
+This is the default.
+
+@item lo
+Enable clock based profiling with a per thread sampling interval of 100 ms.
+
+@item hi
+Enable clock based profiling with a per thread sampling interval of 1 ms.
+
+@item @var{value}
+@cindex Sampling interval
+Enable clock based profiling with a per thread sampling interval of
+@var{value}.
+
+@end table
+
+It may seem unnecessary to have an option to disable clock based profiling,
+but there is a good reason to support this.
+By default, clock profiling is enabled when conducting hardware event counter
+experiments (@pxref{Profile Hardware Event Counters}).
+With the @code{-p off} option, this can be disabled.
+
+If an explicit value is set for the sampling, the number can be an integer or a
+floating-point number.
+A suffix of @samp{u} for microseconds, or @samp{m} for milliseconds is supported.
+If no suffix is used, the value is assumed to be in milliseconds.
+
+For example, the following command sets the sampling rate to
+5123.4 microseconds:
+
+@cartouche
+@smallexample
+$ gprofng collect app -p 5123.4u ./mxv-pthreads -m 8000 -n 4000 -t 1
+@end smallexample
+@end cartouche
+
+If the value is smaller than the clock profiling minimum, a warning message is issued
+and it is set to the minimum.
+In case it is not a multiple of the clock profiling resolution, it is silently rounded
+down to the nearest multiple of the clock resolution.
+If the value exceeds the clock profiling maximum, is negative, or zero, an error is
+reported.
+
+@IndexSubentry{Options,  @code{-header}}
+@IndexSubentry{Commands, @code{header}}
+@noindent
+Note that the @code{header} command echoes the sampling rate used.
+
+@c -- A new node --------------------------------------------------------------
+@node       Information on Load Objects
+@subsection Information on Load Objects
+@c ----------------------------------------------------------------------------
+
+It may happen that the function view shows a function that is not known to
+the user.  This can easily happen with library functions for example.
+Luckily there are three commands that come in handy then.
+
+@IndexSubentry{Options,  @code{-objects}}
+@IndexSubentry{Commands, @code{objects}}
+@IndexSubentry{Options,  @code{-fsingle}}
+@IndexSubentry{Commands, @code{fsingle}}
+@IndexSubentry{Options,  @code{-fsummary}}
+@IndexSubentry{Commands, @code{fsummary}}
+These commands are @command{objects}, @command{fsingle}, and @command{fsummary}.
+They provide details on
+@cindex Load objects
+load objects (@pxref{Load Objects and Functions}).
+
+The @command{objects} command lists all load objects that have been referenced
+during the performance experiment.
+Below we show the command and the result for our profile job.  Like before,
+some path names in the output have been shortened and replaced by the
+@IndexSubentry{Miscellaneous, @code{<apath>}}
+@code{<apath>} symbol that represents an absolute directory path.
+
+@cartouche
+@smallexample
+$ gprofng display text -objects mxv.1.thr.er
+@end smallexample
+@end cartouche
+
+The output includes the name and path of the target executable:
+
+@smallexample
+@verbatim
+<Unknown> (<Unknown>)
+<mxv-pthreads> (<apath>/mxv-pthreads)
+<libdl-2.28.so> (/usr/lib64/libdl-2.28.so)
+<librt-2.28.so> (/usr/lib64/librt-2.28.so)
+<libc-2.28.so> (/usr/lib64/libc-2.28.so)
+<libpthread-2.28.so> (/usr/lib64/libpthread-2.28.so)
+<libm-2.28.so> (/usr/lib64/libm-2.28.so)
+<libgp-collector.so> (/usr/lib64/gprofng/libgp-collector.so)
+<ld-2.28.so> (/usr/lib64/ld-2.28.so)
+<DYNAMIC_FUNCTIONS> (DYNAMIC_FUNCTIONS)
+@end verbatim
+@end smallexample
+
+@IndexSubentry{Options,  @code{-fsingle}}
+@IndexSubentry{Commands, @code{fsingle}}
+The @command{fsingle} command may be used to get more details on a specific entry
+in the function view, say.  For example, the command below provides additional
+information on the @code{pthread_create} function shown in the function overview.
+
+@cartouche
+@smallexample
+$ gprofng display text -fsingle pthread_create mxv.1.thr.er
+@end smallexample
+@end cartouche
+
+Below is the output from this command.  It has been somewhat modified to match the
+display requirements.
+
+@smallexample
+@verbatim
++ gprofng display text -fsingle pthread_create mxv.1.thr.er
+pthread_create
+        Exclusive Total CPU Time: 0.    (  0. %)
+        Inclusive Total CPU Time: 0.001 (  0.0%)
+                            Size:   258
+                      PC Address: 8:0x00049f60
+                     Source File: (unknown)
+                     Object File: (unknown)
+                     Load Object: /usr/lib64/gprofng/libgp-collector.so
+                    Mangled Name:
+                         Aliases:
+@end verbatim
+@end smallexample
+
+In this table we not only see how much time was spent in this function, we
+also see where it originates from.  In addition to this, the size and start
+address are given as well.  If the source code location is known it is also
+shown here.
+
+@IndexSubentry{Options,  @code{-fsummary}}
+@IndexSubentry{Commands, @code{fsummary}}
+The related @code{fsummary} command displays the same information as
+@code{fsingle}, but for all functions in the function overview,
+including @code{<Total>}:
+
+@cartouche
+@smallexample
+$ gprofng display text -fsummary mxv.1.thr.er
+@end smallexample
+@end cartouche
+
+@smallexample
+@verbatim
+Functions sorted by metric: Exclusive Total CPU Time
+
+<Total>
+        Exclusive Total CPU Time: 9.703 (100.0%)
+        Inclusive Total CPU Time: 9.703 (100.0%)
+                            Size:     0
+                      PC Address: 1:0x00000000
+                     Source File: (unknown)
+                     Object File: (unknown)
+                     Load Object: <Total>
+                    Mangled Name:
+                         Aliases:
+
+mxv_core
+        Exclusive Total CPU Time: 9.226 ( 95.1%)
+        Inclusive Total CPU Time: 9.226 ( 95.1%)
+                            Size:    80
+                      PC Address: 2:0x00001d56
+                     Source File: <apath>/src/mxv.c
+                     Object File: mxv.1.thr.er/archives/mxv-pthreads_ss_pf53V__5
+                     Load Object: <apath>/mxv-pthreads
+                    Mangled Name:
+                         Aliases:
+
+          ... etc ...
+@end verbatim
+@end smallexample
+
+@c -- A new node --------------------------------------------------------------
+@node    Support for Multithreading
+@section Support for Multithreading
+@c ----------------------------------------------------------------------------
+
+In this chapter the support for multithreading is introduced and discussed.
+As is shown below, nothing needs to be changed when collecting the performance
+data.
+
+The difference is that additional commands are available to get more
+information on the multithreading details, plus that several filters allow
+the user to zoom in on specific threads.
+
+@c -- A new node --------------------------------------------------------------
+@node       Creating a Multithreading Experiment
+@subsection Creating a Multithreading Experiment
+@c ----------------------------------------------------------------------------
+
+We demonstrate the support for multithreading using the same code and settings
+as before, but this time 2 threads are used:
+
+@cartouche
+@smallexample
+$ exe=mxv-pthreads
+$ m=8000
+$ n=4000
+$ gprofng collect app -O mxv.2.thr.er ./$exe -m $m -n $n -t 2
+@end smallexample
+@end cartouche
+
+First of all, as far as @ProductName{} is concerned, no changes are needed.
+Nothing special is needed to profile a multithreaded job when using @ToolName{}.
+
+The same is true when displaying the performance results.  The same commands
+that were used before work unmodified.  For example, this is all that is needed to
+get a function overview:
+
+@cartouche
+@smallexample
+$ gprofng display text -limit 5 -functions mxv.2.thr.er
+@end smallexample
+@end cartouche
+
+This produces the following familiar looking output:
+
+@smallexample
+@verbatim
+Print limit set to 5
+Functions sorted by metric: Exclusive Total CPU Time
+
+Excl. Total   Incl. Total    Name
+CPU           CPU
+ sec.      %   sec.      %
+9.464 100.00  9.464 100.00   <Total>
+8.961  94.69  8.961  94.69   mxv_core
+0.224   2.37  0.469   4.95   init_data
+0.105   1.11  0.177   1.88   erand48_r
+0.073   0.77  0.073   0.77   __drand48_iterate
+@end verbatim
+@end smallexample
+
+@c -- A new node --------------------------------------------------------------
+@node       Commands Specific to Multithreading
+@subsection Commands Specific to Multithreading
+@c ----------------------------------------------------------------------------
+
+The function overview shown above shows the results aggregated over all the
+threads.  The interesting new element is that we can also look at the
+performance data for the individual threads.
+
+@IndexSubentry{Options,  @code{-thread_list}}
+@IndexSubentry{Commands, @code{thread_list}}
+The @command{thread_list} command displays how many threads have been used:
+
+@cartouche
+@smallexample
+$ gprofng display text -thread_list mxv.2.thr.er
+@end smallexample
+@end cartouche
+
+This produces the following output, showing that three threads have
+been used:
+
+@smallexample
+@verbatim
+Exp Sel Total
+=== === =====
+  1 all     3
+@end verbatim
+@end smallexample
+
+The output confirms there is one experiment and that by default all
+threads are selected.
+
+It may seem surprising to see three threads here, since we used the
+@code{-t 2} option, but it is common for a Pthreads program to use one
+additional thread.
+Typically, there is one main thread that runs from start to finish.
+It handles the sequential portions of the code, as well as thread
+management related tasks.
+It is no different in the example code.  At some point, the main thread
+creates and activates the two threads that perform the multiplication
+of the matrix with the vector.  Upon completion of this computation,
+the main thread continues.
+
+@IndexSubentry{Options,  @code{-threads}}
+@IndexSubentry{Commands, @code{threads}}
+The @command{threads} command is simple, yet very powerful.  It shows the
+total value of the metrics for each thread.
+
+@cartouche
+@smallexample
+$ gprofng display text -threads mxv.2.thr.er
+@end smallexample
+@end cartouche
+
+@noindent
+The command above produces the following overview:
+
+@smallexample
+@verbatim
+Objects sorted by metric: Exclusive Total CPU Time
+
+Excl. Total    Name
+CPU
+ sec.      %
+9.464 100.00   <Total>
+4.547  48.05   Process 1, Thread 3
+4.414  46.64   Process 1, Thread 2
+0.502   5.31   Process 1, Thread 1
+@end verbatim
+@end smallexample
+
+The first line gives the total CPU time accumulated over the threads
+selected.  This is followed by the metric value(s) for each thread.
+
+From this it is clear that the main thread is responsible for a
+little over 5% of the total CPU time, while the other two threads
+take 47-48% each.
+
+This view is ideally suited to verify if there are any load balancing
+issues and also to find the most time consuming thread(s).
+
+@IndexSubentry{Filters, Thread selection}
+While useful, often more information than this is needed.  This is
+@IndexSubentry{Options,  @code{-thread_select}}
+@IndexSubentry{Commands, @code{thread_select}}
+where the thread selection filter comes in.  Through the
+@command{thread_select}
+command, one or more threads may be selected.
+@xref{The Selection List}, for how to define the selection list.
+
+Since it is most common to use this command in a script, we do so as
+well here.  Below is the script we are using:
+
+@cartouche
+@smallexample
+# Define the metrics
+metrics e.%totalcpu
+# Limit the output to 5 lines
+limit 5
+# Get the function overview for thread 1
+thread_select 1
+functions
+# Get the function overview for thread 2
+thread_select 2
+functions
+# Get the function overview for thread 3
+thread_select 3
+functions
+@end smallexample
+@end cartouche
+
+The definition of the metrics and the output limit have been shown and
+explained earlier.  The new command to focus on is @command{thread_select}.
+
+This command takes a list (@pxref{The Selection List}) to select specific
+threads.  In this case, the individual thread numbers that were
+obtained earlier with the @command{thread_list} command are selected.
+
+This restricts the output of the @command{functions} command to the thread
+number(s) specified.  This means that the script above shows which
+function(s) each thread executes and how much CPU time they consumed.
+Both the exclusive timings and their percentages are given.
+
+Note that technically this command is a filter and persistent.  The
+selection remains active until changed through another thread selection
+command, or when it is reset with the @samp{all} selection list.
+
+@noindent
+This is the relevant part of the output for the first thread:
+
+@smallexample
+@verbatim
+Exp Sel Total
+=== === =====
+  1 1       3
+Functions sorted by metric: Exclusive Total CPU Time
+
+Excl. Total    Name
+CPU
+ sec.      %
+0.502 100.00   <Total>
+0.224  44.64   init_data
+0.105  20.83   erand48_r
+0.073  14.48   __drand48_iterate
+0.067  13.29   drand48
+@end verbatim
+@end smallexample
+
+As usual, the comment lines are echoed.  This is followed by a confirmation
+of the selection.  The first table shows that one experiment is loaded and
+that thread 1 out of the three threads has been selected.  What is
+displayed next is the function overview for this particular thread.  Due to
+the @code{limit 5} command, there are only five functions in this list.
+
+Clearly, this thread handles the data initialization part and as we know
+from the call tree output, function @code{init_data} executes the 3 other
+functions shown in this profile.
+
+Below are the overviews for threads 2 and 3 respectively.  It is seen that all
+of the CPU time is spent in function @code{mxv_core} and that this time
+is approximately the same for both threads.
+
+@smallexample
+@verbatim
+# Get the function overview for thread 2
+Exp Sel Total
+=== === =====
+  1 2       3
+Functions sorted by metric: Exclusive Total CPU Time
+
+Excl. Total    Name
+CPU
+ sec.      %
+4.414 100.00   <Total>
+4.414 100.00   mxv_core
+0.      0.     <static>@0x48630 (<libgp-collector.so>)
+0.      0.     driver_mxv
+0.      0.     start_thread
+
+# Get the function overview for thread 3
+Exp Sel Total
+=== === =====
+  1 3       3
+Functions sorted by metric: Exclusive Total CPU Time
+
+Excl. Total    Name
+CPU
+ sec.      %
+4.547 100.00   <Total>
+4.547 100.00   mxv_core
+0.      0.     <static>@0x48630 (<libgp-collector.so>)
+0.      0.     driver_mxv
+0.      0.     start_thread
+@end verbatim
+@end smallexample
+
+When analyzing the performance of a multithreaded application, it is sometimes
+useful to know whether threads have mostly executed on the same core, say, or
+if they have wandered across multiple cores.  This sort of stickiness is usually
+referred to as
+@cindex Thread affinity
+@emph{thread affinity}.
+
+Similar to the commands for the threads, there are several commands related
+to the usage of the cores, or @emph{CPUs} as they are called in @ToolName{}
+(@pxref{The Concept of a CPU in @ProductName{}}).
+
+@IndexSubentry{Options,  @code{-cpu_list}}
+@IndexSubentry{Commands, @code{cpu_list}}
+Similar to the @command{thread_list} command, the @command{cpu_list} command
+displays how many CPUs have been used.
+@IndexSubentry{Options,  @code{-cpus}}
+@IndexSubentry{Commands, @code{cpus}}
+The equivalent of the @command{threads} command is the @command{cpus}
+command, which shows the numbers of the CPUs that were used and the metric values
+for each one of them.  Both commands are demonstrated below.
+
+@cartouche
+@smallexample
+$ gprofng display text -cpu_list -cpus mxv.2.thr.er
+@end smallexample
+@end cartouche
+
+@noindent
+This command produces the following output:
+
+@smallexample
+@verbatim
++ gprofng display text -cpu_list -cpus mxv.2.thr.er
+Exp Sel Total
+=== === =====
+  1 all     4
+Objects sorted by metric: Exclusive Total CPU Time
+
+Excl. Total    Name
+CPU
+ sec.      %
+9.464 100.00   <Total>
+4.414  46.64   CPU 2
+2.696  28.49   CPU 0
+1.851  19.56   CPU 1
+0.502   5.31   CPU 3
+@end verbatim
+@end smallexample
+
+The first table shows that there is only one experiment and that all of the
+four CPUs have been selected.  The second table shows the exclusive metrics
+for each of the CPUs that have been used.
+
+As also echoed in the output, the data is sorted with respect to the
+exclusive CPU time, but it is very easy to sort the data by the CPU id
+@IndexSubentry{Options,  @code{-sort}}
+@IndexSubentry{Commands, @code{sort}}
+by using the @command{sort} command:
+
+@cartouche
+@smallexample
+$ gprofng display text -cpu_list -sort name -cpus mxv.2.thr.er
+@end smallexample
+@end cartouche
+
+@noindent
+With the @command{sort} added, the output is as follows:
+
+@smallexample
+@verbatim
+Exp Sel Total
+=== === =====
+  1 all     4
+Current Sort Metric: Name ( name )
+Objects sorted by metric: Name
+
+Excl. Total    Name
+CPU
+ sec.      %
+9.464 100.00   <Total>
+2.696  28.49   CPU 0
+1.851  19.56   CPU 1
+4.414  46.64   CPU 2
+0.502   5.31   CPU 3
+@end verbatim
+@end smallexample
+
+While the table with thread times shown earlier may point at a load imbalance
+in the application, this overview has a different purpose.
+
+For example, we see that 4 CPUs have been used, but we know that the
+application uses 3 threads only.
+We will now demonstrate how filters can be used to help answer the
+question why 4 CPUs are used, while the application has 3 threads only.
+This means that at least one thread has executed on more than one CPU.
+
+Recall the thread level timings:
+
+@smallexample
+@verbatim
+Excl. Total    Name
+CPU
+ sec.      %
+9.464 100.00   <Total>
+4.547  48.05   Process 1, Thread 3
+4.414  46.64   Process 1, Thread 2
+0.502   5.31   Process 1, Thread 1
+@end verbatim
+@end smallexample
+
+Compared to the CPU timings above, it seems very likely that thread 3 has
+used more than one CPU, because the thread and CPU timings are the same
+for both other threads.
+
+The command below selects thread number 3 and then requests the CPU
+utilization for this thread:
+
+@cartouche
+@smallexample
+$ gprofng display text -thread_select 3 -sort name -cpus mxv.2.thr.er
+@end smallexample
+@end cartouche
+
+The output shown below confirms that thread 3 is selected and then displays
+the CPU(s) that have been used by this thread:
+
+@smallexample
+@verbatim
+Exp Sel Total
+=== === =====
+  1 3       3
+
+Objects sorted by metric: Exclusive Total CPU Time
+
+Excl. Total    Name
+CPU
+ sec.      %
+4.547 100.00   <Total>
+2.696  59.29   CPU 0
+1.851  40.71   CPU 1
+@end verbatim
+@end smallexample
+
+The results show that this thread has used CPU 0 nearly 60% of the time
+and CPU 1 for the remaining 40%.
+
+To confirm that this is the only thread that has used more than one CPU, the
+same approach can be used for threads 1 and 2:
+
+@smallexample
+@verbatim
+$ gprofng display text -thread_select 1 -cpus mxv.2.thr.er
+Exp Sel Total
+=== === =====
+  1 1       3
+Objects sorted by metric: Exclusive Total CPU Time
+
+Excl. Total    Name
+CPU
+ sec.      %
+0.502 100.00   <Total>
+0.502 100.00   CPU 3
+@end verbatim
+@end smallexample
+
+@smallexample
+@verbatim
+$ gprofng display text -thread_select 2 -cpus mxv.2.thr.er
+Exp Sel Total
+=== === =====
+  1 2       3
+Objects sorted by metric: Exclusive Total CPU Time
+
+Excl. Total    Name
+CPU
+ sec.      %
+4.414 100.00   <Total>
+4.414 100.00   CPU 2
+@end verbatim
+@end smallexample
+
+@noindent
+The output above shows that indeed threads 1 and 2 each have used a single
+CPU only.
+
+@c -- A new node --------------------------------------------------------------
+@node    View Multiple Experiments
+@section View Multiple Experiments
+@c ----------------------------------------------------------------------------
+
+One thing we did not cover so far is that @ToolName{} fully supports the analysis
+of multiple experiments.  The @DisplayText{} tool accepts a list of experiments.
+The data can either be aggregated across the experiments, or used in a
+comparison.
+
+The default is to aggregate the metric values across the experiments that have
+been loaded.  The @command{compare} command can be used to enable the
+@IndexSubentry{Options, @code{-compare}}
+@IndexSubentry{Commands, @code{compare}}
+comparison of results.
+
+In this section both modes are illustrated with an example.
+
+@c -- A new node --------------------------------------------------------------
+@node       Aggregation of Experiments
+@subsection Aggregation of Experiments
+@c ----------------------------------------------------------------------------
+
+If the data for multiple experiments is aggregated, the @DisplayText{} tool
+shows the combined results.
+For example, below is the script to show the function view for the data
+aggregated over two experiments, drop the first experiment and then show
+the function view for the second experiment only.
+We will call it @file{my-script-agg}.
+
+@cartouche
+@smallexample
+# Define the metrics
+metrics e.%totalcpu
+# Limit the output to 5 lines
+limit 5
+# Get the list with experiments
+experiment_list
+# Get the function overview for all
+functions
+# Drop the first experiment
+drop_exp mxv.2.thr.er
+# Get the function overview for exp #2
+functions
+@end smallexample
+@end cartouche
+
+@IndexSubentry{Options,  @code{-experiment_list}}
+@IndexSubentry{Commands, @code{experiment_list}}
+With the exception of the @command{experiment_list} command, all commands
+used have been discussed earlier.
+
+The @command{experiment_list} command provides a list of the experiments
+that have been loaded.  This may be used to get the experiment IDs and
+to verify the correct experiments are loaded for the aggregation.
+
+@noindent
+Below is an example that loads two experiments and uses the above
+script to display different function views.
+
+@cartouche
+@smallexample
+$ gprofng display text -script my-script-agg mxv.2.thr.er mxv.4.thr.er
+@end smallexample
+@end cartouche
+
+@noindent
+This produces the following output:
+
+@smallexample
+@verbatim
+# Define the metrics
+Current metrics: e.%totalcpu:name
+Current Sort Metric: Exclusive Total CPU Time ( e.%totalcpu )
+# Limit the output to 5 lines
+Print limit set to 5
+# Get the list with experiments
+ID Sel     PID Experiment
+== === ======= ============
+ 1 yes 1339450 mxv.2.thr.er
+ 2 yes 3579561 mxv.4.thr.er
+# Get the function overview for all
+Functions sorted by metric: Exclusive Total CPU Time
+
+Excl. Total     Name
+CPU
+  sec.      %
+20.567 100.00   <Total>
+19.553  95.07   mxv_core
+ 0.474   2.30   init_data
+ 0.198   0.96   erand48_r
+ 0.149   0.72   drand48
+
+# Drop the first experiment
+Experiment mxv.2.thr.er has been dropped
+# Get the function overview for exp #2
+Functions sorted by metric: Exclusive Total CPU Time
+
+Excl. Total     Name
+CPU
+  sec.      %
+11.104 100.00   <Total>
+10.592  95.39   mxv_core
+ 0.249   2.24   init_data
+ 0.094   0.84   erand48_r
+ 0.082   0.74   drand48
+@end verbatim
+@end smallexample
+
+The first five lines should look familiar.  The five lines following echo
+the comment line in the script and show the overview of the experiments.
+This confirms two experiments have been loaded and that both are active.
+This is followed by the function overview.  The timings have been summed
+up and the percentages are adjusted accordingly.
+
+@c -- A new node --------------------------------------------------------------
+@node       Comparison of Experiments
+@subsection Comparison of Experiments
+@c ----------------------------------------------------------------------------
+
+The support for multiple experiments really shines in comparison mode.  
+@cindex Compare experiments
+In comparison mode, the data for the various experiments is shown side by
+side, as illustrated below where we compare the results for the multithreaded
+experiments using two and four threads respectively.
+
+This
+feature is controlled through the
+@IndexSubentry{Options,  @code{-compare}}
+@IndexSubentry{Commands, @code{compare}}
+@code{compare} command.
+
+The comparison mode is enabled through @command{compare on} and with
+@command{compare off} it is disabled again.
+In addition to @samp{on}, or @samp{off}, this command also supports
+the @samp{delta} and @samp{ratio} keywords.
+
+This is the script that will be used in our example.  It sets the comparison
+mode to @samp{on}:
+
+@smallexample
+@verbatim
+# Define the metrics
+metrics e.%totalcpu
+# Limit the output to 5 lines
+limit 5
+# Enable the comparison mode
+compare on
+# Get the function overview
+functions
+@end verbatim
+@end smallexample
+
+Assuming this script file is called @file{my-script-comp}, this is how
+it is used to display the comparison:
+
+@cartouche
+@smallexample
+$ gprofng display text -script my-script-comp mxv.2.thr.er mxv.4.thr.er
+@end smallexample
+@end cartouche
+
+@noindent
+This produces the output shown below.  The data for the first experiment
+is used as the reference.  The timings for the other experiment are also
+shown as absolute numbers, side by side with these reference numbers:
+
+@smallexample
+@verbatim
+
+mxv.2.thr.er  mxv.4.thr.er
+Excl. Total   Excl. Total     Name
+CPU           CPU
+ sec.      %    sec.      %
+9.464 100.00  11.104 100.00   <Total>
+8.961  94.69  10.592  95.39   mxv_core
+0.224   2.37   0.249   2.24   init_data
+0.105   1.11   0.094   0.84   erand48_r
+0.073   0.77   0.060   0.54   __drand48_iterate
+@end verbatim
+@end smallexample
+
+This table is already helpful to more easily compare (two) profiles, but
+there is more that we can do here.
+
+By default, in comparison mode, all measured values are shown.  Often
+profiling is about comparing performance data.  It is therefore
+sometimes more useful to look at differences or ratios, using one
+experiment as a reference.
+
+The values shown are relative to this reference.  For example, if a ratio
+is below one, it means the reference value was higher.
+
+In the example below, we use the same two experiments used in the comparison
+above. The script is also nearly identical.  The only change is that we now
+use the @samp{delta} keyword.
+
+As before, the number of lines is restricted to 5 and we focus on
+the exclusive timings plus percentages.  For the comparison part we are
+interested in the differences.
+
+This is the script that produces such an overview:
+
+@smallexample
+@verbatim
+# Define the metrics
+metrics e.%totalcpu
+# Limit the output to 5 lines
+limit 5
+# Set the comparison mode to differences
+compare delta
+# Get the function overview
+functions
+@end verbatim
+@end smallexample
+
+Assuming this script file is called @file{my-script-comp2}, this is how we
+get the table displayed on our screen:
+
+@cartouche
+@smallexample
+$ gprofng display text -script my-script-comp2 mxv.2.thr.er mxv.4.thr.er
+@end smallexample
+@end cartouche
+
+Leaving out some of the lines printed, which we have seen before, we get
+the following table:
+
+@smallexample
+@verbatim
+mxv.2.thr.er  mxv.4.thr.er
+Excl. Total   Excl. Total     Name
+CPU           CPU
+ sec.      %   delta      %
+9.464 100.00  +1.640 100.00   <Total>
+8.961  94.69  +1.631  95.39   mxv_core
+0.224   2.37  +0.025   2.24   init_data
+0.105   1.11  -0.011   0.84   erand48_r
+0.073   0.77  -0.013   0.54   __drand48_iterate
+@end verbatim
+@end smallexample
+
+It is now easier to see that the CPU times for the most time consuming
+functions in this code are practically the same.
+
+It is also possible to show ratios through the @command{compare ratio}
+@IndexSubentry{Options, @code{-compare}}
+@IndexSubentry{Commands, @code{compare}}
+command.  The first column is used as a reference and the values for
+the other columns with metrics are derived by dividing the value by
+the reference.  The result for such a comparison is shown below:
+
+@smallexample
+@verbatim
+mxv.2.thr.er  mxv.4.thr.er
+Excl. Total   Excl. Total CPU    Name
+CPU
+ sec.      %      ratio      %
+9.464 100.00  x   1.173 100.00   <Total>
+8.961  94.69  x   1.182  95.39   mxv_core
+0.224   2.37  x   1.111   2.24   init_data
+0.105   1.11  x   0.895   0.84   erand48_r
+0.073   0.77  x   0.822   0.54   __drand48_iterate
+@end verbatim
+@end smallexample
+
+Note that the comparison feature is supported at the function, source, and
+disassembly level.  There is no practical limit on the number of experiments
+that can be used in a comparison.
+
+@c -- A new node --------------------------------------------------------------
+@node    Profile Hardware Event Counters
+@section Profile Hardware Event Counters
+@c ----------------------------------------------------------------------------
+
+Many processors provide a set of hardware event counters and @ToolName{}
+provides support for this feature.
+@xref{Hardware Event Counters Explained}, for those readers that are not
+familiar with such counters and would like to learn more.
+
+In this section we explain how to get the details on the event counter
+support for the processor used in the experiment(s), and show several
+examples.
+
+@c -- A new node --------------------------------------------------------------
+@node       Getting Information on the Counters Supported
+@subsection Getting Information on the Counters Supported
+@c ----------------------------------------------------------------------------
+
+The first step is to check if the processor used for the experiments is
+supported by @ToolName{}.
+@IndexSubentry{Options, @code{-h}}
+The @code{-h} option on @CollectApp{} will show the event counter
+information:
+
+@cartouche
+@smallexample
+$ gprofng collect app -h
+@end smallexample
+@end cartouche
+
+In case the counters are supported, a list with the events is printed.
+Otherwise, a warning message will be issued.
+
+For example, below we show this command and the output on an Intel Xeon
+Platinum 8167M (aka ``Skylake'') processor.  The output has been split
+into several sections and each section is commented upon separately.
+
+@smallexample
+@verbatim
+Run "gprofng collect app --help" for a usage message.
+
+Specifying HW counters on `Intel Arch PerfMon v2 on Family 6 Model 85'
+(cpuver=2499):
+
+  -h {auto|lo|on|hi}
+	turn on default set of HW counters at the specified rate
+  -h <ctr_def> [-h <ctr_def>]...
+  -h <ctr_def>[,<ctr_def>]...
+	specify HW counter profiling for up to 4 HW counters
+@end verbatim
+@end smallexample
+
+The first line shows how to get a usage overview.  This is followed by
+some information on the target processor.
+The next five lines explain in what ways the @code{-h} option can be
+used to define the events to be monitored.
+
+The first version shown above enables a default set of counters.  This
+default depends on the processor this command is executed on.  The
+keyword following the @code{-h} option defines the sampling rate:
+
+@table @code
+
+@item auto
+Match the sample rate used by clock profiling.  If the latter is disabled,
+use a per thread sampling rate of approximately 100 samples per second.
+This setting is the default and preferred.
+
+@item on
+Use a per thread sampling rate of approximately 100 samples per second.
+
+@item lo
+Use a per thread sampling rate of approximately 10 samples per second.
+
+@item hi
+Use a per thread sampling rate of approximately 1000 samples per second.
+
+@end table
+
+The second and third variant define the events to be monitored.  Note
+that the number of simultaneous events supported is printed.  In this
+case we can monitor four events in a single profiling job.
+
+It is a matter of preference whether you like to use the @code{-h}
+option for each event, or use it once, followed by a comma separated
+list.
+
+There is one slight catch though.  The counter definition below has a
+mandatory comma (@code{,}) between the event and the rate.  While a
+default can be used for the rate, the comma cannot be omitted.
+This may result in a somewhat awkward counter definition in case
+the default sampling rate is used.
+
+For example, the following two commands are equivalent.  Note
+the double comma in the second command.  This is not a typo.
+
+@cartouche
+@smallexample
+$ gprofng collect app -h cycles -h insts ...
+$ gprofng collect app -h cycles,,insts ...
+@end smallexample
+@end cartouche
+
+In the first command this comma is not needed, because a
+comma (``@code{,}'') immediately followed by white space may
+be omitted.
+
+This is why we prefer this syntax and in the remainder will
+use the first version of this command.
+
+@IndexSubentry{Hardware event counters, counter definition}
+The counter definition takes an event name, plus optionally one or
+more attributes, followed by a comma, and optionally the sampling rate.
+The output section below shows the formal definition.
+
+@cartouche
+@smallexample
+  <ctr_def> == <ctr>[[~<attr>=<val>]...],[<rate>]
+@end smallexample
+@end cartouche
+
+The printed help then explains this syntax.  Below we have summarized
+and expanded this output:
+
+@table @code
+
+@item @var{<ctr>}
+The counter name must be selected from the available counters listed
+as part of the output printed with the @code{-h} option.
+On most systems, if a counter is not listed, it may still be specified
+by its numeric value.
+
+@item @var{~<attr>=<val>}
+This is an optional attribute that depends on the processor.  The list
+of supported attributes is printed in the output.  Examples of
+attributes are ``user'', or ``system''.  The value can be given in decimal
+or hexadecimal format.
+Multiple attributes may be specified, and each must be preceded
+by a ~.
+
+@item @var{<rate>}
+
+The sampling rate is one of the following:
+
+@table @code
+
+@item auto
+This is the default and matches the rate used by clock profiling.
+If clock profiling is disabled, use @samp{on}.
+
+@item on
+Set the per thread maximum sampling rate to ~100 samples/second
+
+@item lo
+Set the per thread maximum sampling rate to ~10 samples/second
+
+@item hi
+Set the per thread maximum sampling rate to ~1000 samples/second
+
+@item @var{<interval>}
+Define the sampling interval.
+@xref{Control the Sampling Frequency}, for how to define this.
+
+@end table
+
+@end table
+
+After the section with the formal definition of events and counters, a
+processor specific list is displayed.  This part starts with an overview
+of the default set of counters and the aliased names supported
+@emph{on this specific processor}.
+
+@smallexample
+@verbatim
+Default set of HW counters:
+
+    -h cycles,,insts,,llm
+
+Aliases for most useful HW counters:
+
+ alias    raw name                   type units regs description
+
+ cycles   unhalted-core-cycles   CPU-cycles 0123 CPU Cycles
+ insts    instruction-retired        events 0123 Instructions Executed
+ llm      llc-misses                 events 0123 Last-Level Cache Misses
+ br_msp   branch-misses-retired      events 0123 Branch Mispredict
+ br_ins   branch-instruction-retired events 0123 Branch Instructions
+@end verbatim
+@end smallexample
+
+@noindent
+The definitions given above may or may not be available on other processors.
+
+The table above shows the default set of counters defined for this processor,
+and the aliases.  For each alias the full ``raw'' name is given, plus the
+unit of the number returned by the counter (CPU cycles, or a raw count),
+the hardware counter the event is allowed to be mapped onto, and a short
+description.
+
+The last part of the output contains all the events that can be monitored:
+
+@smallexample
+@verbatim
+Raw HW counters:
+
+    name                                type      units regs description
+
+    unhalted-core-cycles                     CPU-cycles 0123
+    unhalted-reference-cycles                    events 0123
+    instruction-retired                          events 0123
+    llc-reference                                events 0123
+    llc-misses                                   events 0123
+    branch-instruction-retired                   events 0123
+    branch-misses-retired                        events 0123
+    ld_blocks.store_forward                      events 0123
+    ld_blocks.no_sr                              events 0123
+    ld_blocks_partial.address_alias              events 0123
+    dtlb_load_misses.miss_causes_a_walk          events 0123
+    dtlb_load_misses.walk_completed_4k           events 0123
+
+    <many lines deleted>
+
+    l2_lines_out.silent                          events 0123
+    l2_lines_out.non_silent                      events 0123
+    l2_lines_out.useless_hwpf                    events 0123
+    sq_misc.split_lock                           events 0123
+@end verbatim
+@end smallexample
+
+As can be seen, these names are not always easy to correlate to a specific
+event of interest.  The processor manual should provide more clarity on this.
+
+@c -- A new node --------------------------------------------------------------
+@node       Examples Using Hardware Event Counters
+@subsection Examples Using Hardware Event Counters
+@c ----------------------------------------------------------------------------
+
+The previous section may give the impression that these counters are hard to
+use, but as we will show now, in practice it is quite simple.
+
+With the information from the @code{-h} option, we can easily set up our first
+event counter experiment.
+
+We start by using the default set of counters defined for our processor and we
+use 2 threads:
+
+@cartouche
+@smallexample
+$ exe=mxv-pthreads
+$ m=8000
+$ n=4000
+$ exp=mxv.hwc.def.2.thr.er
+$ gprofng collect app -O $exp -h auto ./$exe -m $m -n $n -t 2
+@end smallexample
+@end cartouche
+
+@IndexSubentry{Options, @code{-h}}
+@IndexSubentry{Hardware event counters, @code{auto} option}
+The new option here is @code{-h auto}.  The @code{auto} keyword enables
+hardware event counter profiling and selects the default set of counters
+defined for this processor.
+
+As before, we can display the information, but there is one practical hurdle
+to take.  Unless we like to view all metrics recorded, we would need to know
+the names of the events that have been enabled.  This is tedious and also not
+portable in case we would like to repeat this experiment on another processor.
+
+@IndexSubentry{Hardware event counters, @code{hwc} metric}
+This is where the special @code{hwc} metric comes in very handy.  It
+automatically expands to the active set of events used.
+
+With this, it is very easy to display the event counter values.  Note that
+although the regular clock based profiling was enabled, we only want to see
+the counter values.  We also request to see the percentages and limit the
+output to the first 5 lines:
+
+@cartouche
+@smallexample
+$ exp=mxv.hwc.def.2.thr.er
+$ gprofng display text -metrics e.%hwc -limit 5 -functions $exp
+@end smallexample
+@end cartouche
+
+@smallexample
+@verbatim
+Current metrics: e.%cycles:e+%insts:e+%llm:name
+Current Sort Metric: Exclusive CPU Cycles ( e.%cycles )
+Print limit set to 5
+Functions sorted by metric: Exclusive CPU Cycles
+
+Excl. CPU     Excl. Instructions  Excl. Last-Level   Name
+Cycles        Executed            Cache Misses
+ sec.      %                  %                 %
+2.691 100.00  7906475309 100.00   122658983 100.00   <Total>
+2.598  96.54  7432724378  94.01   121745696  99.26   mxv_core
+0.035   1.31   188860269   2.39       70084   0.06   erand48_r
+0.026   0.95    73623396   0.93      763116   0.62   init_data
+0.018   0.66    76824434   0.97       40040   0.03   drand48
+@end verbatim
+@end smallexample
+
+As we have seen before, the first few lines echo the settings.
+This includes a list with the hardware event counters used by
+default.
+
+The table that follows makes it very easy to get an overview where the
+time is spent and how many of the target events have occurred.
+
+As before, we can drill down deeper and see the same metrics at the source
+line and instruction level.  Other than using @code{hwc} in the metrics
+definitions, nothing has changed compared to the previous examples:
+
+@cartouche
+@smallexample
+$ exp=mxv.hwc.def.2.thr.er
+$ gprofng display text -metrics e.hwc -source mxv_core $exp
+@end smallexample
+@end cartouche
+
+This is the relevant part of the output.  Since the lines get very long,
+we have somewhat modified the lay-out:
+
+@smallexample
+@verbatim
+   Excl. CPU Excl.        Excl.
+   Cycles    Instructions Last-Level
+    sec.     Executed     Cache Misses
+                                         <Function: mxv_core>
+   0.                 0          0   32. void __attribute__ ((noinline))
+                                         mxv_core(...)
+   0.                 0          0   33. {
+   0.                 0          0   34.   for (uint64_t i=...) {
+   0.                 0          0   35.     double row_sum = 0.0;
+## 1.872     7291879319   88150571   36.     for (int64_t j=0; j<n; j++)
+   0.725      140845059   33595125   37.        row_sum += A[i][j]*b[j];
+   0.                 0          0   38.     c[i] = row_sum;
+                                     39.    }
+   0.                 0          0   40. }
+@end verbatim
+@end smallexample
+
+In a similar way we can display the event counter values at the instruction
+level.  Again we have modified the lay-out due to page width limitations:
+
+@cartouche
+@smallexample
+$ exp=mxv.hwc.def.2.thr.er
+$ gprofng display text -metrics e.hwc -disasm mxv_core $exp
+@end smallexample
+@end cartouche
+
+@smallexample
+@verbatim
+   Excl. CPU Excl.        Excl.
+   Cycles    Instructions Last-Level
+    sec.     Executed     Cache Misses
+                                                <Function: mxv_core>
+   0.                 0          0  [33] 4021ba: mov   0x8(%rsp),%r10
+                                    34.   for (uint64_t i=...) {
+   0.                 0          0  [34] 4021bf: cmp   %rsi,%rdi
+   0.                 0          0  [34] 4021c2: jbe   0x37
+   0.                 0          0  [34] 4021c4: ret
+                                    35.       double row_sum = 0.0;
+                                    36.       for (int64_t j=0; j<n; j++)
+                                    37.         row_sum += A[i][j]*b[j];
+   0.                 0          0  [37] 4021c5: mov   (%r8,%rdi,8),%rdx
+   0.                 0          0  [36] 4021c9: mov   $0x0,%eax
+   0.                 0          0  [35] 4021ce: pxor  %xmm1,%xmm1
+   0.002       12804230     321394  [37] 4021d2: movsd (%rdx,%rax,8),%xmm0
+   0.141       60819025    3866677  [37] 4021d7: mulsd (%r9,%rax,8),%xmm0
+   0.582       67221804   29407054  [37] 4021dd: addsd %xmm0,%xmm1
+## 1.871     7279075109   87989870  [36] 4021e1: add   $0x1,%rax
+   0.002       12804210      80351  [36] 4021e5: cmp   %rax,%rcx
+   0.                 0          0  [36] 4021e8: jne   0xffffffffffffffea
+                                    38.       c[i] = row_sum;
+   0.                 0          0  [38] 4021ea: movsd %xmm1,(%r10,%rdi,8)
+   0.                 0          0  [34] 4021f0: add   $0x1,%rdi
+   0.                 0          0  [34] 4021f4: cmp   %rdi,%rsi
+   0.                 0          0  [34] 4021f7: jb    0xd
+   0.                 0          0  [35] 4021f9: pxor  %xmm1,%xmm1
+   0.                 0          0  [36] 4021fd: test  %rcx,%rcx
+   0.                 0      80350  [36] 402200: jne   0xffffffffffffffc5
+   0.                 0          0  [36] 402202: jmp   0xffffffffffffffe8
+                                    39.   }
+                                    40. }
+   0.                 0          0  [40]  402204:  ret
+@end verbatim
+@end smallexample
+
+So far we have used the default settings for the event counters.  It is
+quite straightforward to select specific counters.  For the sake of the
+example, let's assume we would like to count how many branch instructions
+and retired memory load instructions that missed in the L1 cache have been
+executed.  We also want to count these events with a high resolution.
+
+This is the command to do so:
+
+@cartouche
+@smallexample
+$ exe=mxv-pthreads
+$ m=8000
+$ n=4000
+$ exp=mxv.hwc.sel.2.thr.er
+$ hwc1=br_ins,hi
+$ hwc2=mem_load_retired.l1_miss,hi
+$ gprofng collect app -O $exp -h $hwc1 -h $hwc2 $exe -m $m -n $n -t 2
+@end smallexample
+@end cartouche
+
+As before, we get a table with the event counts.  Due to the very
+long name for the second counter, we have somewhat modified the
+output.
+
+@cartouche
+@smallexample
+$ gprofng display text -limit 10 -functions mxv.hwc.sel.2.thr.er
+@end smallexample
+@end cartouche
+
+@smallexample
+@verbatim
+Functions sorted by metric: Exclusive Total CPU Time
+Excl.     Incl.     Excl. Branch  Excl.                 Name
+Total     Total     Instructions  mem_load_retired.l1_miss
+CPU sec.  CPU sec.                Events
+2.597     2.597     1305305319    4021340               <Total>
+2.481     2.481     1233233242    3982327               mxv_core
+0.040     0.107       19019012       9003               init_data
+0.028     0.052       23023048      15006               erand48_r
+0.024     0.024       19019008       9004               __drand48_iterate
+0.015     0.067       11011009       2998               drand48
+0.008     0.010              0       3002               _int_malloc
+0.001     0.001              0          0               brk
+0.001     0.002              0          0               sysmalloc
+0.        0.001              0          0               __default_morecore
+@end verbatim
+@end smallexample
+
+@IndexSubentry{Options,  @code{-compare}}
+@IndexSubentry{Commands, @code{compare}}
+When using event counters, the values could be very large and it is not easy
+to compare the numbers.  As we will show next, the @code{ratio} feature is
+very useful when comparing such profiles.
+
+To demonstrate this, we have set up another event counter experiment where
+we would like to compare the number of last level cache misses and the number
+of branch instructions executed when using a single thread, or two threads.
+
+These are the commands used to generate the experiment directories:
+
+@cartouche
+@smallexample
+$ exe=./mxv-pthreads
+$ m=8000
+$ n=4000
+$ exp1=mxv.hwc.comp.1.thr.er
+$ exp2=mxv.hwc.comp.2.thr.er
+$ gprofng collect app -O $exp1 -h llm -h br_ins $exe -m $m -n $n -t 1
+$ gprofng collect app -O $exp2 -h llm -h br_ins $exe -m $m -n $n -t 2
+@end smallexample
+@end cartouche
+
+The following script has been used to get the tables.  Due to lay-out
+restrictions, we have to create two tables, one for each counter.
+
+@cartouche
+@smallexample
+# Limit the output to 5 lines
+limit 5
+# Define the metrics
+metrics name:e.llm
+# Set the comparison to ratio
+compare ratio
+functions
+# Define the metrics
+metrics name:e.br_ins
+# Set the comparison to ratio
+compare ratio
+functions
+@end smallexample
+@end cartouche
+
+Note that we print the name of the function first, followed by the counter
+data.
+The new element is that we set the comparison mode to @code{ratio}.  This
+divides the data in a column by its counterpart in the reference experiment.
+
+This is the command using this script and the two experiment directories as
+input:
+
+@cartouche
+@smallexample
+$ gprofng display text -script my-script-comp-counters \
+  mxv.hwc.comp.1.thr.er \
+  mxv.hwc.comp.2.thr.er
+@end smallexample
+@end cartouche
+
+By design, we get two tables, one for each counter:
+
+@smallexample
+@verbatim
+Functions sorted by metric: Exclusive Last-Level Cache Misses
+
+                              mxv.hwc.comp.1.thr.er  mxv.hwc.comp.2.thr.er
+Name                          Excl. Last-Level       Excl. Last-Level
+                              Cache Misses           Cache Misses
+                                                         ratio
+ <Total>                      122709276              x   0.788
+ mxv_core                     121796001              x   0.787
+ init_data                       723064              x   1.055
+ erand48_r                       100111              x   0.500
+ drand48                          60065              x   1.167
+
+Functions sorted by metric: Exclusive Branch Instructions
+
+                              mxv.hwc.comp.1.thr.er  mxv.hwc.comp.2.thr.er
+Name                          Excl. Branch           Excl. Branch
+                              Instructions           Instructions
+                                                       ratio
+ <Total>                      1307307316             x 0.997
+ mxv_core                     1235235239             x 0.997
+ erand48_r                      23023033             x 0.957
+ drand48                        20020009             x 0.600
+ __drand48_iterate              17017028             x 0.882
+@end verbatim
+@end smallexample
+
+A ratio less than one in the second column means that this counter
+value was smaller than the value from the reference experiment shown
+in the first column.
+
+This kind of presentation of the results makes it much easier to
+quickly interpret the data.
+
+We conclude this section with thread-level event counter overviews,
+but before we go into this, there is an important metric we need to
+mention.
+
+@c -- TBD Explain <Total> for IPC
+
+@IndexSubentry{Hardware event counters, IPC}
+In case it is known how many instructions and CPU cycles have been executed,
+the value for the IPC (``Instructions Per Clock cycle'') can be computed.
+@xref{Hardware Event Counters Explained}.
+This is a derived metric that gives an indication how well the processor
+is utilized.  The inverse of the IPC is called CPI.
+@IndexSubentry{Hardware event counters, CPI}
+
+The @DisplayText{} command automatically computes the IPC and CPI values
+if an experiment contains the event counter values for the instructions
+and CPU cycles executed.  These are part of the metric list and can be
+displayed, just like any other metric.
+
+@IndexSubentry{Options,  @code{-metric_list}}
+@IndexSubentry{Commands, @code{metric_list}}
+This can be verified through the @command{metric_list} command.  If we go
+back to our earlier experiment with the default event counters, we get
+the following result.
+
+@cartouche
+@smallexample
+$ gprofng display text -metric_list mxv.hwc.def.2.thr.er
+@end smallexample
+@end cartouche
+
+@smallexample
+@verbatim
+Current metrics: e.totalcpu:i.totalcpu:e.cycles:e+insts:e+llm:name
+Current Sort Metric: Exclusive Total CPU Time ( e.totalcpu )
+Available metrics:
+         Exclusive Total CPU Time: e.%totalcpu
+         Inclusive Total CPU Time: i.%totalcpu
+             Exclusive CPU Cycles: e.+%cycles
+             Inclusive CPU Cycles: i.+%cycles
+  Exclusive Instructions Executed: e+%insts
+  Inclusive Instructions Executed: i+%insts
+Exclusive Last-Level Cache Misses: e+%llm
+Inclusive Last-Level Cache Misses: i+%llm
+ Exclusive Instructions Per Cycle: e+IPC
+ Inclusive Instructions Per Cycle: i+IPC
+ Exclusive Cycles Per Instruction: e+CPI
+ Inclusive Cycles Per Instruction: i+CPI
+                             Size: size
+                       PC Address: address
+                             Name: name
+@end verbatim
+@end smallexample
+
+Among the other metrics, we see the new metrics for the IPC and CPI
+listed.
+
+In the script below, we use this information and add the IPC and CPI
+to the metrics to be displayed.  We also use the thread filter to
+display these values for the individual threads.
+
+This is the complete script we have used.  Other than a different selection
+of the metrics, there are no new features.
+
+@cartouche
+@smallexample
+# Define the metrics
+metrics e.insts:e.%cycles:e.IPC:e.CPI
+# Sort with respect to cycles
+sort e.cycles
+# Limit the output to 5 lines
+limit 5
+# Get the function overview for all threads
+functions
+# Get the function overview for thread 1
+thread_select 1
+functions
+# Get the function overview for thread 2
+thread_select 2
+functions
+# Get the function overview for thread 3
+thread_select 3
+functions
+@end smallexample
+@end cartouche
+
+In the metrics definition on the second line, we explicitly request the
+counter values for the instructions (@code{e.insts}) and CPU cycles
+(@code{e.cycles}) executed.  These names can be found in output from the
+@IndexSubentry{Options, @code{-metric_list}}
+@IndexSubentry{Commands, @code{metric_list}}
+@command{metric_list} command above.
+In addition to these metrics, we also request the IPC and CPI to be shown.
+
+@IndexSubentry{Options, @code{-limit}}
+@IndexSubentry{Commands, @code{limit}}
+As before, we used the @command{limit} command to control the number of
+functions displayed.  We then request an overview for all the threads,
+followed by three sets of two commands to select a thread and display the
+function overview.
+
+The script above is used as follows:
+
+@cartouche
+@smallexample
+$ gprofng display text -script my-script-ipc mxv.hwc.def.2.thr.er
+@end smallexample
+@end cartouche
+
+@noindent
+This script produces four tables.  We list them separately below,
+and have left out the additional output.
+
+@noindent
+The first table shows the accumulated values across the three
+threads that have been active.
+
+@smallexample
+@verbatim
+Functions sorted by metric: Exclusive CPU Cycles
+
+Excl.         Excl. CPU     Excl.  Excl.   Name
+Instructions  Cycles        IPC    CPI
+Executed       sec.      %
+7906475309    2.691 100.00  1.473  0.679   <Total>
+7432724378    2.598  96.54  1.434  0.697   mxv_core
+ 188860269    0.035   1.31  2.682  0.373   erand48_r
+  73623396    0.026   0.95  1.438  0.696   init_data
+  76824434    0.018   0.66  2.182  0.458   drand48
+@end verbatim
+@end smallexample
+
+@noindent
+This shows that the IPC of this program is completely dominated
+by function @code{mxv_core}.  It has a fairly low IPC value
+of 1.43.
+
+@noindent
+The next table is for thread 1 and shows the values for the
+main thread.
+
+@smallexample
+@verbatim
+Exp Sel Total
+=== === =====
+  1 1       3
+Functions sorted by metric: Exclusive CPU Cycles
+
+Excl.         Excl. CPU     Excl.  Excl.   Name
+Instructions  Cycles        IPC    CPI
+Executed       sec.      %
+473750931     0.093 100.00  2.552  0.392   <Total>
+188860269     0.035  37.93  2.682  0.373   erand48_r
+ 73623396     0.026  27.59  1.438  0.696   init_data
+ 76824434     0.018  18.97  2.182  0.458   drand48
+134442832     0.013  13.79  5.250  0.190   __drand48_iterate
+@end verbatim
+@end smallexample
+
+@noindent
+Although this thread hardly uses any CPU cycles, the overall IPC
+of 2.55 is not all that bad.
+
+@noindent
+Last, we show the tables for threads 2 and 3:
+
+@smallexample
+@verbatim
+Exp Sel Total
+=== === =====
+  1 2       3
+Functions sorted by metric: Exclusive CPU Cycles
+
+Excl.         Excl. CPU     Excl.  Excl.   Name
+Instructions  Cycles        IPC    CPI
+Executed       sec.      %
+3716362189    1.298 100.00  1.435  0.697   <Total>
+3716362189    1.298 100.00  1.435  0.697   mxv_core
+         0    0.      0.    0.     0.      collector_root
+         0    0.      0.    0.     0.      driver_mxv
+
+Exp Sel Total
+=== === =====
+  1 3       3
+Functions sorted by metric: Exclusive CPU Cycles
+
+Excl.         Excl. CPU     Excl.  Excl.   Name
+Instructions  Cycles        IPC    CPI
+Executed       sec.      %
+3716362189    1.300 100.00  1.433  0.698   <Total>
+3716362189    1.300 100.00  1.433  0.698   mxv_core
+         0    0.      0.    0.     0.      collector_root
+         0    0.      0.    0.     0.      driver_mxv
+@end verbatim
+@end smallexample
+
+It is seen that both execute the same number of instructions and
+take about the same number of CPU cycles.  As a result, the IPC is
+the same for both threads.
+
+@c -- A new node --------------------------------------------------------------
+@c TBD @node    Additional Features
+@c TBD @section Additional Features
+@c ----------------------------------------------------------------------------
+
+@c -- A new node --------------------------------------------------------------
+@c TBD @node    More Filtering Capabilities
+@c TBD @subsection More Filtering Capabilities
+@c ----------------------------------------------------------------------------
+
+@c TBD Cover @code{samples} and @code{seconds}
+
+@c -- A new node --------------------------------------------------------------
+@node    Java Profiling
+@section Java Profiling
+@c ----------------------------------------------------------------------------
+
+@IndexSubentry{Options, @code{-j}}
+@IndexSubentry{Java profiling, @code{-j on/off}}
+The @CollectApp{} command supports Java profiling.  The @code{-j on} option
+can be used for this, but since this feature is enabled by default, there is
+no need to set this explicitly.  Java profiling may be disabled through the
+@code{-j off} option.
+
+The program is compiled as usual and the experiment directory is created
+similar to what we have seen before.  The only difference with a C/C++
+application is that the program has to be explicitly executed by java.
+
+For example, this is how to generate the experiment data for a Java
+program that has the source code stored in file @code{Pi.java}:
+
+@cartouche
+@smallexample
+$ javac Pi.java
+$ gprofng collect app -j on -O pi.demo.er java Pi < pi.in
+@end smallexample
+@end cartouche
+
+Regarding which java is selected to generate the data, @ToolName{}
+first looks for the JDK in the path set in either the
+@IndexSubentry{Java profiling, @code{JDK_HOME}}
+@code{JDK_HOME} environment variable, or in the
+@IndexSubentry{Java profiling, @code{JAVA_PATH}}
+@code{JAVA_PATH} environment variable.  If neither of these variables is
+set, it checks for a JDK in the search path (set in the PATH
+environment variable).  If there is no JDK in this path, it checks for
+the java executable in @code{/usr/java/bin/java}.
+
+In case additional options need to be passed on to the JVM, the
+@IndexSubentry{Options, @code{-J}}
+@IndexSubentry{Java profiling, @code{-J <string>}}
+@code{-J <string>} option can be used.  The string with the
+option(s) has to be delimited by quotation marks in case
+there is more than one argument.
+
+The @DisplayText{} command may be used to view the performance data.  There is
+no need for any special options and the same commands as previously discussed
+are supported.
+
+@IndexSubentry{Options,  @code{-viewmode}}
+@IndexSubentry{Commands, @code{viewmode}}
+@IndexSubentry{Java profiling, different view modes}
+The @code{viewmode} command
+(@pxref{The Viewmode})
+is very useful to examine the call stacks.
+
+For example, this is how one can see the native call stacks.  For
+lay-out purposes we have restricted the list to the first five entries:
+
+@cartouche
+@smallexample
+$ gprofng display text -limit 5 -viewmode machine -calltree pi.demo.er
+@end smallexample
+@end cartouche
+
+@smallexample
+@verbatim
+Print limit set to 5
+Viewmode set to machine
+Functions Call Tree. Metric: Attributed Total CPU Time
+
+Attr.      Name
+Total
+CPU sec.
+1.381      +-<Total>
+1.171        +-Pi.calculatePi(double)
+0.110        +-collector_root
+0.110        |  +-JavaMain
+0.070        |    +-jni_CallStaticVoidMethod
+@end verbatim
+@end smallexample
+
+@noindent
+Note that the selection of the viewmode is echoed in the output.
+
+@c -- A new node --------------------------------------------------------------
+@node    The gprofng Tools
+@chapter The gprofng Tools
+@c ----------------------------------------------------------------------------
+
+Several tools are included in @ProductName{}.  In subsequent chapters these
+are discussed in detail.  Below a brief description is given, followed by an
+overview of the environment variables that are supported.
+
+@c -- A new node --------------------------------------------------------------
+@node    Tools Overview
+@section Tools Overview
+@c ----------------------------------------------------------------------------
+
+The following tools are supported by @ProductName{}:
+
+@table @code
+
+@item @CollectApp{}
+@IndexSubentry{@code{gprofng}, @code{collect app}}
+
+Collects the performance data and stores the results in an experiment
+directory.  There are many options on this tool, but quite often the
+defaults are sufficient.
+An experiment directory is required for the subsequent analysis of
+the results.
+
+@item @DisplayText{}
+@IndexSubentry{@code{gprofng}, @code{display text}}
+Generates performance reports in ASCII format.  Commandline
+options, and/or commands in a script file are used to control the contents
+and lay-out of the generated report(s).
+
+@item @DisplayHTML{}
+@IndexSubentry{@code{gprofng}, @code{display html}}
+Takes one or more experiment directories and generates a directory with
+HTML files.  Starting from the index.html file, the performance data
+may be examined in a browser.
+
+@item @DisplaySRC{}
+@IndexSubentry{@code{gprofng}, @code{display src}}
+Displays the source code, interleaved with the disassembled instructions.
+
+@item @Archive{}
+@IndexSubentry{@code{gprofng}, @code{archive}}
+Archives an experiment directory by (optionally) including source code and
+object files, as well as the shared libraries that have been used.
+
+@end table
+
+@c -- A new section -----------------------------------------------------------
+@node    The gprofng.rc file with default settings
+@section The gprofng.rc file with default settings
+@c ----------------------------------------------------------------------------
+The @file{gprofng.rc}
+@cindex gprofng.rc
+file is used to define default settings for the @DisplayText{} and
+@DisplaySRC{} tools, but the user can override these defaults through local
+configuration files.
+
+There are three files that are checked when the tool starts up.  The first
+file has pre-defined settings and comes with the installation, but through
+a hidden file called @file{.gprofng.rc}, the user can (re)define the defaults.
+
+These are the locations and files that are checked upon starting the above
+mentioned tools:
+
+@enumerate
+
+@item
+The system-wide filename is called @file{gprofng.rc} and is located in
+the top level @file{/etc} directory.
+
+If @ProductName{} has been built from the source, this file is in
+subdirectory @file{etc} in the top level installation directory.
+
+@item
+The user's home directory may have a hidden file called @file{.gprofng.rc}.
+
+@item
+The directory where @DisplayText{} (or @DisplaySRC{}) is invoked from may
+have a hidden file called @file{.gprofng.rc}.
+
+@end enumerate
+
+The settings of each file override the settings of the file(s) read before it.
+Defaults in the system-wide file are overruled by the file in the user home
+directory (if any) and any settings in the @file{.gprofng.rc} file in the
+current directory override those.
+
+Note that the settings in these files only affect the defaults.  Unlike
+the commands used in a script file, they are not commands for the tools.
+
+@c -- TBD indxobj_define, 
+@c -- TBD object_show, 
+@c -- TBD object_hide, 
+@c -- TBD object_api, 
+
+The @file{.gprofng.rc} configuration files can contain the
+@command{addpath},
+@command{compare}, 
+@command{dthresh}, 
+@command{name}, 
+@command{pathmap},
+@command{printmode}, 
+@command{sthresh},
+and
+@command{viewmode} 
+commands as described in this user guide.
+
+They can also contain the following commands, @emph{which cannot be used on
+either the command line, or in a script file}:
+
+@table @code
+
+@item dmetrics @var{metric-spec}
+@IndexSubentry{Commands, @code{dmetrics}}
+
+Specify the default metrics to be displayed or printed in the function list.
+The syntax and use of the metric list is described in section
+@ref{Metric Definitions}.
+The order of the metric keywords in the list determines the order in which
+the metrics are presented.
+
+Default metrics for the @code{callers-callees} list are derived from the
+function list default metrics by adding the corresponding attributed metric
+before the first occurrence of each metric name in the list.
+
+@item dsort @var{metric-spec}
+@IndexSubentry{Commands, @code{dsort}}
+
+Specify the default metric by which the function list is sorted.  The sort
+metric is the first metric in this list that matches a metric in any loaded
+experiment, subject to the following conditions:
+
+@itemize @bullet
+
+@item
+If the entry in @var{metric-spec} has a visibility string of an exclamation
+point (@samp{!}), the first metric whose name matches is used, regardless of
+whether it is visible.
+
+@item
+If the entry in @var{metric-spec} has any other visibility string, the first
+visible metric whose name matches is used.
+
+@end itemize
+
+The syntax and use of the metric list is described in section
+@ref{Metric Definitions}.
+The default sort metric for the @code{callers-callees} list is the attributed
+metric corresponding to the default sort metric for the function list.
+
+@item en_desc @{on | off | =@var{regex}@}
+@IndexSubentry{Commands, @code{en_desc}}
+
+Set the mode for reading descendant experiments to @samp{on} (enable all
+descendants) or @samp{off} (disable all descendants).  If
+@samp{=}@var{regex} is used, enable data from those experiments whose
+executable name matches the regular expression.
+
+The default setting is @samp{on} to follow all descendants.  In reading
+experiments with descendants, any sub-experiments that contain little or
+no performance data are ignored by @DisplayText{}.
+
+@end table
+
+@c -- A new section -----------------------------------------------------------
+@node    Filters
+@section Filters
+@c ----------------------------------------------------------------------------
+
+Various filter commands are supported by @DisplayText{}.
+Thanks to the use of filters, the user can zoom in on a certain area of 
+interest.  With filters, it is possible to select one or more threads to
+focus on, define a window in time, select specific call stacks, etc.
+@IndexSubentry{Filters, Intro}
+
+While already powerful by themselves, filters may be combined to further
+narrow down the view into the data.
+
+@IndexSubentry{Filters, Persistence}
+It is important to note that filters are @emph{persistent}.  A filter is
+active until it is reset.  This means that successive filter commands
+increasingly narrow down the view until one or more are reset.
+
+@noindent
+An example is the following:
+
+@cartouche
+@smallexample
+$ gprofng display text -thread_select 1 -functions \
+                       -cpu_select 2 -functions @dots{}
+@end smallexample
+@end cartouche
+
+This command selects thread 1 and requests the function view for this thread.
+The third (@command{cpu_select 2}) command @emph{adds} the
+constraint that only the events on CPU 2 are to be selected.  This means
+that the next function view selects events that were executed by thread 1 and
+have been running on CPU 2.
+
+@noindent
+In contrast with this single command line, the two commands below look similar,
+but behave very differently:
+
+@cartouche
+@smallexample
+$ gprofng display text -thread_select 1 -functions @dots{}
+$ gprofng display text -cpu_select 2 -functions @dots{}
+@end smallexample
+@end cartouche
+
+The first command displays the function view for thread 1.  The second
+command shows the function view for CPU 2 for @emph{all} threads that have
+been running on this CPU.
+
+As the following example demonstrates, things get a little more tricky in
+case a script file is used.  Consider the following script file:
+
+@smallexample
+@verbatim
+thread_select 1
+functions
+cpu_select 2
+functions
+@end verbatim
+@end smallexample
+
+This script file displays the function view for thread 1 first.  This is
+followed by those functions that were executed by thread 1 @emph{and} have
+been run on CPU 2.
+
+If however, the script should behave like the two command line invocations
+shown above, the thread selection filter needs to be reset before CPU 2 is
+selected:
+
+@smallexample
+@verbatim
+thread_select 1
+functions
+# Reset the thread selection filter:
+thread_select all
+cpu_select 2
+functions
+@end verbatim
+@end smallexample
+
+In general, filters behave differently than commands or options. In 
+particular there may be an interaction between different filter definitions.
+
+For example, as explained above, in the first script file the
+@command{thread_select} and @command{cpu_select} commands interact.
+
+For a list of all the predefined filters see @ref{Predefined Filters}.
+
+@c -- A new section -----------------------------------------------------------
+@node    Supported Environment Variables
+@section Supported Environment Variables
+@c ----------------------------------------------------------------------------
+
+Various environment variables are supported.  We refer to the man page for
+gprofng(1) for an overview and description
+(@pxref{Man page for gprofng}).
+
+@c -- A new chapter -----------------------------------------------------------
+@node    Performance Data Collection
+@chapter Performance Data Collection
+@c ----------------------------------------------------------------------------
+
+The @CollectApp{} command is used to gather the application performance data
+while the application executes.
+At regular intervals, program execution is halted and the required data is
+recorded.
+@cindex Experiment directory
+An experiment directory is created when the tool starts.  This directory is
+used to store the relevant information and forms the basis for a subsequent
+analysis with one of the viewing tools.
+
+@c -- A new section -----------------------------------------------------------
+@node    The @CollectApp{} command
+@section The @CollectApp{} command
+@c ----------------------------------------------------------------------------
+
+This is the command to collect the performance information for the target
+application.  The usage is as follows:
+
+@cartouche
+@smallexample
+$ gprofng collect app [OPTION(S)] TARGET [TARGET_ARGUMENTS]
+@end smallexample
+@end cartouche
+
+@noindent
+Options to the command are passed in first.  This is followed by the name of
+the target, which is typically a binary executable or a script, followed by
+any options that may be required by the target.
+
+@c -- A new section -----------------------------------------------------------
+@node    View the Performance Information
+@chapter View the Performance Information
+@c ----------------------------------------------------------------------------
+Various tools to view the performance data stored in one or more experiment
+directories are available.  In this chapter, these will all be covered in
+detail.
+
+@c -- A new section -----------------------------------------------------------
+@node    The @code{gprofng display text} Tool
+@section The @code{gprofng display text} Tool
+@c ----------------------------------------------------------------------------
+
+This tool displays the performance information in ASCII format.  It supports
+a variety of views into the data recorded.
+These views can be specified in two ways and both may be used simultaneously:
+
+@itemize @bullet
+
+@item
+Command line options start with a dash (@samp{-}) symbol and may take an
+argument.
+
+@item
+Options may also be included in a file, the ``script file''.  In this case,
+the dash symbol should @emph{not} be included.  Multiple script files can
+be used on the same command line.
+
+@end itemize
+
+While these may appear to be options, they are really commands and this is
+why they will be referred to as @emph{commands}
+@cindex Commands
+in the documentation.
+
+As a general rule, @emph{the order of options matters} and if the same option,
+or command, occurs multiple times, the rightmost setting is selected.
+
+@c -- A new sub section -------------------------------------------------------
+@node       The @code{gprofng display text} Commands
+@subsection The @code{gprofng display text} Commands
+@c ----------------------------------------------------------------------------
+
+The most commonly used commands are documented in the man page for this tool
+(@pxref{gprofng display text}).  In this section we list and describe all other
+commands that are supported.
+
+@c -- A new sub subsection ----------------------------------------------------
+@node                Commands that List Experiment Details
+@unnumberedsubsubsec Commands that List Experiment Details
+@c ----------------------------------------------------------------------------
+
+@table @code
+
+@item experiment_ids
+@ifclear man
+@IndexSubentry{Options,  @code{-experiment_ids}}
+@IndexSubentry{Commands, @code{experiment_ids}}
+@end ifclear
+
+For each experiment that has been loaded, show the totals of the metrics
+recorded, plus some other operational characteristics like the name of
+the executable, PID, etc.  The top line contains the accumulated totals
+for the metrics.
+
+@item experiment_list
+@ifclear man
+@IndexSubentry{Options,  @code{-experiment_list}}
+@IndexSubentry{Commands, @code{experiment_list}}
+@end ifclear
+
+Display the list of experiments that are loaded.  Each experiment is listed
+with an index, which is used when selecting samples, threads, or LWPs, and
+a process id (PID), which can be used for advanced filtering.
+
+@item cpu_list
+@IndexSubentry{Options,  @code{-cpu_list}}
+@IndexSubentry{Commands, @code{cpu_list}}
+
+Display the total number of CPUs that have been used during the experiment(s).
+
+@item cpus
+@IndexSubentry{Options,  @code{-cpus}}
+@IndexSubentry{Commands, @code{cpus}}
+
+Show a list of CPUs that were used by the application, along with the metrics
+that have been recorded. The CPUs are represented by a CPU number and show the
+Total CPU time by default.
+
+Note that since the data is sorted with respect to the default metric, it may
+be useful to use the @command{sort name} command to show the list sorted with
+respect to the CPU id.
+
+@item GCEvents
+@IndexSubentry{Options,  @code{-GCEvents}}
+@IndexSubentry{Commands, @code{GCEvents}}
+
+This command is for Java applications only.  It shows any Garbage Collection
+(GC) events that have occurred while the application was executing.
+
+@item lwp_list
+@IndexSubentry{Options,  @code{-lwp_list}}
+@IndexSubentry{Commands, @code{lwp_list}}
+
+Displays the list of LWPs processed during the experiment(s).
+
+@item processes
+@IndexSubentry{Options,  @code{-processes}}
+@IndexSubentry{Commands, @code{processes}}
+
+For each experiment that has been loaded, this command displays a list of
+processes that were created by the application, along with their metrics.
+The processes are represented by process ID (PID) numbers and show the
+Total CPU time metric by default. If additional metrics are recorded in
+an experiment, these are shown as well.
+
+@item samples
+@IndexSubentry{Options,  @code{-samples}}
+@IndexSubentry{Commands, @code{samples}}
+
+Display a list of sample points and their metrics, which reflect the
+microstates recorded at each sample point in the loaded experiment.
+The samples are represented by sample numbers and show the Total CPU time
+by default.  Other metrics might also be displayed if enabled.
+
+@item sample_list
+@IndexSubentry{Options,  @code{-sample_list}}
+@IndexSubentry{Commands, @code{sample_list}}
+
+For each experiment loaded, display the list of samples currently selected.
+
+@item seconds
+@IndexSubentry{Options,  @code{-seconds}}
+@IndexSubentry{Commands, @code{seconds}}
+
+Show each second of the profiling run that was captured in the experiment,
+along with the metrics collected in that second.  The seconds view differs
+from the samples view in that it shows periodic samples that occur every
+second beginning at 0 and the interval cannot be changed.
+
+The seconds view lists the seconds of execution with the Total CPU time by
+default.  Other metrics might also be displayed if the metrics are present
+in the loaded experiments.
+
+@item threads
+@IndexSubentry{Options,  @code{-threads}}
+@IndexSubentry{Commands, @code{threads}}
+
+Show a list of threads and their metrics.  The threads are represented
+by a process and thread pair and show the Total CPU time by default.
+Other metrics might also be displayed by default if the metrics are
+present in the loaded experiment.
+
+@item thread_list
+@IndexSubentry{Options,  @code{-thread_list}}
+@IndexSubentry{Commands, @code{thread_list}}
+
+Display the list of threads currently selected for the analysis.
+
+@end table
+
+@noindent
+@emph{The commands below are for use in scripts and interactive mode only.
+They are not allowed on the command line.}
+
+@table @code
+
+@item add_exp @var{exp-name}
+@IndexSubentry{Commands, @code{add_exp}}
+
+Add the named experiment to the current session.
+
+@item drop_exp @var{exp-name}
+@IndexSubentry{Commands, @code{drop_exp}}
+
+Drop the named experiment from the current session.
+
+@item open_exp @var{exp-name}
+@IndexSubentry{Commands, @code{open_exp}}
+
+Drop all loaded experiments from the session, and then load the named
+experiment.
+
+@end table
+
+@c -- A new sub subsection ----------------------------------------------------
+@node                Commands that Affect Listings and Output
+@unnumberedsubsubsec Commands that Affect Listings and Output
+@c ----------------------------------------------------------------------------
+
+@table @code
+
+@item dthresh @var{value}
+@IndexSubentry{Options,  @code{-dthresh}}
+@IndexSubentry{Commands, @code{dthresh}}
+
+Specify the threshold percentage for highlighting metrics in the annotated
+disassembly code.  If the value of any metric is equal to or greater than
+@var{value} as a percentage of the maximum value of that metric for any
+instruction line in the file, the line on which the metrics occur has a
+@samp{##} marker inserted at the beginning of the line.  The default is 75.
+
+@item printmode @{text | html | @var{single-char}@}
+@IndexSubentry{Options,  @code{-printmode}}
+@IndexSubentry{Commands, @code{printmode}}
+
+Set the print mode. If the keyword is @code{text}, printing will be done in
+tabular form using plain text.  In case the @code{html} keyword is selected,
+the output is formatted as an HTML table.
+
+Alternatively, if @var{single-char} is specified, the output is printed as a
+delimiter separated list, with the character @var{single-char} as the delimiter.
+
+The printmode setting is used only for those commands that generate tables,
+such as @command{functions}.  The setting is ignored for other printing
+commands, including those showing source and disassembly listings.
+
+@item sthresh @var{value}
+@IndexSubentry{Options,  @code{-sthresh}}
+@IndexSubentry{Commands, @code{sthresh}}
+
+Specify the threshold percentage for highlighting metrics in the annotated
+source code.  If the value of any metric is equal to or greater than
+@var{value} (as a percentage) of the maximum value of that metric for any
+source line in the file, the line on which the metrics occur has a @samp{##}
+marker inserted at the beginning of the line.  The default is 75.
+
+@end table
+
+@c -- A new sub subsection ----------------------------------------------------
+@node                Predefined Filters
+@unnumberedsubsubsec Predefined Filters
+@c ----------------------------------------------------------------------------
+
+The filters below use a list, the selection list, to define a sequence of
+numbers. @xref{The Selection List}.
+Note that this selection is persistent, but the filter can be reset by using
+@samp{all} as the @var{selection-list}.
+
+@table @code
+
+@item cpu_select @var{selection-list}
+@IndexSubentry{Options,  @code{-cpu_select}}
+@IndexSubentry{Commands, @code{cpu_select}}
+
+Select the CPU ids specified in the @var{selection-list}.
+
+@item lwp_select @var{selection-list}
+@IndexSubentry{Options,  @code{-lwp_select}}
+@IndexSubentry{Commands, @code{lwp_select}}
+
+Select the LWPs specified in the @var{selection-list}.
+
+@item sample_select @var{selection-list}
+@IndexSubentry{Options,  @code{-sample_select}}
+@IndexSubentry{Commands, @code{sample_select}}
+
+@item thread_select @var{selection-list}
+@IndexSubentry{Options,  @code{-thread_select}}
+@IndexSubentry{Commands, @code{thread_select}}
+
+Select a series of threads, or just one, to be used in subsequent views.
+The @var{selection-list} consists of a sequence of comma separated numbers.
+This may include a range of the form @samp{n-m}.
+
+@end table
+
+
+@c -- A new sub subsection ----------------------------------------------------
+@node                Commands to Set and Change Search Paths
+@unnumberedsubsubsec Commands to Set and Change Search Paths
+@c ----------------------------------------------------------------------------
+
+@table @code
+
+@item addpath @var{path-list}
+@IndexSubentry{Options,  @code{-addpath}}
+@IndexSubentry{Commands, @code{addpath}}
+
+Append @var{path-list} to the current setpath settings.  Note that multiple
+@command{addpath} commands can be used in @file{.gprofng.rc} files, and will
+be concatenated.
+
+@item pathmap @var{old-prefix} @var{new-prefix}
+@IndexSubentry{Options,  @code{-pathmap}}
+@IndexSubentry{Commands, @code{pathmap}}
+
+If a file cannot be found using the path list set by @command{addpath}, or
+the @command{setpath} command, one or more path remappings may be set with the 
+@command{pathmap} command.
+
+With path mapping, the user can specify how to replace the leading component
+in a full path by a different string.
+
+With this command, for any path name of a source file, object file, or shared
+object that begins with the prefix specified with @var{old-prefix}, the
+old prefix is replaced by the prefix specified with @var{new-prefix}.
+The resulting path is used to find the file.
+
+For example, if a source file located in directory @file{/tmp}
+is shown in the @DisplayText{} output, but should instead be taken from
+@file{/home/demo}, the following @command{pathmap} command redefines the
+path:
+
+@smallexample
+$ gprofng display text -pathmap /tmp /home/demo -source ...
+@end smallexample
+
+Note that multiple @command{pathmap} commands can be supplied, and each is
+tried until the file is found.
+
+@item setpath @var{path-list}
+@IndexSubentry{Options,  @code{-setpath}}
+@IndexSubentry{Commands, @code{setpath}}
+
+Set the path used to find source and object files. The path is defined
+through the @var{path-list} keyword. It is a colon separated list of
+directories, jar files, or zip files.
+If any directory has a colon character in it, escape it with a
+backslash (@samp{\}).
+
+The special directory name @code{$expts} refers
+to the set of current experiments in the order in which they were loaded.
+You can abbreviate it with a single @samp{$} character.
+
+The default path is @samp{$expts:..} which is the directories of the
+loaded experiments and the current working directory.
+
+Use @command{setpath} with no argument to display the current path. 
+
+Note that @command{setpath} commands @emph{are not allowed in .gprofng.rc
+configuration files}.
+
+@end table
+
+@c -- A new subsection --------------------------------------------------------
+@c -- TBD @node       Usage examples for @code{gprofng display text}
+@c -- TBD @subsection Usage examples for @code{gprofng display text}
+@c ----------------------------------------------------------------------------
+
+@c -- TBD In this section we present usage examples.
+
+@c -- A new chapter -----------------------------------------------------------
+@c  TBD @node    The @code{gprofng display html} Tool
+@c  TBD @section The @code{gprofng display html} Tool
+@c ----------------------------------------------------------------------------
+@c  TBD The options are documented in the man page for this tool. In this section we
+@c  TBD present usage examples.
+
+@c -- A new chapter -----------------------------------------------------------
+@c  TBD @node    Display Source Code
+@c  TBD @chapter Display Source Code
+@c ----------------------------------------------------------------------------
+@c  TBD The options are documented in the man page for this tool. In this section we
+@c  TBD present usage examples.
+
+
+@c -- A new chapter -----------------------------------------------------------
+@c  TBD @node    Archive Experiment Data
+@c  TBD @chapter Archive Experiment Data
+@c ----------------------------------------------------------------------------
+@c  TBD The options are documented in the man page for this tool. In this section we
+@c  TBD present usage examples.
+
+@c -- A new chapter -----------------------------------------------------------
+@node    Terminology
+@chapter Terminology
+@c ----------------------------------------------------------------------------
+
+Throughout this manual, certain terminology specific to profiling tools,
+or @ToolName{}, or even to this document only, is used.  In this chapter
+this terminology is explained in detail.
+
+@menu
+* The Program Counter::                    What is a Program Counter?
+* Inclusive and Exclusive Metrics::        An explanation of inclusive and exclusive metrics.
+* Metric Definitions::                     Definitions associated with metrics.
+* The Viewmode::                           Select the way call stacks are presented.
+* The Selection List::                     How to define a selection.
+* Load Objects and Functions::             The components in an application.
+* The Concept of a CPU in gprofng::        The definition of a CPU.
+* Hardware Event Counters Explained::      What are event counters?
+* apath::                                  Our generic definition of a path.
+@end menu
+
+@c ----------------------------------------------------------------------------
+@node    The Program Counter
+@section The Program Counter
+@c ----------------------------------------------------------------------------
+
+@cindex PC
+@cindex Program Counter
+The @emph{Program Counter}, or PC for short, keeps track of where program execution is.
+The address of the next instruction to be executed is stored in a special
+purpose register in the processor, or core.
+
+@cindex Instruction pointer
+The PC is sometimes also referred to as the @emph{instruction pointer}, but
+we will use Program Counter or PC throughout this document.
+
+@c ----------------------------------------------------------------------------
+@node    Inclusive and Exclusive Metrics
+@section Inclusive and Exclusive Metrics
+@c ----------------------------------------------------------------------------
+
+In the remainder, these two concepts occur quite often and for lack of a better
+place, they are explained here.
+
+@cindex Inclusive metric
+The @emph{inclusive} value for a metric includes all values that are part of
+the dynamic extent of the target function.  For example if function @code{A}
+calls functions @code{B} and @code{C}, the inclusive CPU time for @code{A}
+includes the CPU time spent in @code{B} and @code{C}.
+
+@cindex Exclusive metric
+In contrast with this, the @emph{exclusive} value for a metric is computed
+by excluding the metric values used by other functions called.  In our imaginary
+example, the exclusive CPU time for function @code{A} is the time spent outside
+calling functions @code{B} and @code{C}.
+
+@cindex Leaf function
+In case of a @emph{leaf function}, the inclusive and exclusive values for the
+metric are the same since by definition, it is not calling any other
+function(s).
+
+Why do we use these two different values? The inclusive metric shows the most
+expensive path, in terms of this metric, in the application.  For example, if
+the metric is cache misses, the function with the highest inclusive metric
+tells you where most of the cache misses come from.
+
+Within this branch of the application, the exclusive metric points to the
+functions that contribute and help to identify which part(s) to consider
+for further analysis.
+
+@c ----------------------------------------------------------------------------
+@node    Metric Definitions
+@section Metric Definitions
+@c ----------------------------------------------------------------------------
+The metrics displayed in the various views are highly customizable.  In this
+section it is explained how to construct the metrics definition(s).
+
+@IndexSubentry{Options,  @code{-metrics}}
+@IndexSubentry{Commands, @code{metrics}}
+The @command{metrics} command takes a colon (@samp{:}) separated list, where
+each item in the list consists of the following three fields:
+@var{<flavor>}@var{<visibility>}@var{<metric-name>}.
+
+@cindex Flavor field
+@cindex Visibility field
+@cindex Metric name field
+@IndexSubentry{Metrics, Flavor field}
+@IndexSubentry{Metrics, Visibility field}
+@IndexSubentry{Metrics, Metric name field}
+The @var{<flavor>} field is either @samp{e} for ``exclusive'', and/or
+@samp{i} for ``inclusive''.  The @var{<metric-name>} field is the name of
+the metric and the @var{<visibility>} field consists of one or more characters
+from the following table:
+
+@table @code
+
+@item .
+Show the metric as time.  This applies to timing metrics and hardware event
+counters that measure cycles.  Interpret as @samp{+} for other metrics.
+
+@item %
+Show the metric as a percentage of the total value for this metric.
+
+@item +
+Show the metric as an absolute value.  For hardware event counters this is
+the event count.  Interpret as @samp{.} for timing metrics.
+
+@item !
+Do not show any metric value.  Cannot be used with other visibility characters.
+This visibility is meant to be used in a @command{dmetrics} command to set
+default metrics that override the built-in visibility defaults
+for each type of metric.
+
+@end table
+
+Both the @var{<flavor>} and @var{<visibility>} strings may have more than one
+character.  If both strings have more than one character, the @var{<flavor>}
+string is expanded first.  For example, @code{ie.%user} is first expanded to
+@code{i.%user:e.%user}, which is then expanded into
+@code{i.user:i%user:e.user:e%user}.
+
+@c ----------------------------------------------------------------------------
+@node    The Viewmode
+@section The Viewmode
+
+@cindex Viewmode
+@IndexSubentry{Options,  @code{-viewmode}}
+@IndexSubentry{Commands, @code{viewmode}}
+
+There are different ways to view a call stack in Java.  In @ToolName{}, this
+is called the @emph{viewmode} and the setting is controlled through a command
+with the same name.
+
+The @code{viewmode} command takes one of the following keywords:
+
+@table @code
+
+@item user
+This is the default and shows the Java call stacks for Java threads.
+No call stacks for any housekeeping threads are shown.  The function
+list contains a function
+@IndexSubentry{Java profiling, @code{<JVM-System>}}
+@code{<JVM-System>} that represents the aggregated time from non-Java
+threads.
+When the JVM software does not report a Java call stack, time is reported
+against the function
+@IndexSubentry{Java profiling, @code{<no Java callstack recorded>}}
+@code{<no Java callstack recorded>}.
+
+@item expert
+Show the Java call stacks for Java threads when the Java code from the
+user is executed and machine call stacks when JVM code is executed, or
+when the JVM software does not report a Java call stack.
+Show the machine call stacks for housekeeping threads.
+
+@item machine
+Show the actual native call stacks for all threads.
+
+@end table
+
+@c ----------------------------------------------------------------------------
+@node    The Selection List
+@section The Selection List
+@c ----------------------------------------------------------------------------
+
+@cindex Selection list
+@cindex List specification
+Several commands allow the user to specify a sequence of numbers called the
+@emph{selection list}.  Such a list may for example be used to select specific
+threads from all the threads that have been used when conducting the
+experiment(s).
+
+A selection list (or ``list'' in the remainder of this section) can be a
+single number, a contiguous range of numbers with the start and end numbers
+separated by a hyphen (@samp{-}), a comma-separated list of numbers and
+ranges, or the @code{all} keyword that resets the filter.
+@IndexSubentry{Filters, Reset to default}
+Lists must not contain spaces.
+
+Each list can optionally be preceded by an experiment list with a similar
+format, separated from the list by a colon (:).
+If no experiment list is included, the list applies to all experiments.
+
+Multiple lists can be concatenated by separating the individual lists
+by a plus sign.
+
+These are some examples of various filters using a list:
+
+@table @code
+
+@item thread_select 1
+Select thread 1 from all experiments.
+
+@item thread_select all:1
+Select thread 1 from all experiments.
+
+@item thread_select 1:all
+Select all the threads from the first experiment loaded.
+
+@item thread_select 1:2+3:4
+Select thread 2 from experiment 1 and thread 4 from experiment 3.
+
+@item cpu_select all:1,3,5
+Select cores 1, 3, and 5 from all experiments.
+
+@item cpu_select 1,2:all
+Select all cores from experiments 1 and 2.
+
+@end table
+
+Recall that there are several list commands that show the mapping between the
+numbers and the targets.
+
+@IndexSubentry{Options, @code{-experiment_list}}
+@IndexSubentry{Commands, @code{experiment_list}}
+For example, the @command{experiment_list} command shows the name(s) of the
+experiment(s) loaded and the associated number. In this example it is used
+to get this information for a range of experiments:
+
+@cartouche
+@smallexample
+$ gprofng display text -experiment_list mxv.?.thr.er
+@end smallexample
+@end cartouche
+
+@noindent
+This is the output, showing for each experiment the ID, the PID, and the name:
+
+@smallexample
+@verbatim
+ID Sel     PID Experiment
+== === ======= ============
+ 1 yes 2750071 mxv.1.thr.er
+ 2 yes 1339450 mxv.2.thr.er
+ 3 yes 3579561 mxv.4.thr.er
+@end verbatim
+@end smallexample
+
+@c ----------------------------------------------------------------------------
+@node    Load Objects and Functions
+@section Load Objects and Functions
+@c ----------------------------------------------------------------------------
+
+An application consists of various components.  The source code files are
+compiled into object files.  These are then glued together at link time to form
+the executable.
+During execution, the program may also dynamically load objects.
+
+@cindex Load object
+A @emph{load object} is defined to be an executable, or shared object.  A shared
+library is an example of a load object in @ToolName{}.
+
+Each load object contains a text section with the instructions generated by the
+compiler, a data section for data, and various symbol tables.
+All load objects must contain an
+@cindex ELF
+ELF
+symbol table, which gives the names and addresses of all the globally known
+functions in that object.
+
+Load objects compiled with the -g option contain additional symbolic information
+that can augment the ELF symbol table and provide information about functions that
+are not global, additional information about object modules from which the functions
+came, and line number information relating addresses to source lines.
+
+The term
+@cindex Function
+@emph{function}
+is used to describe a set of instructions that represent a high-level operation
+described in the source code.  The term also covers methods as used in C++ and in
+the Java programming language.
+
+In the @ToolName{} context, functions are provided in source code format.
+Normally their names appear in the symbol table representing a set of addresses.
+@cindex Program Counter
+@cindex PC
+If the Program Counter (PC) is within that set, the program is executing within that function.
+
+In principle, any address within the text segment of a load object can be mapped to a
+function.  Exactly the same mapping is used for the leaf PC and all the other PCs on the
+call stack.
+
+Most of the functions correspond directly to the source model of the program, but
+there are exceptions.  This topic is however outside of the scope of this guide.
+
+@c ----------------------------------------------------------------------------
+@node    The Concept of a CPU in @ProductName{}
+@section The Concept of a CPU in @ProductName{}
+@c ----------------------------------------------------------------------------
+
+@cindex CPU
+In @ProductName{}, there is the concept of a CPU.  Admittedly, this is not the
+best word to describe what is meant here and may be replaced in the future.
+
+The word CPU is used in many of the displays.
+In the context of @ProductName{}, it is meant to denote a part of the
+processor that is capable of executing instructions and with its own state,
+like the program counter.
+
+For example, on a contemporary processor, a CPU could be a core.  In case
+hardware threads are supported within a core, a CPU is one of those
+hardware threads.
+
+To see which CPUs have been used in the experiment, use the @command{cpu}
+command in @DisplayText{}.
+
+@c ----------------------------------------------------------------------------
+@node    Hardware Event Counters Explained
+@section Hardware Event Counters Explained
+@c ----------------------------------------------------------------------------
+
+@IndexSubentry{Hardware event counters, description}
+For quite a number of years now, many microprocessors have supported hardware
+event counters.
+
+On the hardware side, this means that in the processor there are one or more
+registers dedicated to count certain activities, or ``events''.
+Examples of such events are the number of instructions executed, or the number
+of cache misses at level 2 in the memory hierarchy.
+
+While there is a limited set of such registers, the user can map events onto
+them.  In case more than one register is available, this allows for the
+simultaneous measurement of various events.
+
+A simple, yet powerful, example is to simultaneously count the number of CPU
+cycles and the number of instructions executed.  These two numbers can then be
+used to compute the
+@cindex IPC
+@emph{IPC} value.  IPC stands for ``Instructions Per Clockcycle'' and each processor
+has a maximum.  For example, if this maximum number is 2, it means the
+processor is capable of executing two instructions every clock cycle.
+
+Whether this is actually achieved, depends on several factors, including the
+instruction characteristics.
+However, in case the IPC value is well below this maximum in a time critical
+part of the application and this cannot be easily explained, further
+investigation is probably warranted.
+
+@cindex CPI
+A related metric is called @emph{CPI}, or ``Clockcycles Per Instruction''.
+It is the inverse of the IPC and can be compared against the theoretical
+value(s) of the target instruction(s).  A significant difference may point
+at a bottleneck.
+
+One thing to keep in mind is that the value returned by a counter can either
+be the number of times the event occurred, or a CPU cycle count.  In case of
+the latter it is possible to convert this number to time.
+
+@IndexSubentry{Hardware event counters, variable CPU frequency}
+This is often easier to interpret than a simple count, but there is one
+caveat to keep in mind.  The CPU frequency may not have been constant while
+the experiment was recorded and this impacts the time reported.
+
+These event counters, or ``counters'' for short, provide great insight into
+what happens deep inside the processor.  In case higher level information does
+not provide the insight needed, the counters provide the information to get
+to the bottom of a performance problem.
+
+There are some things to consider though.
+
+@itemize @bullet
+
+@item
+The event definitions and names vary across processors and it may even happen
+that some events change with an update.
+Unfortunately, and this is luckily rare, there are sometimes bugs causing the
+wrong count to be returned.
+
+@IndexSubentry{Hardware event counters, alias name}
+In @ToolName{}, some of the processor specific event names have an alias
+name.  For example @code{insts} measures the instructions executed.
+These aliases not only make it easier to identify the functionality, but also
+provide portability of certain events across processors.
+
+@item
+Another complexity is that there are typically many events one can monitor.
+There may be up to hundreds of events available and it could require several
+experiments to zoom in on the root cause of a performance problem.
+
+@item
+There may be restrictions regarding the mapping of event(s) onto the
+counters.  For example, certain events may be restricted to specific
+counters only.  As a result, one may have to conduct additional experiments
+to cover all the events of interest.
+
+@item
+The names of the events may also not be easy to interpret.  In such cases,
+the description can be found in the architecture manual for the processor.
+
+@end itemize
+
+Despite these drawbacks, hardware event counters are extremely useful and
+may even turn out to be indispensable.
+
+@c ----------------------------------------------------------------------------
+@node    apath
+@section What is <apath>?
+@c ----------------------------------------------------------------------------
+
+In most cases, @ToolName{} shows the absolute pathnames of directories.  These
+tend to be rather long, causing display issues in this document.
+
+Instead of wrapping these long pathnames over multiple lines, we decided to
+represent them by the @code{<apath>} symbol, which stands for ``an absolute
+pathname''.
+
+Note that different occurrences of @code{<apath>} may represent different
+absolute pathnames.
+
+@c -- A new node --------------------------------------------------------------
+@node    Other Document Formats
+@chapter Other Document Formats
+@c ----------------------------------------------------------------------------
+
+@emph{This chapter is applicable when building gprofng from the
+binutils source.}
+
+This document is written in Texinfo and the source text is made available as
+part of the binutils distribution.  The file name is @code{gprofng_ug.texi} and
+can be found in subdirectory @code{gprofng/doc} of the top level binutils
+directory.
+
+The default installation procedure creates a file in the @code{info} format and
+stores it in the documentation section of binutils.
+This source file can however also be used to generate the document in the
+@code{html} and @code{pdf} formats.  These may be easier to read and search.
+
+To generate this documentation file in a different format, go to the directory
+that was used to build the tools.  The make file to build the other formats is
+in the @code{gprofng/doc} subdirectory.
+
+For example, if you have set the build directory to be @var{<my-build-dir>},
+go to subdirectory @var{<my-build-dir>/gprofng/doc}.
+
+This subdirectory has a single file called @file{Makefile} that can be used to
+build the documentation in various formats.  We recommend using these commands.
+
+There are four commands to generate the documentation in the @code{html} or
+@code{pdf} format.  It is assumed that you are in directory @code{gprofng/doc}
+under the main directory @var{<my-build-dir>}.
+
+@table @code
+
+@item make html
+Create the html file in the current directory.
+
+@item make pdf
+Create the pdf file in the current directory.
+
+@item make install-html
+Create and install the html file in the binutils documentation directory.
+
+@item make install-pdf
+Create and install the pdf file in the binutils documentation directory.
+
+@end table
+
+For example, to install this document in the binutils documentation directory, the
+commands below may be executed.  In this notation, @var{<format>}
+is one of @code{html}, or @code{pdf}:
+
+@smallexample
+@verbatim
+$ cd <my-build-dir>/gprofng/doc
+$ make install-<format>
+@end verbatim
+@end smallexample
+
+The binutils installation directory is either the default @code{/usr/local} or the one
+that has been set with the @code{--prefix} option as part of the @code{configure}
+command.  In this example we symbolize this location with @code{<install>}.
+
+The documentation directory is @code{<install>/share/doc/gprofng} in case
+@code{html} or @code{pdf} is selected and @code{<install>/share/info} for the
+file in the @code{info} format.
+
+@noindent
+Some things to note:
+
+@itemize
+
+@item
+For the @code{pdf} file to be generated, the @code{texi2dvi} tool is required.
+@cindex texi2dvi
+It is for example available as part of the @code{texinfo-tex} package.
+
+@item
+Instead of generating a single file in the @code{html} format, it is also
+possible to create a directory with individual files for the various chapters.
+To do so, remove the use of @code{--no-split} in variable @code{MAKEINFOHTML}
+in the make file in the @code{<my-build-dir>/gprofng/doc} directory.
+
+@end itemize
+
+@c -- An appendix -------------------------------------------------------------
+@node     The @ProductName{} Man Pages
+@appendix The @ProductName{} Man Pages
+@c ----------------------------------------------------------------------------
+
+In this appendix the man pages for the various @ProductName{} tools are listed.
+
+@c -- A new node --------------------------------------------------------------
+@c  @node    gprofng driver
+@node    Man page for gprofng
+@section Man page for @command{gprofng}
+@c ----------------------------------------------------------------------------
+
+@include gprofng.texi
+
+@c -- A new node --------------------------------------------------------------
+@page
+@node    gprofng collect app
+@section Man page for @command{gprofng collect app}
+@c ----------------------------------------------------------------------------
+
+@include gp-collect-app.texi
+
+@c -- A new node --------------------------------------------------------------
+@page
+@node    gprofng display text
+@section Man page for @command{gprofng display text}
+@c ----------------------------------------------------------------------------
+
+@include gp-display-text.texi
+
+@c -- A new node --------------------------------------------------------------
+@page
+@node    gprofng display html
+@section Man page for @command{gprofng display html}
+@c ----------------------------------------------------------------------------
+
+@include gp-display-html.texi
+
+@c -- A new node --------------------------------------------------------------
+@page
+@node    gprofng display src
+@section Man page for @command{gprofng display src}
+@c ----------------------------------------------------------------------------
+
+@include gp-display-src.texi
+
+@c -- A new node --------------------------------------------------------------
+@page
+@node    gprofng archive
+@section Man page for @command{gprofng archive}
+@c ----------------------------------------------------------------------------
+
+@include gp-archive.texi
+
+@ifnothtml
+@node       Index
+@unnumbered Index
+@printindex cp
+@end ifnothtml
+
+@bye
diff --git a/gprofng/doc/version.texi b/gprofng/doc/version.texi
index 5bf2b08..18585d8 100644
--- a/gprofng/doc/version.texi
+++ b/gprofng/doc/version.texi
@@ -1,4 +1,4 @@
-@set UPDATED 28 September 2022
-@set UPDATED-MONTH September 2022
+@set UPDATED 14 April 2022
+@set UPDATED-MONTH April 2022
 @set EDITION 2.40.50
-@set VERSION 2.40.50
+@set VERSION 2.0
diff --git a/gprofng/gp-display-html/Makefile.am b/gprofng/gp-display-html/Makefile.am
index c79ecc8..f9b214e 100644
--- a/gprofng/gp-display-html/Makefile.am
+++ b/gprofng/gp-display-html/Makefile.am
@@ -27,35 +27,3 @@ do_subst = sed -e 's/BINUTILS_VERSION/$(VERSION)/'
 gp-display-html: gp-display-html.in Makefile
 	$(do_subst) < $(srcdir)/gp-display-html.in > $@
 	chmod +x $@
-
-if BUILD_MAN
-
-man_MANS = gp-display-html.1
-MAINTAINERCLEANFILES = $(man_MANS)
-
-# Use this if the man pages depend on the version number. 
-# common_mandeps = $(top_srcdir)/../bfd/version.m4
-#
-# Also change the dependence line below to this:
-# gp-display-html.1: $(common_mandeps) gp-display-html
-#
-# Currently, the version number shown in the man page is derived from
-# the output printed with --version.
-
-# These variables are used by help2man to generate the man pages.
-
-INFO_PAGE             = "gprofng"
-MANUAL                = "User Commands"
-TEXT_GP_DISPLAY_HTML  = "generate an HTML based directory structure to browse the profiles"
-
-HELP2MAN_OPT = --libtool --no-info --info-page=$(INFO_PAGE) --manual=$(MANUAL)
-H2M_FILTER = | sed 's/\.TP/\.TP\n.B/' | sed 's/Commands:/\.SH COMMANDS/' \
-  | sed 's/See also:/\.SH SEE ALSO/' | sed 's/Documentation:/.SH DOCUMENTATION/' \
-  | sed 's/Limitations:/.SH LIMITATIONS/'
-
-gp-display-html.1: gp-display-html
-	$(AM_V_GEN)_BUILDING_MANPAGE=1 $(HELP2MAN) $(HELP2MAN_OPT) \
-	  --name=$(TEXT_GP_DISPLAY_HTML) ./gp-display-html $(H2M_FILTER) > $@
-
-endif
-
diff --git a/gprofng/gp-display-html/Makefile.in b/gprofng/gp-display-html/Makefile.in
index 42886f7..21cc1c6 100644
--- a/gprofng/gp-display-html/Makefile.in
+++ b/gprofng/gp-display-html/Makefile.in
@@ -150,7 +150,7 @@ am__uninstall_files_from_dir = { \
     || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
          $(am__cd) "$$dir" && rm -f $$files; }; \
   }
-am__installdirs = "$(DESTDIR)$(bindir)" "$(DESTDIR)$(man1dir)"
+am__installdirs = "$(DESTDIR)$(bindir)"
 SCRIPTS = $(bin_SCRIPTS)
 AM_V_P = $(am__v_P_@AM_V@)
 am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
@@ -171,9 +171,6 @@ am__can_run_installinfo = \
     n|no|NO) false;; \
     *) (install-info --version) >/dev/null 2>&1;; \
   esac
-man1dir = $(mandir)/man1
-NROFF = nroff
-MANS = $(man_MANS)
 am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
 am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/../mkinstalldirs
 DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
@@ -328,27 +325,6 @@ ACLOCAL_AMFLAGS = -I . -I .. -I ../..
 bin_SCRIPTS = gp-display-html
 CLEANFILES = $(bin_SCRIPTS)
 do_subst = sed -e 's/BINUTILS_VERSION/$(VERSION)/'
-@BUILD_MAN_TRUE@man_MANS = gp-display-html.1
-@BUILD_MAN_TRUE@MAINTAINERCLEANFILES = $(man_MANS)
-
-# Use this if the man pages depend on the version number. 
-# common_mandeps = $(top_srcdir)/../bfd/version.m4
-#
-# Also change the dependence line below to this:
-# gp-display-html.1: $(common_mandeps) gp-display-html
-#
-# Currently, the version number shown in the man page is derived from
-# the output printed with --version.
-
-# These variables are used by help2man to generate the man pages.
-@BUILD_MAN_TRUE@INFO_PAGE = "gprofng"
-@BUILD_MAN_TRUE@MANUAL = "User Commands"
-@BUILD_MAN_TRUE@TEXT_GP_DISPLAY_HTML = "generate an HTML based directory structure to browse the profiles"
-@BUILD_MAN_TRUE@HELP2MAN_OPT = --libtool --no-info --info-page=$(INFO_PAGE) --manual=$(MANUAL)
-@BUILD_MAN_TRUE@H2M_FILTER = | sed 's/\.TP/\.TP\n.B/' | sed 's/Commands:/\.SH COMMANDS/' \
-@BUILD_MAN_TRUE@  | sed 's/See also:/\.SH SEE ALSO/' | sed 's/Documentation:/.SH DOCUMENTATION/' \
-@BUILD_MAN_TRUE@  | sed 's/Limitations:/.SH LIMITATIONS/'
-
 all: all-am
 
 .SUFFIXES:
@@ -422,49 +398,6 @@ mostlyclean-libtool:
 
 clean-libtool:
 	-rm -rf .libs _libs
-install-man1: $(man_MANS)
-	@$(NORMAL_INSTALL)
-	@list1=''; \
-	list2='$(man_MANS)'; \
-	test -n "$(man1dir)" \
-	  && test -n "`echo $$list1$$list2`" \
-	  || exit 0; \
-	echo " $(MKDIR_P) '$(DESTDIR)$(man1dir)'"; \
-	$(MKDIR_P) "$(DESTDIR)$(man1dir)" || exit 1; \
-	{ for i in $$list1; do echo "$$i"; done;  \
-	if test -n "$$list2"; then \
-	  for i in $$list2; do echo "$$i"; done \
-	    | sed -n '/\.1[a-z]*$$/p'; \
-	fi; \
-	} | while read p; do \
-	  if test -f $$p; then d=; else d="$(srcdir)/"; fi; \
-	  echo "$$d$$p"; echo "$$p"; \
-	done | \
-	sed -e 'n;s,.*/,,;p;h;s,.*\.,,;s,^[^1][0-9a-z]*$$,1,;x' \
-	      -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,' | \
-	sed 'N;N;s,\n, ,g' | { \
-	list=; while read file base inst; do \
-	  if test "$$base" = "$$inst"; then list="$$list $$file"; else \
-	    echo " $(INSTALL_DATA) '$$file' '$(DESTDIR)$(man1dir)/$$inst'"; \
-	    $(INSTALL_DATA) "$$file" "$(DESTDIR)$(man1dir)/$$inst" || exit $$?; \
-	  fi; \
-	done; \
-	for i in $$list; do echo "$$i"; done | $(am__base_list) | \
-	while read files; do \
-	  test -z "$$files" || { \
-	    echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(man1dir)'"; \
-	    $(INSTALL_DATA) $$files "$(DESTDIR)$(man1dir)" || exit $$?; }; \
-	done; }
-
-uninstall-man1:
-	@$(NORMAL_UNINSTALL)
-	@list=''; test -n "$(man1dir)" || exit 0; \
-	files=`{ for i in $$list; do echo "$$i"; done; \
-	l2='$(man_MANS)'; for i in $$l2; do echo "$$i"; done | \
-	  sed -n '/\.1[a-z]*$$/p'; \
-	} | sed -e 's,.*/,,;h;s,.*\.,,;s,^[^1][0-9a-z]*$$,1,;x' \
-	      -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,'`; \
-	dir='$(DESTDIR)$(man1dir)'; $(am__uninstall_files_from_dir)
 tags TAGS:
 
 ctags CTAGS:
@@ -504,9 +437,9 @@ distdir: $(DISTFILES)
 	done
 check-am: all-am
 check: check-am
-all-am: Makefile $(SCRIPTS) $(MANS)
+all-am: Makefile $(SCRIPTS)
 installdirs:
-	for dir in "$(DESTDIR)$(bindir)" "$(DESTDIR)$(man1dir)"; do \
+	for dir in "$(DESTDIR)$(bindir)"; do \
 	  test -z "$$dir" || $(MKDIR_P) "$$dir"; \
 	done
 install: install-am
@@ -540,7 +473,6 @@ distclean-generic:
 maintainer-clean-generic:
 	@echo "This command is intended for maintainers to use"
 	@echo "it deletes files that may require special tools to rebuild."
-	-test -z "$(MAINTAINERCLEANFILES)" || rm -f $(MAINTAINERCLEANFILES)
 clean: clean-am
 
 clean-am: clean-generic clean-libtool mostlyclean-am
@@ -561,7 +493,7 @@ info: info-am
 
 info-am:
 
-install-data-am: install-man
+install-data-am:
 
 install-dvi: install-dvi-am
 
@@ -577,7 +509,7 @@ install-info: install-info-am
 
 install-info-am:
 
-install-man: install-man1
+install-man:
 
 install-pdf: install-pdf-am
 
@@ -605,9 +537,7 @@ ps: ps-am
 
 ps-am:
 
-uninstall-am: uninstall-binSCRIPTS uninstall-man
-
-uninstall-man: uninstall-man1
+uninstall-am: uninstall-binSCRIPTS
 
 .MAKE: install-am install-strip
 
@@ -617,12 +547,12 @@ uninstall-man: uninstall-man1
 	install install-am install-binSCRIPTS install-data \
 	install-data-am install-dvi install-dvi-am install-exec \
 	install-exec-am install-html install-html-am install-info \
-	install-info-am install-man install-man1 install-pdf \
-	install-pdf-am install-ps install-ps-am install-strip \
-	installcheck installcheck-am installdirs maintainer-clean \
+	install-info-am install-man install-pdf install-pdf-am \
+	install-ps install-ps-am install-strip installcheck \
+	installcheck-am installdirs maintainer-clean \
 	maintainer-clean-generic mostlyclean mostlyclean-generic \
 	mostlyclean-libtool pdf pdf-am ps ps-am tags-am uninstall \
-	uninstall-am uninstall-binSCRIPTS uninstall-man uninstall-man1
+	uninstall-am uninstall-binSCRIPTS
 
 .PRECIOUS: Makefile
 
@@ -631,10 +561,6 @@ gp-display-html: gp-display-html.in Makefile
 	$(do_subst) < $(srcdir)/gp-display-html.in > $@
 	chmod +x $@
 
-@BUILD_MAN_TRUE@gp-display-html.1: gp-display-html
-@BUILD_MAN_TRUE@	$(AM_V_GEN)_BUILDING_MANPAGE=1 $(HELP2MAN) $(HELP2MAN_OPT) \
-@BUILD_MAN_TRUE@	  --name=$(TEXT_GP_DISPLAY_HTML) ./gp-display-html $(H2M_FILTER) > $@
-
 # Tell versions [3.59,3.63) of GNU make to not export all variables.
 # Otherwise a system limit (for SysV at least) may be exceeded.
 .NOEXPORT:
diff --git a/gprofng/src/Makefile.am b/gprofng/src/Makefile.am
index ab90bb0..219367f 100644
--- a/gprofng/src/Makefile.am
+++ b/gprofng/src/Makefile.am
@@ -121,7 +121,7 @@ QLParser.tab.cc QLParser.tab.hh: QLParser.yy
 	$(BISON) $^
 
 BUILT_SOURCES = QLParser.tab.hh
-EXTRA_DIST = QLParser.yy $(man_MANS)
+EXTRA_DIST = QLParser.yy
 
 
 lib_LTLIBRARIES = $(LIBGPROFNG)
@@ -157,60 +157,6 @@ gp_display_src_LDADD = $(LIBGPROFNG) $(CLOCK_GETTIME_LINK) $(ZLIB)
 gp_display_text_SOURCES = gp-display-text.cc ipc.cc ipcio.cc
 gp_display_text_LDADD = $(LIBGPROFNG) $(CLOCK_GETTIME_LINK) $(ZLIB)
 
-
-if BUILD_MAN
-
-man_MANS = \
-	gp-archive.1 \
-	gp-collect-app.1 \
-	gp-display-src.1 \
-	gp-display-text.1
-
-MAINTAINERCLEANFILES = $(man_MANS)
-
-# The man pages depend on the version number and on a help2man include file.
-common_mandeps = $(top_srcdir)/../bfd/version.m4
-
-# Use -o so that the `missing' program can infer the output file.
-# Embolden subcommand names in the output, and include a SEE ALSO.
-# Arrange to regenerate the output if we have help2man, but leave the
-# disted output there otherwise.
-# Some extra annoying complexity is in place so that people without
-# help2man dno't accidentally overwrite the manpage.
-
-INFO_PAGE            = "gprofng"
-MANUAL               = "User Commands"
-TEXT_GPROFNG         = "the driver for the gprofng tool suite"
-TEXT_GP_ARCHIVE      = "archive gprofng experiment data"
-TEXT_GP_COLLECT_APP  = "collect performance data for the target application"
-TEXT_GP_DISPLAY_SRC  = "display the source code, optionally interleaved with the disassembly of the target object"
-TEXT_GP_DISPLAY_TEXT = "display the performance data in plain text format"
-
-HELP2MAN_OPT = --libtool --no-info --info-page=$(INFO_PAGE) --manual=$(MANUAL)
-H2M_FILTER = | sed 's/\.TP/\.TP\n.B/' | sed 's/Commands:/\.SH COMMANDS/' \
-  | sed 's/See also:/\.SH SEE ALSO/' | sed 's/Documentation:/.SH DOCUMENTATION/' \
-  | sed 's/Limitations:/.SH LIMITATIONS/'
-
-gp-archive.1: $(srcdir)/gp-archive.cc $(common_mandeps) | ./gp-archive$(EXEEXT)
-	$(AM_V_GEN)_BUILDING_MANPAGE=1 $(HELP2MAN) $(HELP2MAN_OPT) \
-	  --name=$(TEXT_GP_ARCHIVE) ./gp-archive$(EXEEXT) $(H2M_FILTER) > $@
-
-gp-collect-app.1: $(srcdir)/gp-collect-app.cc $(common_mandeps) | ./gp-collect-app$(EXEEXT)
-	$(AM_V_GEN)_BUILDING_MANPAGE=1 $(HELP2MAN) $(HELP2MAN_OPT) \
-	  --name=$(TEXT_GP_COLLECT_APP) ./gp-collect-app$(EXEEXT) $(H2M_FILTER) > $@
-
-gp-display-src.1: $(srcdir)/gp-display-src.cc $(srcdir)/Command.cc \
-		$(common_mandeps) | ./gp-display-src$(EXEEXT)
-	$(AM_V_GEN)_BUILDING_MANPAGE=1 $(HELP2MAN) $(HELP2MAN_OPT) \
-	  --name=$(TEXT_GP_DISPLAY_SRC) ./gp-display-src$(EXEEXT) $(H2M_FILTER) > $@
-
-gp-display-text.1: $(srcdir)/gp-display-text.cc $(srcdir)/Command.cc \
-		$(common_mandeps) | ./gp-display-text$(EXEEXT)
-	$(AM_V_GEN)_BUILDING_MANPAGE=1 $(HELP2MAN) $(HELP2MAN_OPT) \
-	  --name=$(TEXT_GP_DISPLAY_TEXT) ./gp-display-text$(EXEEXT) $(H2M_FILTER) > $@
-
-endif
-
 # Distribution involves building the binaries to generate the manpage,
 # so ensure that the necessary libraries are built at dist time.
 dist-hook: $(LIBGPROFNG)
diff --git a/gprofng/src/Makefile.in b/gprofng/src/Makefile.in
index 605fa4f..b881268 100644
--- a/gprofng/src/Makefile.in
+++ b/gprofng/src/Makefile.in
@@ -156,7 +156,7 @@ am__uninstall_files_from_dir = { \
          $(am__cd) "$$dir" && rm -f $$files; }; \
   }
 am__installdirs = "$(DESTDIR)$(libdir)" "$(DESTDIR)$(bindir)" \
-	"$(DESTDIR)$(man1dir)" "$(DESTDIR)$(dbedir)"
+	"$(DESTDIR)$(dbedir)"
 LTLIBRARIES = $(lib_LTLIBRARIES)
 am__DEPENDENCIES_1 =
 libgprofng_la_DEPENDENCIES = $(top_builddir)/../opcodes/libopcodes.la \
@@ -275,9 +275,6 @@ am__can_run_installinfo = \
     n|no|NO) false;; \
     *) (install-info --version) >/dev/null 2>&1;; \
   esac
-man1dir = $(mandir)/man1
-NROFF = nroff
-MANS = $(man_MANS)
 DATA = $(dbe_DATA)
 am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
 # Read a list of newline-separated strings from the standard input,
@@ -547,7 +544,7 @@ AM_CFLAGS = $(GPROFNG_CFLAGS) $(PTHREAD_CFLAGS) \
 
 AM_CXXFLAGS = $(AM_CFLAGS)
 BUILT_SOURCES = QLParser.tab.hh
-EXTRA_DIST = QLParser.yy $(man_MANS)
+EXTRA_DIST = QLParser.yy
 lib_LTLIBRARIES = $(LIBGPROFNG)
 libgprofng_la_SOURCES = $(CCSOURCES) $(CSOURCES)
 libgprofng_la_LDFLAGS = -version-info 0:0:0
@@ -572,35 +569,6 @@ gp_display_src_SOURCES = gp-display-src.cc
 gp_display_src_LDADD = $(LIBGPROFNG) $(CLOCK_GETTIME_LINK) $(ZLIB)
 gp_display_text_SOURCES = gp-display-text.cc ipc.cc ipcio.cc
 gp_display_text_LDADD = $(LIBGPROFNG) $(CLOCK_GETTIME_LINK) $(ZLIB)
-@BUILD_MAN_TRUE@man_MANS = \
-@BUILD_MAN_TRUE@	gp-archive.1 \
-@BUILD_MAN_TRUE@	gp-collect-app.1 \
-@BUILD_MAN_TRUE@	gp-display-src.1 \
-@BUILD_MAN_TRUE@	gp-display-text.1
-
-@BUILD_MAN_TRUE@MAINTAINERCLEANFILES = $(man_MANS)
-
-# The man pages depend on the version number and on a help2man include file.
-@BUILD_MAN_TRUE@common_mandeps = $(top_srcdir)/../bfd/version.m4
-
-# Use -o so that the `missing' program can infer the output file.
-# Embolden subcommand names in the output, and include a SEE ALSO.
-# Arrange to regenerate the output if we have help2man, but leave the
-# disted output there otherwise.
-# Some extra annoying complexity is in place so that people without
-# help2man dno't accidentally overwrite the manpage.
-@BUILD_MAN_TRUE@INFO_PAGE = "gprofng"
-@BUILD_MAN_TRUE@MANUAL = "User Commands"
-@BUILD_MAN_TRUE@TEXT_GPROFNG = "the driver for the gprofng tool suite"
-@BUILD_MAN_TRUE@TEXT_GP_ARCHIVE = "archive gprofng experiment data"
-@BUILD_MAN_TRUE@TEXT_GP_COLLECT_APP = "collect performance data for the target application"
-@BUILD_MAN_TRUE@TEXT_GP_DISPLAY_SRC = "display the source code, optionally interleaved with the disassembly of the target object"
-@BUILD_MAN_TRUE@TEXT_GP_DISPLAY_TEXT = "display the performance data in plain text format"
-@BUILD_MAN_TRUE@HELP2MAN_OPT = --libtool --no-info --info-page=$(INFO_PAGE) --manual=$(MANUAL)
-@BUILD_MAN_TRUE@H2M_FILTER = | sed 's/\.TP/\.TP\n.B/' | sed 's/Commands:/\.SH COMMANDS/' \
-@BUILD_MAN_TRUE@  | sed 's/See also:/\.SH SEE ALSO/' | sed 's/Documentation:/.SH DOCUMENTATION/' \
-@BUILD_MAN_TRUE@  | sed 's/Limitations:/.SH LIMITATIONS/'
-
 all: $(BUILT_SOURCES)
 	$(MAKE) $(AM_MAKEFLAGS) all-am
 
@@ -885,49 +853,6 @@ mostlyclean-libtool:
 
 clean-libtool:
 	-rm -rf .libs _libs
-install-man1: $(man_MANS)
-	@$(NORMAL_INSTALL)
-	@list1=''; \
-	list2='$(man_MANS)'; \
-	test -n "$(man1dir)" \
-	  && test -n "`echo $$list1$$list2`" \
-	  || exit 0; \
-	echo " $(MKDIR_P) '$(DESTDIR)$(man1dir)'"; \
-	$(MKDIR_P) "$(DESTDIR)$(man1dir)" || exit 1; \
-	{ for i in $$list1; do echo "$$i"; done;  \
-	if test -n "$$list2"; then \
-	  for i in $$list2; do echo "$$i"; done \
-	    | sed -n '/\.1[a-z]*$$/p'; \
-	fi; \
-	} | while read p; do \
-	  if test -f $$p; then d=; else d="$(srcdir)/"; fi; \
-	  echo "$$d$$p"; echo "$$p"; \
-	done | \
-	sed -e 'n;s,.*/,,;p;h;s,.*\.,,;s,^[^1][0-9a-z]*$$,1,;x' \
-	      -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,' | \
-	sed 'N;N;s,\n, ,g' | { \
-	list=; while read file base inst; do \
-	  if test "$$base" = "$$inst"; then list="$$list $$file"; else \
-	    echo " $(INSTALL_DATA) '$$file' '$(DESTDIR)$(man1dir)/$$inst'"; \
-	    $(INSTALL_DATA) "$$file" "$(DESTDIR)$(man1dir)/$$inst" || exit $$?; \
-	  fi; \
-	done; \
-	for i in $$list; do echo "$$i"; done | $(am__base_list) | \
-	while read files; do \
-	  test -z "$$files" || { \
-	    echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(man1dir)'"; \
-	    $(INSTALL_DATA) $$files "$(DESTDIR)$(man1dir)" || exit $$?; }; \
-	done; }
-
-uninstall-man1:
-	@$(NORMAL_UNINSTALL)
-	@list=''; test -n "$(man1dir)" || exit 0; \
-	files=`{ for i in $$list; do echo "$$i"; done; \
-	l2='$(man_MANS)'; for i in $$l2; do echo "$$i"; done | \
-	  sed -n '/\.1[a-z]*$$/p'; \
-	} | sed -e 's,.*/,,;h;s,.*\.,,;s,^[^1][0-9a-z]*$$,1,;x' \
-	      -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,'`; \
-	dir='$(DESTDIR)$(man1dir)'; $(am__uninstall_files_from_dir)
 install-dbeDATA: $(dbe_DATA)
 	@$(NORMAL_INSTALL)
 	@list='$(dbe_DATA)'; test -n "$(dbedir)" || list=; \
@@ -1038,11 +963,11 @@ distdir: $(DISTFILES)
 check-am: all-am
 check: $(BUILT_SOURCES)
 	$(MAKE) $(AM_MAKEFLAGS) check-am
-all-am: Makefile $(LTLIBRARIES) $(PROGRAMS) $(MANS) $(DATA)
+all-am: Makefile $(LTLIBRARIES) $(PROGRAMS) $(DATA)
 install-binPROGRAMS: install-libLTLIBRARIES
 
 installdirs:
-	for dir in "$(DESTDIR)$(libdir)" "$(DESTDIR)$(bindir)" "$(DESTDIR)$(man1dir)" "$(DESTDIR)$(dbedir)"; do \
+	for dir in "$(DESTDIR)$(libdir)" "$(DESTDIR)$(bindir)" "$(DESTDIR)$(dbedir)"; do \
 	  test -z "$$dir" || $(MKDIR_P) "$$dir"; \
 	done
 install: $(BUILT_SOURCES)
@@ -1077,7 +1002,6 @@ maintainer-clean-generic:
 	@echo "This command is intended for maintainers to use"
 	@echo "it deletes files that may require special tools to rebuild."
 	-test -z "$(BUILT_SOURCES)" || rm -f $(BUILT_SOURCES)
-	-test -z "$(MAINTAINERCLEANFILES)" || rm -f $(MAINTAINERCLEANFILES)
 clean: clean-am
 
 clean-am: clean-binPROGRAMS clean-generic clean-libLTLIBRARIES \
@@ -1101,7 +1025,7 @@ info: info-am
 
 info-am:
 
-install-data-am: install-dbeDATA install-man
+install-data-am: install-dbeDATA
 
 install-dvi: install-dvi-am
 
@@ -1117,7 +1041,7 @@ install-info: install-info-am
 
 install-info-am:
 
-install-man: install-man1
+install-man:
 
 install-pdf: install-pdf-am
 
@@ -1148,9 +1072,7 @@ ps: ps-am
 ps-am:
 
 uninstall-am: uninstall-binPROGRAMS uninstall-dbeDATA \
-	uninstall-libLTLIBRARIES uninstall-man
-
-uninstall-man: uninstall-man1
+	uninstall-libLTLIBRARIES
 
 .MAKE: all check install install-am install-strip
 
@@ -1163,13 +1085,13 @@ uninstall-man: uninstall-man1
 	install-data-am install-dbeDATA install-dvi install-dvi-am \
 	install-exec install-exec-am install-html install-html-am \
 	install-info install-info-am install-libLTLIBRARIES \
-	install-man install-man1 install-pdf install-pdf-am install-ps \
+	install-man install-pdf install-pdf-am install-ps \
 	install-ps-am install-strip installcheck installcheck-am \
 	installdirs maintainer-clean maintainer-clean-generic \
 	mostlyclean mostlyclean-compile mostlyclean-generic \
 	mostlyclean-libtool pdf pdf-am ps ps-am tags tags-am uninstall \
 	uninstall-am uninstall-binPROGRAMS uninstall-dbeDATA \
-	uninstall-libLTLIBRARIES uninstall-man uninstall-man1
+	uninstall-libLTLIBRARIES
 
 .PRECIOUS: Makefile
 
@@ -1177,24 +1099,6 @@ uninstall-man: uninstall-man1
 QLParser.tab.cc QLParser.tab.hh: QLParser.yy
 	$(BISON) $^
 
-@BUILD_MAN_TRUE@gp-archive.1: $(srcdir)/gp-archive.cc $(common_mandeps) | ./gp-archive$(EXEEXT)
-@BUILD_MAN_TRUE@	$(AM_V_GEN)_BUILDING_MANPAGE=1 $(HELP2MAN) $(HELP2MAN_OPT) \
-@BUILD_MAN_TRUE@	  --name=$(TEXT_GP_ARCHIVE) ./gp-archive$(EXEEXT) $(H2M_FILTER) > $@
-
-@BUILD_MAN_TRUE@gp-collect-app.1: $(srcdir)/gp-collect-app.cc $(common_mandeps) | ./gp-collect-app$(EXEEXT)
-@BUILD_MAN_TRUE@	$(AM_V_GEN)_BUILDING_MANPAGE=1 $(HELP2MAN) $(HELP2MAN_OPT) \
-@BUILD_MAN_TRUE@	  --name=$(TEXT_GP_COLLECT_APP) ./gp-collect-app$(EXEEXT) $(H2M_FILTER) > $@
-
-@BUILD_MAN_TRUE@gp-display-src.1: $(srcdir)/gp-display-src.cc $(srcdir)/Command.cc \
-@BUILD_MAN_TRUE@		$(common_mandeps) | ./gp-display-src$(EXEEXT)
-@BUILD_MAN_TRUE@	$(AM_V_GEN)_BUILDING_MANPAGE=1 $(HELP2MAN) $(HELP2MAN_OPT) \
-@BUILD_MAN_TRUE@	  --name=$(TEXT_GP_DISPLAY_SRC) ./gp-display-src$(EXEEXT) $(H2M_FILTER) > $@
-
-@BUILD_MAN_TRUE@gp-display-text.1: $(srcdir)/gp-display-text.cc $(srcdir)/Command.cc \
-@BUILD_MAN_TRUE@		$(common_mandeps) | ./gp-display-text$(EXEEXT)
-@BUILD_MAN_TRUE@	$(AM_V_GEN)_BUILDING_MANPAGE=1 $(HELP2MAN) $(HELP2MAN_OPT) \
-@BUILD_MAN_TRUE@	  --name=$(TEXT_GP_DISPLAY_TEXT) ./gp-display-text$(EXEEXT) $(H2M_FILTER) > $@
-
 # Distribution involves building the binaries to generate the manpage,
 # so ensure that the necessary libraries are built at dist time.
 dist-hook: $(LIBGPROFNG)