#!/usr/bin/env python3
# Copyright (C) 2023-2025 Free Software Foundation, Inc.
#
# Script to regenerate attr-urls.def from generated HTML.
#
# This file is part of GCC.
#
# GCC is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 3, or (at your option) any later
# version.
#
# GCC is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCC; see the file COPYING3. If not see
# <http://www.gnu.org/licenses/>.
# Help text for the command line: handed to argparse as the parser
# description and printed verbatim (RawDescriptionHelpFormatter).
DESCRIPTION = """
Parses the generated HTML (from "make html") to locate anchors
for attributes, and generates a gcc/attr-urls.def file in the source tree,
giving URLs for each attribute, where it can.
Usage (from build/gcc subdirectory):
../../src/gcc/regenerate-attr-urls.py HTML/gcc-15.0.0/ ../../src
To run unit tests:
../../src/gcc/regenerate-attr-urls.py HTML/gcc-15.0.0/ ../../src --unit-test
"""
import argparse
import json
import os
from pathlib import Path
from pprint import pprint
import sys
import re
import unittest
class Index:
    """Attribute anchors scraped from the generated HTML index page.

    Every accepted anchor is recorded twice:

    * ``entries``: list of ``(url_suffix, name, kind, extra_text)`` tuples
      in the order they were added.
    * ``entries_by_kind``: dict mapping ``kind`` (e.g. ``'function'``) to a
      list of ``(name, url_suffix, extra_text)`` tuples.
    """

    # Matches one attribute anchor in the index, e.g.
    #   <a href="Common-Function-Attributes.html#index-access-function-attribute"><code>access</code> function attribute</a>
    #   <a href="MIPS-Function-Attributes.html#index-nocompression-function-attribute_002c-MIPS"><code>nocompression</code> function attribute, MIPS</a>
    # Groups: href, attribute name, kind, text trailing the word "attribute".
    # NOTE(review): the pattern tolerates extra attributes on the <a> and
    # <code> tags (e.g. class="..."); confirm against the markup that the
    # makeinfo in use actually emits.
    _ANCHOR_RE = re.compile(
        r'<a href="([\S]+)"[^>]*><code[^>]*>([\S]+)</code>'
        r' (\S+) attribute([^<]*)</a>')

    def __init__(self):
        self.entries = []
        self.entries_by_kind = {}

    def add_entry(self, url_suffix, name, kind, extra_text, verbose=False):
        """Record one attribute anchor.

        url_suffix -- page and fragment, relative to the "gcc/" HTML dir
        name       -- attribute name
        kind       -- word preceding "attribute" in the index entry
        extra_text -- qualifier following "attribute" ('' if none)
        verbose    -- accepted for interface compatibility; not used
        """
        self.entries.append((url_suffix, name, kind, extra_text))
        self.entries_by_kind.setdefault(kind, []).append(
            (name, url_suffix, extra_text))

    def parse_attribute_index(self, input_filename, verbose=False):
        """Feed every line of the file INPUT_FILENAME to the line parser."""
        with open(input_filename) as f:
            for line in f:
                self.parse_html_line_attribute_index(line, verbose)

    def parse_html_line_attribute_index(self, line, verbose=False):
        """Add an entry for LINE if it contains an attribute anchor.

        Lines without a matching anchor are ignored, as are anchors whose
        attribute name contains a parenthesis.  With VERBOSE set, the raw
        line and any matched groups are printed.
        """
        if verbose:
            print(repr(line))

        # Update for this in the GCC website's bin/preprocess process_html_file:
        #   | sed -e 's/_002d/-/g' -e 's/_002a/*/g' \
        line = line.replace('_002d', '-').replace('_002a', '*')

        m = self._ANCHOR_RE.search(line)
        if not m:
            return
        if verbose:
            print(m.groups())

        url_suffix, name, kind, extra_text = m.groups()

        # Trailing text looks like ", MIPS"; keep only the qualifier.
        if extra_text.startswith(', '):
            extra_text = extra_text[2:]

        # Reject anchors where the name contains a paren
        # e.g. 'target("3dnowa")':
        if '(' in name:
            return

        self.add_entry(url_suffix, name, kind, extra_text)

    def generate_file(self, dstpath):
        """Write the generated tables to the file DSTPATH (overwriting it)."""
        with open(dstpath, 'w') as outf:
            self.write_file(outf)

    def write_file(self, outf):
        """Write the generated C tables to the text stream OUTF.

        One ``<kind>_attrs`` array is emitted per kind (kinds in sorted
        order, rows sorted within each array), followed by an
        ``attr_url_tables`` array referencing each of them.
        """
        kinds = sorted(self.entries_by_kind)

        outf.write("/* Autogenerated by regenerate-attr-urls.py. */\n\n")

        for kind in kinds:
            outf.write("const attr_url_entry %s_attrs[] = {\n" % kind)
            for name, url_suffix, extra_text in sorted(
                    self.entries_by_kind[kind]):
                outf.write(' { "%s", "gcc/%s", "%s", %i},\n'
                           % (name, url_suffix, extra_text, len(name)))
            outf.write("};\n\n")

        outf.write('static const struct attr_url_table {\n')
        outf.write(' const attr_url_entry *m_table;\n')
        outf.write(' const size_t m_table_sz;\n')
        outf.write('} attr_url_tables[] = {\n')
        for kind in kinds:
            outf.write(" { %s_attrs, ARRAY_SIZE (%s_attrs) },\n"
                       % (kind, kind))
        outf.write("};\n")
# Index page to scan for attribute anchors, relative to the base HTML
# directory given on the command line.
INDEX_REL_PATH = 'gcc/Concept-and-Symbol-Index.html'
class TestParsingIndex(unittest.TestCase):
    """Unit tests for Index's parsing of index-page anchors.

    Each single-line test feeds one anchor of the form
    <a href="PAGE#FRAGMENT"><code>NAME</code> KIND attribute[, EXTRA]</a>
    and checks the (url_suffix, name, kind, extra_text) tuple recorded.
    """

    def _parse_line(self, line):
        """Parse LINE with a fresh Index and return its entries list."""
        index = Index()
        index.parse_html_line_attribute_index(line)
        return index.entries

    def test_function_attribute(self):
        entries = self._parse_line(
            '<a href="Common-Function-Attributes.html#index-access-function-attribute"><code>access</code> function attribute</a>')
        self.assertEqual(entries, [('Common-Function-Attributes.html#index-access-function-attribute',
                                    'access',
                                    'function',
                                    '')])

    def test_function_attribute_with_target(self):
        # "_002c" (comma) is not rewritten, so it survives in the URL.
        entries = self._parse_line(
            '<a href="MIPS-Function-Attributes.html#index-nocompression-function-attribute_002c-MIPS"><code>nocompression</code> function attribute, MIPS</a>')
        self.assertEqual(entries, [('MIPS-Function-Attributes.html#index-nocompression-function-attribute_002c-MIPS',
                                    'nocompression',
                                    'function',
                                    'MIPS')])

    def test_reject_parens(self):
        # The href here is only representative; the anchor must be dropped
        # because the attribute name contains a parenthesis.
        entries = self._parse_line(
            '<a href="x86-Function-Attributes.html#index-target_0028_00223dnow_0022_0029-function-attribute_002c-x86"><code>target("3dnow")</code> function attribute, x86</a>')
        self.assertEqual(len(entries), 0)

    def test_type_attribute(self):
        entries = self._parse_line(
            '<a href="Common-Type-Attributes.html#index-aligned-type-attribute"><code>aligned</code> type attribute</a>')
        self.assertEqual(entries, [('Common-Type-Attributes.html#index-aligned-type-attribute',
                                    'aligned',
                                    'type',
                                    '')])

    def test_enumerator_attribute(self):
        entries = self._parse_line(
            '<a href="Enumerator-Attributes.html#index-deprecated-enumerator-attribute"><code>deprecated</code> enumerator attribute</a>')
        self.assertEqual(entries, [('Enumerator-Attributes.html#index-deprecated-enumerator-attribute',
                                    'deprecated',
                                    'enumerator',
                                    '')])

    def test_label_attribute(self):
        entries = self._parse_line(
            '<a href="Label-Attributes.html#index-cold-label-attribute"><code>cold</code> label attribute</a>')
        self.assertEqual(entries, [('Label-Attributes.html#index-cold-label-attribute',
                                    'cold',
                                    'label',
                                    '')])

    def test_statement_attribute(self):
        entries = self._parse_line(
            '<a href="Statement-Attributes.html#index-assume-statement-attribute"><code>assume</code> statement attribute</a>')
        self.assertEqual(entries, [('Statement-Attributes.html#index-assume-statement-attribute',
                                    'assume',
                                    'statement',
                                    '')])

    def test_variable_attribute(self):
        entries = self._parse_line(
            '<a href="AVR-Variable-Attributes.html#index-absdata-variable-attribute_002c-AVR"><code>absdata</code> variable attribute, AVR</a>')
        self.assertEqual(entries, [('AVR-Variable-Attributes.html#index-absdata-variable-attribute_002c-AVR',
                                    'absdata',
                                    'variable',
                                    'AVR')])

    def test_parse_attribute_index(self):
        # Reads the real generated index; INPUT_HTML_PATH is set by the
        # script entry point when --unit-test is given.
        index = Index()
        index.parse_attribute_index(INPUT_HTML_PATH / INDEX_REL_PATH)
        self.assertEqual(index.entries_by_kind['enumerator'][0],
                         ('deprecated',
                          'Enumerator-Attributes.html#index-deprecated-enumerator-attribute',
                          ''))
        self.assertEqual(index.entries_by_kind['label'][0],
                         ('cold', 'Label-Attributes.html#index-cold-label-attribute', ''))
def main(args):
    """Regenerate gcc/attr-urls.def from the generated HTML index.

    args -- parsed command line; its base_html_dir and src_gcc_dir
            attributes are joined with "/" to form the input and
            output paths.
    """
    index_html = args.base_html_dir / INDEX_REL_PATH
    output_def = args.src_gcc_dir / 'gcc' / 'attr-urls.def'

    attr_index = Index()
    attr_index.parse_attribute_index(index_html)
    attr_index.generate_file(output_def)
if __name__ == '__main__':
    # Command line: two positional directories plus an optional switch
    # that runs the unit tests instead of regenerating the file.
    parser = argparse.ArgumentParser(
        description=DESCRIPTION,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument('base_html_dir', type=Path)
    parser.add_argument('src_gcc_dir', type=Path)
    parser.add_argument('--unit-test', action='store_true')
    args = parser.parse_args()

    if not args.unit_test:
        main(args)
    else:
        # Module-level global read by the test that parses the real index.
        INPUT_HTML_PATH = args.base_html_dir
        # Pass a trimmed argv so unittest does not see our own arguments.
        unittest.main(argv=[sys.argv[0], '-v'])