#!/usr/bin/env python3

# Copyright (C) 2023-2025 Free Software Foundation, Inc.
#
# Script to regenerate attr-urls.def from generated HTML.
#
# This file is part of GCC.
#
# GCC is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 3, or (at your option) any later
# version.
#
# GCC is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCC; see the file COPYING3.  If not see
# <http://www.gnu.org/licenses/>.  */

DESCRIPTION = """
Parses the generated HTML (from "make html") to locate anchors
for attributes, and generates a gcc/attr-urls.def file in the source tree,
giving URLs for each attribute, where it can.

Usage (from build/gcc subdirectory):
  ../../src/gcc/regenerate-attr-urls.py HTML/gcc-15.0.0/ ../../src

To run unit tests:
  ../../src/gcc/regenerate-attr-urls.py HTML/gcc-15.0.0/ ../../src --unit-test
"""

import argparse
import json
import os
from pathlib import Path
from pprint import pprint
import sys
import re
import unittest


class Index:
    """Attribute anchors harvested from the generated HTML index.

    entries is a flat list of (url_suffix, name, kind, extra_text) tuples
    in the order they were added.  entries_by_kind maps each kind string
    to a list of (name, url_suffix, extra_text) tuples.
    """

    # One index entry: an anchor whose text is a <code> element holding
    # the attribute name, followed by "<kind> attribute" and an optional
    # trailing qualifier.  The four groups are, in order: the href, the
    # attribute name, the kind and the trailing text.
    # e.g. <a href="Common-Function-Attributes.html#index-access-function-attribute"><code>access</code> function attribute</a>
    # e.g. <a href="MIPS-Function-Attributes.html#index-nocompression-function-attribute_002c-MIPS"><code>nocompression</code> function attribute, MIPS</a>
    _ENTRY_RE = re.compile(
        r'<a href="([\S]+)"><code[^>]*>([\S]+)</code>'
        r' (\S+) attribute([^<]*)</a>')

    def __init__(self):
        self.entries = []
        self.entries_by_kind = {}

    def add_entry(self, url_suffix, name, kind, extra_text, verbose=False):
        """Record one attribute, both in the flat list and under its kind.

        verbose is accepted for symmetry with the parsing methods; it is
        not used here.
        """
        self.entries.append((url_suffix, name, kind, extra_text))
        self.entries_by_kind.setdefault(kind, []).append(
            (name, url_suffix, extra_text))

    def parse_attribute_index(self, input_filename, verbose=False):
        """Feed every line of the HTML file input_filename to the parser."""
        # Read as UTF-8 explicitly rather than relying on the locale's
        # default encoding.
        with open(input_filename, encoding='utf-8') as f:
            for line in f:
                self.parse_html_line_attribute_index(line, verbose)

    def parse_html_line_attribute_index(self, line, verbose=False):
        """Add an entry for line if it holds an attribute index anchor.

        Lines that do not match the expected anchor shape are ignored, as
        are anchors whose attribute name contains an open paren.  When
        verbose is true, the raw line and any matched groups are printed.
        """
        if verbose:
            print(repr(line))

        # Update for this in the GCC website's bin/preprocess process_html_file:
        #   | sed -e 's/_002d/-/g' -e 's/_002a/*/g' \
        line = line.replace('_002d', '-')
        line = line.replace('_002a', '*')

        m = self._ENTRY_RE.search(line)
        if not m:
            return
        if verbose:
            print(m.groups())
        url_suffix, name, kind, extra_text = m.groups()

        # Trailing text such as ", MIPS" is stored without the separator.
        if extra_text.startswith(', '):
            extra_text = extra_text[2:]

        # Reject anchors where the name contains a paren
        # e.g. 'target("3dnowa")':
        if '(' in name:
            return

        self.add_entry(url_suffix, name, kind, extra_text, verbose)

    def generate_file(self, dstpath):
        """Write the generated table definitions to the file dstpath."""
        with open(dstpath, 'w', encoding='utf-8') as outf:
            self.write_file(outf)

    def write_file(self, outf):
        """Write the generated table definitions to the stream outf.

        One array of attr_url_entry is written per kind, followed by a
        table listing every array.  Kinds and the entries within each
        kind are written in sorted order.
        """
        outf.write("/* Autogenerated by regenerate-attr-urls.py.  */\n\n")

        kinds = sorted(self.entries_by_kind.keys())

        for kind in kinds:
            by_kind = self.entries_by_kind[kind]
            outf.write("const attr_url_entry %s_attrs[] = {\n" % kind)
            for name, url_suffix, extra_text in sorted(by_kind):
                outf.write(' { "%s", "gcc/%s", "%s", %i},\n'
                           % (name, url_suffix, extra_text, len(name)))
            outf.write("};\n\n")

        outf.write('static const struct attr_url_table {\n')
        outf.write(' const attr_url_entry *m_table;\n')
        outf.write(' const size_t m_table_sz;\n')
        outf.write('} attr_url_tables[] = {\n')
        for kind in kinds:
            outf.write(" { %s_attrs, ARRAY_SIZE (%s_attrs) },\n"
                       % (kind, kind))
        outf.write("};\n")


INDEX_REL_PATH = 'gcc/Concept-and-Symbol-Index.html'

# Directory holding the generated HTML.  It is set from the command line
# when the script is run with --unit-test, and stays None when the tests
# are loaded by any other runner.
INPUT_HTML_PATH = None


class TestParsingIndex(unittest.TestCase):
    """Unit tests for parsing lines of the generated HTML index."""

    def test_function_attribute(self):
        index = Index()
        index.parse_html_line_attribute_index('<a href="Common-Function-Attributes.html#index-access-function-attribute"><code>access</code> function attribute</a>')
        self.assertEqual(index.entries, [('Common-Function-Attributes.html#index-access-function-attribute',
                                          'access',
                                          'function',
                                          '')])

    def test_function_attribute_with_target(self):
        index = Index()
        index.parse_html_line_attribute_index('<a href="MIPS-Function-Attributes.html#index-nocompression-function-attribute_002c-MIPS"><code>nocompression</code> function attribute, MIPS</a>')
        self.assertEqual(index.entries, [('MIPS-Function-Attributes.html#index-nocompression-function-attribute_002c-MIPS',
                                          'nocompression',
                                          'function',
                                          'MIPS')])

    def test_reject_parens(self):
        index = Index()
        index.parse_html_line_attribute_index('<a href="x86-Function-Attributes.html#index-target_0028_00223dnow_0022_0029-function-attribute_002c-x86"><code>target("3dnow")</code> function attribute, x86</a>')
        self.assertEqual(len(index.entries), 0)

    def test_type_attribute(self):
        index = Index()
        index.parse_html_line_attribute_index('<a href="Common-Type-Attributes.html#index-aligned-type-attribute"><code>aligned</code> type attribute</a>')
        self.assertEqual(index.entries, [('Common-Type-Attributes.html#index-aligned-type-attribute',
                                          'aligned',
                                          'type',
                                          '')])

    def test_enumerator_attribute(self):
        index = Index()
        index.parse_html_line_attribute_index('<a href="Enumerator-Attributes.html#index-deprecated-enumerator-attribute"><code>deprecated</code> enumerator attribute</a>')
        self.assertEqual(index.entries, [('Enumerator-Attributes.html#index-deprecated-enumerator-attribute',
                                          'deprecated',
                                          'enumerator',
                                          '')])

    def test_label_attribute(self):
        index = Index()
        index.parse_html_line_attribute_index('<a href="Label-Attributes.html#index-cold-label-attribute"><code>cold</code> label attribute</a>')
        self.assertEqual(index.entries, [('Label-Attributes.html#index-cold-label-attribute',
                                          'cold',
                                          'label',
                                          '')])

    def test_statement_attribute(self):
        index = Index()
        index.parse_html_line_attribute_index('<a href="Statement-Attributes.html#index-assume-statement-attribute"><code>assume</code> statement attribute</a>')
        self.assertEqual(index.entries, [('Statement-Attributes.html#index-assume-statement-attribute',
                                          'assume',
                                          'statement',
                                          '')])

    def test_variable_attribute(self):
        index = Index()
        index.parse_html_line_attribute_index('<a href="AVR-Variable-Attributes.html#index-absdata-variable-attribute_002c-AVR"><code>absdata</code> variable attribute, AVR</a>')
        self.assertEqual(index.entries, [('AVR-Variable-Attributes.html#index-absdata-variable-attribute_002c-AVR',
                                          'absdata',
                                          'variable',
                                          'AVR')])

    def test_parse_attribute_index(self):
        # This test reads the real generated index, so it can only run
        # when the HTML directory was supplied on the command line.
        if INPUT_HTML_PATH is None:
            self.skipTest('needs the generated HTML directory; '
                          'run this script with --unit-test')
        index = Index()
        index.parse_attribute_index(INPUT_HTML_PATH / INDEX_REL_PATH)
        self.assertEqual(index.entries_by_kind['enumerator'][0],
                         ('deprecated',
                          'Enumerator-Attributes.html#index-deprecated-enumerator-attribute',
                          ''))
        self.assertEqual(index.entries_by_kind['label'][0],
                         ('cold',
                          'Label-Attributes.html#index-cold-label-attribute',
                          ''))


def main(args):
    """Parse the HTML index under args.base_html_dir and write
    gcc/attr-urls.def under args.src_gcc_dir."""
    index = Index()
    index.parse_attribute_index(args.base_html_dir / INDEX_REL_PATH)
    dstpath = args.src_gcc_dir / 'gcc' / 'attr-urls.def'
    index.generate_file(dstpath)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description=DESCRIPTION,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('base_html_dir', type=Path)
    parser.add_argument('src_gcc_dir', type=Path)
    parser.add_argument('--unit-test', action='store_true')
    args = parser.parse_args()

    if args.unit_test:
        INPUT_HTML_PATH = args.base_html_dir
        # Hand unittest a fresh argv so it does not try to interpret
        # this script's own positional arguments.
        unittest.main(argv=[sys.argv[0], '-v'])
    else:
        main(args)