diff options
Diffstat (limited to 'gcc/regenerate-attr-urls.py')
-rwxr-xr-x | gcc/regenerate-attr-urls.py | 209 |
1 files changed, 209 insertions, 0 deletions
#!/usr/bin/env python3

# Copyright (C) 2023-2024 Free Software Foundation, Inc.
#
# Script to regenerate attr-urls.def from generated HTML.
#
# This file is part of GCC.
#
# GCC is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 3, or (at your option) any later
# version.
#
# GCC is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCC; see the file COPYING3.  If not see
# <http://www.gnu.org/licenses/>.  */

DESCRIPTION = """
Parses the generated HTML (from "make html") to locate anchors
for attributes, and generates a gcc/attr-urls.def file in the source tree,
giving URLs for each attribute, where it can.

Usage (from build/gcc subdirectory):
  ../../src/gcc/regenerate-attr-urls.py HTML/gcc-15.0.0/ ../../src

To run unit tests:
  ../../src/gcc/regenerate-attr-urls.py HTML/gcc-15.0.0/ ../../src --unit-test
"""

import argparse
import json
import os
from pathlib import Path
from pprint import pprint
import sys
import re
import unittest


class Index:
    """Collection of attribute anchors scraped from the HTML index.

    Attributes:
      entries: list of (url_suffix, name, kind, extra_text) tuples, in
        the order they were added.
      entries_by_kind: dict mapping each kind string (the word before
        "attribute" in the index entry, e.g. 'function') to a list of
        (name, url_suffix, extra_text) tuples.
    """

    # Matches one index anchor, capturing: href, attribute name, kind,
    # and any trailing text after the word "attribute".
    # e.g. <a href="Common-Function-Attributes.html#index-access-function-attribute"><code>access</code> function attribute</a>
    # e.g. <a href="MIPS-Function-Attributes.html#index-nocompression-function-attribute_002c-MIPS"><code class="code">nocompression</code> function attribute, MIPS</a>
    _ANCHOR_RE = re.compile(
        r'<a href="([\S]+)"><code[^>]*>([\S]+)</code> (\S+) attribute([^<]*)</a>')

    def __init__(self):
        self.entries = []
        self.entries_by_kind = {}

    def add_entry(self, url_suffix, name, kind, extra_text, verbose=False):
        """Record one attribute anchor, both flat and grouped by KIND.

        VERBOSE is accepted for interface compatibility; it is not used.
        """
        self.entries.append((url_suffix, name, kind, extra_text))
        self.entries_by_kind.setdefault(kind, []).append(
            (name, url_suffix, extra_text))

    def parse_attribute_index(self, input_filename, verbose=False):
        """Feed every line of the file INPUT_FILENAME to the line parser."""
        # Decode explicitly as UTF-8 so the result does not depend on
        # the locale of whoever regenerates the file (assumes the
        # makeinfo-generated HTML is UTF-8).
        with open(input_filename, encoding='utf-8') as f:
            for line in f:
                self.parse_html_line_attribute_index(line, verbose)

    def parse_html_line_attribute_index(self, line, verbose=False):
        """Add an entry for LINE if it holds an attribute index anchor.

        Lines without such an anchor, and anchors whose attribute name
        contains a parenthesis, are silently ignored.  If VERBOSE, the
        raw line and any regex match groups are printed.
        """
        if verbose:
            print(repr(line))

        # Update for this in the GCC website's bin/preprocess process_html_file:
        #   | sed -e 's/_002d/-/g' -e 's/_002a/*/g' \
        line = line.replace('_002d', '-')
        line = line.replace('_002a', '*')

        m = self._ANCHOR_RE.search(line)
        if not m:
            return
        if verbose:
            print(m.groups())

        url_suffix, name, kind, extra_text = m.groups()

        # Trailing text looks like ", MIPS"; keep just the "MIPS" part.
        if extra_text.startswith(', '):
            extra_text = extra_text[2:]

        # Reject anchors where the name contains a paren
        # e.g. 'target("3dnowa")':
        if '(' in name:
            return

        self.add_entry(url_suffix, name, kind, extra_text)

    def generate_file(self, dstpath):
        """Write the generated tables to the file DSTPATH, replacing it."""
        with open(dstpath, 'w', encoding='utf-8') as outf:
            self.write_file(outf)

    def write_file(self, outf):
        """Write the generated tables to the text stream OUTF.

        Emits one sorted "<kind>_attrs" array per kind (kinds in sorted
        order), followed by an "attr_url_tables" array referencing each
        of them.
        """
        outf.write("/* Autogenerated by regenerate-attr-urls.py.  */\n\n")

        kinds = sorted(self.entries_by_kind)

        for kind in kinds:
            outf.write("const attr_url_entry %s_attrs[] = {\n" % kind)
            for name, url_suffix, extra_text in sorted(self.entries_by_kind[kind]):
                outf.write(' { "%s", "gcc/%s", "%s", %i},\n'
                           % (name, url_suffix, extra_text, len(name)))
            outf.write("};\n\n")

        outf.write('static const struct attr_url_table {\n')
        outf.write(' const attr_url_entry *m_table;\n')
        outf.write(' const size_t m_table_sz;\n')
        outf.write('} attr_url_tables[] = {\n')
        for kind in kinds:
            outf.write(" { %s_attrs, ARRAY_SIZE (%s_attrs) },\n" % (kind, kind))
        outf.write("};\n")


INDEX_REL_PATH = 'gcc/Concept-and-Symbol-Index.html'

# Directory containing the generated HTML.  Set from the command line
# when running with --unit-test; None otherwise, in which case the test
# that needs real HTML is skipped rather than failing with a NameError.
INPUT_HTML_PATH = None


class TestParsingIndex(unittest.TestCase):
    """Unit tests for Index's parsing of HTML index lines."""

    def test_function_attribute(self):
        index = Index()
        index.parse_html_line_attribute_index('<a href="Common-Function-Attributes.html#index-access-function-attribute"><code>access</code> function attribute</a>')
        self.assertEqual(index.entries, [('Common-Function-Attributes.html#index-access-function-attribute',
                                          'access',
                                          'function',
                                          '')])

    def test_function_attribute_with_target(self):
        index = Index()
        index.parse_html_line_attribute_index('<a href="MIPS-Function-Attributes.html#index-nocompression-function-attribute_002c-MIPS"><code class="code">nocompression</code> function attribute, MIPS</a>')
        self.assertEqual(index.entries, [('MIPS-Function-Attributes.html#index-nocompression-function-attribute_002c-MIPS',
                                          'nocompression',
                                          'function',
                                          'MIPS')])

    def test_reject_parens(self):
        index = Index()
        index.parse_html_line_attribute_index('<a href="x86-Function-Attributes.html#index-target_0028_00223dnow_0022_0029-function-attribute_002c-x86"><code>target("3dnow")</code> function attribute, x86</a>')
        self.assertEqual(len(index.entries), 0)

    def test_type_attribute(self):
        index = Index()
        index.parse_html_line_attribute_index('<a href="Common-Type-Attributes.html#index-aligned-type-attribute"><code>aligned</code> type attribute</a>')
        self.assertEqual(index.entries, [('Common-Type-Attributes.html#index-aligned-type-attribute',
                                          'aligned',
                                          'type',
                                          '')])

    def test_enumerator_attribute(self):
        index = Index()
        index.parse_html_line_attribute_index('<a href="Enumerator-Attributes.html#index-deprecated-enumerator-attribute"><code>deprecated</code> enumerator attribute</a>')
        self.assertEqual(index.entries, [('Enumerator-Attributes.html#index-deprecated-enumerator-attribute',
                                          'deprecated',
                                          'enumerator',
                                          '')])

    def test_label_attribute(self):
        index = Index()
        index.parse_html_line_attribute_index('<a href="Label-Attributes.html#index-cold-label-attribute"><code>cold</code> label attribute</a>')
        self.assertEqual(index.entries, [('Label-Attributes.html#index-cold-label-attribute',
                                          'cold',
                                          'label',
                                          '')])

    def test_statement_attribute(self):
        index = Index()
        index.parse_html_line_attribute_index('<a href="Statement-Attributes.html#index-assume-statement-attribute"><code>assume</code> statement attribute</a>')
        self.assertEqual(index.entries, [('Statement-Attributes.html#index-assume-statement-attribute',
                                          'assume',
                                          'statement',
                                          '')])

    def test_variable_attribute(self):
        index = Index()
        index.parse_html_line_attribute_index('<a href="AVR-Variable-Attributes.html#index-absdata-variable-attribute_002c-AVR"><code>absdata</code> variable attribute, AVR</a>')
        self.assertEqual(index.entries, [('AVR-Variable-Attributes.html#index-absdata-variable-attribute_002c-AVR',
                                          'absdata',
                                          'variable',
                                          'AVR')])

    def test_parse_attribute_index(self):
        # Needs the real generated HTML, whose location is only known
        # when the script is run with --unit-test.
        if INPUT_HTML_PATH is None:
            self.skipTest('no HTML directory given; run with --unit-test')
        index = Index()
        index.parse_attribute_index(INPUT_HTML_PATH / INDEX_REL_PATH)
        self.assertEqual(index.entries_by_kind['enumerator'][0],
                         ('deprecated',
                          'Enumerator-Attributes.html#index-deprecated-enumerator-attribute',
                          ''))
        self.assertEqual(index.entries_by_kind['label'][0],
                         ('cold', 'Label-Attributes.html#index-cold-label-attribute', ''))


def main(args):
    """Parse the HTML index and regenerate gcc/attr-urls.def.

    ARGS is the parsed command line: ARGS.base_html_dir is the root of
    the generated HTML and ARGS.src_gcc_dir is the root of the source
    tree (both pathlib.Path).
    """
    index = Index()
    index.parse_attribute_index(args.base_html_dir / INDEX_REL_PATH)
    dstpath = args.src_gcc_dir / 'gcc' / 'attr-urls.def'
    index.generate_file(dstpath)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description=DESCRIPTION,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('base_html_dir', type=Path)
    parser.add_argument('src_gcc_dir', type=Path)
    parser.add_argument('--unit-test', action='store_true')
    args = parser.parse_args()

    if args.unit_test:
        INPUT_HTML_PATH = args.base_html_dir
        # Hide our own arguments from unittest's command-line parser.
        unittest.main(argv=[sys.argv[0], '-v'])
    else:
        main(args)