#!/usr/bin/env python3 # Copyright (C) 2023-2024 Free Software Foundation, Inc. # # Script to regenerate FOO.opt.urls files for each FOO.opt in the # source tree. # # This file is part of GCC. # # GCC is free software; you can redistribute it and/or modify it under # the terms of the GNU General Public License as published by the Free # Software Foundation; either version 3, or (at your option) any later # version. # # GCC is distributed in the hope that it will be useful, but WITHOUT ANY # WARRANTY; without even the implied warranty of MERCHANTABILITY or # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License # for more details. # # You should have received a copy of the GNU General Public License # along with GCC; see the file COPYING3. If not see # . */ DESCRIPTION = """ Parses the generated HTML (from "make html") to locate anchors for options, then parses the .opt files within the source tree, and generates a .opt.urls in the source tree for each .opt file, giving URLs for each option, where it can. Usage (from build/gcc subdirectory): ../../src/gcc/regenerate-opt-urls.py HTML/gcc-14.0.0/ ../../src To run unit tests: ../../src/gcc/regenerate-opt-urls.py HTML/gcc-14.0.0/ ../../src --unit-test """ import argparse import json import os from pathlib import Path from pprint import pprint import sys import re import unittest def canonicalize_option_name(option_name): if option_name.endswith('='): option_name = option_name[0:-1] return option_name def canonicalize_url_suffix(url_suffix): """ Various options have anchors for both the positive and negative form. For example -Wcpp has both: 'gcc/Warning-Options.html#index-Wno-cpp' 'gcc/Warning-Options.html#index-Wcpp' Return a canonicalized version of the url_suffix that strips out any "no-" prefixes, for use in deduplication. Note that the resulting url suffix might not correspond to an actual anchor in the HTML. 
""" url_suffix = re.sub('index-Wno-', 'index-W', url_suffix) url_suffix = re.sub('index-fno-', 'index-f', url_suffix) url_suffix = re.sub('_003d$', '', url_suffix) url_suffix = re.sub('-([0-9]+)$', '', url_suffix) return url_suffix class Index: def __init__(self): # Map from language (or None) to map from option name to set of URL suffixes self.entries = {} def add_entry(self, matched_text, url_suffix, language, verbose=False): if 'Attributes.html' in url_suffix: return matched_text = canonicalize_option_name(matched_text) if language not in self.entries: self.entries[language] = {} per_lang_entries = self.entries[language] if matched_text in per_lang_entries: # Partition by canonicalized url_suffixes; add the # first url_suffix in each such partition. c_new = canonicalize_url_suffix(url_suffix) for entry in per_lang_entries[matched_text]: c_entry = canonicalize_url_suffix(entry) if c_new == c_entry: return per_lang_entries[matched_text].add(url_suffix) else: per_lang_entries[matched_text] = set([url_suffix]) def get_languages(self): return self.entries.keys() def get_url_suffixes(self, text, language=None): text = canonicalize_option_name(text) per_lang_entries = self.entries.get(language) if per_lang_entries: return per_lang_entries.get(text) def parse_option_index(self, input_filename, language, verbose=False): with open(input_filename) as f: dirname = input_filename.parent.name for line in f: self.parse_html_line_option_index(dirname, line, language, verbose) def parse_html_line_option_index(self, dirname, line, language, verbose=False): if verbose: print(repr(line)) # Update for this in the GCC website's bin/preprocess process_html_file: # | sed -e 's/_002d/-/g' -e 's/_002a/*/g' \ line = line.replace('_002d', '-') line = line.replace('_002a', '*') # e.g. 
fmodulo-sched m = re.search(r'([\S]+)', line) if not m: return if verbose: print(m.groups()) url_suffix, index_text = m.groups() option = '-' + index_text # Strip off "no-" prefixes from options if option[:5] == '-Wno-': option = '-W' + option[5:] if option[:5] == '-fno-': option = '-f' + option[5:] url_suffix = dirname + '/' + url_suffix self.add_entry(option, url_suffix, language, verbose) class TestParsingIndex(unittest.TestCase): def test_parse_line(self): index = Index() index.parse_html_line_option_index('gcc', 'fmodulo-sched', None) self.assertEqual(index.get_url_suffixes('-fmodulo-sched'), {'gcc/Optimize-Options.html#index-fmodulo-sched'}) def test_negated_flag(self): index = Index() index.parse_html_line_option_index('gcc', 'fno-analyzerStatic Analyzer Options\n', None) self.assertEqual(index.get_url_suffixes('-fno-analyzer'), None) self.assertEqual(index.get_url_suffixes('-fanalyzer'), {'gcc/Static-Analyzer-Options.html#index-fno-analyzer'}) def test_negated_warning(self): index = Index() index.parse_html_line_option_index('gcc', 'Wno-allocaWarning Options\n', None) self.assertEqual(index.get_url_suffixes('-Wno-alloca'), None) self.assertEqual(index.get_url_suffixes('-Walloca'), {'gcc/Warning-Options.html#index-Wno-alloca'}) def test_parse_option_index(self): index = Index() index.parse_option_index(INPUT_HTML_PATH / 'gcc/Option-Index.html', language=None) self.assertEqual(index.get_url_suffixes('-fmodulo-sched'), {'gcc/Optimize-Options.html#index-fmodulo-sched'}) self.assertEqual(index.get_url_suffixes('-O'), {'gcc/Optimize-Options.html#index-O'}) self.assertEqual(index.get_url_suffixes('-O0'), {'gcc/Optimize-Options.html#index-O0'}) self.assertEqual(index.get_url_suffixes('-Wframe-larger-than='), {'gcc/Warning-Options.html#index-Wframe-larger-than_003d'}) # Check an option with duplicates: '-march' # The url_suffixes will be of the form # 'gcc/HPPA-Options.html#index-march-5', # 'gcc/LoongArch-Options.html#index-march-7', # etc, where the trailing 
number is, unfortunately, likely to # change from release to release. # Replace them with 'NN' for the purpose of this test: em_arch_url_suffixes = [re.sub('(-[0-9]+)', '-NN', s) for s in index.get_url_suffixes('-march')] self.assertIn('gcc/ARM-Options.html#index-march-NN', em_arch_url_suffixes) self.assertIn('gcc/x86-Options.html#index-march-NN', em_arch_url_suffixes) self.assertEqual(index.get_url_suffixes('-Wcpp'), {'gcc/Warning-Options.html#index-Wcpp'}) self.assertNotEqual(index.get_url_suffixes('-march'), None) self.assertNotEqual(index.get_url_suffixes('-march='), None) class OptFile: def __init__(self, opt_path, rel_path): """ Parse a .opt file. Similar to opt-gather.awk. """ self.rel_path = rel_path assert rel_path.startswith('gcc') self.records = [] with open(opt_path) as f: flag = 0 for line in f: if re.match(r'[ \t]*(;|$)', line): flag = 0 else: if flag == 0: self.records.append([line]) flag = 1 else: self.records[-1].append(line) # Mapping from target-specific page to subdirectory containing .opt files # documented on that page. 
TARGET_SPECIFIC_PAGES = { 'gcc/AArch64-Options.html' : 'gcc/config/aarch64/', 'gcc/AMD-GCN-Options.html' : 'gcc/config/gcn/', 'gcc/ARC-Options.html' : 'gcc/config/arc/', 'gcc/ARC-Options.html' : 'gcc/config/arc/', 'gcc/ARM-Options.html' : 'gcc/config/arm/', 'gcc/AVR-Options.html' : 'gcc/config/avr/', 'gcc/Adapteva-Epiphany-Options.html' : 'gcc/config/epiphany/', 'gcc/Blackfin-Options.html' : 'gcc/config/bfin/', 'gcc/C-SKY-Options.html' : 'gcc/config/csky/', 'gcc/C6X-Options.html' : 'gcc/config/c6x/', 'gcc/CRIS-Options.html' : 'gcc/config/cris/', 'gcc/DEC-Alpha-Options.html' : 'gcc/config/alpha/', 'gcc/FR30-Options.html' : 'gcc/config/fr30/', 'gcc/FRV-Options.html' : 'gcc/config/frv/', 'gcc/FT32-Options.html' : 'gcc/config/ft32/', 'gcc/H8_002f300-Options.html' : 'gcc/config/h8300/', 'gcc/HPPA-Options.html' : 'gcc/config/pa/', 'gcc/IA-64-Options.html' : 'gcc/config/ia64/', 'gcc/LoongArch-Options.html' : 'gcc/config/loongarch/', 'gcc/M32C-Options.html' : 'gcc/config/m32c/', 'gcc/M32R_002fD-Options.html' : 'gcc/config/m32r/', 'gcc/M680x0-Options.html' : 'gcc/config/m68k/', 'gcc/MCore-Options.html' : 'gcc/config/mcore/', 'gcc/MIPS-Options.html' : 'gcc/config/mips/', 'gcc/MMIX-Options.html' : 'gcc/config/mmix/', 'gcc/MN10300-Options.html' : 'gcc/config/mn10300/', 'gcc/MSP430-Options.html' : 'gcc/config/msp430/', 'gcc/MicroBlaze-Options.html' : 'gcc/config/microblaze/', 'gcc/Moxie-Options.html' : 'gcc/config/moxie/', 'gcc/NDS32-Options.html' : 'gcc/config/nds32/', 'gcc/Nios-II-Options.html' : 'gcc/config/nios2/', 'gcc/Nvidia-PTX-Options.html' : 'gcc/config/nvptx/', 'gcc/OpenRISC-Options.html' : 'gcc/config/or1k/', 'gcc/PDP-11-Options.html' : 'gcc/config/pdp11', 'gcc/PRU-Options.html' : 'gcc/config/pru/', 'gcc/RISC-V-Options.html' : 'gcc/config/riscv/', 'gcc/RL78-Options.html' : 'gcc/config/rl78/', 'gcc/RS_002f6000-and-PowerPC-Options.html' : 'gcc/config/rs6000/', 'gcc/RX-Options.html' : 'gcc/config/rx/', 'gcc/SH-Options.html' : 'gcc/config/sh/', 'gcc/SPARC-Options.html' : 
'gcc/config/sparc/', 'gcc/S_002f390-and-zSeries-Options.html' : 'gcc/config/s390', 'gcc/V850-Options.html' : 'gcc/config/v850/', 'gcc/VAX-Options.html' : 'gcc/config/vax/', 'gcc/Visium-Options.html' : 'gcc/config/visium/', 'gcc/Xstormy16-Options.html' : 'gcc/config/stormy16/', 'gcc/Xtensa-Options.html' : 'gcc/config/xtensa/', 'gcc/eBPF-Options.html' : 'gcc/config/bpf/', 'gcc/x86-Options.html' : 'gcc/config/i386/', } def target_specific(url_suffix): for page_prefix, subdir in TARGET_SPECIFIC_PAGES.items(): if url_suffix.startswith(page_prefix): return subdir def filter_urlsuffixes_for_optfile(optfile, url_suffixes): """ Filter out target-specific options for the wrong target. """ result = set() for url_suffix in url_suffixes: subdir = target_specific(url_suffix) if subdir: if not optfile.rel_path.startswith(subdir): # Skip this continue result.add(url_suffix) return result class TestFiltering(unittest.TestCase): def test_target_specific(self): self.assertEqual(target_specific('gcc/Preprocessor-Options.html#index-A'), None) self.assertEqual(target_specific('gcc/MMIX-Options.html#index-mknuthdiv'), 'gcc/config/mmix/') def test_filter(self): s = {'gcc/MIPS-Options.html#index-munaligned-access-1', 'gcc/ARM-Options.html#index-munaligned-access'} arm_optfile = OptFile('/dev/null', 'gcc/config/arm/arm.opt') mips_optfile = OptFile('/dev/null', 'gcc/config/mips/mips.opt') self.assertEqual( filter_urlsuffixes_for_optfile(arm_optfile, s), {'gcc/ARM-Options.html#index-munaligned-access'}) self.assertEqual( filter_urlsuffixes_for_optfile(mips_optfile, s), {'gcc/MIPS-Options.html#index-munaligned-access-1'}) def write_url_file(index, optfile, dstfile): dstfile.write('; Autogenerated by regenerate-opt-urls.py from %s' ' and generated HTML\n\n' % optfile.rel_path) for record in optfile.records: opt = '-' + record[0].strip() url_suffixes_per_lang = {} count = 0 for lang in index.get_languages(): this_lang_suffixes = index.get_url_suffixes(opt, language=lang) 
url_suffixes_per_lang[lang] = this_lang_suffixes if this_lang_suffixes: count += len(this_lang_suffixes) if not count: continue directives = [] for lang in index.get_languages(): if lang: directive = 'LangUrlSuffix_%s for %r' % (lang, opt[1:]) else: directive = 'UrlSuffix for %r' % opt[1:] url_suffixes = url_suffixes_per_lang[lang] if url_suffixes: url_suffixes = filter_urlsuffixes_for_optfile(optfile, url_suffixes) if url_suffixes: if len(url_suffixes) == 1: if lang: directives.append('LangUrlSuffix_%s(%s)' % (lang, list(url_suffixes)[0])) else: directives.append('UrlSuffix(%s)' % list(url_suffixes)[0]) else: dstfile.write('; skipping %s due to multiple URLs:\n' % directive) for u in sorted(url_suffixes): dstfile.write('; duplicate: %r\n' % u) else: dstfile.write('; skipping %s due to finding no URLs\n' % directive) if directives: dstfile.write('%s\n' % opt[1:]) dstfile.write(' '.join(directives) + '\n') dstfile.write('\n') # A list of (REL_PATH, LANG) pairs, where # - REL_PATH is the relative path to a generated Option-Index.html file # for a specific frontend, and # - LANG is the name of the language (as specified in the "Language" record # within the lang.opt file), or None for the language-independent # documentation. 
PER_LANGUAGE_OPTION_INDEXES = [
    ('gcc/Option-Index.html', None),
    ('gdc/Option-Index.html', 'D'),
    ('gfortran/Option-Index.html', 'Fortran'),
]


def main(args):
    """
    Regenerate a FOO.opt.urls file next to every FOO.opt file found
    under args.src_gcc_dir.

    ARGS is the parsed command line: args.base_html_dir is the
    directory holding the generated HTML and args.src_gcc_dir is the
    directory that is walked for .opt files.  Both are pathlib.Path
    objects.
    """
    # Build the option index from every per-language Option-Index.html.
    index = Index()
    for option_index_rel_path, lang in PER_LANGUAGE_OPTION_INDEXES:
        index.parse_option_index(args.base_html_dir / option_index_rel_path,
                                 language=lang)

    for root, dirs, files in os.walk(args.src_gcc_dir):
        for f in files:
            if not f.endswith('.opt'):
                continue
            opt_path = os.path.join(root, f)
            rel_path = os.path.relpath(opt_path, args.src_gcc_dir)
            optfile = OptFile(opt_path, rel_path)
            # The output goes alongside its .opt file.
            urlfile = os.path.join(root, f + '.urls')
            with open(urlfile, 'w') as dstfile:
                write_url_file(index, optfile, dstfile)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description=DESCRIPTION,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('base_html_dir', type=Path)
    parser.add_argument('src_gcc_dir', type=Path)
    parser.add_argument('--unit-test', action='store_true')
    args = parser.parse_args()

    if args.unit_test:
        # Read by TestParsingIndex.test_parse_option_index.
        INPUT_HTML_PATH = args.base_html_dir
        # Pass a fresh argv so unittest does not try to interpret this
        # script's own command-line arguments.
        unittest.main(argv=[sys.argv[0], '-v'])
    else:
        main(args)