diff options
author | Vadim D. <vvd170501@gmail.com> | 2024-11-04 16:55:38 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-11-04 14:55:38 +0100 |
commit | c0ce44e8fc03882641f270539265b20dba0fffdd (patch) | |
tree | f22f40364323e36606749bc99d7b4787d1f01a30 /clang/tools/include-mapping/cppreference_parser.py | |
parent | 2dd74d4a76a9c32ecfb118371ddfd3d126ab7cd8 (diff) | |
download | llvm-c0ce44e8fc03882641f270539265b20dba0fffdd.zip llvm-c0ce44e8fc03882641f270539265b20dba0fffdd.tar.gz llvm-c0ce44e8fc03882641f270539265b20dba0fffdd.tar.bz2 |
[Tooling/Inclusion] Update std symbols mapping (#113612)
Fixes #113494
Diffstat (limited to 'clang/tools/include-mapping/cppreference_parser.py')
-rw-r--r-- | clang/tools/include-mapping/cppreference_parser.py | 51 |
1 files changed, 39 insertions, 12 deletions
diff --git a/clang/tools/include-mapping/cppreference_parser.py b/clang/tools/include-mapping/cppreference_parser.py index f2ea553..9101f3d 100644 --- a/clang/tools/include-mapping/cppreference_parser.py +++ b/clang/tools/include-mapping/cppreference_parser.py @@ -7,7 +7,7 @@ # # ===------------------------------------------------------------------------===# -from bs4 import BeautifulSoup, NavigableString +from bs4 import BeautifulSoup, NavigableString, Tag import collections import multiprocessing @@ -40,7 +40,7 @@ def _HasClass(tag, *classes): return False -def _ParseSymbolPage(symbol_page_html, symbol_name): +def _ParseSymbolPage(symbol_page_html, symbol_name, qual_name): """Parse symbol page and retrieve the include header defined in this page. The symbol page provides header for the symbol, specifically in "Defined in header <header>" section. An example: @@ -69,7 +69,9 @@ def _ParseSymbolPage(symbol_page_html, symbol_name): was_decl = True # Symbols are in the first cell. found_symbols = row.find("td").stripped_strings - if not symbol_name in found_symbols: + if not any( + sym == symbol_name or sym == qual_name for sym in found_symbols + ): continue headers.update(current_headers) elif _HasClass(row, "t-dsc-header"): @@ -89,6 +91,22 @@ def _ParseSymbolPage(symbol_page_html, symbol_name): return headers or all_headers +def _ParseSymbolVariant(caption): + if not (isinstance(caption, NavigableString) and "(" in caption): + return None + + if ")" in caption.text: # (locale), (algorithm), etc. + return caption.text.strip(" ()") + + second_part = caption.next_sibling + if isinstance(second_part, Tag) and second_part.name == "code": + # (<code>std::complex</code>), etc. + third_part = second_part.next_sibling + if isinstance(third_part, NavigableString) and third_part.text.startswith(")"): + return second_part.text + return None + + def _ParseIndexPage(index_page_html): """Parse index page. The index page lists all std symbols and hrefs to their detailed pages @@ -107,9 +125,7 @@ def _ParseIndexPage(index_page_html): # This accidentally accepts begin/end despite the (iterator) caption: the # (since C++11) note is first. They are good symbols, so the bug is unfixed. caption = symbol_href.next_sibling - variant = None - if isinstance(caption, NavigableString) and "(" in caption: - variant = caption.text.strip(" ()") + variant = _ParseSymbolVariant(caption) symbol_tt = symbol_href.find("tt") if symbol_tt: symbols.append( @@ -122,9 +138,9 @@ def _ParseIndexPage(index_page_html): return symbols -def _ReadSymbolPage(path, name): +def _ReadSymbolPage(path, name, qual_name): with open(path) as f: - return _ParseSymbolPage(f.read(), name) + return _ParseSymbolPage(f.read(), name, qual_name) def _GetSymbols(pool, root_dir, index_page_name, namespace, variants_to_accept): @@ -146,9 +162,8 @@ def _GetSymbols(pool, root_dir, index_page_name, namespace, variants_to_accept): for symbol_name, symbol_page_path, variant in _ParseIndexPage(f.read()): # Variant symbols (e.g. the std::locale version of isalpha) add ambiguity. # FIXME: use these as a fallback rather than ignoring entirely. - variants_for_symbol = variants_to_accept.get( - (namespace or "") + symbol_name, () - ) + qualified_symbol_name = (namespace or "") + symbol_name + variants_for_symbol = variants_to_accept.get(qualified_symbol_name, ()) if variant and variant not in variants_for_symbol: continue path = os.path.join(root_dir, symbol_page_path) @@ -156,7 +171,9 @@ def _GetSymbols(pool, root_dir, index_page_name, namespace, variants_to_accept): results.append( ( symbol_name, - pool.apply_async(_ReadSymbolPage, (path, symbol_name)), + pool.apply_async( + _ReadSymbolPage, (path, symbol_name, qualified_symbol_name) + ), ) ) else: @@ -192,6 +209,16 @@ def GetSymbols(parse_pages): variants_to_accept = { # std::remove<> has variant algorithm. "std::remove": ("algorithm"), + # These functions don't have a generic version, and all variants are defined in <chrono> + "std::chrono::abs": ("std::chrono::duration"), + "std::chrono::ceil": ("std::chrono::duration"), + "std::chrono::floor": ("std::chrono::duration"), + "std::chrono::from_stream": ("std::chrono::day"), + "std::chrono::round": ("std::chrono::duration"), + # Same, but in <filesystem> + "std::filesystem::begin": ("std::filesystem::directory_iterator"), + "std::filesystem::end": ("std::filesystem::directory_iterator"), + "std::ranges::get": ("std::ranges::subrange"), } symbols = [] # Run many workers to process individual symbol pages under the symbol index. |