about | summary | refs | log | tree | commit | diff
path: root/clang/tools/include-mapping/cppreference_parser.py
diff options
context:
space:
mode:
author: Vadim D. <vvd170501@gmail.com> 2024-11-04 16:55:38 +0300
committer: GitHub <noreply@github.com> 2024-11-04 14:55:38 +0100
commit: c0ce44e8fc03882641f270539265b20dba0fffdd (patch)
tree: f22f40364323e36606749bc99d7b4787d1f01a30 /clang/tools/include-mapping/cppreference_parser.py
parent: 2dd74d4a76a9c32ecfb118371ddfd3d126ab7cd8 (diff)
download: llvm-c0ce44e8fc03882641f270539265b20dba0fffdd.zip
download: llvm-c0ce44e8fc03882641f270539265b20dba0fffdd.tar.gz
download: llvm-c0ce44e8fc03882641f270539265b20dba0fffdd.tar.bz2
[Tooling/Inclusion] Update std symbols mapping (#113612)
Fixes #113494
Diffstat (limited to 'clang/tools/include-mapping/cppreference_parser.py')
-rw-r--r-- clang/tools/include-mapping/cppreference_parser.py 51
1 files changed, 39 insertions, 12 deletions
diff --git a/clang/tools/include-mapping/cppreference_parser.py b/clang/tools/include-mapping/cppreference_parser.py
index f2ea553..9101f3d 100644
--- a/clang/tools/include-mapping/cppreference_parser.py
+++ b/clang/tools/include-mapping/cppreference_parser.py
@@ -7,7 +7,7 @@
#
# ===------------------------------------------------------------------------===#
-from bs4 import BeautifulSoup, NavigableString
+from bs4 import BeautifulSoup, NavigableString, Tag
import collections
import multiprocessing
@@ -40,7 +40,7 @@ def _HasClass(tag, *classes):
return False
-def _ParseSymbolPage(symbol_page_html, symbol_name):
+def _ParseSymbolPage(symbol_page_html, symbol_name, qual_name):
"""Parse symbol page and retrieve the include header defined in this page.
The symbol page provides header for the symbol, specifically in
"Defined in header <header>" section. An example:
@@ -69,7 +69,9 @@ def _ParseSymbolPage(symbol_page_html, symbol_name):
was_decl = True
# Symbols are in the first cell.
found_symbols = row.find("td").stripped_strings
- if not symbol_name in found_symbols:
+ if not any(
+ sym == symbol_name or sym == qual_name for sym in found_symbols
+ ):
continue
headers.update(current_headers)
elif _HasClass(row, "t-dsc-header"):
@@ -89,6 +91,22 @@ def _ParseSymbolPage(symbol_page_html, symbol_name):
return headers or all_headers
+def _ParseSymbolVariant(caption):
+ if not (isinstance(caption, NavigableString) and "(" in caption):
+ return None
+
+ if ")" in caption.text: # (locale), (algorithm), etc.
+ return caption.text.strip(" ()")
+
+ second_part = caption.next_sibling
+ if isinstance(second_part, Tag) and second_part.name == "code":
+ # (<code>std::complex</code>), etc.
+ third_part = second_part.next_sibling
+ if isinstance(third_part, NavigableString) and third_part.text.startswith(")"):
+ return second_part.text
+ return None
+
+
def _ParseIndexPage(index_page_html):
"""Parse index page.
The index page lists all std symbols and hrefs to their detailed pages
@@ -107,9 +125,7 @@ def _ParseIndexPage(index_page_html):
# This accidentally accepts begin/end despite the (iterator) caption: the
# (since C++11) note is first. They are good symbols, so the bug is unfixed.
caption = symbol_href.next_sibling
- variant = None
- if isinstance(caption, NavigableString) and "(" in caption:
- variant = caption.text.strip(" ()")
+ variant = _ParseSymbolVariant(caption)
symbol_tt = symbol_href.find("tt")
if symbol_tt:
symbols.append(
@@ -122,9 +138,9 @@ def _ParseIndexPage(index_page_html):
return symbols
-def _ReadSymbolPage(path, name):
+def _ReadSymbolPage(path, name, qual_name):
with open(path) as f:
- return _ParseSymbolPage(f.read(), name)
+ return _ParseSymbolPage(f.read(), name, qual_name)
def _GetSymbols(pool, root_dir, index_page_name, namespace, variants_to_accept):
@@ -146,9 +162,8 @@ def _GetSymbols(pool, root_dir, index_page_name, namespace, variants_to_accept):
for symbol_name, symbol_page_path, variant in _ParseIndexPage(f.read()):
# Variant symbols (e.g. the std::locale version of isalpha) add ambiguity.
# FIXME: use these as a fallback rather than ignoring entirely.
- variants_for_symbol = variants_to_accept.get(
- (namespace or "") + symbol_name, ()
- )
+ qualified_symbol_name = (namespace or "") + symbol_name
+ variants_for_symbol = variants_to_accept.get(qualified_symbol_name, ())
if variant and variant not in variants_for_symbol:
continue
path = os.path.join(root_dir, symbol_page_path)
@@ -156,7 +171,9 @@ def _GetSymbols(pool, root_dir, index_page_name, namespace, variants_to_accept):
results.append(
(
symbol_name,
- pool.apply_async(_ReadSymbolPage, (path, symbol_name)),
+ pool.apply_async(
+ _ReadSymbolPage, (path, symbol_name, qualified_symbol_name)
+ ),
)
)
else:
@@ -192,6 +209,16 @@ def GetSymbols(parse_pages):
variants_to_accept = {
# std::remove<> has variant algorithm.
"std::remove": ("algorithm"),
+ # These functions don't have a generic version, and all variants are defined in <chrono>
+ "std::chrono::abs": ("std::chrono::duration"),
+ "std::chrono::ceil": ("std::chrono::duration"),
+ "std::chrono::floor": ("std::chrono::duration"),
+ "std::chrono::from_stream": ("std::chrono::day"),
+ "std::chrono::round": ("std::chrono::duration"),
+ # Same, but in <filesystem>
+ "std::filesystem::begin": ("std::filesystem::directory_iterator"),
+ "std::filesystem::end": ("std::filesystem::directory_iterator"),
+ "std::ranges::get": ("std::ranges::subrange"),
}
symbols = []
# Run many workers to process individual symbol pages under the symbol index.