1 files changed, 168 insertions, 155 deletions
diff --git a/clang/tools/include-mapping/cppreference_parser.py b/clang/tools/include-mapping/cppreference_parser.py
index 19bdde7..cefdbea 100644
--- a/clang/tools/include-mapping/cppreference_parser.py
+++ b/clang/tools/include-mapping/cppreference_parser.py
@@ -1,11 +1,11 @@
 #!/usr/bin/env python
-#===- cppreference_parser.py -  ------------------------------*- python -*--===#
+# ===- cppreference_parser.py -  ------------------------------*- python -*--===#
 #
 # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 # See https://llvm.org/LICENSE.txt for license information.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 #
-#===------------------------------------------------------------------------===#
+# ===------------------------------------------------------------------------===#
 
 from bs4 import BeautifulSoup, NavigableString
 
@@ -18,176 +18,189 @@ import sys
 
 
 class Symbol:
+    def __init__(self, name, namespace, headers):
+        # unqualified symbol name, e.g. "move"
+        self.name = name
+        # namespace of the symbol (with trailing "::"), e.g. "std::", "" (global scope)
+        # None for C symbols.
+        self.namespace = namespace
+        # a list of corresponding headers
+        self.headers = headers
 
-  def __init__(self, name, namespace, headers):
-    # unqualifed symbol name, e.g. "move"
-    self.name = name
-    # namespace of the symbol (with trailing "::"), e.g. "std::", "" (global scope)
-    # None for C symbols.
-    self.namespace = namespace
-    # a list of corresponding headers
-    self.headers = headers
-
-  def __lt__(self, other):
-    if self.namespace != other.namespace:
-      return str(self.namespace) < str(other.namespace)
-    return self.name < other.name
+    def __lt__(self, other):
+        if self.namespace != other.namespace:
+            return str(self.namespace) < str(other.namespace)
+        return self.name < other.name
 
 
 def _HasClass(tag, *classes):
-  for c in tag.get('class', []):
-    if c in classes:
-      return True
-  return False
+    for c in tag.get("class", []):
+        if c in classes:
+            return True
+    return False
 
 
 def _ParseSymbolPage(symbol_page_html, symbol_name):
-  """Parse symbol page and retrieve the include header defined in this page.
-  The symbol page provides header for the symbol, specifically in
-  "Defined in header <header>" section. An example:
-
-  <tr class="t-dsc-header">
-    <td colspan="2"> <div>Defined in header <code>&lt;ratio&gt;</code> </div>
-  </td></tr>
-
-  Returns a list of headers.
-  """
-  headers = set()
-  all_headers = set()
-
-  soup = BeautifulSoup(symbol_page_html, "html.parser")
-  # Rows in table are like:
-  #   Defined in header <foo>      .t-dsc-header
-  #   Defined in header <bar>      .t-dsc-header
-  #   decl1                        .t-dcl
-  #   Defined in header <baz>      .t-dsc-header
-  #   decl2                        .t-dcl
-  for table in soup.select('table.t-dcl-begin, table.t-dsc-begin'):
-    current_headers = []
-    was_decl = False
-    for row in table.select('tr'):
-      if _HasClass(row, 't-dcl', 't-dsc'):
-        was_decl = True
-        # Symbols are in the first cell.
-        found_symbols = row.find('td').stripped_strings
-        if not symbol_name in found_symbols:
-          continue
-        headers.update(current_headers)
-      elif _HasClass(row, 't-dsc-header'):
-        # If we saw a decl since the last header, this is a new block of headers
-        # for a new block of decls.
-        if was_decl:
-          current_headers = []
+    """Parse symbol page and retrieve the include header defined in this page.
+    The symbol page provides header for the symbol, specifically in
+    "Defined in header <header>" section. An example:
+
+    <tr class="t-dsc-header">
+      <td colspan="2"> <div>Defined in header <code>&lt;ratio&gt;</code> </div>
+    </td></tr>
+
+    Returns a set of headers.
+    """
+    headers = set()
+    all_headers = set()
+
+    soup = BeautifulSoup(symbol_page_html, "html.parser")
+    # Rows in table are like:
+    #   Defined in header <foo>      .t-dsc-header
+    #   Defined in header <bar>      .t-dsc-header
+    #   decl1                        .t-dcl
+    #   Defined in header <baz>      .t-dsc-header
+    #   decl2                        .t-dcl
+    for table in soup.select("table.t-dcl-begin, table.t-dsc-begin"):
+        current_headers = []
         was_decl = False
-        # There are also .t-dsc-header for "defined in namespace".
-        if not "Defined in header " in row.text:
-          continue
-        # The interesting header content (e.g. <cstdlib>) is wrapped in <code>.
-        for header_code in row.find_all("code"):
-          current_headers.append(header_code.text)
-          all_headers.add(header_code.text)
-  # If the symbol was never named, consider all named headers.
-  return headers or all_headers
+        for row in table.select("tr"):
+            if _HasClass(row, "t-dcl", "t-dsc"):
+                was_decl = True
+                # Symbols are in the first cell.
+                found_symbols = row.find("td").stripped_strings
+                if not symbol_name in found_symbols:
+                    continue
+                headers.update(current_headers)
+            elif _HasClass(row, "t-dsc-header"):
+                # If we saw a decl since the last header, this is a new block of headers
+                # for a new block of decls.
+                if was_decl:
+                    current_headers = []
+                was_decl = False
+                # There are also .t-dsc-header for "defined in namespace".
+                if not "Defined in header " in row.text:
+                    continue
+                # The interesting header content (e.g. <cstdlib>) is wrapped in <code>.
+                for header_code in row.find_all("code"):
+                    current_headers.append(header_code.text)
+                    all_headers.add(header_code.text)
+    # If the symbol was never named, consider all named headers.
+    return headers or all_headers
 
 
 def _ParseIndexPage(index_page_html):
-  """Parse index page.
-  The index page lists all std symbols and hrefs to their detailed pages
-  (which contain the defined header). An example:
-
-  <a href="abs.html" title="abs"><tt>abs()</tt></a> (int) <br>
-  <a href="acos.html" title="acos"><tt>acos()</tt></a> <br>
-
-  Returns a list of tuple (symbol_name, relative_path_to_symbol_page, variant).
-  """
-  symbols = []
-  soup = BeautifulSoup(index_page_html, "html.parser")
-  for symbol_href in soup.select("a[title]"):
-    # Ignore annotated symbols like "acos<>() (std::complex)".
-    # These tend to be overloads, and we the primary is more useful.
-    # This accidentally accepts begin/end despite the (iterator) caption: the
-    # (since C++11) note is first. They are good symbols, so the bug is unfixed.
-    caption = symbol_href.next_sibling
-    variant = None
-    if isinstance(caption, NavigableString) and "(" in caption:
-      variant = caption.text.strip(" ()")
-    symbol_tt = symbol_href.find("tt")
-    if symbol_tt:
-      symbols.append((symbol_tt.text.rstrip("<>()"), # strip any trailing <>()
-                      symbol_href["href"], variant))
-  return symbols
+    """Parse index page.
+    The index page lists all std symbols and hrefs to their detailed pages
+    (which contain the defined header). An example:
+
+    <a href="abs.html" title="abs"><tt>abs()</tt></a> (int) <br>
+    <a href="acos.html" title="acos"><tt>acos()</tt></a> <br>
+
+    Returns a list of tuples (symbol_name, relative_path_to_symbol_page, variant).
+    """
+    symbols = []
+    soup = BeautifulSoup(index_page_html, "html.parser")
+    for symbol_href in soup.select("a[title]"):
+        # Ignore annotated symbols like "acos<>() (std::complex)".
+        # These tend to be overloads, and the primary is more useful.
+        # This accidentally accepts begin/end despite the (iterator) caption: the
+        # (since C++11) note is first. They are good symbols, so the bug is unfixed.
+        caption = symbol_href.next_sibling
+        variant = None
+        if isinstance(caption, NavigableString) and "(" in caption:
+            variant = caption.text.strip(" ()")
+        symbol_tt = symbol_href.find("tt")
+        if symbol_tt:
+            symbols.append(
+                (
+                    symbol_tt.text.rstrip("<>()"),  # strip any trailing <>()
+                    symbol_href["href"],
+                    variant,
+                )
+            )
+    return symbols
 
 
 def _ReadSymbolPage(path, name):
-  with open(path) as f:
-    return _ParseSymbolPage(f.read(), name)
+    with open(path) as f:
+        return _ParseSymbolPage(f.read(), name)
 
 
 def _GetSymbols(pool, root_dir, index_page_name, namespace, variants_to_accept):
-  """Get all symbols listed in the index page. All symbols should be in the
-  given namespace.
-
-  Returns a list of Symbols.
-  """
-
-  # Workflow steps:
-  #   1. Parse index page which lists all symbols to get symbol
-  #      name (unqualified name) and its href link to the symbol page which
-  #      contains the defined header.
-  #   2. Parse the symbol page to get the defined header.
-  index_page_path = os.path.join(root_dir, index_page_name)
-  with open(index_page_path, "r") as f:
-    # Read each symbol page in parallel.
-    results = [] # (symbol_name, promise of [header...])
-    for symbol_name, symbol_page_path, variant in _ParseIndexPage(f.read()):
-      # Variant symbols (e.g. the std::locale version of isalpha) add ambiguity.
-      # FIXME: use these as a fallback rather than ignoring entirely.
-      variants_for_symbol = variants_to_accept.get(
-          (namespace or "") + symbol_name, ())
-      if variant and variant not in variants_for_symbol:
-        continue
-      path = os.path.join(root_dir, symbol_page_path)
-      if os.path.isfile(path):
-        results.append((symbol_name,
-                      pool.apply_async(_ReadSymbolPage, (path, symbol_name))))
-      else:
-        sys.stderr.write("Discarding information for symbol: %s. Page %s does not exist.\n" 
-          % (symbol_name, path))
-
-    # Build map from symbol name to a set of headers.
-    symbol_headers = collections.defaultdict(set)
-    for symbol_name, lazy_headers in results:
-      symbol_headers[symbol_name].update(lazy_headers.get())
-
-  symbols = []
-  for name, headers in sorted(symbol_headers.items(), key=lambda t : t[0]):
-    symbols.append(Symbol(name, namespace, list(headers)))
-  return symbols
+    """Get all symbols listed in the index page. All symbols should be in the
+    given namespace.
+
+    Returns a list of Symbols.
+    """
+
+    # Workflow steps:
+    #   1. Parse index page which lists all symbols to get symbol
+    #      name (unqualified name) and its href link to the symbol page which
+    #      contains the defined header.
+    #   2. Parse the symbol page to get the defined header.
+    index_page_path = os.path.join(root_dir, index_page_name)
+    with open(index_page_path, "r") as f:
+        # Read each symbol page in parallel.
+        results = []  # (symbol_name, promise of [header...])
+        for symbol_name, symbol_page_path, variant in _ParseIndexPage(f.read()):
+            # Variant symbols (e.g. the std::locale version of isalpha) add ambiguity.
+            # FIXME: use these as a fallback rather than ignoring entirely.
+            variants_for_symbol = variants_to_accept.get(
+                (namespace or "") + symbol_name, ()
+            )
+            if variant and variant not in variants_for_symbol:
+                continue
+            path = os.path.join(root_dir, symbol_page_path)
+            if os.path.isfile(path):
+                results.append(
+                    (
+                        symbol_name,
+                        pool.apply_async(_ReadSymbolPage, (path, symbol_name)),
+                    )
+                )
+            else:
+                sys.stderr.write(
+                    "Discarding information for symbol: %s. Page %s does not exist.\n"
+                    % (symbol_name, path)
+                )
+
+        # Build map from symbol name to a set of headers.
+        symbol_headers = collections.defaultdict(set)
+        for symbol_name, lazy_headers in results:
+            symbol_headers[symbol_name].update(lazy_headers.get())
+
+    symbols = []
+    for name, headers in sorted(symbol_headers.items(), key=lambda t: t[0]):
+        symbols.append(Symbol(name, namespace, list(headers)))
+    return symbols
 
 
 def GetSymbols(parse_pages):
-  """Get all symbols by parsing the given pages.
-
-  Args:
-    parse_pages: a list of tuples (page_root_dir, index_page_name, namespace)
-  """
-  # By default we prefer the non-variant versions, as they're more common. But
-  # there are some symbols, whose variant is more common. This list describes
-  # those symbols.
-  variants_to_accept = {
-      # std::remove<> has variant algorithm.
-      "std::remove": ("algorithm"),
-  }
-  symbols = []
-  # Run many workers to process individual symbol pages under the symbol index.
-  # Don't allow workers to capture Ctrl-C.
-  pool = multiprocessing.Pool(
-      initializer=lambda: signal.signal(signal.SIGINT, signal.SIG_IGN))
-  try:
-    for root_dir, page_name, namespace in parse_pages:
-      symbols.extend(_GetSymbols(pool, root_dir, page_name, namespace,
-                                 variants_to_accept))
-  finally:
-    pool.terminate()
-    pool.join()
-  return sorted(symbols)
+    """Get all symbols by parsing the given pages.
+
+    Args:
+      parse_pages: a list of tuples (page_root_dir, index_page_name, namespace)
+    """
+    # By default we prefer the non-variant versions, as they're more common. But
+    # there are some symbols whose variant is more common. This list describes
+    # those symbols.
+    variants_to_accept = {
+        # std::remove<> has variant algorithm.
+        "std::remove": ("algorithm"),
+    }
+    symbols = []
+    # Run many workers to process individual symbol pages under the symbol index.
+    # Don't allow workers to capture Ctrl-C.
+    pool = multiprocessing.Pool(
+        initializer=lambda: signal.signal(signal.SIGINT, signal.SIG_IGN)
+    )
+    try:
+        for root_dir, page_name, namespace in parse_pages:
+            symbols.extend(
+                _GetSymbols(pool, root_dir, page_name, namespace, variants_to_accept)
+            )
+    finally:
+        pool.terminate()
+        pool.join()
+    return sorted(symbols)