forked from OSchip/llvm-project
[clangd] Using symbol name to map includes for STL symbols.
Summary: Suffix path mapping relies on the details of particular STL implementations and is not portable. This patch uses symbol name mapping instead, which should work with different STL implementations; fixes clangd/clangd#9. To generate the symbol mapping, we parse the cppreference symbol index page to build a lookup table. The mapping is not complete yet; a few TODOs remain: - support symbols from different headers (e.g. std::move) - support STL macros - support symbols from std's sub-namespaces (e.g. chrono) Reviewers: ioeric, jfb, serge-sans-paille Reviewed By: ioeric Subscribers: sammccall, klimek, ilya-biryukov, ioeric, MaskRay, jkorous, mgrang, arphaman, kadircet, jfb, jdoerfert, cfe-commits Tags: #clang-tools-extra, #clang Differential Revision: https://reviews.llvm.org/D58345 llvm-svn: 356134
This commit is contained in:
parent
e81f5f91b4
commit
7f51b5dc32
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,149 @@
|
|||
#!/usr/bin/env python
|
||||
#===- gen_std.py - ------------------------------------------*- python -*--===#
|
||||
#
|
||||
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
# See https://llvm.org/LICENSE.txt for license information.
|
||||
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
#
|
||||
#===------------------------------------------------------------------------===#
|
||||
|
||||
"""gen_std.py is a tool to generate a lookup table (from qualified names to
|
||||
include headers) for C++ Standard Library symbols by parsing archived HTML
|
||||
files from cppreference.
|
||||
|
||||
Caveats and FIXMEs:
|
||||
- only symbols directly in "std" namespace are added, we should also add std's
|
||||
subnamespace symbols (e.g. chrono).
|
||||
- symbols with multiple variants or defined in multiple headers aren't added,
|
||||
e.g. std::move, std::swap
|
||||
|
||||
Usage:
|
||||
1. Install BeautifulSoup dependency, see instruction:
|
||||
https://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-beautiful-soup
|
||||
2. Download cppreference offline HTML files (e.g. html_book_20181028.zip) at
|
||||
https://en.cppreference.com/w/Cppreference:Archives
|
||||
3. Unzip the zip file from step 2 to directory </cppreference>, you should
|
||||
get a "reference" directory in </cppreference>
|
||||
4. Run the command:
|
||||
gen_std.py -cppreference </cppreference/reference> > StdSymbolMap.inc
|
||||
"""
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
import argparse
|
||||
import datetime
|
||||
import os
|
||||
import sys
|
||||
|
||||
# Banner written at the top of the generated output. The single "%s"
# placeholder is filled in with a date string when the banner is printed.
# Every line of the banner is a C++ "//" comment so the output stays valid
# when #included.
STDGEN_CODE_PREFIX = """\
//===-- gen_std.py generated file -------------------------------*- C++ -*-===//
//
// Used to build a lookup table (qualified names => include headers) for C++
// Standard Library symbols.
//
// Automatically generated file, DO NOT EDIT!
//
// Generated from cppreference offline HTML book (modified on %s).
//===----------------------------------------------------------------------===//
"""
|
||||
|
||||
def ParseSymbolPage(symbol_page_html):
    """Extract the headers named in a symbol page's "Defined in header" rows.

    cppreference symbol pages announce the defining header in a table row
    such as:

      <tr class="t-dsc-header">
        <td colspan="2"> <div>Defined in header <code>&lt;ratio&gt;</code> </div>
      </td></tr>

    Args:
      symbol_page_html: HTML text of a single symbol page.

    Returns:
      A list with the text of every <code> element found inside a matching
      row, in document order (e.g. ['<ratio>']). Empty if no row matches.
    """
    page = BeautifulSoup(symbol_page_html, "html.parser")
    # The "Defined in header " rows are <tr class="t-dsc-header"> or
    # <tr class="t-dcl-header">.
    candidate_rows = page.select('tr.t-dcl-header,tr.t-dsc-header')
    # The header name itself (e.g. <cstdlib>) is wrapped in <code>; one row
    # may contain several of them.
    return [code_tag.text
            for row in candidate_rows
            if "Defined in header " in row.text
            for code_tag in row.find_all("code")]
|
||||
|
||||
|
||||
def ParseIndexPage(index_page_html):
    """Parse the symbol index page.

    The index page lists all std symbols together with links to their detail
    pages (which contain the defining header). An example:

      <a href="abs.html" title="abs"><tt>abs()</tt></a> (int) <br>
      <a href="acos.html" title="acos"><tt>acos()</tt></a> <br>

    Args:
      index_page_html: HTML text of the index page.

    Returns:
      A list of (symbol_name, relative_path_to_symbol_page) tuples in
      document order. Anchors without a <tt> child or without an href
      attribute are skipped.
    """
    symbols = []
    soup = BeautifulSoup(index_page_html, "html.parser")
    for symbol_href in soup.select("a[title]"):
        symbol_tt = symbol_href.find("tt")
        if symbol_tt is None:
            continue
        # The selector only guarantees a title attribute; an anchor without
        # href has no page to visit, so skip it instead of raising KeyError.
        symbol_page = symbol_href.get("href")
        if symbol_page is None:
            continue
        # Strip any trailing "<>" / "()" decoration from the displayed name.
        symbols.append((symbol_tt.text.rstrip("<>()"), symbol_page))
    return symbols
|
||||
|
||||
|
||||
def ParseArg():
    """Parse the command-line arguments of the generator.

    Returns:
      The argparse namespace; its `cppreference` attribute holds the path
      given with the mandatory -cppreference option.
    """
    parser = argparse.ArgumentParser(description='Generate StdGen file')
    # The option is required, so no default is supplied: argparse would never
    # use it.
    parser.add_argument('-cppreference', metavar='PATH',
                        help='path to the cppreference offline HTML directory',
                        required=True)
    return parser.parse_args()
|
||||
|
||||
|
||||
def main():
    """Generate the symbol-to-header table and print it to stdout.

    Workflow:
      1. Parse the index page, which lists all symbols, to get each symbol's
         unqualified name and the link to its symbol page.
      2. Parse each symbol page to get the header(s) the symbol is defined in.
      3. Print the banner followed by one SYMBOL(...) line per symbol that
         maps to exactly one header, sorted by symbol name.

    Exits with an error message if the index page cannot be found under the
    directory given on the command line. Symbols for which no header is found
    are reported on stderr and skipped.
    """
    args = ParseArg()
    cpp_reference_root = args.cppreference
    cpp_symbol_root = os.path.join(cpp_reference_root, "en", "cpp")
    index_page_path = os.path.join(cpp_symbol_root, "symbol_index.html")
    if not os.path.exists(index_page_path):
        # sys.exit is always available; the bare exit() helper is only
        # injected by the site module.
        sys.exit("Path %s doesn't exist!" % index_page_path)

    # We don't have version information from the unzipped offline HTML files,
    # so we use the modified time of symbol_index.html as the version.
    cppreference_modified_date = datetime.datetime.fromtimestamp(
        os.stat(index_page_path).st_mtime).strftime('%Y-%m-%d')

    with open(index_page_path, "r") as index_file:
        index_entries = ParseIndexPage(index_file.read())

    # A map from symbol name to the set of headers it was found in.
    symbols = {}
    for symbol_name, symbol_page_path in index_entries:
        symbol_page_full_path = os.path.join(cpp_symbol_root, symbol_page_path)
        with open(symbol_page_full_path, "r") as symbol_file:
            headers = ParseSymbolPage(symbol_file.read())
        if not headers:
            sys.stderr.write("No header found for symbol %s at %s\n" % (
                symbol_name, symbol_page_path))
            continue
        symbols.setdefault(symbol_name, set()).update(headers)

    # Emit results to stdout. print(...) with a single pre-formatted string
    # behaves the same on Python 2 and Python 3.
    print(STDGEN_CODE_PREFIX % cppreference_modified_date)
    for name in sorted(symbols):
        headers = symbols[name]
        if len(headers) > 1:
            # FIXME: support symbols with multiple headers (e.g. std::move).
            continue
        # SYMBOL(unqualified_name, namespace, header)
        print("SYMBOL(%s, %s, %s)" % (name, "std::", next(iter(headers))))
|
||||
|
||||
|
||||
# Run the generator only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
|
@ -0,0 +1,101 @@
|
|||
#!/usr/bin/env python
|
||||
#===- test.py - ---------------------------------------------*- python -*--===#
|
||||
#
|
||||
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
# See https://llvm.org/LICENSE.txt for license information.
|
||||
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
#
|
||||
#===------------------------------------------------------------------------===#
|
||||
|
||||
from gen_std import ParseSymbolPage, ParseIndexPage
|
||||
|
||||
import unittest
|
||||
|
||||
class TestStdGen(unittest.TestCase):
    """Unit tests for ParseIndexPage and ParseSymbolPage.

    The HTML fixtures spell angle brackets that are meant as text with the
    &lt; / &gt; entities; a raw "<cmath>" would be read by the HTML parser as
    a tag and would not appear in the element text.
    """

    def testParseIndexPage(self):
        html = """
 <a href="abs.html" title="abs"><tt>abs()</tt></a> (int) <br>
 <a href="complex/abs.html" title="abs"><tt>abs&lt;&gt;()</tt></a> (std::complex) <br>
 <a href="acos.html" title="acos"><tt>acos()</tt></a> <br>
 <a href="acosh.html" title="acosh"><tt>acosh()</tt></a> <span class="t-mark-rev">(since C++11)</span> <br>
 <a href="as_bytes.html" title="as bytes"><tt>as_bytes&lt;&gt;()</tt></a> <span class="t-mark-rev t-since-cxx20">(since C++20)</span> <br>
 """

        actual = ParseIndexPage(html)
        expected = [
            ("abs", "abs.html"),
            ("abs", "complex/abs.html"),
            ("acos", "acos.html"),
            ("acosh", "acosh.html"),
            ("as_bytes", "as_bytes.html"),
        ]
        # Check the length first: zip() below would silently ignore extras.
        self.assertEqual(len(actual), len(expected))
        for (expected_name, expected_path), (actual_name, actual_path) in zip(
                expected, actual):
            self.assertEqual(expected_name, actual_name)
            self.assertTrue(actual_path.endswith(expected_path))

    def testParseSymbolPage_SingleHeader(self):
        # Defined in header <cmath>
        html = """
 <table class="t-dcl-begin"><tbody>
  <tr class="t-dsc-header">
   <td> <div>Defined in header <code><a href="cmath.html" title="cmath">&lt;cmath&gt;</a></code>
    </div></td>
   <td></td>
   <td></td>
  </tr>
 </tbody></table>
 """
        self.assertEqual(ParseSymbolPage(html), ['<cmath>'])

    def testParseSymbolPage_MulHeaders(self):
        # Defined in header <cstddef>
        # Defined in header <cstdio>
        # Defined in header <cstdlib>
        html = """
 <table class="t-dcl-begin"><tbody>
  <tr class="t-dsc-header">
   <td> <div>Defined in header <code><a href="cstddef.html" title="cstddef">&lt;cstddef&gt;</a></code>
    </div></td>
   <td></td>
   <td></td>
  </tr>
  <tr class="t-dsc-header">
   <td> <div>Defined in header <code><a href="cstdio.html" title="cstdio">&lt;cstdio&gt;</a></code>
    </div></td>
   <td></td>
   <td></td>
  </tr>
  <tr class="t-dsc-header">
   <td> <div>Defined in header <code><a href=".cstdlib.html" title="ccstdlib">&lt;cstdlib&gt;</a></code>
    </div></td>
   <td></td>
   <td></td>
  </tr>
 </tbody></table>
 """
        self.assertEqual(ParseSymbolPage(html),
                         ['<cstddef>', '<cstdio>', '<cstdlib>'])

    def testParseSymbolPage_MulHeadersInSameDiv(self):
        # Multiple <code> blocks in a Div.
        # Defined in header <algorithm>
        # Defined in header <utility>
        html = """
 <tr class="t-dsc-header">
  <td><div>
   Defined in header <code><a href="../header/algorithm.html" title="cpp/header/algorithm">&lt;algorithm&gt;</a></code><br>
   Defined in header <code><a href="../header/utility.html" title="cpp/header/utility">&lt;utility&gt;</a></code>
  </div></td>
  <td></td>
 </tr>
 """
        self.assertEqual(ParseSymbolPage(html), ['<algorithm>', '<utility>'])
|
||||
|
||||
|
||||
# Run the test suite only when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()
|
|
@ -107,57 +107,30 @@ collectIWYUHeaderMaps(CanonicalIncludes *Includes) {
|
|||
|
||||
void addSystemHeadersMapping(CanonicalIncludes *Includes) {
|
||||
static const std::vector<std::pair<const char *, const char *>> SymbolMap = {
|
||||
{"std::addressof", "<memory>"},
|
||||
// Map symbols in <iosfwd> to their preferred includes.
|
||||
{"std::basic_filebuf", "<fstream>"},
|
||||
{"std::basic_fstream", "<fstream>"},
|
||||
{"std::basic_ifstream", "<fstream>"},
|
||||
{"std::basic_ofstream", "<fstream>"},
|
||||
{"std::filebuf", "<fstream>"},
|
||||
{"std::fstream", "<fstream>"},
|
||||
{"std::ifstream", "<fstream>"},
|
||||
{"std::ofstream", "<fstream>"},
|
||||
{"std::wfilebuf", "<fstream>"},
|
||||
{"std::wfstream", "<fstream>"},
|
||||
{"std::wifstream", "<fstream>"},
|
||||
{"std::wofstream", "<fstream>"},
|
||||
{"std::basic_ios", "<ios>"},
|
||||
{"std::ios", "<ios>"},
|
||||
{"std::wios", "<ios>"},
|
||||
{"std::basic_iostream", "<iostream>"},
|
||||
{"std::iostream", "<iostream>"},
|
||||
{"std::wiostream", "<iostream>"},
|
||||
{"std::basic_istream", "<istream>"},
|
||||
{"std::istream", "<istream>"},
|
||||
{"std::wistream", "<istream>"},
|
||||
{"std::istreambuf_iterator", "<iterator>"},
|
||||
{"std::ostreambuf_iterator", "<iterator>"},
|
||||
{"std::basic_ostream", "<ostream>"},
|
||||
{"std::ostream", "<ostream>"},
|
||||
{"std::wostream", "<ostream>"},
|
||||
{"std::basic_istringstream", "<sstream>"},
|
||||
{"std::basic_ostringstream", "<sstream>"},
|
||||
{"std::basic_stringbuf", "<sstream>"},
|
||||
{"std::basic_stringstream", "<sstream>"},
|
||||
{"std::istringstream", "<sstream>"},
|
||||
{"std::ostringstream", "<sstream>"},
|
||||
{"std::string", "<string>"},
|
||||
{"std::stringbuf", "<sstream>"},
|
||||
{"std::stringstream", "<sstream>"},
|
||||
{"std::wistringstream", "<sstream>"},
|
||||
{"std::wostringstream", "<sstream>"},
|
||||
{"std::wstringbuf", "<sstream>"},
|
||||
{"std::wstringstream", "<sstream>"},
|
||||
{"std::basic_streambuf", "<streambuf>"},
|
||||
{"std::streambuf", "<streambuf>"},
|
||||
{"std::wstreambuf", "<streambuf>"},
|
||||
{"std::uint_least16_t", "<cstdint>"}, // <type_traits> redeclares these
|
||||
{"std::uint_least32_t", "<cstdint>"},
|
||||
{"std::declval", "<utility>"},
|
||||
#define SYMBOL(Name, NameSpace, Header) { #NameSpace#Name, #Header },
|
||||
#include "StdSymbolMap.inc"
|
||||
#undef SYMBOL
|
||||
};
|
||||
for (const auto &Pair : SymbolMap)
|
||||
Includes->addSymbolMapping(Pair.first, Pair.second);
|
||||
|
||||
// FIXME: remove the std header mapping once we support ambiguous symbols, now
|
||||
// it serves as a fallback to disambiguate:
|
||||
// - symbols with multiple headers (e.g. std::move)
|
||||
// - symbols with a primary template in one header and a specialization in
|
||||
// another (std::abs)
|
||||
static const std::vector<std::pair<const char *, const char *>>
|
||||
SystemHeaderMap = {
|
||||
{"include/__stddef_max_align_t.h", "<cstddef>"},
|
||||
|
|
Loading…
Reference in New Issue