forked from OSchip/llvm-project
118 lines
4.7 KiB
Python
Executable File
118 lines
4.7 KiB
Python
Executable File
#!/usr/bin/env python
|
|
#===- gen_std.py - ------------------------------------------*- python -*--===#
|
|
#
|
|
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
# See https://llvm.org/LICENSE.txt for license information.
|
|
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
#
|
|
#===------------------------------------------------------------------------===#
|
|
|
|
"""gen_std.py is a tool to generate a lookup table (from qualified names to
|
|
include headers) for C/C++ Standard Library symbols by parsing archived HTML
|
|
files from cppreference.
|
|
|
|
Caveats and FIXMEs:
|
|
- only symbols directly in "std" namespace are added, we should also add std's
|
|
subnamespace symbols (e.g. chrono).
|
|
- symbols with multiple variants or defined in multiple headers aren't added,
|
|
e.g. std::move, std::swap
|
|
|
|
Usage:
|
|
1. Install BeautifulSoup dependency, see instruction:
|
|
https://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-beautiful-soup
|
|
2. Download cppreference offline HTML files (e.g. html_book_20181028.zip) at
|
|
https://en.cppreference.com/w/Cppreference:Archives
|
|
3. Unzip the zip file from step 2 to directory </cppreference>, you should
|
|
get a "reference" directory in </cppreference>
|
|
4. Run the command:
|
|
// Generate C++ symbols
|
|
gen_std.py -cppreference </cppreference/reference> -language=cpp > StdSymbolMap.inc
|
|
// Generate C symbols
|
|
gen_std.py -cppreference </cppreference/reference> -language=c > CSymbolMap.inc
|
|
"""
|
|
|
|
|
|
import cppreference_parser
|
|
import argparse
|
|
import datetime
|
|
import os
|
|
import sys
|
|
|
|
# Banner printed at the top of the generated .inc file. It carries two %s
# placeholders that main() fills in order: the upper-cased language name and
# the modification date of the cppreference index page.
# Every line of the banner must be a C++ comment, since the output is included
# from C++ sources.
CODE_PREFIX = """\
//===-- gen_std.py generated file -------------------------------*- C++ -*-===//
//
// Used to build a lookup table (qualified names => include headers) for %s
// Standard Library symbols.
//
// Automatically generated file, DO NOT EDIT!
//
// Generated from cppreference offline HTML book (modified on %s).
//===----------------------------------------------------------------------===//
"""
|
|
|
|
def ParseArg():
    """Parse the command-line arguments of gen_std.py.

    Both options are mandatory:
      -cppreference PATH   path to the cppreference offline HTML directory.
      -language {c,cpp}    which symbol table to generate.

    Returns:
      The argparse.Namespace from parse_args(), carrying the `cppreference`
      and `language` attributes.

    argparse exits the process with a usage error when an option is missing
    or when -language is not one of the supported values.
    """
    parser = argparse.ArgumentParser(description='Generate StdGen file')
    # No `default` on either option: both are required, so a default value
    # could never take effect.
    parser.add_argument('-cppreference', metavar='PATH',
                        help='path to the cppreference offline HTML directory',
                        required=True)
    # Restrict the accepted values up front so an unsupported language is
    # reported as a usage error rather than failing later in the script.
    parser.add_argument('-language',
                        choices=('c', 'cpp'),
                        help='Generate c or cpp symbols',
                        required=True)
    return parser.parse_args()
|
|
|
|
|
|
def main():
    """Print the SYMBOL(...) lookup table for the requested language.

    Reads the command-line options via ParseArg(), hands the selected
    cppreference index pages to cppreference_parser.GetSymbols(), then writes
    the CODE_PREFIX banner followed by one
    SYMBOL(unqualified_name, namespace, header) line per symbol to stdout.

    Symbols with no header, or with more than one header, are not emitted; a
    diagnostic for each of them is written to stderr instead.

    Exits with an error message if the language is neither 'c' nor 'cpp', or
    if the symbol index directory does not exist.
    """
    args = ParseArg()
    if args.language == 'cpp':
        page_root = os.path.join(args.cppreference, "en", "cpp")
        symbol_index_root = os.path.join(page_root, "symbol_index")
        parse_pages = [
            (page_root, "symbol_index.html", "std::"),
            # std sub-namespace symbols have separated pages.
            # We don't index std literal operators (e.g.
            # std::literals::chrono_literals::operator""d), these symbols
            # can't be accessed by std::<symbol_name>.
            # FIXME: index std::placeholders symbols, placeholders.html page
            # is different (which contains one entry for _1, _2, ..., _N), we
            # need special handling.
            (symbol_index_root, "chrono.html", "std::chrono::"),
            (symbol_index_root, "filesystem.html", "std::filesystem::"),
            (symbol_index_root, "pmr.html", "std::pmr::"),
            (symbol_index_root, "regex_constants.html",
             "std::regex_constants::"),
            (symbol_index_root, "this_thread.html", "std::this_thread::"),
        ]
    elif args.language == 'c':
        page_root = os.path.join(args.cppreference, "en", "c")
        symbol_index_root = page_root
        parse_pages = [(page_root, "index.html", None)]
    else:
        # Without this branch an unexpected value would fall through and fail
        # below with a NameError on the unset path variables.
        sys.exit("Unsupported language %s (expected 'c' or 'cpp')"
                 % args.language)

    if not os.path.exists(symbol_index_root):
        # sys.exit with a string prints it to stderr and exits with status 1.
        sys.exit("Path %s doesn't exist!" % symbol_index_root)

    symbols = cppreference_parser.GetSymbols(parse_pages)

    # We don't have version information from the unzipped offline HTML files,
    # so we use the modified time of index.html under the language's page
    # root as the version.
    index_page_path = os.path.join(page_root, "index.html")
    cppreference_modified_date = datetime.datetime.fromtimestamp(
        os.stat(index_page_path).st_mtime).strftime('%Y-%m-%d')

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(CODE_PREFIX % (args.language.upper(), cppreference_modified_date))
    for symbol in symbols:
        header_count = len(symbol.headers)
        if header_count == 1:
            # SYMBOL(unqualified_name, namespace, header)
            print("SYMBOL(%s, %s, %s)" % (symbol.name, symbol.namespace,
                                          symbol.headers[0]))
        elif header_count == 0:
            sys.stderr.write("No header found for symbol %s\n" % symbol.name)
        else:
            # FIXME: support symbols with multiple headers (e.g. std::move).
            sys.stderr.write("Ambiguous header for symbol %s: %s\n" % (
                symbol.name, ', '.join(symbol.headers)))
|
|
|
|
|
|
# Run the generator only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()
|