From f3003a92c1fc9cf07f0a55796b20114d5dd7bba6 Mon Sep 17 00:00:00 2001 From: Bruno Cardoso Lopes Date: Wed, 20 Jun 2018 21:16:37 +0000 Subject: [PATCH] Add python tool to dump and construct header maps Header maps are binary files used by Xcode, which are used to map header names or paths to other locations. Clang has supported those since its inception, but there's not a lot of header map testing around. Since it's a binary format, testing becomes brittle and it's hard to even know what's inside if you don't have the appropriate tools. Add a Python-based tool that allows creating and dumping header maps based on a JSON description of those. While here, rewrite tests to use the tool and remove the binary files from the tree. This tool was initially written by Daniel Dunbar. Differential Revision: https://reviews.llvm.org/D46485 rdar://problem/39994722 llvm-svn: 335177 --- clang/CMakeLists.txt | 1 + clang/test/CMakeLists.txt | 1 + clang/test/Modules/crash-vfs-headermaps.m | 10 +- .../Inputs/headermap-rel/foo.hmap | Bin 804 -> 0 bytes .../Inputs/headermap-rel/foo.hmap.json | 6 + .../headermap-rel2/project-headers.hmap | Bin 108 -> 0 bytes .../headermap-rel2/project-headers.hmap.json | 6 + .../Inputs/nonportable-hmaps/foo.hmap | Bin 102 -> 0 bytes .../Inputs/nonportable-hmaps/foo.hmap.json | 6 + clang/test/Preprocessor/headermap-rel.c | 8 +- clang/test/Preprocessor/headermap-rel2.c | 9 +- .../nonportable-include-with-hmap.c | 4 +- clang/utils/hmaptool/CMakeLists.txt | 16 + clang/utils/hmaptool/hmaptool | 296 ++++++++++++++++++ 14 files changed, 344 insertions(+), 19 deletions(-) delete mode 100644 clang/test/Preprocessor/Inputs/headermap-rel/foo.hmap create mode 100644 clang/test/Preprocessor/Inputs/headermap-rel/foo.hmap.json delete mode 100644 clang/test/Preprocessor/Inputs/headermap-rel2/project-headers.hmap create mode 100644 clang/test/Preprocessor/Inputs/headermap-rel2/project-headers.hmap.json delete mode 100644 
clang/test/Preprocessor/Inputs/nonportable-hmaps/foo.hmap create mode 100644 clang/test/Preprocessor/Inputs/nonportable-hmaps/foo.hmap.json create mode 100644 clang/utils/hmaptool/CMakeLists.txt create mode 100755 clang/utils/hmaptool/hmaptool diff --git a/clang/CMakeLists.txt b/clang/CMakeLists.txt index ab81ec34b00d..ae8835d751c0 100644 --- a/clang/CMakeLists.txt +++ b/clang/CMakeLists.txt @@ -753,6 +753,7 @@ endif() if (LLVM_ADD_NATIVE_VISUALIZERS_TO_SOLUTION) add_subdirectory(utils/ClangVisualizers) endif() +add_subdirectory(utils/hmaptool) configure_file( ${CLANG_SOURCE_DIR}/include/clang/Config/config.h.cmake diff --git a/clang/test/CMakeLists.txt b/clang/test/CMakeLists.txt index 8efe3600c540..8d6db5348e08 100644 --- a/clang/test/CMakeLists.txt +++ b/clang/test/CMakeLists.txt @@ -54,6 +54,7 @@ list(APPEND CLANG_TEST_DEPS clang-rename clang-refactor clang-diff + hmaptool ) if(CLANG_ENABLE_STATIC_ANALYZER) diff --git a/clang/test/Modules/crash-vfs-headermaps.m b/clang/test/Modules/crash-vfs-headermaps.m index 4f88f3ba1197..d33534602800 100644 --- a/clang/test/Modules/crash-vfs-headermaps.m +++ b/clang/test/Modules/crash-vfs-headermaps.m @@ -1,15 +1,9 @@ // REQUIRES: crash-recovery, shell, system-darwin -// This uses a headermap with this entry: -// Foo.h -> Foo/Foo.h - -// Copy out the headermap from test/Preprocessor/Inputs/headermap-rel and avoid -// adding another binary format to the repository. 
- // RUN: rm -rf %t -// RUN: mkdir -p %t/m -// RUN: cp -a %S/../Preprocessor/Inputs/headermap-rel %t/i +// RUN: mkdir -p %t/m %t/i/Foo.framework/Headers // RUN: echo '// Foo.h' > %t/i/Foo.framework/Headers/Foo.h +// RUN: hmaptool write %S/../Preprocessor/Inputs/headermap-rel/foo.hmap.json %t/i/foo.hmap // RUN: not env FORCE_CLANG_DIAGNOSTICS_CRASH= TMPDIR=%t TEMP=%t TMP=%t \ // RUN: %clang -fsyntax-only -fmodules -fmodules-cache-path=%t/m %s \ diff --git a/clang/test/Preprocessor/Inputs/headermap-rel/foo.hmap b/clang/test/Preprocessor/Inputs/headermap-rel/foo.hmap deleted file mode 100644 index 783c64e67bb80a38f23845ed54fac43e2dc101a4..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 804 ycmXR&%*|kAU|^77W?%r(4nWKa#KRSU{KyW(AbJ#xh5*hGaLdov%U}SK`V0V(7zHB$ diff --git a/clang/test/Preprocessor/Inputs/headermap-rel/foo.hmap.json b/clang/test/Preprocessor/Inputs/headermap-rel/foo.hmap.json new file mode 100644 index 000000000000..ccfd911f0f7f --- /dev/null +++ b/clang/test/Preprocessor/Inputs/headermap-rel/foo.hmap.json @@ -0,0 +1,6 @@ +{ + "mappings" : + { + "Foo.h" : "Foo/Foo.h" + } +} diff --git a/clang/test/Preprocessor/Inputs/headermap-rel2/project-headers.hmap b/clang/test/Preprocessor/Inputs/headermap-rel2/project-headers.hmap deleted file mode 100644 index a0770fb251242a3eec33dda98beab4f3d38adef8..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 108 zcmXR&%*|kAU|{e7Vi3&DdU3;?O;17dNI;^O?=)Qr@`l++@<42FQB{FKt<5`9!r E0L3N_r2qf` diff --git a/clang/test/Preprocessor/Inputs/headermap-rel2/project-headers.hmap.json b/clang/test/Preprocessor/Inputs/headermap-rel2/project-headers.hmap.json new file mode 100644 index 000000000000..e03703b1bb06 --- /dev/null +++ b/clang/test/Preprocessor/Inputs/headermap-rel2/project-headers.hmap.json @@ -0,0 +1,6 @@ +{ + "mappings" : + { + "someheader.h" : "Product/someheader.h" + } +} diff --git a/clang/test/Preprocessor/Inputs/nonportable-hmaps/foo.hmap 
b/clang/test/Preprocessor/Inputs/nonportable-hmaps/foo.hmap deleted file mode 100644 index 9036f208711c4c0b8f0fc89c58648c63658b863f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 102 zcmXR&%*|kAU|{e7Vi3&&#DYMK3_#-CKrDeQh|YG)&({ZHy$pto)Wnq3qGJ6tpa=s* F2moPe3vU1b diff --git a/clang/test/Preprocessor/Inputs/nonportable-hmaps/foo.hmap.json b/clang/test/Preprocessor/Inputs/nonportable-hmaps/foo.hmap.json new file mode 100644 index 000000000000..c69f1df77253 --- /dev/null +++ b/clang/test/Preprocessor/Inputs/nonportable-hmaps/foo.hmap.json @@ -0,0 +1,6 @@ +{ + "mappings" : + { + "Foo/Foo.h" : "headers/foo/Foo.h" + } +} diff --git a/clang/test/Preprocessor/headermap-rel.c b/clang/test/Preprocessor/headermap-rel.c index 38500a70f697..91f0d957e5a6 100644 --- a/clang/test/Preprocessor/headermap-rel.c +++ b/clang/test/Preprocessor/headermap-rel.c @@ -1,8 +1,6 @@ - -// This uses a headermap with this entry: -// Foo.h -> Foo/Foo.h - -// RUN: %clang_cc1 -E %s -o %t.i -I %S/Inputs/headermap-rel/foo.hmap -F %S/Inputs/headermap-rel +// RUN: rm -f %t.hmap +// RUN: hmaptool write %S/Inputs/headermap-rel/foo.hmap.json %t.hmap +// RUN: %clang_cc1 -E %s -o %t.i -I %t.hmap -F %S/Inputs/headermap-rel // RUN: FileCheck %s -input-file %t.i // CHECK: Foo.h is parsed diff --git a/clang/test/Preprocessor/headermap-rel2.c b/clang/test/Preprocessor/headermap-rel2.c index d61f3385b22d..172821efa425 100644 --- a/clang/test/Preprocessor/headermap-rel2.c +++ b/clang/test/Preprocessor/headermap-rel2.c @@ -1,8 +1,7 @@ -// This uses a headermap with this entry: -// someheader.h -> Product/someheader.h - -// RUN: %clang_cc1 -v -fsyntax-only %s -iquote %S/Inputs/headermap-rel2/project-headers.hmap -isystem %S/Inputs/headermap-rel2/system/usr/include -I %S/Inputs/headermap-rel2 -H -// RUN: %clang_cc1 -fsyntax-only %s -iquote %S/Inputs/headermap-rel2/project-headers.hmap -isystem %S/Inputs/headermap-rel2/system/usr/include -I %S/Inputs/headermap-rel2 -H 2> %t.out 
+// RUN: rm -f %t.hmap +// RUN: hmaptool write %S/Inputs/headermap-rel2/project-headers.hmap.json %t.hmap +// RUN: %clang_cc1 -v -fsyntax-only %s -iquote %t.hmap -isystem %S/Inputs/headermap-rel2/system/usr/include -I %S/Inputs/headermap-rel2 -H +// RUN: %clang_cc1 -fsyntax-only %s -iquote %t.hmap -isystem %S/Inputs/headermap-rel2/system/usr/include -I %S/Inputs/headermap-rel2 -H 2> %t.out // RUN: FileCheck %s -input-file %t.out // CHECK: Product/someheader.h diff --git a/clang/test/Preprocessor/nonportable-include-with-hmap.c b/clang/test/Preprocessor/nonportable-include-with-hmap.c index fc958e7e5e3f..0190d13498a2 100644 --- a/clang/test/Preprocessor/nonportable-include-with-hmap.c +++ b/clang/test/Preprocessor/nonportable-include-with-hmap.c @@ -1,5 +1,7 @@ +// RUN: rm -f %t.hmap +// RUN: hmaptool write %S/Inputs/nonportable-hmaps/foo.hmap.json %t.hmap // RUN: %clang_cc1 -Eonly \ -// RUN: -I%S/Inputs/nonportable-hmaps/foo.hmap \ +// RUN: -I%t.hmap \ // RUN: -I%S/Inputs/nonportable-hmaps \ // RUN: %s -verify // diff --git a/clang/utils/hmaptool/CMakeLists.txt b/clang/utils/hmaptool/CMakeLists.txt new file mode 100644 index 000000000000..f5cc7d856a40 --- /dev/null +++ b/clang/utils/hmaptool/CMakeLists.txt @@ -0,0 +1,16 @@ +set(CLANG_HMAPTOOL hmaptool) + +add_custom_command(OUTPUT ${CMAKE_BINARY_DIR}/bin/${CLANG_HMAPTOOL} + COMMAND ${CMAKE_COMMAND} -E make_directory + ${CMAKE_BINARY_DIR}/bin + COMMAND ${CMAKE_COMMAND} -E copy + ${CMAKE_CURRENT_SOURCE_DIR}/${CLANG_HMAPTOOL} + ${CMAKE_BINARY_DIR}/bin/ + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/${CLANG_HMAPTOOL}) + +list(APPEND Depends ${CMAKE_BINARY_DIR}/bin/${CLANG_HMAPTOOL}) +install(PROGRAMS ${CLANG_HMAPTOOL} DESTINATION bin) + +add_custom_target(hmaptool ALL DEPENDS ${Depends}) +set_target_properties(hmaptool PROPERTIES FOLDER "Utils") + diff --git a/clang/utils/hmaptool/hmaptool b/clang/utils/hmaptool/hmaptool new file mode 100755 index 000000000000..2b1ca7436c3f --- /dev/null +++ b/clang/utils/hmaptool/hmaptool @@ 
-0,0 +1,296 @@ +#!/usr/bin/env python +from __future__ import print_function + +import json +import optparse +import os +import struct +import sys + +### + +k_header_magic_LE = 'pamh' +k_header_magic_BE = 'hmap' + +def hmap_hash(str): + """hash(str) -> int + + Apply the "well-known" headermap hash function. + """ + + return sum((ord(c.lower()) * 13 + for c in str), 0) + +class HeaderMap(object): + @staticmethod + def frompath(path): + with open(path, 'rb') as f: + magic = f.read(4) + if magic == k_header_magic_LE: + endian_code = '<' + elif magic == k_header_magic_BE: + endian_code = '>' + else: + raise SystemExit("error: %s: not a headermap" % ( + path,)) + + # Read the header information. + header_fmt = endian_code + 'HHIIII' + header_size = struct.calcsize(header_fmt) + data = f.read(header_size) + if len(data) != header_size: + raise SystemExit("error: %s: truncated headermap header" % ( + path,)) + + (version, reserved, strtable_offset, num_entries, + num_buckets, max_value_len) = struct.unpack(header_fmt, data) + + if version != 1: + raise SystemExit("error: %s: unknown headermap version: %r" % ( + path, version)) + if reserved != 0: + raise SystemExit("error: %s: invalid reserved value in header" % ( + path,)) + + # The number of buckets must be a power of two. + if num_buckets == 0 or (num_buckets & num_buckets - 1) != 0: + raise SystemExit("error: %s: invalid number of buckets" % ( + path,)) + + # Read all of the buckets. + bucket_fmt = endian_code + 'III' + bucket_size = struct.calcsize(bucket_fmt) + buckets_data = f.read(num_buckets * bucket_size) + if len(buckets_data) != num_buckets * bucket_size: + raise SystemExit("error: %s: truncated headermap buckets" % ( + path,)) + buckets = [struct.unpack(bucket_fmt, + buckets_data[i*bucket_size:(i+1)*bucket_size]) + for i in range(num_buckets)] + + # Read the string table; the format doesn't explicitly communicate the + # size of the string table (which is dumb), so assume it is the rest of + # the file. 
+ f.seek(0, 2) + strtable_size = f.tell() - strtable_offset + f.seek(strtable_offset) + + if strtable_size == 0: + raise SystemExit("error: %s: unable to read zero-sized string table"%( + path,)) + strtable = f.read(strtable_size) + + if len(strtable) != strtable_size: + raise SystemExit("error: %s: unable to read complete string table"%( + path,)) + if strtable[-1] != '\0': + raise SystemExit("error: %s: invalid string table in headermap" % ( + path,)) + + return HeaderMap(num_entries, buckets, strtable) + + def __init__(self, num_entries, buckets, strtable): + self.num_entries = num_entries + self.buckets = buckets + self.strtable = strtable + + def get_string(self, idx): + if idx >= len(self.strtable): + raise SystemExit("error: %s: invalid string index" % ( + path,)) + end_idx = self.strtable.index('\0', idx) + return self.strtable[idx:end_idx] + + @property + def mappings(self): + for key_idx,prefix_idx,suffix_idx in self.buckets: + if key_idx == 0: + continue + yield (self.get_string(key_idx), + self.get_string(prefix_idx) + self.get_string(suffix_idx)) + +### + +def action_dump(name, args): + "dump a headermap file" + + parser = optparse.OptionParser("%%prog %s [options] " % ( + name,)) + parser.add_option("-v", "--verbose", dest="verbose", + help="show more verbose output [%default]", + action="store_true", default=False) + (opts, args) = parser.parse_args(args) + + if len(args) != 1: + parser.error("invalid number of arguments") + + path, = args + + hmap = HeaderMap.frompath(path) + + # Dump all of the buckets. + print ('Header Map: %s' % (path,)) + if opts.verbose: + print ('headermap: %r' % (path,)) + print (' num entries: %d' % (hmap.num_entries,)) + print (' num buckets: %d' % (len(hmap.buckets),)) + print (' string table size: %d' % (len(hmap.strtable),)) + for i,bucket in enumerate(hmap.buckets): + key_idx,prefix_idx,suffix_idx = bucket + + if key_idx == 0: + continue + + # Get the strings. 
+ key = hmap.get_string(key_idx) + prefix = hmap.get_string(prefix_idx) + suffix = hmap.get_string(suffix_idx) + + print (" bucket[%d]: %r -> (%r, %r) -- %d" % ( + i, key, prefix, suffix, (hmap_hash(key) & (num_buckets - 1)))) + else: + mappings = sorted(hmap.mappings) + for key,value in mappings: + print ("%s -> %s" % (key, value)) + print () + +def next_power_of_two(value): + if value < 0: + raise ArgumentError + return 1 if value == 0 else 2**(value - 1).bit_length() + +def action_write(name, args): + "write a headermap file from a JSON definition" + + parser = optparse.OptionParser("%%prog %s [options] " % ( + name,)) + (opts, args) = parser.parse_args(args) + + if len(args) != 2: + parser.error("invalid number of arguments") + + input_path,output_path = args + + with open(input_path, "r") as f: + input_data = json.load(f) + + # Compute the headermap contents, we make a table that is 1/3 full. + mappings = input_data['mappings'] + num_buckets = next_power_of_two(len(mappings) * 3) + + table = [(0, 0, 0) + for i in range(num_buckets)] + max_value_len = 0 + strtable = "\0" + for key,value in mappings.items(): + if not isinstance(key, str): + key = key.decode('utf-8') + if not isinstance(value, str): + value = value.decode('utf-8') + max_value_len = max(max_value_len, len(value)) + + key_idx = len(strtable) + strtable += key + '\0' + prefix = os.path.dirname(value) + '/' + suffix = os.path.basename(value) + prefix_idx = len(strtable) + strtable += prefix + '\0' + suffix_idx = len(strtable) + strtable += suffix + '\0' + + hash = hmap_hash(key) + for i in range(num_buckets): + idx = (hash + i) % num_buckets + if table[idx][0] == 0: + table[idx] = (key_idx, prefix_idx, suffix_idx) + break + else: + raise RuntimeError + + endian_code = '<' + magic = k_header_magic_LE + magic_size = 4 + header_fmt = endian_code + 'HHIIII' + header_size = struct.calcsize(header_fmt) + bucket_fmt = endian_code + 'III' + bucket_size = struct.calcsize(bucket_fmt) + strtable_offset = 
magic_size + header_size + num_buckets * bucket_size + header = (1, 0, strtable_offset, len(mappings), + num_buckets, max_value_len) + + # Write out the headermap. + with open(output_path, 'wb') as f: + f.write(magic.encode()) + f.write(struct.pack(header_fmt, *header)) + for bucket in table: + f.write(struct.pack(bucket_fmt, *bucket)) + f.write(strtable.encode()) + +def action_tovfs(name, args): + "convert a headermap to a VFS layout" + + parser = optparse.OptionParser("%%prog %s [options] " % ( + name,)) + parser.add_option("", "--build-path", dest="build_path", + help="build path prefix", + action="store", type=str) + (opts, args) = parser.parse_args(args) + + if len(args) != 2: + parser.error("invalid number of arguments") + if opts.build_path is None: + parser.error("--build-path is required") + + input_path,output_path = args + + hmap = HeaderMap.frompath(input_path) + + # Create the table for all the objects. + vfs = {} + vfs['version'] = 0 + build_dir_contents = [] + vfs['roots'] = [{ + 'name' : opts.build_path, + 'type' : 'directory', + 'contents' : build_dir_contents }] + + # We assume we are mapping framework paths, so a key of "Foo/Bar.h" maps to + # "/Foo.framework/Headers/Bar.h". + for key,value in hmap.mappings: + # If this isn't a framework style mapping, ignore it. 
+ components = key.split('/') + if len(components) != 2: + continue + framework_name,header_name = components + build_dir_contents.append({ + 'name' : '%s.framework/Headers/%s' % (framework_name, + header_name), + 'type' : 'file', + 'external-contents' : value }) + + with open(output_path, 'w') as f: + json.dump(vfs, f, indent=2) + +commands = dict((name[7:].replace("_","-"), f) + for name,f in locals().items() + if name.startswith('action_')) + +def usage(): + print ("Usage: %s command [options]" % ( + os.path.basename(sys.argv[0])), file=sys.stderr) + print (file=sys.stderr) + print ("Available commands:", file=sys.stderr) + cmds_width = max(map(len, commands)) + for name,func in sorted(commands.items()): + print (" %-*s - %s" % (cmds_width, name, func.__doc__), file=sys.stderr) + sys.exit(1) + +def main(): + if len(sys.argv) < 2 or sys.argv[1] not in commands: + usage() + + cmd = sys.argv[1] + commands[cmd](cmd, sys.argv[2:]) + +if __name__ == '__main__': + main()