From 4f8a832c1914b61edf5cf75c9137ce79ab9e39a5 Mon Sep 17 00:00:00 2001 From: "Michael J. Spencer" Date: Tue, 13 Dec 2011 23:17:12 +0000 Subject: [PATCH] Support/FileSystem: Add file_magic and move a few clients over to it. llvm-svn: 146523 --- llvm/include/llvm/Support/FileSystem.h | 45 +++++++++- llvm/lib/Archive/ArchiveWriter.cpp | 6 +- llvm/lib/Support/Path.cpp | 24 ++--- llvm/lib/Support/PathV2.cpp | 116 ++++++++++++++++++++++++- 4 files changed, 172 insertions(+), 19 deletions(-) diff --git a/llvm/include/llvm/Support/FileSystem.h b/llvm/include/llvm/Support/FileSystem.h index 264ec49aaa90..a8857fbd0325 100644 --- a/llvm/include/llvm/Support/FileSystem.h +++ b/llvm/include/llvm/Support/FileSystem.h @@ -32,12 +32,12 @@ #include "llvm/ADT/Twine.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/PathV1.h" #include "llvm/Support/system_error.h" #include #include #include #include +#include #if HAVE_SYS_STAT_H #include @@ -121,6 +121,44 @@ public: void type(file_type v) { Type = v; } }; +/// file_magic - An "enum class" enumeration of file types based on magic (the first +/// N bytes of the file).
+struct file_magic {
+  enum _ {
+    unknown = 0,              ///< Unrecognized file
+    bitcode,                  ///< Bitcode file
+    archive,                  ///< ar style archive file
+    elf_relocatable,          ///< ELF Relocatable object file
+    elf_executable,           ///< ELF Executable image
+    elf_shared_object,        ///< ELF dynamically linked shared lib
+    elf_core,                 ///< ELF core image
+    macho_object,             ///< Mach-O Object file
+    macho_executable,         ///< Mach-O Executable
+    macho_fixed_virtual_memory_shared_lib, ///< Mach-O Shared Lib, FVM
+    macho_core,               ///< Mach-O Core File
+    macho_preload_executabl,  ///< Mach-O Preloaded Executable (name is sic)
+    macho_dynamically_linked_shared_lib, ///< Mach-O dynlinked shared lib
+    macho_dynamic_linker,     ///< The Mach-O dynamic linker
+    macho_bundle,             ///< Mach-O Bundle file
+    macho_dynamically_linked_shared_lib_stub, ///< Mach-O Shared lib stub
+    macho_dsym_companion,     ///< Mach-O dSYM companion file
+    coff_object,              ///< COFF object file
+    pecoff_executable         ///< PECOFF executable file
+  };
+
+  /// True for every recognized format, i.e. any value other than \c unknown.
+  bool is_object() const { return v_ != unknown; }
+
+  file_magic() : v_(unknown) {}
+  file_magic(_ v) : v_(v) {}
+  explicit file_magic(int v) : v_(_(v)) {}
+  /// Implicit conversion so a file_magic works in switch and == expressions.
+  operator int() const { return v_; }
+
+private:
+  int v_; ///< The enumerator, held as an int to emulate an "enum class".
+};
+
 /// @}
 /// @name Physical Operators
 /// @{
@@ -419,13 +457,16 @@ error_code has_magic(const Twine &path, const Twine &magic, bool &result);
 error_code get_magic(const Twine &path, uint32_t len,
                      SmallVectorImpl<char> &result);
 
+/// @brief Identify the type of a binary file based on how magical it is.
+file_magic identify_magic(StringRef magic);
+
 /// @brief Get and identify \a path's type based on its content.
 ///
 /// @param path Input path.
-/// @param result Set to the type of file, or LLVMFileType::Unknown_FileType.
+/// @param result Set to the type of file, or file_magic::unknown.
 /// @results errc::success if result has been successfully set, otherwise a
 ///          platform specific error_code.
-error_code identify_magic(const Twine &path, LLVMFileType &result);
+error_code identify_magic(const Twine &path, file_magic &result);
 
 /// @brief Get library paths the system linker uses.
 ///
diff --git a/llvm/lib/Archive/ArchiveWriter.cpp b/llvm/lib/Archive/ArchiveWriter.cpp
index 8fcc7aa29cc8..9ef29432ddf2 100644
--- a/llvm/lib/Archive/ArchiveWriter.cpp
+++ b/llvm/lib/Archive/ArchiveWriter.cpp
@@ -182,11 +182,11 @@ Archive::addFileBefore(const sys::Path& filePath, iterator where,
   if (hasSlash || filePath.str().length() > 15)
     flags |= ArchiveMember::HasLongFilenameFlag;
 
-  sys::LLVMFileType type;
+  sys::fs::file_magic type;
   if (sys::fs::identify_magic(mbr->path.str(), type))
-    type = sys::Unknown_FileType;
+    type = sys::fs::file_magic::unknown;
   switch (type) {
-    case sys::Bitcode_FileType:
+    case sys::fs::file_magic::bitcode:
       flags |= ArchiveMember::BitcodeFlag;
       break;
     default:
diff --git a/llvm/lib/Support/Path.cpp b/llvm/lib/Support/Path.cpp
index 9d8decc28cd3..dcddeda977d1 100644
--- a/llvm/lib/Support/Path.cpp
+++ b/llvm/lib/Support/Path.cpp
@@ -152,31 +152,32 @@ sys::IdentifyFileType(const char *magic, unsigned length) {
 
 bool
 Path::isArchive() const {
-  LLVMFileType type;
+  fs::file_magic type;
   if (fs::identify_magic(str(), type))
     return false;
-  return type == Archive_FileType;
+  return type == fs::file_magic::archive;
 }
 
 bool
 Path::isDynamicLibrary() const {
-  LLVMFileType type;
+  fs::file_magic type;
   if (fs::identify_magic(str(), type))
     return false;
   switch (type) {
     default: return false;
-    case Mach_O_FixedVirtualMemorySharedLib_FileType:
-    case Mach_O_DynamicallyLinkedSharedLib_FileType:
-    case Mach_O_DynamicallyLinkedSharedLibStub_FileType:
-    case ELF_SharedObject_FileType:
-    case COFF_FileType: return true;
+    case fs::file_magic::macho_fixed_virtual_memory_shared_lib:
+    case fs::file_magic::macho_dynamically_linked_shared_lib:
+    case fs::file_magic::macho_dynamically_linked_shared_lib_stub:
+    case fs::file_magic::elf_shared_object:
+    case fs::file_magic::pecoff_executable: return true;
   }
 }
 
 bool
 Path::isObjectFile() const {
-  LLVMFileType type;
-  if (fs::identify_magic(str(), type) || type == Unknown_FileType)
-    return false;
-  return true;
+  fs::file_magic type;
+  // Every recognized magic (anything but unknown) counts as an object file.
+  if (fs::identify_magic(str(), type))
+    return false;
+  return type.is_object();
 }
@@ -212,10 +213,10 @@ Path::appendSuffix(StringRef suffix) {
 
 bool
 Path::isBitcodeFile() const {
-  LLVMFileType type;
+  fs::file_magic type;
   if (fs::identify_magic(str(), type))
     return false;
-  return type == Bitcode_FileType;
+  return type == fs::file_magic::bitcode;
 }
 
 bool Path::hasMagicNumber(StringRef Magic) const {
diff --git a/llvm/lib/Support/PathV2.cpp b/llvm/lib/Support/PathV2.cpp
index 9cf16dab38e7..7cc434bde744 100644
--- a/llvm/lib/Support/PathV2.cpp
+++ b/llvm/lib/Support/PathV2.cpp
@@ -13,6 +13,7 @@
 
 #include "llvm/Support/PathV2.h"
 #include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Endian.h"
 #include "llvm/Support/ErrorHandling.h"
 #include
 #include
@@ -738,13 +739,124 @@ error_code has_magic(const Twine &path, const Twine &magic, bool &result) {
   return success;
 }
 
-error_code identify_magic(const Twine &path, LLVMFileType &result) {
+/// @brief Identify the magic in magic.
+///
+/// Maps the leading bytes of \a magic to a file_magic value. Buffers shorter
+/// than four bytes are never classified, and every header field beyond the
+/// first four bytes is bounds-checked before it is read.
+///
+/// @param magic The first bytes of the file being identified.
+/// @returns The detected type, or file_magic::unknown when nothing matches.
+file_magic identify_magic(StringRef magic) {
+  // The signature tests below index up to magic[3]; reject shorter (or empty)
+  // buffers up front instead of reading out of range.
+  if (magic.size() < 4)
+    return file_magic::unknown;
+
+  switch ((unsigned char)magic[0]) {
+    case 0xDE:  // 0x0B17C0DE = BC wrapper
+      if (magic[1] == (char)0xC0 && magic[2] == (char)0x17 &&
+          magic[3] == (char)0x0B)
+        return file_magic::bitcode;
+      break;
+    case 'B':
+      if (magic[1] == 'C' && magic[2] == (char)0xC0 && magic[3] == (char)0xDE)
+        return file_magic::bitcode;
+      break;
+    case '!':
+      // ar archives start with the 8-byte global header "!<arch>\n".
+      if (magic.size() >= 8 && memcmp(magic.data(),"!<arch>\n",8) == 0)
+        return file_magic::archive;
+      break;
+
+    case '\177':
+      if (magic[1] == 'E' && magic[2] == 'L' && magic[3] == 'F') {
+        // Only a type whose low byte is at 16 and whose high byte (17) is
+        // zero is decoded here.
+        if (magic.size() >= 18 && magic[17] == 0)
+          switch (magic[16]) {
+            default: break;
+            case 1: return file_magic::elf_relocatable;
+            case 2: return file_magic::elf_executable;
+            case 3: return file_magic::elf_shared_object;
+            case 4: return file_magic::elf_core;
+          }
+      }
+      break;
+
+    case 0xCA:
+      if (magic[1] == char(0xFE) && magic[2] == char(0xBA) &&
+          magic[3] == char(0xBE)) {
+        // This is complicated by an overlap with Java class files.
+        // See the Mach-O section in /usr/share/file/magic for details.
+        if (magic.size() >= 8 && magic[7] < 43)
+          // FIXME: Universal Binary of any type.
+          return file_magic::macho_dynamically_linked_shared_lib;
+      }
+      break;
+
+      // The two magic numbers for mach-o are:
+      // 0xfeedface - 32-bit mach-o
+      // 0xfeedfacf - 64-bit mach-o
+    case 0xFE:
+    case 0xCE:
+    case 0xCF: {
+      // The file type bytes are combined as unsigned values so that a byte
+      // >= 0x80 cannot sign-extend into the result.
+      uint16_t type = 0;
+      if (magic[0] == char(0xFE) && magic[1] == char(0xED) &&
+          magic[2] == char(0xFA) &&
+          (magic[3] == char(0xCE) || magic[3] == char(0xCF))) {
+        /* Native endian */
+        if (magic.size() >= 16)
+          type = (unsigned char)magic[14] << 8 | (unsigned char)magic[15];
+      } else if ((magic[0] == char(0xCE) || magic[0] == char(0xCF)) &&
+                 magic[1] == char(0xFA) && magic[2] == char(0xED) &&
+                 magic[3] == char(0xFE)) {
+        /* Reverse endian */
+        if (magic.size() >= 14)
+          type = (unsigned char)magic[13] << 8 | (unsigned char)magic[12];
+      }
+      switch (type) {
+        default: break;
+        case 1: return file_magic::macho_object;
+        case 2: return file_magic::macho_executable;
+        case 3: return file_magic::macho_fixed_virtual_memory_shared_lib;
+        case 4: return file_magic::macho_core;
+        case 5: return file_magic::macho_preload_executabl;
+        case 6: return file_magic::macho_dynamically_linked_shared_lib;
+        case 7: return file_magic::macho_dynamic_linker;
+        case 8: return file_magic::macho_bundle;
+        case 9: return file_magic::macho_dynamically_linked_shared_lib_stub;
+        case 10: return file_magic::macho_dsym_companion;
+      }
+      break;
+    }
+    case 0xF0: // PowerPC Windows
+    case 0x83: // Alpha 32-bit
+    case 0x84: // Alpha 64-bit
+    case 0x66: // MIPS R4000 Windows
+    case 0x50: // mc68K
+    case 0x4c: // 80386 Windows
+      if (magic[1] == 0x01)
+        return file_magic::coff_object;
+      // No break: the 0x02 test below is applied to these values as well
+      // (presumably intentional -- confirm).
+
+    case 0x90: // PA-RISC Windows
+    case 0x68: // mc68K Windows
+      if (magic[1] == 0x02)
+        return file_magic::coff_object;
+      break;
+
+    case 0x4d: // Possible MS-DOS stub on Windows PE file
+      // The little-endian 32-bit value at 0x3c is used as the offset of the
+      // PE signature; it can only be read if the buffer holds 0x40 bytes.
+      if (magic[1] == 0x5a && magic.size() >= 0x40) {
+        uint32_t off =
+          *reinterpret_cast<const support::ulittle32_t*>(magic.data() + 0x3c);
+        // PE/COFF file, either EXE or DLL. The whole 4-byte signature must
+        // lie inside the buffer.
+        if (off <= magic.size() - 4 &&
+            memcmp(magic.data() + off, "PE\0\0",4) == 0)
+          return file_magic::pecoff_executable;
+      }
+      break;
+
+    case 0x64: // x86-64 Windows.
+      if (magic[1] == char(0x86))
+        return file_magic::coff_object;
+      break;
+
+    default:
+      break;
+  }
+  return file_magic::unknown;
+}
+
+error_code identify_magic(const Twine &path, file_magic &result) {
+  // NOTE(review): Magic.capacity() bytes are requested (SmallString<32>),
+  // fewer than the 0x40 the PE check needs, so PE images come back as
+  // unknown through this overload.
   SmallString<32> Magic;
   error_code ec = get_magic(path, Magic.capacity(), Magic);
   if (ec && ec != errc::value_too_large)
     return ec;
 
-  result = IdentifyFileType(Magic.data(), Magic.size());
+  result = identify_magic(Magic);
   return success;
 }