From 50aa37b96cf9686b87ef675f3614ab5980e33d3b Mon Sep 17 00:00:00 2001
From: Brian Cain
Date: Mon, 27 Feb 2017 06:22:17 +0000
Subject: [PATCH] llvm-mc-fuzzer: add support for assembly

This creates an llvm-mc-disassemble-fuzzer from the existing llvm-mc-fuzzer
and finishing the assemble support in llvm-mc-assemble-fuzzer.

llvm-svn: 296323
---
 .../llvm-mc-assemble-fuzzer/CMakeLists.txt    |  19 ++
 .../llvm-mc-assemble-fuzzer.cpp               | 331 ++++++++++++++++++
 .../CMakeLists.txt                            |   9 +-
 .../llvm-mc-disassemble-fuzzer.cpp}           |  24 +-
 4 files changed, 360 insertions(+), 23 deletions(-)
 create mode 100644 llvm/tools/llvm-mc-assemble-fuzzer/CMakeLists.txt
 create mode 100644 llvm/tools/llvm-mc-assemble-fuzzer/llvm-mc-assemble-fuzzer.cpp
 rename llvm/tools/{llvm-mc-fuzzer => llvm-mc-disassemble-fuzzer}/CMakeLists.txt (64%)
 rename llvm/tools/{llvm-mc-fuzzer/llvm-mc-fuzzer.cpp => llvm-mc-disassemble-fuzzer/llvm-mc-disassemble-fuzzer.cpp} (89%)

diff --git a/llvm/tools/llvm-mc-assemble-fuzzer/CMakeLists.txt b/llvm/tools/llvm-mc-assemble-fuzzer/CMakeLists.txt
new file mode 100644
index 000000000000..c5fb62166cfd
--- /dev/null
+++ b/llvm/tools/llvm-mc-assemble-fuzzer/CMakeLists.txt
@@ -0,0 +1,19 @@
+if( LLVM_USE_SANITIZE_COVERAGE )
+  include_directories(BEFORE
+    ${CMAKE_CURRENT_SOURCE_DIR}/../../lib/Fuzzer)
+
+  set(LLVM_LINK_COMPONENTS
+      AllTargetsAsmPrinters
+      AllTargetsAsmParsers
+      AllTargetsDescs
+      AllTargetsInfos
+      MC
+      MCParser
+      Support
+  )
+  add_llvm_tool(llvm-mc-assemble-fuzzer
+      llvm-mc-assemble-fuzzer.cpp)
+  target_link_libraries(llvm-mc-assemble-fuzzer
+      LLVMFuzzer
+      )
+endif()
diff --git a/llvm/tools/llvm-mc-assemble-fuzzer/llvm-mc-assemble-fuzzer.cpp b/llvm/tools/llvm-mc-assemble-fuzzer/llvm-mc-assemble-fuzzer.cpp
new file mode 100644
index 000000000000..0344d8cd8c9a
--- /dev/null
+++ b/llvm/tools/llvm-mc-assemble-fuzzer/llvm-mc-assemble-fuzzer.cpp
@@ -0,0 +1,331 @@
+//===--- llvm-mc-assemble-fuzzer.cpp - Fuzzer for the MC assembler --------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// libFuzzer driver that feeds each fuzzer input to the MC assembly parser of
+// the target selected with -triple, -mcpu and -mattr.
+//
+//===----------------------------------------------------------------------===//
+
+#include "FuzzerInterface.h"
+#include "llvm-c/Target.h"
+#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCParser/AsmLexer.h"
+#include "llvm/MC/MCParser/MCTargetAsmParser.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCTargetOptionsCommandFlags.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FileUtilities.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/ToolOutputFile.h"
+
+using namespace llvm;
+
+static cl::opt<std::string>
+    TripleName("triple", cl::desc("Target triple to assemble for, "
+                                  "see -version for available targets"));
+
+static cl::opt<std::string>
+    MCPU("mcpu",
+         cl::desc("Target a specific cpu type (-mcpu=help for details)"),
+         cl::value_desc("cpu-name"), cl::init(""));
+
+// This is useful for variable-length instruction sets.
+static cl::opt<unsigned> InsnLimit(
+    "insn-limit",
+    cl::desc("Limit the number of instructions to process (0 for no limit)"),
+    cl::value_desc("count"), cl::init(0));
+
+static cl::list<std::string>
+    MAttrs("mattr", cl::CommaSeparated,
+           cl::desc("Target specific attributes (-mattr=help for details)"),
+           cl::value_desc("a1,+a2,-a3,..."));
+// The feature string derived from -mattr's values.
+std::string FeaturesStr;
+
+static cl::list<std::string>
+    FuzzerArgs("fuzzer-args", cl::Positional,
+               cl::desc("Options to pass to the fuzzer"), cl::ZeroOrMore,
+               cl::PositionalEatsArgs);
+static std::vector<char *> ModifiedArgv;
+
+enum OutputFileType {
+  OFT_Null,
+  OFT_AssemblyFile,
+  OFT_ObjectFile
+};
+static cl::opt<OutputFileType>
+FileType("filetype", cl::init(OFT_AssemblyFile),
+  cl::desc("Choose an output file type:"),
+  cl::values(
+       clEnumValN(OFT_AssemblyFile, "asm",
+                  "Emit an assembly ('.s') file"),
+       clEnumValN(OFT_Null, "null",
+                  "Don't emit anything (for timing purposes)"),
+       clEnumValN(OFT_ObjectFile, "obj",
+                  "Emit a native object ('.o') file")));
+
+
+// Non-owning MemoryBuffer over the bytes libFuzzer passes to the test
+// callback, so they can be registered with SourceMgr as-is.
+class LLVMFuzzerInputBuffer : public MemoryBuffer
+{
+  public:
+    LLVMFuzzerInputBuffer(const uint8_t *data_, size_t size_)
+      : Data(reinterpret_cast<const char *>(data_)),
+        Size(size_) {
+        init(Data, Data+Size, false);
+    }
+
+
+    virtual BufferKind getBufferKind() const {
+      return MemoryBuffer_Malloc; // it's not disk-backed so I think that's
+                                  // the intent ... though AFAIK it
+                                  // probably came from an mmap or sbrk
+    }
+
+  private:
+    const char *Data;
+    size_t Size;
+};
+
+// Creates the generic and target assembly parsers for SrcMgr's buffer and runs
+// them, emitting into Str. Aborts if the target has no assembly parser;
+// otherwise returns the result of MCAsmParser::Run.
+static int AssembleInput(const char *ProgName, const Target *TheTarget,
+                         SourceMgr &SrcMgr, MCContext &Ctx, MCStreamer &Str,
+                         MCAsmInfo &MAI, MCSubtargetInfo &STI,
+                         MCInstrInfo &MCII, MCTargetOptions &MCOptions) {
+  static const bool NoInitialTextSection = false;
+
+  std::unique_ptr<MCAsmParser> Parser(
+    createMCAsmParser(SrcMgr, Ctx, Str, MAI));
+
+  std::unique_ptr<MCTargetAsmParser> TAP(
+    TheTarget->createMCAsmParser(STI, *Parser, MCII, MCOptions));
+
+  if (!TAP) {
+    errs() << ProgName
+           << ": error: this target '" << TripleName
+           << "', does not support assembly parsing.\n";
+    abort();
+  }
+
+  Parser->setTargetParser(*TAP);
+
+  return Parser->Run(NoInitialTextSection);
+}
+
+
+// Assembles one fuzzer input for the configured target. Failing to set up the
+// MC objects aborts; the parser's own result is discarded, so inputs that do
+// not assemble are not treated as failures. Always returns 0.
+int AssembleOneInput(const uint8_t *Data, size_t Size) {
+  const bool ShowInst = false;
+  const bool AsmVerbose = false;
+  const bool UseDwarfDirectory = true;
+
+  Triple TheTriple(Triple::normalize(TripleName));
+
+  SourceMgr SrcMgr;
+
+  std::unique_ptr<MemoryBuffer> BufferPtr(new LLVMFuzzerInputBuffer(Data, Size));
+
+  // Tell SrcMgr about this buffer, which is what the parser will pick up.
+  SrcMgr.AddNewSourceBuffer(std::move(BufferPtr), SMLoc());
+
+  static const std::vector<std::string> NoIncludeDirs;
+  SrcMgr.setIncludeDirs(NoIncludeDirs);
+
+  static std::string ArchName;
+  std::string Error;
+  const Target *TheTarget = TargetRegistry::lookupTarget(ArchName, TheTriple,
+      Error);
+  if (!TheTarget) {
+    errs() << "error: this target '" << TheTriple.normalize()
+      << "/" << ArchName << "', was not found: '" << Error << "'\n";
+
+    abort();
+  }
+
+  std::unique_ptr<MCRegisterInfo> MRI(TheTarget->createMCRegInfo(TripleName));
+  if (!MRI) {
+    errs() << "Unable to create target register info!";
+    abort();
+  }
+
+  std::unique_ptr<MCAsmInfo> MAI(TheTarget->createMCAsmInfo(*MRI, TripleName));
+  if (!MAI) {
+    errs() << "Unable to create target asm info!";
+    abort();
+  }
+
+
+  MCObjectFileInfo MOFI;
+  MCContext Ctx(MAI.get(), MRI.get(), &MOFI, &SrcMgr);
+
+  static const bool UsePIC = false;
+  static const CodeModel::Model CMModel = CodeModel::Default;
+  MOFI.InitMCObjectFileInfo(TheTriple, UsePIC, CMModel, Ctx);
+
+  const unsigned OutputAsmVariant = 0;
+  std::unique_ptr<MCInstrInfo> MCII(TheTarget->createMCInstrInfo());
+  std::unique_ptr<MCInstPrinter> IP(TheTarget->createMCInstPrinter(
+      Triple(TripleName), OutputAsmVariant, *MAI, *MCII, *MRI));
+  if (!IP) {
+    errs()
+      << "error: unable to create instruction printer for target triple '"
+      << TheTriple.normalize() << "' with assembly variant "
+      << OutputAsmVariant << ".\n";
+
+    abort();
+  }
+
+  const char *ProgName = "llvm-mc-fuzzer";
+  std::unique_ptr<MCSubtargetInfo> STI(
+      TheTarget->createMCSubtargetInfo(TripleName, MCPU, FeaturesStr));
+  MCCodeEmitter *CE = nullptr;
+  MCAsmBackend *MAB = nullptr;
+
+  MCTargetOptions MCOptions = InitMCTargetOptionsFromFlags();
+
+  std::string OutputString;
+  raw_string_ostream Out(OutputString);
+  auto FOut = llvm::make_unique<formatted_raw_ostream>(Out);
+
+  // The object-file sinks are declared ahead of the streamer so that they are
+  // destroyed after it: the object streamer is handed a reference to the
+  // stream and is still used by AssembleInput below.
+  std::unique_ptr<tool_output_file> ObjOut;
+  std::unique_ptr<buffer_ostream> BOS;
+  std::unique_ptr<MCStreamer> Str;
+
+  if (FileType == OFT_AssemblyFile) {
+    // The asm streamer takes ownership of the instruction printer.
+    Str.reset(TheTarget->createAsmStreamer(
+        Ctx, std::move(FOut), AsmVerbose,
+        UseDwarfDirectory, IP.release(), CE, MAB, ShowInst));
+  } else if (FileType == OFT_Null) {
+    // -filetype=null: parse the input but emit nothing.
+    Str.reset(TheTarget->createNullStreamer(Ctx));
+  } else {
+    assert(FileType == OFT_ObjectFile && "Invalid file type!");
+
+    std::error_code EC;
+    const std::string OutputFilename = "-";
+    ObjOut = llvm::make_unique<tool_output_file>(OutputFilename, EC,
+        sys::fs::F_None);
+    if (EC) {
+      errs() << EC.message() << '\n';
+      abort();
+    }
+
+    // Don't waste memory on names of temp labels.
+    Ctx.setUseNamesOnTempLabels(false);
+
+    raw_pwrite_stream *OS = &ObjOut->os();
+    if (!ObjOut->os().supportsSeeking()) {
+      BOS = make_unique<buffer_ostream>(ObjOut->os());
+      OS = BOS.get();
+    }
+
+    CE = TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx);
+    MAB = TheTarget->createMCAsmBackend(*MRI, TripleName, MCPU, MCOptions);
+    Str.reset(TheTarget->createMCObjectStreamer(
+        TheTriple, Ctx, *MAB, *OS, CE, *STI, MCOptions.MCRelaxAll,
+        MCOptions.MCIncrementalLinkerCompatible,
+        /*DWARFMustBeAtTheEnd*/ false));
+  }
+  const int Res = AssembleInput(ProgName, TheTarget, SrcMgr, Ctx, *Str, *MAI, *STI,
+      *MCII, MCOptions);
+
+  (void) Res;
+
+  return 0;
+}
+
+int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  return AssembleOneInput(Data, Size);
+}
+
+int LLVMFuzzerInitialize(int *argc, char ***argv) {
+  // The command line is unusual compared to other fuzzers due to the need to
+  // specify the target. Options like -triple, -mcpu, and -mattr work like
+  // their counterparts in llvm-mc, while -fuzzer-args collects options for the
+  // fuzzer itself.
+  //
+  // Examples:
+  //
+  // Fuzz the big-endian MIPS32R6 assembler using 100,000 inputs of up to
+  // 32 bytes each and use the contents of ./corpus as the test corpus:
+  //   llvm-mc-assemble-fuzzer -triple mips-linux-gnu -mcpu=mips32r6 \
+  //       -fuzzer-args -max_len=32 -runs=100000 ./corpus
+  //
+  // Infinitely fuzz the little-endian MIPS64R2 assembler with the MSA
+  // feature enabled:
+  //   llvm-mc-assemble-fuzzer -triple mipsel-linux-gnu -mcpu=mips64r2 \
+  //       -mattr=msa -fuzzer-args ./corpus
+  //
+  // If your aim is to find instructions that are not tested, then it is
+  // advisable to constrain the maximum input size to a single instruction
+  // using -max_len as in the first example. This results in a test corpus of
+  // individual instructions that test unique paths. Without this constraint,
+  // there will be considerable redundancy in the corpus.
+
+  char **OriginalArgv = *argv;
+
+  LLVMInitializeAllTargetInfos();
+  LLVMInitializeAllTargetMCs();
+  LLVMInitializeAllAsmParsers();
+
+  cl::ParseCommandLineOptions(*argc, OriginalArgv);
+
+  // Rebuild the argv without the arguments llvm-mc-fuzzer consumed so that
+  // the driver can parse its arguments.
+  //
+  // FuzzerArgs cannot provide the non-const pointer that OriginalArgv needs.
+  // Re-use the strings from OriginalArgv instead of copying FuzzerArg to a
+  // non-const buffer to avoid the need to clean up when the fuzzer terminates.
+  ModifiedArgv.push_back(OriginalArgv[0]);
+  for (const auto &FuzzerArg : FuzzerArgs) {
+    for (int i = 1; i < *argc; ++i) {
+      if (FuzzerArg == OriginalArgv[i])
+        ModifiedArgv.push_back(OriginalArgv[i]);
+    }
+  }
+  *argc = ModifiedArgv.size();
+  *argv = ModifiedArgv.data();
+
+  // Package up features to be passed to target/subtarget
+  // We have to pass it via a global since the callback doesn't
+  // permit any user data.
+  if (MAttrs.size()) {
+    SubtargetFeatures Features;
+    for (unsigned i = 0; i != MAttrs.size(); ++i)
+      Features.AddFeature(MAttrs[i]);
+    FeaturesStr = Features.getString();
+  }
+
+  if (TripleName.empty())
+    TripleName = sys::getDefaultTargetTriple();
+
+  return 0;
+}
diff --git a/llvm/tools/llvm-mc-fuzzer/CMakeLists.txt b/llvm/tools/llvm-mc-disassemble-fuzzer/CMakeLists.txt
similarity index 64%
rename from llvm/tools/llvm-mc-fuzzer/CMakeLists.txt
rename to llvm/tools/llvm-mc-disassemble-fuzzer/CMakeLists.txt
index b42b3eee3c98..c539f823e57f 100644
--- a/llvm/tools/llvm-mc-fuzzer/CMakeLists.txt
+++ b/llvm/tools/llvm-mc-disassemble-fuzzer/CMakeLists.txt
@@ -3,16 +3,19 @@ if( LLVM_USE_SANITIZE_COVERAGE )
     ${CMAKE_CURRENT_SOURCE_DIR}/../../lib/Fuzzer)
 
   set(LLVM_LINK_COMPONENTS
+      AllTargetsAsmPrinters
       AllTargetsDescs
       AllTargetsDisassemblers
       AllTargetsInfos
       MC
       MCDisassembler
+      MCParser
       Support
   )
-  add_llvm_tool(llvm-mc-fuzzer
-                llvm-mc-fuzzer.cpp)
-  target_link_libraries(llvm-mc-fuzzer
+  add_llvm_tool(llvm-mc-disassemble-fuzzer
+      llvm-mc-disassemble-fuzzer.cpp)
+
+  target_link_libraries(llvm-mc-disassemble-fuzzer
       LLVMFuzzer
       )
 endif()
diff --git a/llvm/tools/llvm-mc-fuzzer/llvm-mc-fuzzer.cpp b/llvm/tools/llvm-mc-disassemble-fuzzer/llvm-mc-disassemble-fuzzer.cpp
similarity index 89%
rename from llvm/tools/llvm-mc-fuzzer/llvm-mc-fuzzer.cpp
rename to llvm/tools/llvm-mc-disassemble-fuzzer/llvm-mc-disassemble-fuzzer.cpp
index e31ea762add5..643afe64073e 100644
--- a/llvm/tools/llvm-mc-fuzzer/llvm-mc-fuzzer.cpp
+++ b/llvm/tools/llvm-mc-disassemble-fuzzer/llvm-mc-disassemble-fuzzer.cpp
@@ -20,19 +20,6 @@ using namespace llvm;
 
 const unsigned AssemblyTextBufSize = 80;
 
-enum ActionType {
-  AC_Assemble,
-  AC_Disassemble
-};
-
-static cl::opt<ActionType>
-Action(cl::desc("Action to perform:"),
-       cl::init(AC_Assemble),
-       cl::values(clEnumValN(AC_Assemble, "assemble",
-                             "Assemble a .s file (default)"),
-                  clEnumValN(AC_Disassemble, "disassemble",
-                             "Disassemble strings of hex bytes")));
-
 static cl::opt<std::string>
     TripleName("triple", cl::desc("Target triple to assemble for, "
                                   "see -version for available targets"));
@@ -88,13 +75,7 @@ int DisassembleOneInput(const uint8_t *Data, size_t Size) {
 }
 
 int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
-  if (Action == AC_Assemble)
-    errs() << "error: -assemble is not implemented\n";
-  else if (Action == AC_Disassemble)
-    return DisassembleOneInput(Data, Size);
-
-  llvm_unreachable("Unknown action");
-  return 0;
+  return DisassembleOneInput(Data, Size);
 }
 
 int LLVMFuzzerInitialize(int *argc, char ***argv) {
@@ -155,5 +136,8 @@ int LLVMFuzzerInitialize(int *argc, char ***argv) {
     FeaturesStr = Features.getString();
   }
 
+  if (TripleName.empty())
+    TripleName = sys::getDefaultTargetTriple();
+
   return 0;
 }