forked from OSchip/llvm-project
150 lines
5.1 KiB
C++
150 lines
5.1 KiB
C++
//===-- llvm-sim.cpp - Find similar sections of programs -------*- C++ -*-===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This program finds similar sections of a Module, and exports them as a JSON
|
|
// file.
|
|
//
|
|
// To find similarities contained across multiple modules, please use llvm-link
|
|
// first to merge the modules.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "llvm/Analysis/IRSimilarityIdentifier.h"
|
|
#include "llvm/IRReader/IRReader.h"
|
|
#include "llvm/Support/CommandLine.h"
|
|
#include "llvm/Support/FileSystem.h"
|
|
#include "llvm/Support/InitLLVM.h"
|
|
#include "llvm/Support/JSON.h"
|
|
#include "llvm/Support/SourceMgr.h"
|
|
#include "llvm/Support/ToolOutputFile.h"
|
|
|
|
using namespace llvm;
|
|
using namespace IRSimilarity;
|
|
|
|
static cl::opt<std::string> OutputFilename("o", cl::desc("Output Filename"),
|
|
cl::init("-"),
|
|
cl::value_desc("filename"));
|
|
|
|
static cl::opt<std::string> InputSourceFile(cl::Positional,
|
|
cl::desc("<Source file>"),
|
|
cl::init("-"),
|
|
cl::value_desc("filename"));
|
|
|
|
/// Retrieve the unique number \p I was mapped to in parseBitcodeFile.
|
|
///
|
|
/// \param I - The Instruction to find the instruction number for.
|
|
/// \param LLVMInstNum - The mapping of Instructions to their location in the
|
|
/// module represented by an unsigned integer.
|
|
/// \returns The instruction number for \p I if it exists.
|
|
Optional<unsigned>
|
|
getPositionInModule(const Instruction *I,
|
|
const DenseMap<Instruction *, unsigned> &LLVMInstNum) {
|
|
assert(I && "Instruction is nullptr!");
|
|
DenseMap<Instruction *, unsigned>::const_iterator It = LLVMInstNum.find(I);
|
|
if (It == LLVMInstNum.end())
|
|
return None;
|
|
return It->second;
|
|
}
|
|
|
|
/// Exports the given SimilarityGroups to a JSON file at \p FilePath.
|
|
///
|
|
/// \param FilePath - The path to the output location.
|
|
/// \param SimSections - The similarity groups to process.
|
|
/// \param LLVMInstNum - The mapping of Instructions to their location in the
|
|
/// module represented by an unsigned integer.
|
|
/// \returns A nonzero error code if there was a failure creating the file.
|
|
std::error_code
|
|
exportToFile(const StringRef FilePath,
|
|
const SimilarityGroupList &SimSections,
|
|
const DenseMap<Instruction *, unsigned> &LLVMInstNum) {
|
|
std::error_code EC;
|
|
std::unique_ptr<ToolOutputFile> Out(
|
|
new ToolOutputFile(FilePath, EC, sys::fs::OF_None));
|
|
if (EC)
|
|
return EC;
|
|
|
|
json::OStream J(Out->os(), 1);
|
|
J.objectBegin();
|
|
|
|
unsigned SimOption = 1;
|
|
// Process each list of SimilarityGroups organized by the Module.
|
|
for (const SimilarityGroup &G : SimSections) {
|
|
std::string SimOptionStr = std::to_string(SimOption);
|
|
J.attributeBegin(SimOptionStr);
|
|
J.arrayBegin();
|
|
// For each file there is a list of the range where the similarity
|
|
// exists.
|
|
for (const IRSimilarityCandidate &C : G) {
|
|
Optional<unsigned> Start =
|
|
getPositionInModule((*C.front()).Inst, LLVMInstNum);
|
|
Optional<unsigned> End =
|
|
getPositionInModule((*C.back()).Inst, LLVMInstNum);
|
|
|
|
assert(Start.hasValue() &&
|
|
"Could not find instruction number for first instruction");
|
|
assert(End.hasValue() &&
|
|
"Could not find instruction number for last instruction");
|
|
|
|
J.object([&] {
|
|
J.attribute("start", Start.getValue());
|
|
J.attribute("end", End.getValue());
|
|
});
|
|
}
|
|
J.arrayEnd();
|
|
J.attributeEnd();
|
|
SimOption++;
|
|
}
|
|
J.objectEnd();
|
|
|
|
Out->keep();
|
|
|
|
return EC;
|
|
}
|
|
|
|
int main(int argc, const char *argv[]) {
|
|
InitLLVM X(argc, argv);
|
|
|
|
cl::ParseCommandLineOptions(argc, argv, "LLVM IR Similarity Visualizer\n");
|
|
|
|
LLVMContext CurrContext;
|
|
SMDiagnostic Err;
|
|
std::unique_ptr<Module> ModuleToAnalyze =
|
|
parseIRFile(InputSourceFile, Err, CurrContext);
|
|
|
|
if (!ModuleToAnalyze) {
|
|
Err.print(argv[0], errs());
|
|
return 1;
|
|
}
|
|
|
|
// Mapping from an Instruction pointer to its occurrence in a sequential
|
|
// list of all the Instructions in a Module.
|
|
DenseMap<Instruction *, unsigned> LLVMInstNum;
|
|
|
|
// We give each instruction a number, which gives us a start and end value
|
|
// for the beginning and end of each IRSimilarityCandidate.
|
|
unsigned InstructionNumber = 1;
|
|
for (Function &F : *ModuleToAnalyze)
|
|
for (BasicBlock &BB : F)
|
|
for (Instruction &I : BB.instructionsWithoutDebug())
|
|
LLVMInstNum[&I]= InstructionNumber++;
|
|
|
|
// The similarity identifier we will use to find the similar sections.
|
|
IRSimilarityIdentifier SimIdent;
|
|
SimilarityGroupList SimilaritySections =
|
|
SimIdent.findSimilarity(*ModuleToAnalyze);
|
|
|
|
std::error_code E =
|
|
exportToFile(OutputFilename, SimilaritySections, LLVMInstNum);
|
|
if (E) {
|
|
errs() << argv[0] << ": " << E.message() << '\n';
|
|
return 2;
|
|
}
|
|
|
|
return 0;
|
|
}
|