[asan] extend asan-coverage (still experimental).

- add a mode for collecting per-block coverage (-asan-coverage=2). So far the implementation is naive (all blocks are instrumented), the performance overhead on top of asan could be as high as 30%. - Make sure the one-time calls to __sanitizer_cov are moved to function buttom, which in turn required to copy the original debug info into the call insn. Here is the performance data on SPEC 2006 (train data, comparing asan with asan-coverage={0,1,2}): asan+cov0 asan+cov1 diff 0-1 asan+cov2 diff 0-2 diff 1-2 400.perlbench, 65.60, 65.80, 1.00, 76.20, 1.16, 1.16 401.bzip2, 65.10, 65.50, 1.01, 75.90, 1.17, 1.16 403.gcc, 1.64, 1.69, 1.03, 2.04, 1.24, 1.21 429.mcf, 21.90, 22.60, 1.03, 23.20, 1.06, 1.03 445.gobmk, 166.00, 169.00, 1.02, 205.00, 1.23, 1.21 456.hmmer, 88.30, 87.90, 1.00, 91.00, 1.03, 1.04 458.sjeng, 210.00, 222.00, 1.06, 258.00, 1.23, 1.16 462.libquantum, 1.73, 1.75, 1.01, 2.11, 1.22, 1.21 464.h264ref, 147.00, 152.00, 1.03, 160.00, 1.09, 1.05 471.omnetpp, 115.00, 116.00, 1.01, 140.00, 1.22, 1.21 473.astar, 133.00, 131.00, 0.98, 142.00, 1.07, 1.08 483.xalancbmk, 118.00, 120.00, 1.02, 154.00, 1.31, 1.28 433.milc, 19.80, 20.00, 1.01, 20.10, 1.02, 1.01 444.namd, 16.20, 16.20, 1.00, 17.60, 1.09, 1.09 447.dealII, 41.80, 42.20, 1.01, 43.50, 1.04, 1.03 450.soplex, 7.51, 7.82, 1.04, 8.25, 1.10, 1.05 453.povray, 14.00, 14.40, 1.03, 15.80, 1.13, 1.10 470.lbm, 33.30, 34.10, 1.02, 34.10, 1.02, 1.00 482.sphinx3, 12.40, 12.30, 0.99, 13.00, 1.05, 1.06 llvm-svn: 199488
2014-01-17 11:00:30 +00:00 · 2014-01-17 11:00:30 +00:00 · 714c67c31e
parent 7677760ec8
commit 714c67c31e
4 changed files with 82 additions and 47 deletions
--- a/compiler-rt/lib/sanitizer_common/sanitizer_coverage.cc
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_coverage.cc
@ -11,15 +11,15 @@
 // This file implements run-time support for a poor man's coverage tool.
 //
 // Compiler instrumentation:
-// For every function F the compiler injects the following code:
+// For every interesting basic block the compiler injects the following code:
 // if (*Guard) {
-//    __sanitizer_cov(&F);
+//    __sanitizer_cov();
 //    *Guard = 1;
 // }
-// It's fine to call __sanitizer_cov more than once for a given function.
+// It's fine to call __sanitizer_cov more than once for a given block.
 //
 // Run-time:
-//  - __sanitizer_cov(pc): record that we've executed a given PC.
+//  - __sanitizer_cov(): record that we've executed the PC (GET_CALLER_PC).
 //  - __sanitizer_cov_dump: dump the coverage data to disk.
 //  For every module of the current process that has coverage data
 //  this will create a file module_name.PID.sancov. The file format is simple:
@ -37,6 +37,7 @@
 #include "sanitizer_libc.h"
 #include "sanitizer_mutex.h"
 #include "sanitizer_procmaps.h"
+#include "sanitizer_stacktrace.h"
 #include "sanitizer_flags.h"

 struct CovData {
@ -105,8 +106,8 @@ void CovDump() {
 }  // namespace __sanitizer

 extern "C" {
-SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_cov(void *pc) {
-  CovAdd(reinterpret_cast<uptr>(pc));
+SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_cov() {
+  CovAdd(StackTrace::GetPreviousInstructionPc(GET_CALLER_PC()));
 }
 SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_cov_dump() { CovDump(); }
 }  // extern "C"
--- a/compiler-rt/lib/sanitizer_common/sanitizer_internal_defs.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_internal_defs.h
@ -99,7 +99,7 @@ extern "C" {
  void __sanitizer_report_error_summary(const char *error_summary);

  SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_cov_dump();
-  SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_cov(void *pc);
+  SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_cov();
  SANITIZER_INTERFACE_ATTRIBUTE
  void __sanitizer_annotate_contiguous_container(const void *beg,
                                                 const void *end,
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@ -33,6 +33,7 @@
 #include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Type.h"
 #include "llvm/InstVisitor.h"
@ -130,8 +131,9 @@ static cl::opt<bool> ClUseAfterReturn("asan-use-after-return",
 // This flag may need to be replaced with -f[no]asan-globals.
 static cl::opt<bool> ClGlobals("asan-globals",
       cl::desc("Handle global objects"), cl::Hidden, cl::init(true));
-static cl::opt<bool> ClCoverage("asan-coverage",
-       cl::desc("ASan coverage"), cl::Hidden, cl::init(false));
+static cl::opt<int> ClCoverage("asan-coverage",
+       cl::desc("ASan coverage. 0: none, 1: entry block, 2: all blocks"),
+       cl::Hidden, cl::init(false));
 static cl::opt<bool> ClInitializers("asan-initialization-order",
       cl::desc("Handle C++ initializer order"), cl::Hidden, cl::init(false));
 static cl::opt<bool> ClMemIntrin("asan-memintrin",
@ -320,7 +322,8 @@ struct AddressSanitizer : public FunctionPass {
  bool LooksLikeCodeInBug11395(Instruction *I);
  void FindDynamicInitializers(Module &M);
  bool GlobalIsLinkerInitialized(GlobalVariable *G);
-  bool InjectCoverage(Function &F);
+  bool InjectCoverage(Function &F, const ArrayRef<BasicBlock*> AllBlocks);
+  void InjectCoverageAtBlock(Function &F, BasicBlock &BB);

  bool CheckInitOrder;
  bool CheckUseAfterReturn;
@ -1076,7 +1079,7 @@ void AddressSanitizer::initializeCallbacks(Module &M) {
  AsanHandleNoReturnFunc = checkInterfaceFunction(M.getOrInsertFunction(
      kAsanHandleNoReturnName, IRB.getVoidTy(), NULL));
  AsanCovFunction = checkInterfaceFunction(M.getOrInsertFunction(
-      kAsanCovName, IRB.getVoidTy(), IntptrTy, NULL));
+      kAsanCovName, IRB.getVoidTy(), NULL));
  // We insert an empty inline asm after __asan_report* to avoid callback merge.
  EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false),
                            StringRef(""), StringRef(""),
@ -1148,33 +1151,11 @@ bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) {
  return false;
 }

-// Poor man's coverage that works with ASan.
-// We create a Guard boolean variable with the same linkage
-// as the function and inject this code into the entry block:
-// if (*Guard) {
-//    __sanitizer_cov(&F);
-//    *Guard = 1;
-// }
-// The accesses to Guard are atomic. The rest of the logic is
-// in __sanitizer_cov (it's fine to call it more than once).
-//
-// This coverage implementation provides very limited data:
-// it only tells if a given function was ever executed.
-// No counters, no per-basic-block or per-edge data.
-// But for many use cases this is what we need and the added slowdown
-// is negligible. This simple implementation will probably be obsoleted
-// by the upcoming Clang-based coverage implementation.
-// By having it here and now we hope to
-//  a) get the functionality to users earlier and
-//  b) collect usage statistics to help improve Clang coverage design.
-bool AddressSanitizer::InjectCoverage(Function &F) {
-  if (!ClCoverage) return false;
-
+void AddressSanitizer::InjectCoverageAtBlock(Function &F, BasicBlock &BB) {
+  BasicBlock::iterator IP = BB.getFirstInsertionPt(), BE = BB.end();
  // Skip static allocas at the top of the entry block so they don't become
  // dynamic when we split the block.  If we used our optimized stack layout,
  // then there will only be one alloca and it will come first.
-  BasicBlock &Entry = F.getEntryBlock();
-  BasicBlock::iterator IP = Entry.getFirstInsertionPt(), BE = Entry.end();
  for (; IP != BE; ++IP) {
    AllocaInst *AI = dyn_cast<AllocaInst>(IP);
    if (!AI || !AI->isStaticAlloca())
@ -1190,14 +1171,48 @@ bool AddressSanitizer::InjectCoverage(Function &F) {
  Load->setAtomic(Monotonic);
  Load->setAlignment(1);
  Value *Cmp = IRB.CreateICmpEQ(Constant::getNullValue(Int8Ty), Load);
-  Instruction *Ins = SplitBlockAndInsertIfThen(Cmp, IP, false);
+  Instruction *Ins = SplitBlockAndInsertIfThen(
+      Cmp, IP, false, MDBuilder(*C).createBranchWeights(1, 100000));
  IRB.SetInsertPoint(Ins);
  // We pass &F to __sanitizer_cov. We could avoid this and rely on
  // GET_CALLER_PC, but having the PC of the first instruction is just nice.
-  IRB.CreateCall(AsanCovFunction, IRB.CreatePointerCast(&F, IntptrTy));
+  Instruction *Call = IRB.CreateCall(AsanCovFunction);
+  Call->setDebugLoc(IP->getDebugLoc());
  StoreInst *Store = IRB.CreateStore(ConstantInt::get(Int8Ty, 1), Guard);
  Store->setAtomic(Monotonic);
  Store->setAlignment(1);
+}
+
+// Poor man's coverage that works with ASan.
+// We create a Guard boolean variable with the same linkage
+// as the function and inject this code into the entry block (-asan-coverage=1)
+// or all blocks (-asan-coverage=2):
+// if (*Guard) {
+//    __sanitizer_cov(&F);
+//    *Guard = 1;
+// }
+// The accesses to Guard are atomic. The rest of the logic is
+// in __sanitizer_cov (it's fine to call it more than once).
+//
+// This coverage implementation provides very limited data:
+// it only tells if a given function (block) was ever executed.
+// No counters, no per-edge data.
+// But for many use cases this is what we need and the added slowdown
+// is negligible. This simple implementation will probably be obsoleted
+// by the upcoming Clang-based coverage implementation.
+// By having it here and now we hope to
+//  a) get the functionality to users earlier and
+//  b) collect usage statistics to help improve Clang coverage design.
+bool AddressSanitizer::InjectCoverage(Function &F,
+                                      const ArrayRef<BasicBlock *> AllBlocks) {
+  if (!ClCoverage) return false;
+
+  if (ClCoverage == 1) {
+    InjectCoverageAtBlock(F, F.getEntryBlock());
+  } else {
+    for (size_t i = 0, n = AllBlocks.size(); i < n; i++)
+      InjectCoverageAtBlock(F, *AllBlocks[i]);
+  }
  return true;
 }

@ -1222,12 +1237,14 @@ bool AddressSanitizer::runOnFunction(Function &F) {
  SmallSet<Value*, 16> TempsToInstrument;
  SmallVector<Instruction*, 16> ToInstrument;
  SmallVector<Instruction*, 8> NoReturnCalls;
+  SmallVector<BasicBlock*, 16> AllBlocks;
  int NumAllocas = 0;
  bool IsWrite;

  // Fill the set of memory operations to instrument.
  for (Function::iterator FI = F.begin(), FE = F.end();
       FI != FE; ++FI) {
+    AllBlocks.push_back(FI);
    TempsToInstrument.clear();
    int NumInsnsPerBB = 0;
    for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
@ -1297,7 +1314,7 @@ bool AddressSanitizer::runOnFunction(Function &F) {

  bool res = NumInstrumented > 0 || ChangedStack || !NoReturnCalls.empty();

-  if (InjectCoverage(F))
+  if (InjectCoverage(F, AllBlocks))
    res = true;

  DEBUG(dbgs() << "ASAN done instrumenting: " << res << " " << F << "\n");
--- a/llvm/test/Instrumentation/AddressSanitizer/coverage.ll
+++ b/llvm/test/Instrumentation/AddressSanitizer/coverage.ll
@ -1,13 +1,30 @@
-; RUN: opt < %s -asan -asan-coverage=1 -S | FileCheck %s
+; RUN: opt < %s -asan -asan-coverage=1 -S | FileCheck %s --check-prefix=CHECK1
+; RUN: opt < %s -asan -asan-coverage=2 -S | FileCheck %s --check-prefix=CHECK2
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-unknown-linux-gnu"
-define i32 @foo(i32* %a) sanitize_address {
+define void @foo(i32* %a) sanitize_address {
 entry:
-  ret i32 0
+  %tobool = icmp eq i32* %a, null
+  br i1 %tobool, label %if.end, label %if.then
+
+  if.then:                                          ; preds = %entry
+  store i32 0, i32* %a, align 4
+  br label %if.end
+
+  if.end:                                           ; preds = %entry, %if.then
+  ret void
 }
-; CHECK: define i32 @foo(i32* %a) #0 {
-; CHECK: %0 = load atomic i8* @__asan_gen_cov_foo monotonic, align 1
-; CHECK: %1 = icmp eq i8 0, %0
-; CHECK: br i1 %1, label %2, label %3
-; CHECK: call void @__sanitizer_cov(i64 ptrtoint (i32 (i32*)* @foo to i64))
-; CHECK: store atomic i8 1, i8* @__asan_gen_cov_foo monotonic, align 1
+; CHECK1-LABEL: define void @foo
+; CHECK1: %0 = load atomic i8* @__asan_gen_cov_foo monotonic, align 1
+; CHECK1: %1 = icmp eq i8 0, %0
+; CHECK1: br i1 %1, label %2, label %3
+; CHECK1: call void @__sanitizer_cov
+; CHECK1-NOT: call void @__sanitizer_cov
+; CHECK1: store atomic i8 1, i8* @__asan_gen_cov_foo monotonic, align 1
+
+; CHECK2-LABEL: define void @foo
+; CHECK2: call void @__sanitizer_cov
+; CHECK2: call void @__sanitizer_cov
+; CHECK2: call void @__sanitizer_cov
+; CHECK2-NOT: call void @__sanitizer_cov
+; CHECK2: ret void