[msan] Heuristically instrument unknown intrinsics.

This change adds shadow and origin propagation for unknown intrinsics
by examining their arguments and ModRef behaviour. For now, only three
classes of intrinsics are handled:
- those that look like a simple SIMD store
- those that look like a simple SIMD load
- those that have no memory effects and look like an arithmetic/logic
  operation on simple types.
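
For example, the three classes match intrinsic calls such as the following
(taken from the tests added in this commit; value names are illustrative):

  call void @llvm.x86.sse.storeu.ps(i8* %p, <4 x float> %x)               ; store-like
  %v = call <16 x i8> @llvm.x86.sse3.ldu.dq(i8* %p)                       ; load-like
  %r = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a, <8 x i16> %b)  ; nomem arithmetic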

llvm-svn: 170530
Evgeniy Stepanov 2012-12-19 11:22:04 +00:00
parent fd41b5b969
commit d7571cd4bc
2 changed files with 217 additions and 3 deletions


@@ -1210,6 +1210,147 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
    VAHelper->visitVACopyInst(I);
  }
  enum IntrinsicKind {
    IK_DoesNotAccessMemory,
    IK_OnlyReadsMemory,
    IK_WritesMemory
  };

  static IntrinsicKind getIntrinsicKind(Intrinsic::ID iid) {
    const int DoesNotAccessMemory = IK_DoesNotAccessMemory;
    const int OnlyReadsArgumentPointees = IK_OnlyReadsMemory;
    const int OnlyReadsMemory = IK_OnlyReadsMemory;
    const int OnlyAccessesArgumentPointees = IK_WritesMemory;
    const int UnknownModRefBehavior = IK_WritesMemory;
#define GET_INTRINSIC_MODREF_BEHAVIOR
#define ModRefBehavior IntrinsicKind
#include "llvm/Intrinsics.gen"
#undef ModRefBehavior
#undef GET_INTRINSIC_MODREF_BEHAVIOR
  }
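  // Note: the body of getIntrinsicKind above is generated. Under
  // GET_INTRINSIC_MODREF_BEHAVIOR, Intrinsics.gen expands to code that
  // returns ModRefBehavior enumerators by name; the local const ints,
  // together with the ModRefBehavior -> IntrinsicKind redefinition, rebind
  // those names to the coarser IntrinsicKind. A rough sketch of the
  // expansion (illustrative only; the generated form may be a switch or a
  // table lookup):
  //   switch (iid) {
  //   default: return UnknownModRefBehavior;  // i.e. IK_WritesMemory
  //   case Intrinsic::bswap: return DoesNotAccessMemory;
  //   // ... one entry per intrinsic ...
  //   }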
  /// \brief Handle vector store-like intrinsics.
  ///
  /// Instrument intrinsics that look like a simple SIMD store: writes memory,
  /// has 1 pointer argument and 1 vector argument, returns void.
  bool handleVectorStoreIntrinsic(IntrinsicInst &I) {
    IRBuilder<> IRB(&I);
    Value *Addr = I.getArgOperand(0);
    Value *Shadow = getShadow(&I, 1);
    Value *ShadowPtr = getShadowPtr(Addr, Shadow->getType(), IRB);

    // We don't know the pointer alignment (could be unaligned SSE store!).
    // Have to assume the worst case.
    IRB.CreateAlignedStore(Shadow, ShadowPtr, 1);

    if (ClCheckAccessAddress)
      insertCheck(Addr, &I);

    // FIXME: use ClStoreCleanOrigin
    // FIXME: factor out common code from materializeStores
    if (ClTrackOrigins)
      IRB.CreateStore(getOrigin(&I, 1), getOriginPtr(Addr, IRB));
    return true;
  }
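  // For example (a sketch based on the StoreIntrinsic test below; value
  // names are illustrative), for
  //   call void @llvm.x86.sse.storeu.ps(i8* %p, <4 x float> %x)
  // this emits an unconditional shadow store ahead of the call:
  //   store <4 x i32> %shadow, <4 x i32>* %shadow_ptr, align 1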
  /// \brief Handle vector load-like intrinsics.
  ///
  /// Instrument intrinsics that look like a simple SIMD load: reads memory,
  /// has 1 pointer argument, returns a vector.
  bool handleVectorLoadIntrinsic(IntrinsicInst &I) {
    IRBuilder<> IRB(&I);
    Value *Addr = I.getArgOperand(0);
    Type *ShadowTy = getShadowTy(&I);
    Value *ShadowPtr = getShadowPtr(Addr, ShadowTy, IRB);

    // We don't know the pointer alignment (could be unaligned SSE load!).
    // Have to assume the worst case.
    setShadow(&I, IRB.CreateAlignedLoad(ShadowPtr, 1, "_msld"));

    if (ClCheckAccessAddress)
      insertCheck(Addr, &I);

    if (ClTrackOrigins)
      setOrigin(&I, IRB.CreateLoad(getOriginPtr(Addr, IRB)));
    return true;
  }
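  // For example (a sketch based on the LoadIntrinsic test below; value
  // names are illustrative), for
  //   %call = call <16 x i8> @llvm.x86.sse3.ldu.dq(i8* %p)
  // the shadow of %call is loaded from the shadow memory of %p:
  //   %_msld = load <16 x i8>* %shadow_ptr, align 1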
  /// \brief Handle (SIMD arithmetic)-like intrinsics.
  ///
  /// Instrument intrinsics with any number of arguments of the same type,
  /// equal to the return type. The type should be simple (no aggregates or
  /// pointers; vectors are fine).
  /// Caller guarantees that this intrinsic does not access memory.
  bool maybeHandleSimpleNomemIntrinsic(IntrinsicInst &I) {
    Type *RetTy = I.getType();
    if (!(RetTy->isIntOrIntVectorTy() ||
          RetTy->isFPOrFPVectorTy() ||
          RetTy->isX86_MMXTy()))
      return false;

    unsigned NumArgOperands = I.getNumArgOperands();
    for (unsigned i = 0; i < NumArgOperands; ++i) {
      Type *Ty = I.getArgOperand(i)->getType();
      if (Ty != RetTy)
        return false;
    }

    IRBuilder<> IRB(&I);
    ShadowAndOriginCombiner SC(this, IRB);
    for (unsigned i = 0; i < NumArgOperands; ++i)
      SC.Add(I.getArgOperand(i));
    SC.Done(&I);
    return true;
  }
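  // For example (a sketch based on the Paddsw128 test below; value names
  // are illustrative), for
  //   %call = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a, <8 x i16> %b)
  // the combiner ORs the argument shadows to form the result shadow:
  //   %sr = or <8 x i16> %sa, %sb
  // and, with -msan-track-origins, picks one of the argument origins via an
  // icmp/select on the combined shadow.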
  /// \brief Heuristically instrument unknown intrinsics.
  ///
  /// The main purpose of this code is to do something reasonable with all
  /// random intrinsics we might encounter, most importantly SIMD intrinsics.
  /// We recognize several classes of intrinsics by their argument types and
  /// ModRef behavior and apply special instrumentation when we are reasonably
  /// sure that we know what the intrinsic does.
  ///
  /// We special-case intrinsics where this approach fails. See llvm.bswap
  /// handling as an example of that.
  bool handleUnknownIntrinsic(IntrinsicInst &I) {
    unsigned NumArgOperands = I.getNumArgOperands();
    if (NumArgOperands == 0)
      return false;

    Intrinsic::ID iid = I.getIntrinsicID();
    IntrinsicKind IK = getIntrinsicKind(iid);
    bool OnlyReadsMemory = IK == IK_OnlyReadsMemory;
    bool WritesMemory = IK == IK_WritesMemory;
    assert(!(OnlyReadsMemory && WritesMemory));

    if (NumArgOperands == 2 &&
        I.getArgOperand(0)->getType()->isPointerTy() &&
        I.getArgOperand(1)->getType()->isVectorTy() &&
        I.getType()->isVoidTy() &&
        WritesMemory) {
      // This looks like a vector store.
      return handleVectorStoreIntrinsic(I);
    }

    if (NumArgOperands == 1 &&
        I.getArgOperand(0)->getType()->isPointerTy() &&
        I.getType()->isVectorTy() &&
        OnlyReadsMemory) {
      // This looks like a vector load.
      return handleVectorLoadIntrinsic(I);
    }

    if (!OnlyReadsMemory && !WritesMemory)
      if (maybeHandleSimpleNomemIntrinsic(I))
        return true;

    // FIXME: detect and handle SSE maskstore/maskload
    return false;
  }
  void handleBswap(IntrinsicInst &I) {
    IRBuilder<> IRB(&I);
    Value *Op = I.getArgOperand(0);

@@ -1226,7 +1367,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
      handleBswap(I);
      break;
    default:
      if (!handleUnknownIntrinsic(I))
        visitInstruction(I);
      break;
    }
  }


@@ -1,5 +1,5 @@
; RUN: opt < %s -msan -msan-check-access-address=0 -S | FileCheck %s
; RUN: opt < %s -msan -msan-check-access-address=0 -msan-track-origins=1 -S | FileCheck -check-prefix=CHECK-ORIGINS %s

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"

; Check the presence of __msan_init
@@ -408,6 +408,7 @@ define <4 x i32> @ShuffleVector(<4 x i32> %vec, <4 x i32> %vec1) {
; CHECK: shufflevector
; CHECK: ret <4 x i32>

; Test bswap intrinsic instrumentation
define i32 @BSwap(i32 %x) nounwind uwtable readnone {
  %y = tail call i32 @llvm.bswap.i32(i32 %x)
@@ -423,3 +424,74 @@ declare i32 @llvm.bswap.i32(i32) nounwind readnone
; CHECK: @llvm.bswap.i32
; CHECK-NOT: call void @__msan_warning
; CHECK: ret i32

; Store intrinsic.
define void @StoreIntrinsic(i8* %p, <4 x float> %x) nounwind uwtable {
  call void @llvm.x86.sse.storeu.ps(i8* %p, <4 x float> %x)
  ret void
}
declare void @llvm.x86.sse.storeu.ps(i8*, <4 x float>) nounwind
; CHECK: @StoreIntrinsic
; CHECK-NOT: br
; CHECK-NOT: = or
; CHECK: store <4 x i32> {{.*}} align 1
; CHECK: call void @llvm.x86.sse.storeu.ps
; CHECK: ret void

; Load intrinsic.
define <16 x i8> @LoadIntrinsic(i8* %p) nounwind uwtable {
  %call = call <16 x i8> @llvm.x86.sse3.ldu.dq(i8* %p)
  ret <16 x i8> %call
}
declare <16 x i8> @llvm.x86.sse3.ldu.dq(i8* %p) nounwind
; CHECK: @LoadIntrinsic
; CHECK: load <16 x i8>* {{.*}} align 1
; CHECK-NOT: br
; CHECK-NOT: = or
; CHECK: call <16 x i8> @llvm.x86.sse3.ldu.dq
; CHECK: store <16 x i8> {{.*}} @__msan_retval_tls
; CHECK: ret <16 x i8>
; CHECK-ORIGINS: @LoadIntrinsic
; CHECK-ORIGINS: [[ORIGIN:%[01-9a-z]+]] = load i32* {{.*}}
; CHECK-ORIGINS: call <16 x i8> @llvm.x86.sse3.ldu.dq
; CHECK-ORIGINS: store i32 {{.*}}[[ORIGIN]], i32* @__msan_retval_origin_tls
; CHECK-ORIGINS: ret <16 x i8>

; Simple NoMem intrinsic.
; Check that the shadow is OR'ed, the origin is Select'ed,
; and no shadow checks are inserted.
define <8 x i16> @Paddsw128(<8 x i16> %a, <8 x i16> %b) nounwind uwtable {
  %call = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %call
}
declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a, <8 x i16> %b) nounwind
; CHECK: @Paddsw128
; CHECK-NEXT: load <8 x i16>* {{.*}} @__msan_param_tls
; CHECK-NEXT: load <8 x i16>* {{.*}} @__msan_param_tls
; CHECK-NEXT: = or <8 x i16>
; CHECK-NEXT: call <8 x i16> @llvm.x86.sse2.padds.w
; CHECK-NEXT: store <8 x i16> {{.*}} @__msan_retval_tls
; CHECK-NEXT: ret <8 x i16>
; CHECK-ORIGINS: @Paddsw128
; CHECK-ORIGINS: load i32* {{.*}} @__msan_param_origin_tls
; CHECK-ORIGINS: load i32* {{.*}} @__msan_param_origin_tls
; CHECK-ORIGINS: = bitcast <8 x i16> {{.*}} to i128
; CHECK-ORIGINS-NEXT: = icmp ne i128 {{.*}}, 0
; CHECK-ORIGINS-NEXT: = select i1 {{.*}}, i32 {{.*}}, i32
; CHECK-ORIGINS: call <8 x i16> @llvm.x86.sse2.padds.w
; CHECK-ORIGINS: store i32 {{.*}} @__msan_retval_origin_tls
; CHECK-ORIGINS: ret <8 x i16>