diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index ae63977377d1..d399e6c12f03 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -1210,6 +1210,147 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     VAHelper->visitVACopyInst(I);
   }
 
+  enum IntrinsicKind {
+    IK_DoesNotAccessMemory,
+    IK_OnlyReadsMemory,
+    IK_WritesMemory
+  };
+
+  static IntrinsicKind getIntrinsicKind(Intrinsic::ID iid) {
+    const int DoesNotAccessMemory = IK_DoesNotAccessMemory;
+    const int OnlyReadsArgumentPointees = IK_OnlyReadsMemory;
+    const int OnlyReadsMemory = IK_OnlyReadsMemory;
+    const int OnlyAccessesArgumentPointees = IK_WritesMemory;
+    const int UnknownModRefBehavior = IK_WritesMemory;
+#define GET_INTRINSIC_MODREF_BEHAVIOR
+#define ModRefBehavior IntrinsicKind
+#include "llvm/Intrinsics.gen"
+#undef ModRefBehavior
+#undef GET_INTRINSIC_MODREF_BEHAVIOR
+  }
+
+  /// \brief Handle vector store-like intrinsics.
+  ///
+  /// Instrument intrinsics that look like a simple SIMD store: writes memory,
+  /// has 1 pointer argument and 1 vector argument, returns void.
+  bool handleVectorStoreIntrinsic(IntrinsicInst &I) {
+    IRBuilder<> IRB(&I);
+    Value *Addr = I.getArgOperand(0);
+    Value *Shadow = getShadow(&I, 1);
+    Value *ShadowPtr = getShadowPtr(Addr, Shadow->getType(), IRB);
+
+    // We don't know the pointer alignment (could be unaligned SSE store!).
+    // Have to assume the worst case.
+    IRB.CreateAlignedStore(Shadow, ShadowPtr, 1);
+
+    if (ClCheckAccessAddress)
+      insertCheck(Addr, &I);
+
+    // FIXME: use ClStoreCleanOrigin
+    // FIXME: factor out common code from materializeStores
+    if (ClTrackOrigins)
+      IRB.CreateStore(getOrigin(&I, 1), getOriginPtr(Addr, IRB));
+    return true;
+  }
+
+  /// \brief Handle vector load-like intrinsics.
+  ///
+  /// Instrument intrinsics that look like a simple SIMD load: reads memory,
+  /// has 1 pointer argument, returns a vector.
+  bool handleVectorLoadIntrinsic(IntrinsicInst &I) {
+    IRBuilder<> IRB(&I);
+    Value *Addr = I.getArgOperand(0);
+
+    Type *ShadowTy = getShadowTy(&I);
+    Value *ShadowPtr = getShadowPtr(Addr, ShadowTy, IRB);
+    // We don't know the pointer alignment (could be unaligned SSE load!).
+    // Have to assume the worst case.
+    setShadow(&I, IRB.CreateAlignedLoad(ShadowPtr, 1, "_msld"));
+
+    if (ClCheckAccessAddress)
+      insertCheck(Addr, &I);
+
+    if (ClTrackOrigins)
+      setOrigin(&I, IRB.CreateLoad(getOriginPtr(Addr, IRB)));
+    return true;
+  }
+
+  /// \brief Handle (SIMD arithmetic)-like intrinsics.
+  ///
+  /// Instrument intrinsics with any number of arguments of the same type,
+  /// equal to the return type. The type should be simple (no aggregates or
+  /// pointers; vectors are fine).
+  /// Caller guarantees that this intrinsic does not access memory.
+  bool maybeHandleSimpleNomemIntrinsic(IntrinsicInst &I) {
+    Type *RetTy = I.getType();
+    if (!(RetTy->isIntOrIntVectorTy() ||
+          RetTy->isFPOrFPVectorTy() ||
+          RetTy->isX86_MMXTy()))
+      return false;
+
+    unsigned NumArgOperands = I.getNumArgOperands();
+
+    for (unsigned i = 0; i < NumArgOperands; ++i) {
+      Type *Ty = I.getArgOperand(i)->getType();
+      if (Ty != RetTy)
+        return false;
+    }
+
+    IRBuilder<> IRB(&I);
+    ShadowAndOriginCombiner SC(this, IRB);
+    for (unsigned i = 0; i < NumArgOperands; ++i)
+      SC.Add(I.getArgOperand(i));
+    SC.Done(&I);
+
+    return true;
+  }
+
+  /// \brief Heuristically instrument unknown intrinsics.
+  ///
+  /// The main purpose of this code is to do something reasonable with all
+  /// random intrinsics we might encounter, most importantly, SIMD intrinsics.
+  /// We recognize several classes of intrinsics by their argument types and
+  /// ModRefBehavior and apply special instrumentation when we are reasonably
+  /// sure that we know what the intrinsic does.
+  ///
+  /// We special-case intrinsics where this approach fails. See llvm.bswap
+  /// handling as an example of that.
+  bool handleUnknownIntrinsic(IntrinsicInst &I) {
+    unsigned NumArgOperands = I.getNumArgOperands();
+    if (NumArgOperands == 0)
+      return false;
+
+    Intrinsic::ID iid = I.getIntrinsicID();
+    IntrinsicKind IK = getIntrinsicKind(iid);
+    bool OnlyReadsMemory = IK == IK_OnlyReadsMemory;
+    bool WritesMemory = IK == IK_WritesMemory;
+    assert(!(OnlyReadsMemory && WritesMemory));
+
+    if (NumArgOperands == 2 &&
+        I.getArgOperand(0)->getType()->isPointerTy() &&
+        I.getArgOperand(1)->getType()->isVectorTy() &&
+        I.getType()->isVoidTy() &&
+        WritesMemory) {
+      // This looks like a vector store.
+      return handleVectorStoreIntrinsic(I);
+    }
+
+    if (NumArgOperands == 1 &&
+        I.getArgOperand(0)->getType()->isPointerTy() &&
+        I.getType()->isVectorTy() &&
+        OnlyReadsMemory) {
+      // This looks like a vector load.
+      return handleVectorLoadIntrinsic(I);
+    }
+
+    if (!OnlyReadsMemory && !WritesMemory)
+      if (maybeHandleSimpleNomemIntrinsic(I))
+        return true;
+
+    // FIXME: detect and handle SSE maskstore/maskload
+    return false;
+  }
+
   void handleBswap(IntrinsicInst &I) {
     IRBuilder<> IRB(&I);
     Value *Op = I.getArgOperand(0);
@@ -1226,7 +1367,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
       handleBswap(I);
       break;
     default:
-      visitInstruction(I);
+      if (!handleUnknownIntrinsic(I))
+        visitInstruction(I);
       break;
     }
   }
diff --git a/llvm/test/Instrumentation/MemorySanitizer/msan_basic.ll b/llvm/test/Instrumentation/MemorySanitizer/msan_basic.ll
index b6dcd16662c9..1e2be00e7e7c 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/msan_basic.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/msan_basic.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -msan -S | FileCheck %s
-; RUN: opt < %s -msan -msan-track-origins=1 -S | FileCheck -check-prefix=CHECK-ORIGINS %s
+; RUN: opt < %s -msan -msan-check-access-address=0 -S | FileCheck %s
+; RUN: opt < %s -msan -msan-check-access-address=0 -msan-track-origins=1 -S | FileCheck -check-prefix=CHECK-ORIGINS %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
 ; Check the presence of __msan_init
@@ -408,6 +408,7 @@ define <4 x i32> @ShuffleVector(<4 x i32> %vec, <4 x i32> %vec1) {
 ; CHECK: shufflevector
 ; CHECK: ret <4 x i32>
 
+
 ; Test bswap intrinsic instrumentation
 define i32 @BSwap(i32 %x) nounwind uwtable readnone {
   %y = tail call i32 @llvm.bswap.i32(i32 %x)
@@ -423,3 +424,74 @@ declare i32 @llvm.bswap.i32(i32) nounwind readnone
 ; CHECK: @llvm.bswap.i32
 ; CHECK-NOT: call void @__msan_warning
 ; CHECK: ret i32
+
+
+; Store intrinsic.
+
+define void @StoreIntrinsic(i8* %p, <4 x float> %x) nounwind uwtable {
+  call void @llvm.x86.sse.storeu.ps(i8* %p, <4 x float> %x)
+  ret void
+}
+
+declare void @llvm.x86.sse.storeu.ps(i8*, <4 x float>) nounwind
+
+; CHECK: @StoreIntrinsic
+; CHECK-NOT: br
+; CHECK-NOT: = or
+; CHECK: store <4 x i32> {{.*}} align 1
+; CHECK: call void @llvm.x86.sse.storeu.ps
+; CHECK: ret void
+
+
+; Load intrinsic.
+
+define <16 x i8> @LoadIntrinsic(i8* %p) nounwind uwtable {
+  %call = call <16 x i8> @llvm.x86.sse3.ldu.dq(i8* %p)
+  ret <16 x i8> %call
+}
+
+declare <16 x i8> @llvm.x86.sse3.ldu.dq(i8* %p) nounwind
+
+; CHECK: @LoadIntrinsic
+; CHECK: load <16 x i8>* {{.*}} align 1
+; CHECK-NOT: br
+; CHECK-NOT: = or
+; CHECK: call <16 x i8> @llvm.x86.sse3.ldu.dq
+; CHECK: store <16 x i8> {{.*}} @__msan_retval_tls
+; CHECK: ret <16 x i8>
+
+; CHECK-ORIGINS: @LoadIntrinsic
+; CHECK-ORIGINS: [[ORIGIN:%[01-9a-z]+]] = load i32* {{.*}}
+; CHECK-ORIGINS: call <16 x i8> @llvm.x86.sse3.ldu.dq
+; CHECK-ORIGINS: store i32 {{.*}}[[ORIGIN]], i32* @__msan_retval_origin_tls
+; CHECK-ORIGINS: ret <16 x i8>
+
+
+; Simple NoMem intrinsic
+; Check that shadow is OR'ed, and origin is Select'ed
+; And no shadow checks!
+
+define <8 x i16> @Paddsw128(<8 x i16> %a, <8 x i16> %b) nounwind uwtable {
+  %call = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a, <8 x i16> %b)
+  ret <8 x i16> %call
+}
+
+declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a, <8 x i16> %b) nounwind
+
+; CHECK: @Paddsw128
+; CHECK-NEXT: load <8 x i16>* {{.*}} @__msan_param_tls
+; CHECK-NEXT: load <8 x i16>* {{.*}} @__msan_param_tls
+; CHECK-NEXT: = or <8 x i16>
+; CHECK-NEXT: call <8 x i16> @llvm.x86.sse2.padds.w
+; CHECK-NEXT: store <8 x i16> {{.*}} @__msan_retval_tls
+; CHECK-NEXT: ret <8 x i16>
+
+; CHECK-ORIGINS: @Paddsw128
+; CHECK-ORIGINS: load i32* {{.*}} @__msan_param_origin_tls
+; CHECK-ORIGINS: load i32* {{.*}} @__msan_param_origin_tls
+; CHECK-ORIGINS: = bitcast <8 x i16> {{.*}} to i128
+; CHECK-ORIGINS-NEXT: = icmp ne i128 {{.*}}, 0
+; CHECK-ORIGINS-NEXT: = select i1 {{.*}}, i32 {{.*}}, i32
+; CHECK-ORIGINS: call <8 x i16> @llvm.x86.sse2.padds.w
+; CHECK-ORIGINS: store i32 {{.*}} @__msan_retval_origin_tls
+; CHECK-ORIGINS: ret <8 x i16>
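
Note (not part of the patch): the @Paddsw128 CHECK-ORIGINS lines above match the origin-combining rule applied by maybeHandleSimpleNomemIntrinsic: operand shadows are OR'ed, and a later operand's origin replaces the accumulated origin only when that operand's shadow is non-zero. A minimal standalone C++ sketch of that rule follows; the names ShadowOrigin and combine are invented for illustration and do not appear in the pass.

// Illustrative model only, not code from MemorySanitizer.cpp: how per-operand
// shadows and origins are merged for a no-memory-access intrinsic such as
// @llvm.x86.sse2.padds.w.
#include <cstdint>
#include <utility>

// (shadow bits, origin id) for one value; an all-zero shadow means the value
// is fully initialized.
typedef std::pair<uint64_t, uint32_t> ShadowOrigin;

ShadowOrigin combine(ShadowOrigin Acc, ShadowOrigin Op) {
  uint64_t Shadow = Acc.first | Op.first;        // shadows are OR'ed
  uint32_t Origin = Op.first != 0 ? Op.second    // poisoned operand: its origin wins
                                  : Acc.second;  // otherwise keep the accumulated origin
  return ShadowOrigin(Shadow, Origin);
}

This is the same shape as the instrumented IR the test expects: bitcast the operand shadow to an integer, icmp ne against zero, then select between the two origin values.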