[ubsan] Teach the pointer overflow check that "p - <unsigned> <= p" (PR33430)

The pointer overflow check gives false negatives when dealing with expressions in which an unsigned value is subtracted from a pointer. This is summarized in PR33430 [1]: ubsan permits the result of the subtraction to be greater than "p", but it should not. To fix the issue, we should track whether or not the pointer expression is a subtraction. If it is, and the indices are unsigned, we know to expect "p - <unsigned> <= p". I've tested this by running check-{llvm,clang} with a stage2 ubsan-enabled build. I've also added some tests to compiler-rt, which are in D34122. [1] https://bugs.llvm.org/show_bug.cgi?id=33430 Differential Revision: https://reviews.llvm.org/D34121 llvm-svn: 307955
2017-07-13 20:55:26 +00:00 · 2017-07-13 20:55:26 +00:00 · 175b6d1f28
parent 982060b0a4
commit 175b6d1f28
4 changed files with 44 additions and 20 deletions
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@ -3052,7 +3052,9 @@ static llvm::Value *emitArraySubscriptGEP(CodeGenFunction &CGF,
                                          SourceLocation loc,
                                    const llvm::Twine &name = "arrayidx") {
  if (inbounds) {
-    return CGF.EmitCheckedInBoundsGEP(ptr, indices, signedIndices, loc, name);
+    return CGF.EmitCheckedInBoundsGEP(ptr, indices, signedIndices,
+                                      CodeGenFunction::NotSubtraction, loc,
+                                      name);
  } else {
    return CGF.Builder.CreateGEP(ptr, indices, name);
  }
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@ -1851,7 +1851,7 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
  llvm::Value *input;

  int amount = (isInc ? 1 : -1);
-  bool signedIndex = !isInc;
+  bool isSubtraction = !isInc;

  if (const AtomicType *atomicTy = type->getAs<AtomicType>()) {
    type = atomicTy->getValueType();
@ -1941,8 +1941,9 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
      if (CGF.getLangOpts().isSignedOverflowDefined())
        value = Builder.CreateGEP(value, numElts, "vla.inc");
      else
-        value = CGF.EmitCheckedInBoundsGEP(value, numElts, signedIndex,
-                                           E->getExprLoc(), "vla.inc");
+        value = CGF.EmitCheckedInBoundsGEP(
+            value, numElts, /*SignedIndices=*/false, isSubtraction,
+            E->getExprLoc(), "vla.inc");

    // Arithmetic on function pointers (!) is just +-1.
    } else if (type->isFunctionType()) {
@ -1952,8 +1953,9 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
      if (CGF.getLangOpts().isSignedOverflowDefined())
        value = Builder.CreateGEP(value, amt, "incdec.funcptr");
      else
-        value = CGF.EmitCheckedInBoundsGEP(value, amt, signedIndex,
-                                           E->getExprLoc(), "incdec.funcptr");
+        value = CGF.EmitCheckedInBoundsGEP(value, amt, /*SignedIndices=*/false,
+                                           isSubtraction, E->getExprLoc(),
+                                           "incdec.funcptr");
      value = Builder.CreateBitCast(value, input->getType());

    // For everything else, we can just do a simple increment.
@ -1962,8 +1964,9 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
      if (CGF.getLangOpts().isSignedOverflowDefined())
        value = Builder.CreateGEP(value, amt, "incdec.ptr");
      else
-        value = CGF.EmitCheckedInBoundsGEP(value, amt, signedIndex,
-                                           E->getExprLoc(), "incdec.ptr");
+        value = CGF.EmitCheckedInBoundsGEP(value, amt, /*SignedIndices=*/false,
+                                           isSubtraction, E->getExprLoc(),
+                                           "incdec.ptr");
    }

  // Vector increment/decrement.
@ -2044,7 +2047,8 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
    if (CGF.getLangOpts().isSignedOverflowDefined())
      value = Builder.CreateGEP(value, sizeValue, "incdec.objptr");
    else
-      value = CGF.EmitCheckedInBoundsGEP(value, sizeValue, signedIndex,
+      value = CGF.EmitCheckedInBoundsGEP(value, sizeValue,
+                                         /*SignedIndices=*/false, isSubtraction,
                                         E->getExprLoc(), "incdec.objptr");
    value = Builder.CreateBitCast(value, input->getType());
  }
@ -2663,7 +2667,6 @@ static Value *emitPointerArithmetic(CodeGenFunction &CGF,
  }

  bool isSigned = indexOperand->getType()->isSignedIntegerOrEnumerationType();
-  bool mayHaveNegativeGEPIndex = isSigned || isSubtraction;

  unsigned width = cast<llvm::IntegerType>(index->getType())->getBitWidth();
  auto &DL = CGF.CGM.getDataLayout();
@ -2715,7 +2718,7 @@ static Value *emitPointerArithmetic(CodeGenFunction &CGF,
    } else {
      index = CGF.Builder.CreateNSWMul(index, numElements, "vla.index");
      pointer =
-          CGF.EmitCheckedInBoundsGEP(pointer, index, mayHaveNegativeGEPIndex,
+          CGF.EmitCheckedInBoundsGEP(pointer, index, isSigned, isSubtraction,
                                     op.E->getExprLoc(), "add.ptr");
    }
    return pointer;
@ -2733,7 +2736,7 @@ static Value *emitPointerArithmetic(CodeGenFunction &CGF,
  if (CGF.getLangOpts().isSignedOverflowDefined())
    return CGF.Builder.CreateGEP(pointer, index, "add.ptr");

-  return CGF.EmitCheckedInBoundsGEP(pointer, index, mayHaveNegativeGEPIndex,
+  return CGF.EmitCheckedInBoundsGEP(pointer, index, isSigned, isSubtraction,
                                    op.E->getExprLoc(), "add.ptr");
 }

@ -2807,7 +2810,7 @@ static Value* tryEmitFMulAdd(const BinOpInfo &op,
 Value *ScalarExprEmitter::EmitAdd(const BinOpInfo &op) {
  if (op.LHS->getType()->isPointerTy() ||
      op.RHS->getType()->isPointerTy())
-    return emitPointerArithmetic(CGF, op, /*subtraction*/ false);
+    return emitPointerArithmetic(CGF, op, CodeGenFunction::NotSubtraction);

  if (op.Ty->isSignedIntegerOrEnumerationType()) {
    switch (CGF.getLangOpts().getSignedOverflowBehavior()) {
@ -2878,7 +2881,7 @@ Value *ScalarExprEmitter::EmitSub(const BinOpInfo &op) {
  // If the RHS is not a pointer, then we have normal pointer
  // arithmetic.
  if (!op.RHS->getType()->isPointerTy())
-    return emitPointerArithmetic(CGF, op, /*subtraction*/ true);
+    return emitPointerArithmetic(CGF, op, CodeGenFunction::IsSubtraction);

  // Otherwise, this is a pointer subtraction.

@ -3853,6 +3856,7 @@ LValue CodeGenFunction::EmitCompoundAssignmentLValue(
 Value *CodeGenFunction::EmitCheckedInBoundsGEP(Value *Ptr,
                                               ArrayRef<Value *> IdxList,
                                               bool SignedIndices,
+                                               bool IsSubtraction,
                                               SourceLocation Loc,
                                               const Twine &Name) {
  Value *GEPVal = Builder.CreateInBoundsGEP(Ptr, IdxList, Name);
@ -3958,15 +3962,19 @@ Value *CodeGenFunction::EmitCheckedInBoundsGEP(Value *Ptr,
  // pointer matches the sign of the total offset.
  llvm::Value *ValidGEP;
  auto *NoOffsetOverflow = Builder.CreateNot(OffsetOverflows);
-  auto *PosOrZeroValid = Builder.CreateICmpUGE(ComputedGEP, IntPtr);
  if (SignedIndices) {
+    auto *PosOrZeroValid = Builder.CreateICmpUGE(ComputedGEP, IntPtr);
    auto *PosOrZeroOffset = Builder.CreateICmpSGE(TotalOffset, Zero);
    llvm::Value *NegValid = Builder.CreateICmpULT(ComputedGEP, IntPtr);
    ValidGEP = Builder.CreateAnd(
        Builder.CreateSelect(PosOrZeroOffset, PosOrZeroValid, NegValid),
        NoOffsetOverflow);
-  } else {
+  } else if (!SignedIndices && !IsSubtraction) {
+    auto *PosOrZeroValid = Builder.CreateICmpUGE(ComputedGEP, IntPtr);
    ValidGEP = Builder.CreateAnd(PosOrZeroValid, NoOffsetOverflow);
+  } else {
+    auto *NegOrZeroValid = Builder.CreateICmpULE(ComputedGEP, IntPtr);
+    ValidGEP = Builder.CreateAnd(NegOrZeroValid, NoOffsetOverflow);
  }

  llvm::Constant *StaticArgs[] = {EmitCheckSourceLocation(Loc)};
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@ -3589,12 +3589,19 @@ public:
  /// nonnull, if \p LHS is marked _Nonnull.
  void EmitNullabilityCheck(LValue LHS, llvm::Value *RHS, SourceLocation Loc);

+  /// An enumeration which makes it easier to specify whether or not an
+  /// operation is a subtraction.
+  enum { NotSubtraction = false, IsSubtraction = true };
+
  /// Same as IRBuilder::CreateInBoundsGEP, but additionally emits a check to
  /// detect undefined behavior when the pointer overflow sanitizer is enabled.
  /// \p SignedIndices indicates whether any of the GEP indices are signed.
+  /// \p IsSubtraction indicates whether the expression used to form the GEP
+  /// is a subtraction.
  llvm::Value *EmitCheckedInBoundsGEP(llvm::Value *Ptr,
                                      ArrayRef<llvm::Value *> IdxList,
                                      bool SignedIndices,
+                                      bool IsSubtraction,
                                      SourceLocation Loc,
                                      const Twine &Name = "");

--- a/clang/test/CodeGen/ubsan-pointer-overflow.m
+++ b/clang/test/CodeGen/ubsan-pointer-overflow.m
@ -10,16 +10,20 @@ void unary_arith(char *p) {
  ++p;

  // CHECK: ptrtoint i8* {{.*}} to i64, !nosanitize
-  // CHECK-NEXT: add i64 {{.*}}, -1, !nosanitize
-  // CHECK: select i1 false{{.*}}, !nosanitize
+  // CHECK-NEXT: [[COMPGEP:%.*]] = add i64 {{.*}}, -1, !nosanitize
+  // CHECK: [[NEGVALID:%.*]] = icmp ule i64 [[COMPGEP]], {{.*}}, !nosanitize
+  // CHECK-NOT: select
+  // CHECK: br i1 [[NEGVALID]]{{.*}}, !nosanitize
  // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}}
  --p;

+  // CHECK: icmp uge i64
  // CHECK-NOT: select
  // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}}
  p++;

-  // CHECK: select
+  // CHECK: icmp ule i64
+  // CHECK-NOT: select
  // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}}
  p--;
 }
@ -64,7 +68,8 @@ void binary_arith_unsigned(char *p, unsigned i) {

  // CHECK: [[OFFSET:%.*]] = sub i64 0, {{.*}}
  // CHECK-NEXT: getelementptr inbounds {{.*}} [[OFFSET]]
-  // CHECK: select
+  // CHECK: icmp ule i64
+  // CHECK-NOT: select
  // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}}
  p - i;
 }
@ -121,8 +126,10 @@ void pointer_array(int **arr, int k) {

 // CHECK-LABEL: define void @pointer_array_unsigned_indices
 void pointer_array_unsigned_indices(int **arr, unsigned k) {
+  // CHECK: icmp uge
  // CHECK-NOT: select
  // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}}
+  // CHECK: icmp uge
  // CHECK-NOT: select
  // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}}
  arr[k][k];