[ValueTracking] Interpret GEPs as a series of adds multiplied by the related scaling factor
Prior to this patch, computeKnownBits would only try to deduce trailing zero bits for getelementptrs. This patch adds the logic to treat a GEP as a series of adds, each scaled by the related scaling factor. Thanks to this patch, using a gep or performing the same address computation "by hand" (ptrtoint, followed by adds and muls, followed by inttoptr) offers the same computeKnownBits information. Previously, the "by hand" approach would have given more information.

This is related to https://llvm.org/PR47241.

Differential Revision: https://reviews.llvm.org/D86364
parent e97e9851b2
commit ee6abef532

Changed files:
  llvm/lib/Analysis/ValueTracking.cpp
  llvm/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll
  llvm/test/Transforms/InstCombine/constant-fold-gep.ll
  llvm/unittests/Analysis/ValueTrackingTest.cpp
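Before the per-file diff, a sketch of the equivalence the commit message describes (our own example, not part of the commit). After this change, computeKnownBits derives the same facts for %p1 and %p2 below; for instance, if %base is known 16-byte aligned and %i is known even, both forms prove the three low bits of the address are zero:

  %p1 = getelementptr inbounds i32, i32* %base, i64 %i   ; &base[i]
  ; The same address computed "by hand":
  %baseInt = ptrtoint i32* %base to i64
  %offset  = mul i64 %i, 4                                ; i * sizeof(i32)
  %sumInt  = add i64 %baseInt, %offset
  %p2      = inttoptr i64 %sumInt to i32*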
llvm/lib/Analysis/ValueTracking.cpp

@@ -1358,24 +1358,32 @@ static void computeKnownBitsFromOperator(const Operator *I,
   case Instruction::GetElementPtr: {
     // Analyze all of the subscripts of this getelementptr instruction
     // to determine if we can prove known low zero bits.
-    KnownBits LocalKnown(BitWidth);
-    computeKnownBits(I->getOperand(0), LocalKnown, Depth + 1, Q);
-    unsigned TrailZ = LocalKnown.countMinTrailingZeros();
+    computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
+    // Accumulate the constant indices in a separate variable
+    // to minimize the number of calls to computeForAddSub.
+    APInt AccConstIndices(BitWidth, 0, /*IsSigned*/ true);

     gep_type_iterator GTI = gep_type_begin(I);
+    // If the inbounds keyword is not present, the offsets are added to the
+    // base address with silently-wrapping two's complement arithmetic.
+    bool IsInBounds = cast<GEPOperator>(I)->isInBounds();
     for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) {
       // TrailZ can only become smaller, short-circuit if we hit zero.
-      if (TrailZ == 0)
+      if (Known.isUnknown())
         break;

       Value *Index = I->getOperand(i);
+
+      // Handle case when index is zero.
+      Constant *CIndex = dyn_cast<Constant>(Index);
+      if (CIndex && CIndex->isZeroValue())
+        continue;
+
       if (StructType *STy = GTI.getStructTypeOrNull()) {
         // Handle struct member offset arithmetic.

-        // Handle case when index is vector zeroinitializer
-        Constant *CIndex = cast<Constant>(Index);
-        if (CIndex->isZeroValue())
-          continue;
+        assert(CIndex &&
+               "Access to structure field must be known at compile time");

         if (CIndex->getType()->isVectorTy())
           Index = CIndex->getSplatValue();
@@ -1383,26 +1391,58 @@ static void computeKnownBitsFromOperator(const Operator *I,
         unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
         const StructLayout *SL = Q.DL.getStructLayout(STy);
         uint64_t Offset = SL->getElementOffset(Idx);
-        TrailZ = std::min<unsigned>(TrailZ,
-                                    countTrailingZeros(Offset));
-      } else {
-        // Handle array index arithmetic.
-        Type *IndexedTy = GTI.getIndexedType();
-        if (!IndexedTy->isSized()) {
-          TrailZ = 0;
-          break;
-        }
-        unsigned GEPOpiBits = Index->getType()->getScalarSizeInBits();
-        uint64_t TypeSize = Q.DL.getTypeAllocSize(IndexedTy).getKnownMinSize();
-        LocalKnown.Zero = LocalKnown.One = APInt(GEPOpiBits, 0);
-        computeKnownBits(Index, LocalKnown, Depth + 1, Q);
-        TrailZ = std::min(TrailZ,
-                          unsigned(countTrailingZeros(TypeSize) +
-                                   LocalKnown.countMinTrailingZeros()));
-      }
+        AccConstIndices += Offset;
+        continue;
+      }
+
+      // Handle array index arithmetic.
+      Type *IndexedTy = GTI.getIndexedType();
+      if (!IndexedTy->isSized()) {
+        Known.resetAll();
+        break;
+      }
+
+      unsigned IndexBitWidth = Index->getType()->getScalarSizeInBits();
+      KnownBits IndexBits(IndexBitWidth);
+      computeKnownBits(Index, IndexBits, Depth + 1, Q);
+      TypeSize IndexTypeSize = Q.DL.getTypeAllocSize(IndexedTy);
+      uint64_t TypeSizeInBytes = IndexTypeSize.getKnownMinSize();
+      KnownBits ScalingFactor(IndexBitWidth);
+      // Multiply by current sizeof type.
+      // &A[i] == A + i * sizeof(*A[i]).
+      if (IndexTypeSize.isScalable()) {
+        // For scalable types the only thing we know about sizeof is
+        // that this is a multiple of the minimum size.
+        ScalingFactor.Zero.setLowBits(countTrailingZeros(TypeSizeInBytes));
+      } else if (IndexBits.isConstant()) {
+        APInt IndexConst = IndexBits.getConstant();
+        APInt ScalingFactor(IndexBitWidth, TypeSizeInBytes);
+        IndexConst *= ScalingFactor;
+        AccConstIndices += IndexConst.sextOrTrunc(BitWidth);
+        continue;
+      } else {
+        ScalingFactor.Zero = ~TypeSizeInBytes;
+        ScalingFactor.One = TypeSizeInBytes;
+      }
+      IndexBits = KnownBits::computeForMul(IndexBits, ScalingFactor);
+
+      // If the offsets have a different width from the pointer, according
+      // to the language reference we need to sign-extend or truncate them
+      // to the width of the pointer.
+      IndexBits = IndexBits.sextOrTrunc(BitWidth);
+
+      Known = KnownBits::computeForAddSub(
+          /*Add=*/true,
+          /*NSW=*/IsInBounds, Known, IndexBits);
     }
-
-    Known.Zero.setLowBits(TrailZ);
+    if (!Known.isUnknown() && !AccConstIndices.isNullValue()) {
+      KnownBits Index(BitWidth);
+      Index.Zero = ~AccConstIndices;
+      Index.One = AccConstIndices;
+      Known = KnownBits::computeForAddSub(
+          /*Add=*/true,
+          /*NSW=*/IsInBounds, Known, Index);
+    }
     break;
   }
   case Instruction::PHI: {
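Condensed, the recurrence the new code implements for each array index is Known(gep) = add(Known, mul(Known(index), sizeof(element))). Below is a standalone sketch of that one step (the helper name and framing are ours, not upstream code), using only the KnownBits calls that appear in the patch:

  // Sketch: known bits of one "Base + Index * sizeof(ElemTy)" GEP step.
  #include "llvm/Support/KnownBits.h"
  using namespace llvm;

  static KnownBits addScaledIndex(const KnownBits &Base, KnownBits Index,
                                  uint64_t TypeSizeInBytes, bool IsInBounds) {
    // The scaling factor is a compile-time constant, so every bit is known:
    // Zero holds the complement of the value, One the value itself.
    KnownBits ScalingFactor(Index.getBitWidth());
    ScalingFactor.Zero = ~TypeSizeInBytes;
    ScalingFactor.One = TypeSizeInBytes;
    Index = KnownBits::computeForMul(Index, ScalingFactor);
    // Per the LangRef, an offset narrower or wider than the pointer is
    // sign-extended or truncated to the pointer width before the add.
    Index = Index.sextOrTrunc(Base.getBitWidth());
    // Mirroring the patch: inbounds is treated as a non-wrapping (NSW) add.
    return KnownBits::computeForAddSub(/*Add=*/true, /*NSW=*/IsInBounds,
                                       Base, Index);
  }

Constant indices skip this machinery entirely: they are accumulated into AccConstIndices and folded in with a single computeForAddSub at the end, which is both cheaper and exact.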
llvm/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll

@@ -225,7 +225,7 @@ define i32 @test_cast_gep_large_indices_as() {

 define i32 @test_constant_cast_gep_struct_indices_as() {
 ; CHECK-LABEL: @test_constant_cast_gep_struct_indices_as(
-; CHECK-NEXT:    [[Y:%.*]] = load i32, i32 addrspace(3)* getelementptr inbounds (%struct.foo, [[STRUCT_FOO:%.*]] addrspace(3)* @constant_fold_global_ptr, i16 0, i32 2, i16 2), align 8
+; CHECK-NEXT:    [[Y:%.*]] = load i32, i32 addrspace(3)* getelementptr inbounds (%struct.foo, [[STRUCT_FOO:%.*]] addrspace(3)* @constant_fold_global_ptr, i16 0, i32 2, i16 2), align 16
 ; CHECK-NEXT:    ret i32 [[Y]]
 ;
   %x = getelementptr %struct.foo, %struct.foo addrspace(3)* @constant_fold_global_ptr, i18 0, i32 2, i12 2
llvm/test/Transforms/InstCombine/constant-fold-gep.ll

@@ -15,20 +15,20 @@ define void @frob() {
 ; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 1), align 4
 ; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 2), align 8
 ; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 1, i64 0), align 4
-; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 1, i64 1), align 4
+; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 1, i64 1), align 16
 ; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 1, i64 2), align 4
 ; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 1, i32 0, i64 0), align 8
 ; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 1, i32 0, i64 1), align 4
-; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 1, i32 0, i64 2), align 8
+; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 1, i32 0, i64 2), align 16
 ; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 1, i32 1, i64 0), align 4
-; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 1, i32 1, i64 1), align 4
+; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 1, i32 1, i64 1), align 8
 ; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 1, i32 1, i64 2), align 4
 ; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 2, i32 0, i64 0), align 16
 ; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 2, i32 0, i64 1), align 4
 ; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 2, i32 0, i64 2), align 8
-; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 2, i32 1, i64 0), align 8
-; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 2, i32 1, i64 1), align 8
-; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 2, i32 1, i64 2), align 8
+; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 2, i32 1, i64 0), align 4
+; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 2, i32 1, i64 1), align 16
+; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 2, i32 1, i64 2), align 4
 ; CHECK-NEXT:    store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 1, i64 0, i32 0, i64 0), align 8
 ; CHECK-NEXT:    store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 2, i64 0, i32 0, i64 0), align 16
 ; CHECK-NEXT:    store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 1, i64 0, i32 0, i64 1), align 8
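Why, for example, the (i64 0, i64 0, i32 1, i64 1) store can now be annotated align 16 (our arithmetic, assuming this file's %struct.X = type { [3 x i32], [3 x i32] }, i.e. 24 bytes with the second array at offset 12, and a 16-byte-aligned @Y):

  offset = 0*72 + 0*24 + 12 + 1*4 = 16 bytes
  old: min over trailing zeros of each term -> min(tz(12), tz(4)) = 2 -> align 4
  new: the adds are tracked exactly -> tz(16) = 4 -> align 16

The carry in 12 + 4 = 16 creates trailing zeros that the old min-trailing-zeros scheme could not see; this is precisely what modeling the GEP as a series of adds recovers.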
@@ -49,9 +49,9 @@ define void @frob() {
   store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 12), align 4
   store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 13), align 4
   store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 14), align 8
-  store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 15), align 8
+  store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 15), align 4
   store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 16), align 8
-  store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 17), align 8
+  store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 17), align 4
   store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 18), align 8
   store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 36), align 8
   store i32 1, i32* getelementptr ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 19), align 8
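A possible reading of the two input-side changes above (our interpretation, not stated in the commit): with 4-byte i32 elements, these annotations over-claimed, and the now-precise analysis makes the inconsistency visible, so the inputs were relaxed to the strongest annotation that is always correct:

  offset(i64 15) = 15 * 4 = 60,  60 mod 8 = 4  ->  at most align 4
  offset(i64 17) = 17 * 4 = 68,  68 mod 8 = 4  ->  at most align 4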
@@ -98,3 +98,22 @@ entry:

   ret i16 %E
 }
+
+; Check that we improve the alignment information.
+; The base pointer is 16-byte aligned and we access the field at
+; an offset of 8 bytes.
+; Every element in the @CallerInfos array is 16-byte aligned so
+; any access from the following gep is 8-byte aligned.
+%struct.CallerInfo = type { i8*, i32 }
+@CallerInfos = global [128 x %struct.CallerInfo] zeroinitializer, align 16
+
+define i32 @test_gep_in_struct(i64 %idx) {
+; CHECK-LABEL: @test_gep_in_struct(
+; CHECK-NEXT:    [[NS7:%.*]] = getelementptr inbounds [128 x %struct.CallerInfo], [128 x %struct.CallerInfo]* @CallerInfos, i64 0, i64 [[IDX:%.*]], i32 1
+; CHECK-NEXT:    [[RES:%.*]] = load i32, i32* [[NS7]], align 8
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %NS7 = getelementptr inbounds [128 x %struct.CallerInfo], [128 x %struct.CallerInfo]* @CallerInfos, i64 0, i64 %idx, i32 1
+  %res = load i32, i32* %NS7, align 1
+  ret i32 %res
+}
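The expected align 8 in this new test follows from a simple congruence (our arithmetic, assuming 64-bit pointers so that sizeof(%struct.CallerInfo) = 16, with the i32 field at offset 8):

  address = @CallerInfos + 16*idx + 8  ==  8 (mod 16)
  -> the three low bits are known zero and bit 3 is known one -> align 8

Note that the load is written with align 1 in the input; InstCombine upgrades it to the provable alignment, which is exactly the known-bits fact this patch teaches computeKnownBits to derive for a variable index.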
|
@ -1208,6 +1208,66 @@ TEST_F(ComputeKnownBitsTest, ComputeKnownBitsAddWithRangeNoOverlap) {
|
|||
EXPECT_EQ(Known.getMaxValue(), 575);
|
||||
}
|
||||
|
||||
TEST_F(ComputeKnownBitsTest, ComputeKnownBitsGEPWithRange) {
|
||||
parseAssembly(
|
||||
"define void @test(i64* %p) {\n"
|
||||
" %A = load i64, i64* %p, !range !{i64 64, i64 65536}\n"
|
||||
" %APtr = inttoptr i64 %A to float*"
|
||||
" %APtrPlus512 = getelementptr float, float* %APtr, i32 128\n"
|
||||
" %c = icmp ugt float* %APtrPlus512, inttoptr (i32 523 to float*)\n"
|
||||
" call void @llvm.assume(i1 %c)\n"
|
||||
" ret void\n"
|
||||
"}\n"
|
||||
"declare void @llvm.assume(i1)\n");
|
||||
AssumptionCache AC(*F);
|
||||
KnownBits Known = computeKnownBits(A, M->getDataLayout(), /* Depth */ 0, &AC,
|
||||
F->front().getTerminator());
|
||||
EXPECT_EQ(Known.Zero.getZExtValue(), ~(65536llu - 1));
|
||||
EXPECT_EQ(Known.One.getZExtValue(), 0u);
|
||||
Instruction &APtrPlus512 = findInstructionByName(F, "APtrPlus512");
|
||||
Known = computeKnownBits(&APtrPlus512, M->getDataLayout(), /* Depth */ 0, &AC,
|
||||
F->front().getTerminator());
|
||||
// We know of one less zero because 512 may have produced a 1 that
|
||||
// got carried all the way to the first trailing zero.
|
||||
EXPECT_EQ(Known.Zero.getZExtValue(), ~(65536llu - 1) << 1);
|
||||
EXPECT_EQ(Known.One.getZExtValue(), 0u);
|
||||
// The known range is not precise given computeKnownBits works
|
||||
// with the masks of zeros and ones, not the ranges.
|
||||
EXPECT_EQ(Known.getMinValue(), 0u);
|
||||
EXPECT_EQ(Known.getMaxValue(), 131071);
|
||||
}
|
||||
|
||||
// 4*128 + [32, 64) doesn't produce overlapping bits.
|
||||
// Make sure we get all the individual bits properly.
|
||||
// This test is useful to check that we account for the scaling factor
|
||||
// in the gep. Indeed, gep float, [32,64), 128 is not 128 + [32,64).
|
||||
TEST_F(ComputeKnownBitsTest, ComputeKnownBitsGEPWithRangeNoOverlap) {
|
||||
parseAssembly(
|
||||
"define void @test(i64* %p) {\n"
|
||||
" %A = load i64, i64* %p, !range !{i64 32, i64 64}\n"
|
||||
" %APtr = inttoptr i64 %A to float*"
|
||||
" %APtrPlus512 = getelementptr float, float* %APtr, i32 128\n"
|
||||
" %c = icmp ugt float* %APtrPlus512, inttoptr (i32 523 to float*)\n"
|
||||
" call void @llvm.assume(i1 %c)\n"
|
||||
" ret void\n"
|
||||
"}\n"
|
||||
"declare void @llvm.assume(i1)\n");
|
||||
AssumptionCache AC(*F);
|
||||
KnownBits Known = computeKnownBits(A, M->getDataLayout(), /* Depth */ 0, &AC,
|
||||
F->front().getTerminator());
|
||||
EXPECT_EQ(Known.Zero.getZExtValue(), ~(64llu - 1));
|
||||
EXPECT_EQ(Known.One.getZExtValue(), 32u);
|
||||
Instruction &APtrPlus512 = findInstructionByName(F, "APtrPlus512");
|
||||
Known = computeKnownBits(&APtrPlus512, M->getDataLayout(), /* Depth */ 0, &AC,
|
||||
F->front().getTerminator());
|
||||
EXPECT_EQ(Known.Zero.getZExtValue(), ~512llu & ~(64llu - 1));
|
||||
EXPECT_EQ(Known.One.getZExtValue(), 512u | 32u);
|
||||
// The known range is not precise given computeKnownBits works
|
||||
// with the masks of zeros and ones, not the ranges.
|
||||
EXPECT_EQ(Known.getMinValue(), 544);
|
||||
EXPECT_EQ(Known.getMaxValue(), 575);
|
||||
}
|
||||
|
||||
class IsBytewiseValueTest : public ValueTrackingTest,
|
||||
public ::testing::WithParamInterface<
|
||||
std::pair<const char *, const char *>> {
|
||||
|
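The expectations in the NoOverlap test above can be read off directly (our arithmetic). The !range [32, 64) pins bit 5 of %A to one and every bit from 6 up to zero, and the GEP contributes a constant offset with disjoint bits:

  Known(%A):           One = 32 (0b100000),   Zero = ~63
  offset = 128 * sizeof(float) = 512 (0b1000000000)
  512 shares no bits with [32, 64), so the add cannot carry:
  Known(%APtrPlus512): One = 512 | 32 = 544,  Zero = ~(512 | 63) = ~575

Hence getMinValue() = 544 and getMaxValue() = 575, exactly as asserted. In the first test the offset does overlap the unknown low bits, so one trailing known-zero is sacrificed to a potential carry, which is why its expectation shifts the zero mask left by one.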