Revert "[VectorUtils] Query number of sign bits to allow more truncations"

This was a fairly simple patch, but on closer inspection it was seriously flawed and caused PR27690.

This reverts commit r268921.

llvm-svn: 269051
2016-05-10 12:27:23 +00:00 · 2016-05-10 12:27:23 +00:00 · aa1d638800
parent 1e1e286a6b
commit aa1d638800
2 changed files with 4 additions and 50 deletions
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -320,9 +320,6 @@ llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
  SmallPtrSet<Instruction *, 4> InstructionSet;
  MapVector<Instruction *, uint64_t> MinBWs;

-  assert(Blocks.size() > 0 && "Must have at least one block!");
-  const DataLayout &DL = Blocks[0]->getModule()->getDataLayout();
-  
  // Determine the roots. We work bottom-up, from truncs or icmps.
  bool SeenExtFromIllegalType = false;
  for (auto *BB : Blocks)
@@ -366,19 +363,12 @@ llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,

    // If we encounter a type that is larger than 64 bits, we can't represent
    // it so bail out.
-    APInt NeededBits = DB.getDemandedBits(I);
-    unsigned BW = NeededBits.getBitWidth();
-    if (BW > 64)
+    if (DB.getDemandedBits(I).getBitWidth() > 64)
      return MapVector<Instruction *, uint64_t>();

-    auto NSB = ComputeNumSignBits(I, DL);
-
-    // Query demanded bits for the bits required by the instruction. Remove
-    // any bits that are equal to the sign bit, because we can truncate the
-    // instruction without changing their value.
-    NeededBits &= APInt::getLowBitsSet(BW, BW - NSB);
-    DBits[Leader] |= NeededBits.getZExtValue();
-    DBits[I] |= NeededBits.getZExtValue();
+    uint64_t V = DB.getDemandedBits(I).getZExtValue();
+    DBits[Leader] |= V;
+    DBits[I] = V;

    // Casts, loads and instructions outside of our range terminate a chain
    // successfully.
--- a/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll
@@ -263,41 +263,5 @@ for.body:                                         ; preds = %entry, %for.body
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
 }

-; CHECK-LABEL: @add_g
-; CHECK: load <16 x i8>
-; CHECK: xor <16 x i8>
-; CHECK: icmp ult <16 x i8>
-; CHECK: select <16 x i1> {{.*}}, <16 x i8>
-; CHECK: store <16 x i8>
-define void @add_g(i8* noalias nocapture readonly %p, i8* noalias nocapture readonly %q, i8* noalias nocapture
-%r, i8 %arg1, i32 %len) #0 {
-  %1 = icmp sgt i32 %len, 0
-  br i1 %1, label %.lr.ph, label %._crit_edge
-
-.lr.ph:                                           ; preds = %0
-  %2 = sext i8 %arg1 to i64
-  br label %3
-
-._crit_edge:                                      ; preds = %3, %0
-  ret void
-
-; <label>:3                                       ; preds = %3, %.lr.ph
-  %indvars.iv = phi i64 [ 0, %.lr.ph ], [ %indvars.iv.next, %3 ]
-  %x4 = getelementptr inbounds i8, i8* %p, i64 %indvars.iv
-  %x5 = load i8, i8* %x4
-  %x7 = getelementptr inbounds i8, i8* %q, i64 %indvars.iv
-  %x8 = load i8, i8* %x7
-  %x9 = zext i8 %x5 to i32
-  %x10 = xor i32 %x9, 255
-  %x11 = icmp ult i32 %x10, 24
-  %x12 = select i1 %x11, i32 %x10, i32 24
-  %x13 = trunc i32 %x12 to i8
-  store i8 %x13, i8* %x4
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
-  %exitcond = icmp eq i32 %lftr.wideiv, %len
-  br i1 %exitcond, label %._crit_edge, label %3
-}
-
 attributes #0 = { nounwind }