From fec2945998947f04d672e9c5f33b57f7177474c0 Mon Sep 17 00:00:00 2001
From: Jordan Rupprecht <rupprecht@google.com>
Date: Tue, 11 May 2021 16:08:53 -0700
Subject: [PATCH] Revert "[GVN] Clobber partially aliased loads."

This reverts commit 6c570442318e2d3b8b13e95c2f2f588d71491acb.

It causes assertion errors due to widening atomic loads, and potentially
causes miscompile elsewhere too. Repro, also posted to D95543:

```
$ cat repro.ll
; ModuleID = 'repro.ll'
source_filename = "repro.ll"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

%struct.widget = type { i32 }
%struct.baz = type { i32, %struct.snork }
%struct.snork = type { %struct.spam }
%struct.spam = type { i32, i32 }

@global = external local_unnamed_addr global %struct.widget, align 4
@global.1 = external local_unnamed_addr global i8, align 1
@global.2 = external local_unnamed_addr global i32, align 4

define void @zot(%struct.baz* %arg) local_unnamed_addr align 2 {
bb:
  %tmp = getelementptr inbounds %struct.baz, %struct.baz* %arg, i64 0, i32 1
  %tmp1 = bitcast %struct.snork* %tmp to i64*
  %tmp2 = load i64, i64* %tmp1, align 4
  %tmp3 = getelementptr inbounds %struct.baz, %struct.baz* %arg, i64 0, i32 1, i32 0, i32 1
  %tmp4 = icmp ugt i64 %tmp2, 4294967295
  br label %bb5

bb5: ; preds = %bb14, %bb
  %tmp6 = load i32, i32* %tmp3, align 4
  %tmp7 = icmp ne i32 %tmp6, 0
  %tmp8 = select i1 %tmp7, i1 %tmp4, i1 false
  %tmp9 = zext i1 %tmp8 to i8
  store i8 %tmp9, i8* @global.1, align 1
  %tmp10 = load i32, i32* @global.2, align 4
  switch i32 %tmp10, label %bb11 [
    i32 1, label %bb12
    i32 2, label %bb12
  ]

bb11: ; preds = %bb5
  br label %bb14

bb12: ; preds = %bb5, %bb5
  %tmp13 = load atomic i32, i32* getelementptr inbounds (%struct.widget, %struct.widget* @global, i64 0, i32 0) acquire, align 4
  br label %bb14

bb14: ; preds = %bb12, %bb11
  br label %bb5
}
$ opt -O2 repro.ll -disable-output
opt: /home/rupprecht/src/llvm-project/llvm/lib/Transforms/Utils/VNCoercion.cpp:496: llvm::Value *llvm::VNCoercion::getLoadValueForLoad(llvm::LoadInst *, unsigned int, llvm::Type *, llvm::Instruction *, const llvm::DataLayout &): Assertion `SrcVal->isSimple() && "Cannot widen volatile/atomic load!"' failed.
PLEASE submit a bug report to https://bugs.llvm.org/ and include the crash backtrace.
Stack dump:
0. Program arguments: /home/rupprecht/dev/opt -O2 repro.ll -disable-output
...
```
---
 .../llvm/Analysis/MemoryDependenceAnalysis.h | 3 -
 .../lib/Analysis/MemoryDependenceAnalysis.cpp | 11 +-
 llvm/lib/Transforms/Scalar/GVN.cpp | 17 +-
 llvm/test/Transforms/GVN/PRE/rle.ll | 246 ++++--------------
 4 files changed, 66 insertions(+), 211 deletions(-)

diff --git a/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h b/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h
index 0351b9e914a0..efde00f82d57 100644
--- a/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h
+++ b/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h
@@ -363,7 +363,6 @@ private:
   PredIteratorCache PredCache;
 
   unsigned DefaultBlockScanLimit;
-  Optional<int32_t> ClobberOffset;
 
 public:
   MemoryDependenceResults(AAResults &AA, AssumptionCache &AC,
@@ -469,8 +468,6 @@ public:
   /// Release memory in caches.
   void releaseMemory();
 
-  Optional<int32_t> getClobberOffset() const { return ClobberOffset; }
-
 private:
   MemDepResult getCallDependencyFrom(CallBase *Call, bool isReadOnlyCall,
                                      BasicBlock::iterator ScanIt,
diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
index d45a4b428053..fa5e96a9eef5 100644
--- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -512,12 +512,16 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
       if (R == AliasResult::MustAlias)
         return MemDepResult::getDef(Inst);
 
+#if 0 // FIXME: Temporarily disabled. GVN is cleverly rewriting loads
+      // in terms of clobbering loads, but since it does this by looking
+      // at the clobbering load directly, it doesn't know about any
+      // phi translation that may have happened along the way.
+
       // If we have a partial alias, then return this as a clobber for the
       // client to handle.
-      if (R == AliasResult::PartialAlias && R.hasOffset()) {
-        ClobberOffset = R.getOffset();
+      if (R == AliasResult::PartialAlias)
         return MemDepResult::getClobber(Inst);
-      }
+#endif
 
       // Random may-alias loads don't depend on each other without a
       // dependence.
@@ -636,7 +640,6 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
 }
 
 MemDepResult MemoryDependenceResults::getDependency(Instruction *QueryInst) {
-  ClobberOffset = None;
   Instruction *ScanPos = QueryInst;
 
   // Check for a cached result
diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp
index 8bc35e0a7913..29da739fa16e 100644
--- a/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -999,22 +999,9 @@ bool GVN::AnalyzeLoadAvailability(LoadInst *Load, MemDepResult DepInfo,
       // Can't forward from non-atomic to atomic without violating memory model.
       if (DepLoad != Load && Address &&
           Load->isAtomic() <= DepLoad->isAtomic()) {
-        Type *LoadType = Load->getType();
-        int Offset = -1;
+        int Offset = analyzeLoadFromClobberingLoad(Load->getType(), Address,
+                                                   DepLoad, DL);
 
-        // If Memory Dependence Analysis reported clobber check, it was nested
-        // and can be extracted from the MD result
-        if (DepInfo.isClobber() &&
-            canCoerceMustAliasedValueToLoad(DepLoad, LoadType, DL)) {
-          const auto ClobberOff = MD->getClobberOffset();
-          // GVN has no deal with a negative offset.
-          Offset = (ClobberOff == None || ClobberOff.getValue() < 0)
-                       ? -1
-                       : ClobberOff.getValue();
-        }
-        if (Offset == -1)
-          Offset =
-              analyzeLoadFromClobberingLoad(LoadType, Address, DepLoad, DL);
         if (Offset != -1) {
           Res = AvailableValue::getLoad(DepLoad, Offset);
           return true;
diff --git a/llvm/test/Transforms/GVN/PRE/rle.ll b/llvm/test/Transforms/GVN/PRE/rle.ll
index 268030b6b566..2653e963c249 100644
--- a/llvm/test/Transforms/GVN/PRE/rle.ll
+++ b/llvm/test/Transforms/GVN/PRE/rle.ll
@@ -37,12 +37,12 @@ define i8 @crash0({i32, i32} %A, {i32, i32}* %P) {
 declare void @helper()
 define void @crash1() {
 ; CHECK-LABEL: @crash1(
-; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* undef, i8* undef, i64 undef, i1 false) #[[ATTR3:[0-9]+]]
+; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* undef, i8* undef, i64 undef, i1 false) #[[ATTR6:[0-9]+]]
 ; CHECK-NEXT: ret void
 ;
   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* undef, i8* undef, i64 undef, i1 false) nounwind
-  %ttmp = load i8, i8* bitcast (void ()* @helper to i8*)
-  %x = icmp eq i8 %ttmp, 15
+  %tmp = load i8, i8* bitcast (void ()* @helper to i8*)
+  %x = icmp eq i8 %tmp, 15
   ret void
 }
 
@@ -203,8 +203,8 @@ entry:
   %conv = bitcast i16* %A to i8*
   tail call void @llvm.memset.p0i8.i64(i8* %conv, i8 1, i64 200, i1 false)
   %arrayidx = getelementptr inbounds i16, i16* %A, i64 42
-  %ttmp2 = load i16, i16* %arrayidx
-  ret i16 %ttmp2
+  %tmp2 = load i16, i16* %arrayidx
+  ret i16 %tmp2
 }
 
 ; memset -> float forwarding.
@@ -225,8 +225,8 @@ entry:
   %conv = bitcast float* %A to i8* ; <i8*> [#uses=1]
   tail call void @llvm.memset.p0i8.i64(i8* %conv, i8 %Val, i64 400, i1 false)
   %arrayidx = getelementptr inbounds float, float* %A, i64 42 ; <float*> [#uses=1]
-  %ttmp2 = load float, float* %arrayidx ; <float> [#uses=1]
-  ret float %ttmp2
+  %tmp2 = load float, float* %arrayidx ; <float> [#uses=1]
+  ret float %tmp2
 }
 
 ;; non-local memset -> i16 load forwarding.
@@ -276,8 +276,8 @@ entry:
   %conv = bitcast float* %A to i8* ; <i8*> [#uses=1]
   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %conv, i8* bitcast ({i32, float, i32 }* @GCst to i8*), i64 12, i1 false)
   %arrayidx = getelementptr inbounds float, float* %A, i64 1 ; <float*> [#uses=1]
-  %ttmp2 = load float, float* %arrayidx ; <float> [#uses=1]
-  ret float %ttmp2
+  %tmp2 = load float, float* %arrayidx ; <float> [#uses=1]
+  ret float %tmp2
 }
 
 ; memcpy from address space 1
@@ -292,8 +292,8 @@ entry:
   %conv = bitcast float* %A to i8* ; <i8*> [#uses=1]
   tail call void @llvm.memcpy.p0i8.p1i8.i64(i8* %conv, i8 addrspace(1)* bitcast ({i32, float, i32 } addrspace(1)* @GCst_as1 to i8 addrspace(1)*), i64 12, i1 false)
   %arrayidx = getelementptr inbounds float, float* %A, i64 1 ; <float*> [#uses=1]
-  %ttmp2 = load float, float* %arrayidx ; <float> [#uses=1]
-  ret float %ttmp2
+  %tmp2 = load float, float* %arrayidx ; <float> [#uses=1]
+  ret float %tmp2
 }
 
 ;; non-local i32/float -> i8 load forwarding.
@@ -934,17 +934,17 @@ define i32 @memset_to_load() nounwind readnone {
 ; CHECK-LABEL: @memset_to_load(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: [[X:%.*]] = alloca [256 x i32], align 4
-; CHECK-NEXT: [[TTMP:%.*]] = bitcast [256 x i32]* [[X]] to i8*
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[TTMP]], i8 0, i64 1024, i1 false)
+; CHECK-NEXT: [[TMP:%.*]] = bitcast [256 x i32]* [[X]] to i8*
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP]], i8 0, i64 1024, i1 false)
 ; CHECK-NEXT: ret i32 0
 ;
 entry:
   %x = alloca [256 x i32], align 4 ; <[256 x i32]*> [#uses=2]
-  %ttmp = bitcast [256 x i32]* %x to i8* ; <i8*> [#uses=1]
-  call void @llvm.memset.p0i8.i64(i8* align 4 %ttmp, i8 0, i64 1024, i1 false)
+  %tmp = bitcast [256 x i32]* %x to i8* ; <i8*> [#uses=1]
+  call void @llvm.memset.p0i8.i64(i8* align 4 %tmp, i8 0, i64 1024, i1 false)
   %arraydecay = getelementptr inbounds [256 x i32], [256 x i32]* %x, i32 0, i32 0 ; <i32*>
-  %ttmp1 = load i32, i32* %arraydecay ; <i32> [#uses=1]
-  ret i32 %ttmp1
+  %tmp1 = load i32, i32* %arraydecay ; <i32> [#uses=1]
+  ret i32 %tmp1
 }
 
 
@@ -956,20 +956,20 @@ define i32 @load_load_partial_alias(i8* %P) nounwind ssp {
 ; CHECK-LABEL: @load_load_partial_alias(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[P:%.*]] to i32*
-; CHECK-NEXT: [[TTMP2:%.*]] = load i32, i32* [[TMP0]], align 4
-; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[TTMP2]], {{8|16}}
-; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
-; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP2]] to i32
-; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TTMP2]], [[CONV]]
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[P]], i64 1
+; CHECK-NEXT: [[TMP5:%.*]] = load i8, i8* [[ADD_PTR]], align 1
+; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP5]] to i32
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CONV]]
 ; CHECK-NEXT: ret i32 [[ADD]]
 ;
 entry:
   %0 = bitcast i8* %P to i32*
-  %ttmp2 = load i32, i32* %0
+  %tmp2 = load i32, i32* %0
   %add.ptr = getelementptr inbounds i8, i8* %P, i64 1
-  %ttmp5 = load i8, i8* %add.ptr
-  %conv = zext i8 %ttmp5 to i32
-  %add = add nsw i32 %ttmp2, %conv
+  %tmp5 = load i8, i8* %add.ptr
+  %conv = zext i8 %tmp5 to i32
+  %add = add nsw i32 %tmp2, %conv
   ret i32 %add
 }
 
@@ -981,11 +981,11 @@ define i32 @load_load_partial_alias_cross_block(i8* %P) nounwind ssp {
 ; CHECK-NEXT: [[XX:%.*]] = bitcast i8* [[P:%.*]] to i32*
 ; CHECK-NEXT: [[X1:%.*]] = load i32, i32* [[XX]], align 4
 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X1]], 127
-; CHECK-NEXT: [[TMP0:%.*]] = lshr i32 [[X1]], {{8|16}}
-; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8
 ; CHECK-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[IF_END:%.*]]
 ; CHECK: land.lhs.true:
-; CHECK-NEXT: [[CONV6:%.*]] = zext i8 [[TMP1]] to i32
+; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, i8* [[P]], i64 1
+; CHECK-NEXT: [[TMP5:%.*]] = load i8, i8* [[ARRAYIDX4]], align 1
+; CHECK-NEXT: [[CONV6:%.*]] = zext i8 [[TMP5]] to i32
 ; CHECK-NEXT: ret i32 [[CONV6]]
 ; CHECK: if.end:
 ; CHECK-NEXT: ret i32 52
@@ -998,146 +998,14 @@ entry:
 
 land.lhs.true: ; preds = %entry
   %arrayidx4 = getelementptr inbounds i8, i8* %P, i64 1
-  %ttmp5 = load i8, i8* %arrayidx4, align 1
-  %conv6 = zext i8 %ttmp5 to i32
+  %tmp5 = load i8, i8* %arrayidx4, align 1
+  %conv6 = zext i8 %tmp5 to i32
   ret i32 %conv6
 
 if.end:
   ret i32 52
 }
 
-define i32 @load_load_partial_alias_cross_block_phi_trans(i8* %P) nounwind {
-; CHECK-LABEL: @load_load_partial_alias_cross_block_phi_trans(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: 
[[XX:%.*]] = bitcast i8* [[P:%.*]] to i32* -; CHECK-NEXT: [[X1:%.*]] = load i32, i32* [[XX]], align 4 -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X1]], 127 -; CHECK-NEXT: [[TMP0:%.*]] = lshr i32 [[X1]], {{16|8}} -; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8 -; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[X1]], {{8|16}} -; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i8 -; CHECK-NEXT: br i1 [[CMP]], label [[IF:%.*]], label [[ELSE:%.*]] -; CHECK: if: -; CHECK-NEXT: br label [[JOIN:%.*]] -; CHECK: else: -; CHECK-NEXT: br label [[JOIN]] -; CHECK: join: -; CHECK-NEXT: [[TTMP5:%.*]] = phi i8 [ [[TMP3]], [[IF]] ], [ [[TMP1]], [[ELSE]] ] -; CHECK-NEXT: [[CONV6:%.*]] = zext i8 [[TTMP5]] to i32 -; CHECK-NEXT: ret i32 [[CONV6]] -; CHECK: if.end: -; CHECK-NEXT: ret i32 52 -; -entry: - %xx = bitcast i8* %P to i32* - %x1 = load i32, i32* %xx, align 4 - %cmp = icmp eq i32 %x1, 127 - br i1 %cmp, label %if, label %else - -if: - %arrayidx.if = getelementptr inbounds i8, i8* %P, i64 1 - br label %join - -else: - %arrayidx.else = getelementptr inbounds i8, i8* %P, i64 2 - br label %join - -join: - %idx = phi i64 [ 1, %if ], [ 2, %else ] - %arrayidx4 = getelementptr inbounds i8, i8* %P, i64 %idx - %ttmp5 = load i8, i8* %arrayidx4, align 1 - %conv6 = zext i8 %ttmp5 to i32 - ret i32 %conv6 - -if.end: - ret i32 52 -} - -define void @load_load_partial_alias_loop(i8* %P) { -; LE-LABEL: @load_load_partial_alias_loop( -; LE-NEXT: entry: -; LE-NEXT: [[P_1:%.*]] = getelementptr i8, i8* [[P:%.*]], i64 1 -; LE-NEXT: [[V_1:%.*]] = load i8, i8* [[P_1]], align 1 -; LE-NEXT: call void @use.i8(i8 [[V_1]]) -; LE-NEXT: [[P_1_32:%.*]] = bitcast i8* [[P_1]] to i32* -; LE-NEXT: [[V_1_32:%.*]] = load i32, i32* [[P_1_32]], align 4 -; LE-NEXT: call void @use.i32(i32 [[V_1_32]]) -; LE-NEXT: [[TMP0:%.*]] = trunc i32 [[V_1_32]] to i8 -; LE-NEXT: br label [[LOOP:%.*]] -; LE: loop: -; LE-NEXT: [[V_I:%.*]] = phi i8 [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP2:%.*]], [[LOOP_LOOP_CRIT_EDGE:%.*]] ] -; LE-NEXT: [[I:%.*]] = phi i64 [ 1, [[ENTRY]] ], [ [[I_INC:%.*]], [[LOOP_LOOP_CRIT_EDGE]] ] -; LE-NEXT: [[P_I:%.*]] = getelementptr i8, i8* [[P]], i64 [[I]] -; LE-NEXT: call void @use.i8(i8 [[V_I]]) -; LE-NEXT: [[P_I_32:%.*]] = bitcast i8* [[P_I]] to i32* -; LE-NEXT: [[V_I_32:%.*]] = load i32, i32* [[P_I_32]], align 4 -; LE-NEXT: call void @use.i32(i32 [[V_I_32]]) -; LE-NEXT: [[I_INC]] = add i64 [[I]], 1 -; LE-NEXT: [[CMP:%.*]] = icmp ne i64 [[I_INC]], 64 -; LE-NEXT: [[TMP1:%.*]] = lshr i32 [[V_I_32]], 8 -; LE-NEXT: [[TMP2]] = trunc i32 [[TMP1]] to i8 -; LE-NEXT: br i1 [[CMP]], label [[LOOP_LOOP_CRIT_EDGE]], label [[EXIT:%.*]] -; LE: loop.loop_crit_edge: -; LE-NEXT: br label [[LOOP]] -; LE: exit: -; LE-NEXT: ret void -; -; BE-LABEL: @load_load_partial_alias_loop( -; BE-NEXT: entry: -; BE-NEXT: [[P_1:%.*]] = getelementptr i8, i8* [[P:%.*]], i64 1 -; BE-NEXT: [[V_1:%.*]] = load i8, i8* [[P_1]], align 1 -; BE-NEXT: call void @use.i8(i8 [[V_1]]) -; BE-NEXT: [[P_1_32:%.*]] = bitcast i8* [[P_1]] to i32* -; BE-NEXT: [[V_1_32:%.*]] = load i32, i32* [[P_1_32]], align 4 -; BE-NEXT: call void @use.i32(i32 [[V_1_32]]) -; BE-NEXT: [[TMP0:%.*]] = lshr i32 [[V_1_32]], 24 -; BE-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8 -; BE-NEXT: br label [[LOOP:%.*]] -; BE: loop: -; BE-NEXT: [[V_I:%.*]] = phi i8 [ [[TMP1]], [[ENTRY:%.*]] ], [ [[TMP3:%.*]], [[LOOP_LOOP_CRIT_EDGE:%.*]] ] -; BE-NEXT: [[I:%.*]] = phi i64 [ 1, [[ENTRY]] ], [ [[I_INC:%.*]], [[LOOP_LOOP_CRIT_EDGE]] ] -; BE-NEXT: [[P_I:%.*]] = getelementptr i8, i8* [[P]], i64 [[I]] -; BE-NEXT: call void @use.i8(i8 
[[V_I]]) -; BE-NEXT: [[P_I_32:%.*]] = bitcast i8* [[P_I]] to i32* -; BE-NEXT: [[V_I_32:%.*]] = load i32, i32* [[P_I_32]], align 4 -; BE-NEXT: call void @use.i32(i32 [[V_I_32]]) -; BE-NEXT: [[I_INC]] = add i64 [[I]], 1 -; BE-NEXT: [[CMP:%.*]] = icmp ne i64 [[I_INC]], 64 -; BE-NEXT: [[TMP2:%.*]] = lshr i32 [[V_I_32]], 16 -; BE-NEXT: [[TMP3]] = trunc i32 [[TMP2]] to i8 -; BE-NEXT: br i1 [[CMP]], label [[LOOP_LOOP_CRIT_EDGE]], label [[EXIT:%.*]] -; BE: loop.loop_crit_edge: -; BE-NEXT: br label [[LOOP]] -; BE: exit: -; BE-NEXT: ret void -; -entry: - %P.1 = getelementptr i8, i8* %P, i64 1 - %v.1 = load i8, i8* %P.1 - call void @use.i8(i8 %v.1) - %P.1.32 = bitcast i8* %P.1 to i32* - %v.1.32 = load i32, i32* %P.1.32 - call void @use.i32(i32 %v.1.32) - br label %loop - -loop: - %i = phi i64 [ 1, %entry ], [ %i.inc, %loop ] - %P.i = getelementptr i8, i8* %P, i64 %i - %v.i = load i8, i8* %P.i - call void @use.i8(i8 %v.i) - %P.i.32 = bitcast i8* %P.i to i32* - %v.i.32 = load i32, i32* %P.i.32 - call void @use.i32(i32 %v.i.32) - %i.inc = add i64 %i, 1 - %cmp = icmp ne i64 %i.inc, 64 - br i1 %cmp, label %loop, label %exit - -exit: - ret void -} - -declare void @use.i8(i8) readnone -declare void @use.i32(i32) readnone ;;===----------------------------------------------------------------------===;; ;; Load Widening @@ -1151,18 +1019,18 @@ declare void @use.i32(i32) readnone define i32 @test_widening1(i8* %P) nounwind ssp noredzone { ; CHECK-LABEL: @test_widening1( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TTMP:%.*]] = load i8, i8* getelementptr inbounds ([[WIDENING1:%.*]], %widening1* @f, i64 0, i32 1), align 4 -; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TTMP]] to i32 -; CHECK-NEXT: [[TTMP1:%.*]] = load i8, i8* getelementptr inbounds ([[WIDENING1]], %widening1* @f, i64 0, i32 2), align 1 -; CHECK-NEXT: [[CONV2:%.*]] = zext i8 [[TTMP1]] to i32 +; CHECK-NEXT: [[TMP:%.*]] = load i8, i8* getelementptr inbounds ([[WIDENING1:%.*]], %widening1* @f, i64 0, i32 1), align 4 +; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = load i8, i8* getelementptr inbounds ([[WIDENING1]], %widening1* @f, i64 0, i32 2), align 1 +; CHECK-NEXT: [[CONV2:%.*]] = zext i8 [[TMP1]] to i32 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], [[CONV2]] ; CHECK-NEXT: ret i32 [[ADD]] ; entry: - %ttmp = load i8, i8* getelementptr inbounds (%widening1, %widening1* @f, i64 0, i32 1), align 4 - %conv = zext i8 %ttmp to i32 - %ttmp1 = load i8, i8* getelementptr inbounds (%widening1, %widening1* @f, i64 0, i32 2), align 1 - %conv2 = zext i8 %ttmp1 to i32 + %tmp = load i8, i8* getelementptr inbounds (%widening1, %widening1* @f, i64 0, i32 1), align 4 + %conv = zext i8 %tmp to i32 + %tmp1 = load i8, i8* getelementptr inbounds (%widening1, %widening1* @f, i64 0, i32 2), align 1 + %conv2 = zext i8 %tmp1 to i32 %add = add nsw i32 %conv, %conv2 ret i32 %add } @@ -1170,32 +1038,32 @@ entry: define i32 @test_widening2() nounwind ssp noredzone { ; CHECK-LABEL: @test_widening2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TTMP:%.*]] = load i8, i8* getelementptr inbounds ([[WIDENING1:%.*]], %widening1* @f, i64 0, i32 1), align 4 -; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TTMP]] to i32 -; CHECK-NEXT: [[TTMP1:%.*]] = load i8, i8* getelementptr inbounds ([[WIDENING1]], %widening1* @f, i64 0, i32 2), align 1 -; CHECK-NEXT: [[CONV2:%.*]] = zext i8 [[TTMP1]] to i32 +; CHECK-NEXT: [[TMP:%.*]] = load i8, i8* getelementptr inbounds ([[WIDENING1:%.*]], %widening1* @f, i64 0, i32 1), align 4 +; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP]] to i32 +; 
CHECK-NEXT: [[TMP1:%.*]] = load i8, i8* getelementptr inbounds ([[WIDENING1]], %widening1* @f, i64 0, i32 2), align 1 +; CHECK-NEXT: [[CONV2:%.*]] = zext i8 [[TMP1]] to i32 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], [[CONV2]] -; CHECK-NEXT: [[TTMP2:%.*]] = load i8, i8* getelementptr inbounds ([[WIDENING1]], %widening1* @f, i64 0, i32 3), align 2 -; CHECK-NEXT: [[CONV3:%.*]] = zext i8 [[TTMP2]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, i8* getelementptr inbounds ([[WIDENING1]], %widening1* @f, i64 0, i32 3), align 2 +; CHECK-NEXT: [[CONV3:%.*]] = zext i8 [[TMP2]] to i32 ; CHECK-NEXT: [[ADD2:%.*]] = add nsw i32 [[ADD]], [[CONV3]] -; CHECK-NEXT: [[TTMP3:%.*]] = load i8, i8* getelementptr inbounds ([[WIDENING1]], %widening1* @f, i64 0, i32 4), align 1 -; CHECK-NEXT: [[CONV4:%.*]] = zext i8 [[TTMP3]] to i32 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, i8* getelementptr inbounds ([[WIDENING1]], %widening1* @f, i64 0, i32 4), align 1 +; CHECK-NEXT: [[CONV4:%.*]] = zext i8 [[TMP3]] to i32 ; CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD2]], [[CONV4]] ; CHECK-NEXT: ret i32 [[ADD3]] ; entry: - %ttmp = load i8, i8* getelementptr inbounds (%widening1, %widening1* @f, i64 0, i32 1), align 4 - %conv = zext i8 %ttmp to i32 - %ttmp1 = load i8, i8* getelementptr inbounds (%widening1, %widening1* @f, i64 0, i32 2), align 1 - %conv2 = zext i8 %ttmp1 to i32 + %tmp = load i8, i8* getelementptr inbounds (%widening1, %widening1* @f, i64 0, i32 1), align 4 + %conv = zext i8 %tmp to i32 + %tmp1 = load i8, i8* getelementptr inbounds (%widening1, %widening1* @f, i64 0, i32 2), align 1 + %conv2 = zext i8 %tmp1 to i32 %add = add nsw i32 %conv, %conv2 - %ttmp2 = load i8, i8* getelementptr inbounds (%widening1, %widening1* @f, i64 0, i32 3), align 2 - %conv3 = zext i8 %ttmp2 to i32 + %tmp2 = load i8, i8* getelementptr inbounds (%widening1, %widening1* @f, i64 0, i32 3), align 2 + %conv3 = zext i8 %tmp2 to i32 %add2 = add nsw i32 %add, %conv3 - %ttmp3 = load i8, i8* getelementptr inbounds (%widening1, %widening1* @f, i64 0, i32 4), align 1 - %conv4 = zext i8 %ttmp3 to i32 + %tmp3 = load i8, i8* getelementptr inbounds (%widening1, %widening1* @f, i64 0, i32 4), align 1 + %conv4 = zext i8 %tmp3 to i32 %add3 = add nsw i32 %add2, %conv4 ret i32 %add3 @@ -1226,8 +1094,8 @@ define void @test_escape1() nounwind { ; CHECK-LABEL: @test_escape1( ; CHECK-NEXT: [[X:%.*]] = alloca i8**, align 8 ; CHECK-NEXT: store i8** getelementptr inbounds ([5 x i8*], [5 x i8*]* @_ZTV1X, i64 0, i64 2), i8*** [[X]], align 8 -; CHECK-NEXT: call void @use() #[[ATTR3]] -; CHECK-NEXT: call void @use3(i8*** [[X]], i8** getelementptr inbounds ([5 x i8*], [5 x i8*]* @_ZTV1X, i64 0, i64 2)) #[[ATTR3]] +; CHECK-NEXT: call void @use() #[[ATTR6]] +; CHECK-NEXT: call void @use3(i8*** [[X]], i8** getelementptr inbounds ([5 x i8*], [5 x i8*]* @_ZTV1X, i64 0, i64 2)) #[[ATTR6]] ; CHECK-NEXT: ret void ; %x = alloca i8**, align 8