diff --git a/llvm/test/Transforms/SROA/address-spaces.ll b/llvm/test/Transforms/SROA/address-spaces.ll
index 4303a924595a..e84d6120d69d 100644
--- a/llvm/test/Transforms/SROA/address-spaces.ll
+++ b/llvm/test/Transforms/SROA/address-spaces.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -passes=sroa -S | FileCheck %s
 target datalayout = "e-p:64:64:64-p1:16:16:16-p3:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
 
@@ -10,9 +11,11 @@ declare void @llvm.memcpy.p1i8.p1i8.i32(i8 addrspace(1)* nocapture, i8 addrspace
 ; Make sure an illegal bitcast isn't introduced
 define void @test_address_space_1_1(<2 x i64> addrspace(1)* %a, i16 addrspace(1)* %b) {
 ; CHECK-LABEL: @test_address_space_1_1(
-; CHECK: load <2 x i64>, <2 x i64> addrspace(1)* %a, align 2
-; CHECK: store <2 x i64> {{.*}}, <2 x i64> addrspace(1)* {{.*}}, align 2
-; CHECK: ret void
+; CHECK-NEXT:    [[AA_0_COPYLOAD:%.*]] = load <2 x i64>, <2 x i64> addrspace(1)* [[A:%.*]], align 2
+; CHECK-NEXT:    [[AA_0_BPTR_SROA_CAST:%.*]] = bitcast i16 addrspace(1)* [[B:%.*]] to <2 x i64> addrspace(1)*
+; CHECK-NEXT:    store <2 x i64> [[AA_0_COPYLOAD]], <2 x i64> addrspace(1)* [[AA_0_BPTR_SROA_CAST]], align 2
+; CHECK-NEXT:    ret void
+;
   %aa = alloca <2 x i64>, align 16
   %aptr = bitcast <2 x i64> addrspace(1)* %a to i8 addrspace(1)*
   %aaptr = bitcast <2 x i64>* %aa to i8*
@@ -24,9 +27,11 @@ define void @test_address_space_1_1(<2 x i64> addrspace(1)* %a, i16 addrspace(1)
 
 define void @test_address_space_1_0(<2 x i64> addrspace(1)* %a, i16* %b) {
 ; CHECK-LABEL: @test_address_space_1_0(
-; CHECK: load <2 x i64>, <2 x i64> addrspace(1)* %a, align 2
-; CHECK: store <2 x i64> {{.*}}, <2 x i64>* {{.*}}, align 2
-; CHECK: ret void
+; CHECK-NEXT:    [[AA_0_COPYLOAD:%.*]] = load <2 x i64>, <2 x i64> addrspace(1)* [[A:%.*]], align 2
+; CHECK-NEXT:    [[AA_0_BPTR_SROA_CAST:%.*]] = bitcast i16* [[B:%.*]] to <2 x i64>*
+; CHECK-NEXT:    store <2 x i64> [[AA_0_COPYLOAD]], <2 x i64>* [[AA_0_BPTR_SROA_CAST]], align 2
+; CHECK-NEXT:    ret void
+;
   %aa = alloca <2 x i64>, align 16
   %aptr = bitcast <2 x i64> addrspace(1)* %a to i8 addrspace(1)*
   %aaptr = bitcast <2 x i64>* %aa to i8*
@@ -38,9 +43,11 @@ define void @test_address_space_1_0(<2 x i64> addrspace(1)* %a, i16* %b) {
 
 define void @test_address_space_0_1(<2 x i64>* %a, i16 addrspace(1)* %b) {
 ; CHECK-LABEL: @test_address_space_0_1(
-; CHECK: load <2 x i64>, <2 x i64>* %a, align 2
-; CHECK: store <2 x i64> {{.*}}, <2 x i64> addrspace(1)* {{.*}}, align 2
-; CHECK: ret void
+; CHECK-NEXT:    [[AA_0_COPYLOAD:%.*]] = load <2 x i64>, <2 x i64>* [[A:%.*]], align 2
+; CHECK-NEXT:    [[AA_0_BPTR_SROA_CAST:%.*]] = bitcast i16 addrspace(1)* [[B:%.*]] to <2 x i64> addrspace(1)*
+; CHECK-NEXT:    store <2 x i64> [[AA_0_COPYLOAD]], <2 x i64> addrspace(1)* [[AA_0_BPTR_SROA_CAST]], align 2
+; CHECK-NEXT:    ret void
+;
   %aa = alloca <2 x i64>, align 16
   %aptr = bitcast <2 x i64>* %a to i8*
   %aaptr = bitcast <2 x i64>* %aa to i8*
@@ -55,7 +62,18 @@ define void @test_address_space_0_1(<2 x i64>* %a, i16 addrspace(1)* %b) {
 ; Function Attrs: nounwind
 define void @copy_struct([5 x i64] %in.coerce) {
 ; CHECK-LABEL: @copy_struct(
-; CHECK-NOT: memcpy
+; CHECK-NEXT:  for.end:
+; CHECK-NEXT:    [[IN_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [5 x i64] [[IN_COERCE:%.*]], 0
+; CHECK-NEXT:    [[IN_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [5 x i64] [[IN_COERCE]], 1
+; CHECK-NEXT:    [[IN_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [5 x i64] [[IN_COERCE]], 2
+; CHECK-NEXT:    [[IN_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [5 x i64] [[IN_COERCE]], 3
+; CHECK-NEXT:    [[IN_SROA_2_4_EXTRACT_SHIFT:%.*]] = lshr i64 [[IN_COERCE_FCA_2_EXTRACT]], 32
+; CHECK-NEXT:    [[IN_SROA_2_4_EXTRACT_TRUNC:%.*]] = trunc i64 [[IN_SROA_2_4_EXTRACT_SHIFT]] to i32
+; CHECK-NEXT:    store i32 [[IN_SROA_2_4_EXTRACT_TRUNC]], i32 addrspace(1)* undef, align 4
+; CHECK-NEXT:    store i64 [[IN_COERCE_FCA_3_EXTRACT]], i64 addrspace(1)* poison, align 4
+; CHECK-NEXT:    store i32 undef, i32 addrspace(1)* poison, align 4
+; CHECK-NEXT:    ret void
+;
 for.end:
   %in = alloca %struct.struct_test_27.0.13, align 8
   %0 = bitcast %struct.struct_test_27.0.13* %in to [5 x i64]*
@@ -65,7 +83,7 @@ for.end:
   call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* align 4 undef, i8* align 4 %scevgep910, i32 16, i1 false)
   ret void
 }
- 
+
 %union.anon = type { i32* }
 
 @g = common global i32 0, align 4
@@ -75,8 +93,12 @@ for.end:
 ; illegal bitcast isn't introduced
 define void @pr27557() {
 ; CHECK-LABEL: @pr27557(
-; CHECK: %[[CAST:.*]] = bitcast i32** {{.*}} to i32 addrspace(3)**
-; CHECK: store i32 addrspace(3)* @l, i32 addrspace(3)** %[[CAST]]
+; CHECK-NEXT:    [[DOTSROA_0:%.*]] = alloca i32*, align 8
+; CHECK-NEXT:    store i32* @g, i32** [[DOTSROA_0]], align 8
+; CHECK-NEXT:    [[DOTSROA_0_0__SROA_CAST1:%.*]] = bitcast i32** [[DOTSROA_0]] to i32 addrspace(3)**
+; CHECK-NEXT:    store i32 addrspace(3)* @l, i32 addrspace(3)** [[DOTSROA_0_0__SROA_CAST1]], align 8
+; CHECK-NEXT:    ret void
+;
   %1 = alloca %union.anon, align 8
   %2 = bitcast %union.anon* %1 to i32**
   store i32* @g, i32** %2, align 8
@@ -91,7 +113,8 @@ define void @pr27557() {
 ; should be promoted through the pair of `ptrtoint`/`inttoptr`.
 define i32* @pr27557.alt() {
 ; CHECK-LABEL: @pr27557.alt(
-; CHECK: ret i32* inttoptr (i64 ptrtoint (i32 addrspace(2)* @l2 to i64) to i32*)
+; CHECK-NEXT:    ret i32* inttoptr (i64 ptrtoint (i32 addrspace(2)* @l2 to i64) to i32*)
+;
   %1 = alloca %union.anon, align 8
   %2 = bitcast %union.anon* %1 to i32 addrspace(2)**
   store i32 addrspace(2)* @l2, i32 addrspace(2)** %2, align 8
@@ -102,30 +125,52 @@ define i32* @pr27557.alt() {
 
 ; Make sure pre-splitting doesn't try to introduce an illegal bitcast
 define float @presplit(i64 addrspace(1)* %p) {
-entry:
 ; CHECK-LABEL: @presplit(
-; CHECK: %[[CAST:.*]] = bitcast i64 addrspace(1)* {{.*}} to i32 addrspace(1)*
-; CHECK: load i32, i32 addrspace(1)* %[[CAST]]
-   %b = alloca i64
-   %b.cast = bitcast i64* %b to [2 x float]*
-   %b.gep1 = getelementptr [2 x float], [2 x float]* %b.cast, i32 0, i32 0
-   %b.gep2 = getelementptr [2 x float], [2 x float]* %b.cast, i32 0, i32 1
-   %l = load i64, i64 addrspace(1)* %p
-   store i64 %l, i64* %b
-   %f1 = load float, float* %b.gep1
-   %f2 = load float, float* %b.gep2
-   %ret = fadd float %f1, %f2
-   ret float %ret
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[P_SROA_CAST:%.*]] = bitcast i64 addrspace(1)* [[P:%.*]] to i32 addrspace(1)*
+; CHECK-NEXT:    [[L1:%.*]] = load i32, i32 addrspace(1)* [[P_SROA_CAST]], align 4
+; CHECK-NEXT:    [[P_SROA_RAW_CAST:%.*]] = bitcast i64 addrspace(1)* [[P]] to i8 addrspace(1)*
+; CHECK-NEXT:    [[P_SROA_RAW_IDX:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[P_SROA_RAW_CAST]], i16 4
+; CHECK-NEXT:    [[P_SROA_CAST2:%.*]] = bitcast i8 addrspace(1)* [[P_SROA_RAW_IDX]] to i32 addrspace(1)*
+; CHECK-NEXT:    [[L3:%.*]] = load i32, i32 addrspace(1)* [[P_SROA_CAST2]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32 [[L1]] to float
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32 [[L3]] to float
+; CHECK-NEXT:    [[RET:%.*]] = fadd float [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    ret float [[RET]]
+;
+entry:
+  %b = alloca i64
+  %b.cast = bitcast i64* %b to [2 x float]*
+  %b.gep1 = getelementptr [2 x float], [2 x float]* %b.cast, i32 0, i32 0
+  %b.gep2 = getelementptr [2 x float], [2 x float]* %b.cast, i32 0, i32 1
+  %l = load i64, i64 addrspace(1)* %p
+  store i64 %l, i64* %b
+  %f1 = load float, float* %b.gep1
+  %f2 = load float, float* %b.gep2
+  %ret = fadd float %f1, %f2
+  ret float %ret
 }
 
 ; Test load from and store to non-zero address space.
 define void @test_load_store_diff_addr_space([2 x float] addrspace(1)* %complex1, [2 x float] addrspace(1)* %complex2) {
-; CHECK-LABEL: @test_load_store_diff_addr_space
-; CHECK-NOT: alloca
-; CHECK: load i32, i32 addrspace(1)*
-; CHECK: load i32, i32 addrspace(1)*
-; CHECK: store i32 %{{.*}}, i32 addrspace(1)*
-; CHECK: store i32 %{{.*}}, i32 addrspace(1)*
+; CHECK-LABEL: @test_load_store_diff_addr_space(
+; CHECK-NEXT:    [[P1_SROA_CAST:%.*]] = bitcast [2 x float] addrspace(1)* [[COMPLEX1:%.*]] to i32 addrspace(1)*
+; CHECK-NEXT:    [[V15:%.*]] = load i32, i32 addrspace(1)* [[P1_SROA_CAST]], align 4
+; CHECK-NEXT:    [[P1_SROA_IDX:%.*]] = getelementptr inbounds [2 x float], [2 x float] addrspace(1)* [[COMPLEX1]], i16 0, i16 1
+; CHECK-NEXT:    [[P1_SROA_CAST7:%.*]] = bitcast float addrspace(1)* [[P1_SROA_IDX]] to i32 addrspace(1)*
+; CHECK-NEXT:    [[V18:%.*]] = load i32, i32 addrspace(1)* [[P1_SROA_CAST7]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32 [[V15]] to float
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32 [[V18]] to float
+; CHECK-NEXT:    [[SUM:%.*]] = fadd float [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float [[SUM]] to i32
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast float [[SUM]] to i32
+; CHECK-NEXT:    [[P2_SROA_CAST:%.*]] = bitcast [2 x float] addrspace(1)* [[COMPLEX2:%.*]] to i32 addrspace(1)*
+; CHECK-NEXT:    store i32 [[TMP3]], i32 addrspace(1)* [[P2_SROA_CAST]], align 4
+; CHECK-NEXT:    [[P2_SROA_IDX:%.*]] = getelementptr inbounds [2 x float], [2 x float] addrspace(1)* [[COMPLEX2]], i16 0, i16 1
+; CHECK-NEXT:    [[P2_SROA_CAST4:%.*]] = bitcast float addrspace(1)* [[P2_SROA_IDX]] to i32 addrspace(1)*
+; CHECK-NEXT:    store i32 [[TMP4]], i32 addrspace(1)* [[P2_SROA_CAST4]], align 4
+; CHECK-NEXT:    ret void
+;
   %a = alloca i64
   %a.cast = bitcast i64* %a to [2 x float]*
   %a.gep1 = getelementptr [2 x float], [2 x float]* %a.cast, i32 0, i32 0
diff --git a/llvm/test/Transforms/SROA/alignment.ll b/llvm/test/Transforms/SROA/alignment.ll
index 0fe9c849e964..de794852ac5f 100644
--- a/llvm/test/Transforms/SROA/alignment.ll
+++ b/llvm/test/Transforms/SROA/alignment.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -passes=sroa -S | FileCheck %s
 ; RUN: opt -passes='debugify,function(sroa)' -S < %s | FileCheck %s -check-prefix DEBUGLOC
 
@@ -7,15 +8,33 @@ declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1)
 
 define void @test1({ i8, i8 }* %a, { i8, i8 }* %b) {
 ; CHECK-LABEL: @test1(
-; CHECK: %[[gep_a0:.*]] = getelementptr inbounds { i8, i8 }, { i8, i8 }* %a, i64 0, i32 0
-; CHECK: %[[a0:.*]] = load i8, i8* %[[gep_a0]], align 16
-; CHECK: %[[gep_a1:.*]] = getelementptr inbounds { i8, i8 }, { i8, i8 }* %a, i64 0, i32 1
-; CHECK: %[[a1:.*]] = load i8, i8* %[[gep_a1]], align 1
-; CHECK: %[[gep_b0:.*]] = getelementptr inbounds { i8, i8 }, { i8, i8 }* %b, i64 0, i32 0
-; CHECK: store i8 %[[a0]], i8* %[[gep_b0]], align 16
-; CHECK: %[[gep_b1:.*]] = getelementptr inbounds { i8, i8 }, { i8, i8 }* %b, i64 0, i32 1
-; CHECK: store i8 %[[a1]], i8* %[[gep_b1]], align 1
-; CHECK: ret void
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ALLOCA_SROA_0_0_GEP_A_SROA_IDX:%.*]] = getelementptr inbounds { i8, i8 }, { i8, i8 }* [[A:%.*]], i64 0, i32 0
+; CHECK-NEXT:    [[ALLOCA_SROA_0_0_COPYLOAD:%.*]] = load i8, i8* [[ALLOCA_SROA_0_0_GEP_A_SROA_IDX]], align 16
+; CHECK-NEXT:    [[ALLOCA_SROA_3_0_GEP_A_SROA_IDX:%.*]] = getelementptr inbounds { i8, i8 }, { i8, i8 }* [[A]], i64 0, i32 1
+; CHECK-NEXT:    [[ALLOCA_SROA_3_0_COPYLOAD:%.*]] = load i8, i8* [[ALLOCA_SROA_3_0_GEP_A_SROA_IDX]], align 1
+; CHECK-NEXT:    [[ALLOCA_SROA_0_0_GEP_B_SROA_IDX:%.*]] = getelementptr inbounds { i8, i8 }, { i8, i8 }* [[B:%.*]], i64 0, i32 0
+; CHECK-NEXT:    store i8 [[ALLOCA_SROA_0_0_COPYLOAD]], i8* [[ALLOCA_SROA_0_0_GEP_B_SROA_IDX]], align 16
+; CHECK-NEXT:    [[ALLOCA_SROA_3_0_GEP_B_SROA_IDX:%.*]] = getelementptr inbounds { i8, i8 }, { i8, i8 }* [[B]], i64 0, i32 1
+; CHECK-NEXT:    store i8 [[ALLOCA_SROA_3_0_COPYLOAD]], i8* [[ALLOCA_SROA_3_0_GEP_B_SROA_IDX]], align 1
+; CHECK-NEXT:    ret void
+;
+; DEBUGLOC-LABEL: @test1(
+; DEBUGLOC-NEXT:  entry:
+; DEBUGLOC-NEXT:    call void @llvm.dbg.value(metadata { i8, i8 }* undef, metadata [[META9:![0-9]+]], metadata !DIExpression()), !dbg [[DBG14:![0-9]+]]
+; DEBUGLOC-NEXT:    call void @llvm.dbg.value(metadata i8* undef, metadata [[META11:![0-9]+]], metadata !DIExpression()), !dbg [[DBG15:![0-9]+]]
+; DEBUGLOC-NEXT:    call void @llvm.dbg.value(metadata i8* undef, metadata [[META12:![0-9]+]], metadata !DIExpression()), !dbg [[DBG16:![0-9]+]]
+; DEBUGLOC-NEXT:    call void @llvm.dbg.value(metadata i8* undef, metadata [[META13:![0-9]+]], metadata !DIExpression()), !dbg [[DBG17:![0-9]+]]
+; DEBUGLOC-NEXT:    [[ALLOCA_SROA_0_0_GEP_A_SROA_IDX:%.*]] = getelementptr inbounds { i8, i8 }, { i8, i8 }* [[A:%.*]], i64 0, i32 0, !dbg [[DBG18:![0-9]+]]
+; DEBUGLOC-NEXT:    [[ALLOCA_SROA_0_0_COPYLOAD:%.*]] = load i8, i8* [[ALLOCA_SROA_0_0_GEP_A_SROA_IDX]], align 16, !dbg [[DBG18]]
+; DEBUGLOC-NEXT:    [[ALLOCA_SROA_3_0_GEP_A_SROA_IDX:%.*]] = getelementptr inbounds { i8, i8 }, { i8, i8 }* [[A]], i64 0, i32 1, !dbg [[DBG18]]
+; DEBUGLOC-NEXT:    [[ALLOCA_SROA_3_0_COPYLOAD:%.*]] = load i8, i8* [[ALLOCA_SROA_3_0_GEP_A_SROA_IDX]], align 1, !dbg [[DBG18]]
+; DEBUGLOC-NEXT:    [[ALLOCA_SROA_0_0_GEP_B_SROA_IDX:%.*]] = getelementptr inbounds { i8, i8 }, { i8, i8 }* [[B:%.*]], i64 0, i32 0, !dbg [[DBG19:![0-9]+]]
+; DEBUGLOC-NEXT:    store i8 [[ALLOCA_SROA_0_0_COPYLOAD]], i8* [[ALLOCA_SROA_0_0_GEP_B_SROA_IDX]], align 16, !dbg [[DBG19]]
+; DEBUGLOC-NEXT:    [[ALLOCA_SROA_3_0_GEP_B_SROA_IDX:%.*]] = getelementptr inbounds { i8, i8 }, { i8, i8 }* [[B]], i64 0, i32 1, !dbg [[DBG19]]
+; DEBUGLOC-NEXT:    store i8 [[ALLOCA_SROA_3_0_COPYLOAD]], i8* [[ALLOCA_SROA_3_0_GEP_B_SROA_IDX]], align 1, !dbg [[DBG19]]
+; DEBUGLOC-NEXT:    ret void, !dbg [[DBG20:![0-9]+]]
+;
 
 entry:
   %alloca = alloca { i8, i8 }, align 16
@@ -32,18 +51,37 @@ entry:
 
 define void @test2() {
 ; CHECK-LABEL: @test2(
-; CHECK: alloca i16
-; CHECK: load i8, i8* %{{.*}}
-; CHECK: store i8 42, i8* %{{.*}}
-; CHECK: ret void
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_SROA_0:%.*]] = alloca i16, align 2
+; CHECK-NEXT:    store volatile i16 0, i16* [[A_SROA_0]], align 2
+; CHECK-NEXT:    [[A_SROA_0_1_GEP2_SROA_RAW_CAST:%.*]] = bitcast i16* [[A_SROA_0]] to i8*
+; CHECK-NEXT:    [[A_SROA_0_1_GEP2_SROA_RAW_IDX:%.*]] = getelementptr inbounds i8, i8* [[A_SROA_0_1_GEP2_SROA_RAW_CAST]], i64 1
+; CHECK-NEXT:    [[A_SROA_0_1_A_SROA_0_2_RESULT:%.*]] = load i8, i8* [[A_SROA_0_1_GEP2_SROA_RAW_IDX]], align 1
+; CHECK-NEXT:    [[A_SROA_0_1_GEP2_SROA_RAW_CAST3:%.*]] = bitcast i16* [[A_SROA_0]] to i8*
+; CHECK-NEXT:    [[A_SROA_0_1_GEP2_SROA_RAW_IDX4:%.*]] = getelementptr inbounds i8, i8* [[A_SROA_0_1_GEP2_SROA_RAW_CAST3]], i64 1
+; CHECK-NEXT:    store i8 42, i8* [[A_SROA_0_1_GEP2_SROA_RAW_IDX4]], align 1
+; CHECK-NEXT:    ret void
+;
+; DEBUGLOC-LABEL: @test2(
+; DEBUGLOC-NEXT:  entry:
+; DEBUGLOC-NEXT:    [[A_SROA_0:%.*]] = alloca i16, align 2, !dbg [[DBG29:![0-9]+]]
+; DEBUGLOC-NEXT:    call void @llvm.dbg.value(metadata { i8, i8, i8, i8 }* undef, metadata [[META23:![0-9]+]], metadata !DIExpression()), !dbg [[DBG29]]
+; DEBUGLOC-NEXT:    call void @llvm.dbg.value(metadata i8* undef, metadata [[META24:![0-9]+]], metadata !DIExpression()), !dbg [[DBG30:![0-9]+]]
+; DEBUGLOC-NEXT:    call void @llvm.dbg.value(metadata i16* undef, metadata [[META25:![0-9]+]], metadata !DIExpression()), !dbg [[DBG31:![0-9]+]]
+; DEBUGLOC-NEXT:    store volatile i16 0, i16* [[A_SROA_0]], align 2, !dbg [[DBG32:![0-9]+]]
+; DEBUGLOC-NEXT:    call void @llvm.dbg.value(metadata i8* undef, metadata [[META26:![0-9]+]], metadata !DIExpression()), !dbg [[DBG33:![0-9]+]]
+; DEBUGLOC-NEXT:    [[A_SROA_0_1_GEP2_SROA_RAW_CAST:%.*]] = bitcast i16* [[A_SROA_0]] to i8*, !dbg [[DBG34:![0-9]+]]
+; DEBUGLOC-NEXT:    [[A_SROA_0_1_GEP2_SROA_RAW_IDX:%.*]] = getelementptr inbounds i8, i8* [[A_SROA_0_1_GEP2_SROA_RAW_CAST]], i64 1, !dbg [[DBG34]]
+; DEBUGLOC-NEXT:    [[A_SROA_0_1_A_SROA_0_2_RESULT:%.*]] = load i8, i8* [[A_SROA_0_1_GEP2_SROA_RAW_IDX]], align 1, !dbg [[DBG34]]
+; DEBUGLOC-NEXT:    call void @llvm.dbg.value(metadata i8 [[A_SROA_0_1_A_SROA_0_2_RESULT]], metadata [[META27:![0-9]+]], metadata !DIExpression()), !dbg [[DBG34]]
+; DEBUGLOC-NEXT:    [[A_SROA_0_1_GEP2_SROA_RAW_CAST3:%.*]] = bitcast i16* [[A_SROA_0]] to i8*, !dbg [[DBG35:![0-9]+]]
+; DEBUGLOC-NEXT:    [[A_SROA_0_1_GEP2_SROA_RAW_IDX4:%.*]] = getelementptr inbounds i8, i8* [[A_SROA_0_1_GEP2_SROA_RAW_CAST3]], i64 1, !dbg [[DBG35]]
+; DEBUGLOC-NEXT:    store i8 42, i8* [[A_SROA_0_1_GEP2_SROA_RAW_IDX4]], align 1, !dbg [[DBG35]]
+; DEBUGLOC-NEXT:    ret void, !dbg [[DBG36:![0-9]+]]
+;
 
 ; Check that when sroa rewrites the alloca partition
 ; it preserves the original DebugLocation.
-; DEBUGLOC-LABEL: @test2(
-; DEBUGLOC: {{.*}} = alloca {{.*}} !dbg ![[DbgLoc:[0-9]+]]
-; DEBUGLOC-LABEL: }
-;
-; DEBUGLOC: ![[DbgLoc]] = !DILocation(line: 9,
 
 entry:
   %a = alloca { i8, i8, i8, i8 }, align 2      ; "line 9" to -debugify
@@ -59,9 +97,23 @@ entry:
 define void @PR13920(<2 x i64>* %a, i16* %b) {
 ; Test that alignments on memcpy intrinsics get propagated to loads and stores.
 ; CHECK-LABEL: @PR13920(
-; CHECK: load <2 x i64>, <2 x i64>* %a, align 2
-; CHECK: store <2 x i64> {{.*}}, <2 x i64>* {{.*}}, align 2
-; CHECK: ret void
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[AA_0_COPYLOAD:%.*]] = load <2 x i64>, <2 x i64>* [[A:%.*]], align 2
+; CHECK-NEXT:    [[AA_0_BPTR_SROA_CAST:%.*]] = bitcast i16* [[B:%.*]] to <2 x i64>*
+; CHECK-NEXT:    store <2 x i64> [[AA_0_COPYLOAD]], <2 x i64>* [[AA_0_BPTR_SROA_CAST]], align 2
+; CHECK-NEXT:    ret void
+;
+; DEBUGLOC-LABEL: @PR13920(
+; DEBUGLOC-NEXT:  entry:
+; DEBUGLOC-NEXT:    call void @llvm.dbg.value(metadata <2 x i64>* undef, metadata [[META39:![0-9]+]], metadata !DIExpression()), !dbg [[DBG43:![0-9]+]]
+; DEBUGLOC-NEXT:    call void @llvm.dbg.value(metadata i8* undef, metadata [[META40:![0-9]+]], metadata !DIExpression()), !dbg [[DBG44:![0-9]+]]
+; DEBUGLOC-NEXT:    call void @llvm.dbg.value(metadata i8* undef, metadata [[META41:![0-9]+]], metadata !DIExpression()), !dbg [[DBG45:![0-9]+]]
+; DEBUGLOC-NEXT:    [[AA_0_COPYLOAD:%.*]] = load <2 x i64>, <2 x i64>* [[A:%.*]], align 2, !dbg [[DBG46:![0-9]+]]
+; DEBUGLOC-NEXT:    call void @llvm.dbg.value(metadata i8* undef, metadata [[META42:![0-9]+]], metadata !DIExpression()), !dbg [[DBG47:![0-9]+]]
+; DEBUGLOC-NEXT:    [[AA_0_BPTR_SROA_CAST:%.*]] = bitcast i16* [[B:%.*]] to <2 x i64>*, !dbg [[DBG48:![0-9]+]]
+; DEBUGLOC-NEXT:    store <2 x i64> [[AA_0_COPYLOAD]], <2 x i64>* [[AA_0_BPTR_SROA_CAST]], align 2, !dbg [[DBG48]]
+; DEBUGLOC-NEXT:    ret void, !dbg [[DBG49:![0-9]+]]
+;
 
 entry:
   %aa = alloca <2 x i64>, align 16
@@ -79,9 +131,30 @@ define void @test3(i8* %x) {
 ; expecting. However, also check that any offset within an alloca can in turn
 ; reduce the alignment.
 ; CHECK-LABEL: @test3(
-; CHECK: alloca [22 x i8], align 8
-; CHECK: alloca [18 x i8], align 2
-; CHECK: ret void
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_SROA_0:%.*]] = alloca [22 x i8], align 8
+; CHECK-NEXT:    [[B_SROA_0:%.*]] = alloca [18 x i8], align 2
+; CHECK-NEXT:    [[A_SROA_0_0_A_RAW_SROA_IDX:%.*]] = getelementptr inbounds [22 x i8], [22 x i8]* [[A_SROA_0]], i64 0, i64 0
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[A_SROA_0_0_A_RAW_SROA_IDX]], i8* align 8 [[X:%.*]], i32 22, i1 false)
+; CHECK-NEXT:    [[B_SROA_0_6_B_GEP_SROA_IDX:%.*]] = getelementptr inbounds [18 x i8], [18 x i8]* [[B_SROA_0]], i64 0, i64 0
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[B_SROA_0_6_B_GEP_SROA_IDX]], i8* align 2 [[X]], i32 18, i1 false)
+; CHECK-NEXT:    ret void
+;
+; DEBUGLOC-LABEL: @test3(
+; DEBUGLOC-NEXT:  entry:
+; DEBUGLOC-NEXT:    [[A_SROA_0:%.*]] = alloca [22 x i8], align 8, !dbg [[DBG57:![0-9]+]]
+; DEBUGLOC-NEXT:    call void @llvm.dbg.value(metadata { i8*, i8*, i8* }* undef, metadata [[META52:![0-9]+]], metadata !DIExpression()), !dbg [[DBG57]]
+; DEBUGLOC-NEXT:    [[B_SROA_0:%.*]] = alloca [18 x i8], align 2, !dbg [[DBG58:![0-9]+]]
+; DEBUGLOC-NEXT:    call void @llvm.dbg.value(metadata { i8*, i8*, i8* }* undef, metadata [[META53:![0-9]+]], metadata !DIExpression()), !dbg [[DBG58]]
+; DEBUGLOC-NEXT:    call void @llvm.dbg.value(metadata i8* undef, metadata [[META54:![0-9]+]], metadata !DIExpression()), !dbg [[DBG59:![0-9]+]]
+; DEBUGLOC-NEXT:    [[A_SROA_0_0_A_RAW_SROA_IDX:%.*]] = getelementptr inbounds [22 x i8], [22 x i8]* [[A_SROA_0]], i64 0, i64 0, !dbg [[DBG60:![0-9]+]]
+; DEBUGLOC-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[A_SROA_0_0_A_RAW_SROA_IDX]], i8* align 8 [[X:%.*]], i32 22, i1 false), !dbg [[DBG60]]
+; DEBUGLOC-NEXT:    call void @llvm.dbg.value(metadata i8* undef, metadata [[META55:![0-9]+]], metadata !DIExpression()), !dbg [[DBG61:![0-9]+]]
+; DEBUGLOC-NEXT:    call void @llvm.dbg.value(metadata i8* undef, metadata [[META56:![0-9]+]], metadata !DIExpression()), !dbg [[DBG62:![0-9]+]]
+; DEBUGLOC-NEXT:    [[B_SROA_0_6_B_GEP_SROA_IDX:%.*]] = getelementptr inbounds [18 x i8], [18 x i8]* [[B_SROA_0]], i64 0, i64 0, !dbg [[DBG63:![0-9]+]]
+; DEBUGLOC-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[B_SROA_0_6_B_GEP_SROA_IDX]], i8* align 2 [[X]], i32 18, i1 false), !dbg [[DBG63]]
+; DEBUGLOC-NEXT:    ret void, !dbg [[DBG64:![0-9]+]]
+;
 
 entry:
   %a = alloca { i8*, i8*, i8* }
@@ -100,14 +173,53 @@ define void @test5() {
 ; split or promoted out of existence.
 ;
 ; CHECK-LABEL: @test5(
-; CHECK: alloca [9 x i8]
-; CHECK: alloca [9 x i8]
-; CHECK: store volatile double 0.0{{.*}}, double* %{{.*}}, align 1
-; CHECK: load volatile i16, i16* %{{.*}}, align 1
-; CHECK: load double, double* %{{.*}}, align 1
-; CHECK: store volatile double %{{.*}}, double* %{{.*}}, align 1
-; CHECK: load volatile i16, i16* %{{.*}}, align 1
-; CHECK: ret void
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_SROA_0:%.*]] = alloca [9 x i8], align 1
+; CHECK-NEXT:    [[A_SROA_3:%.*]] = alloca [9 x i8], align 1
+; CHECK-NEXT:    [[A_SROA_0_0_PTR1_SROA_CAST2:%.*]] = bitcast [9 x i8]* [[A_SROA_0]] to double*
+; CHECK-NEXT:    store volatile double 0.000000e+00, double* [[A_SROA_0_0_PTR1_SROA_CAST2]], align 1
+; CHECK-NEXT:    [[A_SROA_0_7_WEIRD_CAST1_SROA_IDX4:%.*]] = getelementptr inbounds [9 x i8], [9 x i8]* [[A_SROA_0]], i64 0, i64 7
+; CHECK-NEXT:    [[A_SROA_0_7_WEIRD_CAST1_SROA_CAST5:%.*]] = bitcast i8* [[A_SROA_0_7_WEIRD_CAST1_SROA_IDX4]] to i16*
+; CHECK-NEXT:    [[A_SROA_0_7_A_SROA_0_7_WEIRD_LOAD1:%.*]] = load volatile i16, i16* [[A_SROA_0_7_WEIRD_CAST1_SROA_CAST5]], align 1
+; CHECK-NEXT:    [[A_SROA_0_0_PTR1_SROA_CAST3:%.*]] = bitcast [9 x i8]* [[A_SROA_0]] to double*
+; CHECK-NEXT:    [[A_SROA_0_0_A_SROA_0_0_D1:%.*]] = load double, double* [[A_SROA_0_0_PTR1_SROA_CAST3]], align 1
+; CHECK-NEXT:    [[A_SROA_3_0_PTR2_SROA_CAST:%.*]] = bitcast [9 x i8]* [[A_SROA_3]] to double*
+; CHECK-NEXT:    store volatile double [[A_SROA_0_0_A_SROA_0_0_D1]], double* [[A_SROA_3_0_PTR2_SROA_CAST]], align 1
+; CHECK-NEXT:    [[A_SROA_3_7_WEIRD_CAST2_SROA_IDX:%.*]] = getelementptr inbounds [9 x i8], [9 x i8]* [[A_SROA_3]], i64 0, i64 7
+; CHECK-NEXT:    [[A_SROA_3_7_WEIRD_CAST2_SROA_CAST:%.*]] = bitcast i8* [[A_SROA_3_7_WEIRD_CAST2_SROA_IDX]] to i16*
+; CHECK-NEXT:    [[A_SROA_3_7_A_SROA_3_16_WEIRD_LOAD2:%.*]] = load volatile i16, i16* [[A_SROA_3_7_WEIRD_CAST2_SROA_CAST]], align 1
+; CHECK-NEXT:    ret void
+;
+; DEBUGLOC-LABEL: @test5(
+; DEBUGLOC-NEXT:  entry:
+; DEBUGLOC-NEXT:    [[A_SROA_0:%.*]] = alloca [9 x i8], align 1, !dbg [[DBG80:![0-9]+]]
+; DEBUGLOC-NEXT:    [[A_SROA_3:%.*]] = alloca [9 x i8], align 1, !dbg [[DBG80]]
+; DEBUGLOC-NEXT:    call void @llvm.dbg.value(metadata [18 x i8]* undef, metadata [[META67:![0-9]+]], metadata !DIExpression()), !dbg [[DBG80]]
+; DEBUGLOC-NEXT:    call void @llvm.dbg.value(metadata i8* undef, metadata [[META68:![0-9]+]], metadata !DIExpression()), !dbg [[DBG81:![0-9]+]]
+; DEBUGLOC-NEXT:    call void @llvm.dbg.value(metadata double* undef, metadata [[META69:![0-9]+]], metadata !DIExpression()), !dbg [[DBG82:![0-9]+]]
+; DEBUGLOC-NEXT:    [[A_SROA_0_0_PTR1_SROA_CAST2:%.*]] = bitcast [9 x i8]* [[A_SROA_0]] to double*, !dbg [[DBG83:![0-9]+]]
+; DEBUGLOC-NEXT:    store volatile double 0.000000e+00, double* [[A_SROA_0_0_PTR1_SROA_CAST2]], align 1, !dbg [[DBG83]]
+; DEBUGLOC-NEXT:    call void @llvm.dbg.value(metadata i8* undef, metadata [[META70:![0-9]+]], metadata !DIExpression()), !dbg [[DBG84:![0-9]+]]
+; DEBUGLOC-NEXT:    call void @llvm.dbg.value(metadata i16* undef, metadata [[META71:![0-9]+]], metadata !DIExpression()), !dbg [[DBG85:![0-9]+]]
+; DEBUGLOC-NEXT:    [[A_SROA_0_7_WEIRD_CAST1_SROA_IDX4:%.*]] = getelementptr inbounds [9 x i8], [9 x i8]* [[A_SROA_0]], i64 0, i64 7, !dbg [[DBG86:![0-9]+]]
+; DEBUGLOC-NEXT:    [[A_SROA_0_7_WEIRD_CAST1_SROA_CAST5:%.*]] = bitcast i8* [[A_SROA_0_7_WEIRD_CAST1_SROA_IDX4]] to i16*, !dbg [[DBG86]]
+; DEBUGLOC-NEXT:    [[A_SROA_0_7_A_SROA_0_7_WEIRD_LOAD1:%.*]] = load volatile i16, i16* [[A_SROA_0_7_WEIRD_CAST1_SROA_CAST5]], align 1, !dbg [[DBG86]]
+; DEBUGLOC-NEXT:    call void @llvm.dbg.value(metadata i16 [[A_SROA_0_7_A_SROA_0_7_WEIRD_LOAD1]], metadata [[META72:![0-9]+]], metadata !DIExpression()), !dbg [[DBG86]]
+; DEBUGLOC-NEXT:    call void @llvm.dbg.value(metadata i8* undef, metadata [[META74:![0-9]+]], metadata !DIExpression()), !dbg [[DBG87:![0-9]+]]
+; DEBUGLOC-NEXT:    call void @llvm.dbg.value(metadata double* undef, metadata [[META75:![0-9]+]], metadata !DIExpression()), !dbg [[DBG88:![0-9]+]]
+; DEBUGLOC-NEXT:    [[A_SROA_0_0_PTR1_SROA_CAST3:%.*]] = bitcast [9 x i8]* [[A_SROA_0]] to double*, !dbg [[DBG89:![0-9]+]]
+; DEBUGLOC-NEXT:    [[A_SROA_0_0_A_SROA_0_0_D1:%.*]] = load double, double* [[A_SROA_0_0_PTR1_SROA_CAST3]], align 1, !dbg [[DBG89]]
+; DEBUGLOC-NEXT:    call void @llvm.dbg.value(metadata double [[A_SROA_0_0_A_SROA_0_0_D1]], metadata [[META76:![0-9]+]], metadata !DIExpression()), !dbg [[DBG89]]
+; DEBUGLOC-NEXT:    [[A_SROA_3_0_PTR2_SROA_CAST:%.*]] = bitcast [9 x i8]* [[A_SROA_3]] to double*, !dbg [[DBG90:![0-9]+]]
+; DEBUGLOC-NEXT:    store volatile double [[A_SROA_0_0_A_SROA_0_0_D1]], double* [[A_SROA_3_0_PTR2_SROA_CAST]], align 1, !dbg [[DBG90]]
+; DEBUGLOC-NEXT:    call void @llvm.dbg.value(metadata i8* undef, metadata [[META77:![0-9]+]], metadata !DIExpression()), !dbg [[DBG91:![0-9]+]]
+; DEBUGLOC-NEXT:    call void @llvm.dbg.value(metadata i16* undef, metadata [[META78:![0-9]+]], metadata !DIExpression()), !dbg [[DBG92:![0-9]+]]
+; DEBUGLOC-NEXT:    [[A_SROA_3_7_WEIRD_CAST2_SROA_IDX:%.*]] = getelementptr inbounds [9 x i8], [9 x i8]* [[A_SROA_3]], i64 0, i64 7, !dbg [[DBG93:![0-9]+]]
+; DEBUGLOC-NEXT:    [[A_SROA_3_7_WEIRD_CAST2_SROA_CAST:%.*]] = bitcast i8* [[A_SROA_3_7_WEIRD_CAST2_SROA_IDX]] to i16*, !dbg [[DBG93]]
+; DEBUGLOC-NEXT:    [[A_SROA_3_7_A_SROA_3_16_WEIRD_LOAD2:%.*]] = load volatile i16, i16* [[A_SROA_3_7_WEIRD_CAST2_SROA_CAST]], align 1, !dbg [[DBG93]]
+; DEBUGLOC-NEXT:    call void @llvm.dbg.value(metadata i16 [[A_SROA_3_7_A_SROA_3_16_WEIRD_LOAD2]], metadata [[META79:![0-9]+]], metadata !DIExpression()), !dbg [[DBG93]]
+; DEBUGLOC-NEXT:    ret void, !dbg [[DBG94:![0-9]+]]
+;
 
 entry:
   %a = alloca [18 x i8]
@@ -133,13 +245,29 @@ define void @test6() {
 ; We should set the alignment on all load and store operations; make sure
 ; we choose an appropriate alignment.
 ; CHECK-LABEL: @test6(
-; CHECK: alloca double, align 8{{$}}
-; CHECK: alloca double, align 8{{$}}
-; CHECK: store{{.*}}, align 8
-; CHECK: load{{.*}}, align 8
-; CHECK: store{{.*}}, align 8
-; CHECK-NOT: align
-; CHECK: ret void
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_SROA_0:%.*]] = alloca double, align 8
+; CHECK-NEXT:    [[A_SROA_2:%.*]] = alloca double, align 8
+; CHECK-NEXT:    store volatile double 0.000000e+00, double* [[A_SROA_0]], align 8
+; CHECK-NEXT:    [[A_SROA_0_0_A_SROA_0_0_VAL:%.*]] = load double, double* [[A_SROA_0]], align 8
+; CHECK-NEXT:    store volatile double [[A_SROA_0_0_A_SROA_0_0_VAL]], double* [[A_SROA_2]], align 8
+; CHECK-NEXT:    ret void
+;
+; DEBUGLOC-LABEL: @test6(
+; DEBUGLOC-NEXT:  entry:
+; DEBUGLOC-NEXT:    [[A_SROA_0:%.*]] = alloca double, align 8, !dbg [[DBG103:![0-9]+]]
+; DEBUGLOC-NEXT:    [[A_SROA_2:%.*]] = alloca double, align 8, !dbg [[DBG103]]
+; DEBUGLOC-NEXT:    call void @llvm.dbg.value(metadata [16 x i8]* undef, metadata [[META97:![0-9]+]], metadata !DIExpression()), !dbg [[DBG103]]
+; DEBUGLOC-NEXT:    call void @llvm.dbg.value(metadata i8* undef, metadata [[META98:![0-9]+]], metadata !DIExpression()), !dbg [[DBG104:![0-9]+]]
+; DEBUGLOC-NEXT:    call void @llvm.dbg.value(metadata double* undef, metadata [[META99:![0-9]+]], metadata !DIExpression()), !dbg [[DBG105:![0-9]+]]
+; DEBUGLOC-NEXT:    store volatile double 0.000000e+00, double* [[A_SROA_0]], align 8, !dbg [[DBG106:![0-9]+]]
+; DEBUGLOC-NEXT:    call void @llvm.dbg.value(metadata i8* undef, metadata [[META100:![0-9]+]], metadata !DIExpression()), !dbg [[DBG107:![0-9]+]]
+; DEBUGLOC-NEXT:    call void @llvm.dbg.value(metadata double* undef, metadata [[META101:![0-9]+]], metadata !DIExpression()), !dbg [[DBG108:![0-9]+]]
+; DEBUGLOC-NEXT:    [[A_SROA_0_0_A_SROA_0_0_VAL:%.*]] = load double, double* [[A_SROA_0]], align 8, !dbg [[DBG109:![0-9]+]]
+; DEBUGLOC-NEXT:    call void @llvm.dbg.value(metadata double [[A_SROA_0_0_A_SROA_0_0_VAL]], metadata [[META102:![0-9]+]], metadata !DIExpression()), !dbg [[DBG109]]
+; DEBUGLOC-NEXT:    store volatile double [[A_SROA_0_0_A_SROA_0_0_VAL]], double* [[A_SROA_2]], align 8, !dbg [[DBG110:![0-9]+]]
+; DEBUGLOC-NEXT:    ret void, !dbg [[DBG111:![0-9]+]]
+;
 
 entry:
   %a = alloca [16 x i8]
@@ -159,7 +287,40 @@ define void @test7(i8* %out) {
 ; Test that we properly compute the destination alignment when rewriting
 ; memcpys as direct loads or stores.
 ; CHECK-LABEL: @test7(
-; CHECK-NOT: alloca
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_SROA_0_0_OUT_SROA_CAST:%.*]] = bitcast i8* [[OUT:%.*]] to double*
+; CHECK-NEXT:    [[A_SROA_0_0_COPYLOAD:%.*]] = load double, double* [[A_SROA_0_0_OUT_SROA_CAST]], align 1
+; CHECK-NEXT:    [[A_SROA_4_0_OUT_SROA_IDX:%.*]] = getelementptr inbounds i8, i8* [[OUT]], i64 8
+; CHECK-NEXT:    [[A_SROA_4_0_OUT_SROA_CAST:%.*]] = bitcast i8* [[A_SROA_4_0_OUT_SROA_IDX]] to double*
+; CHECK-NEXT:    [[A_SROA_4_0_COPYLOAD:%.*]] = load double, double* [[A_SROA_4_0_OUT_SROA_CAST]], align 1
+; CHECK-NEXT:    [[A_SROA_0_0_OUT_SROA_CAST1:%.*]] = bitcast i8* [[OUT]] to double*
+; CHECK-NEXT:    store double [[A_SROA_4_0_COPYLOAD]], double* [[A_SROA_0_0_OUT_SROA_CAST1]], align 1
+; CHECK-NEXT:    [[A_SROA_4_0_OUT_SROA_IDX3:%.*]] = getelementptr inbounds i8, i8* [[OUT]], i64 8
+; CHECK-NEXT:    [[A_SROA_4_0_OUT_SROA_CAST4:%.*]] = bitcast i8* [[A_SROA_4_0_OUT_SROA_IDX3]] to double*
+; CHECK-NEXT:    store double [[A_SROA_0_0_COPYLOAD]], double* [[A_SROA_4_0_OUT_SROA_CAST4]], align 1
+; CHECK-NEXT:    ret void
+;
+; DEBUGLOC-LABEL: @test7(
+; DEBUGLOC-NEXT:  entry:
+; DEBUGLOC-NEXT:    call void @llvm.dbg.value(metadata [16 x i8]* undef, metadata [[META114:![0-9]+]], metadata !DIExpression()), !dbg [[DBG121:![0-9]+]]
+; DEBUGLOC-NEXT:    call void @llvm.dbg.value(metadata i8* undef, metadata [[META115:![0-9]+]], metadata !DIExpression()), !dbg [[DBG122:![0-9]+]]
+; DEBUGLOC-NEXT:    call void @llvm.dbg.value(metadata double* undef, metadata [[META116:![0-9]+]], metadata !DIExpression()), !dbg [[DBG123:![0-9]+]]
+; DEBUGLOC-NEXT:    call void @llvm.dbg.value(metadata i8* undef, metadata [[META117:![0-9]+]], metadata !DIExpression()), !dbg [[DBG124:![0-9]+]]
+; DEBUGLOC-NEXT:    call void @llvm.dbg.value(metadata double* undef, metadata [[META118:![0-9]+]], metadata !DIExpression()), !dbg [[DBG125:![0-9]+]]
+; DEBUGLOC-NEXT:    [[A_SROA_0_0_OUT_SROA_CAST:%.*]] = bitcast i8* [[OUT:%.*]] to double*, !dbg [[DBG126:![0-9]+]]
+; DEBUGLOC-NEXT:    [[A_SROA_0_0_COPYLOAD:%.*]] = load double, double* [[A_SROA_0_0_OUT_SROA_CAST]], align 1, !dbg [[DBG126]]
+; DEBUGLOC-NEXT:    [[A_SROA_4_0_OUT_SROA_IDX:%.*]] = getelementptr inbounds i8, i8* [[OUT]], i64 8, !dbg [[DBG126]]
+; DEBUGLOC-NEXT:    [[A_SROA_4_0_OUT_SROA_CAST:%.*]] = bitcast i8* [[A_SROA_4_0_OUT_SROA_IDX]] to double*, !dbg [[DBG126]]
+; DEBUGLOC-NEXT:    [[A_SROA_4_0_COPYLOAD:%.*]] = load double, double* [[A_SROA_4_0_OUT_SROA_CAST]], align 1, !dbg [[DBG126]]
+; DEBUGLOC-NEXT:    call void @llvm.dbg.value(metadata double [[A_SROA_4_0_COPYLOAD]], metadata [[META119:![0-9]+]], metadata !DIExpression()), !dbg [[DBG127:![0-9]+]]
+; DEBUGLOC-NEXT:    call void @llvm.dbg.value(metadata double [[A_SROA_0_0_COPYLOAD]], metadata [[META120:![0-9]+]], metadata !DIExpression()), !dbg [[DBG128:![0-9]+]]
+; DEBUGLOC-NEXT:    [[A_SROA_0_0_OUT_SROA_CAST1:%.*]] = bitcast i8* [[OUT]] to double*, !dbg [[DBG129:![0-9]+]]
+; DEBUGLOC-NEXT:    store double [[A_SROA_4_0_COPYLOAD]], double* [[A_SROA_0_0_OUT_SROA_CAST1]], align 1, !dbg [[DBG129]]
+; DEBUGLOC-NEXT:    [[A_SROA_4_0_OUT_SROA_IDX3:%.*]] = getelementptr inbounds i8, i8* [[OUT]], i64 8, !dbg [[DBG129]]
+; DEBUGLOC-NEXT:    [[A_SROA_4_0_OUT_SROA_CAST4:%.*]] = bitcast i8* [[A_SROA_4_0_OUT_SROA_IDX3]] to double*, !dbg [[DBG129]]
+; DEBUGLOC-NEXT:    store double [[A_SROA_0_0_COPYLOAD]], double* [[A_SROA_4_0_OUT_SROA_CAST4]], align 1, !dbg [[DBG129]]
+; DEBUGLOC-NEXT:    ret void, !dbg [[DBG130:![0-9]+]]
+;
 
 entry:
   %a = alloca [16 x i8]
@@ -169,8 +330,6 @@ entry:
   %ptr2 = bitcast i8* %raw2 to double*
 
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %raw1, i8* %out, i32 16, i1 false)
-; CHECK: %[[val2:.*]] = load double, double* %{{.*}}, align 1
-; CHECK: %[[val1:.*]] = load double, double* %{{.*}}, align 1
 
   %val1 = load double, double* %ptr2, align 1
   %val2 = load double, double* %ptr1, align 1
@@ -179,20 +338,56 @@ entry:
   store double %val2, double* %ptr2, align 1
 
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %out, i8* %raw1, i32 16, i1 false)
-; CHECK: store double %[[val1]], double* %{{.*}}, align 1
-; CHECK: store double %[[val2]], double* %{{.*}}, align 1
 
   ret void
-; CHECK: ret void
 }
 
 define void @test8() {
 ; CHECK-LABEL: @test8(
-; CHECK: load i32, {{.*}}, align 1
-; CHECK: load i32, {{.*}}, align 1
-; CHECK: load i32, {{.*}}, align 1
-; CHECK: load i32, {{.*}}, align 1
-; CHECK: load i32, {{.*}}, align 1
+; CHECK-NEXT:    [[PTR:%.*]] = alloca [5 x i32], align 1
+; CHECK-NEXT:    [[PTR_8:%.*]] = bitcast [5 x i32]* [[PTR]] to i8*
+; CHECK-NEXT:    call void @populate(i8* [[PTR_8]])
+; CHECK-NEXT:    [[VAL_FCA_0_GEP:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* [[PTR]], i32 0, i32 0
+; CHECK-NEXT:    [[VAL_FCA_0_LOAD:%.*]] = load i32, i32* [[VAL_FCA_0_GEP]], align 1
+; CHECK-NEXT:    [[VAL_FCA_0_INSERT:%.*]] = insertvalue [5 x i32] poison, i32 [[VAL_FCA_0_LOAD]], 0
+; CHECK-NEXT:    [[VAL_FCA_1_GEP:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* [[PTR]], i32 0, i32 1
+; CHECK-NEXT:    [[VAL_FCA_1_LOAD:%.*]] = load i32, i32* [[VAL_FCA_1_GEP]], align 1
+; CHECK-NEXT:    [[VAL_FCA_1_INSERT:%.*]] = insertvalue [5 x i32] [[VAL_FCA_0_INSERT]], i32 [[VAL_FCA_1_LOAD]], 1
+; CHECK-NEXT:    [[VAL_FCA_2_GEP:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* [[PTR]], i32 0, i32 2
+; CHECK-NEXT:    [[VAL_FCA_2_LOAD:%.*]] = load i32, i32* [[VAL_FCA_2_GEP]], align 1
+; CHECK-NEXT:    [[VAL_FCA_2_INSERT:%.*]] = insertvalue [5 x i32] [[VAL_FCA_1_INSERT]], i32 [[VAL_FCA_2_LOAD]], 2
+; CHECK-NEXT:    [[VAL_FCA_3_GEP:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* [[PTR]], i32 0, i32 3
+; CHECK-NEXT:    [[VAL_FCA_3_LOAD:%.*]] = load i32, i32* [[VAL_FCA_3_GEP]], align 1
+; CHECK-NEXT:    [[VAL_FCA_3_INSERT:%.*]] = insertvalue [5 x i32] [[VAL_FCA_2_INSERT]], i32 [[VAL_FCA_3_LOAD]], 3
+; CHECK-NEXT:    [[VAL_FCA_4_GEP:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* [[PTR]], i32 0, i32 4
+; CHECK-NEXT:    [[VAL_FCA_4_LOAD:%.*]] = load i32, i32* [[VAL_FCA_4_GEP]], align 1
+; CHECK-NEXT:    [[VAL_FCA_4_INSERT:%.*]] = insertvalue [5 x i32] [[VAL_FCA_3_INSERT]], i32 [[VAL_FCA_4_LOAD]], 4
+; CHECK-NEXT:    ret void
+;
+; DEBUGLOC-LABEL: @test8(
+; DEBUGLOC-NEXT:    [[PTR:%.*]] = alloca [5 x i32], align 1, !dbg [[DBG137:![0-9]+]]
+; DEBUGLOC-NEXT:    call void @llvm.dbg.value(metadata [5 x i32]* [[PTR]], metadata [[META133:![0-9]+]], metadata !DIExpression()), !dbg [[DBG137]]
+; DEBUGLOC-NEXT:    [[PTR_8:%.*]] = bitcast [5 x i32]* [[PTR]] to i8*, !dbg [[DBG138:![0-9]+]]
+; DEBUGLOC-NEXT:    call void @llvm.dbg.value(metadata i8* [[PTR_8]], metadata [[META134:![0-9]+]], metadata !DIExpression()), !dbg [[DBG138]]
+; DEBUGLOC-NEXT:    call void @populate(i8* [[PTR_8]]), !dbg [[DBG139:![0-9]+]]
+; DEBUGLOC-NEXT:    [[VAL_FCA_0_GEP:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* [[PTR]], i32 0, i32 0, !dbg [[DBG140:![0-9]+]]
+; DEBUGLOC-NEXT:    [[VAL_FCA_0_LOAD:%.*]] = load i32, i32* [[VAL_FCA_0_GEP]], align 1, !dbg [[DBG140]]
+; DEBUGLOC-NEXT:    [[VAL_FCA_0_INSERT:%.*]] = insertvalue [5 x i32] poison, i32 [[VAL_FCA_0_LOAD]], 0, !dbg [[DBG140]]
+; DEBUGLOC-NEXT:    [[VAL_FCA_1_GEP:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* [[PTR]], i32 0, i32 1, !dbg [[DBG140]]
+; DEBUGLOC-NEXT:    [[VAL_FCA_1_LOAD:%.*]] = load i32, i32* [[VAL_FCA_1_GEP]], align 1, !dbg [[DBG140]]
+; DEBUGLOC-NEXT:    [[VAL_FCA_1_INSERT:%.*]] = insertvalue [5 x i32] [[VAL_FCA_0_INSERT]], i32 [[VAL_FCA_1_LOAD]], 1, !dbg [[DBG140]]
+; DEBUGLOC-NEXT:    [[VAL_FCA_2_GEP:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* [[PTR]], i32 0, i32 2, !dbg [[DBG140]]
+; DEBUGLOC-NEXT:    [[VAL_FCA_2_LOAD:%.*]] = load i32, i32* [[VAL_FCA_2_GEP]], align 1, !dbg [[DBG140]]
+; DEBUGLOC-NEXT:    [[VAL_FCA_2_INSERT:%.*]] = insertvalue [5 x i32] [[VAL_FCA_1_INSERT]], i32 [[VAL_FCA_2_LOAD]], 2, !dbg [[DBG140]]
+; DEBUGLOC-NEXT:    [[VAL_FCA_3_GEP:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* [[PTR]], i32 0, i32 3, !dbg [[DBG140]]
+; DEBUGLOC-NEXT:    [[VAL_FCA_3_LOAD:%.*]] = load i32, i32* [[VAL_FCA_3_GEP]], align 1, !dbg [[DBG140]]
+; DEBUGLOC-NEXT:    [[VAL_FCA_3_INSERT:%.*]] = insertvalue [5 x i32] [[VAL_FCA_2_INSERT]], i32 [[VAL_FCA_3_LOAD]], 3, !dbg [[DBG140]]
+; DEBUGLOC-NEXT:    [[VAL_FCA_4_GEP:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* [[PTR]], i32 0, i32 4, !dbg [[DBG140]]
+; DEBUGLOC-NEXT:    [[VAL_FCA_4_LOAD:%.*]] = load i32, i32* [[VAL_FCA_4_GEP]], align 1, !dbg [[DBG140]]
+; DEBUGLOC-NEXT:    [[VAL_FCA_4_INSERT:%.*]] = insertvalue [5 x i32] [[VAL_FCA_3_INSERT]], i32 [[VAL_FCA_4_LOAD]], 4, !dbg [[DBG140]]
+; DEBUGLOC-NEXT:    call void @llvm.dbg.value(metadata [5 x i32] [[VAL_FCA_4_INSERT]], metadata [[META135:![0-9]+]], metadata !DIExpression()), !dbg [[DBG140]]
+; DEBUGLOC-NEXT:    ret void, !dbg [[DBG141:![0-9]+]]
+;
 
   %ptr = alloca [5 x i32], align 1
   %ptr.8 = bitcast [5 x i32]* %ptr to i8*
@@ -203,11 +398,50 @@ define void @test8() {
 
 define void @test9() {
 ; CHECK-LABEL: @test9(
-; CHECK: load i32, {{.*}}, align 8
-; CHECK: load i32, {{.*}}, align 4
-; CHECK: load i32, {{.*}}, align 8
-; CHECK: load i32, {{.*}}, align 4
-; CHECK: load i32, {{.*}}, align 8
+; CHECK-NEXT:    [[PTR:%.*]] = alloca [5 x i32], align 8
+; CHECK-NEXT:    [[PTR_8:%.*]] = bitcast [5 x i32]* [[PTR]] to i8*
+; CHECK-NEXT:    call void @populate(i8* [[PTR_8]])
+; CHECK-NEXT:    [[VAL_FCA_0_GEP:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* [[PTR]], i32 0, i32 0
+; CHECK-NEXT:    [[VAL_FCA_0_LOAD:%.*]] = load i32, i32* [[VAL_FCA_0_GEP]], align 8
+; CHECK-NEXT:    [[VAL_FCA_0_INSERT:%.*]] = insertvalue [5 x i32] poison, i32 [[VAL_FCA_0_LOAD]], 0
+; CHECK-NEXT:    [[VAL_FCA_1_GEP:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* [[PTR]], i32 0, i32 1
+; CHECK-NEXT:    [[VAL_FCA_1_LOAD:%.*]] = load i32, i32* [[VAL_FCA_1_GEP]], align 4
+; CHECK-NEXT:    [[VAL_FCA_1_INSERT:%.*]] = insertvalue [5 x i32] [[VAL_FCA_0_INSERT]], i32 [[VAL_FCA_1_LOAD]], 1
+; CHECK-NEXT:    [[VAL_FCA_2_GEP:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* [[PTR]], i32 0, i32 2
+; CHECK-NEXT:    [[VAL_FCA_2_LOAD:%.*]] = load i32, i32* [[VAL_FCA_2_GEP]], align 8
+; CHECK-NEXT:    [[VAL_FCA_2_INSERT:%.*]] = insertvalue [5 x i32] [[VAL_FCA_1_INSERT]], i32 [[VAL_FCA_2_LOAD]], 2
+; CHECK-NEXT:    [[VAL_FCA_3_GEP:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* [[PTR]], i32 0, i32 3
+; CHECK-NEXT:    [[VAL_FCA_3_LOAD:%.*]] = load i32, i32* [[VAL_FCA_3_GEP]], align 4
+; CHECK-NEXT:    [[VAL_FCA_3_INSERT:%.*]] = insertvalue [5 x i32] [[VAL_FCA_2_INSERT]], i32 [[VAL_FCA_3_LOAD]], 3
+; CHECK-NEXT:    [[VAL_FCA_4_GEP:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* [[PTR]], i32 0, i32 4
+; CHECK-NEXT:    [[VAL_FCA_4_LOAD:%.*]] = load i32, i32* [[VAL_FCA_4_GEP]], align 8
+; CHECK-NEXT:    [[VAL_FCA_4_INSERT:%.*]] = insertvalue [5 x i32] [[VAL_FCA_3_INSERT]], i32 [[VAL_FCA_4_LOAD]], 4
+; CHECK-NEXT:    ret void
+;
+; DEBUGLOC-LABEL: @test9(
+; DEBUGLOC-NEXT:    [[PTR:%.*]] = alloca [5 x i32], align 8, !dbg [[DBG147:![0-9]+]]
+; DEBUGLOC-NEXT:    call void @llvm.dbg.value(metadata [5 x i32]* [[PTR]], metadata [[META144:![0-9]+]], metadata !DIExpression()), !dbg [[DBG147]]
+; DEBUGLOC-NEXT:    [[PTR_8:%.*]] = bitcast [5 x i32]* [[PTR]] to i8*, !dbg [[DBG148:![0-9]+]]
+; DEBUGLOC-NEXT:    call void @llvm.dbg.value(metadata i8* [[PTR_8]], metadata [[META145:![0-9]+]], metadata !DIExpression()), !dbg [[DBG148]]
+; DEBUGLOC-NEXT:    call void @populate(i8* [[PTR_8]]), !dbg [[DBG149:![0-9]+]]
+; DEBUGLOC-NEXT:    [[VAL_FCA_0_GEP:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* [[PTR]], i32 0, i32 0, !dbg [[DBG150:![0-9]+]]
+; DEBUGLOC-NEXT:    [[VAL_FCA_0_LOAD:%.*]] = load i32, i32* [[VAL_FCA_0_GEP]], align 8, !dbg [[DBG150]]
+; DEBUGLOC-NEXT:    [[VAL_FCA_0_INSERT:%.*]] = insertvalue [5 x i32] poison, i32 [[VAL_FCA_0_LOAD]], 0, !dbg [[DBG150]]
+; DEBUGLOC-NEXT:    [[VAL_FCA_1_GEP:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* [[PTR]], i32 0, i32 1, !dbg [[DBG150]]
+; DEBUGLOC-NEXT:    [[VAL_FCA_1_LOAD:%.*]] = load i32, i32* [[VAL_FCA_1_GEP]], align 4, !dbg [[DBG150]]
+; DEBUGLOC-NEXT:    [[VAL_FCA_1_INSERT:%.*]] = insertvalue [5 x i32] [[VAL_FCA_0_INSERT]], i32 [[VAL_FCA_1_LOAD]], 1, !dbg [[DBG150]]
+; DEBUGLOC-NEXT:    [[VAL_FCA_2_GEP:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* [[PTR]], i32 0, i32 2, !dbg [[DBG150]]
+; DEBUGLOC-NEXT:    [[VAL_FCA_2_LOAD:%.*]] = load i32, i32* [[VAL_FCA_2_GEP]], align 8, !dbg [[DBG150]]
+; DEBUGLOC-NEXT:    [[VAL_FCA_2_INSERT:%.*]] = insertvalue [5 x i32] [[VAL_FCA_1_INSERT]], i32 [[VAL_FCA_2_LOAD]], 2, !dbg [[DBG150]]
+; DEBUGLOC-NEXT:    [[VAL_FCA_3_GEP:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* [[PTR]], i32 0, i32 3, !dbg [[DBG150]]
+; DEBUGLOC-NEXT:    [[VAL_FCA_3_LOAD:%.*]] = load i32, i32* [[VAL_FCA_3_GEP]], align 4, !dbg [[DBG150]]
+; DEBUGLOC-NEXT:    [[VAL_FCA_3_INSERT:%.*]] = insertvalue [5 x i32] [[VAL_FCA_2_INSERT]], i32 [[VAL_FCA_3_LOAD]], 3, !dbg [[DBG150]]
+; DEBUGLOC-NEXT:    [[VAL_FCA_4_GEP:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* [[PTR]], i32 0, i32 4, !dbg [[DBG150]]
+; DEBUGLOC-NEXT:    [[VAL_FCA_4_LOAD:%.*]] = load i32, i32* [[VAL_FCA_4_GEP]], align 8, !dbg [[DBG150]]
+; DEBUGLOC-NEXT:    [[VAL_FCA_4_INSERT:%.*]] = insertvalue [5 x i32] [[VAL_FCA_3_INSERT]], i32 [[VAL_FCA_4_LOAD]], 4, !dbg [[DBG150]]
+; DEBUGLOC-NEXT:    call void @llvm.dbg.value(metadata [5 x i32] [[VAL_FCA_4_INSERT]], metadata [[META146:![0-9]+]], metadata !DIExpression()), !dbg [[DBG150]]
+; DEBUGLOC-NEXT:    ret void, !dbg [[DBG151:![0-9]+]]
+;
 
   %ptr = alloca [5 x i32], align 8
   %ptr.8 = bitcast [5 x i32]* %ptr to i8*
@@ -218,11 +452,50 @@ define void @test9() {
 
 define void @test10() {
 ; CHECK-LABEL: @test10(
-; CHECK: load i32, {{.*}}, align 2
-; CHECK: load i8, {{.*}}, align 2
-; CHECK: load i8, {{.*}}, align 1
-; CHECK: load i8, {{.*}}, align 2
-; CHECK: load i16, {{.*}}, align 2
+; CHECK-NEXT:    [[PTR:%.*]] = alloca { i32, i8, i8, { i8, i16 } }, align 2
+; CHECK-NEXT:    [[PTR_8:%.*]] = bitcast { i32, i8, i8, { i8, i16 } }* [[PTR]] to i8*
+; CHECK-NEXT:    call void @populate(i8* [[PTR_8]])
+; CHECK-NEXT:    [[VAL_FCA_0_GEP:%.*]] = getelementptr inbounds { i32, i8, i8, { i8, i16 } }, { i32, i8, i8, { i8, i16 } }* [[PTR]], i32 0, i32 0
+; CHECK-NEXT:    [[VAL_FCA_0_LOAD:%.*]] = load i32, i32* [[VAL_FCA_0_GEP]], align 2
+; CHECK-NEXT:    [[VAL_FCA_0_INSERT:%.*]] = insertvalue { i32, i8, i8, { i8, i16 } } poison, i32 [[VAL_FCA_0_LOAD]], 0
+; CHECK-NEXT:    [[VAL_FCA_1_GEP:%.*]] = getelementptr inbounds { i32, i8, i8, { i8, i16 } }, { i32, i8, i8, { i8, i16 } }* [[PTR]], i32 0, i32 1
+; CHECK-NEXT:    [[VAL_FCA_1_LOAD:%.*]] = load i8, i8* [[VAL_FCA_1_GEP]], align 2
+; CHECK-NEXT:    [[VAL_FCA_1_INSERT:%.*]] = insertvalue { i32, i8, i8, { i8, i16 } } [[VAL_FCA_0_INSERT]], i8 [[VAL_FCA_1_LOAD]], 1
+; CHECK-NEXT:    [[VAL_FCA_2_GEP:%.*]] = getelementptr inbounds { i32, i8, i8, { i8, i16 } }, { i32, i8, i8, { i8, i16 } }* [[PTR]], i32 0, i32 2
+; CHECK-NEXT:    [[VAL_FCA_2_LOAD:%.*]] = load i8, i8* [[VAL_FCA_2_GEP]], align 1
+; CHECK-NEXT:    [[VAL_FCA_2_INSERT:%.*]] = insertvalue { i32, i8, i8, { i8, i16 } } [[VAL_FCA_1_INSERT]], i8 [[VAL_FCA_2_LOAD]], 2
+; CHECK-NEXT:    [[VAL_FCA_3_0_GEP:%.*]] = getelementptr inbounds { i32, i8, i8, { i8, i16 } }, { i32, i8, i8, { i8, i16 } }* [[PTR]], i32 0, i32 3, i32 0
+; CHECK-NEXT:    [[VAL_FCA_3_0_LOAD:%.*]] = load i8, i8* [[VAL_FCA_3_0_GEP]], align 2
+; CHECK-NEXT:    [[VAL_FCA_3_0_INSERT:%.*]] = insertvalue { i32, i8, i8, { i8, i16 } } [[VAL_FCA_2_INSERT]], i8 [[VAL_FCA_3_0_LOAD]], 3, 0
+; CHECK-NEXT:    [[VAL_FCA_3_1_GEP:%.*]] = getelementptr inbounds { i32, i8, i8, { i8, i16 } }, { i32, i8, i8, { i8, i16 } }* [[PTR]], i32 0, i32 3, i32 1
+; CHECK-NEXT:    [[VAL_FCA_3_1_LOAD:%.*]] = load i16, i16* [[VAL_FCA_3_1_GEP]], align 2
+; CHECK-NEXT:    [[VAL_FCA_3_1_INSERT:%.*]] = insertvalue { i32, i8, i8, { i8, i16 } } [[VAL_FCA_3_0_INSERT]], i16 [[VAL_FCA_3_1_LOAD]], 3, 1
+; CHECK-NEXT:    ret void
+;
+; DEBUGLOC-LABEL: @test10(
+; DEBUGLOC-NEXT:    [[PTR:%.*]] = alloca { i32, i8, i8, { i8, i16 } }, align 2, !dbg [[DBG158:![0-9]+]]
+; DEBUGLOC-NEXT:    call void @llvm.dbg.value(metadata { i32, i8, i8, { i8, i16 } }* [[PTR]], metadata [[META154:![0-9]+]], metadata !DIExpression()), !dbg [[DBG158]]
+; DEBUGLOC-NEXT:    [[PTR_8:%.*]] = bitcast { i32, i8, i8, { i8, i16 } }* [[PTR]] to i8*, !dbg [[DBG159:![0-9]+]]
+; DEBUGLOC-NEXT:    call void @llvm.dbg.value(metadata i8* [[PTR_8]], metadata [[META155:![0-9]+]], metadata !DIExpression()), !dbg [[DBG159]]
+; DEBUGLOC-NEXT:    call void @populate(i8* [[PTR_8]]), !dbg [[DBG160:![0-9]+]]
+; DEBUGLOC-NEXT:    [[VAL_FCA_0_GEP:%.*]] = getelementptr inbounds { i32, i8, i8, { i8, i16 } }, { i32, i8, i8, { i8, i16 } }* [[PTR]], i32 0, i32 0, !dbg [[DBG161:![0-9]+]]
+; DEBUGLOC-NEXT:    [[VAL_FCA_0_LOAD:%.*]] = load i32, i32* [[VAL_FCA_0_GEP]], align 2, !dbg [[DBG161]]
+; DEBUGLOC-NEXT:    [[VAL_FCA_0_INSERT:%.*]] = insertvalue { i32, i8, i8, { i8, i16 } } poison, i32 [[VAL_FCA_0_LOAD]], 0, !dbg [[DBG161]]
+; DEBUGLOC-NEXT:    [[VAL_FCA_1_GEP:%.*]] = getelementptr inbounds { i32, i8, i8, { i8, i16 } }, { i32, i8, i8, { i8, i16 } }* [[PTR]], i32 0, i32 1, !dbg [[DBG161]]
+; DEBUGLOC-NEXT:    [[VAL_FCA_1_LOAD:%.*]] = load i8, i8* [[VAL_FCA_1_GEP]], align 2, !dbg [[DBG161]]
+; DEBUGLOC-NEXT:    [[VAL_FCA_1_INSERT:%.*]] = insertvalue { i32, i8, i8, { i8, i16 } } [[VAL_FCA_0_INSERT]], i8 [[VAL_FCA_1_LOAD]], 1, !dbg [[DBG161]]
+; DEBUGLOC-NEXT:    [[VAL_FCA_2_GEP:%.*]] = getelementptr inbounds { i32, i8, i8, { i8, i16 } }, { i32, i8, i8, { i8, i16 } }* [[PTR]], i32 0, i32 2, !dbg [[DBG161]]
+; DEBUGLOC-NEXT:    [[VAL_FCA_2_LOAD:%.*]] = load i8, i8* [[VAL_FCA_2_GEP]], align 1, !dbg [[DBG161]]
+; DEBUGLOC-NEXT:    [[VAL_FCA_2_INSERT:%.*]] = insertvalue { i32, i8, i8, { i8, i16 } } [[VAL_FCA_1_INSERT]], i8 [[VAL_FCA_2_LOAD]], 2, !dbg [[DBG161]]
+; DEBUGLOC-NEXT:    [[VAL_FCA_3_0_GEP:%.*]] = getelementptr inbounds { i32, i8, i8, { i8, i16 } }, { i32, i8, i8, { i8, i16 } }* [[PTR]], i32 0, i32 3, i32 0, !dbg [[DBG161]]
+; DEBUGLOC-NEXT:    [[VAL_FCA_3_0_LOAD:%.*]] = load i8, i8* [[VAL_FCA_3_0_GEP]], align 2, !dbg [[DBG161]]
+; DEBUGLOC-NEXT:    [[VAL_FCA_3_0_INSERT:%.*]] = insertvalue { i32, i8, i8, { i8, i16 } } [[VAL_FCA_2_INSERT]], i8 [[VAL_FCA_3_0_LOAD]], 3, 0, !dbg [[DBG161]]
+; DEBUGLOC-NEXT:    [[VAL_FCA_3_1_GEP:%.*]] = getelementptr inbounds { i32, i8, i8, { i8, i16 } }, { i32, i8, i8, { i8, i16 } }* [[PTR]], i32 0, i32 3, i32 1, !dbg [[DBG161]]
+; DEBUGLOC-NEXT:    [[VAL_FCA_3_1_LOAD:%.*]] = load i16, i16* [[VAL_FCA_3_1_GEP]], align 2, !dbg [[DBG161]]
+; DEBUGLOC-NEXT:    [[VAL_FCA_3_1_INSERT:%.*]] = insertvalue { i32, i8, i8, { i8, i16 } } [[VAL_FCA_3_0_INSERT]], i16 [[VAL_FCA_3_1_LOAD]], 3, 1, !dbg [[DBG161]]
+; DEBUGLOC-NEXT:    call void @llvm.dbg.value(metadata { i32, i8, i8, { i8, i16 } } [[VAL_FCA_3_1_INSERT]], metadata [[META156:![0-9]+]], metadata !DIExpression()), !dbg [[DBG161]]
+; DEBUGLOC-NEXT:    ret void, !dbg [[DBG162:![0-9]+]]
+;
 
   %ptr = alloca {i32, i8, i8, {i8, i16}}, align 2
   %ptr.8 = bitcast {i32, i8, i8, {i8, i16}}* %ptr to i8*
@@ -233,8 +506,28 @@ define void @test10() {
 
 %struct = type { i32, i32 }
 define dso_local i32 @pr45010(%struct* %A) {
-; CHECK-LABEL: @pr45010
-; CHECK: load atomic volatile i32, {{.*}}, align 4
+; CHECK-LABEL: @pr45010(
+; CHECK-NEXT:    [[B_SROA_0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[A_I:%.*]] = getelementptr inbounds [[STRUCT:%.*]], %struct* [[A:%.*]], i32 0, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_I]], align 4
+; CHECK-NEXT:    store atomic volatile i32 [[TMP1]], i32* [[B_SROA_0]] release, align 4
+; CHECK-NEXT:    [[B_SROA_0_0_B_SROA_0_0_X:%.*]] = load atomic volatile i32, i32* [[B_SROA_0]] acquire, align 4
+; CHECK-NEXT:    ret i32 [[B_SROA_0_0_B_SROA_0_0_X]]
+;
+; DEBUGLOC-LABEL: @pr45010(
+; DEBUGLOC-NEXT:    [[B_SROA_0:%.*]] = alloca i32, align 4, !dbg [[DBG172:![0-9]+]]
+; DEBUGLOC-NEXT:    call void @llvm.dbg.value(metadata %struct* undef, metadata [[META165:![0-9]+]], metadata !DIExpression()), !dbg [[DBG172]]
+; DEBUGLOC-NEXT:    [[A_I:%.*]] = getelementptr inbounds [[STRUCT:%.*]], %struct* [[A:%.*]], i32 0, i32 0, !dbg [[DBG173:![0-9]+]]
+; DEBUGLOC-NEXT:    call void @llvm.dbg.value(metadata i32* [[A_I]], metadata [[META166:![0-9]+]], metadata !DIExpression()), !dbg [[DBG173]]
+; DEBUGLOC-NEXT:    call void @llvm.dbg.value(metadata i32* undef, metadata [[META167:![0-9]+]], metadata !DIExpression()), !dbg [[DBG174:![0-9]+]]
+; DEBUGLOC-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_I]], align 4, !dbg [[DBG175:![0-9]+]]
+; DEBUGLOC-NEXT:    call void @llvm.dbg.value(metadata i32 [[TMP1]], metadata [[META168:![0-9]+]], metadata !DIExpression()), !dbg [[DBG175]]
+; DEBUGLOC-NEXT:    store atomic volatile i32 [[TMP1]], i32* [[B_SROA_0]] release, align 4, !dbg [[DBG176:![0-9]+]]
+; DEBUGLOC-NEXT:    call void @llvm.dbg.value(metadata i32* undef, metadata [[META170:![0-9]+]], metadata !DIExpression()), !dbg [[DBG177:![0-9]+]]
+; DEBUGLOC-NEXT:    [[B_SROA_0_0_B_SROA_0_0_X:%.*]] = load atomic volatile i32, i32* [[B_SROA_0]] acquire, align 4, !dbg [[DBG178:![0-9]+]]
+; DEBUGLOC-NEXT:    call void @llvm.dbg.value(metadata i32 [[B_SROA_0_0_B_SROA_0_0_X]], metadata [[META171:![0-9]+]], metadata !DIExpression()), !dbg [[DBG178]]
+; DEBUGLOC-NEXT:    ret i32 [[B_SROA_0_0_B_SROA_0_0_X]], !dbg [[DBG179:![0-9]+]]
+;
 
   %B = alloca %struct, align 4
   %A.i = getelementptr inbounds %struct, %struct* %A, i32 0, i32 0
diff --git a/llvm/test/Transforms/SROA/big-endian.ll b/llvm/test/Transforms/SROA/big-endian.ll
index 0853f9e9ed4a..7538c311aa51 100644
--- a/llvm/test/Transforms/SROA/big-endian.ll
+++ b/llvm/test/Transforms/SROA/big-endian.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -passes=sroa -S | FileCheck %s
 
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
@@ -9,11 +10,31 @@ define i8 @test1() {
 ; ordering.
 ;
 ; CHECK-LABEL: @test1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_SROA_3_0_INSERT_EXT:%.*]] = zext i8 0 to i24
+; CHECK-NEXT:    [[A_SROA_3_0_INSERT_MASK:%.*]] = and i24 undef, -256
+; CHECK-NEXT:    [[A_SROA_3_0_INSERT_INSERT:%.*]] = or i24 [[A_SROA_3_0_INSERT_MASK]], [[A_SROA_3_0_INSERT_EXT]]
+; CHECK-NEXT:    [[A_SROA_2_0_INSERT_EXT:%.*]] = zext i8 0 to i24
+; CHECK-NEXT:    [[A_SROA_2_0_INSERT_SHIFT:%.*]] = shl i24 [[A_SROA_2_0_INSERT_EXT]], 8
+; CHECK-NEXT:    [[A_SROA_2_0_INSERT_MASK:%.*]] = and i24 [[A_SROA_3_0_INSERT_INSERT]], -65281
+; CHECK-NEXT:    [[A_SROA_2_0_INSERT_INSERT:%.*]] = or i24 [[A_SROA_2_0_INSERT_MASK]], [[A_SROA_2_0_INSERT_SHIFT]]
+; CHECK-NEXT:    [[A_SROA_0_0_INSERT_EXT:%.*]] = zext i8 0 to i24
+; CHECK-NEXT:    [[A_SROA_0_0_INSERT_SHIFT:%.*]] = shl i24 [[A_SROA_0_0_INSERT_EXT]], 16
+; CHECK-NEXT:    [[A_SROA_0_0_INSERT_MASK:%.*]] = and i24 [[A_SROA_2_0_INSERT_INSERT]], 65535
+; CHECK-NEXT:    [[A_SROA_0_0_INSERT_INSERT:%.*]] = or i24 [[A_SROA_0_0_INSERT_MASK]], [[A_SROA_0_0_INSERT_SHIFT]]
+; CHECK-NEXT:    [[B_SROA_0_0_EXTRACT_SHIFT:%.*]] = lshr i24 [[A_SROA_0_0_INSERT_INSERT]], 16
+; CHECK-NEXT:    [[B_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i24 [[B_SROA_0_0_EXTRACT_SHIFT]] to i8
+; CHECK-NEXT:    [[B_SROA_2_0_EXTRACT_SHIFT:%.*]] = lshr i24 [[A_SROA_0_0_INSERT_INSERT]], 8
+; CHECK-NEXT:    [[B_SROA_2_0_EXTRACT_TRUNC:%.*]] = trunc i24 [[B_SROA_2_0_EXTRACT_SHIFT]] to i8
+; CHECK-NEXT:    [[B_SROA_3_0_EXTRACT_TRUNC:%.*]] = trunc i24 [[A_SROA_0_0_INSERT_INSERT]] to i8
+; CHECK-NEXT:    [[BSUM0:%.*]] = add i8 [[B_SROA_0_0_EXTRACT_TRUNC]], [[B_SROA_2_0_EXTRACT_TRUNC]]
+; CHECK-NEXT:    [[BSUM1:%.*]] = add i8 [[BSUM0]], [[B_SROA_3_0_EXTRACT_TRUNC]]
+; CHECK-NEXT:    ret i8 [[BSUM1]]
+;
 
 entry:
   %a = alloca [3 x i8]
   %b = alloca [3 x i8]
-; CHECK-NOT: alloca
 
   %a0ptr = getelementptr [3 x i8], [3 x i8]* %a, i64 0, i32 0
   store i8 0, i8* %a0ptr
@@ -23,19 +44,6 @@ entry:
   store i8 0, i8* %a2ptr
   %aiptr = bitcast [3 x i8]* %a to i24*
   %ai = load i24, i24* %aiptr
-; CHECK-NOT: store
-; CHECK-NOT: load
-; CHECK:      %[[ext2:.*]] = zext i8 0 to i24
-; CHECK-NEXT: %[[mask2:.*]] = and i24 undef, -256
-; CHECK-NEXT: %[[insert2:.*]] = or i24 %[[mask2]], %[[ext2]]
-; CHECK-NEXT: %[[ext1:.*]] = zext i8 0 to i24
-; CHECK-NEXT: %[[shift1:.*]] = shl i24 %[[ext1]], 8
-; CHECK-NEXT: %[[mask1:.*]] = and i24 %[[insert2]], -65281
-; CHECK-NEXT: %[[insert1:.*]] = or i24 %[[mask1]], %[[shift1]]
-; CHECK-NEXT: %[[ext0:.*]] = zext i8 0 to i24
-; CHECK-NEXT: %[[shift0:.*]] = shl i24 %[[ext0]], 16
-; CHECK-NEXT: %[[mask0:.*]] = and i24 %[[insert1]], 65535
-; CHECK-NEXT: %[[insert0:.*]] = or i24 %[[mask0]], %[[shift0]]
 
   %biptr = bitcast [3 x i8]* %b to i24*
   store i24 %ai, i24* %biptr
@@ -45,20 +53,10 @@ entry:
   %b1 = load i8, i8* %b1ptr
   %b2ptr = getelementptr [3 x i8], [3 x i8]* %b, i64 0, i32 2
   %b2 = load i8, i8* %b2ptr
-; CHECK-NOT: store
-; CHECK-NOT: load
-; CHECK:      %[[shift0:.*]] = lshr i24 %[[insert0]], 16
-; CHECK-NEXT: %[[trunc0:.*]] = trunc i24 %[[shift0]] to i8
-; CHECK-NEXT: %[[shift1:.*]] = lshr i24 %[[insert0]], 8
-; CHECK-NEXT: %[[trunc1:.*]] = trunc i24 %[[shift1]] to i8
-; CHECK-NEXT: %[[trunc2:.*]] = trunc i24 %[[insert0]] to i8
 
   %bsum0 = add i8 %b0, %b1
   %bsum1 = add i8 %bsum0, %b2
   ret i8 %bsum1
-; CHECK:      %[[sum0:.*]] = add i8 %[[trunc0]], %[[trunc1]]
-; CHECK-NEXT: %[[sum1:.*]] = add i8 %[[sum0]], %[[trunc2]]
-; CHECK-NEXT: ret i8 %[[sum1]]
 }
 
 define i64 @test2() {
@@ -66,18 +64,37 @@ define i64 @test2() {
 ; promoted.
 ;
 ; CHECK-LABEL: @test2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_SROA_2_SROA_4_0_INSERT_EXT:%.*]] = zext i8 1 to i40
+; CHECK-NEXT:    [[A_SROA_2_SROA_4_0_INSERT_MASK:%.*]] = and i40 undef, -256
+; CHECK-NEXT:    [[A_SROA_2_SROA_4_0_INSERT_INSERT:%.*]] = or i40 [[A_SROA_2_SROA_4_0_INSERT_MASK]], [[A_SROA_2_SROA_4_0_INSERT_EXT]]
+; CHECK-NEXT:    [[A_SROA_2_SROA_3_0_INSERT_EXT:%.*]] = zext i24 0 to i40
+; CHECK-NEXT:    [[A_SROA_2_SROA_3_0_INSERT_SHIFT:%.*]] = shl i40 [[A_SROA_2_SROA_3_0_INSERT_EXT]], 8
+; CHECK-NEXT:    [[A_SROA_2_SROA_3_0_INSERT_MASK:%.*]] = and i40 [[A_SROA_2_SROA_4_0_INSERT_INSERT]], -4294967041
+; CHECK-NEXT:    [[A_SROA_2_SROA_3_0_INSERT_INSERT:%.*]] = or i40 [[A_SROA_2_SROA_3_0_INSERT_MASK]], [[A_SROA_2_SROA_3_0_INSERT_SHIFT]]
+; CHECK-NEXT:    [[A_SROA_2_SROA_0_0_INSERT_EXT:%.*]] = zext i8 0 to i40
+; CHECK-NEXT:    [[A_SROA_2_SROA_0_0_INSERT_SHIFT:%.*]] = shl i40 [[A_SROA_2_SROA_0_0_INSERT_EXT]], 32
+; CHECK-NEXT:    [[A_SROA_2_SROA_0_0_INSERT_MASK:%.*]] = and i40 [[A_SROA_2_SROA_3_0_INSERT_INSERT]], 4294967295
+; CHECK-NEXT:    [[A_SROA_2_SROA_0_0_INSERT_INSERT:%.*]] = or i40 [[A_SROA_2_SROA_0_0_INSERT_MASK]], [[A_SROA_2_SROA_0_0_INSERT_SHIFT]]
+; CHECK-NEXT:    [[A_SROA_2_0_INSERT_EXT:%.*]] = zext i40 [[A_SROA_2_SROA_0_0_INSERT_INSERT]] to i56
+; CHECK-NEXT:    [[A_SROA_2_0_INSERT_MASK:%.*]] = and i56 undef, -1099511627776
+; CHECK-NEXT:    [[A_SROA_2_0_INSERT_INSERT:%.*]] = or i56 [[A_SROA_2_0_INSERT_MASK]], [[A_SROA_2_0_INSERT_EXT]]
+; CHECK-NEXT:    [[A_SROA_0_0_INSERT_EXT:%.*]] = zext i16 1 to i56
+; CHECK-NEXT:    [[A_SROA_0_0_INSERT_SHIFT:%.*]] = shl i56 [[A_SROA_0_0_INSERT_EXT]], 40
+; CHECK-NEXT:    [[A_SROA_0_0_INSERT_MASK:%.*]] = and i56 [[A_SROA_2_0_INSERT_INSERT]], 1099511627775
+; CHECK-NEXT:    [[A_SROA_0_0_INSERT_INSERT:%.*]] = or i56 [[A_SROA_0_0_INSERT_MASK]], [[A_SROA_0_0_INSERT_SHIFT]]
+; CHECK-NEXT:    [[RET:%.*]] = zext i56 [[A_SROA_0_0_INSERT_INSERT]] to i64
+; CHECK-NEXT:    ret i64 [[RET]]
+;
 
 entry:
   %a = alloca [7 x i8]
-; CHECK-NOT: alloca
 
   %a0ptr = getelementptr [7 x i8], [7 x i8]* %a, i64 0, i32 0
   %a1ptr = getelementptr [7 x i8], [7 x i8]* %a, i64 0, i32 1
   %a2ptr = getelementptr [7 x i8], [7 x i8]* %a, i64 0, i32 2
   %a3ptr = getelementptr [7 x i8], [7 x i8]* %a, i64 0, i32 3
 
-; CHECK-NOT: store
-; CHECK-NOT: load
 
   %a0i16ptr = bitcast i8* %a0ptr to i16*
   store i16 1, i16* %a0i16ptr
@@ -92,44 +109,32 @@ entry:
 
 ; the alloca is splitted into multiple slices
 ; Here, i8 1 is for %a[6]
-; CHECK: %[[ext1:.*]] = zext i8 1 to i40
-; CHECK-NEXT: %[[mask1:.*]] = and i40 undef, -256
-; CHECK-NEXT: %[[insert1:.*]] = or i40 %[[mask1]], %[[ext1]]
 
 ; Here, i24 0 is for %a[3] to %a[5]
-; CHECK-NEXT: %[[ext2:.*]] = zext i24 0 to i40
-; CHECK-NEXT: %[[shift2:.*]] = shl i40 %[[ext2]], 8
-; CHECK-NEXT: %[[mask2:.*]] = and i40 %[[insert1]], -4294967041
-; CHECK-NEXT: %[[insert2:.*]] = or i40 %[[mask2]], %[[shift2]]
 
 ; Here, i8 0 is for %a[2]
-; CHECK-NEXT: %[[ext3:.*]] = zext i8 0 to i40
-; CHECK-NEXT: %[[shift3:.*]] = shl i40 %[[ext3]], 32
-; CHECK-NEXT: %[[mask3:.*]] = and i40 %[[insert2]], 4294967295
-; CHECK-NEXT: %[[insert3:.*]] = or i40 %[[mask3]], %[[shift3]]
 
-; CHECK-NEXT: %[[ext4:.*]] = zext i40 %[[insert3]] to i56
-; CHECK-NEXT: %[[mask4:.*]] = and i56 undef, -1099511627776
-; CHECK-NEXT: %[[insert4:.*]] = or i56 %[[mask4]], %[[ext4]]
 
-; CHECK-NOT: store
-; CHECK-NOT: load
 
   %aiptr = bitcast [7 x i8]* %a to i56*
   %ai = load i56, i56* %aiptr
   %ret = zext i56 %ai to i64
   ret i64 %ret
 ; Here, i16 1 is for %a[0] to %a[1]
-; CHECK-NEXT: %[[ext5:.*]] = zext i16 1 to i56
-; CHECK-NEXT: %[[shift5:.*]] = shl i56 %[[ext5]], 40
-; CHECK-NEXT: %[[mask5:.*]] = and i56 %[[insert4]], 1099511627775
-; CHECK-NEXT: %[[insert5:.*]] = or i56 %[[mask5]], %[[shift5]]
-; CHECK-NEXT: %[[ret:.*]] = zext i56 %[[insert5]] to i64
-; CHECK-NEXT: ret i64 %[[ret]]
 }
 
 define i64 @PR14132(i1 %flag) {
 ; CHECK-LABEL: @PR14132(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[FLAG:%.*]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[B_0_LOAD_EXT:%.*]] = zext i8 1 to i64
+; CHECK-NEXT:    [[B_0_ENDIAN_SHIFT:%.*]] = shl i64 [[B_0_LOAD_EXT]], 56
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.end:
+; CHECK-NEXT:    [[PTR_0_SROA_SPECULATED:%.*]] = phi i64 [ [[B_0_ENDIAN_SHIFT]], [[IF_THEN]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret i64 [[PTR_0_SROA_SPECULATED]]
+;
 ; Here we form a PHI-node by promoting the pointer alloca first, and then in
 ; order to promote the other two allocas, we speculate the load of the
 ; now-phi-node-pointer. In doing so we end up loading a 64-bit value from an i8
@@ -142,7 +147,6 @@ entry:
   %a = alloca i64, align 8
   %b = alloca i8, align 8
   %ptr = alloca i64*, align 8
-; CHECK-NOT: alloca
 
   %ptr.cast = bitcast i64** %ptr to i8**
   store i64 0, i64* %a
@@ -153,24 +157,28 @@ entry:
 if.then:
   store i8* %b, i8** %ptr.cast
   br label %if.end
-; CHECK-NOT: store
-; CHECK: %[[ext:.*]] = zext i8 1 to i64
-; CHECK: %[[shift:.*]] = shl i64 %[[ext]], 56
 
 if.end:
   %tmp = load i64*, i64** %ptr
   %result = load i64, i64* %tmp
-; CHECK-NOT: load
-; CHECK: %[[result:.*]] = phi i64 [ %[[shift]], %if.then ], [ 0, %entry ]
 
   ret i64 %result
-; CHECK-NEXT: ret i64 %[[result]]
 }
 
 declare void @f(i64 %x, i32 %y)
 
 define void @test3() {
 ; CHECK-LABEL: @test3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_SROA_3_0_INSERT_EXT:%.*]] = zext i32 134316040 to i64
+; CHECK-NEXT:    [[A_SROA_3_0_INSERT_MASK:%.*]] = and i64 undef, -4294967296
+; CHECK-NEXT:    [[A_SROA_3_0_INSERT_INSERT:%.*]] = or i64 [[A_SROA_3_0_INSERT_MASK]], [[A_SROA_3_0_INSERT_EXT]]
+; CHECK-NEXT:    [[A_SROA_0_0_INSERT_EXT:%.*]] = zext i32 8 to i64
+; CHECK-NEXT:    [[A_SROA_0_0_INSERT_SHIFT:%.*]] = shl i64 [[A_SROA_0_0_INSERT_EXT]], 32
+; CHECK-NEXT:    [[A_SROA_0_0_INSERT_MASK:%.*]] = and i64 [[A_SROA_3_0_INSERT_INSERT]], 4294967295
+; CHECK-NEXT:    [[A_SROA_0_0_INSERT_INSERT:%.*]] = or i64 [[A_SROA_0_0_INSERT_MASK]], [[A_SROA_0_0_INSERT_SHIFT]]
+; CHECK-NEXT:    call void @f(i64 [[A_SROA_0_0_INSERT_INSERT]], i32 8)
+; CHECK-NEXT:    ret void
 ;
 ; This is a test that specifically exercises the big-endian lowering because it
 ; ends up splitting a 64-bit integer into two smaller integers and has a number
@@ -178,75 +186,61 @@ define void @test3() {
 ; would miscompile this by either dropping a most significant byte or least
 ; significant byte due to shrinking the [4,8) slice to an i24, or by failing to
 ; move the bytes around correctly.
-;
 ; The magical number 34494054408 is used because it has bits set in various
 ; bytes so that it is clear if those bytes fail to be propagated.
-;
 ; If you're debugging this, rather than using the direct magical numbers, run
 ; the IR through '-sroa -instcombine'. With '-instcombine' these will be
 ; constant folded, and if the i64 doesn't round-trip correctly, you've found
 ; a bug!
-;
 entry:
   %a = alloca { i32, i24 }, align 4
-; CHECK-NOT: alloca
 
   %tmp0 = bitcast { i32, i24 }* %a to i64*
   store i64 34494054408, i64* %tmp0
   %tmp1 = load i64, i64* %tmp0, align 4
   %tmp2 = bitcast { i32, i24 }* %a to i32*
   %tmp3 = load i32, i32* %tmp2, align 4
-; CHECK: %[[HI_EXT:.*]] = zext i32 134316040 to i64
-; CHECK: %[[HI_INPUT:.*]] = and i64 undef, -4294967296
-; CHECK: %[[HI_MERGE:.*]] = or i64 %[[HI_INPUT]], %[[HI_EXT]]
-; CHECK: %[[LO_EXT:.*]] = zext i32 8 to i64
-; CHECK: %[[LO_SHL:.*]] = shl i64 %[[LO_EXT]], 32
-; CHECK: %[[LO_INPUT:.*]] = and i64 %[[HI_MERGE]], 4294967295
-; CHECK: %[[LO_MERGE:.*]] = or i64 %[[LO_INPUT]], %[[LO_SHL]]
 
   call void @f(i64 %tmp1, i32 %tmp3)
-; CHECK: call void @f(i64 %[[LO_MERGE]], i32 8)
   ret void
-; CHECK: ret void
 }
 
 define void @test4() {
-; CHECK-LABEL: @test4
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_SROA_0_0_EXTRACT_SHIFT:%.*]] = lshr i64 34494054408, 32
+; CHECK-NEXT:    [[A_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[A_SROA_0_0_EXTRACT_SHIFT]] to i32
+; CHECK-NEXT:    [[A_SROA_3_0_EXTRACT_TRUNC:%.*]] = trunc i64 34494054408 to i32
+; CHECK-NEXT:    [[A_SROA_3_0_INSERT_EXT:%.*]] = zext i32 [[A_SROA_3_0_EXTRACT_TRUNC]] to i64
+; CHECK-NEXT:    [[A_SROA_3_0_INSERT_MASK:%.*]] = and i64 undef, -4294967296
+; CHECK-NEXT:    [[A_SROA_3_0_INSERT_INSERT:%.*]] = or i64 [[A_SROA_3_0_INSERT_MASK]], [[A_SROA_3_0_INSERT_EXT]]
+; CHECK-NEXT:    [[A_SROA_0_0_INSERT_EXT:%.*]] = zext i32 [[A_SROA_0_0_EXTRACT_TRUNC]] to i64
+; CHECK-NEXT:    [[A_SROA_0_0_INSERT_SHIFT:%.*]] = shl i64 [[A_SROA_0_0_INSERT_EXT]], 32
+; CHECK-NEXT:    [[A_SROA_0_0_INSERT_MASK:%.*]] = and i64 [[A_SROA_3_0_INSERT_INSERT]], 4294967295
+; CHECK-NEXT:    [[A_SROA_0_0_INSERT_INSERT:%.*]] = or i64 [[A_SROA_0_0_INSERT_MASK]], [[A_SROA_0_0_INSERT_SHIFT]]
+; CHECK-NEXT:    call void @f(i64 [[A_SROA_0_0_INSERT_INSERT]], i32 [[A_SROA_0_0_EXTRACT_TRUNC]])
+; CHECK-NEXT:    ret void
 ;
 ; Much like @test3, this is specifically testing big-endian management of data.
 ; Also similarly, it uses constants with particular bits set to help track
 ; whether values are corrupted, and can be easily evaluated by running through
 ; -instcombine to see that the i64 round-trips.
-;
 entry:
   %a = alloca { i32, i24 }, align 4
   %a2 = alloca i64, align 4
-; CHECK-NOT: alloca
 
   store i64 34494054408, i64* %a2
   %tmp0 = bitcast { i32, i24 }* %a to i8*
   %tmp1 = bitcast i64* %a2 to i8*
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %tmp0, i8* align 4 %tmp1, i64 8, i1 false)
-; CHECK: %[[LO_SHR:.*]] = lshr i64 34494054408, 32
-; CHECK: %[[LO_START:.*]] = trunc i64 %[[LO_SHR]] to i32
-; CHECK: %[[HI_START:.*]] = trunc i64 34494054408 to i32
 
   %tmp2 = bitcast { i32, i24 }* %a to i64*
   %tmp3 = load i64, i64* %tmp2, align 4
   %tmp4 = bitcast { i32, i24 }* %a to i32*
   %tmp5 = load i32, i32* %tmp4, align 4
-; CHECK: %[[HI_EXT:.*]] = zext i32 %[[HI_START]] to i64
-; CHECK: %[[HI_INPUT:.*]] = and i64 undef, -4294967296
-; CHECK: %[[HI_MERGE:.*]] = or i64 %[[HI_INPUT]], %[[HI_EXT]]
-; CHECK: %[[LO_EXT:.*]] = zext i32 %[[LO_START]] to i64
-; CHECK: %[[LO_SHL:.*]] = shl i64 %[[LO_EXT]], 32
-; CHECK: %[[LO_INPUT:.*]] = and i64 %[[HI_MERGE]], 4294967295
-; CHECK: %[[LO_MERGE:.*]] = or i64 %[[LO_INPUT]], %[[LO_SHL]]
 
   call void @f(i64 %tmp3, i32 %tmp5)
-; CHECK: call void @f(i64 %[[LO_MERGE]], i32 %[[LO_START]])
   ret void
-; CHECK: ret void
 }
 
 declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1)
diff --git a/llvm/test/Transforms/SROA/dbg-inline.ll b/llvm/test/Transforms/SROA/dbg-inline.ll
index b3b3660f6414..27b5d68961a2 100644
--- a/llvm/test/Transforms/SROA/dbg-inline.ll
+++ b/llvm/test/Transforms/SROA/dbg-inline.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; Test that SROA can deal with allocas that have more than one
 ; dbg.declare hanging off of it.
 
@@ -10,16 +11,20 @@ target triple = "x86_64-apple-macosx10.15.0"
 
 ; Function Attrs: noinline optnone ssp uwtable
 define i64 @_Z1g4pair(i64 %p.coerce0, i64 %p.coerce1) #0 !dbg !8 {
+; CHECK-LABEL: @_Z1g4pair(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata i64 [[P_COERCE0:%.*]], metadata [[META16:![0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 64)), !dbg [[DBG17:![0-9]+]]
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata i64 [[P_COERCE0]], metadata [[META18:![0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 64)), !dbg [[DBG20:![0-9]+]]
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata i64 [[P_COERCE1:%.*]], metadata [[META16]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64)), !dbg [[DBG17]]
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata i64 [[P_COERCE1]], metadata [[META18]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64)), !dbg [[DBG20]]
+; CHECK-NEXT:    ret i64 [[P_COERCE0]], !dbg [[DBG22:![0-9]+]]
+;
 entry:
   %p = alloca %struct.pair, align 8
   %0 = getelementptr inbounds %struct.pair, %struct.pair* %p, i32 0, i32 0
   store i64 %p.coerce0, i64* %0, align 8
   %1 = getelementptr inbounds %struct.pair, %struct.pair* %p, i32 0, i32 1
   store i64 %p.coerce1, i64* %1, align 8
-  ; CHECK-DAG: call void @llvm.dbg.value(metadata i64 %p.coerce0, metadata ![[VAR:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 64)), !dbg ![[LOC:[0-9]+]]
-  ; CHECK-DAG: call void @llvm.dbg.value(metadata i64 %p.coerce1, metadata ![[VAR]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64)), !dbg ![[LOC]]
-  ; CHECK-DAG: call void @llvm.dbg.value(metadata i64 %p.coerce0, metadata ![[INLINED_VAR:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 64)), !dbg ![[INLINED_LOC:[0-9]+]]
-  ; CHECK-DAG: call void @llvm.dbg.value(metadata i64 %p.coerce1, metadata ![[INLINED_VAR]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64)), !dbg ![[INLINED_LOC]]
   call void @llvm.dbg.declare(metadata %struct.pair* %p, metadata !17, metadata !DIExpression()), !dbg !18
   call void @llvm.dbg.declare(metadata %struct.pair* %p, metadata !21, metadata !DIExpression()), !dbg !23
   %a.i = getelementptr inbounds %struct.pair, %struct.pair* %p, i32 0, i32 0, !dbg !25
@@ -57,7 +62,6 @@ attributes #2 = { argmemonly nounwind willreturn }
 !15 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !13, file: !9, line: 1, baseType: !12, size: 64)
 !16 = !DIDerivedType(tag: DW_TAG_member, name: "b", scope: !13, file: !9, line: 1, baseType: !12, size: 64, offset: 64)
 !17 = !DILocalVariable(name: "p", arg: 1, scope: !8, file: !9, line: 9, type: !13)
-; CHECK: ![[LOC]] = !DILocation
 ; CHECK-NOT: inlinedAt
 ; CHECK: =
 !18 = !DILocation(line: 9, column: 27, scope: !8)
@@ -65,7 +69,6 @@ attributes #2 = { argmemonly nounwind willreturn }
 !20 = !DILocation(line: 10, column: 10, scope: !8)
 !21 = !DILocalVariable(name: "p", arg: 1, scope: !22, file: !9, line: 5, type: !13)
 !22 = distinct !DISubprogram(name: "f", linkageName: "_ZL1f4pair", scope: !9, file: !9, line: 5, type: !10, scopeLine: 5, flags: DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition, unit: !0, retainedNodes: !2)
-; CHECK: ![[INLINED_LOC]] = !DILocation({{.*}}inlinedAt
 !23 = !DILocation(line: 5, column: 27, scope: !22, inlinedAt: !24)
 !24 = distinct !DILocation(line: 10, column: 10, scope: !8)
 !25 = !DILocation(line: 6, column: 12, scope: !22, inlinedAt: !24)
diff --git a/llvm/test/Transforms/SROA/dbg-single-piece.ll b/llvm/test/Transforms/SROA/dbg-single-piece.ll
index d9eb41b34772..55aa3070aabc 100644
--- a/llvm/test/Transforms/SROA/dbg-single-piece.ll
+++ b/llvm/test/Transforms/SROA/dbg-single-piece.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -passes=sroa %s -S | FileCheck %s
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
@@ -5,14 +6,16 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
 declare void @llvm.dbg.declare(metadata, metadata, metadata) #0
 define void @_ZL18findInsertLocationPN4llvm17MachineBasicBlockENS_9SlotIndexERNS_13LiveIntervalsE() {
+; CHECK-LABEL: @_ZL18findInsertLocationPN4llvm17MachineBasicBlockENS_9SlotIndexERNS_13LiveIntervalsE(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata %foo* undef, metadata [[META3:![0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64)), !dbg [[DBG8:![0-9]+]]
+; CHECK-NEXT:    ret void
+;
 entry:
   %retval = alloca %foo, align 8
   call void @llvm.dbg.declare(metadata %foo* %retval, metadata !1, metadata !7), !dbg !8
 ; Checks that SROA still inserts a bit_piece expression, even if it produces only one piece
 ; (as long as that piece is smaller than the whole thing)
-; CHECK-NOT: call void @llvm.dbg.value
-; CHECK: call void @llvm.dbg.value(metadata %foo* undef, {{.*}}, metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64)), !dbg
-; CHECK-NOT: call void @llvm.dbg.value
   %0 = bitcast %foo* %retval to i8*
   %1 = getelementptr inbounds i8, i8* %0, i64 8
   %2 = bitcast i8* %1 to %foo**
diff --git a/llvm/test/Transforms/SROA/dead-inst.ll b/llvm/test/Transforms/SROA/dead-inst.ll
index 083c8a6221e1..fe320c790b39 100644
--- a/llvm/test/Transforms/SROA/dead-inst.ll
+++ b/llvm/test/Transforms/SROA/dead-inst.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; SROA fails to rewrite allocs but does rewrite some phis and delete
 ; dead instructions. Ensure that this invalidates analyses required
 ; for other passes.
@@ -23,8 +24,8 @@ define hidden fastcc void @H(%class.b* noalias nocapture readnone, [2 x i64]) un
   store i64 0, i64* %.sroa.0, align 8
   %4 = extractvalue [2 x i64] %1, 1
   switch i64 %4, label %6 [
-    i64 4, label %foo
-    i64 5, label %5
+  i64 4, label %foo
+  i64 5, label %5
   ]
 
 ; <label>:5:
diff --git a/llvm/test/Transforms/SROA/fca.ll b/llvm/test/Transforms/SROA/fca.ll
index 19be9e79a7cd..5174751c1682 100644
--- a/llvm/test/Transforms/SROA/fca.ll
+++ b/llvm/test/Transforms/SROA/fca.ll
@@ -1,12 +1,14 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -passes=sroa -S | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
 
 define { i32, i32 } @test0(i32 %x, i32 %y) {
 ; CHECK-LABEL: @test0(
-; CHECK-NOT: alloca
-; CHECK: insertvalue { i32, i32 }
-; CHECK: insertvalue { i32, i32 }
-; CHECK: ret { i32, i32 }
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RESULT_FCA_0_INSERT:%.*]] = insertvalue { i32, i32 } poison, i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[RESULT_FCA_1_INSERT:%.*]] = insertvalue { i32, i32 } [[RESULT_FCA_0_INSERT]], i32 [[Y:%.*]], 1
+; CHECK-NEXT:    ret { i32, i32 } [[RESULT_FCA_1_INSERT]]
+;
 
 entry:
   %a = alloca { i32, i32 }
@@ -27,11 +29,17 @@ define { i32, i32 } @test1(i32 %x, i32 %y) {
 ; split the volatile load and store here but must produce volatile scalar loads
 ; and stores from them.
 ; CHECK-LABEL: @test1(
-; CHECK: alloca
-; CHECK: alloca
-; CHECK: load volatile { i32, i32 }, { i32, i32 }*
-; CHECK: store volatile { i32, i32 }
-; CHECK: ret { i32, i32 }
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = alloca { i32, i32 }, align 8
+; CHECK-NEXT:    [[B:%.*]] = alloca { i32, i32 }, align 8
+; CHECK-NEXT:    [[A_0_GEP1_SROA_IDX:%.*]] = getelementptr inbounds { i32, i32 }, { i32, i32 }* [[A]], i64 0, i32 0
+; CHECK-NEXT:    store i32 [[X:%.*]], i32* [[A_0_GEP1_SROA_IDX]], align 8
+; CHECK-NEXT:    [[A_4_GEP2_SROA_IDX:%.*]] = getelementptr inbounds { i32, i32 }, { i32, i32 }* [[A]], i64 0, i32 1
+; CHECK-NEXT:    store i32 [[Y:%.*]], i32* [[A_4_GEP2_SROA_IDX]], align 4
+; CHECK-NEXT:    [[A_0_RESULT:%.*]] = load volatile { i32, i32 }, { i32, i32 }* [[A]], align 8
+; CHECK-NEXT:    store volatile { i32, i32 } [[A_0_RESULT]], { i32, i32 }* [[B]], align 8
+; CHECK-NEXT:    ret { i32, i32 } [[A_0_RESULT]]
+;
 
 entry:
   %a = alloca { i32, i32 }
diff --git a/llvm/test/Transforms/SROA/preserve-nonnull.ll b/llvm/test/Transforms/SROA/preserve-nonnull.ll
index 81b23cb93dc4..dcd50fb882ec 100644
--- a/llvm/test/Transforms/SROA/preserve-nonnull.ll
+++ b/llvm/test/Transforms/SROA/preserve-nonnull.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -passes=sroa -S | FileCheck %s
 ;
 ; Make sure that SROA doesn't lose nonnull metadata
@@ -7,13 +8,14 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture r
 
 ; Check that we do basic propagation of nonnull when rewriting.
 define i8* @propagate_nonnull(i32* %v) {
-; CHECK-LABEL: define i8* @propagate_nonnull(
+; CHECK-LABEL: @propagate_nonnull(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    %[[A:.*]] = alloca i8*
-; CHECK-NEXT:    %[[V_CAST:.*]] = bitcast i32* %v to i8*
-; CHECK-NEXT:    store i8* %[[V_CAST]], i8** %[[A]]
-; CHECK-NEXT:    %[[LOAD:.*]] = load volatile i8*, i8** %[[A]], align 8, !nonnull !0
-; CHECK-NEXT:    ret i8* %[[LOAD]]
+; CHECK-NEXT:    [[A_SROA_1:%.*]] = alloca i8*, align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[V:%.*]] to i8*
+; CHECK-NEXT:    store i8* [[TMP0]], i8** [[A_SROA_1]], align 8
+; CHECK-NEXT:    [[A_SROA_1_0_A_SROA_1_8_LOAD:%.*]] = load volatile i8*, i8** [[A_SROA_1]], align 8, !nonnull !0
+; CHECK-NEXT:    ret i8* [[A_SROA_1_0_A_SROA_1_8_LOAD]]
+;
 entry:
   %a = alloca [2 x i8*]
   %a.gep0 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 0
@@ -27,12 +29,13 @@ entry:
 }
 
 define float* @turn_nonnull_into_assume(float** %arg) {
-; CHECK-LABEL: define float* @turn_nonnull_into_assume(
+; CHECK-LABEL: @turn_nonnull_into_assume(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    %[[RETURN:.*]] = load float*, float** %arg, align 8
-; CHECK-NEXT:    %[[ASSUME:.*]] = icmp ne float* %[[RETURN]], null
-; CHECK-NEXT:    call void @llvm.assume(i1 %[[ASSUME]])
-; CHECK-NEXT:    ret float* %[[RETURN]]
+; CHECK-NEXT:    [[BUF_0_COPYLOAD:%.*]] = load float*, float** [[ARG:%.*]], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ne float* [[BUF_0_COPYLOAD]], null
+; CHECK-NEXT:    call void @llvm.assume(i1 [[TMP0]])
+; CHECK-NEXT:    ret float* [[BUF_0_COPYLOAD]]
+;
 entry:
   %buf = alloca float*
   %_arg_i8 = bitcast float** %arg to i8*
@@ -49,12 +52,13 @@ entry:
 ; *does* initially, but then we lose that !range metadata before we finish
 ; SROA.
 define i8* @propagate_nonnull_to_int() {
-; CHECK-LABEL: define i8* @propagate_nonnull_to_int(
+; CHECK-LABEL: @propagate_nonnull_to_int(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    %[[A:.*]] = alloca i8*
-; CHECK-NEXT:    store i8* inttoptr (i64 42 to i8*), i8** %[[A]]
-; CHECK-NEXT:    %[[LOAD:.*]] = load volatile i8*, i8** %[[A]]
-; CHECK-NEXT:    ret i8* %[[LOAD]]
+; CHECK-NEXT:    [[A_SROA_1:%.*]] = alloca i8*, align 8
+; CHECK-NEXT:    store i8* inttoptr (i64 42 to i8*), i8** [[A_SROA_1]], align 8
+; CHECK-NEXT:    [[A_SROA_1_0_A_SROA_1_8_LOAD:%.*]] = load volatile i8*, i8** [[A_SROA_1]], align 8, !nonnull !0
+; CHECK-NEXT:    ret i8* [[A_SROA_1_0_A_SROA_1_8_LOAD]]
+;
 entry:
   %a = alloca [2 x i8*]
   %a.gep0 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 0
@@ -72,9 +76,10 @@ entry:
 ; register. This can fail in interesting ways due to the rewrite iteration of
 ; SROA, resulting in PR32902.
 define i8* @propagate_nonnull_to_int_and_promote() {
-; CHECK-LABEL: define i8* @propagate_nonnull_to_int_and_promote(
+; CHECK-LABEL: @propagate_nonnull_to_int_and_promote(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    ret i8* inttoptr (i64 42 to i8*)
+;
 entry:
   %a = alloca [2 x i8*], align 8
   %a.gep0 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 0
diff --git a/llvm/test/Transforms/SROA/slice-order-independence.ll b/llvm/test/Transforms/SROA/slice-order-independence.ll
index 2e06e5dd1f77..7126662baa74 100644
--- a/llvm/test/Transforms/SROA/slice-order-independence.ll
+++ b/llvm/test/Transforms/SROA/slice-order-independence.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -passes=sroa -S | FileCheck %s
 target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
 
@@ -7,8 +8,19 @@ declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) n
 ; slices even in case of types that are skipped because their width is not a
 ; byte width multiple
 define void @skipped_inttype_first({ i16*, i32 }*) {
-; CHECK-LABEL: @skipped_inttype_first
-; CHECK: alloca i8*
+; CHECK-LABEL: @skipped_inttype_first(
+; CHECK-NEXT:    [[ARG_SROA_0:%.*]] = alloca i8*, align 8
+; CHECK-NEXT:    [[ARG_SROA_0_0__SROA_CAST:%.*]] = bitcast { i16*, i32 }* [[TMP0:%.*]] to i8**
+; CHECK-NEXT:    [[ARG_SROA_0_0_COPYLOAD:%.*]] = load i8*, i8** [[ARG_SROA_0_0__SROA_CAST]], align 8
+; CHECK-NEXT:    store i8* [[ARG_SROA_0_0_COPYLOAD]], i8** [[ARG_SROA_0]], align 8
+; CHECK-NEXT:    [[ARG_SROA_3_0__SROA_IDX1:%.*]] = getelementptr inbounds { i16*, i32 }, { i16*, i32 }* [[TMP0]], i64 0, i32 1
+; CHECK-NEXT:    [[ARG_SROA_3_0__SROA_CAST:%.*]] = bitcast i32* [[ARG_SROA_3_0__SROA_IDX1]] to i64*
+; CHECK-NEXT:    [[ARG_SROA_3_0_COPYLOAD:%.*]] = load i64, i64* [[ARG_SROA_3_0__SROA_CAST]], align 8
+; CHECK-NEXT:    [[ARG_SROA_0_0_PB0_SROA_CAST2:%.*]] = bitcast i8** [[ARG_SROA_0]] to i63*
+; CHECK-NEXT:    [[ARG_SROA_0_0_ARG_SROA_0_0_B0:%.*]] = load i63, i63* [[ARG_SROA_0_0_PB0_SROA_CAST2]], align 8
+; CHECK-NEXT:    [[ARG_SROA_0_0_ARG_SROA_0_0_B1:%.*]] = load i8*, i8** [[ARG_SROA_0]], align 8
+; CHECK-NEXT:    ret void
+;
   %arg = alloca { i16*, i32 }, align 8
   %2 = bitcast { i16*, i32 }* %0 to i8*
   %3 = bitcast { i16*, i32 }* %arg to i8*
@@ -22,8 +34,19 @@ define void @skipped_inttype_first({ i16*, i32 }*) {
 }
 
 define void @skipped_inttype_last({ i16*, i32 }*) {
-; CHECK-LABEL: @skipped_inttype_last
-; CHECK: alloca i8*
+; CHECK-LABEL: @skipped_inttype_last(
+; CHECK-NEXT:    [[ARG_SROA_0:%.*]] = alloca i8*, align 8
+; CHECK-NEXT:    [[ARG_SROA_0_0__SROA_CAST:%.*]] = bitcast { i16*, i32 }* [[TMP0:%.*]] to i8**
+; CHECK-NEXT:    [[ARG_SROA_0_0_COPYLOAD:%.*]] = load i8*, i8** [[ARG_SROA_0_0__SROA_CAST]], align 8
+; CHECK-NEXT:    store i8* [[ARG_SROA_0_0_COPYLOAD]], i8** [[ARG_SROA_0]], align 8
+; CHECK-NEXT:    [[ARG_SROA_3_0__SROA_IDX1:%.*]] = getelementptr inbounds { i16*, i32 }, { i16*, i32 }* [[TMP0]], i64 0, i32 1
+; CHECK-NEXT:    [[ARG_SROA_3_0__SROA_CAST:%.*]] = bitcast i32* [[ARG_SROA_3_0__SROA_IDX1]] to i64*
+; CHECK-NEXT:    [[ARG_SROA_3_0_COPYLOAD:%.*]] = load i64, i64* [[ARG_SROA_3_0__SROA_CAST]], align 8
+; CHECK-NEXT:    [[ARG_SROA_0_0_ARG_SROA_0_0_B1:%.*]] = load i8*, i8** [[ARG_SROA_0]], align 8
+; CHECK-NEXT:    [[ARG_SROA_0_0_PB0_SROA_CAST2:%.*]] = bitcast i8** [[ARG_SROA_0]] to i63*
+; CHECK-NEXT:    [[ARG_SROA_0_0_ARG_SROA_0_0_B0:%.*]] = load i63, i63* [[ARG_SROA_0_0_PB0_SROA_CAST2]], align 8
+; CHECK-NEXT:    ret void
+;
   %arg = alloca { i16*, i32 }, align 8
   %2 = bitcast { i16*, i32 }* %0 to i8*
   %3 = bitcast { i16*, i32 }* %arg to i8*
diff --git a/llvm/test/Transforms/SROA/vector-conversion.ll b/llvm/test/Transforms/SROA/vector-conversion.ll
index ae3e4dc52030..b57cb027643b 100644
--- a/llvm/test/Transforms/SROA/vector-conversion.ll
+++ b/llvm/test/Transforms/SROA/vector-conversion.ll
@@ -1,91 +1,105 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -passes=sroa -S | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
 
 define <4 x i64> @vector_ptrtoint({<2 x i32*>, <2 x i32*>} %x) {
-; CHECK-LABEL: @vector_ptrtoint
+; CHECK-LABEL: @vector_ptrtoint(
+; CHECK-NEXT:    [[X_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x i32*>, <2 x i32*> } [[X:%.*]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint <2 x i32*> [[X_FCA_0_EXTRACT]] to <2 x i64>
+; CHECK-NEXT:    [[A_SROA_0_0_VEC_EXPAND:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+; CHECK-NEXT:    [[A_SROA_0_0_VECBLEND:%.*]] = select <4 x i1> <i1 true, i1 true, i1 false, i1 false>, <4 x i64> [[A_SROA_0_0_VEC_EXPAND]], <4 x i64> undef
+; CHECK-NEXT:    [[X_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x i32*>, <2 x i32*> } [[X]], 1
+; CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint <2 x i32*> [[X_FCA_1_EXTRACT]] to <2 x i64>
+; CHECK-NEXT:    [[A_SROA_0_16_VEC_EXPAND:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> poison, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
+; CHECK-NEXT:    [[A_SROA_0_16_VECBLEND:%.*]] = select <4 x i1> <i1 false, i1 false, i1 true, i1 true>, <4 x i64> [[A_SROA_0_16_VEC_EXPAND]], <4 x i64> [[A_SROA_0_0_VECBLEND]]
+; CHECK-NEXT:    ret <4 x i64> [[A_SROA_0_16_VECBLEND]]
+;
   %a = alloca {<2 x i32*>, <2 x i32*>}
-; CHECK-NOT: alloca
 
   store {<2 x i32*>, <2 x i32*>} %x, {<2 x i32*>, <2 x i32*>}* %a
-; CHECK-NOT: store
 
   %cast = bitcast {<2 x i32*>, <2 x i32*>}* %a to <4 x i64>*
   %vec = load <4 x i64>, <4 x i64>* %cast
-; CHECK-NOT: load
-; CHECK: ptrtoint
 
   ret <4 x i64> %vec
 }
 
 define <4 x i32*> @vector_inttoptr({<2 x i64>, <2 x i64>} %x) {
-; CHECK-LABEL: @vector_inttoptr
+; CHECK-LABEL: @vector_inttoptr(
+; CHECK-NEXT:    [[X_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[X:%.*]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = inttoptr <2 x i64> [[X_FCA_0_EXTRACT]] to <2 x i32*>
+; CHECK-NEXT:    [[A_SROA_0_0_VEC_EXPAND:%.*]] = shufflevector <2 x i32*> [[TMP1]], <2 x i32*> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+; CHECK-NEXT:    [[A_SROA_0_0_VECBLEND:%.*]] = select <4 x i1> <i1 true, i1 true, i1 false, i1 false>, <4 x i32*> [[A_SROA_0_0_VEC_EXPAND]], <4 x i32*> undef
+; CHECK-NEXT:    [[X_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[X]], 1
+; CHECK-NEXT:    [[TMP2:%.*]] = inttoptr <2 x i64> [[X_FCA_1_EXTRACT]] to <2 x i32*>
+; CHECK-NEXT:    [[A_SROA_0_16_VEC_EXPAND:%.*]] = shufflevector <2 x i32*> [[TMP2]], <2 x i32*> poison, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
+; CHECK-NEXT:    [[A_SROA_0_16_VECBLEND:%.*]] = select <4 x i1> <i1 false, i1 false, i1 true, i1 true>, <4 x i32*> [[A_SROA_0_16_VEC_EXPAND]], <4 x i32*> [[A_SROA_0_0_VECBLEND]]
+; CHECK-NEXT:    ret <4 x i32*> [[A_SROA_0_16_VECBLEND]]
+;
   %a = alloca {<2 x i64>, <2 x i64>}
-; CHECK-NOT: alloca
 
   store {<2 x i64>, <2 x i64>} %x, {<2 x i64>, <2 x i64>}* %a
-; CHECK-NOT: store
 
   %cast = bitcast {<2 x i64>, <2 x i64>}* %a to <4 x i32*>*
   %vec = load <4 x i32*>, <4 x i32*>* %cast
-; CHECK-NOT: load
-; CHECK: inttoptr
 
   ret <4 x i32*> %vec
 }
 
 define <2 x i64> @vector_ptrtointbitcast({<1 x i32*>, <1 x i32*>} %x) {
 ; CHECK-LABEL: @vector_ptrtointbitcast(
+; CHECK-NEXT:    [[X_FCA_0_EXTRACT:%.*]] = extractvalue { <1 x i32*>, <1 x i32*> } [[X:%.*]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint <1 x i32*> [[X_FCA_0_EXTRACT]] to <1 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <1 x i64> [[TMP1]] to i64
+; CHECK-NEXT:    [[A_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[TMP2]], i32 0
+; CHECK-NEXT:    [[X_FCA_1_EXTRACT:%.*]] = extractvalue { <1 x i32*>, <1 x i32*> } [[X]], 1
+; CHECK-NEXT:    [[TMP3:%.*]] = ptrtoint <1 x i32*> [[X_FCA_1_EXTRACT]] to <1 x i64>
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to i64
+; CHECK-NEXT:    [[A_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[A_SROA_0_0_VEC_INSERT]], i64 [[TMP4]], i32 1
+; CHECK-NEXT:    ret <2 x i64> [[A_SROA_0_8_VEC_INSERT]]
+;
   %a = alloca {<1 x i32*>, <1 x i32*>}
-; CHECK-NOT: alloca
 
   store {<1 x i32*>, <1 x i32*>} %x, {<1 x i32*>, <1 x i32*>}* %a
-; CHECK-NOT: store
 
   %cast = bitcast {<1 x i32*>, <1 x i32*>}* %a to <2 x i64>*
   %vec = load <2 x i64>, <2 x i64>* %cast
-; CHECK-NOT: load
-; CHECK: ptrtoint
-; CHECK: bitcast
-; CHECK: ptrtoint
-; CHECK: bitcast
 
   ret <2 x i64> %vec
 }
 
 define <2 x i8*> @vector_inttoptrbitcast_vector({<16 x i8>, <16 x i8>} %x) {
 ; CHECK-LABEL: @vector_inttoptrbitcast_vector(
+; CHECK-NEXT:    [[X_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[X:%.*]], 0
+; CHECK-NEXT:    [[X_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[X]], 1
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[X_FCA_0_EXTRACT]] to <2 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = inttoptr <2 x i64> [[TMP1]] to <2 x i8*>
+; CHECK-NEXT:    ret <2 x i8*> [[TMP2]]
+;
   %a = alloca {<16 x i8>, <16 x i8>}
-; CHECK-NOT: alloca
 
   store {<16 x i8>, <16 x i8>} %x, {<16 x i8>, <16 x i8>}* %a
-; CHECK-NOT: store
 
   %cast = bitcast {<16 x i8>, <16 x i8>}* %a to <2 x i8*>*
   %vec = load <2 x i8*>, <2 x i8*>* %cast
-; CHECK-NOT: load
-; CHECK: extractvalue
-; CHECK: extractvalue
-; CHECK: bitcast
-; CHECK: inttoptr
 
   ret <2 x i8*> %vec
 }
 
 define <16 x i8> @vector_ptrtointbitcast_vector({<2 x i8*>, <2 x i8*>} %x) {
 ; CHECK-LABEL: @vector_ptrtointbitcast_vector(
+; CHECK-NEXT:    [[X_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x i8*>, <2 x i8*> } [[X:%.*]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint <2 x i8*> [[X_FCA_0_EXTRACT]] to <2 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
+; CHECK-NEXT:    [[X_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x i8*>, <2 x i8*> } [[X]], 1
+; CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+;
   %a = alloca {<2 x i8*>, <2 x i8*>}
-; CHECK-NOT: alloca
 
   store {<2 x i8*>, <2 x i8*>} %x, {<2 x i8*>, <2 x i8*>}* %a
-; CHECK-NOT: store
 
   %cast = bitcast {<2 x i8*>, <2 x i8*>}* %a to <16 x i8>*
   %vec = load <16 x i8>, <16 x i8>* %cast
-; CHECK-NOT: load
-; CHECK: extractvalue
-; CHECK: ptrtoint
-; CHECK: bitcast
-; CHECK: extractvalue
 
   ret <16 x i8> %vec
 }
diff --git a/llvm/test/Transforms/SROA/vector-promotion-different-size.ll b/llvm/test/Transforms/SROA/vector-promotion-different-size.ll
index ff7a5319f2db..4a6efe8e9aea 100644
--- a/llvm/test/Transforms/SROA/vector-promotion-different-size.ll
+++ b/llvm/test/Transforms/SROA/vector-promotion-different-size.ll
@@ -1,24 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -passes=sroa -S | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
 
 define <4 x i1> @vector_bitcast() {
-  ; CHECK-LABEL: @vector_bitcast
-  ; CHECK: alloca <3 x i1>
+; CHECK-LABEL: @vector_bitcast(
+; CHECK-NEXT:    [[A:%.*]] = alloca <3 x i1>, align 1
+; CHECK-NEXT:    store <3 x i1> <i1 true, i1 false, i1 true>, <3 x i1>* [[A]], align 1
+; CHECK-NEXT:    [[A_0_CAST_SROA_CAST:%.*]] = bitcast <3 x i1>* [[A]] to <4 x i1>*
+; CHECK-NEXT:    [[A_0_VEC:%.*]] = load <4 x i1>, <4 x i1>* [[A_0_CAST_SROA_CAST]], align 1
+; CHECK-NEXT:    ret <4 x i1> [[A_0_VEC]]
+;
 
-    %a = alloca <3 x i1>
-    store <3 x i1> <i1 1,i1 0,i1 1>, <3 x i1>* %a
-    %cast = bitcast <3 x i1>* %a to <4 x i1>*
-    %vec = load <4 x i1>, <4 x i1>* %cast
-    ret <4 x i1> %vec
+  %a = alloca <3 x i1>
+  store <3 x i1> <i1 1,i1 0,i1 1>, <3 x i1>* %a
+  %cast = bitcast <3 x i1>* %a to <4 x i1>*
+  %vec = load <4 x i1>, <4 x i1>* %cast
+  ret <4 x i1> %vec
 }
 
 define void @vector_bitcast_2() {
-  ; CHECK-LABEL: @vector_bitcast_2
-  ; CHECK: alloca <32 x i16>
+; CHECK-LABEL: @vector_bitcast_2(
+; CHECK-NEXT:    %"sum$1.host2" = alloca <32 x i16>, align 64
+; CHECK-NEXT:    store <32 x i16> undef, <32 x i16>* %"sum$1.host2", align 64
+; CHECK-NEXT:    %"sum$1.host2.0.bc.sroa_cast" = bitcast <32 x i16>* %"sum$1.host2" to <64 x i16>*
+; CHECK-NEXT:    %"sum$1.host2.0.bcl" = load <64 x i16>, <64 x i16>* %"sum$1.host2.0.bc.sroa_cast", align 64
+; CHECK-NEXT:    ret void
+;
 
-    %"sum$1.host2" = alloca <32 x i16>
-    store <32 x i16> undef, <32 x i16>* %"sum$1.host2"
-    %bc = bitcast <32 x i16>* %"sum$1.host2" to <64 x i16>*
-    %bcl = load <64 x i16>, <64 x i16>* %bc
-    ret void
+  %"sum$1.host2" = alloca <32 x i16>
+  store <32 x i16> undef, <32 x i16>* %"sum$1.host2"
+  %bc = bitcast <32 x i16>* %"sum$1.host2" to <64 x i16>*
+  %bcl = load <64 x i16>, <64 x i16>* %bc
+  ret void
 }