diff --git a/llvm/test/Transforms/SROA/address-spaces.ll b/llvm/test/Transforms/SROA/address-spaces.ll index 4303a924595a..e84d6120d69d 100644 --- a/llvm/test/Transforms/SROA/address-spaces.ll +++ b/llvm/test/Transforms/SROA/address-spaces.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -passes=sroa -S | FileCheck %s target datalayout = "e-p:64:64:64-p1:16:16:16-p3:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" @@ -10,9 +11,11 @@ declare void @llvm.memcpy.p1i8.p1i8.i32(i8 addrspace(1)* nocapture, i8 addrspace ; Make sure an illegal bitcast isn't introduced define void @test_address_space_1_1(<2 x i64> addrspace(1)* %a, i16 addrspace(1)* %b) { ; CHECK-LABEL: @test_address_space_1_1( -; CHECK: load <2 x i64>, <2 x i64> addrspace(1)* %a, align 2 -; CHECK: store <2 x i64> {{.*}}, <2 x i64> addrspace(1)* {{.*}}, align 2 -; CHECK: ret void +; CHECK-NEXT: [[AA_0_COPYLOAD:%.*]] = load <2 x i64>, <2 x i64> addrspace(1)* [[A:%.*]], align 2 +; CHECK-NEXT: [[AA_0_BPTR_SROA_CAST:%.*]] = bitcast i16 addrspace(1)* [[B:%.*]] to <2 x i64> addrspace(1)* +; CHECK-NEXT: store <2 x i64> [[AA_0_COPYLOAD]], <2 x i64> addrspace(1)* [[AA_0_BPTR_SROA_CAST]], align 2 +; CHECK-NEXT: ret void +; %aa = alloca <2 x i64>, align 16 %aptr = bitcast <2 x i64> addrspace(1)* %a to i8 addrspace(1)* %aaptr = bitcast <2 x i64>* %aa to i8* @@ -24,9 +27,11 @@ define void @test_address_space_1_1(<2 x i64> addrspace(1)* %a, i16 addrspace(1) define void @test_address_space_1_0(<2 x i64> addrspace(1)* %a, i16* %b) { ; CHECK-LABEL: @test_address_space_1_0( -; CHECK: load <2 x i64>, <2 x i64> addrspace(1)* %a, align 2 -; CHECK: store <2 x i64> {{.*}}, <2 x i64>* {{.*}}, align 2 -; CHECK: ret void +; CHECK-NEXT: [[AA_0_COPYLOAD:%.*]] = load <2 x i64>, <2 x i64> addrspace(1)* [[A:%.*]], align 2 +; CHECK-NEXT: [[AA_0_BPTR_SROA_CAST:%.*]] = bitcast i16* [[B:%.*]] to <2 x i64>* +; CHECK-NEXT: store <2 x i64> [[AA_0_COPYLOAD]], <2 x i64>* [[AA_0_BPTR_SROA_CAST]], align 2 +; CHECK-NEXT: ret void +; %aa = alloca <2 x i64>, align 16 %aptr = bitcast <2 x i64> addrspace(1)* %a to i8 addrspace(1)* %aaptr = bitcast <2 x i64>* %aa to i8* @@ -38,9 +43,11 @@ define void @test_address_space_1_0(<2 x i64> addrspace(1)* %a, i16* %b) { define void @test_address_space_0_1(<2 x i64>* %a, i16 addrspace(1)* %b) { ; CHECK-LABEL: @test_address_space_0_1( -; CHECK: load <2 x i64>, <2 x i64>* %a, align 2 -; CHECK: store <2 x i64> {{.*}}, <2 x i64> addrspace(1)* {{.*}}, align 2 -; CHECK: ret void +; CHECK-NEXT: [[AA_0_COPYLOAD:%.*]] = load <2 x i64>, <2 x i64>* [[A:%.*]], align 2 +; CHECK-NEXT: [[AA_0_BPTR_SROA_CAST:%.*]] = bitcast i16 addrspace(1)* [[B:%.*]] to <2 x i64> addrspace(1)* +; CHECK-NEXT: store <2 x i64> [[AA_0_COPYLOAD]], <2 x i64> addrspace(1)* [[AA_0_BPTR_SROA_CAST]], align 2 +; CHECK-NEXT: ret void +; %aa = alloca <2 x i64>, align 16 %aptr = bitcast <2 x i64>* %a to i8* %aaptr = bitcast <2 x i64>* %aa to i8* @@ -55,7 +62,18 @@ define void @test_address_space_0_1(<2 x i64>* %a, i16 addrspace(1)* %b) { ; Function Attrs: nounwind define void @copy_struct([5 x i64] %in.coerce) { ; CHECK-LABEL: @copy_struct( -; CHECK-NOT: memcpy +; CHECK-NEXT: for.end: +; CHECK-NEXT: [[IN_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [5 x i64] [[IN_COERCE:%.*]], 0 +; CHECK-NEXT: [[IN_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [5 x i64] [[IN_COERCE]], 1 +; CHECK-NEXT: [[IN_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [5 x i64] [[IN_COERCE]], 2 +; CHECK-NEXT: [[IN_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [5 x i64] [[IN_COERCE]], 3 +; CHECK-NEXT: [[IN_SROA_2_4_EXTRACT_SHIFT:%.*]] = lshr i64 [[IN_COERCE_FCA_2_EXTRACT]], 32 +; CHECK-NEXT: [[IN_SROA_2_4_EXTRACT_TRUNC:%.*]] = trunc i64 [[IN_SROA_2_4_EXTRACT_SHIFT]] to i32 +; CHECK-NEXT: store i32 [[IN_SROA_2_4_EXTRACT_TRUNC]], i32 addrspace(1)* undef, align 4 +; CHECK-NEXT: store i64 [[IN_COERCE_FCA_3_EXTRACT]], i64 addrspace(1)* poison, align 4 +; CHECK-NEXT: store i32 undef, i32 addrspace(1)* poison, align 4 +; CHECK-NEXT: ret void +; for.end: %in = alloca %struct.struct_test_27.0.13, align 8 %0 = bitcast %struct.struct_test_27.0.13* %in to [5 x i64]* @@ -65,7 +83,7 @@ for.end: call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* align 4 undef, i8* align 4 %scevgep910, i32 16, i1 false) ret void } - + %union.anon = type { i32* } @g = common global i32 0, align 4 @@ -75,8 +93,12 @@ for.end: ; illegal bitcast isn't introduced define void @pr27557() { ; CHECK-LABEL: @pr27557( -; CHECK: %[[CAST:.*]] = bitcast i32** {{.*}} to i32 addrspace(3)** -; CHECK: store i32 addrspace(3)* @l, i32 addrspace(3)** %[[CAST]] +; CHECK-NEXT: [[DOTSROA_0:%.*]] = alloca i32*, align 8 +; CHECK-NEXT: store i32* @g, i32** [[DOTSROA_0]], align 8 +; CHECK-NEXT: [[DOTSROA_0_0__SROA_CAST1:%.*]] = bitcast i32** [[DOTSROA_0]] to i32 addrspace(3)** +; CHECK-NEXT: store i32 addrspace(3)* @l, i32 addrspace(3)** [[DOTSROA_0_0__SROA_CAST1]], align 8 +; CHECK-NEXT: ret void +; %1 = alloca %union.anon, align 8 %2 = bitcast %union.anon* %1 to i32** store i32* @g, i32** %2, align 8 @@ -91,7 +113,8 @@ define void @pr27557() { ; should be promoted through the pair of `ptrtoint`/`inttoptr`. define i32* @pr27557.alt() { ; CHECK-LABEL: @pr27557.alt( -; CHECK: ret i32* inttoptr (i64 ptrtoint (i32 addrspace(2)* @l2 to i64) to i32*) +; CHECK-NEXT: ret i32* inttoptr (i64 ptrtoint (i32 addrspace(2)* @l2 to i64) to i32*) +; %1 = alloca %union.anon, align 8 %2 = bitcast %union.anon* %1 to i32 addrspace(2)** store i32 addrspace(2)* @l2, i32 addrspace(2)** %2, align 8 @@ -102,30 +125,52 @@ define i32* @pr27557.alt() { ; Make sure pre-splitting doesn't try to introduce an illegal bitcast define float @presplit(i64 addrspace(1)* %p) { -entry: ; CHECK-LABEL: @presplit( -; CHECK: %[[CAST:.*]] = bitcast i64 addrspace(1)* {{.*}} to i32 addrspace(1)* -; CHECK: load i32, i32 addrspace(1)* %[[CAST]] - %b = alloca i64 - %b.cast = bitcast i64* %b to [2 x float]* - %b.gep1 = getelementptr [2 x float], [2 x float]* %b.cast, i32 0, i32 0 - %b.gep2 = getelementptr [2 x float], [2 x float]* %b.cast, i32 0, i32 1 - %l = load i64, i64 addrspace(1)* %p - store i64 %l, i64* %b - %f1 = load float, float* %b.gep1 - %f2 = load float, float* %b.gep2 - %ret = fadd float %f1, %f2 - ret float %ret +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P_SROA_CAST:%.*]] = bitcast i64 addrspace(1)* [[P:%.*]] to i32 addrspace(1)* +; CHECK-NEXT: [[L1:%.*]] = load i32, i32 addrspace(1)* [[P_SROA_CAST]], align 4 +; CHECK-NEXT: [[P_SROA_RAW_CAST:%.*]] = bitcast i64 addrspace(1)* [[P]] to i8 addrspace(1)* +; CHECK-NEXT: [[P_SROA_RAW_IDX:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[P_SROA_RAW_CAST]], i16 4 +; CHECK-NEXT: [[P_SROA_CAST2:%.*]] = bitcast i8 addrspace(1)* [[P_SROA_RAW_IDX]] to i32 addrspace(1)* +; CHECK-NEXT: [[L3:%.*]] = load i32, i32 addrspace(1)* [[P_SROA_CAST2]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32 [[L1]] to float +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[L3]] to float +; CHECK-NEXT: [[RET:%.*]] = fadd float [[TMP0]], [[TMP1]] +; CHECK-NEXT: ret float [[RET]] +; +entry: + %b = alloca i64 + %b.cast = bitcast i64* %b to [2 x float]* + %b.gep1 = getelementptr [2 x float], [2 x float]* %b.cast, i32 0, i32 0 + %b.gep2 = getelementptr [2 x float], [2 x float]* %b.cast, i32 0, i32 1 + %l = load i64, i64 addrspace(1)* %p + store i64 %l, i64* %b + %f1 = load float, float* %b.gep1 + %f2 = load float, float* %b.gep2 + %ret = fadd float %f1, %f2 + ret float %ret } ; Test load from and store to non-zero address space. define void @test_load_store_diff_addr_space([2 x float] addrspace(1)* %complex1, [2 x float] addrspace(1)* %complex2) { -; CHECK-LABEL: @test_load_store_diff_addr_space -; CHECK-NOT: alloca -; CHECK: load i32, i32 addrspace(1)* -; CHECK: load i32, i32 addrspace(1)* -; CHECK: store i32 %{{.*}}, i32 addrspace(1)* -; CHECK: store i32 %{{.*}}, i32 addrspace(1)* +; CHECK-LABEL: @test_load_store_diff_addr_space( +; CHECK-NEXT: [[P1_SROA_CAST:%.*]] = bitcast [2 x float] addrspace(1)* [[COMPLEX1:%.*]] to i32 addrspace(1)* +; CHECK-NEXT: [[V15:%.*]] = load i32, i32 addrspace(1)* [[P1_SROA_CAST]], align 4 +; CHECK-NEXT: [[P1_SROA_IDX:%.*]] = getelementptr inbounds [2 x float], [2 x float] addrspace(1)* [[COMPLEX1]], i16 0, i16 1 +; CHECK-NEXT: [[P1_SROA_CAST7:%.*]] = bitcast float addrspace(1)* [[P1_SROA_IDX]] to i32 addrspace(1)* +; CHECK-NEXT: [[V18:%.*]] = load i32, i32 addrspace(1)* [[P1_SROA_CAST7]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[V15]] to float +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[V18]] to float +; CHECK-NEXT: [[SUM:%.*]] = fadd float [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[SUM]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast float [[SUM]] to i32 +; CHECK-NEXT: [[P2_SROA_CAST:%.*]] = bitcast [2 x float] addrspace(1)* [[COMPLEX2:%.*]] to i32 addrspace(1)* +; CHECK-NEXT: store i32 [[TMP3]], i32 addrspace(1)* [[P2_SROA_CAST]], align 4 +; CHECK-NEXT: [[P2_SROA_IDX:%.*]] = getelementptr inbounds [2 x float], [2 x float] addrspace(1)* [[COMPLEX2]], i16 0, i16 1 +; CHECK-NEXT: [[P2_SROA_CAST4:%.*]] = bitcast float addrspace(1)* [[P2_SROA_IDX]] to i32 addrspace(1)* +; CHECK-NEXT: store i32 [[TMP4]], i32 addrspace(1)* [[P2_SROA_CAST4]], align 4 +; CHECK-NEXT: ret void +; %a = alloca i64 %a.cast = bitcast i64* %a to [2 x float]* %a.gep1 = getelementptr [2 x float], [2 x float]* %a.cast, i32 0, i32 0 diff --git a/llvm/test/Transforms/SROA/alignment.ll b/llvm/test/Transforms/SROA/alignment.ll index 0fe9c849e964..de794852ac5f 100644 --- a/llvm/test/Transforms/SROA/alignment.ll +++ b/llvm/test/Transforms/SROA/alignment.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -passes=sroa -S | FileCheck %s ; RUN: opt -passes='debugify,function(sroa)' -S < %s | FileCheck %s -check-prefix DEBUGLOC @@ -7,15 +8,33 @@ declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1) define void @test1({ i8, i8 }* %a, { i8, i8 }* %b) { ; CHECK-LABEL: @test1( -; CHECK: %[[gep_a0:.*]] = getelementptr inbounds { i8, i8 }, { i8, i8 }* %a, i64 0, i32 0 -; CHECK: %[[a0:.*]] = load i8, i8* %[[gep_a0]], align 16 -; CHECK: %[[gep_a1:.*]] = getelementptr inbounds { i8, i8 }, { i8, i8 }* %a, i64 0, i32 1 -; CHECK: %[[a1:.*]] = load i8, i8* %[[gep_a1]], align 1 -; CHECK: %[[gep_b0:.*]] = getelementptr inbounds { i8, i8 }, { i8, i8 }* %b, i64 0, i32 0 -; CHECK: store i8 %[[a0]], i8* %[[gep_b0]], align 16 -; CHECK: %[[gep_b1:.*]] = getelementptr inbounds { i8, i8 }, { i8, i8 }* %b, i64 0, i32 1 -; CHECK: store i8 %[[a1]], i8* %[[gep_b1]], align 1 -; CHECK: ret void +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ALLOCA_SROA_0_0_GEP_A_SROA_IDX:%.*]] = getelementptr inbounds { i8, i8 }, { i8, i8 }* [[A:%.*]], i64 0, i32 0 +; CHECK-NEXT: [[ALLOCA_SROA_0_0_COPYLOAD:%.*]] = load i8, i8* [[ALLOCA_SROA_0_0_GEP_A_SROA_IDX]], align 16 +; CHECK-NEXT: [[ALLOCA_SROA_3_0_GEP_A_SROA_IDX:%.*]] = getelementptr inbounds { i8, i8 }, { i8, i8 }* [[A]], i64 0, i32 1 +; CHECK-NEXT: [[ALLOCA_SROA_3_0_COPYLOAD:%.*]] = load i8, i8* [[ALLOCA_SROA_3_0_GEP_A_SROA_IDX]], align 1 +; CHECK-NEXT: [[ALLOCA_SROA_0_0_GEP_B_SROA_IDX:%.*]] = getelementptr inbounds { i8, i8 }, { i8, i8 }* [[B:%.*]], i64 0, i32 0 +; CHECK-NEXT: store i8 [[ALLOCA_SROA_0_0_COPYLOAD]], i8* [[ALLOCA_SROA_0_0_GEP_B_SROA_IDX]], align 16 +; CHECK-NEXT: [[ALLOCA_SROA_3_0_GEP_B_SROA_IDX:%.*]] = getelementptr inbounds { i8, i8 }, { i8, i8 }* [[B]], i64 0, i32 1 +; CHECK-NEXT: store i8 [[ALLOCA_SROA_3_0_COPYLOAD]], i8* [[ALLOCA_SROA_3_0_GEP_B_SROA_IDX]], align 1 +; CHECK-NEXT: ret void +; +; DEBUGLOC-LABEL: @test1( +; DEBUGLOC-NEXT: entry: +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata { i8, i8 }* undef, metadata [[META9:![0-9]+]], metadata !DIExpression()), !dbg [[DBG14:![0-9]+]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata i8* undef, metadata [[META11:![0-9]+]], metadata !DIExpression()), !dbg [[DBG15:![0-9]+]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata i8* undef, metadata [[META12:![0-9]+]], metadata !DIExpression()), !dbg [[DBG16:![0-9]+]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata i8* undef, metadata [[META13:![0-9]+]], metadata !DIExpression()), !dbg [[DBG17:![0-9]+]] +; DEBUGLOC-NEXT: [[ALLOCA_SROA_0_0_GEP_A_SROA_IDX:%.*]] = getelementptr inbounds { i8, i8 }, { i8, i8 }* [[A:%.*]], i64 0, i32 0, !dbg [[DBG18:![0-9]+]] +; DEBUGLOC-NEXT: [[ALLOCA_SROA_0_0_COPYLOAD:%.*]] = load i8, i8* [[ALLOCA_SROA_0_0_GEP_A_SROA_IDX]], align 16, !dbg [[DBG18]] +; DEBUGLOC-NEXT: [[ALLOCA_SROA_3_0_GEP_A_SROA_IDX:%.*]] = getelementptr inbounds { i8, i8 }, { i8, i8 }* [[A]], i64 0, i32 1, !dbg [[DBG18]] +; DEBUGLOC-NEXT: [[ALLOCA_SROA_3_0_COPYLOAD:%.*]] = load i8, i8* [[ALLOCA_SROA_3_0_GEP_A_SROA_IDX]], align 1, !dbg [[DBG18]] +; DEBUGLOC-NEXT: [[ALLOCA_SROA_0_0_GEP_B_SROA_IDX:%.*]] = getelementptr inbounds { i8, i8 }, { i8, i8 }* [[B:%.*]], i64 0, i32 0, !dbg [[DBG19:![0-9]+]] +; DEBUGLOC-NEXT: store i8 [[ALLOCA_SROA_0_0_COPYLOAD]], i8* [[ALLOCA_SROA_0_0_GEP_B_SROA_IDX]], align 16, !dbg [[DBG19]] +; DEBUGLOC-NEXT: [[ALLOCA_SROA_3_0_GEP_B_SROA_IDX:%.*]] = getelementptr inbounds { i8, i8 }, { i8, i8 }* [[B]], i64 0, i32 1, !dbg [[DBG19]] +; DEBUGLOC-NEXT: store i8 [[ALLOCA_SROA_3_0_COPYLOAD]], i8* [[ALLOCA_SROA_3_0_GEP_B_SROA_IDX]], align 1, !dbg [[DBG19]] +; DEBUGLOC-NEXT: ret void, !dbg [[DBG20:![0-9]+]] +; entry: %alloca = alloca { i8, i8 }, align 16 @@ -32,18 +51,37 @@ entry: define void @test2() { ; CHECK-LABEL: @test2( -; CHECK: alloca i16 -; CHECK: load i8, i8* %{{.*}} -; CHECK: store i8 42, i8* %{{.*}} -; CHECK: ret void +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca i16, align 2 +; CHECK-NEXT: store volatile i16 0, i16* [[A_SROA_0]], align 2 +; CHECK-NEXT: [[A_SROA_0_1_GEP2_SROA_RAW_CAST:%.*]] = bitcast i16* [[A_SROA_0]] to i8* +; CHECK-NEXT: [[A_SROA_0_1_GEP2_SROA_RAW_IDX:%.*]] = getelementptr inbounds i8, i8* [[A_SROA_0_1_GEP2_SROA_RAW_CAST]], i64 1 +; CHECK-NEXT: [[A_SROA_0_1_A_SROA_0_2_RESULT:%.*]] = load i8, i8* [[A_SROA_0_1_GEP2_SROA_RAW_IDX]], align 1 +; CHECK-NEXT: [[A_SROA_0_1_GEP2_SROA_RAW_CAST3:%.*]] = bitcast i16* [[A_SROA_0]] to i8* +; CHECK-NEXT: [[A_SROA_0_1_GEP2_SROA_RAW_IDX4:%.*]] = getelementptr inbounds i8, i8* [[A_SROA_0_1_GEP2_SROA_RAW_CAST3]], i64 1 +; CHECK-NEXT: store i8 42, i8* [[A_SROA_0_1_GEP2_SROA_RAW_IDX4]], align 1 +; CHECK-NEXT: ret void +; +; DEBUGLOC-LABEL: @test2( +; DEBUGLOC-NEXT: entry: +; DEBUGLOC-NEXT: [[A_SROA_0:%.*]] = alloca i16, align 2, !dbg [[DBG29:![0-9]+]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata { i8, i8, i8, i8 }* undef, metadata [[META23:![0-9]+]], metadata !DIExpression()), !dbg [[DBG29]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata i8* undef, metadata [[META24:![0-9]+]], metadata !DIExpression()), !dbg [[DBG30:![0-9]+]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata i16* undef, metadata [[META25:![0-9]+]], metadata !DIExpression()), !dbg [[DBG31:![0-9]+]] +; DEBUGLOC-NEXT: store volatile i16 0, i16* [[A_SROA_0]], align 2, !dbg [[DBG32:![0-9]+]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata i8* undef, metadata [[META26:![0-9]+]], metadata !DIExpression()), !dbg [[DBG33:![0-9]+]] +; DEBUGLOC-NEXT: [[A_SROA_0_1_GEP2_SROA_RAW_CAST:%.*]] = bitcast i16* [[A_SROA_0]] to i8*, !dbg [[DBG34:![0-9]+]] +; DEBUGLOC-NEXT: [[A_SROA_0_1_GEP2_SROA_RAW_IDX:%.*]] = getelementptr inbounds i8, i8* [[A_SROA_0_1_GEP2_SROA_RAW_CAST]], i64 1, !dbg [[DBG34]] +; DEBUGLOC-NEXT: [[A_SROA_0_1_A_SROA_0_2_RESULT:%.*]] = load i8, i8* [[A_SROA_0_1_GEP2_SROA_RAW_IDX]], align 1, !dbg [[DBG34]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata i8 [[A_SROA_0_1_A_SROA_0_2_RESULT]], metadata [[META27:![0-9]+]], metadata !DIExpression()), !dbg [[DBG34]] +; DEBUGLOC-NEXT: [[A_SROA_0_1_GEP2_SROA_RAW_CAST3:%.*]] = bitcast i16* [[A_SROA_0]] to i8*, !dbg [[DBG35:![0-9]+]] +; DEBUGLOC-NEXT: [[A_SROA_0_1_GEP2_SROA_RAW_IDX4:%.*]] = getelementptr inbounds i8, i8* [[A_SROA_0_1_GEP2_SROA_RAW_CAST3]], i64 1, !dbg [[DBG35]] +; DEBUGLOC-NEXT: store i8 42, i8* [[A_SROA_0_1_GEP2_SROA_RAW_IDX4]], align 1, !dbg [[DBG35]] +; DEBUGLOC-NEXT: ret void, !dbg [[DBG36:![0-9]+]] +; ; Check that when sroa rewrites the alloca partition ; it preserves the original DebugLocation. -; DEBUGLOC-LABEL: @test2( -; DEBUGLOC: {{.*}} = alloca {{.*}} !dbg ![[DbgLoc:[0-9]+]] -; DEBUGLOC-LABEL: } -; -; DEBUGLOC: ![[DbgLoc]] = !DILocation(line: 9, entry: %a = alloca { i8, i8, i8, i8 }, align 2 ; "line 9" to -debugify @@ -59,9 +97,23 @@ entry: define void @PR13920(<2 x i64>* %a, i16* %b) { ; Test that alignments on memcpy intrinsics get propagated to loads and stores. ; CHECK-LABEL: @PR13920( -; CHECK: load <2 x i64>, <2 x i64>* %a, align 2 -; CHECK: store <2 x i64> {{.*}}, <2 x i64>* {{.*}}, align 2 -; CHECK: ret void +; CHECK-NEXT: entry: +; CHECK-NEXT: [[AA_0_COPYLOAD:%.*]] = load <2 x i64>, <2 x i64>* [[A:%.*]], align 2 +; CHECK-NEXT: [[AA_0_BPTR_SROA_CAST:%.*]] = bitcast i16* [[B:%.*]] to <2 x i64>* +; CHECK-NEXT: store <2 x i64> [[AA_0_COPYLOAD]], <2 x i64>* [[AA_0_BPTR_SROA_CAST]], align 2 +; CHECK-NEXT: ret void +; +; DEBUGLOC-LABEL: @PR13920( +; DEBUGLOC-NEXT: entry: +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata <2 x i64>* undef, metadata [[META39:![0-9]+]], metadata !DIExpression()), !dbg [[DBG43:![0-9]+]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata i8* undef, metadata [[META40:![0-9]+]], metadata !DIExpression()), !dbg [[DBG44:![0-9]+]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata i8* undef, metadata [[META41:![0-9]+]], metadata !DIExpression()), !dbg [[DBG45:![0-9]+]] +; DEBUGLOC-NEXT: [[AA_0_COPYLOAD:%.*]] = load <2 x i64>, <2 x i64>* [[A:%.*]], align 2, !dbg [[DBG46:![0-9]+]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata i8* undef, metadata [[META42:![0-9]+]], metadata !DIExpression()), !dbg [[DBG47:![0-9]+]] +; DEBUGLOC-NEXT: [[AA_0_BPTR_SROA_CAST:%.*]] = bitcast i16* [[B:%.*]] to <2 x i64>*, !dbg [[DBG48:![0-9]+]] +; DEBUGLOC-NEXT: store <2 x i64> [[AA_0_COPYLOAD]], <2 x i64>* [[AA_0_BPTR_SROA_CAST]], align 2, !dbg [[DBG48]] +; DEBUGLOC-NEXT: ret void, !dbg [[DBG49:![0-9]+]] +; entry: %aa = alloca <2 x i64>, align 16 @@ -79,9 +131,30 @@ define void @test3(i8* %x) { ; expecting. However, also check that any offset within an alloca can in turn ; reduce the alignment. ; CHECK-LABEL: @test3( -; CHECK: alloca [22 x i8], align 8 -; CHECK: alloca [18 x i8], align 2 -; CHECK: ret void +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca [22 x i8], align 8 +; CHECK-NEXT: [[B_SROA_0:%.*]] = alloca [18 x i8], align 2 +; CHECK-NEXT: [[A_SROA_0_0_A_RAW_SROA_IDX:%.*]] = getelementptr inbounds [22 x i8], [22 x i8]* [[A_SROA_0]], i64 0, i64 0 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[A_SROA_0_0_A_RAW_SROA_IDX]], i8* align 8 [[X:%.*]], i32 22, i1 false) +; CHECK-NEXT: [[B_SROA_0_6_B_GEP_SROA_IDX:%.*]] = getelementptr inbounds [18 x i8], [18 x i8]* [[B_SROA_0]], i64 0, i64 0 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[B_SROA_0_6_B_GEP_SROA_IDX]], i8* align 2 [[X]], i32 18, i1 false) +; CHECK-NEXT: ret void +; +; DEBUGLOC-LABEL: @test3( +; DEBUGLOC-NEXT: entry: +; DEBUGLOC-NEXT: [[A_SROA_0:%.*]] = alloca [22 x i8], align 8, !dbg [[DBG57:![0-9]+]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata { i8*, i8*, i8* }* undef, metadata [[META52:![0-9]+]], metadata !DIExpression()), !dbg [[DBG57]] +; DEBUGLOC-NEXT: [[B_SROA_0:%.*]] = alloca [18 x i8], align 2, !dbg [[DBG58:![0-9]+]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata { i8*, i8*, i8* }* undef, metadata [[META53:![0-9]+]], metadata !DIExpression()), !dbg [[DBG58]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata i8* undef, metadata [[META54:![0-9]+]], metadata !DIExpression()), !dbg [[DBG59:![0-9]+]] +; DEBUGLOC-NEXT: [[A_SROA_0_0_A_RAW_SROA_IDX:%.*]] = getelementptr inbounds [22 x i8], [22 x i8]* [[A_SROA_0]], i64 0, i64 0, !dbg [[DBG60:![0-9]+]] +; DEBUGLOC-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[A_SROA_0_0_A_RAW_SROA_IDX]], i8* align 8 [[X:%.*]], i32 22, i1 false), !dbg [[DBG60]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata i8* undef, metadata [[META55:![0-9]+]], metadata !DIExpression()), !dbg [[DBG61:![0-9]+]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata i8* undef, metadata [[META56:![0-9]+]], metadata !DIExpression()), !dbg [[DBG62:![0-9]+]] +; DEBUGLOC-NEXT: [[B_SROA_0_6_B_GEP_SROA_IDX:%.*]] = getelementptr inbounds [18 x i8], [18 x i8]* [[B_SROA_0]], i64 0, i64 0, !dbg [[DBG63:![0-9]+]] +; DEBUGLOC-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[B_SROA_0_6_B_GEP_SROA_IDX]], i8* align 2 [[X]], i32 18, i1 false), !dbg [[DBG63]] +; DEBUGLOC-NEXT: ret void, !dbg [[DBG64:![0-9]+]] +; entry: %a = alloca { i8*, i8*, i8* } @@ -100,14 +173,53 @@ define void @test5() { ; split or promoted out of existence. ; ; CHECK-LABEL: @test5( -; CHECK: alloca [9 x i8] -; CHECK: alloca [9 x i8] -; CHECK: store volatile double 0.0{{.*}}, double* %{{.*}}, align 1 -; CHECK: load volatile i16, i16* %{{.*}}, align 1 -; CHECK: load double, double* %{{.*}}, align 1 -; CHECK: store volatile double %{{.*}}, double* %{{.*}}, align 1 -; CHECK: load volatile i16, i16* %{{.*}}, align 1 -; CHECK: ret void +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca [9 x i8], align 1 +; CHECK-NEXT: [[A_SROA_3:%.*]] = alloca [9 x i8], align 1 +; CHECK-NEXT: [[A_SROA_0_0_PTR1_SROA_CAST2:%.*]] = bitcast [9 x i8]* [[A_SROA_0]] to double* +; CHECK-NEXT: store volatile double 0.000000e+00, double* [[A_SROA_0_0_PTR1_SROA_CAST2]], align 1 +; CHECK-NEXT: [[A_SROA_0_7_WEIRD_CAST1_SROA_IDX4:%.*]] = getelementptr inbounds [9 x i8], [9 x i8]* [[A_SROA_0]], i64 0, i64 7 +; CHECK-NEXT: [[A_SROA_0_7_WEIRD_CAST1_SROA_CAST5:%.*]] = bitcast i8* [[A_SROA_0_7_WEIRD_CAST1_SROA_IDX4]] to i16* +; CHECK-NEXT: [[A_SROA_0_7_A_SROA_0_7_WEIRD_LOAD1:%.*]] = load volatile i16, i16* [[A_SROA_0_7_WEIRD_CAST1_SROA_CAST5]], align 1 +; CHECK-NEXT: [[A_SROA_0_0_PTR1_SROA_CAST3:%.*]] = bitcast [9 x i8]* [[A_SROA_0]] to double* +; CHECK-NEXT: [[A_SROA_0_0_A_SROA_0_0_D1:%.*]] = load double, double* [[A_SROA_0_0_PTR1_SROA_CAST3]], align 1 +; CHECK-NEXT: [[A_SROA_3_0_PTR2_SROA_CAST:%.*]] = bitcast [9 x i8]* [[A_SROA_3]] to double* +; CHECK-NEXT: store volatile double [[A_SROA_0_0_A_SROA_0_0_D1]], double* [[A_SROA_3_0_PTR2_SROA_CAST]], align 1 +; CHECK-NEXT: [[A_SROA_3_7_WEIRD_CAST2_SROA_IDX:%.*]] = getelementptr inbounds [9 x i8], [9 x i8]* [[A_SROA_3]], i64 0, i64 7 +; CHECK-NEXT: [[A_SROA_3_7_WEIRD_CAST2_SROA_CAST:%.*]] = bitcast i8* [[A_SROA_3_7_WEIRD_CAST2_SROA_IDX]] to i16* +; CHECK-NEXT: [[A_SROA_3_7_A_SROA_3_16_WEIRD_LOAD2:%.*]] = load volatile i16, i16* [[A_SROA_3_7_WEIRD_CAST2_SROA_CAST]], align 1 +; CHECK-NEXT: ret void +; +; DEBUGLOC-LABEL: @test5( +; DEBUGLOC-NEXT: entry: +; DEBUGLOC-NEXT: [[A_SROA_0:%.*]] = alloca [9 x i8], align 1, !dbg [[DBG80:![0-9]+]] +; DEBUGLOC-NEXT: [[A_SROA_3:%.*]] = alloca [9 x i8], align 1, !dbg [[DBG80]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata [18 x i8]* undef, metadata [[META67:![0-9]+]], metadata !DIExpression()), !dbg [[DBG80]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata i8* undef, metadata [[META68:![0-9]+]], metadata !DIExpression()), !dbg [[DBG81:![0-9]+]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata double* undef, metadata [[META69:![0-9]+]], metadata !DIExpression()), !dbg [[DBG82:![0-9]+]] +; DEBUGLOC-NEXT: [[A_SROA_0_0_PTR1_SROA_CAST2:%.*]] = bitcast [9 x i8]* [[A_SROA_0]] to double*, !dbg [[DBG83:![0-9]+]] +; DEBUGLOC-NEXT: store volatile double 0.000000e+00, double* [[A_SROA_0_0_PTR1_SROA_CAST2]], align 1, !dbg [[DBG83]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata i8* undef, metadata [[META70:![0-9]+]], metadata !DIExpression()), !dbg [[DBG84:![0-9]+]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata i16* undef, metadata [[META71:![0-9]+]], metadata !DIExpression()), !dbg [[DBG85:![0-9]+]] +; DEBUGLOC-NEXT: [[A_SROA_0_7_WEIRD_CAST1_SROA_IDX4:%.*]] = getelementptr inbounds [9 x i8], [9 x i8]* [[A_SROA_0]], i64 0, i64 7, !dbg [[DBG86:![0-9]+]] +; DEBUGLOC-NEXT: [[A_SROA_0_7_WEIRD_CAST1_SROA_CAST5:%.*]] = bitcast i8* [[A_SROA_0_7_WEIRD_CAST1_SROA_IDX4]] to i16*, !dbg [[DBG86]] +; DEBUGLOC-NEXT: [[A_SROA_0_7_A_SROA_0_7_WEIRD_LOAD1:%.*]] = load volatile i16, i16* [[A_SROA_0_7_WEIRD_CAST1_SROA_CAST5]], align 1, !dbg [[DBG86]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata i16 [[A_SROA_0_7_A_SROA_0_7_WEIRD_LOAD1]], metadata [[META72:![0-9]+]], metadata !DIExpression()), !dbg [[DBG86]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata i8* undef, metadata [[META74:![0-9]+]], metadata !DIExpression()), !dbg [[DBG87:![0-9]+]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata double* undef, metadata [[META75:![0-9]+]], metadata !DIExpression()), !dbg [[DBG88:![0-9]+]] +; DEBUGLOC-NEXT: [[A_SROA_0_0_PTR1_SROA_CAST3:%.*]] = bitcast [9 x i8]* [[A_SROA_0]] to double*, !dbg [[DBG89:![0-9]+]] +; DEBUGLOC-NEXT: [[A_SROA_0_0_A_SROA_0_0_D1:%.*]] = load double, double* [[A_SROA_0_0_PTR1_SROA_CAST3]], align 1, !dbg [[DBG89]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata double [[A_SROA_0_0_A_SROA_0_0_D1]], metadata [[META76:![0-9]+]], metadata !DIExpression()), !dbg [[DBG89]] +; DEBUGLOC-NEXT: [[A_SROA_3_0_PTR2_SROA_CAST:%.*]] = bitcast [9 x i8]* [[A_SROA_3]] to double*, !dbg [[DBG90:![0-9]+]] +; DEBUGLOC-NEXT: store volatile double [[A_SROA_0_0_A_SROA_0_0_D1]], double* [[A_SROA_3_0_PTR2_SROA_CAST]], align 1, !dbg [[DBG90]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata i8* undef, metadata [[META77:![0-9]+]], metadata !DIExpression()), !dbg [[DBG91:![0-9]+]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata i16* undef, metadata [[META78:![0-9]+]], metadata !DIExpression()), !dbg [[DBG92:![0-9]+]] +; DEBUGLOC-NEXT: [[A_SROA_3_7_WEIRD_CAST2_SROA_IDX:%.*]] = getelementptr inbounds [9 x i8], [9 x i8]* [[A_SROA_3]], i64 0, i64 7, !dbg [[DBG93:![0-9]+]] +; DEBUGLOC-NEXT: [[A_SROA_3_7_WEIRD_CAST2_SROA_CAST:%.*]] = bitcast i8* [[A_SROA_3_7_WEIRD_CAST2_SROA_IDX]] to i16*, !dbg [[DBG93]] +; DEBUGLOC-NEXT: [[A_SROA_3_7_A_SROA_3_16_WEIRD_LOAD2:%.*]] = load volatile i16, i16* [[A_SROA_3_7_WEIRD_CAST2_SROA_CAST]], align 1, !dbg [[DBG93]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata i16 [[A_SROA_3_7_A_SROA_3_16_WEIRD_LOAD2]], metadata [[META79:![0-9]+]], metadata !DIExpression()), !dbg [[DBG93]] +; DEBUGLOC-NEXT: ret void, !dbg [[DBG94:![0-9]+]] +; entry: %a = alloca [18 x i8] @@ -133,13 +245,29 @@ define void @test6() { ; We should set the alignment on all load and store operations; make sure ; we choose an appropriate alignment. ; CHECK-LABEL: @test6( -; CHECK: alloca double, align 8{{$}} -; CHECK: alloca double, align 8{{$}} -; CHECK: store{{.*}}, align 8 -; CHECK: load{{.*}}, align 8 -; CHECK: store{{.*}}, align 8 -; CHECK-NOT: align -; CHECK: ret void +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca double, align 8 +; CHECK-NEXT: [[A_SROA_2:%.*]] = alloca double, align 8 +; CHECK-NEXT: store volatile double 0.000000e+00, double* [[A_SROA_0]], align 8 +; CHECK-NEXT: [[A_SROA_0_0_A_SROA_0_0_VAL:%.*]] = load double, double* [[A_SROA_0]], align 8 +; CHECK-NEXT: store volatile double [[A_SROA_0_0_A_SROA_0_0_VAL]], double* [[A_SROA_2]], align 8 +; CHECK-NEXT: ret void +; +; DEBUGLOC-LABEL: @test6( +; DEBUGLOC-NEXT: entry: +; DEBUGLOC-NEXT: [[A_SROA_0:%.*]] = alloca double, align 8, !dbg [[DBG103:![0-9]+]] +; DEBUGLOC-NEXT: [[A_SROA_2:%.*]] = alloca double, align 8, !dbg [[DBG103]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata [16 x i8]* undef, metadata [[META97:![0-9]+]], metadata !DIExpression()), !dbg [[DBG103]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata i8* undef, metadata [[META98:![0-9]+]], metadata !DIExpression()), !dbg [[DBG104:![0-9]+]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata double* undef, metadata [[META99:![0-9]+]], metadata !DIExpression()), !dbg [[DBG105:![0-9]+]] +; DEBUGLOC-NEXT: store volatile double 0.000000e+00, double* [[A_SROA_0]], align 8, !dbg [[DBG106:![0-9]+]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata i8* undef, metadata [[META100:![0-9]+]], metadata !DIExpression()), !dbg [[DBG107:![0-9]+]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata double* undef, metadata [[META101:![0-9]+]], metadata !DIExpression()), !dbg [[DBG108:![0-9]+]] +; DEBUGLOC-NEXT: [[A_SROA_0_0_A_SROA_0_0_VAL:%.*]] = load double, double* [[A_SROA_0]], align 8, !dbg [[DBG109:![0-9]+]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata double [[A_SROA_0_0_A_SROA_0_0_VAL]], metadata [[META102:![0-9]+]], metadata !DIExpression()), !dbg [[DBG109]] +; DEBUGLOC-NEXT: store volatile double [[A_SROA_0_0_A_SROA_0_0_VAL]], double* [[A_SROA_2]], align 8, !dbg [[DBG110:![0-9]+]] +; DEBUGLOC-NEXT: ret void, !dbg [[DBG111:![0-9]+]] +; entry: %a = alloca [16 x i8] @@ -159,7 +287,40 @@ define void @test7(i8* %out) { ; Test that we properly compute the destination alignment when rewriting ; memcpys as direct loads or stores. ; CHECK-LABEL: @test7( -; CHECK-NOT: alloca +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_SROA_0_0_OUT_SROA_CAST:%.*]] = bitcast i8* [[OUT:%.*]] to double* +; CHECK-NEXT: [[A_SROA_0_0_COPYLOAD:%.*]] = load double, double* [[A_SROA_0_0_OUT_SROA_CAST]], align 1 +; CHECK-NEXT: [[A_SROA_4_0_OUT_SROA_IDX:%.*]] = getelementptr inbounds i8, i8* [[OUT]], i64 8 +; CHECK-NEXT: [[A_SROA_4_0_OUT_SROA_CAST:%.*]] = bitcast i8* [[A_SROA_4_0_OUT_SROA_IDX]] to double* +; CHECK-NEXT: [[A_SROA_4_0_COPYLOAD:%.*]] = load double, double* [[A_SROA_4_0_OUT_SROA_CAST]], align 1 +; CHECK-NEXT: [[A_SROA_0_0_OUT_SROA_CAST1:%.*]] = bitcast i8* [[OUT]] to double* +; CHECK-NEXT: store double [[A_SROA_4_0_COPYLOAD]], double* [[A_SROA_0_0_OUT_SROA_CAST1]], align 1 +; CHECK-NEXT: [[A_SROA_4_0_OUT_SROA_IDX3:%.*]] = getelementptr inbounds i8, i8* [[OUT]], i64 8 +; CHECK-NEXT: [[A_SROA_4_0_OUT_SROA_CAST4:%.*]] = bitcast i8* [[A_SROA_4_0_OUT_SROA_IDX3]] to double* +; CHECK-NEXT: store double [[A_SROA_0_0_COPYLOAD]], double* [[A_SROA_4_0_OUT_SROA_CAST4]], align 1 +; CHECK-NEXT: ret void +; +; DEBUGLOC-LABEL: @test7( +; DEBUGLOC-NEXT: entry: +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata [16 x i8]* undef, metadata [[META114:![0-9]+]], metadata !DIExpression()), !dbg [[DBG121:![0-9]+]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata i8* undef, metadata [[META115:![0-9]+]], metadata !DIExpression()), !dbg [[DBG122:![0-9]+]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata double* undef, metadata [[META116:![0-9]+]], metadata !DIExpression()), !dbg [[DBG123:![0-9]+]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata i8* undef, metadata [[META117:![0-9]+]], metadata !DIExpression()), !dbg [[DBG124:![0-9]+]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata double* undef, metadata [[META118:![0-9]+]], metadata !DIExpression()), !dbg [[DBG125:![0-9]+]] +; DEBUGLOC-NEXT: [[A_SROA_0_0_OUT_SROA_CAST:%.*]] = bitcast i8* [[OUT:%.*]] to double*, !dbg [[DBG126:![0-9]+]] +; DEBUGLOC-NEXT: [[A_SROA_0_0_COPYLOAD:%.*]] = load double, double* [[A_SROA_0_0_OUT_SROA_CAST]], align 1, !dbg [[DBG126]] +; DEBUGLOC-NEXT: [[A_SROA_4_0_OUT_SROA_IDX:%.*]] = getelementptr inbounds i8, i8* [[OUT]], i64 8, !dbg [[DBG126]] +; DEBUGLOC-NEXT: [[A_SROA_4_0_OUT_SROA_CAST:%.*]] = bitcast i8* [[A_SROA_4_0_OUT_SROA_IDX]] to double*, !dbg [[DBG126]] +; DEBUGLOC-NEXT: [[A_SROA_4_0_COPYLOAD:%.*]] = load double, double* [[A_SROA_4_0_OUT_SROA_CAST]], align 1, !dbg [[DBG126]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata double [[A_SROA_4_0_COPYLOAD]], metadata [[META119:![0-9]+]], metadata !DIExpression()), !dbg [[DBG127:![0-9]+]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata double [[A_SROA_0_0_COPYLOAD]], metadata [[META120:![0-9]+]], metadata !DIExpression()), !dbg [[DBG128:![0-9]+]] +; DEBUGLOC-NEXT: [[A_SROA_0_0_OUT_SROA_CAST1:%.*]] = bitcast i8* [[OUT]] to double*, !dbg [[DBG129:![0-9]+]] +; DEBUGLOC-NEXT: store double [[A_SROA_4_0_COPYLOAD]], double* [[A_SROA_0_0_OUT_SROA_CAST1]], align 1, !dbg [[DBG129]] +; DEBUGLOC-NEXT: [[A_SROA_4_0_OUT_SROA_IDX3:%.*]] = getelementptr inbounds i8, i8* [[OUT]], i64 8, !dbg [[DBG129]] +; DEBUGLOC-NEXT: [[A_SROA_4_0_OUT_SROA_CAST4:%.*]] = bitcast i8* [[A_SROA_4_0_OUT_SROA_IDX3]] to double*, !dbg [[DBG129]] +; DEBUGLOC-NEXT: store double [[A_SROA_0_0_COPYLOAD]], double* [[A_SROA_4_0_OUT_SROA_CAST4]], align 1, !dbg [[DBG129]] +; DEBUGLOC-NEXT: ret void, !dbg [[DBG130:![0-9]+]] +; entry: %a = alloca [16 x i8] @@ -169,8 +330,6 @@ entry: %ptr2 = bitcast i8* %raw2 to double* call void @llvm.memcpy.p0i8.p0i8.i32(i8* %raw1, i8* %out, i32 16, i1 false) -; CHECK: %[[val2:.*]] = load double, double* %{{.*}}, align 1 -; CHECK: %[[val1:.*]] = load double, double* %{{.*}}, align 1 %val1 = load double, double* %ptr2, align 1 %val2 = load double, double* %ptr1, align 1 @@ -179,20 +338,56 @@ entry: store double %val2, double* %ptr2, align 1 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %out, i8* %raw1, i32 16, i1 false) -; CHECK: store double %[[val1]], double* %{{.*}}, align 1 -; CHECK: store double %[[val2]], double* %{{.*}}, align 1 ret void -; CHECK: ret void } define void @test8() { ; CHECK-LABEL: @test8( -; CHECK: load i32, {{.*}}, align 1 -; CHECK: load i32, {{.*}}, align 1 -; CHECK: load i32, {{.*}}, align 1 -; CHECK: load i32, {{.*}}, align 1 -; CHECK: load i32, {{.*}}, align 1 +; CHECK-NEXT: [[PTR:%.*]] = alloca [5 x i32], align 1 +; CHECK-NEXT: [[PTR_8:%.*]] = bitcast [5 x i32]* [[PTR]] to i8* +; CHECK-NEXT: call void @populate(i8* [[PTR_8]]) +; CHECK-NEXT: [[VAL_FCA_0_GEP:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* [[PTR]], i32 0, i32 0 +; CHECK-NEXT: [[VAL_FCA_0_LOAD:%.*]] = load i32, i32* [[VAL_FCA_0_GEP]], align 1 +; CHECK-NEXT: [[VAL_FCA_0_INSERT:%.*]] = insertvalue [5 x i32] poison, i32 [[VAL_FCA_0_LOAD]], 0 +; CHECK-NEXT: [[VAL_FCA_1_GEP:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* [[PTR]], i32 0, i32 1 +; CHECK-NEXT: [[VAL_FCA_1_LOAD:%.*]] = load i32, i32* [[VAL_FCA_1_GEP]], align 1 +; CHECK-NEXT: [[VAL_FCA_1_INSERT:%.*]] = insertvalue [5 x i32] [[VAL_FCA_0_INSERT]], i32 [[VAL_FCA_1_LOAD]], 1 +; CHECK-NEXT: [[VAL_FCA_2_GEP:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* [[PTR]], i32 0, i32 2 +; CHECK-NEXT: [[VAL_FCA_2_LOAD:%.*]] = load i32, i32* [[VAL_FCA_2_GEP]], align 1 +; CHECK-NEXT: [[VAL_FCA_2_INSERT:%.*]] = insertvalue [5 x i32] [[VAL_FCA_1_INSERT]], i32 [[VAL_FCA_2_LOAD]], 2 +; CHECK-NEXT: [[VAL_FCA_3_GEP:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* [[PTR]], i32 0, i32 3 +; CHECK-NEXT: [[VAL_FCA_3_LOAD:%.*]] = load i32, i32* [[VAL_FCA_3_GEP]], align 1 +; CHECK-NEXT: [[VAL_FCA_3_INSERT:%.*]] = insertvalue [5 x i32] [[VAL_FCA_2_INSERT]], i32 [[VAL_FCA_3_LOAD]], 3 +; CHECK-NEXT: [[VAL_FCA_4_GEP:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* [[PTR]], i32 0, i32 4 +; CHECK-NEXT: [[VAL_FCA_4_LOAD:%.*]] = load i32, i32* [[VAL_FCA_4_GEP]], align 1 +; CHECK-NEXT: [[VAL_FCA_4_INSERT:%.*]] = insertvalue [5 x i32] [[VAL_FCA_3_INSERT]], i32 [[VAL_FCA_4_LOAD]], 4 +; CHECK-NEXT: ret void +; +; DEBUGLOC-LABEL: @test8( +; DEBUGLOC-NEXT: [[PTR:%.*]] = alloca [5 x i32], align 1, !dbg [[DBG137:![0-9]+]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata [5 x i32]* [[PTR]], metadata [[META133:![0-9]+]], metadata !DIExpression()), !dbg [[DBG137]] +; DEBUGLOC-NEXT: [[PTR_8:%.*]] = bitcast [5 x i32]* [[PTR]] to i8*, !dbg [[DBG138:![0-9]+]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata i8* [[PTR_8]], metadata [[META134:![0-9]+]], metadata !DIExpression()), !dbg [[DBG138]] +; DEBUGLOC-NEXT: call void @populate(i8* [[PTR_8]]), !dbg [[DBG139:![0-9]+]] +; DEBUGLOC-NEXT: [[VAL_FCA_0_GEP:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* [[PTR]], i32 0, i32 0, !dbg [[DBG140:![0-9]+]] +; DEBUGLOC-NEXT: [[VAL_FCA_0_LOAD:%.*]] = load i32, i32* [[VAL_FCA_0_GEP]], align 1, !dbg [[DBG140]] +; DEBUGLOC-NEXT: [[VAL_FCA_0_INSERT:%.*]] = insertvalue [5 x i32] poison, i32 [[VAL_FCA_0_LOAD]], 0, !dbg [[DBG140]] +; DEBUGLOC-NEXT: [[VAL_FCA_1_GEP:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* [[PTR]], i32 0, i32 1, !dbg [[DBG140]] +; DEBUGLOC-NEXT: [[VAL_FCA_1_LOAD:%.*]] = load i32, i32* [[VAL_FCA_1_GEP]], align 1, !dbg [[DBG140]] +; DEBUGLOC-NEXT: [[VAL_FCA_1_INSERT:%.*]] = insertvalue [5 x i32] [[VAL_FCA_0_INSERT]], i32 [[VAL_FCA_1_LOAD]], 1, !dbg [[DBG140]] +; DEBUGLOC-NEXT: [[VAL_FCA_2_GEP:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* [[PTR]], i32 0, i32 2, !dbg [[DBG140]] +; DEBUGLOC-NEXT: [[VAL_FCA_2_LOAD:%.*]] = load i32, i32* [[VAL_FCA_2_GEP]], align 1, !dbg [[DBG140]] +; DEBUGLOC-NEXT: [[VAL_FCA_2_INSERT:%.*]] = insertvalue [5 x i32] [[VAL_FCA_1_INSERT]], i32 [[VAL_FCA_2_LOAD]], 2, !dbg [[DBG140]] +; DEBUGLOC-NEXT: [[VAL_FCA_3_GEP:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* [[PTR]], i32 0, i32 3, !dbg [[DBG140]] +; DEBUGLOC-NEXT: [[VAL_FCA_3_LOAD:%.*]] = load i32, i32* [[VAL_FCA_3_GEP]], align 1, !dbg [[DBG140]] +; DEBUGLOC-NEXT: [[VAL_FCA_3_INSERT:%.*]] = insertvalue [5 x i32] [[VAL_FCA_2_INSERT]], i32 [[VAL_FCA_3_LOAD]], 3, !dbg [[DBG140]] +; DEBUGLOC-NEXT: [[VAL_FCA_4_GEP:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* [[PTR]], i32 0, i32 4, !dbg [[DBG140]] +; DEBUGLOC-NEXT: [[VAL_FCA_4_LOAD:%.*]] = load i32, i32* [[VAL_FCA_4_GEP]], align 1, !dbg [[DBG140]] +; DEBUGLOC-NEXT: [[VAL_FCA_4_INSERT:%.*]] = insertvalue [5 x i32] [[VAL_FCA_3_INSERT]], i32 [[VAL_FCA_4_LOAD]], 4, !dbg [[DBG140]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata [5 x i32] [[VAL_FCA_4_INSERT]], metadata [[META135:![0-9]+]], metadata !DIExpression()), !dbg [[DBG140]] +; DEBUGLOC-NEXT: ret void, !dbg [[DBG141:![0-9]+]] +; %ptr = alloca [5 x i32], align 1 %ptr.8 = bitcast [5 x i32]* %ptr to i8* @@ -203,11 +398,50 @@ define void @test8() { define void @test9() { ; CHECK-LABEL: @test9( -; CHECK: load i32, {{.*}}, align 8 -; CHECK: load i32, {{.*}}, align 4 -; CHECK: load i32, {{.*}}, align 8 -; CHECK: load i32, {{.*}}, align 4 -; CHECK: load i32, {{.*}}, align 8 +; CHECK-NEXT: [[PTR:%.*]] = alloca [5 x i32], align 8 +; CHECK-NEXT: [[PTR_8:%.*]] = bitcast [5 x i32]* [[PTR]] to i8* +; CHECK-NEXT: call void @populate(i8* [[PTR_8]]) +; CHECK-NEXT: [[VAL_FCA_0_GEP:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* [[PTR]], i32 0, i32 0 +; CHECK-NEXT: [[VAL_FCA_0_LOAD:%.*]] = load i32, i32* [[VAL_FCA_0_GEP]], align 8 +; CHECK-NEXT: [[VAL_FCA_0_INSERT:%.*]] = insertvalue [5 x i32] poison, i32 [[VAL_FCA_0_LOAD]], 0 +; CHECK-NEXT: [[VAL_FCA_1_GEP:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* [[PTR]], i32 0, i32 1 +; CHECK-NEXT: [[VAL_FCA_1_LOAD:%.*]] = load i32, i32* [[VAL_FCA_1_GEP]], align 4 +; CHECK-NEXT: [[VAL_FCA_1_INSERT:%.*]] = insertvalue [5 x i32] [[VAL_FCA_0_INSERT]], i32 [[VAL_FCA_1_LOAD]], 1 +; CHECK-NEXT: [[VAL_FCA_2_GEP:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* [[PTR]], i32 0, i32 2 +; CHECK-NEXT: [[VAL_FCA_2_LOAD:%.*]] = load i32, i32* [[VAL_FCA_2_GEP]], align 8 +; CHECK-NEXT: [[VAL_FCA_2_INSERT:%.*]] = insertvalue [5 x i32] [[VAL_FCA_1_INSERT]], i32 [[VAL_FCA_2_LOAD]], 2 +; CHECK-NEXT: [[VAL_FCA_3_GEP:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* [[PTR]], i32 0, i32 3 +; CHECK-NEXT: [[VAL_FCA_3_LOAD:%.*]] = load i32, i32* [[VAL_FCA_3_GEP]], align 4 +; CHECK-NEXT: [[VAL_FCA_3_INSERT:%.*]] = insertvalue [5 x i32] [[VAL_FCA_2_INSERT]], i32 [[VAL_FCA_3_LOAD]], 3 +; CHECK-NEXT: [[VAL_FCA_4_GEP:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* [[PTR]], i32 0, i32 4 +; CHECK-NEXT: [[VAL_FCA_4_LOAD:%.*]] = load i32, i32* [[VAL_FCA_4_GEP]], align 8 +; CHECK-NEXT: [[VAL_FCA_4_INSERT:%.*]] = insertvalue [5 x i32] [[VAL_FCA_3_INSERT]], i32 [[VAL_FCA_4_LOAD]], 4 +; CHECK-NEXT: ret void +; +; DEBUGLOC-LABEL: @test9( +; DEBUGLOC-NEXT: [[PTR:%.*]] = alloca [5 x i32], align 8, !dbg [[DBG147:![0-9]+]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata [5 x i32]* [[PTR]], metadata [[META144:![0-9]+]], metadata !DIExpression()), !dbg [[DBG147]] +; DEBUGLOC-NEXT: [[PTR_8:%.*]] = bitcast [5 x i32]* [[PTR]] to i8*, !dbg [[DBG148:![0-9]+]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata i8* [[PTR_8]], metadata [[META145:![0-9]+]], metadata !DIExpression()), !dbg [[DBG148]] +; DEBUGLOC-NEXT: call void @populate(i8* [[PTR_8]]), !dbg [[DBG149:![0-9]+]] +; DEBUGLOC-NEXT: [[VAL_FCA_0_GEP:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* [[PTR]], i32 0, i32 0, !dbg [[DBG150:![0-9]+]] +; DEBUGLOC-NEXT: [[VAL_FCA_0_LOAD:%.*]] = load i32, i32* [[VAL_FCA_0_GEP]], align 8, !dbg [[DBG150]] +; DEBUGLOC-NEXT: [[VAL_FCA_0_INSERT:%.*]] = insertvalue [5 x i32] poison, i32 [[VAL_FCA_0_LOAD]], 0, !dbg [[DBG150]] +; DEBUGLOC-NEXT: [[VAL_FCA_1_GEP:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* [[PTR]], i32 0, i32 1, !dbg [[DBG150]] +; DEBUGLOC-NEXT: [[VAL_FCA_1_LOAD:%.*]] = load i32, i32* [[VAL_FCA_1_GEP]], align 4, !dbg [[DBG150]] +; DEBUGLOC-NEXT: [[VAL_FCA_1_INSERT:%.*]] = insertvalue [5 x i32] [[VAL_FCA_0_INSERT]], i32 [[VAL_FCA_1_LOAD]], 1, !dbg [[DBG150]] +; DEBUGLOC-NEXT: [[VAL_FCA_2_GEP:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* [[PTR]], i32 0, i32 2, !dbg [[DBG150]] +; DEBUGLOC-NEXT: [[VAL_FCA_2_LOAD:%.*]] = load i32, i32* [[VAL_FCA_2_GEP]], align 8, !dbg [[DBG150]] +; DEBUGLOC-NEXT: [[VAL_FCA_2_INSERT:%.*]] = insertvalue [5 x i32] [[VAL_FCA_1_INSERT]], i32 [[VAL_FCA_2_LOAD]], 2, !dbg [[DBG150]] +; DEBUGLOC-NEXT: [[VAL_FCA_3_GEP:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* [[PTR]], i32 0, i32 3, !dbg [[DBG150]] +; DEBUGLOC-NEXT: [[VAL_FCA_3_LOAD:%.*]] = load i32, i32* [[VAL_FCA_3_GEP]], align 4, !dbg [[DBG150]] +; DEBUGLOC-NEXT: [[VAL_FCA_3_INSERT:%.*]] = insertvalue [5 x i32] [[VAL_FCA_2_INSERT]], i32 [[VAL_FCA_3_LOAD]], 3, !dbg [[DBG150]] +; DEBUGLOC-NEXT: [[VAL_FCA_4_GEP:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* [[PTR]], i32 0, i32 4, !dbg [[DBG150]] +; DEBUGLOC-NEXT: [[VAL_FCA_4_LOAD:%.*]] = load i32, i32* [[VAL_FCA_4_GEP]], align 8, !dbg [[DBG150]] +; DEBUGLOC-NEXT: [[VAL_FCA_4_INSERT:%.*]] = insertvalue [5 x i32] [[VAL_FCA_3_INSERT]], i32 [[VAL_FCA_4_LOAD]], 4, !dbg [[DBG150]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata [5 x i32] [[VAL_FCA_4_INSERT]], metadata [[META146:![0-9]+]], metadata !DIExpression()), !dbg [[DBG150]] +; DEBUGLOC-NEXT: ret void, !dbg [[DBG151:![0-9]+]] +; %ptr = alloca [5 x i32], align 8 %ptr.8 = bitcast [5 x i32]* %ptr to i8* @@ -218,11 +452,50 @@ define void @test9() { define void @test10() { ; CHECK-LABEL: @test10( -; CHECK: load i32, {{.*}}, align 2 -; CHECK: load i8, {{.*}}, align 2 -; CHECK: load i8, {{.*}}, align 1 -; CHECK: load i8, {{.*}}, align 2 -; CHECK: load i16, {{.*}}, align 2 +; CHECK-NEXT: [[PTR:%.*]] = alloca { i32, i8, i8, { i8, i16 } }, align 2 +; CHECK-NEXT: [[PTR_8:%.*]] = bitcast { i32, i8, i8, { i8, i16 } }* [[PTR]] to i8* +; CHECK-NEXT: call void @populate(i8* [[PTR_8]]) +; CHECK-NEXT: [[VAL_FCA_0_GEP:%.*]] = getelementptr inbounds { i32, i8, i8, { i8, i16 } }, { i32, i8, i8, { i8, i16 } }* [[PTR]], i32 0, i32 0 +; CHECK-NEXT: [[VAL_FCA_0_LOAD:%.*]] = load i32, i32* [[VAL_FCA_0_GEP]], align 2 +; CHECK-NEXT: [[VAL_FCA_0_INSERT:%.*]] = insertvalue { i32, i8, i8, { i8, i16 } } poison, i32 [[VAL_FCA_0_LOAD]], 0 +; CHECK-NEXT: [[VAL_FCA_1_GEP:%.*]] = getelementptr inbounds { i32, i8, i8, { i8, i16 } }, { i32, i8, i8, { i8, i16 } }* [[PTR]], i32 0, i32 1 +; CHECK-NEXT: [[VAL_FCA_1_LOAD:%.*]] = load i8, i8* [[VAL_FCA_1_GEP]], align 2 +; CHECK-NEXT: [[VAL_FCA_1_INSERT:%.*]] = insertvalue { i32, i8, i8, { i8, i16 } } [[VAL_FCA_0_INSERT]], i8 [[VAL_FCA_1_LOAD]], 1 +; CHECK-NEXT: [[VAL_FCA_2_GEP:%.*]] = getelementptr inbounds { i32, i8, i8, { i8, i16 } }, { i32, i8, i8, { i8, i16 } }* [[PTR]], i32 0, i32 2 +; CHECK-NEXT: [[VAL_FCA_2_LOAD:%.*]] = load i8, i8* [[VAL_FCA_2_GEP]], align 1 +; CHECK-NEXT: [[VAL_FCA_2_INSERT:%.*]] = insertvalue { i32, i8, i8, { i8, i16 } } [[VAL_FCA_1_INSERT]], i8 [[VAL_FCA_2_LOAD]], 2 +; CHECK-NEXT: [[VAL_FCA_3_0_GEP:%.*]] = getelementptr inbounds { i32, i8, i8, { i8, i16 } }, { i32, i8, i8, { i8, i16 } }* [[PTR]], i32 0, i32 3, i32 0 +; CHECK-NEXT: [[VAL_FCA_3_0_LOAD:%.*]] = load i8, i8* [[VAL_FCA_3_0_GEP]], align 2 +; CHECK-NEXT: [[VAL_FCA_3_0_INSERT:%.*]] = insertvalue { i32, i8, i8, { i8, i16 } } [[VAL_FCA_2_INSERT]], i8 [[VAL_FCA_3_0_LOAD]], 3, 0 +; CHECK-NEXT: [[VAL_FCA_3_1_GEP:%.*]] = getelementptr inbounds { i32, i8, i8, { i8, i16 } }, { i32, i8, i8, { i8, i16 } }* [[PTR]], i32 0, i32 3, i32 1 +; CHECK-NEXT: [[VAL_FCA_3_1_LOAD:%.*]] = load i16, i16* [[VAL_FCA_3_1_GEP]], align 2 +; CHECK-NEXT: [[VAL_FCA_3_1_INSERT:%.*]] = insertvalue { i32, i8, i8, { i8, i16 } } [[VAL_FCA_3_0_INSERT]], i16 [[VAL_FCA_3_1_LOAD]], 3, 1 +; CHECK-NEXT: ret void +; +; DEBUGLOC-LABEL: @test10( +; DEBUGLOC-NEXT: [[PTR:%.*]] = alloca { i32, i8, i8, { i8, i16 } }, align 2, !dbg [[DBG158:![0-9]+]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata { i32, i8, i8, { i8, i16 } }* [[PTR]], metadata [[META154:![0-9]+]], metadata !DIExpression()), !dbg [[DBG158]] +; DEBUGLOC-NEXT: [[PTR_8:%.*]] = bitcast { i32, i8, i8, { i8, i16 } }* [[PTR]] to i8*, !dbg [[DBG159:![0-9]+]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata i8* [[PTR_8]], metadata [[META155:![0-9]+]], metadata !DIExpression()), !dbg [[DBG159]] +; DEBUGLOC-NEXT: call void @populate(i8* [[PTR_8]]), !dbg [[DBG160:![0-9]+]] +; DEBUGLOC-NEXT: [[VAL_FCA_0_GEP:%.*]] = getelementptr inbounds { i32, i8, i8, { i8, i16 } }, { i32, i8, i8, { i8, i16 } }* [[PTR]], i32 0, i32 0, !dbg [[DBG161:![0-9]+]] +; DEBUGLOC-NEXT: [[VAL_FCA_0_LOAD:%.*]] = load i32, i32* [[VAL_FCA_0_GEP]], align 2, !dbg [[DBG161]] +; DEBUGLOC-NEXT: [[VAL_FCA_0_INSERT:%.*]] = insertvalue { i32, i8, i8, { i8, i16 } } poison, i32 [[VAL_FCA_0_LOAD]], 0, !dbg [[DBG161]] +; DEBUGLOC-NEXT: [[VAL_FCA_1_GEP:%.*]] = getelementptr inbounds { i32, i8, i8, { i8, i16 } }, { i32, i8, i8, { i8, i16 } }* [[PTR]], i32 0, i32 1, !dbg [[DBG161]] +; DEBUGLOC-NEXT: [[VAL_FCA_1_LOAD:%.*]] = load i8, i8* [[VAL_FCA_1_GEP]], align 2, !dbg [[DBG161]] +; DEBUGLOC-NEXT: [[VAL_FCA_1_INSERT:%.*]] = insertvalue { i32, i8, i8, { i8, i16 } } [[VAL_FCA_0_INSERT]], i8 [[VAL_FCA_1_LOAD]], 1, !dbg [[DBG161]] +; DEBUGLOC-NEXT: [[VAL_FCA_2_GEP:%.*]] = getelementptr inbounds { i32, i8, i8, { i8, i16 } }, { i32, i8, i8, { i8, i16 } }* [[PTR]], i32 0, i32 2, !dbg [[DBG161]] +; DEBUGLOC-NEXT: [[VAL_FCA_2_LOAD:%.*]] = load i8, i8* [[VAL_FCA_2_GEP]], align 1, !dbg [[DBG161]] +; DEBUGLOC-NEXT: [[VAL_FCA_2_INSERT:%.*]] = insertvalue { i32, i8, i8, { i8, i16 } } [[VAL_FCA_1_INSERT]], i8 [[VAL_FCA_2_LOAD]], 2, !dbg [[DBG161]] +; DEBUGLOC-NEXT: [[VAL_FCA_3_0_GEP:%.*]] = getelementptr inbounds { i32, i8, i8, { i8, i16 } }, { i32, i8, i8, { i8, i16 } }* [[PTR]], i32 0, i32 3, i32 0, !dbg [[DBG161]] +; DEBUGLOC-NEXT: [[VAL_FCA_3_0_LOAD:%.*]] = load i8, i8* [[VAL_FCA_3_0_GEP]], align 2, !dbg [[DBG161]] +; DEBUGLOC-NEXT: [[VAL_FCA_3_0_INSERT:%.*]] = insertvalue { i32, i8, i8, { i8, i16 } } [[VAL_FCA_2_INSERT]], i8 [[VAL_FCA_3_0_LOAD]], 3, 0, !dbg [[DBG161]] +; DEBUGLOC-NEXT: [[VAL_FCA_3_1_GEP:%.*]] = getelementptr inbounds { i32, i8, i8, { i8, i16 } }, { i32, i8, i8, { i8, i16 } }* [[PTR]], i32 0, i32 3, i32 1, !dbg [[DBG161]] +; DEBUGLOC-NEXT: [[VAL_FCA_3_1_LOAD:%.*]] = load i16, i16* [[VAL_FCA_3_1_GEP]], align 2, !dbg [[DBG161]] +; DEBUGLOC-NEXT: [[VAL_FCA_3_1_INSERT:%.*]] = insertvalue { i32, i8, i8, { i8, i16 } } [[VAL_FCA_3_0_INSERT]], i16 [[VAL_FCA_3_1_LOAD]], 3, 1, !dbg [[DBG161]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata { i32, i8, i8, { i8, i16 } } [[VAL_FCA_3_1_INSERT]], metadata [[META156:![0-9]+]], metadata !DIExpression()), !dbg [[DBG161]] +; DEBUGLOC-NEXT: ret void, !dbg [[DBG162:![0-9]+]] +; %ptr = alloca {i32, i8, i8, {i8, i16}}, align 2 %ptr.8 = bitcast {i32, i8, i8, {i8, i16}}* %ptr to i8* @@ -233,8 +506,28 @@ define void @test10() { %struct = type { i32, i32 } define dso_local i32 @pr45010(%struct* %A) { -; CHECK-LABEL: @pr45010 -; CHECK: load atomic volatile i32, {{.*}}, align 4 +; CHECK-LABEL: @pr45010( +; CHECK-NEXT: [[B_SROA_0:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[A_I:%.*]] = getelementptr inbounds [[STRUCT:%.*]], %struct* [[A:%.*]], i32 0, i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_I]], align 4 +; CHECK-NEXT: store atomic volatile i32 [[TMP1]], i32* [[B_SROA_0]] release, align 4 +; CHECK-NEXT: [[B_SROA_0_0_B_SROA_0_0_X:%.*]] = load atomic volatile i32, i32* [[B_SROA_0]] acquire, align 4 +; CHECK-NEXT: ret i32 [[B_SROA_0_0_B_SROA_0_0_X]] +; +; DEBUGLOC-LABEL: @pr45010( +; DEBUGLOC-NEXT: [[B_SROA_0:%.*]] = alloca i32, align 4, !dbg [[DBG172:![0-9]+]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata %struct* undef, metadata [[META165:![0-9]+]], metadata !DIExpression()), !dbg [[DBG172]] +; DEBUGLOC-NEXT: [[A_I:%.*]] = getelementptr inbounds [[STRUCT:%.*]], %struct* [[A:%.*]], i32 0, i32 0, !dbg [[DBG173:![0-9]+]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata i32* [[A_I]], metadata [[META166:![0-9]+]], metadata !DIExpression()), !dbg [[DBG173]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata i32* undef, metadata [[META167:![0-9]+]], metadata !DIExpression()), !dbg [[DBG174:![0-9]+]] +; DEBUGLOC-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_I]], align 4, !dbg [[DBG175:![0-9]+]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP1]], metadata [[META168:![0-9]+]], metadata !DIExpression()), !dbg [[DBG175]] +; DEBUGLOC-NEXT: store atomic volatile i32 [[TMP1]], i32* [[B_SROA_0]] release, align 4, !dbg [[DBG176:![0-9]+]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata i32* undef, metadata [[META170:![0-9]+]], metadata !DIExpression()), !dbg [[DBG177:![0-9]+]] +; DEBUGLOC-NEXT: [[B_SROA_0_0_B_SROA_0_0_X:%.*]] = load atomic volatile i32, i32* [[B_SROA_0]] acquire, align 4, !dbg [[DBG178:![0-9]+]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata i32 [[B_SROA_0_0_B_SROA_0_0_X]], metadata [[META171:![0-9]+]], metadata !DIExpression()), !dbg [[DBG178]] +; DEBUGLOC-NEXT: ret i32 [[B_SROA_0_0_B_SROA_0_0_X]], !dbg [[DBG179:![0-9]+]] +; %B = alloca %struct, align 4 %A.i = getelementptr inbounds %struct, %struct* %A, i32 0, i32 0 diff --git a/llvm/test/Transforms/SROA/big-endian.ll b/llvm/test/Transforms/SROA/big-endian.ll index 0853f9e9ed4a..7538c311aa51 100644 --- a/llvm/test/Transforms/SROA/big-endian.ll +++ b/llvm/test/Transforms/SROA/big-endian.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -passes=sroa -S | FileCheck %s target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" @@ -9,11 +10,31 @@ define i8 @test1() { ; ordering. ; ; CHECK-LABEL: @test1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_SROA_3_0_INSERT_EXT:%.*]] = zext i8 0 to i24 +; CHECK-NEXT: [[A_SROA_3_0_INSERT_MASK:%.*]] = and i24 undef, -256 +; CHECK-NEXT: [[A_SROA_3_0_INSERT_INSERT:%.*]] = or i24 [[A_SROA_3_0_INSERT_MASK]], [[A_SROA_3_0_INSERT_EXT]] +; CHECK-NEXT: [[A_SROA_2_0_INSERT_EXT:%.*]] = zext i8 0 to i24 +; CHECK-NEXT: [[A_SROA_2_0_INSERT_SHIFT:%.*]] = shl i24 [[A_SROA_2_0_INSERT_EXT]], 8 +; CHECK-NEXT: [[A_SROA_2_0_INSERT_MASK:%.*]] = and i24 [[A_SROA_3_0_INSERT_INSERT]], -65281 +; CHECK-NEXT: [[A_SROA_2_0_INSERT_INSERT:%.*]] = or i24 [[A_SROA_2_0_INSERT_MASK]], [[A_SROA_2_0_INSERT_SHIFT]] +; CHECK-NEXT: [[A_SROA_0_0_INSERT_EXT:%.*]] = zext i8 0 to i24 +; CHECK-NEXT: [[A_SROA_0_0_INSERT_SHIFT:%.*]] = shl i24 [[A_SROA_0_0_INSERT_EXT]], 16 +; CHECK-NEXT: [[A_SROA_0_0_INSERT_MASK:%.*]] = and i24 [[A_SROA_2_0_INSERT_INSERT]], 65535 +; CHECK-NEXT: [[A_SROA_0_0_INSERT_INSERT:%.*]] = or i24 [[A_SROA_0_0_INSERT_MASK]], [[A_SROA_0_0_INSERT_SHIFT]] +; CHECK-NEXT: [[B_SROA_0_0_EXTRACT_SHIFT:%.*]] = lshr i24 [[A_SROA_0_0_INSERT_INSERT]], 16 +; CHECK-NEXT: [[B_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i24 [[B_SROA_0_0_EXTRACT_SHIFT]] to i8 +; CHECK-NEXT: [[B_SROA_2_0_EXTRACT_SHIFT:%.*]] = lshr i24 [[A_SROA_0_0_INSERT_INSERT]], 8 +; CHECK-NEXT: [[B_SROA_2_0_EXTRACT_TRUNC:%.*]] = trunc i24 [[B_SROA_2_0_EXTRACT_SHIFT]] to i8 +; CHECK-NEXT: [[B_SROA_3_0_EXTRACT_TRUNC:%.*]] = trunc i24 [[A_SROA_0_0_INSERT_INSERT]] to i8 +; CHECK-NEXT: [[BSUM0:%.*]] = add i8 [[B_SROA_0_0_EXTRACT_TRUNC]], [[B_SROA_2_0_EXTRACT_TRUNC]] +; CHECK-NEXT: [[BSUM1:%.*]] = add i8 [[BSUM0]], [[B_SROA_3_0_EXTRACT_TRUNC]] +; CHECK-NEXT: ret i8 [[BSUM1]] +; entry: %a = alloca [3 x i8] %b = alloca [3 x i8] -; CHECK-NOT: alloca %a0ptr = getelementptr [3 x i8], [3 x i8]* %a, i64 0, i32 0 store i8 0, i8* %a0ptr @@ -23,19 +44,6 @@ entry: store i8 0, i8* %a2ptr %aiptr = bitcast [3 x i8]* %a to i24* %ai = load i24, i24* %aiptr -; CHECK-NOT: store -; CHECK-NOT: load -; CHECK: %[[ext2:.*]] = zext i8 0 to i24 -; CHECK-NEXT: %[[mask2:.*]] = and i24 undef, -256 -; CHECK-NEXT: %[[insert2:.*]] = or i24 %[[mask2]], %[[ext2]] -; CHECK-NEXT: %[[ext1:.*]] = zext i8 0 to i24 -; CHECK-NEXT: %[[shift1:.*]] = shl i24 %[[ext1]], 8 -; CHECK-NEXT: %[[mask1:.*]] = and i24 %[[insert2]], -65281 -; CHECK-NEXT: %[[insert1:.*]] = or i24 %[[mask1]], %[[shift1]] -; CHECK-NEXT: %[[ext0:.*]] = zext i8 0 to i24 -; CHECK-NEXT: %[[shift0:.*]] = shl i24 %[[ext0]], 16 -; CHECK-NEXT: %[[mask0:.*]] = and i24 %[[insert1]], 65535 -; CHECK-NEXT: %[[insert0:.*]] = or i24 %[[mask0]], %[[shift0]] %biptr = bitcast [3 x i8]* %b to i24* store i24 %ai, i24* %biptr @@ -45,20 +53,10 @@ entry: %b1 = load i8, i8* %b1ptr %b2ptr = getelementptr [3 x i8], [3 x i8]* %b, i64 0, i32 2 %b2 = load i8, i8* %b2ptr -; CHECK-NOT: store -; CHECK-NOT: load -; CHECK: %[[shift0:.*]] = lshr i24 %[[insert0]], 16 -; CHECK-NEXT: %[[trunc0:.*]] = trunc i24 %[[shift0]] to i8 -; CHECK-NEXT: %[[shift1:.*]] = lshr i24 %[[insert0]], 8 -; CHECK-NEXT: %[[trunc1:.*]] = trunc i24 %[[shift1]] to i8 -; CHECK-NEXT: %[[trunc2:.*]] = trunc i24 %[[insert0]] to i8 %bsum0 = add i8 %b0, %b1 %bsum1 = add i8 %bsum0, %b2 ret i8 %bsum1 -; CHECK: %[[sum0:.*]] = add i8 %[[trunc0]], %[[trunc1]] -; CHECK-NEXT: %[[sum1:.*]] = add i8 %[[sum0]], %[[trunc2]] -; CHECK-NEXT: ret i8 %[[sum1]] } define i64 @test2() { @@ -66,18 +64,37 @@ define i64 @test2() { ; promoted. ; ; CHECK-LABEL: @test2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_SROA_2_SROA_4_0_INSERT_EXT:%.*]] = zext i8 1 to i40 +; CHECK-NEXT: [[A_SROA_2_SROA_4_0_INSERT_MASK:%.*]] = and i40 undef, -256 +; CHECK-NEXT: [[A_SROA_2_SROA_4_0_INSERT_INSERT:%.*]] = or i40 [[A_SROA_2_SROA_4_0_INSERT_MASK]], [[A_SROA_2_SROA_4_0_INSERT_EXT]] +; CHECK-NEXT: [[A_SROA_2_SROA_3_0_INSERT_EXT:%.*]] = zext i24 0 to i40 +; CHECK-NEXT: [[A_SROA_2_SROA_3_0_INSERT_SHIFT:%.*]] = shl i40 [[A_SROA_2_SROA_3_0_INSERT_EXT]], 8 +; CHECK-NEXT: [[A_SROA_2_SROA_3_0_INSERT_MASK:%.*]] = and i40 [[A_SROA_2_SROA_4_0_INSERT_INSERT]], -4294967041 +; CHECK-NEXT: [[A_SROA_2_SROA_3_0_INSERT_INSERT:%.*]] = or i40 [[A_SROA_2_SROA_3_0_INSERT_MASK]], [[A_SROA_2_SROA_3_0_INSERT_SHIFT]] +; CHECK-NEXT: [[A_SROA_2_SROA_0_0_INSERT_EXT:%.*]] = zext i8 0 to i40 +; CHECK-NEXT: [[A_SROA_2_SROA_0_0_INSERT_SHIFT:%.*]] = shl i40 [[A_SROA_2_SROA_0_0_INSERT_EXT]], 32 +; CHECK-NEXT: [[A_SROA_2_SROA_0_0_INSERT_MASK:%.*]] = and i40 [[A_SROA_2_SROA_3_0_INSERT_INSERT]], 4294967295 +; CHECK-NEXT: [[A_SROA_2_SROA_0_0_INSERT_INSERT:%.*]] = or i40 [[A_SROA_2_SROA_0_0_INSERT_MASK]], [[A_SROA_2_SROA_0_0_INSERT_SHIFT]] +; CHECK-NEXT: [[A_SROA_2_0_INSERT_EXT:%.*]] = zext i40 [[A_SROA_2_SROA_0_0_INSERT_INSERT]] to i56 +; CHECK-NEXT: [[A_SROA_2_0_INSERT_MASK:%.*]] = and i56 undef, -1099511627776 +; CHECK-NEXT: [[A_SROA_2_0_INSERT_INSERT:%.*]] = or i56 [[A_SROA_2_0_INSERT_MASK]], [[A_SROA_2_0_INSERT_EXT]] +; CHECK-NEXT: [[A_SROA_0_0_INSERT_EXT:%.*]] = zext i16 1 to i56 +; CHECK-NEXT: [[A_SROA_0_0_INSERT_SHIFT:%.*]] = shl i56 [[A_SROA_0_0_INSERT_EXT]], 40 +; CHECK-NEXT: [[A_SROA_0_0_INSERT_MASK:%.*]] = and i56 [[A_SROA_2_0_INSERT_INSERT]], 1099511627775 +; CHECK-NEXT: [[A_SROA_0_0_INSERT_INSERT:%.*]] = or i56 [[A_SROA_0_0_INSERT_MASK]], [[A_SROA_0_0_INSERT_SHIFT]] +; CHECK-NEXT: [[RET:%.*]] = zext i56 [[A_SROA_0_0_INSERT_INSERT]] to i64 +; CHECK-NEXT: ret i64 [[RET]] +; entry: %a = alloca [7 x i8] -; CHECK-NOT: alloca %a0ptr = getelementptr [7 x i8], [7 x i8]* %a, i64 0, i32 0 %a1ptr = getelementptr [7 x i8], [7 x i8]* %a, i64 0, i32 1 %a2ptr = getelementptr [7 x i8], [7 x i8]* %a, i64 0, i32 2 %a3ptr = getelementptr [7 x i8], [7 x i8]* %a, i64 0, i32 3 -; CHECK-NOT: store -; CHECK-NOT: load %a0i16ptr = bitcast i8* %a0ptr to i16* store i16 1, i16* %a0i16ptr @@ -92,44 +109,32 @@ entry: ; the alloca is splitted into multiple slices ; Here, i8 1 is for %a[6] -; CHECK: %[[ext1:.*]] = zext i8 1 to i40 -; CHECK-NEXT: %[[mask1:.*]] = and i40 undef, -256 -; CHECK-NEXT: %[[insert1:.*]] = or i40 %[[mask1]], %[[ext1]] ; Here, i24 0 is for %a[3] to %a[5] -; CHECK-NEXT: %[[ext2:.*]] = zext i24 0 to i40 -; CHECK-NEXT: %[[shift2:.*]] = shl i40 %[[ext2]], 8 -; CHECK-NEXT: %[[mask2:.*]] = and i40 %[[insert1]], -4294967041 -; CHECK-NEXT: %[[insert2:.*]] = or i40 %[[mask2]], %[[shift2]] ; Here, i8 0 is for %a[2] -; CHECK-NEXT: %[[ext3:.*]] = zext i8 0 to i40 -; CHECK-NEXT: %[[shift3:.*]] = shl i40 %[[ext3]], 32 -; CHECK-NEXT: %[[mask3:.*]] = and i40 %[[insert2]], 4294967295 -; CHECK-NEXT: %[[insert3:.*]] = or i40 %[[mask3]], %[[shift3]] -; CHECK-NEXT: %[[ext4:.*]] = zext i40 %[[insert3]] to i56 -; CHECK-NEXT: %[[mask4:.*]] = and i56 undef, -1099511627776 -; CHECK-NEXT: %[[insert4:.*]] = or i56 %[[mask4]], %[[ext4]] -; CHECK-NOT: store -; CHECK-NOT: load %aiptr = bitcast [7 x i8]* %a to i56* %ai = load i56, i56* %aiptr %ret = zext i56 %ai to i64 ret i64 %ret ; Here, i16 1 is for %a[0] to %a[1] -; CHECK-NEXT: %[[ext5:.*]] = zext i16 1 to i56 -; CHECK-NEXT: %[[shift5:.*]] = shl i56 %[[ext5]], 40 -; CHECK-NEXT: %[[mask5:.*]] = and i56 %[[insert4]], 1099511627775 -; CHECK-NEXT: %[[insert5:.*]] = or i56 %[[mask5]], %[[shift5]] -; CHECK-NEXT: %[[ret:.*]] = zext i56 %[[insert5]] to i64 -; CHECK-NEXT: ret i64 %[[ret]] } define i64 @PR14132(i1 %flag) { ; CHECK-LABEL: @PR14132( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[FLAG:%.*]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[B_0_LOAD_EXT:%.*]] = zext i8 1 to i64 +; CHECK-NEXT: [[B_0_ENDIAN_SHIFT:%.*]] = shl i64 [[B_0_LOAD_EXT]], 56 +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: [[PTR_0_SROA_SPECULATED:%.*]] = phi i64 [ [[B_0_ENDIAN_SHIFT]], [[IF_THEN]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: ret i64 [[PTR_0_SROA_SPECULATED]] +; ; Here we form a PHI-node by promoting the pointer alloca first, and then in ; order to promote the other two allocas, we speculate the load of the ; now-phi-node-pointer. In doing so we end up loading a 64-bit value from an i8 @@ -142,7 +147,6 @@ entry: %a = alloca i64, align 8 %b = alloca i8, align 8 %ptr = alloca i64*, align 8 -; CHECK-NOT: alloca %ptr.cast = bitcast i64** %ptr to i8** store i64 0, i64* %a @@ -153,24 +157,28 @@ entry: if.then: store i8* %b, i8** %ptr.cast br label %if.end -; CHECK-NOT: store -; CHECK: %[[ext:.*]] = zext i8 1 to i64 -; CHECK: %[[shift:.*]] = shl i64 %[[ext]], 56 if.end: %tmp = load i64*, i64** %ptr %result = load i64, i64* %tmp -; CHECK-NOT: load -; CHECK: %[[result:.*]] = phi i64 [ %[[shift]], %if.then ], [ 0, %entry ] ret i64 %result -; CHECK-NEXT: ret i64 %[[result]] } declare void @f(i64 %x, i32 %y) define void @test3() { ; CHECK-LABEL: @test3( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_SROA_3_0_INSERT_EXT:%.*]] = zext i32 134316040 to i64 +; CHECK-NEXT: [[A_SROA_3_0_INSERT_MASK:%.*]] = and i64 undef, -4294967296 +; CHECK-NEXT: [[A_SROA_3_0_INSERT_INSERT:%.*]] = or i64 [[A_SROA_3_0_INSERT_MASK]], [[A_SROA_3_0_INSERT_EXT]] +; CHECK-NEXT: [[A_SROA_0_0_INSERT_EXT:%.*]] = zext i32 8 to i64 +; CHECK-NEXT: [[A_SROA_0_0_INSERT_SHIFT:%.*]] = shl i64 [[A_SROA_0_0_INSERT_EXT]], 32 +; CHECK-NEXT: [[A_SROA_0_0_INSERT_MASK:%.*]] = and i64 [[A_SROA_3_0_INSERT_INSERT]], 4294967295 +; CHECK-NEXT: [[A_SROA_0_0_INSERT_INSERT:%.*]] = or i64 [[A_SROA_0_0_INSERT_MASK]], [[A_SROA_0_0_INSERT_SHIFT]] +; CHECK-NEXT: call void @f(i64 [[A_SROA_0_0_INSERT_INSERT]], i32 8) +; CHECK-NEXT: ret void ; ; This is a test that specifically exercises the big-endian lowering because it ; ends up splitting a 64-bit integer into two smaller integers and has a number @@ -178,75 +186,61 @@ define void @test3() { ; would miscompile this by either dropping a most significant byte or least ; significant byte due to shrinking the [4,8) slice to an i24, or by failing to ; move the bytes around correctly. -; ; The magical number 34494054408 is used because it has bits set in various ; bytes so that it is clear if those bytes fail to be propagated. -; ; If you're debugging this, rather than using the direct magical numbers, run ; the IR through '-sroa -instcombine'. With '-instcombine' these will be ; constant folded, and if the i64 doesn't round-trip correctly, you've found ; a bug! -; entry: %a = alloca { i32, i24 }, align 4 -; CHECK-NOT: alloca %tmp0 = bitcast { i32, i24 }* %a to i64* store i64 34494054408, i64* %tmp0 %tmp1 = load i64, i64* %tmp0, align 4 %tmp2 = bitcast { i32, i24 }* %a to i32* %tmp3 = load i32, i32* %tmp2, align 4 -; CHECK: %[[HI_EXT:.*]] = zext i32 134316040 to i64 -; CHECK: %[[HI_INPUT:.*]] = and i64 undef, -4294967296 -; CHECK: %[[HI_MERGE:.*]] = or i64 %[[HI_INPUT]], %[[HI_EXT]] -; CHECK: %[[LO_EXT:.*]] = zext i32 8 to i64 -; CHECK: %[[LO_SHL:.*]] = shl i64 %[[LO_EXT]], 32 -; CHECK: %[[LO_INPUT:.*]] = and i64 %[[HI_MERGE]], 4294967295 -; CHECK: %[[LO_MERGE:.*]] = or i64 %[[LO_INPUT]], %[[LO_SHL]] call void @f(i64 %tmp1, i32 %tmp3) -; CHECK: call void @f(i64 %[[LO_MERGE]], i32 8) ret void -; CHECK: ret void } define void @test4() { -; CHECK-LABEL: @test4 +; CHECK-LABEL: @test4( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_SROA_0_0_EXTRACT_SHIFT:%.*]] = lshr i64 34494054408, 32 +; CHECK-NEXT: [[A_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[A_SROA_0_0_EXTRACT_SHIFT]] to i32 +; CHECK-NEXT: [[A_SROA_3_0_EXTRACT_TRUNC:%.*]] = trunc i64 34494054408 to i32 +; CHECK-NEXT: [[A_SROA_3_0_INSERT_EXT:%.*]] = zext i32 [[A_SROA_3_0_EXTRACT_TRUNC]] to i64 +; CHECK-NEXT: [[A_SROA_3_0_INSERT_MASK:%.*]] = and i64 undef, -4294967296 +; CHECK-NEXT: [[A_SROA_3_0_INSERT_INSERT:%.*]] = or i64 [[A_SROA_3_0_INSERT_MASK]], [[A_SROA_3_0_INSERT_EXT]] +; CHECK-NEXT: [[A_SROA_0_0_INSERT_EXT:%.*]] = zext i32 [[A_SROA_0_0_EXTRACT_TRUNC]] to i64 +; CHECK-NEXT: [[A_SROA_0_0_INSERT_SHIFT:%.*]] = shl i64 [[A_SROA_0_0_INSERT_EXT]], 32 +; CHECK-NEXT: [[A_SROA_0_0_INSERT_MASK:%.*]] = and i64 [[A_SROA_3_0_INSERT_INSERT]], 4294967295 +; CHECK-NEXT: [[A_SROA_0_0_INSERT_INSERT:%.*]] = or i64 [[A_SROA_0_0_INSERT_MASK]], [[A_SROA_0_0_INSERT_SHIFT]] +; CHECK-NEXT: call void @f(i64 [[A_SROA_0_0_INSERT_INSERT]], i32 [[A_SROA_0_0_EXTRACT_TRUNC]]) +; CHECK-NEXT: ret void ; ; Much like @test3, this is specifically testing big-endian management of data. ; Also similarly, it uses constants with particular bits set to help track ; whether values are corrupted, and can be easily evaluated by running through ; -instcombine to see that the i64 round-trips. -; entry: %a = alloca { i32, i24 }, align 4 %a2 = alloca i64, align 4 -; CHECK-NOT: alloca store i64 34494054408, i64* %a2 %tmp0 = bitcast { i32, i24 }* %a to i8* %tmp1 = bitcast i64* %a2 to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %tmp0, i8* align 4 %tmp1, i64 8, i1 false) -; CHECK: %[[LO_SHR:.*]] = lshr i64 34494054408, 32 -; CHECK: %[[LO_START:.*]] = trunc i64 %[[LO_SHR]] to i32 -; CHECK: %[[HI_START:.*]] = trunc i64 34494054408 to i32 %tmp2 = bitcast { i32, i24 }* %a to i64* %tmp3 = load i64, i64* %tmp2, align 4 %tmp4 = bitcast { i32, i24 }* %a to i32* %tmp5 = load i32, i32* %tmp4, align 4 -; CHECK: %[[HI_EXT:.*]] = zext i32 %[[HI_START]] to i64 -; CHECK: %[[HI_INPUT:.*]] = and i64 undef, -4294967296 -; CHECK: %[[HI_MERGE:.*]] = or i64 %[[HI_INPUT]], %[[HI_EXT]] -; CHECK: %[[LO_EXT:.*]] = zext i32 %[[LO_START]] to i64 -; CHECK: %[[LO_SHL:.*]] = shl i64 %[[LO_EXT]], 32 -; CHECK: %[[LO_INPUT:.*]] = and i64 %[[HI_MERGE]], 4294967295 -; CHECK: %[[LO_MERGE:.*]] = or i64 %[[LO_INPUT]], %[[LO_SHL]] call void @f(i64 %tmp3, i32 %tmp5) -; CHECK: call void @f(i64 %[[LO_MERGE]], i32 %[[LO_START]]) ret void -; CHECK: ret void } declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1) diff --git a/llvm/test/Transforms/SROA/dbg-inline.ll b/llvm/test/Transforms/SROA/dbg-inline.ll index b3b3660f6414..27b5d68961a2 100644 --- a/llvm/test/Transforms/SROA/dbg-inline.ll +++ b/llvm/test/Transforms/SROA/dbg-inline.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; Test that SROA can deal with allocas that have more than one ; dbg.declare hanging off of it. @@ -10,16 +11,20 @@ target triple = "x86_64-apple-macosx10.15.0" ; Function Attrs: noinline optnone ssp uwtable define i64 @_Z1g4pair(i64 %p.coerce0, i64 %p.coerce1) #0 !dbg !8 { +; CHECK-LABEL: @_Z1g4pair( +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 [[P_COERCE0:%.*]], metadata [[META16:![0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 64)), !dbg [[DBG17:![0-9]+]] +; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 [[P_COERCE0]], metadata [[META18:![0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 64)), !dbg [[DBG20:![0-9]+]] +; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 [[P_COERCE1:%.*]], metadata [[META16]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64)), !dbg [[DBG17]] +; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 [[P_COERCE1]], metadata [[META18]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64)), !dbg [[DBG20]] +; CHECK-NEXT: ret i64 [[P_COERCE0]], !dbg [[DBG22:![0-9]+]] +; entry: %p = alloca %struct.pair, align 8 %0 = getelementptr inbounds %struct.pair, %struct.pair* %p, i32 0, i32 0 store i64 %p.coerce0, i64* %0, align 8 %1 = getelementptr inbounds %struct.pair, %struct.pair* %p, i32 0, i32 1 store i64 %p.coerce1, i64* %1, align 8 - ; CHECK-DAG: call void @llvm.dbg.value(metadata i64 %p.coerce0, metadata ![[VAR:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 64)), !dbg ![[LOC:[0-9]+]] - ; CHECK-DAG: call void @llvm.dbg.value(metadata i64 %p.coerce1, metadata ![[VAR]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64)), !dbg ![[LOC]] - ; CHECK-DAG: call void @llvm.dbg.value(metadata i64 %p.coerce0, metadata ![[INLINED_VAR:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 64)), !dbg ![[INLINED_LOC:[0-9]+]] - ; CHECK-DAG: call void @llvm.dbg.value(metadata i64 %p.coerce1, metadata ![[INLINED_VAR]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64)), !dbg ![[INLINED_LOC]] call void @llvm.dbg.declare(metadata %struct.pair* %p, metadata !17, metadata !DIExpression()), !dbg !18 call void @llvm.dbg.declare(metadata %struct.pair* %p, metadata !21, metadata !DIExpression()), !dbg !23 %a.i = getelementptr inbounds %struct.pair, %struct.pair* %p, i32 0, i32 0, !dbg !25 @@ -57,7 +62,6 @@ attributes #2 = { argmemonly nounwind willreturn } !15 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !13, file: !9, line: 1, baseType: !12, size: 64) !16 = !DIDerivedType(tag: DW_TAG_member, name: "b", scope: !13, file: !9, line: 1, baseType: !12, size: 64, offset: 64) !17 = !DILocalVariable(name: "p", arg: 1, scope: !8, file: !9, line: 9, type: !13) -; CHECK: ![[LOC]] = !DILocation ; CHECK-NOT: inlinedAt ; CHECK: = !18 = !DILocation(line: 9, column: 27, scope: !8) @@ -65,7 +69,6 @@ attributes #2 = { argmemonly nounwind willreturn } !20 = !DILocation(line: 10, column: 10, scope: !8) !21 = !DILocalVariable(name: "p", arg: 1, scope: !22, file: !9, line: 5, type: !13) !22 = distinct !DISubprogram(name: "f", linkageName: "_ZL1f4pair", scope: !9, file: !9, line: 5, type: !10, scopeLine: 5, flags: DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition, unit: !0, retainedNodes: !2) -; CHECK: ![[INLINED_LOC]] = !DILocation({{.*}}inlinedAt !23 = !DILocation(line: 5, column: 27, scope: !22, inlinedAt: !24) !24 = distinct !DILocation(line: 10, column: 10, scope: !8) !25 = !DILocation(line: 6, column: 12, scope: !22, inlinedAt: !24) diff --git a/llvm/test/Transforms/SROA/dbg-single-piece.ll b/llvm/test/Transforms/SROA/dbg-single-piece.ll index d9eb41b34772..55aa3070aabc 100644 --- a/llvm/test/Transforms/SROA/dbg-single-piece.ll +++ b/llvm/test/Transforms/SROA/dbg-single-piece.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -passes=sroa %s -S | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" @@ -5,14 +6,16 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" declare void @llvm.dbg.declare(metadata, metadata, metadata) #0 define void @_ZL18findInsertLocationPN4llvm17MachineBasicBlockENS_9SlotIndexERNS_13LiveIntervalsE() { +; CHECK-LABEL: @_ZL18findInsertLocationPN4llvm17MachineBasicBlockENS_9SlotIndexERNS_13LiveIntervalsE( +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @llvm.dbg.value(metadata %foo* undef, metadata [[META3:![0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64)), !dbg [[DBG8:![0-9]+]] +; CHECK-NEXT: ret void +; entry: %retval = alloca %foo, align 8 call void @llvm.dbg.declare(metadata %foo* %retval, metadata !1, metadata !7), !dbg !8 ; Checks that SROA still inserts a bit_piece expression, even if it produces only one piece ; (as long as that piece is smaller than the whole thing) -; CHECK-NOT: call void @llvm.dbg.value -; CHECK: call void @llvm.dbg.value(metadata %foo* undef, {{.*}}, metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64)), !dbg -; CHECK-NOT: call void @llvm.dbg.value %0 = bitcast %foo* %retval to i8* %1 = getelementptr inbounds i8, i8* %0, i64 8 %2 = bitcast i8* %1 to %foo** diff --git a/llvm/test/Transforms/SROA/dead-inst.ll b/llvm/test/Transforms/SROA/dead-inst.ll index 083c8a6221e1..fe320c790b39 100644 --- a/llvm/test/Transforms/SROA/dead-inst.ll +++ b/llvm/test/Transforms/SROA/dead-inst.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; SROA fails to rewrite allocs but does rewrite some phis and delete ; dead instructions. Ensure that this invalidates analyses required ; for other passes. @@ -23,8 +24,8 @@ define hidden fastcc void @H(%class.b* noalias nocapture readnone, [2 x i64]) un store i64 0, i64* %.sroa.0, align 8 %4 = extractvalue [2 x i64] %1, 1 switch i64 %4, label %6 [ - i64 4, label %foo - i64 5, label %5 + i64 4, label %foo + i64 5, label %5 ] ;