From 64c54c5459cfae8478ce28710784f36b0d94fb2f Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 1 Oct 2020 22:42:14 +0200 Subject: [PATCH] [MemCpyOpt] Regenerate test checks (NFC) --- .../MemCpyOpt/2008-02-24-MultipleUseofSRet.ll | 18 +- .../MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll | 15 +- .../2011-06-02-CallSlotOverwritten.ll | 20 +- .../MemCpyOpt/aggregate-type-crash.ll | 11 +- llvm/test/Transforms/MemCpyOpt/align.ll | 20 +- llvm/test/Transforms/MemCpyOpt/atomic.ll | 27 +- llvm/test/Transforms/MemCpyOpt/callslot_aa.ll | 8 +- .../Transforms/MemCpyOpt/callslot_deref.ll | 18 +- .../Transforms/MemCpyOpt/callslot_throw.ll | 29 +- .../Transforms/MemCpyOpt/capturing-func.ll | 13 +- llvm/test/Transforms/MemCpyOpt/crash.ll | 31 +- llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll | 125 ++--- llvm/test/Transforms/MemCpyOpt/form-memset.ll | 459 +++++++++++------- .../Transforms/MemCpyOpt/invariant.start.ll | 27 +- llvm/test/Transforms/MemCpyOpt/lifetime.ll | 10 +- .../MemCpyOpt/load-store-to-memcpy.ll | 6 +- .../Transforms/MemCpyOpt/loadstore-sret.ll | 14 +- .../Transforms/MemCpyOpt/memcpy-to-memset.ll | 71 +-- .../test/Transforms/MemCpyOpt/memcpy-undef.ll | 26 +- llvm/test/Transforms/MemCpyOpt/memcpy.ll | 136 ++++-- llvm/test/Transforms/MemCpyOpt/memmove.ll | 27 +- .../MemCpyOpt/memset-memcpy-oversized.ll | 8 +- .../memset-memcpy-redundant-memset.ll | 183 ++++--- .../MemCpyOpt/memset-memcpy-to-2x-memset.ll | 84 ++-- llvm/test/Transforms/MemCpyOpt/nontemporal.ll | 41 +- llvm/test/Transforms/MemCpyOpt/pr29105.ll | 13 +- llvm/test/Transforms/MemCpyOpt/pr37967.ll | 18 +- .../Transforms/MemCpyOpt/process_store.ll | 27 +- .../Transforms/MemCpyOpt/profitable-memset.ll | 15 +- llvm/test/Transforms/MemCpyOpt/smaller.ll | 13 +- llvm/test/Transforms/MemCpyOpt/sret.ll | 21 +- .../test/Transforms/MemCpyOpt/stackrestore.ll | 42 +- .../store-to-memset-is-nonzero-type.ll | 2 +- .../Transforms/MemCpyOpt/store-to-memset.ll | 42 +- .../Transforms/MemCpyOpt/vscale-memset.ll | 4 +- 35 files 
changed, 1091 insertions(+), 533 deletions(-) diff --git a/llvm/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll b/llvm/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll index 237b8fec4f64..0f8c417f2127 100644 --- a/llvm/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll +++ b/llvm/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -basic-aa -memcpyopt -dse -S | grep "call.*initialize" | not grep memtmp +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -basic-aa -memcpyopt -dse -S | FileCheck %s ; PR2077 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" @@ -7,6 +8,14 @@ target triple = "i386-pc-linux-gnu" %0 = type { x86_fp80, x86_fp80 } define internal fastcc void @initialize(%0* noalias nocapture sret %agg.result) nounwind { +; CHECK-LABEL: @initialize( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[AGG_RESULT_03:%.*]] = getelementptr [[TMP0:%.*]], %0* [[AGG_RESULT:%.*]], i32 0, i32 0 +; CHECK-NEXT: store x86_fp80 0xK00000000000000000000, x86_fp80* [[AGG_RESULT_03]], align 4 +; CHECK-NEXT: [[AGG_RESULT_15:%.*]] = getelementptr [[TMP0]], %0* [[AGG_RESULT]], i32 0, i32 1 +; CHECK-NEXT: store x86_fp80 0xK00000000000000000000, x86_fp80* [[AGG_RESULT_15]], align 4 +; CHECK-NEXT: ret void +; entry: %agg.result.03 = getelementptr %0, %0* %agg.result, i32 0, i32 0 store x86_fp80 0xK00000000000000000000, x86_fp80* %agg.result.03 @@ -18,6 +27,13 @@ entry: declare fastcc x86_fp80 @passed_uninitialized(%0* nocapture) nounwind define fastcc void @badly_optimized() nounwind { +; CHECK-LABEL: @badly_optimized( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[Z:%.*]] = alloca [[TMP0:%.*]], align 8 +; CHECK-NEXT: call fastcc void @initialize(%0* noalias sret [[Z]]) +; CHECK-NEXT: [[TMP5:%.*]] = call fastcc x86_fp80 @passed_uninitialized(%0* [[Z]]) +; CHECK-NEXT: ret void +; entry: %z = alloca %0 %tmp = 
alloca %0 diff --git a/llvm/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll b/llvm/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll index a0f34b9baa6d..dbe819adb689 100644 --- a/llvm/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll +++ b/llvm/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -basic-aa -memcpyopt -S | not grep "call.*memcpy." +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -basic-aa -memcpyopt -S | FileCheck %s target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" %a = type { i32 } @@ -7,6 +8,18 @@ target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3 declare void @g(%a* nocapture) define float @f() { +; CHECK-LABEL: @f( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_VAR:%.*]] = alloca [[A:%.*]], align 8 +; CHECK-NEXT: [[B_VAR:%.*]] = alloca [[B:%.*]], align 8 +; CHECK-NEXT: [[B_VAR1:%.*]] = bitcast %b* [[B_VAR]] to %a* +; CHECK-NEXT: call void @g(%a* [[B_VAR1]]) +; CHECK-NEXT: [[A_I8:%.*]] = bitcast %a* [[A_VAR]] to i8* +; CHECK-NEXT: [[B_I8:%.*]] = bitcast %b* [[B_VAR]] to i8* +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[B]], %b* [[B_VAR]], i32 0, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = load float, float* [[TMP1]], align 4 +; CHECK-NEXT: ret float [[TMP2]] +; entry: %a_var = alloca %a %b_var = alloca %b, align 1 diff --git a/llvm/test/Transforms/MemCpyOpt/2011-06-02-CallSlotOverwritten.ll b/llvm/test/Transforms/MemCpyOpt/2011-06-02-CallSlotOverwritten.ll index 8ba8df4d8b39..bd086967ec29 100644 --- a/llvm/test/Transforms/MemCpyOpt/2011-06-02-CallSlotOverwritten.ll +++ b/llvm/test/Transforms/MemCpyOpt/2011-06-02-CallSlotOverwritten.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -basic-aa -memcpyopt -S | FileCheck %s ; PR10067 ; Make sure the call+copy isn't optimized in such a way 
that @@ -12,10 +13,25 @@ target triple = "i386-apple-darwin10" declare void @bar(%struct1* nocapture sret %agg.result) nounwind define i32 @foo() nounwind { +; CHECK-LABEL: @foo( +; CHECK-NEXT: [[X:%.*]] = alloca [[STRUCT1:%.*]], align 8 +; CHECK-NEXT: [[Y:%.*]] = alloca [[STRUCT2:%.*]], align 8 +; CHECK-NEXT: call void @bar(%struct1* sret [[X]]) [[ATTR0:#.*]] +; CHECK-NEXT: [[GEPN1:%.*]] = getelementptr inbounds [[STRUCT2]], %struct2* [[Y]], i32 0, i32 0, i32 0 +; CHECK-NEXT: store i32 0, i32* [[GEPN1]], align 8 +; CHECK-NEXT: [[GEPN2:%.*]] = getelementptr inbounds [[STRUCT2]], %struct2* [[Y]], i32 0, i32 0, i32 1 +; CHECK-NEXT: store i32 0, i32* [[GEPN2]], align 4 +; CHECK-NEXT: [[BIT1:%.*]] = bitcast %struct1* [[X]] to i64* +; CHECK-NEXT: [[BIT2:%.*]] = bitcast %struct2* [[Y]] to i64* +; CHECK-NEXT: [[LOAD:%.*]] = load i64, i64* [[BIT1]], align 8 +; CHECK-NEXT: store i64 [[LOAD]], i64* [[BIT2]], align 8 +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr [[STRUCT2]], %struct2* [[Y]], i32 0, i32 0, i32 0 +; CHECK-NEXT: [[RET:%.*]] = load i32, i32* [[GEP1]], align 4 +; CHECK-NEXT: ret i32 [[RET]] +; %x = alloca %struct1, align 8 %y = alloca %struct2, align 8 call void @bar(%struct1* sret %x) nounwind -; CHECK: call void @bar(%struct1* sret %x) %gepn1 = getelementptr inbounds %struct2, %struct2* %y, i32 0, i32 0, i32 0 store i32 0, i32* %gepn1, align 8 @@ -27,8 +43,6 @@ define i32 @foo() nounwind { %load = load i64, i64* %bit1, align 8 store i64 %load, i64* %bit2, align 8 -; CHECK: %load = load i64, i64* %bit1, align 8 -; CHECK: store i64 %load, i64* %bit2, align 8 %gep1 = getelementptr %struct2, %struct2* %y, i32 0, i32 0, i32 0 %ret = load i32, i32* %gep1 diff --git a/llvm/test/Transforms/MemCpyOpt/aggregate-type-crash.ll b/llvm/test/Transforms/MemCpyOpt/aggregate-type-crash.ll index 16d107730acd..dd9536a85888 100644 --- a/llvm/test/Transforms/MemCpyOpt/aggregate-type-crash.ll +++ b/llvm/test/Transforms/MemCpyOpt/aggregate-type-crash.ll @@ -1,3 +1,4 @@ +; NOTE: 
Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -memcpyopt -S -o - < %s | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" @@ -9,13 +10,19 @@ target triple = "x86_64-apple-macosx10.14.0" declare noalias i8* @my_malloc(%my_struct*) #0 define void @my_func(%my_struct* %0) { +; CHECK-LABEL: @my_func( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP1:%.*]] = load [[MY_STRUCT:%.*]], %my_struct* [[TMP0:%.*]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = call i8* @my_malloc(%my_struct* [[TMP0]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %my_struct* +; CHECK-NEXT: store [[MY_STRUCT]] [[TMP1]], %my_struct* [[TMP3]], align 4 +; CHECK-NEXT: ret void +; entry: -; CHECK: entry: %1 = load %my_struct, %my_struct* %0 %2 = call i8* @my_malloc(%my_struct* %0) %3 = bitcast i8* %2 to %my_struct* store %my_struct %1, %my_struct* %3 -; CHECK-NOT: call void @llvm.memcpy.{{.*}}.{{.*}}.{{.*}} ret void } diff --git a/llvm/test/Transforms/MemCpyOpt/align.ll b/llvm/test/Transforms/MemCpyOpt/align.ll index 2e683bfa91f5..cdaf44f69e01 100644 --- a/llvm/test/Transforms/MemCpyOpt/align.ll +++ b/llvm/test/Transforms/MemCpyOpt/align.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -S -basic-aa -memcpyopt | FileCheck %s target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" @@ -9,7 +10,14 @@ declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind define void @foo(i32* %p) { ; CHECK-LABEL: @foo( -; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 {{.*}}, i8 0, i64 16, i1 false) +; CHECK-NEXT: [[A0:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 0 +; CHECK-NEXT: [[A1:%.*]] = getelementptr i32, i32* [[P]], i64 1 +; CHECK-NEXT: [[A2:%.*]] = getelementptr i32, i32* [[P]], i64 2 +; CHECK-NEXT: [[A3:%.*]] = getelementptr i32, i32* [[P]], i64 3 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[A0]] to i8* +; 
CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP1]], i8 0, i64 16, i1 false) +; CHECK-NEXT: ret void +; %a0 = getelementptr i32, i32* %p, i64 0 store i32 0, i32* %a0, align 4 %a1 = getelementptr i32, i32* %p, i64 1 @@ -25,8 +33,14 @@ define void @foo(i32* %p) { define void @bar() { ; CHECK-LABEL: @bar( -; CHECK: %a4 = alloca i32, align 8 -; CHECK-NOT: memcpy +; CHECK-NEXT: [[A4:%.*]] = alloca i32, align 8 +; CHECK-NEXT: [[A8:%.*]] = alloca i32, align 8 +; CHECK-NEXT: [[A8_CAST:%.*]] = bitcast i32* [[A8]] to i8* +; CHECK-NEXT: [[A4_CAST:%.*]] = bitcast i32* [[A4]] to i8* +; CHECK-NEXT: [[A41:%.*]] = bitcast i32* [[A4]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[A41]], i8 0, i64 4, i1 false) +; CHECK-NEXT: ret void +; %a4 = alloca i32, align 4 %a8 = alloca i32, align 8 %a8.cast = bitcast i32* %a8 to i8* diff --git a/llvm/test/Transforms/MemCpyOpt/atomic.ll b/llvm/test/Transforms/MemCpyOpt/atomic.ll index 65f6c925e205..ed31766b2f54 100644 --- a/llvm/test/Transforms/MemCpyOpt/atomic.ll +++ b/llvm/test/Transforms/MemCpyOpt/atomic.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -basic-aa -memcpyopt -S < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" @@ -11,8 +12,16 @@ declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind ; memcpyopt should not touch atomic ops define void @test1() nounwind uwtable ssp { -; CHECK: test1 -; CHECK: store atomic +; CHECK-LABEL: @test1( +; CHECK-NEXT: [[X:%.*]] = alloca [101 x i32], align 16 +; CHECK-NEXT: [[BC:%.*]] = bitcast [101 x i32]* [[X]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 16 [[BC]], i8 0, i64 400, i1 false) +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [101 x i32], [101 x i32]* [[X]], i32 0, i32 100 +; CHECK-NEXT: store atomic i32 0, i32* [[GEP1]] 
unordered, align 4 +; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds [101 x i32], [101 x i32]* [[X]], i32 0, i32 0 +; CHECK-NEXT: call void @otherf(i32* [[GEP2]]) +; CHECK-NEXT: ret void +; %x = alloca [101 x i32], align 16 %bc = bitcast [101 x i32]* %x to i8* call void @llvm.memset.p0i8.i64(i8* align 16 %bc, i8 0, i64 400, i1 false) @@ -25,17 +34,21 @@ define void @test1() nounwind uwtable ssp { ; memcpyopt across unordered store define void @test2() nounwind uwtable ssp { -; CHECK: test2 -; CHECK: call -; CHECK-NEXT: store atomic -; CHECK-NEXT: call +; CHECK-LABEL: @test2( +; CHECK-NEXT: [[OLD:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[NEW:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @otherf(i32* nocapture [[NEW]]) +; CHECK-NEXT: store atomic i32 0, i32* @x unordered, align 4 +; CHECK-NEXT: call void @otherf(i32* nocapture [[NEW]]) +; CHECK-NEXT: ret void +; %old = alloca i32 %new = alloca i32 call void @otherf(i32* nocapture %old) store atomic i32 0, i32* @x unordered, align 4 %v = load i32, i32* %old store i32 %v, i32* %new - call void @otherf(i32* nocapture %new) + call void @otherf(i32* nocapture %new) ret void } diff --git a/llvm/test/Transforms/MemCpyOpt/callslot_aa.ll b/llvm/test/Transforms/MemCpyOpt/callslot_aa.ll index 1d45cbe9e5cb..6e7b78d4da71 100644 --- a/llvm/test/Transforms/MemCpyOpt/callslot_aa.ll +++ b/llvm/test/Transforms/MemCpyOpt/callslot_aa.ll @@ -1,12 +1,18 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -S -basic-aa -memcpyopt | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" %T = type { i64, i64 } define void @test(i8* %src) { +; CHECK-LABEL: @test( +; CHECK-NEXT: [[TMP:%.*]] = alloca i8, align 1 +; CHECK-NEXT: [[DST:%.*]] = alloca i8, align 1 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[DST]], i8* align 8 [[SRC:%.*]], i64 1, i1 false) +; CHECK-NEXT: ret void +; %tmp = alloca i8 %dst = alloca i8 -; CHECK: call void 
@llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst, i8* align 8 %src, i64 1, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %tmp, i8* align 8 %src, i64 1, i1 false), !noalias !2 call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst, i8* align 8 %tmp, i64 1, i1 false) diff --git a/llvm/test/Transforms/MemCpyOpt/callslot_deref.ll b/llvm/test/Transforms/MemCpyOpt/callslot_deref.ll index ad578be711cd..a2c0503894a1 100644 --- a/llvm/test/Transforms/MemCpyOpt/callslot_deref.ll +++ b/llvm/test/Transforms/MemCpyOpt/callslot_deref.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -S -basic-aa -memcpyopt | FileCheck %s target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128" @@ -7,8 +8,13 @@ declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind ; all bytes of %dst that are touch by the memset are dereferenceable define void @must_remove_memcpy(i8* noalias nocapture dereferenceable(4096) %dst) { ; CHECK-LABEL: @must_remove_memcpy( -; CHECK: call void @llvm.memset.p0i8.i64 -; CHECK-NOT: call void @llvm.memcpy.p0i8.p0i8.i64 +; CHECK-NEXT: [[SRC:%.*]] = alloca [4096 x i8], align 1 +; CHECK-NEXT: [[P:%.*]] = getelementptr inbounds [4096 x i8], [4096 x i8]* [[SRC]], i64 0, i64 0 +; CHECK-NEXT: [[DST1:%.*]] = bitcast i8* [[DST:%.*]] to [4096 x i8]* +; CHECK-NEXT: [[DST12:%.*]] = bitcast [4096 x i8]* [[DST1]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DST12]], i8 0, i64 4096, i1 false) +; CHECK-NEXT: ret void +; %src = alloca [4096 x i8], align 1 %p = getelementptr inbounds [4096 x i8], [4096 x i8]* %src, i64 0, i64 0 call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 4096, i1 false) @@ -20,8 +26,12 @@ define void @must_remove_memcpy(i8* noalias nocapture dereferenceable(4096) %dst ; We can't remove the memcpy, but we can turn it into an independent memset. 
define void @must_not_remove_memcpy(i8* noalias nocapture dereferenceable(1024) %dst) { ; CHECK-LABEL: @must_not_remove_memcpy( -; CHECK: call void @llvm.memset.p0i8.i64 -; CHECK: call void @llvm.memset.p0i8.i64 +; CHECK-NEXT: [[SRC:%.*]] = alloca [4096 x i8], align 1 +; CHECK-NEXT: [[P:%.*]] = getelementptr inbounds [4096 x i8], [4096 x i8]* [[SRC]], i64 0, i64 0 +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[P]], i8 0, i64 4096, i1 false) +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DST:%.*]], i8 0, i64 4096, i1 false) +; CHECK-NEXT: ret void +; %src = alloca [4096 x i8], align 1 %p = getelementptr inbounds [4096 x i8], [4096 x i8]* %src, i64 0, i64 0 call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 4096, i1 false) diff --git a/llvm/test/Transforms/MemCpyOpt/callslot_throw.ll b/llvm/test/Transforms/MemCpyOpt/callslot_throw.ll index 1aa4c92efc72..7092f046af31 100644 --- a/llvm/test/Transforms/MemCpyOpt/callslot_throw.ll +++ b/llvm/test/Transforms/MemCpyOpt/callslot_throw.ll @@ -1,34 +1,41 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -memcpyopt < %s | FileCheck %s declare void @may_throw(i32* nocapture %x) -; CHECK-LABEL: define void @test1( define void @test1(i32* nocapture noalias dereferenceable(4) %x) { +; CHECK-LABEL: @test1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[T:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @may_throw(i32* nonnull [[T]]) +; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[T]], align 4 +; CHECK-NEXT: store i32 [[LOAD]], i32* [[X:%.*]], align 4 +; CHECK-NEXT: ret void +; entry: %t = alloca i32, align 4 call void @may_throw(i32* nonnull %t) %load = load i32, i32* %t, align 4 store i32 %load, i32* %x, align 4 -; CHECK: %[[t:.*]] = alloca i32, align 4 -; CHECK-NEXT: call void @may_throw(i32* {{.*}} %[[t]]) -; CHECK-NEXT: %[[load:.*]] = load i32, i32* %[[t]], align 4 -; CHECK-NEXT: store i32 %[[load]], i32* %x, align 4 ret void } declare void @always_throws() -; CHECK-LABEL: define 
void @test2( define void @test2(i32* nocapture noalias dereferenceable(4) %x) { +; CHECK-LABEL: @test2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[T:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @may_throw(i32* nonnull [[T]]) [[ATTR0:#.*]] +; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[T]], align 4 +; CHECK-NEXT: call void @always_throws() +; CHECK-NEXT: store i32 [[LOAD]], i32* [[X:%.*]], align 4 +; CHECK-NEXT: ret void +; entry: %t = alloca i32, align 4 call void @may_throw(i32* nonnull %t) nounwind %load = load i32, i32* %t, align 4 call void @always_throws() store i32 %load, i32* %x, align 4 -; CHECK: %[[t:.*]] = alloca i32, align 4 -; CHECK-NEXT: call void @may_throw(i32* {{.*}} %[[t]]) -; CHECK-NEXT: %[[load:.*]] = load i32, i32* %[[t]], align 4 -; CHECK-NEXT: call void @always_throws() -; CHECK-NEXT: store i32 %[[load]], i32* %x, align 4 ret void } diff --git a/llvm/test/Transforms/MemCpyOpt/capturing-func.ll b/llvm/test/Transforms/MemCpyOpt/capturing-func.ll index 0ea889a66497..8376ecd3d30d 100644 --- a/llvm/test/Transforms/MemCpyOpt/capturing-func.ll +++ b/llvm/test/Transforms/MemCpyOpt/capturing-func.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -basic-aa -memcpyopt -S | FileCheck %s target datalayout = "e" @@ -6,6 +7,14 @@ declare void @foo(i8*) declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind define void @test() { +; CHECK-LABEL: @test( +; CHECK-NEXT: [[PTR1:%.*]] = alloca i8, align 1 +; CHECK-NEXT: [[PTR2:%.*]] = alloca i8, align 1 +; CHECK-NEXT: call void @foo(i8* [[PTR2]]) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[PTR1]], i8* [[PTR2]], i32 1, i1 false) +; CHECK-NEXT: call void @foo(i8* [[PTR1]]) +; CHECK-NEXT: ret void +; %ptr1 = alloca i8 %ptr2 = alloca i8 call void @foo(i8* %ptr2) @@ -15,8 +24,4 @@ define void @test() { ; Check that the transformation isn't applied if the called function can ; capture the pointer argument (i.e. 
the nocapture attribute isn't present) - ; CHECK-LABEL: @test( - ; CHECK: call void @foo(i8* %ptr2) - ; CHECK-NEXT: call void @llvm.memcpy - ; CHECK-NEXT: call void @foo(i8* %ptr1) } diff --git a/llvm/test/Transforms/MemCpyOpt/crash.ll b/llvm/test/Transforms/MemCpyOpt/crash.ll index 1fd4d0deae6d..489a1827604b 100644 --- a/llvm/test/Transforms/MemCpyOpt/crash.ll +++ b/llvm/test/Transforms/MemCpyOpt/crash.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -basic-aa -memcpyopt -disable-output +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -S -basic-aa -memcpyopt | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" target triple = "armv7-eabi" @@ -8,6 +9,30 @@ target triple = "armv7-eabi" ; PR4882 define void @test1(%struct.bar* %this) { +; CHECK-LABEL: @test1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_BAR:%.*]], %struct.bar* [[THIS:%.*]], i32 0, i32 0, i32 0, i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_BAR]], %struct.bar* [[THIS]], i32 0, i32 0, i32 0, i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_BAR]], %struct.bar* [[THIS]], i32 0, i32 0, i32 0, i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_BAR]], %struct.bar* [[THIS]], i32 0, i32 0, i32 0, i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_BAR]], %struct.bar* [[THIS]], i32 0, i32 1, i32 0, i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_BAR]], %struct.bar* [[THIS]], i32 0, i32 1, i32 0, i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_BAR]], %struct.bar* [[THIS]], i32 0, i32 1, i32 0, i32 2 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_BAR]], %struct.bar* [[THIS]], i32 0, i32 1, i32 0, i32 3 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_BAR]], %struct.bar* [[THIS]], i32 0, i32 3, i32 0, i32 1 +; 
CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_BAR]], %struct.bar* [[THIS]], i32 0, i32 3, i32 0, i32 2 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_BAR]], %struct.bar* [[THIS]], i32 0, i32 3, i32 0, i32 3 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_BAR]], %struct.bar* [[THIS]], i32 0, i32 4, i32 0, i32 0 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_BAR]], %struct.bar* [[THIS]], i32 0, i32 4, i32 0, i32 1 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_BAR]], %struct.bar* [[THIS]], i32 0, i32 4, i32 0, i32 2 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_BAR]], %struct.bar* [[THIS]], i32 0, i32 4, i32 0, i32 3 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_BAR]], %struct.bar* [[THIS]], i32 0, i32 5 +; CHECK-NEXT: [[TMP16:%.*]] = bitcast float* [[TMP0]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP16]], i8 0, i64 32, i1 false) +; CHECK-NEXT: [[TMP17:%.*]] = bitcast float* [[TMP8]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP17]], i8 0, i64 32, i1 false) +; CHECK-NEXT: unreachable +; entry: %0 = getelementptr inbounds %struct.bar, %struct.bar* %this, i32 0, i32 0, i32 0, i32 0 store float 0.000000e+00, float* %0, align 4 @@ -49,6 +74,10 @@ entry: declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind define void @test2(i32 %cmd) nounwind { +; CHECK-LABEL: @test2( +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* null, i8* undef, i64 20, i1 false) [[ATTR1:#.*]] +; CHECK-NEXT: ret void +; call void @llvm.memcpy.p0i8.p0i8.i64(i8* undef, i8* undef, i64 20, i1 false) nounwind call void @llvm.memcpy.p0i8.p0i8.i64(i8* null, i8* undef, i64 20, i1 false) nounwind ret void diff --git a/llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll b/llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll index 6ce1aee338d8..777ba51f3827 100644 --- a/llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll +++ 
b/llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -memcpyopt -S < %s | FileCheck %s target datalayout = "e-i64:64-f80:128-n8:16:32:64" @@ -6,41 +7,49 @@ target triple = "x86_64-unknown-linux-gnu" %S = type { i8*, i8, i32 } define void @copy(%S* %src, %S* %dst) { -; CHECK-LABEL: copy -; CHECK-NOT: load -; CHECK: call void @llvm.memmove.p0i8.p0i8.i64 -; CHECK-NEXT: ret void +; CHECK-LABEL: @copy( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast %S* [[DST:%.*]] to i8* +; CHECK-NEXT: [[TMP2:%.*]] = bitcast %S* [[SRC:%.*]] to i8* +; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i64 16, i1 false) +; CHECK-NEXT: ret void +; %1 = load %S, %S* %src store %S %1, %S* %dst ret void } define void @noaliassrc(%S* noalias %src, %S* %dst) { -; CHECK-LABEL: noaliassrc -; CHECK-NOT: load -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 -; CHECK-NEXT: ret void +; CHECK-LABEL: @noaliassrc( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast %S* [[DST:%.*]] to i8* +; CHECK-NEXT: [[TMP2:%.*]] = bitcast %S* [[SRC:%.*]] to i8* +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i64 16, i1 false) +; CHECK-NEXT: ret void +; %1 = load %S, %S* %src store %S %1, %S* %dst ret void } define void @noaliasdst(%S* %src, %S* noalias %dst) { -; CHECK-LABEL: noaliasdst -; CHECK-NOT: load -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 -; CHECK-NEXT: ret void +; CHECK-LABEL: @noaliasdst( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast %S* [[DST:%.*]] to i8* +; CHECK-NEXT: [[TMP2:%.*]] = bitcast %S* [[SRC:%.*]] to i8* +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i64 16, i1 false) +; CHECK-NEXT: ret void +; %1 = load %S, %S* %src store %S %1, %S* %dst ret void } define void @destroysrc(%S* %src, %S* %dst) { -; CHECK-LABEL: destroysrc -; CHECK: load %S, %S* %src -; CHECK: call void @llvm.memset.p0i8.i64 
-; CHECK-NEXT: store %S %1, %S* %dst -; CHECK-NEXT: ret void +; CHECK-LABEL: @destroysrc( +; CHECK-NEXT: [[TMP1:%.*]] = load [[S:%.*]], %S* [[SRC:%.*]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast %S* [[SRC]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP2]], i8 0, i64 16, i1 false) +; CHECK-NEXT: store [[S]] [[TMP1]], %S* [[DST:%.*]], align 8 +; CHECK-NEXT: ret void +; %1 = load %S, %S* %src store %S zeroinitializer, %S* %src store %S %1, %S* %dst @@ -48,11 +57,14 @@ define void @destroysrc(%S* %src, %S* %dst) { } define void @destroynoaliassrc(%S* noalias %src, %S* %dst) { -; CHECK-LABEL: destroynoaliassrc -; CHECK-NOT: load -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 -; CHECK-NEXT: call void @llvm.memset.p0i8.i64 -; CHECK-NEXT: ret void +; CHECK-LABEL: @destroynoaliassrc( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast %S* [[SRC:%.*]] to i8* +; CHECK-NEXT: [[TMP2:%.*]] = bitcast %S* [[DST:%.*]] to i8* +; CHECK-NEXT: [[TMP3:%.*]] = bitcast %S* [[SRC]] to i8* +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], i64 16, i1 false) +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP1]], i8 0, i64 16, i1 false) +; CHECK-NEXT: ret void +; %1 = load %S, %S* %src store %S zeroinitializer, %S* %src store %S %1, %S* %dst @@ -60,12 +72,14 @@ define void @destroynoaliassrc(%S* noalias %src, %S* %dst) { } define void @copyalias(%S* %src, %S* %dst) { -; CHECK-LABEL: copyalias -; CHECK-NEXT: [[LOAD:%[a-z0-9\.]+]] = load %S, %S* %src -; CHECK-NOT: load -; CHECK: call void @llvm.memmove.p0i8.p0i8.i64 -; CHECK-NEXT: store %S [[LOAD]], %S* %dst -; CHECK-NEXT: ret void +; CHECK-LABEL: @copyalias( +; CHECK-NEXT: [[TMP1:%.*]] = load [[S:%.*]], %S* [[SRC:%.*]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast %S* [[DST:%.*]] to i8* +; CHECK-NEXT: [[TMP3:%.*]] = bitcast %S* [[SRC]] to i8* +; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], i64 16, i1 false) +; 
CHECK-NEXT: store [[S]] [[TMP1]], %S* [[DST]], align 8 +; CHECK-NEXT: ret void +; %1 = load %S, %S* %src %2 = load %S, %S* %src store %S %1, %S* %dst @@ -76,14 +90,15 @@ define void @copyalias(%S* %src, %S* %dst) { ; If the store address is computed in a complex manner, make ; sure we lift the computation as well if needed and possible. define void @addrproducer(%S* %src, %S* %dst) { -; CHECK-LABEL: addrproducer( -; CHECK-NEXT: %[[DSTCAST:[0-9]+]] = bitcast %S* %dst to i8* -; CHECK-NEXT: %dst2 = getelementptr %S, %S* %dst, i64 1 -; CHECK-NEXT: %[[DST2CAST:[0-9]+]] = bitcast %S* %dst2 to i8* -; CHECK-NEXT: %[[SRCCAST:[0-9]+]] = bitcast %S* %src to i8* -; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 8 %[[DST2CAST]], i8* align 8 %[[SRCCAST]], i64 16, i1 false) -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 %[[DSTCAST]], i8 undef, i64 16, i1 false) -; CHECK-NEXT: ret void +; CHECK-LABEL: @addrproducer( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast %S* [[DST:%.*]] to i8* +; CHECK-NEXT: [[DST2:%.*]] = getelementptr [[S:%.*]], %S* [[DST]], i64 1 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast %S* [[DST2]] to i8* +; CHECK-NEXT: [[TMP3:%.*]] = bitcast %S* [[SRC:%.*]] to i8* +; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], i64 16, i1 false) +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP1]], i8 undef, i64 16, i1 false) +; CHECK-NEXT: ret void +; %1 = load %S, %S* %src store %S undef, %S* %dst %dst2 = getelementptr %S , %S* %dst, i64 1 @@ -92,14 +107,15 @@ define void @addrproducer(%S* %src, %S* %dst) { } define void @aliasaddrproducer(%S* %src, %S* %dst, i32* %dstidptr) { -; CHECK-LABEL: aliasaddrproducer( -; CHECK-NEXT: %[[SRC:[0-9]+]] = load %S, %S* %src -; CHECK-NEXT: %[[DSTCAST:[0-9]+]] = bitcast %S* %dst to i8* -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 %[[DSTCAST]], i8 undef, i64 16, i1 false) -; CHECK-NEXT: %dstindex = load i32, i32* %dstidptr -; CHECK-NEXT: %dst2 = getelementptr 
%S, %S* %dst, i32 %dstindex -; CHECK-NEXT: store %S %[[SRC]], %S* %dst2 -; CHECK-NEXT: ret void +; CHECK-LABEL: @aliasaddrproducer( +; CHECK-NEXT: [[TMP1:%.*]] = load [[S:%.*]], %S* [[SRC:%.*]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast %S* [[DST:%.*]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP2]], i8 undef, i64 16, i1 false) +; CHECK-NEXT: [[DSTINDEX:%.*]] = load i32, i32* [[DSTIDPTR:%.*]], align 4 +; CHECK-NEXT: [[DST2:%.*]] = getelementptr [[S]], %S* [[DST]], i32 [[DSTINDEX]] +; CHECK-NEXT: store [[S]] [[TMP1]], %S* [[DST2]], align 8 +; CHECK-NEXT: ret void +; %1 = load %S, %S* %src store %S undef, %S* %dst %dstindex = load i32, i32* %dstidptr @@ -109,16 +125,17 @@ define void @aliasaddrproducer(%S* %src, %S* %dst, i32* %dstidptr) { } define void @noaliasaddrproducer(%S* %src, %S* noalias %dst, i32* noalias %dstidptr) { -; CHECK-LABEL: noaliasaddrproducer( -; CHECK-NEXT: %[[SRCCAST:[0-9]+]] = bitcast %S* %src to i8* -; CHECK-NEXT: %[[LOADED:[0-9]+]] = load i32, i32* %dstidptr -; CHECK-NEXT: %dstindex = or i32 %[[LOADED]], 1 -; CHECK-NEXT: %dst2 = getelementptr %S, %S* %dst, i32 %dstindex -; CHECK-NEXT: %[[DST2CAST:[0-9]+]] = bitcast %S* %dst2 to i8* -; CHECK-NEXT: %[[SRCCAST2:[0-9]+]] = bitcast %S* %src to i8* -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %[[DST2CAST]], i8* align 8 %[[SRCCAST2]], i64 16, i1 false) -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 %[[SRCCAST]], i8 undef, i64 16, i1 false) -; CHECK-NEXT: ret void +; CHECK-LABEL: @noaliasaddrproducer( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast %S* [[SRC:%.*]] to i8* +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[DSTIDPTR:%.*]], align 4 +; CHECK-NEXT: [[DSTINDEX:%.*]] = or i32 [[TMP2]], 1 +; CHECK-NEXT: [[DST2:%.*]] = getelementptr [[S:%.*]], %S* [[DST:%.*]], i32 [[DSTINDEX]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast %S* [[DST2]] to i8* +; CHECK-NEXT: [[TMP4:%.*]] = bitcast %S* [[SRC]] to i8* +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* 
align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 16, i1 false) +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP1]], i8 undef, i64 16, i1 false) +; CHECK-NEXT: ret void +; %1 = load %S, %S* %src store %S undef, %S* %src %2 = load i32, i32* %dstidptr diff --git a/llvm/test/Transforms/MemCpyOpt/form-memset.ll b/llvm/test/Transforms/MemCpyOpt/form-memset.ll index dde025dac926..bec6b8855a2b 100644 --- a/llvm/test/Transforms/MemCpyOpt/form-memset.ll +++ b/llvm/test/Transforms/MemCpyOpt/form-memset.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -memcpyopt -S | FileCheck %s ; All the stores in this example should be merged into a single memset. @@ -6,53 +7,74 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3 target triple = "i386-apple-darwin8" define void @test1(i8 signext %c) nounwind { -entry: - %x = alloca [19 x i8] ; <[19 x i8]*> [#uses=20] - %tmp = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 0 ; [#uses=1] - store i8 %c, i8* %tmp, align 1 - %tmp5 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 1 ; [#uses=1] - store i8 %c, i8* %tmp5, align 1 - %tmp9 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 2 ; [#uses=1] - store i8 %c, i8* %tmp9, align 1 - %tmp13 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 3 ; [#uses=1] - store i8 %c, i8* %tmp13, align 1 - %tmp17 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 4 ; [#uses=1] - store i8 %c, i8* %tmp17, align 1 - %tmp21 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 5 ; [#uses=1] - store i8 %c, i8* %tmp21, align 1 - %tmp25 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 6 ; [#uses=1] - store i8 %c, i8* %tmp25, align 1 - %tmp29 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 7 ; [#uses=1] - store i8 %c, i8* %tmp29, align 1 - %tmp33 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 8 ; [#uses=1] - store i8 %c, i8* %tmp33, align 1 - %tmp37 = getelementptr [19 x i8], [19 x i8]* %x, 
i32 0, i32 9 ; [#uses=1] - store i8 %c, i8* %tmp37, align 1 - %tmp41 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 10 ; [#uses=1] - store i8 %c, i8* %tmp41, align 1 - %tmp45 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 11 ; [#uses=1] - store i8 %c, i8* %tmp45, align 1 - %tmp49 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 12 ; [#uses=1] - store i8 %c, i8* %tmp49, align 1 - %tmp53 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 13 ; [#uses=1] - store i8 %c, i8* %tmp53, align 1 - %tmp57 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 14 ; [#uses=1] - store i8 %c, i8* %tmp57, align 1 - %tmp61 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 15 ; [#uses=1] - store i8 %c, i8* %tmp61, align 1 - %tmp65 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 16 ; [#uses=1] - store i8 %c, i8* %tmp65, align 1 - %tmp69 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 17 ; [#uses=1] - store i8 %c, i8* %tmp69, align 1 - %tmp73 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 18 ; [#uses=1] - store i8 %c, i8* %tmp73, align 1 - %tmp76 = call i32 (...) 
@bar( [19 x i8]* %x ) nounwind - ret void ; CHECK-LABEL: @test1( -; CHECK-NOT: store -; CHECK: call void @llvm.memset.p0i8.i64 -; CHECK-NOT: store -; CHECK: ret +; CHECK-NEXT: entry: +; CHECK-NEXT: [[X:%.*]] = alloca [19 x i8], align 1 +; CHECK-NEXT: [[TMP:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 1 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 2 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 3 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 4 +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 5 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 6 +; CHECK-NEXT: [[TMP29:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 7 +; CHECK-NEXT: [[TMP33:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 8 +; CHECK-NEXT: [[TMP37:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 9 +; CHECK-NEXT: [[TMP41:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 10 +; CHECK-NEXT: [[TMP45:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 11 +; CHECK-NEXT: [[TMP49:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 12 +; CHECK-NEXT: [[TMP53:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 13 +; CHECK-NEXT: [[TMP57:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 14 +; CHECK-NEXT: [[TMP61:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 15 +; CHECK-NEXT: [[TMP65:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 16 +; CHECK-NEXT: [[TMP69:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 17 +; CHECK-NEXT: [[TMP73:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 18 +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[TMP]], i8 [[C:%.*]], i64 19, i1 false) +; 
CHECK-NEXT: [[TMP76:%.*]] = call i32 (...) @bar([19 x i8]* [[X]]) [[ATTR0:#.*]] +; CHECK-NEXT: ret void +; +entry: + %x = alloca [19 x i8] ; <[19 x i8]*> [#uses=20] + %tmp = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 0 ; [#uses=1] + store i8 %c, i8* %tmp, align 1 + %tmp5 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 1 ; [#uses=1] + store i8 %c, i8* %tmp5, align 1 + %tmp9 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 2 ; [#uses=1] + store i8 %c, i8* %tmp9, align 1 + %tmp13 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 3 ; [#uses=1] + store i8 %c, i8* %tmp13, align 1 + %tmp17 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 4 ; [#uses=1] + store i8 %c, i8* %tmp17, align 1 + %tmp21 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 5 ; [#uses=1] + store i8 %c, i8* %tmp21, align 1 + %tmp25 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 6 ; [#uses=1] + store i8 %c, i8* %tmp25, align 1 + %tmp29 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 7 ; [#uses=1] + store i8 %c, i8* %tmp29, align 1 + %tmp33 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 8 ; [#uses=1] + store i8 %c, i8* %tmp33, align 1 + %tmp37 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 9 ; [#uses=1] + store i8 %c, i8* %tmp37, align 1 + %tmp41 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 10 ; [#uses=1] + store i8 %c, i8* %tmp41, align 1 + %tmp45 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 11 ; [#uses=1] + store i8 %c, i8* %tmp45, align 1 + %tmp49 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 12 ; [#uses=1] + store i8 %c, i8* %tmp49, align 1 + %tmp53 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 13 ; [#uses=1] + store i8 %c, i8* %tmp53, align 1 + %tmp57 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 14 ; [#uses=1] + store i8 %c, i8* %tmp57, align 1 + %tmp61 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 15 ; [#uses=1] + store i8 %c, i8* %tmp61, align 1 + %tmp65 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, 
i32 16 ; [#uses=1] + store i8 %c, i8* %tmp65, align 1 + %tmp69 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 17 ; [#uses=1] + store i8 %c, i8* %tmp69, align 1 + %tmp73 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 18 ; [#uses=1] + store i8 %c, i8* %tmp73, align 1 + %tmp76 = call i32 (...) @bar( [19 x i8]* %x ) nounwind + ret void } declare i32 @bar(...) @@ -61,104 +83,150 @@ declare i32 @bar(...) define void @test2() nounwind { -entry: - %ref_idx = alloca [8 x i8] ; <[8 x i8]*> [#uses=8] - %left_mvd = alloca [8 x %struct.MV] ; <[8 x %struct.MV]*> [#uses=17] - %up_mvd = alloca [8 x %struct.MV] ; <[8 x %struct.MV]*> [#uses=17] - %tmp20 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 7 ; [#uses=1] - store i8 -1, i8* %tmp20, align 1 - %tmp23 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 6 ; [#uses=1] - store i8 -1, i8* %tmp23, align 1 - %tmp26 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 5 ; [#uses=1] - store i8 -1, i8* %tmp26, align 1 - %tmp29 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 4 ; [#uses=1] - store i8 -1, i8* %tmp29, align 1 - %tmp32 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 3 ; [#uses=1] - store i8 -1, i8* %tmp32, align 1 - %tmp35 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 2 ; [#uses=1] - store i8 -1, i8* %tmp35, align 1 - %tmp38 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 1 ; [#uses=1] - store i8 -1, i8* %tmp38, align 1 - %tmp41 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 0 ; [#uses=2] - store i8 -1, i8* %tmp41, align 1 - %tmp43 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 7, i32 0 ; [#uses=1] - store i16 0, i16* %tmp43, align 2 - %tmp46 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 7, i32 1 ; [#uses=1] - store i16 0, i16* %tmp46, align 2 - %tmp57 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 6, i32 0 ; [#uses=1] - store i16 0, i16* %tmp57, align 2 - %tmp60 = 
getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 6, i32 1 ; [#uses=1] - store i16 0, i16* %tmp60, align 2 - %tmp71 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 5, i32 0 ; [#uses=1] - store i16 0, i16* %tmp71, align 2 - %tmp74 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 5, i32 1 ; [#uses=1] - store i16 0, i16* %tmp74, align 2 - %tmp85 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 4, i32 0 ; [#uses=1] - store i16 0, i16* %tmp85, align 2 - %tmp88 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 4, i32 1 ; [#uses=1] - store i16 0, i16* %tmp88, align 2 - %tmp99 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 3, i32 0 ; [#uses=1] - store i16 0, i16* %tmp99, align 2 - %tmp102 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 3, i32 1 ; [#uses=1] - store i16 0, i16* %tmp102, align 2 - %tmp113 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 2, i32 0 ; [#uses=1] - store i16 0, i16* %tmp113, align 2 - %tmp116 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 2, i32 1 ; [#uses=1] - store i16 0, i16* %tmp116, align 2 - %tmp127 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 1, i32 0 ; [#uses=1] - store i16 0, i16* %tmp127, align 2 - %tmp130 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 1, i32 1 ; [#uses=1] - store i16 0, i16* %tmp130, align 2 - %tmp141 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 0, i32 0 ; [#uses=1] - store i16 0, i16* %tmp141, align 8 - %tmp144 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 0, i32 1 ; [#uses=1] - store i16 0, i16* %tmp144, align 2 - %tmp148 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 7, i32 0 ; [#uses=1] - store i16 0, i16* %tmp148, align 2 - %tmp151 = getelementptr [8 x %struct.MV], [8 x 
%struct.MV]* %left_mvd, i32 0, i32 7, i32 1 ; [#uses=1] - store i16 0, i16* %tmp151, align 2 - %tmp162 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 6, i32 0 ; [#uses=1] - store i16 0, i16* %tmp162, align 2 - %tmp165 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 6, i32 1 ; [#uses=1] - store i16 0, i16* %tmp165, align 2 - %tmp176 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 5, i32 0 ; [#uses=1] - store i16 0, i16* %tmp176, align 2 - %tmp179 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 5, i32 1 ; [#uses=1] - store i16 0, i16* %tmp179, align 2 - %tmp190 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 4, i32 0 ; [#uses=1] - store i16 0, i16* %tmp190, align 2 - %tmp193 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 4, i32 1 ; [#uses=1] - store i16 0, i16* %tmp193, align 2 - %tmp204 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 3, i32 0 ; [#uses=1] - store i16 0, i16* %tmp204, align 2 - %tmp207 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 3, i32 1 ; [#uses=1] - store i16 0, i16* %tmp207, align 2 - %tmp218 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 2, i32 0 ; [#uses=1] - store i16 0, i16* %tmp218, align 2 - %tmp221 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 2, i32 1 ; [#uses=1] - store i16 0, i16* %tmp221, align 2 - %tmp232 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 1, i32 0 ; [#uses=1] - store i16 0, i16* %tmp232, align 2 - %tmp235 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 1, i32 1 ; [#uses=1] - store i16 0, i16* %tmp235, align 2 - %tmp246 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 0, i32 0 ; [#uses=1] - store i16 0, i16* %tmp246, align 8 - %tmp249 = getelementptr [8 x %struct.MV], [8 x 
%struct.MV]* %left_mvd, i32 0, i32 0, i32 1 ; [#uses=1] - store i16 0, i16* %tmp249, align 2 - %up_mvd252 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 0 ; <%struct.MV*> [#uses=1] - %left_mvd253 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 0 ; <%struct.MV*> [#uses=1] - call void @foo( %struct.MV* %up_mvd252, %struct.MV* %left_mvd253, i8* %tmp41 ) nounwind - ret void - ; CHECK-LABEL: @test2( -; CHECK-NOT: store -; CHECK: call void @llvm.memset.p0i8.i64(i8* align 1 %tmp41, i8 -1, i64 8, i1 false) -; CHECK-NOT: store -; CHECK: call void @llvm.memset.p0i8.i64(i8* align 8 %0, i8 0, i64 32, i1 false) -; CHECK-NOT: store -; CHECK: call void @llvm.memset.p0i8.i64(i8* align 8 %1, i8 0, i64 32, i1 false) -; CHECK-NOT: store -; CHECK: ret +; CHECK-NEXT: entry: +; CHECK-NEXT: [[REF_IDX:%.*]] = alloca [8 x i8], align 1 +; CHECK-NEXT: [[LEFT_MVD:%.*]] = alloca [8 x %struct.MV], align 8 +; CHECK-NEXT: [[UP_MVD:%.*]] = alloca [8 x %struct.MV], align 8 +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr [8 x i8], [8 x i8]* [[REF_IDX]], i32 0, i32 7 +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr [8 x i8], [8 x i8]* [[REF_IDX]], i32 0, i32 6 +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr [8 x i8], [8 x i8]* [[REF_IDX]], i32 0, i32 5 +; CHECK-NEXT: [[TMP29:%.*]] = getelementptr [8 x i8], [8 x i8]* [[REF_IDX]], i32 0, i32 4 +; CHECK-NEXT: [[TMP32:%.*]] = getelementptr [8 x i8], [8 x i8]* [[REF_IDX]], i32 0, i32 3 +; CHECK-NEXT: [[TMP35:%.*]] = getelementptr [8 x i8], [8 x i8]* [[REF_IDX]], i32 0, i32 2 +; CHECK-NEXT: [[TMP38:%.*]] = getelementptr [8 x i8], [8 x i8]* [[REF_IDX]], i32 0, i32 1 +; CHECK-NEXT: [[TMP41:%.*]] = getelementptr [8 x i8], [8 x i8]* [[REF_IDX]], i32 0, i32 0 +; CHECK-NEXT: [[TMP43:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 7, i32 0 +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[TMP41]], i8 -1, i64 8, i1 false) +; CHECK-NEXT: [[TMP46:%.*]] = getelementptr [8 x 
%struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 7, i32 1 +; CHECK-NEXT: [[TMP57:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 6, i32 0 +; CHECK-NEXT: [[TMP60:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 6, i32 1 +; CHECK-NEXT: [[TMP71:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 5, i32 0 +; CHECK-NEXT: [[TMP74:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 5, i32 1 +; CHECK-NEXT: [[TMP85:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 4, i32 0 +; CHECK-NEXT: [[TMP88:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 4, i32 1 +; CHECK-NEXT: [[TMP99:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 3, i32 0 +; CHECK-NEXT: [[TMP102:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 3, i32 1 +; CHECK-NEXT: [[TMP113:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 2, i32 0 +; CHECK-NEXT: [[TMP116:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 2, i32 1 +; CHECK-NEXT: [[TMP127:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 1, i32 0 +; CHECK-NEXT: [[TMP130:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 1, i32 1 +; CHECK-NEXT: [[TMP141:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 0, i32 0 +; CHECK-NEXT: [[TMP144:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 0, i32 1 +; CHECK-NEXT: [[TMP148:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 7, i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[TMP141]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP0]], i8 0, i64 32, i1 false) +; CHECK-NEXT: [[TMP151:%.*]] = getelementptr [8 x %struct.MV], [8 x 
%struct.MV]* [[LEFT_MVD]], i32 0, i32 7, i32 1 +; CHECK-NEXT: [[TMP162:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 6, i32 0 +; CHECK-NEXT: [[TMP165:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 6, i32 1 +; CHECK-NEXT: [[TMP176:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 5, i32 0 +; CHECK-NEXT: [[TMP179:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 5, i32 1 +; CHECK-NEXT: [[TMP190:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 4, i32 0 +; CHECK-NEXT: [[TMP193:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 4, i32 1 +; CHECK-NEXT: [[TMP204:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 3, i32 0 +; CHECK-NEXT: [[TMP207:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 3, i32 1 +; CHECK-NEXT: [[TMP218:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 2, i32 0 +; CHECK-NEXT: [[TMP221:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 2, i32 1 +; CHECK-NEXT: [[TMP232:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 1, i32 0 +; CHECK-NEXT: [[TMP235:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 1, i32 1 +; CHECK-NEXT: [[TMP246:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 0, i32 0 +; CHECK-NEXT: [[TMP249:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 0, i32 1 +; CHECK-NEXT: [[UP_MVD252:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 0 +; CHECK-NEXT: [[LEFT_MVD253:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[TMP246]] to i8* +; CHECK-NEXT: call void 
@llvm.memset.p0i8.i64(i8* align 8 [[TMP1]], i8 0, i64 32, i1 false) +; CHECK-NEXT: call void @foo(%struct.MV* [[UP_MVD252]], %struct.MV* [[LEFT_MVD253]], i8* [[TMP41]]) [[ATTR0]] +; CHECK-NEXT: ret void +; +entry: + %ref_idx = alloca [8 x i8] ; <[8 x i8]*> [#uses=8] + %left_mvd = alloca [8 x %struct.MV] ; <[8 x %struct.MV]*> [#uses=17] + %up_mvd = alloca [8 x %struct.MV] ; <[8 x %struct.MV]*> [#uses=17] + %tmp20 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 7 ; [#uses=1] + store i8 -1, i8* %tmp20, align 1 + %tmp23 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 6 ; [#uses=1] + store i8 -1, i8* %tmp23, align 1 + %tmp26 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 5 ; [#uses=1] + store i8 -1, i8* %tmp26, align 1 + %tmp29 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 4 ; [#uses=1] + store i8 -1, i8* %tmp29, align 1 + %tmp32 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 3 ; [#uses=1] + store i8 -1, i8* %tmp32, align 1 + %tmp35 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 2 ; [#uses=1] + store i8 -1, i8* %tmp35, align 1 + %tmp38 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 1 ; [#uses=1] + store i8 -1, i8* %tmp38, align 1 + %tmp41 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 0 ; [#uses=2] + store i8 -1, i8* %tmp41, align 1 + %tmp43 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 7, i32 0 ; [#uses=1] + store i16 0, i16* %tmp43, align 2 + %tmp46 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 7, i32 1 ; [#uses=1] + store i16 0, i16* %tmp46, align 2 + %tmp57 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 6, i32 0 ; [#uses=1] + store i16 0, i16* %tmp57, align 2 + %tmp60 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 6, i32 1 ; [#uses=1] + store i16 0, i16* %tmp60, align 2 + %tmp71 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 5, i32 0 ; [#uses=1] + store i16 0, 
i16* %tmp71, align 2 + %tmp74 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 5, i32 1 ; [#uses=1] + store i16 0, i16* %tmp74, align 2 + %tmp85 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 4, i32 0 ; [#uses=1] + store i16 0, i16* %tmp85, align 2 + %tmp88 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 4, i32 1 ; [#uses=1] + store i16 0, i16* %tmp88, align 2 + %tmp99 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 3, i32 0 ; [#uses=1] + store i16 0, i16* %tmp99, align 2 + %tmp102 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 3, i32 1 ; [#uses=1] + store i16 0, i16* %tmp102, align 2 + %tmp113 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 2, i32 0 ; [#uses=1] + store i16 0, i16* %tmp113, align 2 + %tmp116 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 2, i32 1 ; [#uses=1] + store i16 0, i16* %tmp116, align 2 + %tmp127 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 1, i32 0 ; [#uses=1] + store i16 0, i16* %tmp127, align 2 + %tmp130 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 1, i32 1 ; [#uses=1] + store i16 0, i16* %tmp130, align 2 + %tmp141 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 0, i32 0 ; [#uses=1] + store i16 0, i16* %tmp141, align 8 + %tmp144 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 0, i32 1 ; [#uses=1] + store i16 0, i16* %tmp144, align 2 + %tmp148 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 7, i32 0 ; [#uses=1] + store i16 0, i16* %tmp148, align 2 + %tmp151 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 7, i32 1 ; [#uses=1] + store i16 0, i16* %tmp151, align 2 + %tmp162 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 6, i32 0 ; [#uses=1] + store i16 0, i16* %tmp162, align 2 + %tmp165 = 
getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 6, i32 1 ; [#uses=1] + store i16 0, i16* %tmp165, align 2 + %tmp176 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 5, i32 0 ; [#uses=1] + store i16 0, i16* %tmp176, align 2 + %tmp179 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 5, i32 1 ; [#uses=1] + store i16 0, i16* %tmp179, align 2 + %tmp190 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 4, i32 0 ; [#uses=1] + store i16 0, i16* %tmp190, align 2 + %tmp193 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 4, i32 1 ; [#uses=1] + store i16 0, i16* %tmp193, align 2 + %tmp204 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 3, i32 0 ; [#uses=1] + store i16 0, i16* %tmp204, align 2 + %tmp207 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 3, i32 1 ; [#uses=1] + store i16 0, i16* %tmp207, align 2 + %tmp218 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 2, i32 0 ; [#uses=1] + store i16 0, i16* %tmp218, align 2 + %tmp221 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 2, i32 1 ; [#uses=1] + store i16 0, i16* %tmp221, align 2 + %tmp232 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 1, i32 0 ; [#uses=1] + store i16 0, i16* %tmp232, align 2 + %tmp235 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 1, i32 1 ; [#uses=1] + store i16 0, i16* %tmp235, align 2 + %tmp246 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 0, i32 0 ; [#uses=1] + store i16 0, i16* %tmp246, align 8 + %tmp249 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 0, i32 1 ; [#uses=1] + store i16 0, i16* %tmp249, align 2 + %up_mvd252 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 0 ; <%struct.MV*> [#uses=1] + %left_mvd253 = getelementptr [8 x %struct.MV], [8 
x %struct.MV]* %left_mvd, i32 0, i32 0 ; <%struct.MV*> [#uses=1] + call void @foo( %struct.MV* %up_mvd252, %struct.MV* %left_mvd253, i8* %tmp41 ) nounwind + ret void + } declare void @foo(%struct.MV*, %struct.MV*, i8*) @@ -166,6 +234,15 @@ declare void @foo(%struct.MV*, %struct.MV*, i8*) ; Store followed by memset. define void @test3(i32* nocapture %P) nounwind ssp { +; CHECK-LABEL: @test3( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1 +; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 2 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ADD_PTR]] to i8* +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[ARRAYIDX]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP1]], i8 0, i64 15, i1 false) +; CHECK-NEXT: ret void +; entry: %arrayidx = getelementptr inbounds i32, i32* %P, i64 1 store i32 0, i32* %arrayidx, align 4 @@ -173,28 +250,39 @@ entry: %0 = bitcast i32* %add.ptr to i8* tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 11, i1 false) ret void -; CHECK-LABEL: @test3( -; CHECK-NOT: store -; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %1, i8 0, i64 15, i1 false) } ; store followed by memset, different offset scenario define void @test4(i32* nocapture %P) nounwind ssp { +; CHECK-LABEL: @test4( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ADD_PTR]] to i8* +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[P]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP1]], i8 0, i64 15, i1 false) +; CHECK-NEXT: ret void +; entry: store i32 0, i32* %P, align 4 %add.ptr = getelementptr inbounds i32, i32* %P, i64 1 %0 = bitcast i32* %add.ptr to i8* tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 11, i1 false) ret void -; CHECK-LABEL: @test4( -; CHECK-NOT: store -; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %1, i8 0, i64 15, i1 false) } 
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind ; Memset followed by store. define void @test5(i32* nocapture %P) nounwind ssp { +; CHECK-LABEL: @test5( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 2 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ADD_PTR]] to i8* +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[ARRAYIDX]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP1]], i8 0, i64 15, i1 false) +; CHECK-NEXT: ret void +; entry: %add.ptr = getelementptr inbounds i32, i32* %P, i64 2 %0 = bitcast i32* %add.ptr to i8* @@ -202,13 +290,19 @@ entry: %arrayidx = getelementptr inbounds i32, i32* %P, i64 1 store i32 0, i32* %arrayidx, align 4 ret void -; CHECK-LABEL: @test5( -; CHECK-NOT: store -; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %1, i8 0, i64 15, i1 false) } ;; Memset followed by memset. define void @test6(i32* nocapture %P) nounwind ssp { +; CHECK-LABEL: @test6( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P:%.*]] to i8* +; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 3 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[ADD_PTR]] to i8* +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[P]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP2]], i8 0, i64 24, i1 false) +; CHECK-NEXT: ret void +; entry: %0 = bitcast i32* %P to i8* tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 12, i1 false) @@ -216,13 +310,20 @@ entry: %1 = bitcast i32* %add.ptr to i8* tail call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 12, i1 false) ret void -; CHECK-LABEL: @test6( -; CHECK: call void @llvm.memset.p0i8.i64(i8* %2, i8 0, i64 24, i1 false) } ; More aggressive heuristic ; rdar://9892684 define void @test7(i32* nocapture %c) nounwind optsize { +; CHECK-LABEL: @test7( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* 
[[C:%.*]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[C]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[C]], i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[C]], i32 4 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[C]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP5]], i8 -1, i64 20, i1 false) +; CHECK-NEXT: ret void +; store i32 -1, i32* %c, align 4 %1 = getelementptr inbounds i32, i32* %c, i32 1 store i32 -1, i32* %1, align 4 @@ -232,26 +333,33 @@ define void @test7(i32* nocapture %c) nounwind optsize { store i32 -1, i32* %3, align 4 %4 = getelementptr inbounds i32, i32* %c, i32 4 store i32 -1, i32* %4, align 4 -; CHECK-LABEL: @test7( -; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %5, i8 -1, i64 20, i1 false) ret void } %struct.test8 = type { [4 x i32] } define void @test8() { +; CHECK-LABEL: @test8( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[MEMTMP:%.*]] = alloca [[STRUCT_TEST8:%.*]], align 16 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.test8* [[MEMTMP]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> , <4 x i32>* [[TMP0]], align 16 +; CHECK-NEXT: ret void +; entry: %memtmp = alloca %struct.test8, align 16 %0 = bitcast %struct.test8* %memtmp to <4 x i32>* store <4 x i32> , <4 x i32>* %0, align 16 ret void -; CHECK-LABEL: @test8( -; CHECK: store <4 x i32> , <4 x i32>* %0, align 16 } @test9buf = internal unnamed_addr global [16 x i64] zeroinitializer, align 16 define void @test9() nounwind { +; CHECK-LABEL: @test9( +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 16 bitcast ([16 x i64]* @test9buf to i8*), i8 -1, i64 16, i1 false) +; CHECK-NEXT: ret void +; store i8 -1, i8* bitcast ([16 x i64]* @test9buf to i8*), align 16 store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 1), align 1 store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 2), align 2 @@ -269,24 +377,31 @@ define void @test9() 
nounwind { store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 14), align 2 store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 15), align 1 ret void -; CHECK-LABEL: @test9( -; CHECK: call void @llvm.memset.p0i8.i64(i8* align 16 bitcast ([16 x i64]* @test9buf to i8*), i8 -1, i64 16, i1 false) } ; PR19092 define void @test10(i8* nocapture %P) nounwind { +; CHECK-LABEL: @test10( +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[P:%.*]], i8 0, i64 42, i1 false) +; CHECK-NEXT: ret void +; tail call void @llvm.memset.p0i8.i64(i8* %P, i8 0, i64 42, i1 false) tail call void @llvm.memset.p0i8.i64(i8* %P, i8 0, i64 23, i1 false) ret void -; CHECK-LABEL: @test10( -; CHECK-NOT: memset -; CHECK: call void @llvm.memset.p0i8.i64(i8* %P, i8 0, i64 42, i1 false) -; CHECK-NOT: memset -; CHECK: ret void } ; Memset followed by odd store. define void @test11(i32* nocapture %P) nounwind ssp { +; CHECK-LABEL: @test11( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 3 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ADD_PTR]] to i8* +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 0 +; CHECK-NEXT: [[ARRAYIDX_CAST:%.*]] = bitcast i32* [[ARRAYIDX]] to i96* +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i96* [[ARRAYIDX_CAST]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP1]], i8 1, i64 23, i1 false) +; CHECK-NEXT: ret void +; entry: %add.ptr = getelementptr inbounds i32, i32* %P, i64 3 %0 = bitcast i32* %add.ptr to i8* @@ -295,20 +410,22 @@ entry: %arrayidx.cast = bitcast i32* %arrayidx to i96* store i96 310698676526526814092329217, i96* %arrayidx.cast, align 4 ret void -; CHECK-LABEL: @test11( -; CHECK-NOT: store -; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %1, i8 1, i64 23, i1 false) } ; Alignment should be preserved when there is a store with default align define void @test12(i32* nocapture %P) nounwind ssp { +; 
CHECK-LABEL: @test12( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ADD_PTR]] to i8* +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[P]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP1]], i8 0, i64 15, i1 false) +; CHECK-NEXT: ret void +; entry: store i32 0, i32* %P %add.ptr = getelementptr inbounds i32, i32* %P, i64 1 %0 = bitcast i32* %add.ptr to i8* tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 11, i1 false) ret void -; CHECK-LABEL: @test12( -; CHECK-NOT: store -; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %1, i8 0, i64 15, i1 false) } diff --git a/llvm/test/Transforms/MemCpyOpt/invariant.start.ll b/llvm/test/Transforms/MemCpyOpt/invariant.start.ll index b7e3160c7da7..1bab2f65799a 100644 --- a/llvm/test/Transforms/MemCpyOpt/invariant.start.ll +++ b/llvm/test/Transforms/MemCpyOpt/invariant.start.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; MemCpy optimizations should take place even in presence of invariant.start ; RUN: opt < %s -basic-aa -memcpyopt -dse -S | FileCheck %s @@ -16,30 +17,32 @@ declare {}* @llvm.invariant.start.p0i8(i64, i8* nocapture) nounwind readonly ; The intermediate alloca and one of the memcpy's should be eliminated, the ; other should be transformed to a memmove. 
define void @test1(i8* %P, i8* %Q) nounwind { +; CHECK-LABEL: @test1( +; CHECK-NEXT: [[MEMTMP:%.*]] = alloca [[TMP0:%.*]], align 16 +; CHECK-NEXT: [[R:%.*]] = bitcast %0* [[MEMTMP]] to i8* +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[R]], i8* align 16 [[P:%.*]], i32 32, i1 false) +; CHECK-NEXT: [[I:%.*]] = call {}* @llvm.invariant.start.p0i8(i64 32, i8* [[P]]) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[Q:%.*]], i8* align 16 [[R]], i32 32, i1 false) +; CHECK-NEXT: ret void +; %memtmp = alloca %0, align 16 %R = bitcast %0* %memtmp to i8* call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %R, i8* align 16 %P, i32 32, i1 false) %i = call {}* @llvm.invariant.start.p0i8(i64 32, i8* %P) call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %Q, i8* align 16 %R, i32 32, i1 false) ret void -; CHECK-LABEL: @test1( -; CHECK-NEXT: %memtmp = alloca %0, align 16 -; CHECK-NEXT: %R = bitcast %0* %memtmp to i8* -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %R, i8* align 16 %P, i32 32, i1 false) -; CHECK-NEXT: %i = call {}* @llvm.invariant.start.p0i8(i64 32, i8* %P) -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %Q, i8* align 16 %R, i32 32, i1 false) -; CHECK-NEXT: ret void } ; The invariant.start intrinsic does not inhibit transforming the memcpy to a ; memset. 
define void @test2(i8* %dst1, i8* %dst2, i8 %c) { -; CHECK-LABEL: define void @test2( -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i1 false) -; CHECK-NEXT: %i = call {}* @llvm.invariant.start.p0i8(i64 32, i8* %dst1) -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 %dst2, i8 %c, i64 128, i1 false) -; CHECK-NEXT: ret void +; CHECK-LABEL: @test2( +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DST1:%.*]], i8 [[C:%.*]], i64 128, i1 false) +; CHECK-NEXT: [[I:%.*]] = call {}* @llvm.invariant.start.p0i8(i64 32, i8* [[DST1]]) +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[DST2:%.*]], i8 [[C]], i64 128, i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i1 false) %i = call {}* @llvm.invariant.start.p0i8(i64 32, i8* %dst1) call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst2, i8* align 8 %dst1, i64 128, i1 false) diff --git a/llvm/test/Transforms/MemCpyOpt/lifetime.ll b/llvm/test/Transforms/MemCpyOpt/lifetime.ll index ad14bdd6df66..f998a194d688 100644 --- a/llvm/test/Transforms/MemCpyOpt/lifetime.ll +++ b/llvm/test/Transforms/MemCpyOpt/lifetime.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -O2 -S | FileCheck %s ; performCallSlotOptzn in MemCpy should not exchange the calls to @@ -8,10 +9,13 @@ declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1 declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1 define void @_ZN4CordC2EOS_(i8* nocapture dereferenceable(16) %arg1) { +; CHECK-LABEL: @_ZN4CordC2EOS_( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP_SROA_3_0_ARG1_SROA_RAW_IDX:%.*]] = getelementptr inbounds i8, i8* [[ARG1:%.*]], i64 7 +; CHECK-NEXT: store i8 0, i8* [[TMP_SROA_3_0_ARG1_SROA_RAW_IDX]], align 1 +; CHECK-NEXT: ret void +; bb: -; CHECK-LABEL: @_ZN4CordC2EOS_ -; CHECK-NOT: call void @llvm.lifetime.start -; CHECK: ret void %tmp = alloca [8 x i8], align 8 %tmp5 = bitcast [8 x i8]* %tmp to i8* 
call void @llvm.lifetime.start.p0i8(i64 16, i8* %tmp5) diff --git a/llvm/test/Transforms/MemCpyOpt/load-store-to-memcpy.ll b/llvm/test/Transforms/MemCpyOpt/load-store-to-memcpy.ll index c3f7a1127281..1c61132eb2d2 100644 --- a/llvm/test/Transforms/MemCpyOpt/load-store-to-memcpy.ll +++ b/llvm/test/Transforms/MemCpyOpt/load-store-to-memcpy.ll @@ -35,9 +35,9 @@ define void @test_memcpy(%T* noalias align 8 %a, %T* noalias align 16 %b) { define void @f(%T* %a, %T* %b, %T* %c, %T* %d) { ; CHECK-LABEL: @f( ; CHECK-NEXT: [[VAL:%.*]] = load [[T:%.*]], %T* [[A:%.*]], align 4, !alias.scope !0 -; CHECK-NEXT: store [[T]] { i8 23, i32 23 }, %T* [[B:%.*]], !alias.scope !3 -; CHECK-NEXT: store [[T]] { i8 44, i32 44 }, %T* [[C:%.*]], !alias.scope !6, !noalias !3 -; CHECK-NEXT: store [[T]] %val, %T* [[D:%.*]], !alias.scope !9, !noalias !12 +; CHECK-NEXT: store [[T]] { i8 23, i32 23 }, %T* [[B:%.*]], align 4, !alias.scope !3 +; CHECK-NEXT: store [[T]] { i8 44, i32 44 }, %T* [[C:%.*]], align 4, !alias.scope !6, !noalias !3 +; CHECK-NEXT: store [[T]] [[VAL]], %T* [[D:%.*]], align 4, !alias.scope !9, !noalias !12 ; CHECK-NEXT: ret void ; %val = load %T, %T* %a, !alias.scope !{!10} diff --git a/llvm/test/Transforms/MemCpyOpt/loadstore-sret.ll b/llvm/test/Transforms/MemCpyOpt/loadstore-sret.ll index 0f8a70a5511d..9b0098a499d9 100644 --- a/llvm/test/Transforms/MemCpyOpt/loadstore-sret.ll +++ b/llvm/test/Transforms/MemCpyOpt/loadstore-sret.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S < %s -basic-aa -memcpyopt | FileCheck %s ; @@ -6,19 +7,22 @@ target triple = "x86_64-apple-darwin10.0.0" %"class.std::auto_ptr" = type { i32* } -; CHECK-LABEL: @_Z3foov( define void @_Z3foov(%"class.std::auto_ptr"* noalias nocapture sret %agg.result) ssp { +; CHECK-LABEL: @_Z3foov( +; CHECK-NEXT: _ZNSt8auto_ptrIiED1Ev.exit: +; CHECK-NEXT: [[TEMP_LVALUE:%.*]] = alloca %"class.std::auto_ptr", align 8 +; CHECK-NEXT: call void 
@_Z3barv(%"class.std::auto_ptr"* sret [[AGG_RESULT:%.*]]) +; CHECK-NEXT: [[TMP_I_I:%.*]] = getelementptr inbounds %"class.std::auto_ptr", %"class.std::auto_ptr"* [[TEMP_LVALUE]], i64 0, i32 0 +; CHECK-NEXT: [[TMP_I_I4:%.*]] = getelementptr inbounds %"class.std::auto_ptr", %"class.std::auto_ptr"* [[AGG_RESULT]], i64 0, i32 0 +; CHECK-NEXT: ret void +; _ZNSt8auto_ptrIiED1Ev.exit: %temp.lvalue = alloca %"class.std::auto_ptr", align 8 -; CHECK: call void @_Z3barv(%"class.std::auto_ptr"* sret %agg.result) call void @_Z3barv(%"class.std::auto_ptr"* sret %temp.lvalue) %tmp.i.i = getelementptr inbounds %"class.std::auto_ptr", %"class.std::auto_ptr"* %temp.lvalue, i64 0, i32 0 -; CHECK-NOT: load %tmp2.i.i = load i32*, i32** %tmp.i.i, align 8 %tmp.i.i4 = getelementptr inbounds %"class.std::auto_ptr", %"class.std::auto_ptr"* %agg.result, i64 0, i32 0 -; CHECK-NOT: store store i32* %tmp2.i.i, i32** %tmp.i.i4, align 8 -; CHECK: ret void ret void } diff --git a/llvm/test/Transforms/MemCpyOpt/memcpy-to-memset.ll b/llvm/test/Transforms/MemCpyOpt/memcpy-to-memset.ll index 1424ca3709cc..97237a6e68dd 100644 --- a/llvm/test/Transforms/MemCpyOpt/memcpy-to-memset.ll +++ b/llvm/test/Transforms/MemCpyOpt/memcpy-to-memset.ll @@ -1,89 +1,104 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -memcpyopt -S < %s | FileCheck %s declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind @undef = internal constant i32 undef, align 4 define void @test_undef() nounwind { +; CHECK-LABEL: @test_undef( +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[I8:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[I8]], i8 undef, i64 4, i1 false) +; CHECK-NEXT: ret void +; %a = alloca i32, align 4 %i8 = bitcast i32* %a to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %i8, i8* align 4 bitcast (i32* @undef to i8*), i64 4, i1 false) ret void -; CHECK-LABEL: @test_undef( -; CHECK: 
call void @llvm.memset -; CHECK-NOT: call void @llvm.memcpy -; CHECK: ret void } @i32x3 = internal constant [3 x i32] [i32 -1, i32 -1, i32 -1], align 4 define void @test_i32x3() nounwind { +; CHECK-LABEL: @test_i32x3( +; CHECK-NEXT: [[A:%.*]] = alloca [3 x i32], align 4 +; CHECK-NEXT: [[I8:%.*]] = bitcast [3 x i32]* [[A]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[I8]], i8 -1, i64 12, i1 false) +; CHECK-NEXT: ret void +; %a = alloca [3 x i32], align 4 %i8 = bitcast [3 x i32]* %a to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %i8, i8* align 4 bitcast ([3 x i32]* @i32x3 to i8*), i64 12, i1 false) ret void -; CHECK-LABEL: @test_i32x3( -; CHECK: call void @llvm.memset -; CHECK-NOT: call void @llvm.memcpy -; CHECK: ret void } @i32x3_undef = internal constant [3 x i32] [i32 -1, i32 undef, i32 -1], align 4 define void @test_i32x3_undef() nounwind { +; CHECK-LABEL: @test_i32x3_undef( +; CHECK-NEXT: [[A:%.*]] = alloca [3 x i32], align 4 +; CHECK-NEXT: [[I8:%.*]] = bitcast [3 x i32]* [[A]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[I8]], i8 -1, i64 12, i1 false) +; CHECK-NEXT: ret void +; %a = alloca [3 x i32], align 4 %i8 = bitcast [3 x i32]* %a to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %i8, i8* align 4 bitcast ([3 x i32]* @i32x3_undef to i8*), i64 12, i1 false) ret void -; CHECK-LABEL: @test_i32x3_undef( -; CHECK: call void @llvm.memset -; CHECK-NOT: call void @llvm.memcpy -; CHECK: ret void } %struct.bitfield = type { i8, [3 x i8] } @bitfield = private unnamed_addr constant %struct.bitfield { i8 -86, [3 x i8] [i8 -86, i8 -86, i8 -86] }, align 4 define void @test_bitfield() nounwind { +; CHECK-LABEL: @test_bitfield( +; CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_BITFIELD:%.*]], align 4 +; CHECK-NEXT: [[I8:%.*]] = bitcast %struct.bitfield* [[A]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[I8]], i8 -86, i64 4, i1 false) +; CHECK-NEXT: ret void +; %a = alloca %struct.bitfield, align 4 
%i8 = bitcast %struct.bitfield* %a to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %i8, i8* align 4 bitcast (%struct.bitfield* @bitfield to i8*), i64 4, i1 false) ret void -; CHECK-LABEL: @test_bitfield( -; CHECK: call void @llvm.memset -; CHECK-NOT: call void @llvm.memcpy -; CHECK: ret void } @i1x16_zero = internal constant <16 x i1> , align 4 define void @test_i1x16_zero() nounwind { +; CHECK-LABEL: @test_i1x16_zero( +; CHECK-NEXT: [[A:%.*]] = alloca <16 x i1>, align 4 +; CHECK-NEXT: [[I8:%.*]] = bitcast <16 x i1>* [[A]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[I8]], i8 0, i64 16, i1 false) +; CHECK-NEXT: ret void +; %a = alloca <16 x i1>, align 4 %i8 = bitcast <16 x i1>* %a to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %i8, i8* align 4 bitcast (<16 x i1>* @i1x16_zero to i8*), i64 16, i1 false) ret void -; CHECK-LABEL: @test_i1x16_zero( -; CHECK: call void @llvm.memset -; CHECK-NOT: call void @llvm.memcpy -; CHECK: ret void } ; i1 isn't currently handled. Should it? 
@i1x16_one = internal constant <16 x i1> , align 4 define void @test_i1x16_one() nounwind { +; CHECK-LABEL: @test_i1x16_one( +; CHECK-NEXT: [[A:%.*]] = alloca <16 x i1>, align 4 +; CHECK-NEXT: [[I8:%.*]] = bitcast <16 x i1>* [[A]] to i8* +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[I8]], i8* align 4 bitcast (<16 x i1>* @i1x16_one to i8*), i64 16, i1 false) +; CHECK-NEXT: ret void +; %a = alloca <16 x i1>, align 4 %i8 = bitcast <16 x i1>* %a to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %i8, i8* align 4 bitcast (<16 x i1>* @i1x16_one to i8*), i64 16, i1 false) ret void -; CHECK-LABEL: @test_i1x16_one( -; CHECK-NOT: call void @llvm.memset -; CHECK: call void @llvm.memcpy -; CHECK: ret void } @half = internal constant half 0xH0000, align 4 define void @test_half() nounwind { +; CHECK-LABEL: @test_half( +; CHECK-NEXT: [[A:%.*]] = alloca half, align 4 +; CHECK-NEXT: [[I8:%.*]] = bitcast half* [[A]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[I8]], i8 0, i64 2, i1 false) +; CHECK-NEXT: ret void +; %a = alloca half, align 4 %i8 = bitcast half* %a to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %i8, i8* align 4 bitcast (half* @half to i8*), i64 2, i1 false) ret void -; CHECK-LABEL: @test_half( -; CHECK: call void @llvm.memset -; CHECK-NOT: call void @llvm.memcpy -; CHECK: ret void } diff --git a/llvm/test/Transforms/MemCpyOpt/memcpy-undef.ll b/llvm/test/Transforms/MemCpyOpt/memcpy-undef.ll index 5cdd1a27258c..e1dd9c92d4e0 100644 --- a/llvm/test/Transforms/MemCpyOpt/memcpy-undef.ll +++ b/llvm/test/Transforms/MemCpyOpt/memcpy-undef.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -basic-aa -memcpyopt -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" @@ -6,6 +7,16 @@ target triple = "x86_64-apple-macosx10.8.0" 
%struct.foo = type { i8, [7 x i8], i32 } define i32 @test1(%struct.foo* nocapture %foobie) nounwind noinline ssp uwtable { +; CHECK-LABEL: @test1( +; CHECK-NEXT: [[BLETCH_SROA_1:%.*]] = alloca [7 x i8], align 1 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_FOO:%.*]], %struct.foo* [[FOOBIE:%.*]], i64 0, i32 0 +; CHECK-NEXT: store i8 98, i8* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_FOO]], %struct.foo* [[FOOBIE]], i64 0, i32 1, i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [7 x i8], [7 x i8]* [[BLETCH_SROA_1]], i64 0, i64 0 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_FOO]], %struct.foo* [[FOOBIE]], i64 0, i32 2 +; CHECK-NEXT: store i32 20, i32* [[TMP4]], align 4 +; CHECK-NEXT: ret i32 undef +; %bletch.sroa.1 = alloca [7 x i8], align 1 %1 = getelementptr inbounds %struct.foo, %struct.foo* %foobie, i64 0, i32 0 store i8 98, i8* %1, align 4 @@ -17,28 +28,31 @@ define i32 @test1(%struct.foo* nocapture %foobie) nounwind noinline ssp uwtable ret i32 undef ; Check that the memcpy is removed. -; CHECK-LABEL: @test1( -; CHECK-NOT: call void @llvm.memcpy } define void @test2(i8* sret noalias nocapture %out, i8* %in) nounwind noinline ssp uwtable { +; CHECK-LABEL: @test2( +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[IN:%.*]]) +; CHECK-NEXT: ret void +; call void @llvm.lifetime.start.p0i8(i64 8, i8* %in) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %in, i64 8, i1 false) ret void ; Check that the memcpy is removed. 
-; CHECK-LABEL: @test2( -; CHECK-NOT: call void @llvm.memcpy } define void @test3(i8* sret noalias nocapture %out, i8* %in) nounwind noinline ssp uwtable { +; CHECK-LABEL: @test3( +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[IN:%.*]]) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[OUT:%.*]], i8* [[IN]], i64 8, i1 false) +; CHECK-NEXT: ret void +; call void @llvm.lifetime.start.p0i8(i64 4, i8* %in) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %in, i64 8, i1 false) ret void ; Check that the memcpy is not removed. -; CHECK-LABEL: @test3( -; CHECK: call void @llvm.memcpy } declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind diff --git a/llvm/test/Transforms/MemCpyOpt/memcpy.ll b/llvm/test/Transforms/MemCpyOpt/memcpy.ll index 1741da030c2e..54e5e75fd6e2 100644 --- a/llvm/test/Transforms/MemCpyOpt/memcpy.ll +++ b/llvm/test/Transforms/MemCpyOpt/memcpy.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -basic-aa -memcpyopt -dse -S | FileCheck -enable-var-scope %s +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -basic-aa -memcpyopt -dse -S | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i686-apple-darwin9" @@ -7,6 +8,16 @@ target triple = "i686-apple-darwin9" %1 = type { i32, i32 } define void @test1(%0* sret %agg.result, x86_fp80 %z.0, x86_fp80 %z.1) nounwind { +; CHECK-LABEL: @test1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP2:%.*]] = alloca [[TMP0:%.*]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = fsub x86_fp80 0xK80000000000000000000, [[Z_1:%.*]] +; CHECK-NEXT: call void @ccoshl(%0* sret [[TMP2]], x86_fp80 [[TMP5]], x86_fp80 [[Z_0:%.*]]) [[ATTR0:#.*]] +; CHECK-NEXT: [[TMP219:%.*]] = bitcast %0* [[TMP2]] to i8* +; CHECK-NEXT: [[AGG_RESULT21:%.*]] = bitcast %0* [[AGG_RESULT:%.*]] to i8* +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 
[[AGG_RESULT21]], i8* align 16 [[TMP219]], i32 32, i1 false) +; CHECK-NEXT: ret void +; entry: %tmp2 = alloca %0 %memtmp = alloca %0, align 16 @@ -22,11 +33,6 @@ entry: ; Check that one of the memcpy's are removed. ;; FIXME: PR 8643 We should be able to eliminate the last memcpy here. -; CHECK-LABEL: @test1( -; CHECK: call void @ccoshl -; CHECK: call void @llvm.memcpy -; CHECK-NOT: llvm.memcpy -; CHECK: ret void } declare void @ccoshl(%0* nocapture sret, x86_fp80, x86_fp80) nounwind @@ -35,29 +41,31 @@ declare void @ccoshl(%0* nocapture sret, x86_fp80, x86_fp80) nounwind ; The intermediate alloca and one of the memcpy's should be eliminated, the ; other should be related with a memmove. define void @test2(i8* %P, i8* %Q) nounwind { +; CHECK-LABEL: @test2( +; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i32(i8* align 16 [[Q:%.*]], i8* align 16 [[P:%.*]], i32 32, i1 false) +; CHECK-NEXT: ret void +; %memtmp = alloca %0, align 16 %R = bitcast %0* %memtmp to i8* call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %R, i8* align 16 %P, i32 32, i1 false) call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %Q, i8* align 16 %R, i32 32, i1 false) ret void -; CHECK-LABEL: @test2( -; CHECK-NEXT: call void @llvm.memmove{{.*}}(i8* align 16 %Q, i8* align 16 %P -; CHECK-NEXT: ret void } ; The intermediate alloca and one of the memcpy's should be eliminated, the ; other should be related with a memcpy. 
define void @test2_memcpy(i8* noalias %P, i8* noalias %Q) nounwind { +; CHECK-LABEL: @test2_memcpy( +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[Q:%.*]], i8* align 16 [[P:%.*]], i32 32, i1 false) +; CHECK-NEXT: ret void +; %memtmp = alloca %0, align 16 %R = bitcast %0* %memtmp to i8* call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %R, i8* align 16 %P, i32 32, i1 false) call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %Q, i8* align 16 %R, i32 32, i1 false) ret void -; CHECK-LABEL: @test2_memcpy( -; CHECK-NEXT: call void @llvm.memcpy{{.*}}(i8* align 16 %Q, i8* align 16 %P -; CHECK-NEXT: ret void } @@ -66,40 +74,47 @@ define void @test2_memcpy(i8* noalias %P, i8* noalias %Q) nounwind { @x = external global %0 define void @test3(%0* noalias sret %agg.result) nounwind { +; CHECK-LABEL: @test3( +; CHECK-NEXT: [[AGG_RESULT1:%.*]] = bitcast %0* [[AGG_RESULT:%.*]] to i8* +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[AGG_RESULT1]], i8* align 16 bitcast (%0* @x to i8*), i32 32, i1 false) +; CHECK-NEXT: ret void +; %x.0 = alloca %0 %x.01 = bitcast %0* %x.0 to i8* call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %x.01, i8* align 16 bitcast (%0* @x to i8*), i32 32, i1 false) %agg.result2 = bitcast %0* %agg.result to i8* call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %agg.result2, i8* align 16 %x.01, i32 32, i1 false) ret void -; CHECK-LABEL: @test3( -; CHECK-NEXT: %agg.result1 = bitcast -; CHECK-NEXT: call void @llvm.memcpy -; CHECK-NEXT: ret void } ; PR8644 define void @test4(i8 *%P) { +; CHECK-LABEL: @test4( +; CHECK-NEXT: call void @test4a(i8* byval align 1 [[P:%.*]]) +; CHECK-NEXT: ret void +; %A = alloca %1 %a = bitcast %1* %A to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %a, i8* align 4 %P, i64 8, i1 false) call void @test4a(i8* align 1 byval %a) ret void -; CHECK-LABEL: @test4( -; CHECK-NEXT: call void @test4a( } ; Make sure we don't remove the memcpy if the source address space doesn't match the byval argument 
define void @test4_addrspace(i8 addrspace(1)* %P) { - %A = alloca %1 - %a = bitcast %1* %A to i8* - call void @llvm.memcpy.p0i8.p1i8.i64(i8* align 4 %a, i8 addrspace(1)* align 4 %P, i64 8, i1 false) - call void @test4a(i8* align 1 byval %a) - ret void ; CHECK-LABEL: @test4_addrspace( -; CHECK: call void @llvm.memcpy.p0i8.p1i8.i64( -; CHECK-NEXT: call void @test4a( +; CHECK-NEXT: [[A1:%.*]] = alloca [[TMP1:%.*]], align 8 +; CHECK-NEXT: [[A2:%.*]] = bitcast %1* [[A1]] to i8* +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p1i8.i64(i8* align 4 [[A2]], i8 addrspace(1)* align 4 [[P:%.*]], i64 8, i1 false) +; CHECK-NEXT: call void @test4a(i8* byval align 1 [[A2]]) +; CHECK-NEXT: ret void +; + %a1 = alloca %1 + %a2 = bitcast %1* %a1 to i8* + call void @llvm.memcpy.p0i8.p1i8.i64(i8* align 4 %a2, i8 addrspace(1)* align 4 %P, i64 8, i1 false) + call void @test4a(i8* align 1 byval %a2) + ret void } declare void @test4a(i8* align 1 byval) @@ -116,6 +131,16 @@ declare void @test5a(%struct.S* align 16 byval) nounwind ssp ; rdar://8713376 - This memcpy can't be eliminated. define i32 @test5(i32 %x) nounwind ssp { +; CHECK-LABEL: @test5( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[Y:%.*]] = alloca [[STRUCT_S:%.*]], align 16 +; CHECK-NEXT: [[TMP:%.*]] = bitcast %struct.S* [[Y]] to i8* +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP]], i8* align 16 bitcast (%struct.S* @sS to i8*), i64 32, i1 false) +; CHECK-NEXT: [[A:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[Y]], i64 0, i32 1, i64 0 +; CHECK-NEXT: store i8 4, i8* [[A]], align 1 +; CHECK-NEXT: call void @test5a(%struct.S* byval align 16 [[Y]]) +; CHECK-NEXT: ret i32 0 +; entry: %y = alloca %struct.S, align 16 %tmp = bitcast %struct.S* %y to i8* @@ -124,17 +149,15 @@ entry: store i8 4, i8* %a call void @test5a(%struct.S* align 16 byval %y) ret i32 0 - ; CHECK-LABEL: @test5( - ; CHECK: store i8 4 - ; CHECK: call void @test5a(%struct.S* byval align 16 %y) } ;; Noop memcpy should be zapped. 
define void @test6(i8 *%P) { +; CHECK-LABEL: @test6( +; CHECK-NEXT: ret void +; call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %P, i8* align 4 %P, i64 8, i1 false) ret void -; CHECK-LABEL: @test6( -; CHECK-NEXT: ret void } @@ -143,6 +166,11 @@ define void @test6(i8 *%P) { %struct.p = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } define i32 @test7(%struct.p* nocapture align 8 byval %q) nounwind ssp { +; CHECK-LABEL: @test7( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i32 @g(%struct.p* byval align 8 [[Q:%.*]]) [[ATTR0]] +; CHECK-NEXT: ret i32 [[CALL]] +; entry: %agg.tmp = alloca %struct.p, align 4 %tmp = bitcast %struct.p* %agg.tmp to i8* @@ -150,8 +178,6 @@ entry: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %tmp, i8* align 4 %tmp1, i64 48, i1 false) %call = call i32 @g(%struct.p* align 8 byval %agg.tmp) nounwind ret i32 %call -; CHECK-LABEL: @test7( -; CHECK: call i32 @g(%struct.p* byval align 8 %q) [[$NUW:#[0-9]+]] } declare i32 @g(%struct.p* align 8 byval) @@ -163,8 +189,9 @@ declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) n @test8.str = internal constant [7 x i8] c"ABCDEF\00" define void @test8() { -; CHECK: test8 -; CHECK-NOT: memcpy +; CHECK-LABEL: @test8( +; CHECK-NEXT: ret void +; %A = tail call i8* @malloc(i32 10) %B = getelementptr inbounds i8, i8* %A, i64 2 tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %B, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @test8.str, i64 0, i64 0), i32 7, i1 false) @@ -172,7 +199,6 @@ define void @test8() { %D = getelementptr inbounds i8, i8* %C, i64 2 tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %D, i8* %B, i32 7, i1 false) ret void -; CHECK: ret void } declare noalias i8* @malloc(i32) @@ -181,11 +207,14 @@ declare noalias i8* @malloc(i32) %struct.big = type { [50 x i32] } define void @test9_addrspacecast() nounwind ssp uwtable { -entry: ; CHECK-LABEL: @test9_addrspacecast( -; CHECK: f1 -; CHECK-NOT: memcpy -; CHECK: f2 +; CHECK-NEXT: entry: +; 
CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_BIG:%.*]], align 4 +; CHECK-NEXT: call void @f1(%struct.big* sret [[B]]) +; CHECK-NEXT: call void @f2(%struct.big* [[B]]) +; CHECK-NEXT: ret void +; +entry: %b = alloca %struct.big, align 4 %tmp = alloca %struct.big, align 4 call void @f1(%struct.big* sret %tmp) @@ -197,11 +226,14 @@ entry: } define void @test9() nounwind ssp uwtable { +; CHECK-LABEL: @test9( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_BIG:%.*]], align 4 +; CHECK-NEXT: call void @f1(%struct.big* sret [[B]]) +; CHECK-NEXT: call void @f2(%struct.big* [[B]]) +; CHECK-NEXT: ret void +; entry: -; CHECK: test9 -; CHECK: f1 -; CHECK-NOT: memcpy -; CHECK: f2 %b = alloca %struct.big, align 4 %tmp = alloca %struct.big, align 4 call void @f1(%struct.big* sret %tmp) @@ -220,6 +252,15 @@ entry: declare void @foo(i32* noalias nocapture) define void @test10(%opaque* noalias nocapture sret %x, i32 %y) { +; CHECK-LABEL: @test10( +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[Y:%.*]], i32* [[A]], align 4 +; CHECK-NEXT: call void @foo(i32* noalias nocapture [[A]]) +; CHECK-NEXT: [[C:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[D:%.*]] = bitcast %opaque* [[X:%.*]] to i32* +; CHECK-NEXT: store i32 [[C]], i32* [[D]], align 4 +; CHECK-NEXT: ret void +; %a = alloca i32, align 4 store i32 %y, i32* %a call void @foo(i32* noalias nocapture %a) @@ -231,14 +272,17 @@ define void @test10(%opaque* noalias nocapture sret %x, i32 %y) { ; don't create new addressspacecasts when we don't know they're safe for the target define void @test11([20 x i32] addrspace(1)* nocapture dereferenceable(80) %P) { +; CHECK-LABEL: @test11( +; CHECK-NEXT: [[B:%.*]] = bitcast [20 x i32] addrspace(1)* [[P:%.*]] to i8 addrspace(1)* +; CHECK-NEXT: call void @llvm.memset.p1i8.i64(i8 addrspace(1)* align 4 [[B]], i8 0, i64 80, i1 false) +; CHECK-NEXT: ret void +; %A = alloca [20 x i32], align 4 %a = bitcast [20 x i32]* %A to i8* %b = bitcast [20 x i32] 
addrspace(1)* %P to i8 addrspace(1)* call void @llvm.memset.p0i8.i64(i8* align 4 %a, i8 0, i64 80, i1 false) call void @llvm.memcpy.p1i8.p0i8.i64(i8 addrspace(1)* align 4 %b, i8* align 4 %a, i64 80, i1 false) ret void -; CHECK-LABEL: @test11( -; CHECK-NOT: addrspacecast } declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind @@ -247,7 +291,7 @@ declare void @llvm.memcpy.p1i8.p0i8.i64(i8 addrspace(1)* nocapture, i8* nocaptur declare void @f1(%struct.big* nocapture sret) declare void @f2(%struct.big*) -; CHECK: attributes [[$NUW]] = { nounwind } +; CHECK: attributes [[ATTR0]] = { nounwind } ; CHECK: attributes #1 = { argmemonly nounwind willreturn } ; CHECK: attributes #2 = { nounwind ssp } ; CHECK: attributes #3 = { nounwind ssp uwtable } diff --git a/llvm/test/Transforms/MemCpyOpt/memmove.ll b/llvm/test/Transforms/MemCpyOpt/memmove.ll index d152cfb63f2b..4a75cfe6a046 100644 --- a/llvm/test/Transforms/MemCpyOpt/memmove.ll +++ b/llvm/test/Transforms/MemCpyOpt/memmove.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -basic-aa -memcpyopt -S | FileCheck %s ; These memmoves should get optimized to memcpys. 
@@ -7,9 +8,15 @@ target triple = "x86_64-apple-darwin9.0" declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind define i8* @test1(i8* nocapture %src) nounwind { -entry: ; CHECK-LABEL: @test1( -; CHECK: call void @llvm.memcpy +; CHECK-NEXT: entry: +; CHECK-NEXT: [[MALLOCCALL:%.*]] = tail call i8* @malloc(i32 trunc (i64 mul nuw (i64 ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64), i64 13) to i32)) +; CHECK-NEXT: [[CALL3:%.*]] = bitcast i8* [[MALLOCCALL]] to [13 x i8]* +; CHECK-NEXT: [[CALL3_SUB:%.*]] = getelementptr inbounds [13 x i8], [13 x i8]* [[CALL3]], i64 0, i64 0 +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[CALL3_SUB]], i8* [[SRC:%.*]], i64 13, i1 false) +; CHECK-NEXT: ret i8* [[CALL3_SUB]] +; +entry: %malloccall = tail call i8* @malloc(i32 trunc (i64 mul nuw (i64 ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64), i64 13) to i32)) %call3 = bitcast i8* %malloccall to [13 x i8]* @@ -21,9 +28,13 @@ declare noalias i8* @malloc(i32) define void @test2(i8* %P) nounwind { -entry: ; CHECK-LABEL: @test2( -; CHECK: call void @llvm.memcpy +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr i8, i8* [[P:%.*]], i64 16 +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P]], i8* [[ADD_PTR]], i64 16, i1 false) +; CHECK-NEXT: ret void +; +entry: %add.ptr = getelementptr i8, i8* %P, i64 16 tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %add.ptr, i64 16, i1 false) ret void @@ -31,9 +42,13 @@ entry: ; This cannot be optimize because the src/dst really do overlap. 
define void @test3(i8* %P) nounwind { -entry: ; CHECK-LABEL: @test3( -; CHECK: call void @llvm.memmove +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr i8, i8* [[P:%.*]], i64 16 +; CHECK-NEXT: tail call void @llvm.memmove.p0i8.p0i8.i64(i8* [[P]], i8* [[ADD_PTR]], i64 17, i1 false) +; CHECK-NEXT: ret void +; +entry: %add.ptr = getelementptr i8, i8* %P, i64 16 tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %add.ptr, i64 17, i1 false) ret void diff --git a/llvm/test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll b/llvm/test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll index 7ee0682ed229..52ac35ba5da5 100644 --- a/llvm/test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll +++ b/llvm/test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll @@ -130,7 +130,7 @@ define void @test_write_between(i8* %result) { ; CHECK-NEXT: [[A:%.*]] = alloca [[T:%.*]], align 8 ; CHECK-NEXT: [[B:%.*]] = bitcast %T* [[A]] to i8* ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8 0, i64 12, i1 false) -; CHECK-NEXT: store i8 -1, i8* [[B]] +; CHECK-NEXT: store i8 -1, i8* [[B]], align 1 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[RESULT:%.*]], i8* align 8 [[B]], i64 16, i1 false) ; CHECK-NEXT: ret void ; @@ -148,7 +148,7 @@ define void @test_write_before_memset_in_memset_region(i8* %result) { ; CHECK-LABEL: @test_write_before_memset_in_memset_region( ; CHECK-NEXT: [[A:%.*]] = alloca [[T:%.*]], align 8 ; CHECK-NEXT: [[B:%.*]] = bitcast %T* [[A]] to i8* -; CHECK-NEXT: store i8 -1, i8* [[B]] +; CHECK-NEXT: store i8 -1, i8* [[B]], align 1 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8 0, i64 8, i1 false) ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[RESULT:%.*]], i8* align 8 [[B]], i64 16, i1 false) ; CHECK-NEXT: ret void @@ -168,7 +168,7 @@ define void @test_write_before_memset_in_memcpy_region(i8* %result) { ; CHECK-NEXT: [[A:%.*]] = alloca [[T:%.*]], align 8 ; CHECK-NEXT: [[B:%.*]] = bitcast %T* [[A]] to i8* ; 
CHECK-NEXT: [[C:%.*]] = getelementptr inbounds [[T]], %T* [[A]], i64 0, i32 2 -; CHECK-NEXT: store i32 -1, i32* [[C]] +; CHECK-NEXT: store i32 -1, i32* [[C]], align 4 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8 0, i64 8, i1 false) ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[RESULT:%.*]], i8* align 8 [[B]], i64 16, i1 false) ; CHECK-NEXT: ret void @@ -189,7 +189,7 @@ define void @test_write_before_memset_in_both_regions(i8* %result) { ; CHECK-NEXT: [[A:%.*]] = alloca [[T:%.*]], align 8 ; CHECK-NEXT: [[B:%.*]] = bitcast %T* [[A]] to i8* ; CHECK-NEXT: [[C:%.*]] = getelementptr inbounds [[T]], %T* [[A]], i64 0, i32 1 -; CHECK-NEXT: store i32 -1, i32* [[C]] +; CHECK-NEXT: store i32 -1, i32* [[C]], align 4 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8 0, i64 10, i1 false) ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[RESULT:%.*]], i8* align 8 [[B]], i64 16, i1 false) ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/MemCpyOpt/memset-memcpy-redundant-memset.ll b/llvm/test/Transforms/MemCpyOpt/memset-memcpy-redundant-memset.ll index 651ac3194a15..758a093a3b65 100644 --- a/llvm/test/Transforms/MemCpyOpt/memset-memcpy-redundant-memset.ll +++ b/llvm/test/Transforms/MemCpyOpt/memset-memcpy-redundant-memset.ll @@ -1,126 +1,155 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -basic-aa -memcpyopt -S %s | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" -; CHECK-LABEL: define void @test -; CHECK: [[ULE:%[0-9]+]] = icmp ule i64 %dst_size, %src_size -; CHECK: [[SIZEDIFF:%[0-9]+]] = sub i64 %dst_size, %src_size -; CHECK: [[SIZE:%[0-9]+]] = select i1 [[ULE]], i64 0, i64 [[SIZEDIFF]] -; CHECK: [[DST:%[0-9]+]] = getelementptr i8, i8* %dst, i64 %src_size -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[DST]], i8 %c, i64 [[SIZE]], i1 false) -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %src_size, i1 false) 
-; CHECK-NEXT: ret void define void @test(i8* %src, i64 %src_size, i8* %dst, i64 %dst_size, i8 %c) { +; CHECK-LABEL: @test( +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i64 [[DST_SIZE:%.*]], [[SRC_SIZE:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[DST_SIZE]], [[SRC_SIZE]] +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP1]], i64 0, i64 [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[DST:%.*]], i64 [[SRC_SIZE]] +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[TMP4]], i8 [[C:%.*]], i64 [[TMP3]], i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DST]], i8* [[SRC:%.*]], i64 [[SRC_SIZE]], i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memset.p0i8.i64(i8* %dst, i8 %c, i64 %dst_size, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %src_size, i1 false) ret void } -; CHECK-LABEL: define void @test_different_types_i32_i64 -; CHECK: [[DSTSIZE:%[0-9]+]] = zext i32 %dst_size to i64 -; CHECK: [[ULE:%[0-9]+]] = icmp ule i64 [[DSTSIZE]], %src_size -; CHECK: [[SIZEDIFF:%[0-9]+]] = sub i64 [[DSTSIZE]], %src_size -; CHECK: [[SIZE:%[0-9]+]] = select i1 [[ULE]], i64 0, i64 [[SIZEDIFF]] -; CHECK: [[DST:%[0-9]+]] = getelementptr i8, i8* %dst, i64 %src_size -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[DST]], i8 %c, i64 [[SIZE]], i1 false) -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %src_size, i1 false) -; CHECK-NEXT: ret void define void @test_different_types_i32_i64(i8* %dst, i8* %src, i32 %dst_size, i64 %src_size, i8 %c) { +; CHECK-LABEL: @test_different_types_i32_i64( +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[DST_SIZE:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ule i64 [[TMP1]], [[SRC_SIZE:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[TMP1]], [[SRC_SIZE]] +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP2]], i64 0, i64 [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, i8* [[DST:%.*]], i64 [[SRC_SIZE]] +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[TMP5]], 
i8 [[C:%.*]], i64 [[TMP4]], i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DST]], i8* [[SRC:%.*]], i64 [[SRC_SIZE]], i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memset.p0i8.i32(i8* %dst, i8 %c, i32 %dst_size, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %src_size, i1 false) ret void } -; CHECK-LABEL: define void @test_different_types_i128_i32 -; CHECK: [[SRCSIZE:%[0-9]+]] = zext i32 %src_size to i128 -; CHECK: [[ULE:%[0-9]+]] = icmp ule i128 %dst_size, [[SRCSIZE]] -; CHECK: [[SIZEDIFF:%[0-9]+]] = sub i128 %dst_size, [[SRCSIZE]] -; CHECK: [[SIZE:%[0-9]+]] = select i1 [[ULE]], i128 0, i128 [[SIZEDIFF]] -; CHECK: [[DST:%[0-9]+]] = getelementptr i8, i8* %dst, i128 [[SRCSIZE]] -; CHECK-NEXT: call void @llvm.memset.p0i8.i128(i8* align 1 [[DST]], i8 %c, i128 [[SIZE]], i1 false) -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %src_size, i1 false) -; CHECK-NEXT: ret void define void @test_different_types_i128_i32(i8* %dst, i8* %src, i128 %dst_size, i32 %src_size, i8 %c) { +; CHECK-LABEL: @test_different_types_i128_i32( +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[SRC_SIZE:%.*]] to i128 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ule i128 [[DST_SIZE:%.*]], [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = sub i128 [[DST_SIZE]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP2]], i128 0, i128 [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, i8* [[DST:%.*]], i128 [[TMP1]] +; CHECK-NEXT: call void @llvm.memset.p0i8.i128(i8* align 1 [[TMP5]], i8 [[C:%.*]], i128 [[TMP4]], i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[DST]], i8* [[SRC:%.*]], i32 [[SRC_SIZE]], i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memset.p0i8.i128(i8* %dst, i8 %c, i128 %dst_size, i1 false) call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %src_size, i1 false) ret void } -; CHECK-LABEL: define void @test_different_types_i32_i128 -; CHECK: [[DSTSIZE:%[0-9]+]] = zext i32 %dst_size to i128 -; 
CHECK: [[ULE:%[0-9]+]] = icmp ule i128 [[DSTSIZE]], %src_size -; CHECK: [[SIZEDIFF:%[0-9]+]] = sub i128 [[DSTSIZE]], %src_size -; CHECK: [[SIZE:%[0-9]+]] = select i1 [[ULE]], i128 0, i128 [[SIZEDIFF]] -; CHECK: [[DST:%[0-9]+]] = getelementptr i8, i8* %dst, i128 %src_size -; CHECK-NEXT: call void @llvm.memset.p0i8.i128(i8* align 1 [[DST]], i8 %c, i128 [[SIZE]], i1 false) -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i128(i8* %dst, i8* %src, i128 %src_size, i1 false) -; CHECK-NEXT: ret void define void @test_different_types_i32_i128(i8* %dst, i8* %src, i32 %dst_size, i128 %src_size, i8 %c) { +; CHECK-LABEL: @test_different_types_i32_i128( +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[DST_SIZE:%.*]] to i128 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ule i128 [[TMP1]], [[SRC_SIZE:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = sub i128 [[TMP1]], [[SRC_SIZE]] +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP2]], i128 0, i128 [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, i8* [[DST:%.*]], i128 [[SRC_SIZE]] +; CHECK-NEXT: call void @llvm.memset.p0i8.i128(i8* align 1 [[TMP5]], i8 [[C:%.*]], i128 [[TMP4]], i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i128(i8* [[DST]], i8* [[SRC:%.*]], i128 [[SRC_SIZE]], i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memset.p0i8.i32(i8* %dst, i8 %c, i32 %dst_size, i1 false) call void @llvm.memcpy.p0i8.p0i8.i128(i8* %dst, i8* %src, i128 %src_size, i1 false) ret void } -; CHECK-LABEL: define void @test_different_types_i64_i32 -; CHECK: [[SRCSIZE:%[0-9]+]] = zext i32 %src_size to i64 -; CHECK: [[ULE:%[0-9]+]] = icmp ule i64 %dst_size, [[SRCSIZE]] -; CHECK: [[SIZEDIFF:%[0-9]+]] = sub i64 %dst_size, [[SRCSIZE]] -; CHECK: [[SIZE:%[0-9]+]] = select i1 [[ULE]], i64 0, i64 [[SIZEDIFF]] -; CHECK: [[DST:%[0-9]+]] = getelementptr i8, i8* %dst, i64 [[SRCSIZE]] -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[DST]], i8 %c, i64 [[SIZE]], i1 false) -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %src_size, i1 false) 
-; CHECK-NEXT: ret void define void @test_different_types_i64_i32(i8* %dst, i8* %src, i64 %dst_size, i32 %src_size, i8 %c) { +; CHECK-LABEL: @test_different_types_i64_i32( +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[SRC_SIZE:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ule i64 [[DST_SIZE:%.*]], [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[DST_SIZE]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP2]], i64 0, i64 [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, i8* [[DST:%.*]], i64 [[TMP1]] +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[TMP5]], i8 [[C:%.*]], i64 [[TMP4]], i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[DST]], i8* [[SRC:%.*]], i32 [[SRC_SIZE]], i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memset.p0i8.i64(i8* %dst, i8 %c, i64 %dst_size, i1 false) call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %src_size, i1 false) ret void } -; CHECK-LABEL: define void @test_align_same -; CHECK: call void @llvm.memset.p0i8.i64(i8* align 8 {{.*}}, i8 0, i64 {{.*}}, i1 false) define void @test_align_same(i8* %src, i8* %dst, i64 %dst_size) { +; CHECK-LABEL: @test_align_same( +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i64 [[DST_SIZE:%.*]], 80 +; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[DST_SIZE]], 80 +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP1]], i64 0, i64 [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[DST:%.*]], i64 80 +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP4]], i8 0, i64 [[TMP3]], i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DST]], i8* [[SRC:%.*]], i64 80, i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memset.p0i8.i64(i8* align 8 %dst, i8 0, i64 %dst_size, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 80, i1 false) ret void } -; CHECK-LABEL: define void @test_align_min -; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 {{.*}}, i8 0, i64 {{.*}}, i1 false) define void @test_align_min(i8* %src, i8* %dst, i64 
%dst_size) { +; CHECK-LABEL: @test_align_min( +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i64 [[DST_SIZE:%.*]], 36 +; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[DST_SIZE]], 36 +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP1]], i64 0, i64 [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[DST:%.*]], i64 36 +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP4]], i8 0, i64 [[TMP3]], i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DST]], i8* [[SRC:%.*]], i64 36, i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memset.p0i8.i64(i8* align 8 %dst, i8 0, i64 %dst_size, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 36, i1 false) ret void } -; CHECK-LABEL: define void @test_align_memcpy -; CHECK: call void @llvm.memset.p0i8.i64(i8* align 8 {{.*}}, i8 0, i64 {{.*}}, i1 false) define void @test_align_memcpy(i8* %src, i8* %dst, i64 %dst_size) { +; CHECK-LABEL: @test_align_memcpy( +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i64 [[DST_SIZE:%.*]], 80 +; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[DST_SIZE]], 80 +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP1]], i64 0, i64 [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[DST:%.*]], i64 80 +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP4]], i8 0, i64 [[TMP3]], i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[DST]], i8* align 8 [[SRC:%.*]], i64 80, i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memset.p0i8.i64(i8* %dst, i8 0, i64 %dst_size, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst, i8* align 8 %src, i64 80, i1 false) ret void } -; CHECK-LABEL: define void @test_non_i8_dst_type -; CHECK-NEXT: %dst = bitcast i64* %dst_pi64 to i8* -; CHECK: [[ULE:%[0-9]+]] = icmp ule i64 %dst_size, %src_size -; CHECK: [[SIZEDIFF:%[0-9]+]] = sub i64 %dst_size, %src_size -; CHECK: [[SIZE:%[0-9]+]] = select i1 [[ULE]], i64 0, i64 [[SIZEDIFF]] -; CHECK: [[DST:%[0-9]+]] = getelementptr i8, i8* %dst, i64 %src_size -; 
CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[DST]], i8 %c, i64 [[SIZE]], i1 false) -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %src_size, i1 false) -; CHECK-NEXT: ret void define void @test_non_i8_dst_type(i8* %src, i64 %src_size, i64* %dst_pi64, i64 %dst_size, i8 %c) { +; CHECK-LABEL: @test_non_i8_dst_type( +; CHECK-NEXT: [[DST:%.*]] = bitcast i64* [[DST_PI64:%.*]] to i8* +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i64 [[DST_SIZE:%.*]], [[SRC_SIZE:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[DST_SIZE]], [[SRC_SIZE]] +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP1]], i64 0, i64 [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[DST]], i64 [[SRC_SIZE]] +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[TMP4]], i8 [[C:%.*]], i64 [[TMP3]], i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DST]], i8* [[SRC:%.*]], i64 [[SRC_SIZE]], i1 false) +; CHECK-NEXT: ret void +; %dst = bitcast i64* %dst_pi64 to i8* call void @llvm.memset.p0i8.i64(i8* %dst, i8 %c, i64 %dst_size, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %src_size, i1 false) ret void } -; CHECK-LABEL: define void @test_different_dst -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst, i8 0, i64 %dst_size, i1 false) -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %src, i64 %src_size, i1 false) -; CHECK-NEXT: ret void define void @test_different_dst(i8* %dst2, i8* %src, i64 %src_size, i8* %dst, i64 %dst_size) { +; CHECK-LABEL: @test_different_dst( +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DST:%.*]], i8 0, i64 [[DST_SIZE:%.*]], i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DST2:%.*]], i8* [[SRC:%.*]], i64 [[SRC_SIZE:%.*]], i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memset.p0i8.i64(i8* %dst, i8 0, i64 %dst_size, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %src, i64 %src_size, i1 false) ret void @@ -128,12 +157,13 @@ define void 
@test_different_dst(i8* %dst2, i8* %src, i64 %src_size, i8* %dst, i6 ; Make sure we also take into account dependencies on the destination. -; CHECK-LABEL: define i8 @test_intermediate_read -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 64, i1 false) -; CHECK-NEXT: %r = load i8, i8* %a -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 24, i1 false) -; CHECK-NEXT: ret i8 %r define i8 @test_intermediate_read(i8* %a, i8* %b) #0 { +; CHECK-LABEL: @test_intermediate_read( +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[A:%.*]], i8 0, i64 64, i1 false) +; CHECK-NEXT: [[R:%.*]] = load i8, i8* [[A]], align 1 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[A]], i8* [[B:%.*]], i64 24, i1 false) +; CHECK-NEXT: ret i8 [[R]] +; call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 64, i1 false) %r = load i8, i8* %a call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 24, i1 false) @@ -142,15 +172,16 @@ define i8 @test_intermediate_read(i8* %a, i8* %b) #0 { %struct = type { [8 x i8], [8 x i8] } -; CHECK-LABEL: define void @test_intermediate_write -; CHECK-NEXT: %a = alloca %struct -; CHECK-NEXT: %a0 = getelementptr %struct, %struct* %a, i32 0, i32 0, i32 0 -; CHECK-NEXT: %a1 = getelementptr %struct, %struct* %a, i32 0, i32 1, i32 0 -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %a0, i8 0, i64 16, i1 false) -; CHECK-NEXT: store i8 1, i8* %a1 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a0, i8* %b, i64 8, i1 false) -; CHECK-NEXT: ret void define void @test_intermediate_write(i8* %b) #0 { +; CHECK-LABEL: @test_intermediate_write( +; CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT:%.*]], align 8 +; CHECK-NEXT: [[A0:%.*]] = getelementptr [[STRUCT]], %struct* [[A]], i32 0, i32 0, i32 0 +; CHECK-NEXT: [[A1:%.*]] = getelementptr [[STRUCT]], %struct* [[A]], i32 0, i32 1, i32 0 +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[A0]], i8 0, i64 16, i1 false) +; CHECK-NEXT: store i8 1, i8* [[A1]], align 1 +; CHECK-NEXT: call 
void @llvm.memcpy.p0i8.p0i8.i64(i8* [[A0]], i8* [[B:%.*]], i64 8, i1 false) +; CHECK-NEXT: ret void +; %a = alloca %struct %a0 = getelementptr %struct, %struct* %a, i32 0, i32 0, i32 0 %a1 = getelementptr %struct, %struct* %a, i32 0, i32 1, i32 0 diff --git a/llvm/test/Transforms/MemCpyOpt/memset-memcpy-to-2x-memset.ll b/llvm/test/Transforms/MemCpyOpt/memset-memcpy-to-2x-memset.ll index e36389a128f9..8867c4f810b2 100644 --- a/llvm/test/Transforms/MemCpyOpt/memset-memcpy-to-2x-memset.ll +++ b/llvm/test/Transforms/MemCpyOpt/memset-memcpy-to-2x-memset.ll @@ -1,73 +1,81 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -memcpyopt -S %s | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" -; CHECK-LABEL: define void @test( -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i1 false) -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 %dst2, i8 %c, i64 128, i1 false) -; CHECK-NEXT: ret void define void @test(i8* %dst1, i8* %dst2, i8 %c) { +; CHECK-LABEL: @test( +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DST1:%.*]], i8 [[C:%.*]], i64 128, i1 false) +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[DST2:%.*]], i8 [[C]], i64 128, i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst2, i8* align 8 %dst1, i64 128, i1 false) ret void } -; CHECK-LABEL: define void @test_smaller_memcpy( -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i1 false) -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst2, i8 %c, i64 100, i1 false) -; CHECK-NEXT: ret void define void @test_smaller_memcpy(i8* %dst1, i8* %dst2, i8 %c) { +; CHECK-LABEL: @test_smaller_memcpy( +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DST1:%.*]], i8 [[C:%.*]], i64 128, i1 false) +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DST2:%.*]], i8 [[C]], i64 100, i1 false) 
+; CHECK-NEXT: ret void +; call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 100, i1 false) ret void } -; CHECK-LABEL: define void @test_smaller_memset( -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 100, i1 false) -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i1 false) -; CHECK-NEXT: ret void define void @test_smaller_memset(i8* %dst1, i8* %dst2, i8 %c) { +; CHECK-LABEL: @test_smaller_memset( +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DST1:%.*]], i8 [[C:%.*]], i64 100, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DST2:%.*]], i8* [[DST1]], i64 128, i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 100, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i1 false) ret void } -; CHECK-LABEL: define void @test_align_memset( -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 %dst1, i8 %c, i64 128, i1 false) -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst2, i8 %c, i64 128, i1 false) -; CHECK-NEXT: ret void define void @test_align_memset(i8* %dst1, i8* %dst2, i8 %c) { +; CHECK-LABEL: @test_align_memset( +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[DST1:%.*]], i8 [[C:%.*]], i64 128, i1 false) +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DST2:%.*]], i8 [[C]], i64 128, i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memset.p0i8.i64(i8* align 8 %dst1, i8 %c, i64 128, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i1 false) ret void } -; CHECK-LABEL: define void @test_different_types( -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 %dst1, i8 %c, i64 128, i1 false) -; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* %dst2, i8 %c, i32 100, i1 false) -; CHECK-NEXT: ret void define void @test_different_types(i8* %dst1, i8* %dst2, i8 %c) { +; CHECK-LABEL: 
@test_different_types( +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[DST1:%.*]], i8 [[C:%.*]], i64 128, i1 false) +; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* [[DST2:%.*]], i8 [[C]], i32 100, i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memset.p0i8.i64(i8* align 8 %dst1, i8 %c, i64 128, i1 false) call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst2, i8* %dst1, i32 100, i1 false) ret void } -; CHECK-LABEL: define void @test_different_types_2( -; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* align 8 %dst1, i8 %c, i32 128, i1 false) -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst2, i8 %c, i64 100, i1 false) -; CHECK-NEXT: ret void define void @test_different_types_2(i8* %dst1, i8* %dst2, i8 %c) { +; CHECK-LABEL: @test_different_types_2( +; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* align 8 [[DST1:%.*]], i8 [[C:%.*]], i32 128, i1 false) +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DST2:%.*]], i8 [[C]], i64 100, i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memset.p0i8.i32(i8* align 8 %dst1, i8 %c, i32 128, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 100, i1 false) ret void } -; CHECK-LABEL: define void @test_different_source_gep( -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i1 false) -; CHECK-NEXT: %p = getelementptr i8, i8* %dst1, i64 64 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %p, i64 64, i1 false) -; CHECK-NEXT: ret void define void @test_different_source_gep(i8* %dst1, i8* %dst2, i8 %c) { +; CHECK-LABEL: @test_different_source_gep( +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DST1:%.*]], i8 [[C:%.*]], i64 128, i1 false) +; CHECK-NEXT: [[P:%.*]] = getelementptr i8, i8* [[DST1]], i64 64 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DST2:%.*]], i8* [[P]], i64 64, i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i1 false) ; FIXME: We could optimize this as well. 
%p = getelementptr i8, i8* %dst1, i64 64 @@ -75,21 +83,23 @@ define void @test_different_source_gep(i8* %dst1, i8* %dst2, i8 %c) { ret void } -; CHECK-LABEL: define void @test_variable_size_1( -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 %dst1_size, i1 false) -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i1 false) -; CHECK-NEXT: ret void define void @test_variable_size_1(i8* %dst1, i64 %dst1_size, i8* %dst2, i8 %c) { +; CHECK-LABEL: @test_variable_size_1( +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DST1:%.*]], i8 [[C:%.*]], i64 [[DST1_SIZE:%.*]], i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DST2:%.*]], i8* [[DST1]], i64 128, i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 %dst1_size, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i1 false) ret void } -; CHECK-LABEL: define void @test_variable_size_2( -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i1 false) -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 %dst2_size, i1 false) -; CHECK-NEXT: ret void define void @test_variable_size_2(i8* %dst1, i8* %dst2, i64 %dst2_size, i8 %c) { +; CHECK-LABEL: @test_variable_size_2( +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DST1:%.*]], i8 [[C:%.*]], i64 128, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DST2:%.*]], i8* [[DST1]], i64 [[DST2_SIZE:%.*]], i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 %dst2_size, i1 false) ret void diff --git a/llvm/test/Transforms/MemCpyOpt/nontemporal.ll b/llvm/test/Transforms/MemCpyOpt/nontemporal.ll index d9dafcc7b816..a67aa8cf0007 100644 --- a/llvm/test/Transforms/MemCpyOpt/nontemporal.ll +++ b/llvm/test/Transforms/MemCpyOpt/nontemporal.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions 
have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -memcpyopt -S | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" @@ -5,16 +6,25 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; Verify that we don't combine nontemporal stores into memset calls. define void @nontemporal_stores_1(<4 x float>* nocapture %dst) { -; CHECK-LABEL: @nontemporal_stores_1 -; CHECK: store <4 x float> zeroinitializer, <4 x float>* %dst, align 16, !nontemporal !0 -; CHECK: store <4 x float> zeroinitializer, <4 x float>* %ptr1, align 16, !nontemporal !0 -; CHECK: store <4 x float> zeroinitializer, <4 x float>* %ptr2, align 16, !nontemporal !0 -; CHECK: store <4 x float> zeroinitializer, <4 x float>* %ptr3, align 16, !nontemporal !0 -; CHECK: store <4 x float> zeroinitializer, <4 x float>* %ptr4, align 16, !nontemporal !0 -; CHECK: store <4 x float> zeroinitializer, <4 x float>* %ptr5, align 16, !nontemporal !0 -; CHECK: store <4 x float> zeroinitializer, <4 x float>* %ptr6, align 16, !nontemporal !0 -; CHECK: store <4 x float> zeroinitializer, <4 x float>* %ptr7, align 16, !nontemporal !0 -; CHECK-NEXT: ret void +; CHECK-LABEL: @nontemporal_stores_1( +; CHECK-NEXT: entry: +; CHECK-NEXT: store <4 x float> zeroinitializer, <4 x float>* [[DST:%.*]], align 16, !nontemporal !0 +; CHECK-NEXT: [[PTR1:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[DST]], i64 1 +; CHECK-NEXT: store <4 x float> zeroinitializer, <4 x float>* [[PTR1]], align 16, !nontemporal !0 +; CHECK-NEXT: [[PTR2:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[DST]], i64 2 +; CHECK-NEXT: store <4 x float> zeroinitializer, <4 x float>* [[PTR2]], align 16, !nontemporal !0 +; CHECK-NEXT: [[PTR3:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[DST]], i64 3 +; CHECK-NEXT: store <4 x float> zeroinitializer, <4 x float>* [[PTR3]], align 16, !nontemporal !0 +; CHECK-NEXT: [[PTR4:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[DST]], i64 4 +; 
CHECK-NEXT: store <4 x float> zeroinitializer, <4 x float>* [[PTR4]], align 16, !nontemporal !0 +; CHECK-NEXT: [[PTR5:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[DST]], i64 5 +; CHECK-NEXT: store <4 x float> zeroinitializer, <4 x float>* [[PTR5]], align 16, !nontemporal !0 +; CHECK-NEXT: [[PTR6:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[DST]], i64 6 +; CHECK-NEXT: store <4 x float> zeroinitializer, <4 x float>* [[PTR6]], align 16, !nontemporal !0 +; CHECK-NEXT: [[PTR7:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[DST]], i64 7 +; CHECK-NEXT: store <4 x float> zeroinitializer, <4 x float>* [[PTR7]], align 16, !nontemporal !0 +; CHECK-NEXT: ret void +; entry: store <4 x float> zeroinitializer, <4 x float>* %dst, align 16, !nontemporal !0 %ptr1 = getelementptr inbounds <4 x float>, <4 x float>* %dst, i64 1 @@ -35,10 +45,13 @@ entry: } define void @nontemporal_stores_2(<4 x float>* nocapture %dst) { -; CHECK-LABEL: @nontemporal_stores_2 -; CHECK: store <4 x float> zeroinitializer, <4 x float>* %dst, align 16, !nontemporal !0 -; CHECK: store <4 x float> zeroinitializer, <4 x float>* %ptr1, align 16, !nontemporal !0 -; CHECK-NEXT: ret void +; CHECK-LABEL: @nontemporal_stores_2( +; CHECK-NEXT: entry: +; CHECK-NEXT: store <4 x float> zeroinitializer, <4 x float>* [[DST:%.*]], align 16, !nontemporal !0 +; CHECK-NEXT: [[PTR1:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[DST]], i64 1 +; CHECK-NEXT: store <4 x float> zeroinitializer, <4 x float>* [[PTR1]], align 16, !nontemporal !0 +; CHECK-NEXT: ret void +; entry: store <4 x float> zeroinitializer, <4 x float>* %dst, align 16, !nontemporal !0 %ptr1 = getelementptr inbounds <4 x float>, <4 x float>* %dst, i64 1 diff --git a/llvm/test/Transforms/MemCpyOpt/pr29105.ll b/llvm/test/Transforms/MemCpyOpt/pr29105.ll index e9e9b611aef2..e83508606e55 100644 --- a/llvm/test/Transforms/MemCpyOpt/pr29105.ll +++ b/llvm/test/Transforms/MemCpyOpt/pr29105.ll @@ -1,10 +1,19 @@ +; NOTE: 
Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -memcpyopt -instcombine -S %s | FileCheck %s %Foo = type { [2048 x i64] } ; Make sure that all mempcy calls are converted to memset calls, or removed. -; CHECK-LABEL: @baz( -; CHECK-NOT: call void @llvm.memcpy define void @baz() unnamed_addr #0 { +; CHECK-LABEL: @baz( +; CHECK-NEXT: entry-block: +; CHECK-NEXT: [[TMP2:%.*]] = alloca [[FOO:%.*]], align 8 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast %Foo* [[TMP2]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 16384, i8* nonnull [[TMP0]]) +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 8 dereferenceable(16384) [[TMP0]], i8 0, i64 16384, i1 false) +; CHECK-NEXT: call void @bar(%Foo* noalias nocapture nonnull dereferenceable(16384) [[TMP2]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 16384, i8* nonnull [[TMP0]]) +; CHECK-NEXT: ret void +; entry-block: %x.sroa.0 = alloca [2048 x i64], align 8 %tmp0 = alloca [2048 x i64], align 8 diff --git a/llvm/test/Transforms/MemCpyOpt/pr37967.ll b/llvm/test/Transforms/MemCpyOpt/pr37967.ll index 6b6a40873273..8a4e88881d0c 100644 --- a/llvm/test/Transforms/MemCpyOpt/pr37967.ll +++ b/llvm/test/Transforms/MemCpyOpt/pr37967.ll @@ -1,16 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -debugify -memcpyopt -check-debugify -S < %s 2>&1 | FileCheck %s ; CHECK: CheckModuleDebugify: PASS -; CHECK-LABEL: define {{.*}} @_Z3bar3Foo -; CHECK: [[target:%.*]] = load i8*, i8** bitcast (%struct.Foo** @a to i8**), align 8, !dbg -; CHECK: %tmpcast = bitcast i8* [[target]] to %struct.Foo*, !dbg - %struct.Foo = type { i64, i64, i64 } @a = dso_local global %struct.Foo* null, align 8 define dso_local void @_Z3bar3Foo(%struct.Foo* byval(%struct.Foo) align 8 %0) { +; CHECK-LABEL: @_Z3bar3Foo( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_FOO:%.*]], align 8, [[DBG13:!dbg !.*]] +; CHECK-NEXT: call void @llvm.dbg.value(metadata 
%struct.Foo* [[AGG_TMP]], [[META9:metadata !.*]], metadata !DIExpression()), [[DBG13]] +; CHECK-NEXT: [[TMP1:%.*]] = load i8*, i8** bitcast (%struct.Foo** @a to i8**), align 8, [[DBG14:!dbg !.*]] +; CHECK-NEXT: call void @llvm.dbg.value(metadata i8* [[TMP1]], [[META11:metadata !.*]], metadata !DIExpression()), [[DBG14]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast %struct.Foo* [[AGG_TMP]] to i8*, [[DBG15:!dbg !.*]] +; CHECK-NEXT: call void @llvm.dbg.value(metadata i8* [[TMP2]], [[META12:metadata !.*]], metadata !DIExpression()), [[DBG15]] +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 dereferenceable(24) [[TMP2]], i8* nonnull align 8 dereferenceable(24) [[TMP1]], i64 24, i1 false), [[DBG16:!dbg !.*]] +; CHECK-NEXT: [[TMPCAST:%.*]] = bitcast i8* [[TMP1]] to %struct.Foo*, [[DBG16]] +; CHECK-NEXT: call void @_Z3bar3Foo(%struct.Foo* nonnull byval(%struct.Foo) align 8 [[TMPCAST]]), [[DBG17:!dbg !.*]] +; CHECK-NEXT: ret void, [[DBG18:!dbg !.*]] +; entry: %agg.tmp = alloca %struct.Foo, align 8 %1 = load i8*, i8** bitcast (%struct.Foo** @a to i8**), align 8 diff --git a/llvm/test/Transforms/MemCpyOpt/process_store.ll b/llvm/test/Transforms/MemCpyOpt/process_store.ll index e2edef0a94f7..7b647e556b56 100644 --- a/llvm/test/Transforms/MemCpyOpt/process_store.ll +++ b/llvm/test/Transforms/MemCpyOpt/process_store.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -memcpyopt -disable-output +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -S -memcpyopt | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @@ -10,6 +11,17 @@ declare dso_local i32 @f1() ; Do not crash due to store first in BB. 
define dso_local void @f2() { +; CHECK-LABEL: @f2( +; CHECK-NEXT: for.end: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @b, align 4 +; CHECK-NEXT: ret void +; CHECK: for.body: +; CHECK-NEXT: store i32 [[TMP1:%.*]], i32* @a, align 4 +; CHECK-NEXT: [[CALL:%.*]] = call i32 @f1() +; CHECK-NEXT: [[CMP:%.*]] = icmp sge i32 [[CALL]], 0 +; CHECK-NEXT: [[TMP1]] = load i32, i32* @b, align 4 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; for.end: %0 = load i32, i32* @b, align 4 ret void @@ -24,6 +36,19 @@ for.body: ; Do not crash due to call not before store in BB. define dso_local void @f3() { +; CHECK-LABEL: @f3( +; CHECK-NEXT: for.end: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @b, align 4 +; CHECK-NEXT: ret void +; CHECK: for.body: +; CHECK-NEXT: [[T:%.*]] = add i32 [[T2:%.*]], 1 +; CHECK-NEXT: store i32 [[TMP1:%.*]], i32* @a, align 4 +; CHECK-NEXT: [[CALL:%.*]] = call i32 @f1() +; CHECK-NEXT: [[CMP:%.*]] = icmp sge i32 [[CALL]], 0 +; CHECK-NEXT: [[TMP1]] = load i32, i32* @b, align 4 +; CHECK-NEXT: [[T2]] = xor i32 [[T]], 5 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; for.end: %0 = load i32, i32* @b, align 4 ret void diff --git a/llvm/test/Transforms/MemCpyOpt/profitable-memset.ll b/llvm/test/Transforms/MemCpyOpt/profitable-memset.ll index 649d2386f960..c45ccb9c9aba 100644 --- a/llvm/test/Transforms/MemCpyOpt/profitable-memset.ll +++ b/llvm/test/Transforms/MemCpyOpt/profitable-memset.ll @@ -1,12 +1,19 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -memcpyopt -S | FileCheck %s target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" -; CHECK-LABEL: @foo( -; CHECK-NOT: store -; CHECK: call void @llvm.memset.p0i8.i64(i8* align 2 %2, i8 0, i64 8, i1 false) - define void @foo(i64* nocapture %P) { +; CHECK-LABEL: @foo( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[P:%.*]] to i16* +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP0]], i64 1 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* 
[[ARRAYIDX]] to i32* +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, i16* [[TMP0]], i64 3 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16* [[TMP0]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 2 [[TMP2]], i8 0, i64 8, i1 false) +; CHECK-NEXT: ret void +; entry: %0 = bitcast i64* %P to i16* %arrayidx = getelementptr inbounds i16, i16* %0, i64 1 diff --git a/llvm/test/Transforms/MemCpyOpt/smaller.ll b/llvm/test/Transforms/MemCpyOpt/smaller.ll index 0c82b5201dca..1aed83fdb4d6 100644 --- a/llvm/test/Transforms/MemCpyOpt/smaller.ll +++ b/llvm/test/Transforms/MemCpyOpt/smaller.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -memcpyopt -S < %s | FileCheck %s ; RUN: opt -passes=memcpyopt -S < %s | FileCheck %s ; rdar://8875553 @@ -5,8 +6,6 @@ ; Memcpyopt shouldn't optimize the second memcpy using the first ; because the first has a smaller size. -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %tmp, i8* align 4 getelementptr inbounds (%struct.s, %struct.s* @cell, i32 0, i32 0, i32 0), i32 16, i1 false) - target datalayout = "e-p:32:32:32" %struct.s = type { [11 x i8], i32 } @@ -18,6 +17,16 @@ declare void @check(%struct.s* byval %p) nounwind declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind define void @foo() nounwind { +; CHECK-LABEL: @foo( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +; CHECK-NEXT: store i32 99, i32* getelementptr inbounds (%struct.s, %struct.s* @cell, i32 0, i32 1), align 4 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 getelementptr inbounds (%struct.s, %struct.s* @cell, i32 0, i32 0, i32 0), i8* align 1 getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i32 11, i1 false) +; CHECK-NEXT: [[TMP:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.s* [[AGG_TMP]], i32 0, i32 0, i32 0 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 
[[TMP]], i8* align 4 getelementptr inbounds (%struct.s, %struct.s* @cell, i32 0, i32 0, i32 0), i32 16, i1 false) +; CHECK-NEXT: call void @check(%struct.s* byval [[AGG_TMP]]) +; CHECK-NEXT: ret void +; entry: %agg.tmp = alloca %struct.s, align 4 store i32 99, i32* getelementptr inbounds (%struct.s, %struct.s* @cell, i32 0, i32 1), align 4 diff --git a/llvm/test/Transforms/MemCpyOpt/sret.ll b/llvm/test/Transforms/MemCpyOpt/sret.ll index af625127f56b..f5ffbeaf239f 100644 --- a/llvm/test/Transforms/MemCpyOpt/sret.ll +++ b/llvm/test/Transforms/MemCpyOpt/sret.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -basic-aa -memcpyopt -S | not grep "call.*memcpy" +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -basic-aa -memcpyopt -S | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i686-apple-darwin9" @@ -6,6 +7,24 @@ target triple = "i686-apple-darwin9" %0 = type { x86_fp80, x86_fp80 } define void @ccosl(%0* noalias sret %agg.result, %0* byval align 8 %z) nounwind { +; CHECK-LABEL: @ccosl( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IZ:%.*]] = alloca [[TMP0:%.*]], align 16 +; CHECK-NEXT: [[MEMTMP:%.*]] = alloca [[TMP0]], align 16 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[TMP0]], %0* [[Z:%.*]], i32 0, i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = load x86_fp80, x86_fp80* [[TMP1]], align 16 +; CHECK-NEXT: [[TMP3:%.*]] = fsub x86_fp80 0xK80000000000000000000, [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [[TMP0]], %0* [[IZ]], i32 0, i32 1 +; CHECK-NEXT: [[REAL:%.*]] = getelementptr [[TMP0]], %0* [[IZ]], i32 0, i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr [[TMP0]], %0* [[Z]], i32 0, i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = load x86_fp80, x86_fp80* [[TMP7]], align 16 +; CHECK-NEXT: store x86_fp80 [[TMP3]], x86_fp80* [[REAL]], align 16 +; CHECK-NEXT: store x86_fp80 [[TMP8]], x86_fp80* [[TMP4]], align 16 +; CHECK-NEXT: call void 
@ccoshl(%0* noalias sret [[AGG_RESULT:%.*]], %0* byval align 8 [[IZ]]) [[ATTR0:#.*]] +; CHECK-NEXT: [[MEMTMP14:%.*]] = bitcast %0* [[MEMTMP]] to i8* +; CHECK-NEXT: [[AGG_RESULT15:%.*]] = bitcast %0* [[AGG_RESULT]] to i8* +; CHECK-NEXT: ret void +; entry: %iz = alloca %0 %memtmp = alloca %0, align 16 diff --git a/llvm/test/Transforms/MemCpyOpt/stackrestore.ll b/llvm/test/Transforms/MemCpyOpt/stackrestore.ll index 4bead3381ccd..6f7a7c898dd9 100644 --- a/llvm/test/Transforms/MemCpyOpt/stackrestore.ll +++ b/llvm/test/Transforms/MemCpyOpt/stackrestore.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -memcpyopt < %s | FileCheck %s ; PR40118: BasicAA didn't realize that stackrestore ends the lifetime of @@ -14,6 +15,20 @@ target triple = "i686-unknown-windows-msvc19.14.26433" ; a call to @external. define i32 @test_norestore(i32 %n) { +; CHECK-LABEL: @test_norestore( +; CHECK-NEXT: [[TMPMEM:%.*]] = alloca [10 x i8], align 4 +; CHECK-NEXT: [[TMP:%.*]] = getelementptr inbounds [10 x i8], [10 x i8]* [[TMPMEM]], i32 0, i32 0 +; CHECK-NEXT: [[P:%.*]] = alloca i8, i32 [[N:%.*]], align 4 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[P]], i8* align 1 getelementptr inbounds ([9 x i8], [9 x i8]* @str, i32 0, i32 0), i32 9, i1 false) +; CHECK-NEXT: [[P10:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 9 +; CHECK-NEXT: store i8 0, i8* [[P10]], align 1 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP]], i8* [[P]], i32 10, i1 false) +; CHECK-NEXT: call void @external() +; CHECK-NEXT: [[HEAP:%.*]] = call i8* @malloc(i32 9) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[HEAP]], i8* [[P]], i32 9, i1 false) +; CHECK-NEXT: call void @useit(i8* [[HEAP]]) +; CHECK-NEXT: ret i32 0 +; %tmpmem = alloca [10 x i8], align 4 %tmp = getelementptr inbounds [10 x i8], [10 x i8]* %tmpmem, i32 0, i32 0 @@ -33,15 +48,25 @@ define i32 @test_norestore(i32 %n) { ret i32 0 } -; CHECK-LABEL: define i32 
@test_norestore(i32 %n) -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* align 1 getelementptr inbounds ([9 x i8], [9 x i8]* @str, i32 0, i32 0), i32 9, i1 false) -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp, i8* %p, i32 10, i1 false) -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %heap, i8* %p, i32 9, i1 false) - - ; Do not propagate memcpy from %p across the stackrestore. define i32 @test_stackrestore() { +; CHECK-LABEL: @test_stackrestore( +; CHECK-NEXT: [[TMPMEM:%.*]] = alloca [10 x i8], align 4 +; CHECK-NEXT: [[TMP:%.*]] = getelementptr inbounds [10 x i8], [10 x i8]* [[TMPMEM]], i32 0, i32 0 +; CHECK-NEXT: [[INALLOCA_SAVE:%.*]] = tail call i8* @llvm.stacksave() +; CHECK-NEXT: [[ARGMEM:%.*]] = alloca inalloca [10 x i8], align 4 +; CHECK-NEXT: [[P:%.*]] = getelementptr inbounds [10 x i8], [10 x i8]* [[ARGMEM]], i32 0, i32 0 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[P]], i8* align 1 getelementptr inbounds ([9 x i8], [9 x i8]* @str, i32 0, i32 0), i32 9, i1 false) +; CHECK-NEXT: [[P10:%.*]] = getelementptr inbounds [10 x i8], [10 x i8]* [[ARGMEM]], i32 0, i32 9 +; CHECK-NEXT: store i8 0, i8* [[P10]], align 1 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP]], i8* [[P]], i32 10, i1 false) +; CHECK-NEXT: call void @llvm.stackrestore(i8* [[INALLOCA_SAVE]]) +; CHECK-NEXT: [[HEAP:%.*]] = call i8* @malloc(i32 9) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[HEAP]], i8* [[TMP]], i32 9, i1 false) +; CHECK-NEXT: call void @useit(i8* [[HEAP]]) +; CHECK-NEXT: ret i32 0 +; %tmpmem = alloca [10 x i8], align 4 %tmp = getelementptr inbounds [10 x i8], [10 x i8]* %tmpmem, i32 0, i32 0 %inalloca.save = tail call i8* @llvm.stacksave() @@ -61,11 +86,6 @@ define i32 @test_stackrestore() { ret i32 0 } -; CHECK-LABEL: define i32 @test_stackrestore() -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* align 1 getelementptr inbounds ([9 x i8], [9 x i8]* @str, i32 0, i32 0), i32 9, i1 false) -; CHECK: call void 
@llvm.memcpy.p0i8.p0i8.i32(i8* %tmp, i8* %p, i32 10, i1 false) -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %heap, i8* %tmp, i32 9, i1 false) - declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i1) declare i8* @llvm.stacksave() declare void @llvm.stackrestore(i8*) diff --git a/llvm/test/Transforms/MemCpyOpt/store-to-memset-is-nonzero-type.ll b/llvm/test/Transforms/MemCpyOpt/store-to-memset-is-nonzero-type.ll index f75b63edef35..81d3da0966d9 100644 --- a/llvm/test/Transforms/MemCpyOpt/store-to-memset-is-nonzero-type.ll +++ b/llvm/test/Transforms/MemCpyOpt/store-to-memset-is-nonzero-type.ll @@ -65,7 +65,7 @@ define void @vector_fixed_length_nonzero(<16 x i8>* %p) { define void @vector_scalable_nonzero(* %p) { ; CHECK-LABEL: @vector_scalable_nonzero( -; CHECK-NEXT: store zeroinitializer, * [[P:%.*]] +; CHECK-NEXT: store zeroinitializer, * [[P:%.*]], align 16 ; CHECK-NEXT: ret void ; store zeroinitializer, * %p diff --git a/llvm/test/Transforms/MemCpyOpt/store-to-memset.ll b/llvm/test/Transforms/MemCpyOpt/store-to-memset.ll index 59ed892b60ee..51651e73e2bc 100644 --- a/llvm/test/Transforms/MemCpyOpt/store-to-memset.ll +++ b/llvm/test/Transforms/MemCpyOpt/store-to-memset.ll @@ -1,8 +1,48 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -memcpyopt -S | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-grtev4-linux-gnu" define i8* @foo(i8* returned %0, i32 %1, i64 %2) { +; CHECK-LABEL: @foo( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[TMP0:%.*]], i64 [[TMP2:%.*]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[TMP3]], i64 -32 +; CHECK-NEXT: [[VV:%.*]] = trunc i32 [[TMP1:%.*]] to i8 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i64 1 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i64 2 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr 
inbounds i8, i8* [[TMP4]], i64 3 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i64 4 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i64 5 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i64 6 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i64 7 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i64 8 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i64 9 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i64 10 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i64 11 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i64 12 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i64 13 +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i64 14 +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i64 15 +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i64 16 +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, i8* [[TMP20]], i64 1 +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, i8* [[TMP20]], i64 2 +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, i8* [[TMP20]], i64 3 +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, i8* [[TMP20]], i64 4 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, i8* [[TMP20]], i64 5 +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, i8* [[TMP20]], i64 6 +; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, i8* [[TMP20]], i64 7 +; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, i8* [[TMP20]], i64 8 +; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i8, i8* [[TMP20]], i64 9 +; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, i8* [[TMP20]], i64 10 +; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i8, i8* [[TMP20]], i64 11 +; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i8, i8* [[TMP20]], i64 
12 +; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds i8, i8* [[TMP20]], i64 13 +; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i8, i8* [[TMP20]], i64 14 +; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i8, i8* [[TMP20]], i64 15 +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[TMP4]], i8 [[VV]], i64 32, i1 false) +; CHECK-NEXT: ret i8* [[TMP0]] +; entry: %3 = getelementptr inbounds i8, i8* %0, i64 %2 %4 = getelementptr inbounds i8, i8* %3, i64 -32 @@ -71,7 +111,5 @@ entry: %35 = getelementptr inbounds i8, i8* %20, i64 15 store i8 %vv, i8* %35, align 1 ret i8* %0 -; CHECK-LABEL: @foo -; CHECK: call void @llvm.memset.p0i8.i64(i8* align 1 %4, i8 %vv, i64 32, i1 false) } diff --git a/llvm/test/Transforms/MemCpyOpt/vscale-memset.ll b/llvm/test/Transforms/MemCpyOpt/vscale-memset.ll index 256bd8518dc1..952a57796f87 100644 --- a/llvm/test/Transforms/MemCpyOpt/vscale-memset.ll +++ b/llvm/test/Transforms/MemCpyOpt/vscale-memset.ll @@ -9,9 +9,9 @@ define void @foo(i8* %p) { ; CHECK-LABEL: @foo( ; CHECK-NEXT: [[A:%.*]] = bitcast i8* [[P:%.*]] to * ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr , * [[A]], i64 0 -; CHECK-NEXT: store zeroinitializer, * [[TMP0]] +; CHECK-NEXT: store zeroinitializer, * [[TMP0]], align 16 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr , * [[A]], i64 1 -; CHECK-NEXT: store zeroinitializer, * [[TMP1]] +; CHECK-NEXT: store zeroinitializer, * [[TMP1]], align 16 ; CHECK-NEXT: ret void ; %a = bitcast i8* %p to *