From bb6f4d32aac3eecb51909f4facc625219307ee68 Mon Sep 17 00:00:00 2001
From: Roman Lebedev
Date: Sat, 19 Sep 2020 21:13:35 +0300
Subject: [PATCH] [NFC][PhaseOrdering] Add test showing SROA not being performed after loop unrolling

---
 .../X86/SROA-after-loop-unrolling.ll          | 234 ++++++++++++++++++
 1 file changed, 234 insertions(+)
 create mode 100644 llvm/test/Transforms/PhaseOrdering/X86/SROA-after-loop-unrolling.ll

diff --git a/llvm/test/Transforms/PhaseOrdering/X86/SROA-after-loop-unrolling.ll b/llvm/test/Transforms/PhaseOrdering/X86/SROA-after-loop-unrolling.ll
new file mode 100644
index 000000000000..5b289ee586bc
--- /dev/null
+++ b/llvm/test/Transforms/PhaseOrdering/X86/SROA-after-loop-unrolling.ll
@@ -0,0 +1,234 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -O3 -S | FileCheck %s --check-prefixes=CHECK,OLDPM
+; RUN: opt < %s -passes='default<O3>' -aa-pipeline=default -S | FileCheck %s --check-prefixes=CHECK,NEWPM
+
+; This is based on the following most basic C++ code:
+;
+; #include <array>
+; void use(int);
+; void foo(int cnt) {
+;   std::array<int, 6> arr;
+;   for(int& elt : arr)
+;     elt = ++cnt;
+;   for(int& elt : arr)
+;     use(elt);
+; }
+;
+; Not only should the loops be unrolled, no alloca's should be left there.
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%"struct.std::array" = type { [6 x i32] } ; IR layout of std::array<int, 6>
+
+define dso_local void @_Z3fooi(i32 %cnt) { ; void foo(int cnt)
+; OLDPM-LABEL: @_Z3fooi(
+; OLDPM-NEXT:  entry:
+; OLDPM-NEXT:    [[ARR:%.*]] = alloca %"struct.std::array", align 16
+; OLDPM-NEXT:    [[TMP0:%.*]] = bitcast %"struct.std::array"* [[ARR]] to i8*
+; OLDPM-NEXT:    call void @llvm.lifetime.start.p0i8(i64 24, i8* nonnull [[TMP0]])
+; OLDPM-NEXT:    [[ARRAYDECAY_I_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* [[ARR]], i64 0, i32 0, i64 0
+; OLDPM-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* [[ARR]], i64 0, i32 0, i64 1
+; OLDPM-NEXT:    [[INCDEC_PTR_1:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* [[ARR]], i64 0, i32 0, i64 2
+; OLDPM-NEXT:    [[INCDEC_PTR_2:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* [[ARR]], i64 0, i32 0, i64 3
+; OLDPM-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> undef, i32 [[CNT:%.*]], i32 0
+; OLDPM-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> zeroinitializer
+; OLDPM-NEXT:    [[TMP3:%.*]] = add nsw <4 x i32> [[TMP2]], <i32 1, i32 2, i32 3, i32 4>
+; OLDPM-NEXT:    [[TMP4:%.*]] = bitcast %"struct.std::array"* [[ARR]] to <4 x i32>*
+; OLDPM-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 16
+; OLDPM-NEXT:    [[INCDEC_PTR_3:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* [[ARR]], i64 0, i32 0, i64 4
+; OLDPM-NEXT:    [[INC_4:%.*]] = add nsw i32 [[CNT]], 5
+; OLDPM-NEXT:    store i32 [[INC_4]], i32* [[INCDEC_PTR_3]], align 16
+; OLDPM-NEXT:    [[INC_5:%.*]] = add nsw i32 [[CNT]], 6
+; OLDPM-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYDECAY_I_I_I]], align 16
+; OLDPM-NEXT:    call void @_Z3usei(i32 [[TMP5]])
+; OLDPM-NEXT:    [[TMP6:%.*]] = load i32, i32* [[INCDEC_PTR]], align 4
+; OLDPM-NEXT:    call void @_Z3usei(i32 [[TMP6]])
+; OLDPM-NEXT:    [[TMP7:%.*]] = load i32, i32* [[INCDEC_PTR_1]], align 8
+; OLDPM-NEXT:    call void @_Z3usei(i32 [[TMP7]])
+; OLDPM-NEXT:    [[TMP8:%.*]] = load i32, i32* [[INCDEC_PTR_2]], align 4
+; OLDPM-NEXT:    call void @_Z3usei(i32 [[TMP8]])
+; OLDPM-NEXT:    [[TMP9:%.*]] = load i32, i32* [[INCDEC_PTR_3]], align 16
+; OLDPM-NEXT:    call void @_Z3usei(i32 [[TMP9]])
+; OLDPM-NEXT:    call void @_Z3usei(i32 [[INC_5]])
+; OLDPM-NEXT:    call void @llvm.lifetime.end.p0i8(i64 24, i8* nonnull [[TMP0]])
+; OLDPM-NEXT:    ret void
+;
+; NEWPM-LABEL: @_Z3fooi(
+; NEWPM-NEXT:  entry:
+; NEWPM-NEXT:    [[INC:%.*]] = add nsw i32 [[CNT:%.*]], 1
+; NEWPM-NEXT:    [[INC_1:%.*]] = add nsw i32 [[CNT]], 2
+; NEWPM-NEXT:    [[INC_2:%.*]] = add nsw i32 [[CNT]], 3
+; NEWPM-NEXT:    [[INC_3:%.*]] = add nsw i32 [[CNT]], 4
+; NEWPM-NEXT:    [[INC_4:%.*]] = add nsw i32 [[CNT]], 5
+; NEWPM-NEXT:    [[INC_5:%.*]] = add nsw i32 [[CNT]], 6
+; NEWPM-NEXT:    call void @_Z3usei(i32 [[INC]])
+; NEWPM-NEXT:    call void @_Z3usei(i32 [[INC_1]])
+; NEWPM-NEXT:    call void @_Z3usei(i32 [[INC_2]])
+; NEWPM-NEXT:    call void @_Z3usei(i32 [[INC_3]])
+; NEWPM-NEXT:    call void @_Z3usei(i32 [[INC_4]])
+; NEWPM-NEXT:    call void @_Z3usei(i32 [[INC_5]])
+; NEWPM-NEXT:    ret void
+;
+entry: ; one alloca per variable: cnt, arr, and __range/__begin/__end/elt for each of the two loops
+  %cnt.addr = alloca i32
+  %arr = alloca %"struct.std::array"
+  %__range1 = alloca %"struct.std::array"*
+  %__begin1 = alloca i32*
+  %__end1 = alloca i32*
+  %elt = alloca i32*
+  %__range12 = alloca %"struct.std::array"*
+  %__begin13 = alloca i32*
+  %__end15 = alloca i32*
+  %elt11 = alloca i32*
+  store i32 %cnt, i32* %cnt.addr
+  %0 = bitcast %"struct.std::array"* %arr to i8*
+  call void @llvm.lifetime.start.p0i8(i64 24, i8* %0)
+  %1 = bitcast %"struct.std::array"** %__range1 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 8, i8* %1)
+  store %"struct.std::array"* %arr, %"struct.std::array"** %__range1
+  %2 = bitcast i32** %__begin1 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 8, i8* %2)
+  %3 = load %"struct.std::array"*, %"struct.std::array"** %__range1
+  %call = call i32* @_ZNSt5arrayIiLm6EE5beginEv(%"struct.std::array"* %3)
+  store i32* %call, i32** %__begin1
+  %4 = bitcast i32** %__end1 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 8, i8* %4)
+  %5 = load %"struct.std::array"*, %"struct.std::array"** %__range1
+  %call1 = call i32* @_ZNSt5arrayIiLm6EE3endEv(%"struct.std::array"* %5)
+  store i32* %call1, i32** %__end1
+  br label %for.cond
+
+for.cond: ; first loop: keep iterating while __begin1 != __end1
+  %6 = load i32*, i32** %__begin1
+  %7 = load i32*, i32** %__end1
+  %cmp = icmp ne i32* %6, %7
+  br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup: ; first loop finished: end the lifetimes of __end1, __begin1 and __range1
+  %8 = bitcast i32** %__end1 to i8*
+  call void @llvm.lifetime.end.p0i8(i64 8, i8* %8)
+  %9 = bitcast i32** %__begin1 to i8*
+  call void @llvm.lifetime.end.p0i8(i64 8, i8* %9)
+  %10 = bitcast %"struct.std::array"** %__range1 to i8*
+  call void @llvm.lifetime.end.p0i8(i64 8, i8* %10)
+  br label %for.end
+
+for.body: ; elt = ++cnt: bump cnt in memory, then store the new value through the current element pointer
+  %11 = bitcast i32** %elt to i8*
+  call void @llvm.lifetime.start.p0i8(i64 8, i8* %11)
+  %12 = load i32*, i32** %__begin1
+  store i32* %12, i32** %elt
+  %13 = load i32, i32* %cnt.addr
+  %inc = add nsw i32 %13, 1
+  store i32 %inc, i32* %cnt.addr
+  %14 = load i32*, i32** %elt
+  store i32 %inc, i32* %14
+  %15 = bitcast i32** %elt to i8*
+  call void @llvm.lifetime.end.p0i8(i64 8, i8* %15)
+  br label %for.inc
+
+for.inc: ; advance __begin1 by one i32 element
+  %16 = load i32*, i32** %__begin1
+  %incdec.ptr = getelementptr inbounds i32, i32* %16, i32 1
+  store i32* %incdec.ptr, i32** %__begin1
+  br label %for.cond
+
+for.end: ; set up the second range-for over the same %arr
+  %17 = bitcast %"struct.std::array"** %__range12 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 8, i8* %17)
+  store %"struct.std::array"* %arr, %"struct.std::array"** %__range12
+  %18 = bitcast i32** %__begin13 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 8, i8* %18)
+  %19 = load %"struct.std::array"*, %"struct.std::array"** %__range12
+  %call4 = call i32* @_ZNSt5arrayIiLm6EE5beginEv(%"struct.std::array"* %19)
+  store i32* %call4, i32** %__begin13
+  %20 = bitcast i32** %__end15 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 8, i8* %20)
+  %21 = load %"struct.std::array"*, %"struct.std::array"** %__range12
+  %call6 = call i32* @_ZNSt5arrayIiLm6EE3endEv(%"struct.std::array"* %21)
+  store i32* %call6, i32** %__end15
+  br label %for.cond7
+
+for.cond7: ; second loop: keep iterating while __begin13 != __end15
+  %22 = load i32*, i32** %__begin13
+  %23 = load i32*, i32** %__end15
+  %cmp8 = icmp ne i32* %22, %23
+  br i1 %cmp8, label %for.body10, label %for.cond.cleanup9
+
+for.cond.cleanup9: ; second loop finished: end the lifetimes of __end15, __begin13 and __range12
+  %24 = bitcast i32** %__end15 to i8*
+  call void @llvm.lifetime.end.p0i8(i64 8, i8* %24)
+  %25 = bitcast i32** %__begin13 to i8*
+  call void @llvm.lifetime.end.p0i8(i64 8, i8* %25)
+  %26 = bitcast %"struct.std::array"** %__range12 to i8*
+  call void @llvm.lifetime.end.p0i8(i64 8, i8* %26)
+  br label %for.end14
+
+for.body10: ; use(elt): load the current element and pass it to @_Z3usei
+  %27 = bitcast i32** %elt11 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 8, i8* %27)
+  %28 = load i32*, i32** %__begin13
+  store i32* %28, i32** %elt11
+  %29 = load i32*, i32** %elt11
+  %30 = load i32, i32* %29
+  call void @_Z3usei(i32 %30)
+  %31 = bitcast i32** %elt11 to i8*
+  call void @llvm.lifetime.end.p0i8(i64 8, i8* %31)
+  br label %for.inc12
+
+for.inc12: ; advance __begin13 by one i32 element
+  %32 = load i32*, i32** %__begin13
+  %incdec.ptr13 = getelementptr inbounds i32, i32* %32, i32 1
+  store i32* %incdec.ptr13, i32** %__begin13
+  br label %for.cond7
+
+for.end14: ; end the lifetime of %arr and return
+  %33 = bitcast %"struct.std::array"* %arr to i8*
+  call void @llvm.lifetime.end.p0i8(i64 24, i8* %33)
+  ret void
+}
+
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture)
+
+define linkonce_odr dso_local i32* @_ZNSt5arrayIiLm6EE5beginEv(%"struct.std::array"* %this) { ; std::array<int, 6>::begin(): returns data()
+entry:
+  %this.addr = alloca %"struct.std::array"*
+  store %"struct.std::array"* %this, %"struct.std::array"** %this.addr
+  %this1 = load %"struct.std::array"*, %"struct.std::array"** %this.addr
+  %call = call i32* @_ZNSt5arrayIiLm6EE4dataEv(%"struct.std::array"* %this1)
+  ret i32* %call
+}
+
+define linkonce_odr dso_local i32* @_ZNSt5arrayIiLm6EE3endEv(%"struct.std::array"* %this) { ; std::array<int, 6>::end(): returns data() + 6
+entry:
+  %this.addr = alloca %"struct.std::array"*
+  store %"struct.std::array"* %this, %"struct.std::array"** %this.addr
+  %this1 = load %"struct.std::array"*, %"struct.std::array"** %this.addr
+  %call = call i32* @_ZNSt5arrayIiLm6EE4dataEv(%"struct.std::array"* %this1)
+  %add.ptr = getelementptr inbounds i32, i32* %call, i64 6 ; six i32 elements past data()
+  ret i32* %add.ptr
+}
+
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture)
+
+declare dso_local void @_Z3usei(i32) ; void use(int); declared only, so its calls remain in the output
+
+define linkonce_odr dso_local i32* @_ZNSt5arrayIiLm6EE4dataEv(%"struct.std::array"* %this) { ; std::array<int, 6>::data()
+entry:
+  %this.addr = alloca %"struct.std::array"*
+  store %"struct.std::array"* %this, %"struct.std::array"** %this.addr
+  %this1 = load %"struct.std::array"*, %"struct.std::array"** %this.addr
+  %_M_elems = getelementptr inbounds %"struct.std::array", %"struct.std::array"* %this1, i32 0, i32 0 ; address of the struct's only member, the [6 x i32]
+  %call = call i32* @_ZNSt14__array_traitsIiLm6EE6_S_ptrERA6_Ki([6 x i32]* nonnull align 4 dereferenceable(24) %_M_elems)
+  ret i32* %call
+}
+
+define linkonce_odr dso_local i32* @_ZNSt14__array_traitsIiLm6EE6_S_ptrERA6_Ki([6 x i32]* nonnull align 4 dereferenceable(24) %__t) { ; std::__array_traits<int, 6>::_S_ptr(const int (&)[6])
+entry:
+  %__t.addr = alloca [6 x i32]*
+  store [6 x i32]* %__t, [6 x i32]** %__t.addr
+  %0 = load [6 x i32]*, [6 x i32]** %__t.addr
+  %arraydecay = getelementptr inbounds [6 x i32], [6 x i32]* %0, i64 0, i64 0 ; pointer to element 0 of the array
+  ret i32* %arraydecay
+}