From 61d8e0003c2faef38d8a84527b06d525b09795ee Mon Sep 17 00:00:00 2001
From: Alexey Bataev
Date: Mon, 23 Jan 2017 11:53:16 +0000
Subject: [PATCH] [SLP] Additional test for SLP vectorizer with 31 reduction elements.

llvm-svn: 292783
---
 .../SLPVectorizer/X86/horizontal-list.ll      | 200 ++++++++++++++++++
 1 file changed, 200 insertions(+)

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll
index a3374b0be250..b7c39599ed81 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll
@@ -773,3 +773,203 @@ define float @f1(float* nocapture readonly %x, i32 %a, i32 %b) {
   ret float %add.31
 }
 
+; Sums the 30 floats x[1]..x[30] through a single chain of 29 'fadd fast'.
+; Expected result: x[1]..x[14] stay scalar loads, x[15]..x[30] become one
+; <16 x float> load reduced by four shuffle/fadd halving steps, and the
+; scalar loads are then added to the extracted lane 0 in reverse order.
+define float @loadadd31(float* nocapture readonly %x) {
+; CHECK-LABEL: @loadadd31(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1
+; CHECK-NEXT:    [[TMP0:%.*]] = load float, float* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
+; CHECK-NEXT:    [[TMP1:%.*]] = load float, float* [[ARRAYIDX_1]], align 4
+; CHECK-NEXT:    [[ADD_1:%.*]] = fadd fast float [[TMP1]], [[TMP0]]
+; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
+; CHECK-NEXT:    [[TMP2:%.*]] = load float, float* [[ARRAYIDX_2]], align 4
+; CHECK-NEXT:    [[ADD_2:%.*]] = fadd fast float [[TMP2]], [[ADD_1]]
+; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load float, float* [[ARRAYIDX_3]], align 4
+; CHECK-NEXT:    [[ADD_3:%.*]] = fadd fast float [[TMP3]], [[ADD_2]]
+; CHECK-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5
+; CHECK-NEXT:    [[TMP4:%.*]] = load float, float* [[ARRAYIDX_4]], align 4
+; CHECK-NEXT:    [[ADD_4:%.*]] = fadd fast float [[TMP4]], [[ADD_3]]
+; CHECK-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6
+; CHECK-NEXT:    [[TMP5:%.*]] = load float, float* [[ARRAYIDX_5]], align 4
+; CHECK-NEXT:    [[ADD_5:%.*]] = fadd fast float [[TMP5]], [[ADD_4]]
+; CHECK-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
+; CHECK-NEXT:    [[TMP6:%.*]] = load float, float* [[ARRAYIDX_6]], align 4
+; CHECK-NEXT:    [[ADD_6:%.*]] = fadd fast float [[TMP6]], [[ADD_5]]
+; CHECK-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds float, float* [[X]], i64 8
+; CHECK-NEXT:    [[TMP7:%.*]] = load float, float* [[ARRAYIDX_7]], align 4
+; CHECK-NEXT:    [[ADD_7:%.*]] = fadd fast float [[TMP7]], [[ADD_6]]
+; CHECK-NEXT:    [[ARRAYIDX_8:%.*]] = getelementptr inbounds float, float* [[X]], i64 9
+; CHECK-NEXT:    [[TMP8:%.*]] = load float, float* [[ARRAYIDX_8]], align 4
+; CHECK-NEXT:    [[ADD_8:%.*]] = fadd fast float [[TMP8]], [[ADD_7]]
+; CHECK-NEXT:    [[ARRAYIDX_9:%.*]] = getelementptr inbounds float, float* [[X]], i64 10
+; CHECK-NEXT:    [[TMP9:%.*]] = load float, float* [[ARRAYIDX_9]], align 4
+; CHECK-NEXT:    [[ADD_9:%.*]] = fadd fast float [[TMP9]], [[ADD_8]]
+; CHECK-NEXT:    [[ARRAYIDX_10:%.*]] = getelementptr inbounds float, float* [[X]], i64 11
+; CHECK-NEXT:    [[TMP10:%.*]] = load float, float* [[ARRAYIDX_10]], align 4
+; CHECK-NEXT:    [[ADD_10:%.*]] = fadd fast float [[TMP10]], [[ADD_9]]
+; CHECK-NEXT:    [[ARRAYIDX_11:%.*]] = getelementptr inbounds float, float* [[X]], i64 12
+; CHECK-NEXT:    [[TMP11:%.*]] = load float, float* [[ARRAYIDX_11]], align 4
+; CHECK-NEXT:    [[ADD_11:%.*]] = fadd fast float [[TMP11]], [[ADD_10]]
+; CHECK-NEXT:    [[ARRAYIDX_12:%.*]] = getelementptr inbounds float, float* [[X]], i64 13
+; CHECK-NEXT:    [[TMP12:%.*]] = load float, float* [[ARRAYIDX_12]], align 4
+; CHECK-NEXT:    [[ADD_12:%.*]] = fadd fast float [[TMP12]], [[ADD_11]]
+; CHECK-NEXT:    [[ARRAYIDX_13:%.*]] = getelementptr inbounds float, float* [[X]], i64 14
+; CHECK-NEXT:    [[TMP13:%.*]] = load float, float* [[ARRAYIDX_13]], align 4
+; CHECK-NEXT:    [[ADD_13:%.*]] = fadd fast float [[TMP13]], [[ADD_12]]
+; CHECK-NEXT:    [[ARRAYIDX_14:%.*]] = getelementptr inbounds float, float* [[X]], i64 15
+; CHECK-NEXT:    [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 16
+; CHECK-NEXT:    [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 17
+; CHECK-NEXT:    [[ARRAYIDX_17:%.*]] = getelementptr inbounds float, float* [[X]], i64 18
+; CHECK-NEXT:    [[ARRAYIDX_18:%.*]] = getelementptr inbounds float, float* [[X]], i64 19
+; CHECK-NEXT:    [[ARRAYIDX_19:%.*]] = getelementptr inbounds float, float* [[X]], i64 20
+; CHECK-NEXT:    [[ARRAYIDX_20:%.*]] = getelementptr inbounds float, float* [[X]], i64 21
+; CHECK-NEXT:    [[ARRAYIDX_21:%.*]] = getelementptr inbounds float, float* [[X]], i64 22
+; CHECK-NEXT:    [[ARRAYIDX_22:%.*]] = getelementptr inbounds float, float* [[X]], i64 23
+; CHECK-NEXT:    [[ARRAYIDX_23:%.*]] = getelementptr inbounds float, float* [[X]], i64 24
+; CHECK-NEXT:    [[ARRAYIDX_24:%.*]] = getelementptr inbounds float, float* [[X]], i64 25
+; CHECK-NEXT:    [[ARRAYIDX_25:%.*]] = getelementptr inbounds float, float* [[X]], i64 26
+; CHECK-NEXT:    [[ARRAYIDX_26:%.*]] = getelementptr inbounds float, float* [[X]], i64 27
+; CHECK-NEXT:    [[ARRAYIDX_27:%.*]] = getelementptr inbounds float, float* [[X]], i64 28
+; CHECK-NEXT:    [[ARRAYIDX_28:%.*]] = getelementptr inbounds float, float* [[X]], i64 29
+; CHECK-NEXT:    [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, float* [[X]], i64 30
+; CHECK-NEXT:    [[TMP14:%.*]] = bitcast float* [[ARRAYIDX_14]] to <16 x float>*
+; CHECK-NEXT:    [[TMP15:%.*]] = load <16 x float>, <16 x float>* [[TMP14]], align 4
+; CHECK-NEXT:    [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]]
+; CHECK-NEXT:    [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]]
+; CHECK-NEXT:    [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]]
+; CHECK-NEXT:    [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]]
+; CHECK-NEXT:    [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]]
+; CHECK-NEXT:    [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]]
+; CHECK-NEXT:    [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]]
+; CHECK-NEXT:    [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]]
+; CHECK-NEXT:    [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]]
+; CHECK-NEXT:    [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]]
+; CHECK-NEXT:    [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]]
+; CHECK-NEXT:    [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]]
+; CHECK-NEXT:    [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]]
+; CHECK-NEXT:    [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]]
+; CHECK-NEXT:    [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]]
+; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <16 x float> [[TMP15]], <16 x float> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <16 x float> [[TMP15]], [[RDX_SHUF]]
+; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <16 x float> [[BIN_RDX]], <16 x float> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <16 x float> [[BIN_RDX]], [[RDX_SHUF1]]
+; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <16 x float> [[BIN_RDX2]], <16 x float> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[BIN_RDX4:%.*]] = fadd fast <16 x float> [[BIN_RDX2]], [[RDX_SHUF3]]
+; CHECK-NEXT:    [[RDX_SHUF5:%.*]] = shufflevector <16 x float> [[BIN_RDX4]], <16 x float> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[BIN_RDX6:%.*]] = fadd fast <16 x float> [[BIN_RDX4]], [[RDX_SHUF5]]
+; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <16 x float> [[BIN_RDX6]], i32 0
+; CHECK-NEXT:    [[TMP17:%.*]] = fadd fast float [[TMP16]], [[TMP13]]
+; CHECK-NEXT:    [[TMP18:%.*]] = fadd fast float [[TMP17]], [[TMP12]]
+; CHECK-NEXT:    [[TMP19:%.*]] = fadd fast float [[TMP18]], [[TMP11]]
+; CHECK-NEXT:    [[TMP20:%.*]] = fadd fast float [[TMP19]], [[TMP10]]
+; CHECK-NEXT:    [[TMP21:%.*]] = fadd fast float [[TMP20]], [[TMP9]]
+; CHECK-NEXT:    [[TMP22:%.*]] = fadd fast float [[TMP21]], [[TMP8]]
+; CHECK-NEXT:    [[TMP23:%.*]] = fadd fast float [[TMP22]], [[TMP7]]
+; CHECK-NEXT:    [[TMP24:%.*]] = fadd fast float [[TMP23]], [[TMP6]]
+; CHECK-NEXT:    [[TMP25:%.*]] = fadd fast float [[TMP24]], [[TMP5]]
+; CHECK-NEXT:    [[TMP26:%.*]] = fadd fast float [[TMP25]], [[TMP4]]
+; CHECK-NEXT:    [[TMP27:%.*]] = fadd fast float [[TMP26]], [[TMP3]]
+; CHECK-NEXT:    [[TMP28:%.*]] = fadd fast float [[TMP27]], [[TMP2]]
+; CHECK-NEXT:    [[TMP29:%.*]] = fadd fast float [[TMP28]], [[TMP1]]
+; CHECK-NEXT:    [[TMP30:%.*]] = fadd fast float [[TMP29]], [[TMP0]]
+; CHECK-NEXT:    [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]]
+; CHECK-NEXT:    ret float [[TMP30]]
+;
+  entry:
+  %arrayidx = getelementptr inbounds float, float* %x, i64 1
+  %0 = load float, float* %arrayidx, align 4
+  %arrayidx.1 = getelementptr inbounds float, float* %x, i64 2
+  %1 = load float, float* %arrayidx.1, align 4
+  %add.1 = fadd fast float %1, %0
+  %arrayidx.2 = getelementptr inbounds float, float* %x, i64 3
+  %2 = load float, float* %arrayidx.2, align 4
+  %add.2 = fadd fast float %2, %add.1
+  %arrayidx.3 = getelementptr inbounds float, float* %x, i64 4
+  %3 = load float, float* %arrayidx.3, align 4
+  %add.3 = fadd fast float %3, %add.2
+  %arrayidx.4 = getelementptr inbounds float, float* %x, i64 5
+  %4 = load float, float* %arrayidx.4, align 4
+  %add.4 = fadd fast float %4, %add.3
+  %arrayidx.5 = getelementptr inbounds float, float* %x, i64 6
+  %5 = load float, float* %arrayidx.5, align 4
+  %add.5 = fadd fast float %5, %add.4
+  %arrayidx.6 = getelementptr inbounds float, float* %x, i64 7
+  %6 = load float, float* %arrayidx.6, align 4
+  %add.6 = fadd fast float %6, %add.5
+  %arrayidx.7 = getelementptr inbounds float, float* %x, i64 8
+  %7 = load float, float* %arrayidx.7, align 4
+  %add.7 = fadd fast float %7, %add.6
+  %arrayidx.8 = getelementptr inbounds float, float* %x, i64 9
+  %8 = load float, float* %arrayidx.8, align 4
+  %add.8 = fadd fast float %8, %add.7
+  %arrayidx.9 = getelementptr inbounds float, float* %x, i64 10
+  %9 = load float, float* %arrayidx.9, align 4
+  %add.9 = fadd fast float %9, %add.8
+  %arrayidx.10 = getelementptr inbounds float, float* %x, i64 11
+  %10 = load float, float* %arrayidx.10, align 4
+  %add.10 = fadd fast float %10, %add.9
+  %arrayidx.11 = getelementptr inbounds float, float* %x, i64 12
+  %11 = load float, float* %arrayidx.11, align 4
+  %add.11 = fadd fast float %11, %add.10
+  %arrayidx.12 = getelementptr inbounds float, float* %x, i64 13
+  %12 = load float, float* %arrayidx.12, align 4
+  %add.12 = fadd fast float %12, %add.11
+  %arrayidx.13 = getelementptr inbounds float, float* %x, i64 14
+  %13 = load float, float* %arrayidx.13, align 4
+  %add.13 = fadd fast float %13, %add.12
+  %arrayidx.14 = getelementptr inbounds float, float* %x, i64 15
+  %14 = load float, float* %arrayidx.14, align 4
+  %add.14 = fadd fast float %14, %add.13
+  %arrayidx.15 = getelementptr inbounds float, float* %x, i64 16
+  %15 = load float, float* %arrayidx.15, align 4
+  %add.15 = fadd fast float %15, %add.14
+  %arrayidx.16 = getelementptr inbounds float, float* %x, i64 17
+  %16 = load float, float* %arrayidx.16, align 4
+  %add.16 = fadd fast float %16, %add.15
+  %arrayidx.17 = getelementptr inbounds float, float* %x, i64 18
+  %17 = load float, float* %arrayidx.17, align 4
+  %add.17 = fadd fast float %17, %add.16
+  %arrayidx.18 = getelementptr inbounds float, float* %x, i64 19
+  %18 = load float, float* %arrayidx.18, align 4
+  %add.18 = fadd fast float %18, %add.17
+  %arrayidx.19 = getelementptr inbounds float, float* %x, i64 20
+  %19 = load float, float* %arrayidx.19, align 4
+  %add.19 = fadd fast float %19, %add.18
+  %arrayidx.20 = getelementptr inbounds float, float* %x, i64 21
+  %20 = load float, float* %arrayidx.20, align 4
+  %add.20 = fadd fast float %20, %add.19
+  %arrayidx.21 = getelementptr inbounds float, float* %x, i64 22
+  %21 = load float, float* %arrayidx.21, align 4
+  %add.21 = fadd fast float %21, %add.20
+  %arrayidx.22 = getelementptr inbounds float, float* %x, i64 23
+  %22 = load float, float* %arrayidx.22, align 4
+  %add.22 = fadd fast float %22, %add.21
+  %arrayidx.23 = getelementptr inbounds float, float* %x, i64 24
+  %23 = load float, float* %arrayidx.23, align 4
+  %add.23 = fadd fast float %23, %add.22
+  %arrayidx.24 = getelementptr inbounds float, float* %x, i64 25
+  %24 = load float, float* %arrayidx.24, align 4
+  %add.24 = fadd fast float %24, %add.23
+  %arrayidx.25 = getelementptr inbounds float, float* %x, i64 26
+  %25 = load float, float* %arrayidx.25, align 4
+  %add.25 = fadd fast float %25, %add.24
+  %arrayidx.26 = getelementptr inbounds float, float* %x, i64 27
+  %26 = load float, float* %arrayidx.26, align 4
+  %add.26 = fadd fast float %26, %add.25
+  %arrayidx.27 = getelementptr inbounds float, float* %x, i64 28
+  %27 = load float, float* %arrayidx.27, align 4
+  %add.27 = fadd fast float %27, %add.26
+  %arrayidx.28 = getelementptr inbounds float, float* %x, i64 29
+  %28 = load float, float* %arrayidx.28, align 4
+  %add.28 = fadd fast float %28, %add.27
+  %arrayidx.29 = getelementptr inbounds float, float* %x, i64 30
+  %29 = load float, float* %arrayidx.29, align 4
+  %add.29 = fadd fast float %29, %add.28
+  ret float %add.29
+}
+