InstCombine: Move tests that use target intrinsics into subdirectories

Tests with target intrinsics are inherently target specific, so it
doesn't actually make sense to run them if we've excluded their
target.

llvm-svn: 302979
This commit is contained in:
Justin Bogner 2017-05-13 05:39:46 +00:00
parent c501f754a5
commit 3c6fbad388
38 changed files with 190 additions and 175 deletions

View File

@ -1,70 +1,6 @@
; RUN: opt -S -instcombine < %s | FileCheck %s
define <4 x i32> @mulByZero(<4 x i16> %x) nounwind readnone ssp {
entry:
%a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %x, <4 x i16> zeroinitializer) nounwind
ret <4 x i32> %a
; CHECK: entry:
; CHECK-NEXT: ret <4 x i32> zeroinitializer
}
define <4 x i32> @mulByOne(<4 x i16> %x) nounwind readnone ssp {
entry:
%a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %x, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
ret <4 x i32> %a
; CHECK: entry:
; CHECK-NEXT: %a = sext <4 x i16> %x to <4 x i32>
; CHECK-NEXT: ret <4 x i32> %a
}
define <4 x i32> @constantMul() nounwind readnone ssp {
entry:
%a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 3, i16 3, i16 3, i16 3>, <4 x i16> <i16 2, i16 2, i16 2, i16 2>) nounwind
ret <4 x i32> %a
; CHECK: entry:
; CHECK-NEXT: ret <4 x i32> <i32 6, i32 6, i32 6, i32 6>
}
define <4 x i32> @constantMulS() nounwind readnone ssp {
entry:
%b = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
ret <4 x i32> %b
; CHECK: entry:
; CHECK-NEXT: ret <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
}
define <4 x i32> @constantMulU() nounwind readnone ssp {
entry:
%b = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
ret <4 x i32> %b
; CHECK: entry:
; CHECK-NEXT: ret <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>
}
define <4 x i32> @complex1(<4 x i16> %x) nounwind readnone ssp {
entry:
%a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 2, i16 2, i16 2, i16 2>, <4 x i16> %x) nounwind
%b = add <4 x i32> zeroinitializer, %a
ret <4 x i32> %b
; CHECK: entry:
; CHECK-NEXT: %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 2, i16 2, i16 2, i16 2>, <4 x i16> %x) [[NUW:#[0-9]+]]
; CHECK-NEXT: ret <4 x i32> %a
}
define <4 x i32> @complex2(<4 x i32> %x) nounwind readnone ssp {
entry:
%a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 3, i16 3, i16 3, i16 3>, <4 x i16> <i16 2, i16 2, i16 2, i16 2>) nounwind
%b = add <4 x i32> %x, %a
ret <4 x i32> %b
; CHECK: entry:
; CHECK-NEXT: %b = add <4 x i32> %x, <i32 6, i32 6, i32 6, i32 6>
; CHECK-NEXT: ret <4 x i32> %b
}
declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
; ARM64 variants - <rdar://problem/12349617>
; ARM64 neon intrinsic variants - <rdar://problem/12349617>
; REQUIRES: aarch64
define <4 x i32> @mulByZeroARM64(<4 x i16> %x) nounwind readnone ssp {
entry:

View File

@ -0,0 +1,2 @@
# Mark the tests governed by this lit config as unsupported when the
# AArch64 target is not among the targets listed in config.root.targets.
if 'AArch64' not in config.root.targets:
    config.unsupported = True

View File

@ -0,0 +1,2 @@
# Mark the tests governed by this lit config as unsupported when the
# AMDGPU target is not among the targets listed in config.root.targets.
if 'AMDGPU' not in config.root.targets:
    config.unsupported = True

View File

@ -0,0 +1,65 @@
; RUN: opt -S -instcombine < %s | FileCheck %s
; llvm.arm.neon.vmulls of %x with an all-zero vector.
; The FileCheck expectation is that the call disappears and the function
; returns zeroinitializer directly.
define <4 x i32> @mulByZero(<4 x i16> %x) nounwind readnone ssp {
entry:
%a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %x, <4 x i16> zeroinitializer) nounwind
ret <4 x i32> %a
; CHECK: entry:
; CHECK-NEXT: ret <4 x i32> zeroinitializer
}
; llvm.arm.neon.vmulls of %x with a vector whose four lanes are all 1.
; The FileCheck expectation is that the call is replaced by a plain sext
; of %x from <4 x i16> to <4 x i32>.
define <4 x i32> @mulByOne(<4 x i16> %x) nounwind readnone ssp {
entry:
%a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %x, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
ret <4 x i32> %a
; CHECK: entry:
; CHECK-NEXT: %a = sext <4 x i16> %x to <4 x i32>
; CHECK-NEXT: ret <4 x i32> %a
}
; llvm.arm.neon.vmulls with two constant operands (all lanes 3 and all
; lanes 2). The FileCheck expectation is a folded constant result with
; every lane equal to 6.
define <4 x i32> @constantMul() nounwind readnone ssp {
entry:
%a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 3, i16 3, i16 3, i16 3>, <4 x i16> <i16 2, i16 2, i16 2, i16 2>) nounwind
ret <4 x i32> %a
; CHECK: entry:
; CHECK-NEXT: ret <4 x i32> <i32 6, i32 6, i32 6, i32 6>
}
; Constant fold of llvm.arm.neon.vmulls with i16 -1 lanes times i16 1
; lanes. The expected result keeps the sign: every i32 lane is -1.
; Compare with @constantMulU, which uses vmullu on the same operands.
define <4 x i32> @constantMulS() nounwind readnone ssp {
entry:
%b = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
ret <4 x i32> %b
; CHECK: entry:
; CHECK-NEXT: ret <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
}
; Constant fold of llvm.arm.neon.vmullu with i16 -1 lanes times i16 1
; lanes. The expected result is 65535 in every i32 lane, i.e. the i16 -1
; bit pattern is treated as an unsigned value when widened.
define <4 x i32> @constantMulU() nounwind readnone ssp {
entry:
%b = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
ret <4 x i32> %b
; CHECK: entry:
; CHECK-NEXT: ret <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>
}
; llvm.arm.neon.vmulls of a constant (all lanes 2) with the variable %x,
; followed by an add of zero. The FileCheck expectation is that the call
; is kept as written, the add of zero is removed, and the call result is
; returned directly. The expectation line also captures the call's
; attribute group reference as the FileCheck variable NUW.
define <4 x i32> @complex1(<4 x i16> %x) nounwind readnone ssp {
entry:
%a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 2, i16 2, i16 2, i16 2>, <4 x i16> %x) nounwind
%b = add <4 x i32> zeroinitializer, %a
ret <4 x i32> %b
; CHECK: entry:
; CHECK-NEXT: %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 2, i16 2, i16 2, i16 2>, <4 x i16> %x) [[NUW:#[0-9]+]]
; CHECK-NEXT: ret <4 x i32> %a
}
; llvm.arm.neon.vmulls with two constant operands whose result feeds an
; add with the variable %x. The FileCheck expectation is that the call
; folds to a constant with every lane equal to 6 and only the add of %x
; and that constant remains.
define <4 x i32> @complex2(<4 x i32> %x) nounwind readnone ssp {
entry:
%a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 3, i16 3, i16 3, i16 3>, <4 x i16> <i16 2, i16 2, i16 2, i16 2>) nounwind
%b = add <4 x i32> %x, %a
ret <4 x i32> %b
; CHECK: entry:
; CHECK-NEXT: %b = add <4 x i32> %x, <i32 6, i32 6, i32 6, i32 6>
; CHECK-NEXT: ret <4 x i32> %b
}
; Declarations of the two ARM NEON intrinsics called by the tests in this
; file: vmulls and vmullu, each taking two <4 x i16> operands and
; returning <4 x i32>.
declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone

View File

@ -0,0 +1,2 @@
# Mark the tests governed by this lit config as unsupported when the
# ARM target is not among the targets listed in config.root.targets.
if 'ARM' not in config.root.targets:
    config.unsupported = True

View File

@ -0,0 +1,3 @@
# Mark the tests governed by this lit config as unsupported when the
# PowerPC target is not among the targets listed in config.root.targets.
if 'PowerPC' not in config.root.targets:
    config.unsupported = True

View File

@ -0,0 +1,2 @@
# Mark the tests governed by this lit config as unsupported when the
# X86 target is not among the targets listed in config.root.targets.
if 'X86' not in config.root.targets:
    config.unsupported = True

View File

@ -1,4 +1,5 @@
; RUN: opt < %s -instcombine -S | not grep "shufflevector.*i32 8"
; RUN: opt < %s -instcombine -S | FileCheck %s
; The instcombine output must not contain a shufflevector followed on the
; same line by "i32 8". The pattern below has no trailing quote character:
; a stray quote would become part of the pattern and make this negative
; check pass vacuously.
; CHECK-NOT: shufflevector{{.*}}i32 8
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin9"

View File

@ -0,0 +1,110 @@
; RUN: opt < %s -instcombine -S | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; Chain of scalar SSE intrinsics on a vector holding %f in lane 0 and
; zeros in lanes 1-3: sub.ss 1.0, mul.ss 0.5, min.ss 65535.0, max.ss 0.0,
; then cvttss2si and a trunc to i16.
; The expected output shows:
;   - sub.ss / mul.ss replaced by scalar fadd -1.0 / fmul 0.5 on %f,
;     with a single insertelement into lane 0 afterwards;
;   - min.ss / max.ss kept as calls, with lanes 1-3 of their constant
;     operands turned into undef;
;   - cvttss2si and the trunc kept.
define i16 @test1(float %f) {
; CHECK-LABEL: @test1(
; CHECK-NEXT: [[TMP281:%.*]] = fadd float %f, -1.000000e+00
; CHECK-NEXT: [[TMP373:%.*]] = fmul float [[TMP281]], 5.000000e-01
; CHECK-NEXT: [[TMP374:%.*]] = insertelement <4 x float> undef, float [[TMP373]], i32 0
; CHECK-NEXT: [[TMP48:%.*]] = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> [[TMP374]], <4 x float> <float 6.553500e+04, float undef, float undef, float undef>)
; CHECK-NEXT: [[TMP59:%.*]] = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> [[TMP48]], <4 x float> <float 0.000000e+00, float undef, float undef, float undef>)
; CHECK-NEXT: [[TMP_UPGRD_1:%.*]] = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> [[TMP59]])
; CHECK-NEXT: [[TMP69:%.*]] = trunc i32 [[TMP_UPGRD_1]] to i16
; CHECK-NEXT: ret i16 [[TMP69]]
;
; Build the input vector <%f, 0.0, 0.0, 0.0>.
%tmp = insertelement <4 x float> undef, float %f, i32 0
%tmp10 = insertelement <4 x float> %tmp, float 0.000000e+00, i32 1
%tmp11 = insertelement <4 x float> %tmp10, float 0.000000e+00, i32 2
%tmp12 = insertelement <4 x float> %tmp11, float 0.000000e+00, i32 3
; Scalar arithmetic chain; each constant operand carries its value in lane 0.
%tmp28 = tail call <4 x float> @llvm.x86.sse.sub.ss( <4 x float> %tmp12, <4 x float> < float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > )
%tmp37 = tail call <4 x float> @llvm.x86.sse.mul.ss( <4 x float> %tmp28, <4 x float> < float 5.000000e-01, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > )
%tmp48 = tail call <4 x float> @llvm.x86.sse.min.ss( <4 x float> %tmp37, <4 x float> < float 6.553500e+04, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > )
%tmp59 = tail call <4 x float> @llvm.x86.sse.max.ss( <4 x float> %tmp48, <4 x float> zeroinitializer )
; Convert the vector to an i32 and narrow it to the i16 return value.
%tmp.upgrd.1 = tail call i32 @llvm.x86.sse.cvttss2si( <4 x float> %tmp59 )
%tmp69 = trunc i32 %tmp.upgrd.1 to i16
ret i16 %tmp69
}
; Exercises eight SSE/SSE2 conversion intrinsics (cvtss2si, cvtss2si64,
; cvttss2si, cvttss2si64, cvtsd2si, cvtsd2si64, cvttsd2si, cvttsd2si64).
; Each call's operand has %f or %d in lane 0 and explicit zeros inserted
; into the remaining lanes.
; The expected output keeps only the lane-0 insertelement for each call
; (the zero inserts into the other lanes are gone), keeps all eight calls
; and the additions, and shows the final add with its two operands in the
; opposite order from the input.
define i64 @test3(float %f, double %d) {
; CHECK-LABEL: @test3(
; CHECK-NEXT: [[V00:%.*]] = insertelement <4 x float> undef, float %f, i32 0
; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.x86.sse.cvtss2si(<4 x float> [[V00]])
; CHECK-NEXT: [[V10:%.*]] = insertelement <4 x float> undef, float %f, i32 0
; CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.x86.sse.cvtss2si64(<4 x float> [[V10]])
; CHECK-NEXT: [[V20:%.*]] = insertelement <4 x float> undef, float %f, i32 0
; CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> [[V20]])
; CHECK-NEXT: [[V30:%.*]] = insertelement <4 x float> undef, float %f, i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> [[V30]])
; CHECK-NEXT: [[V40:%.*]] = insertelement <2 x double> undef, double %d, i32 0
; CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> [[V40]])
; CHECK-NEXT: [[V50:%.*]] = insertelement <2 x double> undef, double %d, i32 0
; CHECK-NEXT: [[TMP5:%.*]] = tail call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> [[V50]])
; CHECK-NEXT: [[V60:%.*]] = insertelement <2 x double> undef, double %d, i32 0
; CHECK-NEXT: [[TMP6:%.*]] = tail call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> [[V60]])
; CHECK-NEXT: [[V70:%.*]] = insertelement <2 x double> undef, double %d, i32 0
; CHECK-NEXT: [[TMP7:%.*]] = tail call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> [[V70]])
; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP0]], [[TMP2]]
; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[TMP4]], [[TMP6]]
; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP8]], [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = sext i32 [[TMP10]] to i64
; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[TMP5]], [[TMP7]]
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[TMP12]], [[TMP13]]
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP14]], [[TMP11]]
; CHECK-NEXT: ret i64 [[TMP15]]
;
; <4 x float> operands <%f, 0, 0, 0> for the four SSE conversions.
%v00 = insertelement <4 x float> undef, float %f, i32 0
%v01 = insertelement <4 x float> %v00, float 0.000000e+00, i32 1
%v02 = insertelement <4 x float> %v01, float 0.000000e+00, i32 2
%v03 = insertelement <4 x float> %v02, float 0.000000e+00, i32 3
%tmp0 = tail call i32 @llvm.x86.sse.cvtss2si(<4 x float> %v03)
%v10 = insertelement <4 x float> undef, float %f, i32 0
%v11 = insertelement <4 x float> %v10, float 0.000000e+00, i32 1
%v12 = insertelement <4 x float> %v11, float 0.000000e+00, i32 2
%v13 = insertelement <4 x float> %v12, float 0.000000e+00, i32 3
%tmp1 = tail call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %v13)
%v20 = insertelement <4 x float> undef, float %f, i32 0
%v21 = insertelement <4 x float> %v20, float 0.000000e+00, i32 1
%v22 = insertelement <4 x float> %v21, float 0.000000e+00, i32 2
%v23 = insertelement <4 x float> %v22, float 0.000000e+00, i32 3
%tmp2 = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> %v23)
%v30 = insertelement <4 x float> undef, float %f, i32 0
%v31 = insertelement <4 x float> %v30, float 0.000000e+00, i32 1
%v32 = insertelement <4 x float> %v31, float 0.000000e+00, i32 2
%v33 = insertelement <4 x float> %v32, float 0.000000e+00, i32 3
%tmp3 = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %v33)
; <2 x double> operands <%d, 0> for the four SSE2 conversions.
%v40 = insertelement <2 x double> undef, double %d, i32 0
%v41 = insertelement <2 x double> %v40, double 0.000000e+00, i32 1
%tmp4 = tail call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %v41)
%v50 = insertelement <2 x double> undef, double %d, i32 0
%v51 = insertelement <2 x double> %v50, double 0.000000e+00, i32 1
%tmp5 = tail call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %v51)
%v60 = insertelement <2 x double> undef, double %d, i32 0
%v61 = insertelement <2 x double> %v60, double 0.000000e+00, i32 1
%tmp6 = tail call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %v61)
%v70 = insertelement <2 x double> undef, double %d, i32 0
%v71 = insertelement <2 x double> %v70, double 0.000000e+00, i32 1
%tmp7 = tail call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %v71)
; Combine all eight results into the return value: the i32 results are
; summed and sign-extended, the i64 results are summed, then both are added.
%tmp8 = add i32 %tmp0, %tmp2
%tmp9 = add i32 %tmp4, %tmp6
%tmp10 = add i32 %tmp8, %tmp9
%tmp11 = sext i32 %tmp10 to i64
%tmp12 = add i64 %tmp1, %tmp3
%tmp13 = add i64 %tmp5, %tmp7
%tmp14 = add i64 %tmp12, %tmp13
%tmp15 = add i64 %tmp11, %tmp14
ret i64 %tmp15
}
; Declarations of the x86 SSE/SSE2 intrinsics called by @test1 and @test3.
; Scalar arithmetic on <4 x float> operands (used by @test1):
declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>)
; Conversions from a <4 x float> operand to i32 / i64:
declare i32 @llvm.x86.sse.cvtss2si(<4 x float>)
declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>)
declare i32 @llvm.x86.sse.cvttss2si(<4 x float>)
declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>)
; Conversions from a <2 x double> operand to i32 / i64:
declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>)
declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>)
declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>)
declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>)

View File

@ -2,30 +2,6 @@
; RUN: opt < %s -instcombine -S | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
define i16 @test1(float %f) {
; CHECK-LABEL: @test1(
; CHECK-NEXT: [[TMP281:%.*]] = fadd float %f, -1.000000e+00
; CHECK-NEXT: [[TMP373:%.*]] = fmul float [[TMP281]], 5.000000e-01
; CHECK-NEXT: [[TMP374:%.*]] = insertelement <4 x float> undef, float [[TMP373]], i32 0
; CHECK-NEXT: [[TMP48:%.*]] = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> [[TMP374]], <4 x float> <float 6.553500e+04, float undef, float undef, float undef>)
; CHECK-NEXT: [[TMP59:%.*]] = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> [[TMP48]], <4 x float> <float 0.000000e+00, float undef, float undef, float undef>)
; CHECK-NEXT: [[TMP_UPGRD_1:%.*]] = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> [[TMP59]])
; CHECK-NEXT: [[TMP69:%.*]] = trunc i32 [[TMP_UPGRD_1]] to i16
; CHECK-NEXT: ret i16 [[TMP69]]
;
%tmp = insertelement <4 x float> undef, float %f, i32 0
%tmp10 = insertelement <4 x float> %tmp, float 0.000000e+00, i32 1
%tmp11 = insertelement <4 x float> %tmp10, float 0.000000e+00, i32 2
%tmp12 = insertelement <4 x float> %tmp11, float 0.000000e+00, i32 3
%tmp28 = tail call <4 x float> @llvm.x86.sse.sub.ss( <4 x float> %tmp12, <4 x float> < float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > )
%tmp37 = tail call <4 x float> @llvm.x86.sse.mul.ss( <4 x float> %tmp28, <4 x float> < float 5.000000e-01, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > )
%tmp48 = tail call <4 x float> @llvm.x86.sse.min.ss( <4 x float> %tmp37, <4 x float> < float 6.553500e+04, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > )
%tmp59 = tail call <4 x float> @llvm.x86.sse.max.ss( <4 x float> %tmp48, <4 x float> zeroinitializer )
%tmp.upgrd.1 = tail call i32 @llvm.x86.sse.cvttss2si( <4 x float> %tmp59 )
%tmp69 = trunc i32 %tmp.upgrd.1 to i16
ret i16 %tmp69
}
define i32 @test2(float %f) {
; CHECK-LABEL: @test2(
; CHECK-NEXT: [[TMP5:%.*]] = fmul float %f, %f
@ -42,77 +18,6 @@ define i32 @test2(float %f) {
ret i32 %tmp21
}
define i64 @test3(float %f, double %d) {
; CHECK-LABEL: @test3(
; CHECK-NEXT: [[V00:%.*]] = insertelement <4 x float> undef, float %f, i32 0
; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.x86.sse.cvtss2si(<4 x float> [[V00]])
; CHECK-NEXT: [[V10:%.*]] = insertelement <4 x float> undef, float %f, i32 0
; CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.x86.sse.cvtss2si64(<4 x float> [[V10]])
; CHECK-NEXT: [[V20:%.*]] = insertelement <4 x float> undef, float %f, i32 0
; CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> [[V20]])
; CHECK-NEXT: [[V30:%.*]] = insertelement <4 x float> undef, float %f, i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> [[V30]])
; CHECK-NEXT: [[V40:%.*]] = insertelement <2 x double> undef, double %d, i32 0
; CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> [[V40]])
; CHECK-NEXT: [[V50:%.*]] = insertelement <2 x double> undef, double %d, i32 0
; CHECK-NEXT: [[TMP5:%.*]] = tail call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> [[V50]])
; CHECK-NEXT: [[V60:%.*]] = insertelement <2 x double> undef, double %d, i32 0
; CHECK-NEXT: [[TMP6:%.*]] = tail call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> [[V60]])
; CHECK-NEXT: [[V70:%.*]] = insertelement <2 x double> undef, double %d, i32 0
; CHECK-NEXT: [[TMP7:%.*]] = tail call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> [[V70]])
; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP0]], [[TMP2]]
; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[TMP4]], [[TMP6]]
; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP8]], [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = sext i32 [[TMP10]] to i64
; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[TMP5]], [[TMP7]]
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[TMP12]], [[TMP13]]
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP14]], [[TMP11]]
; CHECK-NEXT: ret i64 [[TMP15]]
;
%v00 = insertelement <4 x float> undef, float %f, i32 0
%v01 = insertelement <4 x float> %v00, float 0.000000e+00, i32 1
%v02 = insertelement <4 x float> %v01, float 0.000000e+00, i32 2
%v03 = insertelement <4 x float> %v02, float 0.000000e+00, i32 3
%tmp0 = tail call i32 @llvm.x86.sse.cvtss2si(<4 x float> %v03)
%v10 = insertelement <4 x float> undef, float %f, i32 0
%v11 = insertelement <4 x float> %v10, float 0.000000e+00, i32 1
%v12 = insertelement <4 x float> %v11, float 0.000000e+00, i32 2
%v13 = insertelement <4 x float> %v12, float 0.000000e+00, i32 3
%tmp1 = tail call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %v13)
%v20 = insertelement <4 x float> undef, float %f, i32 0
%v21 = insertelement <4 x float> %v20, float 0.000000e+00, i32 1
%v22 = insertelement <4 x float> %v21, float 0.000000e+00, i32 2
%v23 = insertelement <4 x float> %v22, float 0.000000e+00, i32 3
%tmp2 = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> %v23)
%v30 = insertelement <4 x float> undef, float %f, i32 0
%v31 = insertelement <4 x float> %v30, float 0.000000e+00, i32 1
%v32 = insertelement <4 x float> %v31, float 0.000000e+00, i32 2
%v33 = insertelement <4 x float> %v32, float 0.000000e+00, i32 3
%tmp3 = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %v33)
%v40 = insertelement <2 x double> undef, double %d, i32 0
%v41 = insertelement <2 x double> %v40, double 0.000000e+00, i32 1
%tmp4 = tail call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %v41)
%v50 = insertelement <2 x double> undef, double %d, i32 0
%v51 = insertelement <2 x double> %v50, double 0.000000e+00, i32 1
%tmp5 = tail call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %v51)
%v60 = insertelement <2 x double> undef, double %d, i32 0
%v61 = insertelement <2 x double> %v60, double 0.000000e+00, i32 1
%tmp6 = tail call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %v61)
%v70 = insertelement <2 x double> undef, double %d, i32 0
%v71 = insertelement <2 x double> %v70, double 0.000000e+00, i32 1
%tmp7 = tail call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %v71)
%tmp8 = add i32 %tmp0, %tmp2
%tmp9 = add i32 %tmp4, %tmp6
%tmp10 = add i32 %tmp8, %tmp9
%tmp11 = sext i32 %tmp10 to i64
%tmp12 = add i64 %tmp1, %tmp3
%tmp13 = add i64 %tmp5, %tmp7
%tmp14 = add i64 %tmp12, %tmp13
%tmp15 = add i64 %tmp11, %tmp14
ret i64 %tmp15
}
define void @get_image() nounwind {
; CHECK-LABEL: @get_image(
; CHECK-NEXT: entry:
@ -156,18 +61,6 @@ entry:
}
declare i32 @fgetc(i8*)
declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>)
declare i32 @llvm.x86.sse.cvtss2si(<4 x float>)
declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>)
declare i32 @llvm.x86.sse.cvttss2si(<4 x float>)
declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>)
declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>)
declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>)
declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>)
declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>)
define <4 x float> @dead_shuffle_elt(<4 x float> %x, <2 x float> %y) nounwind {
; CHECK-LABEL: @dead_shuffle_elt(
@ -248,4 +141,3 @@ define <2 x i64> @PR24922(<2 x i64> %v) {
%result = select <2 x i1> <i1 icmp eq (i64 extractelement (<2 x i64> bitcast (<4 x i32> <i32 15, i32 15, i32 15, i32 15> to <2 x i64>), i64 0), i64 0), i1 true>, <2 x i64> %v, <2 x i64> zeroinitializer
ret <2 x i64> %result
}