2014-07-04 19:55:40 +08:00
|
|
|
; RUN: llc < %s -mtriple=i686-linux -mcpu=corei7-avx | FileCheck %s
|
2012-12-30 10:33:22 +08:00
|
|
|
; RUN: opt -instsimplify -disable-output < %s
|
2011-12-05 14:29:09 +08:00
|
|
|
|
2013-07-14 14:24:09 +08:00
|
|
|
;CHECK-LABEL: AGEP0:
|
2011-12-05 14:29:09 +08:00
|
|
|
; AGEP0: splats the scalar pointer %ptr into all four lanes of a <4 x i32*>
; vector, applies two vector getelementptr steps with constant per-lane
; indices, and returns the resulting pointer vector.
; The FileCheck directives in the body expect a vbroadcast, a vpaddd on the
; very next output line, and ret on the line after that.
; NOTE(review): the bare timestamp lines, the '|' lines and the commit-message
; text interleaved in this block are neither IR nor comments; they look like
; blame-view residue -- confirm and strip before running this test.
define <4 x i32*> @AGEP0(i32* %ptr) nounwind {
|
|
|
|
entry:
|
2014-01-14 04:51:35 +08:00
|
|
|
;CHECK-LABEL: AGEP0
|
[x86] Revert r212324 which was too aggressive w.r.t. allowing undef
lanes in vector splats.
The core problem here is that undef lanes can't *unilaterally* be
considered to contribute to splats. Their handling needs to be more
cautious. There is also a reported failure of the nightly testers
(thanks Tobias!) that may well stem from the same core issue. I'm going
to fix this theoretical issue, factor the APIs a bit better, and then
verify that I don't see anything bad with Tobias's reduction from the
test suite before recommitting.
Original commit message for r212324:
[x86] Generalize BuildVectorSDNode::getConstantSplatValue to work for
any constant, constant FP, or undef splat and to tolerate any undef
lanes in a splat, then replace all uses of isSplatVector in X86's
lowering with it.
This fixes issues where undef lanes in an otherwise splat vector would
prevent the splat logic from firing. It is a touch more awkward to use
this interface, but it is much more accurate. Suggestions for better
interface structuring welcome.
With this fix, the code generated with the widening legalization
strategy for widen_cast-4.ll is *dramatically* improved as the special
lowering strategies for a v16i8 SRA kick in even though the high lanes
are undef.
We also get a slightly different choice for broadcasting an aligned
memory location, and use vpshufd instead of vbroadcastss. This looks
like a minor win for pipelining and domain crossing, but a minor loss
for the number of micro-ops. I suspect its a wash, but folks can
easily tweak the lowering if they want.
llvm-svn: 212475
2014-07-08 03:03:32 +08:00
|
|
|
;CHECK: vbroadcast
|
2014-01-14 04:51:35 +08:00
|
|
|
;CHECK-NEXT: vpaddd
|
|
|
|
;CHECK-NEXT: ret
|
2011-12-05 14:29:09 +08:00
|
|
|
; Build the splat: insert %ptr into lanes 0, 1, 2 and 3 in turn, starting
; from an undef vector.
%vecinit.i = insertelement <4 x i32*> undef, i32* %ptr, i32 0
|
|
|
|
%vecinit2.i = insertelement <4 x i32*> %vecinit.i, i32* %ptr, i32 1
|
|
|
|
%vecinit4.i = insertelement <4 x i32*> %vecinit2.i, i32* %ptr, i32 2
|
|
|
|
%vecinit6.i = insertelement <4 x i32*> %vecinit4.i, i32* %ptr, i32 3
|
|
|
|
; First vector GEP: per-lane constant indices 1, 2, 3, 4.
%A2 = getelementptr <4 x i32*> %vecinit6.i, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
|
|
|
|
; Second vector GEP, chained on the first: per-lane constant indices
; 10, 14, 19, 233.
%A3 = getelementptr <4 x i32*> %A2, <4 x i32> <i32 10, i32 14, i32 19, i32 233>
|
|
|
|
ret <4 x i32*> %A3
|
|
|
|
}
|
|
|
|
|
2013-07-14 14:24:09 +08:00
|
|
|
;CHECK-LABEL: AGEP1:
|
2011-12-05 14:29:09 +08:00
|
|
|
; AGEP1: applies a vector getelementptr with constant per-lane indices to the
; incoming <4 x i32*> %param, extracts lane 3 of the result, loads an i32
; through that pointer and returns it.
; The FileCheck directives in the body expect vpaddd, then vpextrd and movl on
; the two following output lines, with ret somewhere after.
; NOTE(review): the bare timestamp and '|' lines interleaved in this block are
; neither IR nor comments; they look like blame-view residue -- confirm and
; strip before running this test.
define i32 @AGEP1(<4 x i32*> %param) nounwind {
|
|
|
|
entry:
|
2014-01-14 04:51:35 +08:00
|
|
|
;CHECK-LABEL: AGEP1
|
|
|
|
;CHECK: vpaddd
|
|
|
|
;CHECK-NEXT: vpextrd
|
|
|
|
;CHECK-NEXT: movl
|
2011-12-05 14:29:09 +08:00
|
|
|
; Vector GEP with per-lane constant indices 1, 2, 3, 4.
%A2 = getelementptr <4 x i32*> %param, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
|
|
|
|
; Only the last lane's pointer is used.
%k = extractelement <4 x i32*> %A2, i32 3
|
|
|
|
%v = load i32* %k
|
|
|
|
ret i32 %v
|
|
|
|
;CHECK: ret
|
|
|
|
}
|
|
|
|
|
2013-07-14 14:24:09 +08:00
|
|
|
;CHECK-LABEL: AGEP2:
|
2011-12-05 14:29:09 +08:00
|
|
|
; AGEP2: applies a vector getelementptr to <4 x i32*> %param using the
; run-time index vector %off, extracts lane 3 of the result, loads an i32
; through that pointer and returns it.
; The FileCheck directives in the body expect "vpslld $2" followed on the next
; output line by a vpadd*, with ret somewhere after. The shift by 2 presumably
; scales the indices by the 4-byte i32 element size -- confirm.
; NOTE(review): the bare timestamp and '|' lines interleaved in this block are
; neither IR nor comments; they look like blame-view residue -- confirm and
; strip before running this test.
define i32 @AGEP2(<4 x i32*> %param, <4 x i32> %off) nounwind {
|
|
|
|
entry:
|
2014-02-16 15:31:05 +08:00
|
|
|
;CHECK-LABEL: AGEP2
|
2014-01-14 04:51:35 +08:00
|
|
|
;CHECK: vpslld $2
|
|
|
|
;CHECK-NEXT: vpadd
|
2011-12-05 14:29:09 +08:00
|
|
|
; Vector GEP with a variable (non-constant) index vector.
%A2 = getelementptr <4 x i32*> %param, <4 x i32> %off
|
|
|
|
; Only the last lane's pointer is used.
%k = extractelement <4 x i32*> %A2, i32 3
|
|
|
|
%v = load i32* %k
|
|
|
|
ret i32 %v
|
|
|
|
;CHECK: ret
|
|
|
|
}
|
|
|
|
|
2013-07-14 14:24:09 +08:00
|
|
|
;CHECK-LABEL: AGEP3:
|
2011-12-05 14:29:09 +08:00
|
|
|
; AGEP3: applies a vector getelementptr to <4 x i32*> %param using the
; run-time index vector %off, then overwrites lane 3 of the result with the
; address of a fresh i32 stack slot and returns the pointer vector.
; The FileCheck directives in the body expect "vpslld $2" followed on the next
; output line by a vpadd*, with ret somewhere after.
; NOTE(review): the bare timestamp and '|' lines interleaved in this block are
; neither IR nor comments; they look like blame-view residue -- confirm and
; strip before running this test.
define <4 x i32*> @AGEP3(<4 x i32*> %param, <4 x i32> %off) nounwind {
|
|
|
|
entry:
|
2014-01-14 04:51:35 +08:00
|
|
|
;CHECK-LABEL: AGEP3
|
|
|
|
;CHECK: vpslld $2
|
|
|
|
;CHECK-NEXT: vpadd
|
2011-12-05 14:29:09 +08:00
|
|
|
; Vector GEP with a variable (non-constant) index vector.
%A2 = getelementptr <4 x i32*> %param, <4 x i32> %off
|
|
|
|
; Stack slot whose address replaces the lane-3 pointer below.
%v = alloca i32
|
|
|
|
%k = insertelement <4 x i32*> %A2, i32* %v, i32 3
|
|
|
|
ret <4 x i32*> %k
|
|
|
|
;CHECK: ret
|
|
|
|
}
|
|
|
|
|
2013-07-14 14:24:09 +08:00
|
|
|
;CHECK-LABEL: AGEP4:
|
2012-02-28 19:54:05 +08:00
|
|
|
; AGEP4: applies a vector getelementptr to <4 x i16*> %param using the
; run-time <4 x i32> index vector %off and returns the resulting pointer
; vector.
; The FileCheck directives in the body expect two vpadd* instructions on
; consecutive output lines, with ret somewhere after.
; NOTE(review): the bare timestamp and '|' lines interleaved in this block are
; neither IR nor comments; they look like blame-view residue -- confirm and
; strip before running this test.
define <4 x i16*> @AGEP4(<4 x i16*> %param, <4 x i32> %off) nounwind {
|
2011-12-05 14:29:09 +08:00
|
|
|
entry:
|
2014-01-14 04:51:35 +08:00
|
|
|
;CHECK-LABEL: AGEP4
|
2012-02-28 19:54:05 +08:00
|
|
|
; Multiply offset by two (add it to itself).
|
2014-01-14 04:51:35 +08:00
|
|
|
;CHECK: vpadd
|
2012-02-28 19:54:05 +08:00
|
|
|
; Add the base to the offset.
|
2014-01-14 04:51:35 +08:00
|
|
|
;CHECK-NEXT: vpadd
|
2012-02-28 19:54:05 +08:00
|
|
|
; Vector GEP over i16 pointees with a variable index vector.
%A = getelementptr <4 x i16*> %param, <4 x i32> %off
|
|
|
|
ret <4 x i16*> %A
|
2011-12-05 14:29:09 +08:00
|
|
|
;CHECK: ret
|
|
|
|
}
|
|
|
|
|
2013-07-14 14:24:09 +08:00
|
|
|
;CHECK-LABEL: AGEP5:
|
2011-12-05 14:29:09 +08:00
|
|
|
; AGEP5: applies a vector getelementptr to <4 x i8*> %param using the run-time
; index vector %off, whose elements are i8 rather than i32, and returns the
; resulting pointer vector.
; The FileCheck directives in the body expect a vpaddd, with ret somewhere
; after.
; NOTE(review): the bare timestamp and '|' lines interleaved in this block are
; neither IR nor comments; they look like blame-view residue -- confirm and
; strip before running this test.
define <4 x i8*> @AGEP5(<4 x i8*> %param, <4 x i8> %off) nounwind {
|
|
|
|
entry:
|
2014-01-14 04:51:35 +08:00
|
|
|
;CHECK-LABEL: AGEP5
|
|
|
|
;CHECK: vpaddd
|
2011-12-05 14:29:09 +08:00
|
|
|
; Vector GEP over i8 pointees with an i8-element index vector.
%A = getelementptr <4 x i8*> %param, <4 x i8> %off
|
|
|
|
ret <4 x i8*> %A
|
|
|
|
;CHECK: ret
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2012-02-28 19:54:05 +08:00
|
|
|
; The size of each element is 1 byte. No need to multiply by element size.
|
2013-07-14 14:24:09 +08:00
|
|
|
;CHECK-LABEL: AGEP6:
|
2012-02-28 19:54:05 +08:00
|
|
|
; AGEP6: applies a vector getelementptr to <4 x i8*> %param using the run-time
; <4 x i32> index vector %off and returns the resulting pointer vector.
; The FileCheck directives in the body require that no "pslld" appears in the
; output between the function label match and the ret match.
; NOTE(review): the bare timestamp and '|' lines interleaved in this block are
; neither IR nor comments; they look like blame-view residue -- confirm and
; strip before running this test.
define <4 x i8*> @AGEP6(<4 x i8*> %param, <4 x i32> %off) nounwind {
|
|
|
|
entry:
|
2014-01-14 04:51:35 +08:00
|
|
|
;CHECK-LABEL: AGEP6
|
2012-02-28 19:54:05 +08:00
|
|
|
;CHECK-NOT: pslld
|
|
|
|
; Vector GEP over i8 pointees with a variable i32-element index vector.
%A = getelementptr <4 x i8*> %param, <4 x i32> %off
|
|
|
|
ret <4 x i8*> %A
|
|
|
|
;CHECK: ret
|
|
|
|
}
|
|
|
|
|