forked from OSchip/llvm-project
Finish implementing a readme entry: when inserting an i64 variable
into a vector of zeros or undef, and when the top part is obviously
zero, we can just use movd + shuffle. This allows us to compile
vec_set-B.ll into:

_test3:
        movl $1234567, %eax
        andl 4(%esp), %eax
        movd %eax, %xmm0
        ret

instead of:

_test3:
        subl $28, %esp
        movl $1234567, %eax
        andl 32(%esp), %eax
        movl %eax, (%esp)
        movl $0, 4(%esp)
        movq (%esp), %xmm0
        addl $28, %esp
        ret

llvm-svn: 48090
This commit is contained in:
parent
6af064641f
commit
b6387c8a74
|
@ -781,41 +781,3 @@ LLVM should be able to generate the same thing as gcc. This looks like it is
|
|||
just a matter of matching (scalar_to_vector (load x)) to movd.
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
These two functions should compile to identical code on x86-32:
|
||||
|
||||
define <2 x i64> @test2(i64 %arg) {
|
||||
entry:
|
||||
%A = and i64 %arg, 1234567
|
||||
%B = insertelement <2 x i64> undef, i64 %A, i32 0
|
||||
ret <2 x i64> %B
|
||||
}
|
||||
|
||||
define <2 x i64> @test2(i64 %arg) {
|
||||
entry:
|
||||
%A = and i64 %arg, 1234567
|
||||
%B = insertelement <2 x i64> zeroinitializer, i64 %A, i32 0
|
||||
ret <2 x i64> %B
|
||||
}
|
||||
|
||||
The latter compiles to:
|
||||
|
||||
_test2:
|
||||
movl $1234567, %eax
|
||||
andl 4(%esp), %eax
|
||||
movd %eax, %xmm0
|
||||
ret
|
||||
|
||||
the former compiles to:
|
||||
|
||||
_test2:
|
||||
subl $28, %esp
|
||||
movl $1234567, %eax
|
||||
andl 32(%esp), %eax
|
||||
movl %eax, (%esp)
|
||||
movl $0, 4(%esp)
|
||||
movaps (%esp), %xmm0
|
||||
addl $28, %esp
|
||||
ret
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
|
|
@ -3063,11 +3063,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
|
|||
return DAG.getNode(ISD::UNDEF, VT);
|
||||
}
|
||||
|
||||
// Splat is obviously ok. Let legalizer expand it to a shuffle.
|
||||
if (Values.size() == 1)
|
||||
return SDOperand();
|
||||
|
||||
// Special case for single non-zero element.
|
||||
// Special case for single non-zero, non-undef, element.
|
||||
if (NumNonZero == 1 && NumElems <= 4) {
|
||||
unsigned Idx = CountTrailingZeros_32(NonZeros);
|
||||
SDOperand Item = Op.getOperand(Idx);
|
||||
|
@ -3141,6 +3137,10 @@ X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
|
|||
}
|
||||
}
|
||||
|
||||
// Splat is obviously ok. Let legalizer expand it to a shuffle.
|
||||
if (Values.size() == 1)
|
||||
return SDOperand();
|
||||
|
||||
// A vector full of immediates; various special cases are already
|
||||
// handled, so this is best done with a single constant-pool load.
|
||||
if (IsAllConstants)
|
||||
|
|
|
@ -0,0 +1,24 @@
|
|||
; RUN: llvm-as < %s | llc -march=x86 | not grep movaps
|
||||
; RUN: llvm-as < %s | llc -march=x86 | grep esp | count 2
|
||||
|
||||
; These should both generate something like this:
|
||||
;_test3:
|
||||
; movl $1234567, %eax
|
||||
; andl 4(%esp), %eax
|
||||
; movd %eax, %xmm0
|
||||
; ret
|
||||
|
||||
define <2 x i64> @test3(i64 %arg) {
|
||||
entry:
|
||||
%A = and i64 %arg, 1234567
|
||||
%B = insertelement <2 x i64> zeroinitializer, i64 %A, i32 0
|
||||
ret <2 x i64> %B
|
||||
}
|
||||
|
||||
define <2 x i64> @test2(i64 %arg) {
|
||||
entry:
|
||||
%A = and i64 %arg, 1234567
|
||||
%B = insertelement <2 x i64> undef, i64 %A, i32 0
|
||||
ret <2 x i64> %B
|
||||
}
|
||||
|
Loading…
Reference in New Issue