Don't lower a splat vector load to an esp-relative access if the

stack may be misaligned.

Update test accordingly.

Patch by Evan Cheng!

llvm-svn: 94291
This commit is contained in:
Eric Christopher 2010-01-23 06:02:43 +00:00
parent 1deb09c28d
commit c1451d764f
2 changed files with 4 additions and 37 deletions

View File

@ -3390,17 +3390,10 @@ X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
if (DAG.InferPtrAlignment(Ptr) < 16) {
if (MFI->isFixedObjectIndex(FI)) {
// Can't change the alignment. Reference stack + offset explicitly
// if stack pointer is at least 16-byte aligned.
unsigned StackAlign = Subtarget->getStackAlignment();
if (StackAlign < 16)
// Can't change the alignment. FIXME: It's possible to compute
// the exact stack offset and reference FI + adjust offset instead.
// If someone *really* cares about this. That's the way to implement it.
return SDValue();
Offset = MFI->getObjectOffset(FI) + Offset;
SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr,
getPointerTy());
Ptr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
DAG.getConstant(Offset & ~15, getPointerTy()));
Offset %= 16;
} else {
MFI->setObjectAlignment(FI, 16);
}

View File

@ -1,21 +1,6 @@
; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 | FileCheck %s
; rdar://7434544
; t1: splat of a scalar loaded from a local stack array.
; The array is an alloca with an explicit 16-byte alignment; its first float
; is loaded and inserted into all four lanes of a <4 x float>, which is then
; bitcast to <2 x i64> for the return. The directives below expect the whole
; sequence to be selected as one pshufd with an (%esp) memory operand.
define <2 x i64> @t1() nounwind ssp {
entry:
; CHECK: t1:
; CHECK: pshufd $0, (%esp), %xmm0
; 8 floats = 32 bytes, requested at 16-byte alignment.
%array = alloca [8 x float], align 16
; Address of element 0 of the array.
%arrayidx = getelementptr inbounds [8 x float]* %array, i32 0, i32 0
; The scalar that gets broadcast.
%tmp2 = load float* %arrayidx
; Build the splat one lane at a time (lanes 0..3 all receive %tmp2).
%vecinit = insertelement <4 x float> undef, float %tmp2, i32 0
%vecinit5 = insertelement <4 x float> %vecinit, float %tmp2, i32 1
%vecinit7 = insertelement <4 x float> %vecinit5, float %tmp2, i32 2
%vecinit9 = insertelement <4 x float> %vecinit7, float %tmp2, i32 3
; Reinterpret the 128-bit vector as <2 x i64> to match the return type.
%0 = bitcast <4 x float> %vecinit9 to <2 x i64>
ret <2 x i64> %0
}
define <2 x i64> @t2() nounwind ssp {
entry:
; CHECK: t2:
@ -30,14 +15,3 @@ entry:
%0 = bitcast <4 x float> %vecinit9 to <2 x i64>
ret <2 x i64> %0
}
; t3: splat of an incoming float argument.
; Only the third parameter (%tmp3) is used; it is inserted into all four lanes
; of a <4 x float> and returned. The directives below expect a single pshufd
; with an (%esp) memory operand and immediate -86 (0xAA as an 8-bit value,
; i.e. the 2-bit pattern 10 repeated four times).
; NOTE(review): the (%esp) operand presumably relies on the incoming argument
; area being 16-byte aligned -- confirm against the target's stack alignment.
define <4 x float> @t3(float %tmp1, float %tmp2, float %tmp3) nounwind readnone ssp {
entry:
; CHECK: t3:
; CHECK: pshufd $-86, (%esp), %xmm0
; Build the splat one lane at a time (lanes 0..3 all receive %tmp3).
%0 = insertelement <4 x float> undef, float %tmp3, i32 0
%1 = insertelement <4 x float> %0, float %tmp3, i32 1
%2 = insertelement <4 x float> %1, float %tmp3, i32 2
%3 = insertelement <4 x float> %2, float %tmp3, i32 3
ret <4 x float> %3
}