forked from OSchip/llvm-project
Don't lower a splat vector load to an esp-relative load if the
stack may be misaligned. Update test accordingly. Patch by Evan Cheng! llvm-svn: 94291
This commit is contained in:
parent
1deb09c28d
commit
c1451d764f
|
@ -3390,17 +3390,10 @@ X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
|
||||||
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
|
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
|
||||||
if (DAG.InferPtrAlignment(Ptr) < 16) {
|
if (DAG.InferPtrAlignment(Ptr) < 16) {
|
||||||
if (MFI->isFixedObjectIndex(FI)) {
|
if (MFI->isFixedObjectIndex(FI)) {
|
||||||
// Can't change the alignment. Reference stack + offset explicitly
|
// Can't change the alignment. FIXME: It's possible to compute
|
||||||
// if stack pointer is at least 16-byte aligned.
|
// the exact stack offset and reference FI + adjust offset instead.
|
||||||
unsigned StackAlign = Subtarget->getStackAlignment();
|
// If someone *really* cares about this, that's the way to implement it.
|
||||||
if (StackAlign < 16)
|
return SDValue();
|
||||||
return SDValue();
|
|
||||||
Offset = MFI->getObjectOffset(FI) + Offset;
|
|
||||||
SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr,
|
|
||||||
getPointerTy());
|
|
||||||
Ptr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
|
|
||||||
DAG.getConstant(Offset & ~15, getPointerTy()));
|
|
||||||
Offset %= 16;
|
|
||||||
} else {
|
} else {
|
||||||
MFI->setObjectAlignment(FI, 16);
|
MFI->setObjectAlignment(FI, 16);
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,21 +1,6 @@
|
||||||
; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 | FileCheck %s
|
; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 | FileCheck %s
|
||||||
; rdar://7434544
|
; rdar://7434544
|
||||||
|
|
||||||
define <2 x i64> @t1() nounwind ssp {
|
|
||||||
entry:
|
|
||||||
; CHECK: t1:
|
|
||||||
; CHECK: pshufd $0, (%esp), %xmm0
|
|
||||||
%array = alloca [8 x float], align 16
|
|
||||||
%arrayidx = getelementptr inbounds [8 x float]* %array, i32 0, i32 0
|
|
||||||
%tmp2 = load float* %arrayidx
|
|
||||||
%vecinit = insertelement <4 x float> undef, float %tmp2, i32 0
|
|
||||||
%vecinit5 = insertelement <4 x float> %vecinit, float %tmp2, i32 1
|
|
||||||
%vecinit7 = insertelement <4 x float> %vecinit5, float %tmp2, i32 2
|
|
||||||
%vecinit9 = insertelement <4 x float> %vecinit7, float %tmp2, i32 3
|
|
||||||
%0 = bitcast <4 x float> %vecinit9 to <2 x i64>
|
|
||||||
ret <2 x i64> %0
|
|
||||||
}
|
|
||||||
|
|
||||||
define <2 x i64> @t2() nounwind ssp {
|
define <2 x i64> @t2() nounwind ssp {
|
||||||
entry:
|
entry:
|
||||||
; CHECK: t2:
|
; CHECK: t2:
|
||||||
|
@ -30,14 +15,3 @@ entry:
|
||||||
%0 = bitcast <4 x float> %vecinit9 to <2 x i64>
|
%0 = bitcast <4 x float> %vecinit9 to <2 x i64>
|
||||||
ret <2 x i64> %0
|
ret <2 x i64> %0
|
||||||
}
|
}
|
||||||
|
|
||||||
define <4 x float> @t3(float %tmp1, float %tmp2, float %tmp3) nounwind readnone ssp {
|
|
||||||
entry:
|
|
||||||
; CHECK: t3:
|
|
||||||
; CHECK: pshufd $-86, (%esp), %xmm0
|
|
||||||
%0 = insertelement <4 x float> undef, float %tmp3, i32 0
|
|
||||||
%1 = insertelement <4 x float> %0, float %tmp3, i32 1
|
|
||||||
%2 = insertelement <4 x float> %1, float %tmp3, i32 2
|
|
||||||
%3 = insertelement <4 x float> %2, float %tmp3, i32 3
|
|
||||||
ret <4 x float> %3
|
|
||||||
}
|
|
||||||
|
|
Loading…
Reference in New Issue