Don't lower a splat vector load to an esp-relative load if the stack may be misaligned.

Update test accordingly. Patch by Evan Cheng! llvm-svn: 94291
2010-01-23 06:02:43 +00:00 · 2010-01-23 06:02:43 +00:00 · c1451d764f
parent 1deb09c28d
commit c1451d764f
2 changed files with 4 additions and 37 deletions
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3390,17 +3390,10 @@ X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
    MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
    if (DAG.InferPtrAlignment(Ptr) < 16) {
      if (MFI->isFixedObjectIndex(FI)) {
-        // Can't change the alignment. Reference stack + offset explicitly
+        // Can't change the alignment. FIXME: It's possible to compute
-        // if stack pointer is at least 16-byte aligned.
+        // the exact stack offset and reference FI + adjust offset instead.
-        unsigned StackAlign = Subtarget->getStackAlignment();
+        // If someone *really* cares about this. That's the way to implement it.
-        if (StackAlign < 16)
+        return SDValue();
-          return SDValue();
-        Offset = MFI->getObjectOffset(FI) + Offset;
-        SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr,
-                                              getPointerTy());
-        Ptr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
-                          DAG.getConstant(Offset & ~15, getPointerTy()));
-        Offset %= 16;
      } else {
        MFI->setObjectAlignment(FI, 16);
      }
--- a/llvm/test/CodeGen/X86/splat-scalar-load.ll
+++ b/llvm/test/CodeGen/X86/splat-scalar-load.ll
@@ -1,21 +1,6 @@
 ; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 | FileCheck %s
 ; rdar://7434544
-define <2 x i64> @t1() nounwind ssp {
-entry:
-; CHECK: t1:
-; CHECK: pshufd	$0, (%esp), %xmm0
-  %array = alloca [8 x float], align 16
-  %arrayidx = getelementptr inbounds [8 x float]* %array, i32 0, i32 0
-  %tmp2 = load float* %arrayidx
-  %vecinit = insertelement <4 x float> undef, float %tmp2, i32 0
-  %vecinit5 = insertelement <4 x float> %vecinit, float %tmp2, i32 1
-  %vecinit7 = insertelement <4 x float> %vecinit5, float %tmp2, i32 2
-  %vecinit9 = insertelement <4 x float> %vecinit7, float %tmp2, i32 3
-  %0 = bitcast <4 x float> %vecinit9 to <2 x i64>
-  ret <2 x i64> %0
-}
 define <2 x i64> @t2() nounwind ssp {
 entry:
 ; CHECK: t2:
@@ -30,14 +15,3 @@ entry:
  %0 = bitcast <4 x float> %vecinit9 to <2 x i64>
  ret <2 x i64> %0
 }
-define <4 x float> @t3(float %tmp1, float %tmp2, float %tmp3) nounwind readnone ssp {
-entry:
-; CHECK: t3:
-; CHECK: pshufd	$-86, (%esp), %xmm0
-  %0 = insertelement <4 x float> undef, float %tmp3, i32 0
-  %1 = insertelement <4 x float> %0, float %tmp3, i32 1
-  %2 = insertelement <4 x float> %1, float %tmp3, i32 2
-  %3 = insertelement <4 x float> %2, float %tmp3, i32 3
-  ret <4 x float> %3
-}