[PowerPC] Fix small argument stack slot offset for LE

When small arguments (structures smaller than 8 bytes, or values of type "float") are passed in a stack slot under the ppc64 SVR4 ABI, they must reside in the least significant part of that slot. On big-endian (BE) targets, this means that an offset needs to be added to the stack address of the parameter; on little-endian (LE) targets, the least significant part of the slot has the same address as the slot itself, so no offset is needed.

This commit changes the PowerPC back-end ABI code to add the small-argument stack slot offset only for BE. It also adds test cases to verify the correct behavior on both BE and LE.

llvm-svn: 211368
2014-06-20 16:34:05 +00:00 · 2014-06-20 16:34:05 +00:00 · 59c6ab20d6
parent d30a1f2cb2
commit 59c6ab20d6
3 changed files with 138 additions and 11 deletions
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@ -2433,6 +2433,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
                                      SmallVectorImpl<SDValue> &InVals) const {
  // TODO: add description of PPC stack frame format, or at least some docs.
  //
+  bool isLittleEndian = Subtarget.isLittleEndian();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
@ -2533,7 +2534,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
      }

      // All aggregates smaller than 8 bytes must be passed right-justified.
-      if (ObjSize < PtrByteSize)
+      if (ObjSize < PtrByteSize && !isLittleEndian)
        CurArgOffset = CurArgOffset + (PtrByteSize - ObjSize);
      // The value of the object is its address.
      int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true);
@ -2683,9 +2684,9 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
    // We need to load the argument to a virtual register if we determined
    // above that we ran out of physical registers of the appropriate type.
    if (needsLoad) {
-      int FI = MFI->CreateFixedObject(ObjSize,
-                                      CurArgOffset + (ArgSize - ObjSize),
-                                      isImmutable);
+      if (ObjSize < ArgSize && !isLittleEndian)
+        CurArgOffset += ArgSize - ObjSize;
+      int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
                           false, false, false, 0);
@ -4034,6 +4035,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
                                    SDLoc dl, SelectionDAG &DAG,
                                    SmallVectorImpl<SDValue> &InVals) const {

+  bool isLittleEndian = Subtarget.isLittleEndian();
  unsigned NumOps = Outs.size();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
@ -4177,9 +4179,12 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
      }

      if (GPR_idx == NumGPRs && Size < 8) {
-        SDValue Const = DAG.getConstant(PtrByteSize - Size,
-                                        PtrOff.getValueType());
-        SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
+        SDValue AddPtr = PtrOff;
+        if (!isLittleEndian) {
+          SDValue Const = DAG.getConstant(PtrByteSize - Size,
+                                          PtrOff.getValueType());
+          AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
+        }
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
                                                          CallSeqStart,
                                                          Flags, DAG, dl);
@ -4214,8 +4219,11 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
        // small aggregates, particularly for packed ones.
        // FIXME: It would be preferable to use the slot in the
        // parameter save area instead of a new local variable.
-        SDValue Const = DAG.getConstant(8 - Size, PtrOff.getValueType());
-        SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
+        SDValue AddPtr = PtrOff;
+        if (!isLittleEndian) {
+          SDValue Const = DAG.getConstant(8 - Size, PtrOff.getValueType());
+          AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
+        }
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
                                                          CallSeqStart,
                                                          Flags, DAG, dl);
@ -4276,7 +4284,8 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
          // must be passed right-justified in the stack doubleword, and
          // in the GPR, if one is available.
          SDValue StoreOff;
-          if (Arg.getSimpleValueType().SimpleTy == MVT::f32) {
+          if (Arg.getSimpleValueType().SimpleTy == MVT::f32 &&
+              !isLittleEndian) {
            SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
            StoreOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
          } else
@ -4300,7 +4309,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
      } else {
        // Single-precision floating-point values are mapped to the
        // second (rightmost) word of the stack doubleword.
-        if (Arg.getValueType() == MVT::f32) {
+        if (Arg.getValueType() == MVT::f32 && !isLittleEndian) {
          SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
          PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
        }
--- a/llvm/test/CodeGen/PowerPC/ppc64-smallarg.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-smallarg.ll
@ -0,0 +1,59 @@
+; Verify that small structures and float arguments are passed in the least
+; significant (on BE: highest-addressed) part of a stack slot doubleword.
+
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%struct.large_arg = type { [8 x i64] }
+%struct.small_arg = type { i16, i8 }
+
+@gl = common global %struct.large_arg zeroinitializer, align 8
+@gs = common global %struct.small_arg zeroinitializer, align 2
+@gf = common global float 0.000000e+00, align 4
+
+define void @callee1(%struct.small_arg* noalias nocapture sret %agg.result, %struct.large_arg* byval nocapture readnone %pad, %struct.small_arg* byval nocapture readonly %x) {
+entry:
+  %0 = bitcast %struct.small_arg* %x to i32*
+  %1 = bitcast %struct.small_arg* %agg.result to i32*
+  %2 = load i32* %0, align 2
+  store i32 %2, i32* %1, align 2
+  ret void
+}
+; CHECK: @callee1
+; CHECK: lwz {{[0-9]+}}, 124(1)
+; CHECK: blr
+
+define void @caller1() {
+entry:
+  %tmp = alloca %struct.small_arg, align 2
+  call void @test1(%struct.small_arg* sret %tmp, %struct.large_arg* byval @gl, %struct.small_arg* byval @gs)
+  ret void
+}
+; CHECK: @caller1
+; CHECK: stw {{[0-9]+}}, 124(1)
+; CHECK: bl test1
+
+declare void @test1(%struct.small_arg* sret, %struct.large_arg* byval, %struct.small_arg* byval)
+
+define float @callee2(float %pad1, float %pad2, float %pad3, float %pad4, float %pad5, float %pad6, float %pad7, float %pad8, float %pad9, float %pad10, float %pad11, float %pad12, float %pad13, float %x) {
+entry:
+  ret float %x
+}
+; CHECK: @callee2
+; CHECK: lfs {{[0-9]+}}, 156(1)
+; CHECK: blr
+
+define void @caller2() {
+entry:
+  %0 = load float* @gf, align 4
+  %call = tail call float @test2(float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float %0)
+  ret void
+}
+; CHECK: @caller2
+; CHECK: stfs {{[0-9]+}}, 156(1)
+; CHECK: bl test2
+
+declare float @test2(float, float, float, float, float, float, float, float, float, float, float, float, float, float)
+
--- a/llvm/test/CodeGen/PowerPC/ppc64le-smallarg.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64le-smallarg.ll
@ -0,0 +1,59 @@
+; Verify that small structures and float arguments are passed in the least
+; significant (on LE: lowest-addressed) part of a stack slot doubleword.
+
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le-unknown-linux-gnu"
+
+%struct.large_arg = type { [8 x i64] }
+%struct.small_arg = type { i16, i8 }
+
+@gl = common global %struct.large_arg zeroinitializer, align 8
+@gs = common global %struct.small_arg zeroinitializer, align 2
+@gf = common global float 0.000000e+00, align 4
+
+define void @callee1(%struct.small_arg* noalias nocapture sret %agg.result, %struct.large_arg* byval nocapture readnone %pad, %struct.small_arg* byval nocapture readonly %x) {
+entry:
+  %0 = bitcast %struct.small_arg* %x to i32*
+  %1 = bitcast %struct.small_arg* %agg.result to i32*
+  %2 = load i32* %0, align 2
+  store i32 %2, i32* %1, align 2
+  ret void
+}
+; CHECK: @callee1
+; CHECK: lwz {{[0-9]+}}, 120(1)
+; CHECK: blr
+
+define void @caller1() {
+entry:
+  %tmp = alloca %struct.small_arg, align 2
+  call void @test1(%struct.small_arg* sret %tmp, %struct.large_arg* byval @gl, %struct.small_arg* byval @gs)
+  ret void
+}
+; CHECK: @caller1
+; CHECK: stw {{[0-9]+}}, 120(1)
+; CHECK: bl test1
+
+declare void @test1(%struct.small_arg* sret, %struct.large_arg* byval, %struct.small_arg* byval)
+
+define float @callee2(float %pad1, float %pad2, float %pad3, float %pad4, float %pad5, float %pad6, float %pad7, float %pad8, float %pad9, float %pad10, float %pad11, float %pad12, float %pad13, float %x) {
+entry:
+  ret float %x
+}
+; CHECK: @callee2
+; CHECK: lfs {{[0-9]+}}, 152(1)
+; CHECK: blr
+
+define void @caller2() {
+entry:
+  %0 = load float* @gf, align 4
+  %call = tail call float @test2(float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float %0)
+  ret void
+}
+; CHECK: @caller2
+; CHECK: stfs {{[0-9]+}}, 152(1)
+; CHECK: bl test2
+
+declare float @test2(float, float, float, float, float, float, float, float, float, float, float, float, float, float)
+