Expand unaligned 32 bit loads from an address which is a constant
offset from a 32 bit aligned base as follows:

  ldw low, base[offset >> 2]
  ldw high, base[(offset >> 2) + 1]
  shr low_shifted, low, (offset & 0x3) * 8
  shl high_shifted, high, 32 - (offset & 0x3) * 8
  or result, low_shifted, high_shifted

Expand 32 bit loads / stores with 16 bit alignment into two 16 bit
loads / stores.

llvm-svn: 75902
2009-07-16 10:42:35 +00:00 · 2009-07-16 10:42:35 +00:00 · bfdc557c8a
parent 25b33cb035
commit bfdc557c8a
3 changed files with 140 additions and 0 deletions
--- a/llvm/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/llvm/lib/Target/XCore/XCoreISelLowering.cpp
@ -325,6 +325,44 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG)
  return DAG.getNode(XCoreISD::DPRelativeWrapper, dl, MVT::i32, JTI);
 }
 /// Match an address of the form (add Base, Constant) where Base can be shown
 /// to be word (4 byte) aligned.
 ///
 /// Base is accepted when it is, or is (add Root, (shl X, C)) with constant
 /// C >= 2 where Root is, one of:
 ///   - a frame index, or
 ///   - a DP / CP relative wrapper node.
 ///
 /// \param Addr         The address to inspect.
 /// \param AlignedBase  Set to operand 0 of Addr on success; untouched otherwise.
 /// \param Offset       Set to the sign extended constant operand of Addr on
 ///                     success; untouched otherwise.
 /// \returns true if the pattern matched.
 static bool
 IsWordAlignedBasePlusConstantOffset(SDValue Addr, SDValue &AlignedBase,
                                     int64_t &Offset)
 {
  if (Addr.getOpcode() != ISD::ADD)
    return false;

  ConstantSDNode *OffsetNode = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
  if (!OffsetNode)
    return false;

  const SDValue &Base = Addr.getOperand(0);

  // By default the alignment question is asked of Base itself. If Base adds a
  // value shifted left by at least 2 (i.e. a multiple of 4), that term cannot
  // disturb word alignment, so ask the question of the other addend instead.
  const SDValue *Root = &Base;
  if (Base.getOpcode() == ISD::ADD) {
    const SDValue &Scaled = Base.getOperand(1);
    if (Scaled.getOpcode() == ISD::SHL) {
      ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Scaled.getOperand(1));
      if (ShiftAmt && ShiftAmt->getSExtValue() >= 2)
        Root = &Base.getOperand(0);
    }
  }

  // All frame indices and all dp / cp relative addresses are word aligned.
  const bool RootIsWordAligned =
      isa<FrameIndexSDNode>(*Root) ||
      Root->getOpcode() == XCoreISD::DPRelativeWrapper ||
      Root->getOpcode() == XCoreISD::CPRelativeWrapper;
  if (!RootIsWordAligned)
    return false;

  AlignedBase = Base;
  Offset = OffsetNode->getSExtValue();
  return true;
 }
 SDValue XCoreTargetLowering::
 LowerLOAD(SDValue Op, SelectionDAG &DAG)
 {
@ -344,6 +382,61 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG)
  SDValue BasePtr = LD->getBasePtr();
  DebugLoc dl = Op.getDebugLoc();
  SDValue Base;
  int64_t Offset;
  if (!LD->isVolatile() &&
      IsWordAlignedBasePlusConstantOffset(BasePtr, Base, Offset)) {
    if (Offset % 4 == 0) {
      // We've managed to infer better alignment information than the load
      // already has. Use an aligned load.
      return DAG.getLoad(getPointerTy(), dl, Chain, BasePtr, NULL, 4);
    }
    // Lower to
    // ldw low, base[offset >> 2]
    // ldw high, base[(offset >> 2) + 1]
    // shr low_shifted, low, (offset & 0x3) * 8
    // shl high_shifted, high, 32 - (offset & 0x3) * 8
    // or result, low_shifted, high_shifted
    SDValue LowOffset = DAG.getConstant(Offset & ~0x3, MVT::i32);
    SDValue HighOffset = DAG.getConstant((Offset & ~0x3) + 4, MVT::i32);
    SDValue LowShift = DAG.getConstant((Offset & 0x3) * 8, MVT::i32);
    SDValue HighShift = DAG.getConstant(32 - (Offset & 0x3) * 8, MVT::i32);
    SDValue LowAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Base, LowOffset);
    SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Base, HighOffset);
    SDValue Low = DAG.getLoad(getPointerTy(), dl, Chain,
                               LowAddr, NULL, 4);
    SDValue High = DAG.getLoad(getPointerTy(), dl, Chain,
                               HighAddr, NULL, 4);
    SDValue LowShifted = DAG.getNode(ISD::SRL, dl, MVT::i32, Low, LowShift);
    SDValue HighShifted = DAG.getNode(ISD::SHL, dl, MVT::i32, High, HighShift);
    SDValue Result = DAG.getNode(ISD::OR, dl, MVT::i32, LowShifted, HighShifted);
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Low.getValue(1),
                             High.getValue(1));
    SDValue Ops[] = { Result, Chain };
    return DAG.getMergeValues(Ops, 2, dl);
  }
  if (LD->getAlignment() == 2) {
    // The address is only half word aligned: split the 32 bit load into two
    // 16 bit loads and recombine them as Low | (High << 16).
    int SVOffset = LD->getSrcValueOffset();
    // The low half is zero extended so its upper 16 bits do not pollute the OR.
    SDValue Low = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, Chain,
                                 BasePtr, LD->getSrcValue(), SVOffset, MVT::i16,
                                 LD->isVolatile(), 2);
    // The high half word lives 2 bytes (not 1) past the base address; this
    // matches the SVOffset + 2 passed to the load below.
    SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, BasePtr,
                                   DAG.getConstant(2, MVT::i32));
    // Any extension is fine here: the upper 16 bits are shifted out below.
    SDValue High = DAG.getExtLoad(ISD::EXTLOAD, dl, MVT::i32, Chain,
                                  HighAddr, LD->getSrcValue(), SVOffset + 2,
                                  MVT::i16, LD->isVolatile(), 2);
    SDValue HighShifted = DAG.getNode(ISD::SHL, dl, MVT::i32, High,
                                      DAG.getConstant(16, MVT::i32));
    SDValue Result = DAG.getNode(ISD::OR, dl, MVT::i32, Low, HighShifted);
    // Merge the output chains of both loads so users depend on both.
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Low.getValue(1),
                             High.getValue(1));
    SDValue Ops[] = { Result, Chain };
    return DAG.getMergeValues(Ops, 2, dl);
  }
  // Lower to a call to __misaligned_load(BasePtr).
  const Type *IntPtrTy = getTargetData()->getIntPtrType();
  TargetLowering::ArgListTy Args;
@ -385,6 +478,22 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG)
  SDValue Value = ST->getValue();
  DebugLoc dl = Op.getDebugLoc();
  if (ST->getAlignment() == 2) {
    // The address is only half word aligned: split the 32 bit store into two
    // truncating 16 bit stores of the low and high halves of Value.
    int SVOffset = ST->getSrcValueOffset();
    SDValue Low = Value;
    SDValue High = DAG.getNode(ISD::SRL, dl, MVT::i32, Value,
                                      DAG.getConstant(16, MVT::i32));
    SDValue StoreLow = DAG.getTruncStore(Chain, dl, Low, BasePtr,
                                         ST->getSrcValue(), SVOffset, MVT::i16,
                                         ST->isVolatile(), 2);
    // The high half word lives 2 bytes (not 1) past the base address; this
    // matches the SVOffset + 2 passed to the store below. Using 1 would
    // overlap the low half word store.
    SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, BasePtr,
                                   DAG.getConstant(2, MVT::i32));
    SDValue StoreHigh = DAG.getTruncStore(Chain, dl, High, HighAddr,
                                          ST->getSrcValue(), SVOffset + 2,
                                          MVT::i16, ST->isVolatile(), 2);
    // Both stores hang off the incoming chain; join their output chains.
    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, StoreLow, StoreHigh);
  }
  // Lower to a call to __misaligned_store(BasePtr, Value).
  const Type *IntPtrTy = getTargetData()->getIntPtrType();
  TargetLowering::ArgListTy Args;
--- a/llvm/test/CodeGen/XCore/unaligned_load.ll
+++ b/llvm/test/CodeGen/XCore/unaligned_load.ll
@ -1,5 +1,11 @@
 ; RUN: llvm-as < %s | llc -march=xcore > %t1.s
 ; RUN: grep "bl __misaligned_load" %t1.s | count 1
 ; RUN: grep ld16s %t1.s | count 2
 ; RUN: grep ldw %t1.s | count 2
 ; RUN: grep shl %t1.s | count 2
 ; RUN: grep shr %t1.s | count 1
 ; RUN: grep zext %t1.s | count 1
 ; RUN: grep "or " %t1.s | count 2
 ; Byte aligned load. Expands to call to __misaligned_load.
 define i32 @align1(i32* %p) nounwind {
@ -7,3 +13,19 @@ entry:
 	%0 = load i32* %p, align 1		; <i32> [#uses=1]
 	ret i32 %0
 }
 ; Half word aligned load. Expands to two 16bit loads.
 define i32 @align2(i32* %p) nounwind {
 entry:
 	%0 = load i32* %p, align 2		; <i32> [#uses=1]
 	ret i32 %0
 }
@a = global [5 x i8] zeroinitializer, align 4
 ; Constant offset from word aligned base. Expands to two 32bit loads.
 define i32 @align3() nounwind {
 entry:
 	%0 = load i32* bitcast (i8* getelementptr ([5 x i8]* @a, i32 0, i32 1) to i32*), align 1
 	ret i32 %0
 }
--- a/llvm/test/CodeGen/XCore/unaligned_store.ll
+++ b/llvm/test/CodeGen/XCore/unaligned_store.ll
@ -1,5 +1,7 @@
 ; RUN: llvm-as < %s | llc -march=xcore > %t1.s
 ; RUN: grep "bl __misaligned_store" %t1.s | count 1
 ; RUN: grep st16 %t1.s | count 2
 ; RUN: grep shr %t1.s | count 1
 ; Byte aligned store. Expands to call to __misaligned_store.
 define void @align1(i32* %p, i32 %val) nounwind {
@ -7,3 +9,10 @@ entry:
 	store i32 %val, i32* %p, align 1
 	ret void
 }
 ; Half word aligned store. Expands to two 16bit stores.
 define void @align2(i32* %p, i32 %val) nounwind {
 entry:
 	store i32 %val, i32* %p, align 2
 	ret void
 }