Expand unaligned 32 bit loads from an address which is a constant
offset from a 32 bit aligned base as follows:

  ldw low, base[offset >> 2]
  ldw high, base[(offset >> 2) + 1]
  shr low_shifted, low, (offset & 0x3) * 8
  shl high_shifted, high, 32 - (offset & 0x3) * 8
  or result, low_shifted, high_shifted

Expand 32 bit loads / stores with 16 bit alignment into two 16 bit
loads / stores.

llvm-svn: 75902
This commit is contained in:
Richard Osborne 2009-07-16 10:42:35 +00:00
parent 25b33cb035
commit bfdc557c8a
3 changed files with 140 additions and 0 deletions

View File

@ -325,6 +325,44 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG)
return DAG.getNode(XCoreISD::DPRelativeWrapper, dl, MVT::i32, JTI);
}
/// Returns true if Addr is known to be of the form Base + constant offset,
/// where Base is word (4 byte) aligned. On success, AlignedBase is set to
/// the aligned base value and Offset to the byte offset from it.
///
/// Recognised word aligned bases are frame indices and dp / cp relative
/// wrappers, optionally with an added index scaled by at least 4
/// (i.e. base + (x shl n) with n >= 2, which preserves word alignment).
static bool
IsWordAlignedBasePlusConstantOffset(SDValue Addr, SDValue &AlignedBase,
                                    int64_t &Offset)
{
  if (Addr.getOpcode() != ISD::ADD) {
    return false;
  }
  // The offset must be an immediate operand.
  ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
  if (!CN) {
    return false;
  }
  int64_t off = CN->getSExtValue();
  const SDValue &Base = Addr.getOperand(0);
  const SDValue *Root = &Base;
  if (Base.getOpcode() == ISD::ADD &&
      Base.getOperand(1).getOpcode() == ISD::SHL) {
    // An index shifted left by 2 or more is a multiple of 4, so adding it
    // to a word aligned value keeps the result word aligned. Look through
    // the add and check the alignment of the other operand instead.
    ConstantSDNode *ShAmt =
      dyn_cast<ConstantSDNode>(Base.getOperand(1).getOperand(1));
    if (ShAmt && (ShAmt->getSExtValue() >= 2)) {
      Root = &Base.getOperand(0);
    }
  }
  if (isa<FrameIndexSDNode>(*Root)) {
    // All frame indices are word aligned
    AlignedBase = Base;
    Offset = off;
    return true;
  }
  if (Root->getOpcode() == XCoreISD::DPRelativeWrapper ||
      Root->getOpcode() == XCoreISD::CPRelativeWrapper) {
    // All dp / cp relative addresses are word aligned
    AlignedBase = Base;
    Offset = off;
    return true;
  }
  return false;
}
SDValue XCoreTargetLowering::
LowerLOAD(SDValue Op, SelectionDAG &DAG)
{
@ -344,6 +382,61 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG)
SDValue BasePtr = LD->getBasePtr();
DebugLoc dl = Op.getDebugLoc();
SDValue Base;
int64_t Offset;
if (!LD->isVolatile() &&
IsWordAlignedBasePlusConstantOffset(BasePtr, Base, Offset)) {
if (Offset % 4 == 0) {
// We've managed to infer better alignment information than the load
// already has. Use an aligned load.
return DAG.getLoad(getPointerTy(), dl, Chain, BasePtr, NULL, 4);
}
// Lower to
// ldw low, base[offset >> 2]
// ldw high, base[(offset >> 2) + 1]
// shr low_shifted, low, (offset & 0x3) * 8
// shl high_shifted, high, 32 - (offset & 0x3) * 8
// or result, low_shifted, high_shifted
SDValue LowOffset = DAG.getConstant(Offset & ~0x3, MVT::i32);
SDValue HighOffset = DAG.getConstant((Offset & ~0x3) + 4, MVT::i32);
SDValue LowShift = DAG.getConstant((Offset & 0x3) * 8, MVT::i32);
SDValue HighShift = DAG.getConstant(32 - (Offset & 0x3) * 8, MVT::i32);
SDValue LowAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Base, LowOffset);
SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Base, HighOffset);
SDValue Low = DAG.getLoad(getPointerTy(), dl, Chain,
LowAddr, NULL, 4);
SDValue High = DAG.getLoad(getPointerTy(), dl, Chain,
HighAddr, NULL, 4);
SDValue LowShifted = DAG.getNode(ISD::SRL, dl, MVT::i32, Low, LowShift);
SDValue HighShifted = DAG.getNode(ISD::SHL, dl, MVT::i32, High, HighShift);
SDValue Result = DAG.getNode(ISD::OR, dl, MVT::i32, LowShifted, HighShifted);
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Low.getValue(1),
High.getValue(1));
SDValue Ops[] = { Result, Chain };
return DAG.getMergeValues(Ops, 2, dl);
}
if (LD->getAlignment() == 2) {
int SVOffset = LD->getSrcValueOffset();
SDValue Low = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, Chain,
BasePtr, LD->getSrcValue(), SVOffset, MVT::i16,
LD->isVolatile(), 2);
SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, BasePtr,
DAG.getConstant(1, MVT::i32));
SDValue High = DAG.getExtLoad(ISD::EXTLOAD, dl, MVT::i32, Chain,
HighAddr, LD->getSrcValue(), SVOffset + 2,
MVT::i16, LD->isVolatile(), 2);
SDValue HighShifted = DAG.getNode(ISD::SHL, dl, MVT::i32, High,
DAG.getConstant(16, MVT::i32));
SDValue Result = DAG.getNode(ISD::OR, dl, MVT::i32, Low, HighShifted);
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Low.getValue(1),
High.getValue(1));
SDValue Ops[] = { Result, Chain };
return DAG.getMergeValues(Ops, 2, dl);
}
// Lower to a call to __misaligned_load(BasePtr).
const Type *IntPtrTy = getTargetData()->getIntPtrType();
TargetLowering::ArgListTy Args;
@ -385,6 +478,22 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG)
SDValue Value = ST->getValue();
DebugLoc dl = Op.getDebugLoc();
if (ST->getAlignment() == 2) {
int SVOffset = ST->getSrcValueOffset();
SDValue Low = Value;
SDValue High = DAG.getNode(ISD::SRL, dl, MVT::i32, Value,
DAG.getConstant(16, MVT::i32));
SDValue StoreLow = DAG.getTruncStore(Chain, dl, Low, BasePtr,
ST->getSrcValue(), SVOffset, MVT::i16,
ST->isVolatile(), 2);
SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, BasePtr,
DAG.getConstant(1, MVT::i32));
SDValue StoreHigh = DAG.getTruncStore(Chain, dl, High, HighAddr,
ST->getSrcValue(), SVOffset + 2,
MVT::i16, ST->isVolatile(), 2);
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, StoreLow, StoreHigh);
}
// Lower to a call to __misaligned_store(BasePtr, Value).
const Type *IntPtrTy = getTargetData()->getIntPtrType();
TargetLowering::ArgListTy Args;

View File

@ -1,5 +1,11 @@
; RUN: llvm-as < %s | llc -march=xcore > %t1.s
; RUN: grep "bl __misaligned_load" %t1.s | count 1
; RUN: grep ld16s %t1.s | count 2
; RUN: grep ldw %t1.s | count 2
; RUN: grep shl %t1.s | count 2
; RUN: grep shr %t1.s | count 1
; RUN: grep zext %t1.s | count 1
; RUN: grep "or " %t1.s | count 2
; Byte aligned load. Expands to call to __misaligned_load.
define i32 @align1(i32* %p) nounwind {
@ -7,3 +13,19 @@ entry:
%0 = load i32* %p, align 1 ; <i32> [#uses=1]
ret i32 %0
}
; Half word aligned load. Expands to two 16bit loads.
; NOTE(review): the "grep ld16s ... count 2" RUN line above presumably
; counts the two halfword loads emitted for this function — verify if the
; file gains more 16 bit loads.
define i32 @align2(i32* %p) nounwind {
entry:
%0 = load i32* %p, align 2 ; <i32> [#uses=1]
ret i32 %0
}
; Word aligned global; loading at a byte offset into it exercises the
; "constant offset from word aligned base" lowering.
@a = global [5 x i8] zeroinitializer, align 4
; Constant offset from word aligned base. Expands to two 32bit loads.
define i32 @align3() nounwind {
entry:
; Load at byte offset 1 from the align-4 global @a: the address is
; base + 1, so the lowering can use two aligned word loads combined
; with shifts and an or instead of a __misaligned_load call.
%0 = load i32* bitcast (i8* getelementptr ([5 x i8]* @a, i32 0, i32 1) to i32*), align 1
ret i32 %0
}

View File

@ -1,5 +1,7 @@
; RUN: llvm-as < %s | llc -march=xcore > %t1.s
; RUN: grep "bl __misaligned_store" %t1.s | count 1
; RUN: grep st16 %t1.s | count 2
; RUN: grep shr %t1.s | count 1
; Byte aligned store. Expands to call to __misaligned_store.
define void @align1(i32* %p, i32 %val) nounwind {
@ -7,3 +9,10 @@ entry:
store i32 %val, i32* %p, align 1
ret void
}
; Half word aligned store. Expands to two 16bit stores.
; NOTE(review): the "grep st16 ... count 2" RUN line above presumably
; counts the low/high halfword stores emitted for this function.
define void @align2(i32* %p, i32 %val) nounwind {
entry:
store i32 %val, i32* %p, align 2
ret void
}