forked from OSchip/llvm-project
Expand unaligned 32 bit loads from an address which is a constant
offset from a 32 bit aligned base as follows:

  ldw low, base[offset >> 2]
  ldw high, base[(offset >> 2) + 1]
  shr low_shifted, low, (offset & 0x3) * 8
  shl high_shifted, high, 32 - (offset & 0x3) * 8
  or result, low_shifted, high_shifted

Expand 32 bit loads / stores with 16 bit alignment into two 16 bit
loads / stores.

llvm-svn: 75902
This commit is contained in:
parent
25b33cb035
commit
bfdc557c8a
|
@ -325,6 +325,44 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG)
|
|||
return DAG.getNode(XCoreISD::DPRelativeWrapper, dl, MVT::i32, JTI);
|
||||
}
|
||||
|
||||
static bool
|
||||
IsWordAlignedBasePlusConstantOffset(SDValue Addr, SDValue &AlignedBase,
|
||||
int64_t &Offset)
|
||||
{
|
||||
if (Addr.getOpcode() != ISD::ADD) {
|
||||
return false;
|
||||
}
|
||||
ConstantSDNode *CN = 0;
|
||||
if (!(CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
|
||||
return false;
|
||||
}
|
||||
int64_t off = CN->getSExtValue();
|
||||
const SDValue &Base = Addr.getOperand(0);
|
||||
const SDValue *Root = &Base;
|
||||
if (Base.getOpcode() == ISD::ADD &&
|
||||
Base.getOperand(1).getOpcode() == ISD::SHL) {
|
||||
ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Base.getOperand(1)
|
||||
.getOperand(1));
|
||||
if (CN && (CN->getSExtValue() >= 2)) {
|
||||
Root = &Base.getOperand(0);
|
||||
}
|
||||
}
|
||||
if (isa<FrameIndexSDNode>(*Root)) {
|
||||
// All frame indicies are word aligned
|
||||
AlignedBase = Base;
|
||||
Offset = off;
|
||||
return true;
|
||||
}
|
||||
if (Root->getOpcode() == XCoreISD::DPRelativeWrapper ||
|
||||
Root->getOpcode() == XCoreISD::CPRelativeWrapper) {
|
||||
// All dp / cp relative addresses are word aligned
|
||||
AlignedBase = Base;
|
||||
Offset = off;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
SDValue XCoreTargetLowering::
|
||||
LowerLOAD(SDValue Op, SelectionDAG &DAG)
|
||||
{
|
||||
|
@ -344,6 +382,61 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG)
|
|||
SDValue BasePtr = LD->getBasePtr();
|
||||
DebugLoc dl = Op.getDebugLoc();
|
||||
|
||||
SDValue Base;
|
||||
int64_t Offset;
|
||||
if (!LD->isVolatile() &&
|
||||
IsWordAlignedBasePlusConstantOffset(BasePtr, Base, Offset)) {
|
||||
if (Offset % 4 == 0) {
|
||||
// We've managed to infer better alignment information than the load
|
||||
// already has. Use an aligned load.
|
||||
return DAG.getLoad(getPointerTy(), dl, Chain, BasePtr, NULL, 4);
|
||||
}
|
||||
// Lower to
|
||||
// ldw low, base[offset >> 2]
|
||||
// ldw high, base[(offset >> 2) + 1]
|
||||
// shr low_shifted, low, (offset & 0x3) * 8
|
||||
// shl high_shifted, high, 32 - (offset & 0x3) * 8
|
||||
// or result, low_shifted, high_shifted
|
||||
SDValue LowOffset = DAG.getConstant(Offset & ~0x3, MVT::i32);
|
||||
SDValue HighOffset = DAG.getConstant((Offset & ~0x3) + 4, MVT::i32);
|
||||
SDValue LowShift = DAG.getConstant((Offset & 0x3) * 8, MVT::i32);
|
||||
SDValue HighShift = DAG.getConstant(32 - (Offset & 0x3) * 8, MVT::i32);
|
||||
|
||||
SDValue LowAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Base, LowOffset);
|
||||
SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Base, HighOffset);
|
||||
|
||||
SDValue Low = DAG.getLoad(getPointerTy(), dl, Chain,
|
||||
LowAddr, NULL, 4);
|
||||
SDValue High = DAG.getLoad(getPointerTy(), dl, Chain,
|
||||
HighAddr, NULL, 4);
|
||||
SDValue LowShifted = DAG.getNode(ISD::SRL, dl, MVT::i32, Low, LowShift);
|
||||
SDValue HighShifted = DAG.getNode(ISD::SHL, dl, MVT::i32, High, HighShift);
|
||||
SDValue Result = DAG.getNode(ISD::OR, dl, MVT::i32, LowShifted, HighShifted);
|
||||
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Low.getValue(1),
|
||||
High.getValue(1));
|
||||
SDValue Ops[] = { Result, Chain };
|
||||
return DAG.getMergeValues(Ops, 2, dl);
|
||||
}
|
||||
|
||||
if (LD->getAlignment() == 2) {
|
||||
int SVOffset = LD->getSrcValueOffset();
|
||||
SDValue Low = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, Chain,
|
||||
BasePtr, LD->getSrcValue(), SVOffset, MVT::i16,
|
||||
LD->isVolatile(), 2);
|
||||
SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, BasePtr,
|
||||
DAG.getConstant(1, MVT::i32));
|
||||
SDValue High = DAG.getExtLoad(ISD::EXTLOAD, dl, MVT::i32, Chain,
|
||||
HighAddr, LD->getSrcValue(), SVOffset + 2,
|
||||
MVT::i16, LD->isVolatile(), 2);
|
||||
SDValue HighShifted = DAG.getNode(ISD::SHL, dl, MVT::i32, High,
|
||||
DAG.getConstant(16, MVT::i32));
|
||||
SDValue Result = DAG.getNode(ISD::OR, dl, MVT::i32, Low, HighShifted);
|
||||
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Low.getValue(1),
|
||||
High.getValue(1));
|
||||
SDValue Ops[] = { Result, Chain };
|
||||
return DAG.getMergeValues(Ops, 2, dl);
|
||||
}
|
||||
|
||||
// Lower to a call to __misaligned_load(BasePtr).
|
||||
const Type *IntPtrTy = getTargetData()->getIntPtrType();
|
||||
TargetLowering::ArgListTy Args;
|
||||
|
@ -385,6 +478,22 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG)
|
|||
SDValue Value = ST->getValue();
|
||||
DebugLoc dl = Op.getDebugLoc();
|
||||
|
||||
if (ST->getAlignment() == 2) {
|
||||
int SVOffset = ST->getSrcValueOffset();
|
||||
SDValue Low = Value;
|
||||
SDValue High = DAG.getNode(ISD::SRL, dl, MVT::i32, Value,
|
||||
DAG.getConstant(16, MVT::i32));
|
||||
SDValue StoreLow = DAG.getTruncStore(Chain, dl, Low, BasePtr,
|
||||
ST->getSrcValue(), SVOffset, MVT::i16,
|
||||
ST->isVolatile(), 2);
|
||||
SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, BasePtr,
|
||||
DAG.getConstant(1, MVT::i32));
|
||||
SDValue StoreHigh = DAG.getTruncStore(Chain, dl, High, HighAddr,
|
||||
ST->getSrcValue(), SVOffset + 2,
|
||||
MVT::i16, ST->isVolatile(), 2);
|
||||
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, StoreLow, StoreHigh);
|
||||
}
|
||||
|
||||
// Lower to a call to __misaligned_store(BasePtr, Value).
|
||||
const Type *IntPtrTy = getTargetData()->getIntPtrType();
|
||||
TargetLowering::ArgListTy Args;
|
||||
|
|
|
@ -1,5 +1,11 @@
|
|||
; RUN: llvm-as < %s | llc -march=xcore > %t1.s
|
||||
; RUN: grep "bl __misaligned_load" %t1.s | count 1
|
||||
; RUN: grep ld16s %t1.s | count 2
|
||||
; RUN: grep ldw %t1.s | count 2
|
||||
; RUN: grep shl %t1.s | count 2
|
||||
; RUN: grep shr %t1.s | count 1
|
||||
; RUN: grep zext %t1.s | count 1
|
||||
; RUN: grep "or " %t1.s | count 2
|
||||
|
||||
; Byte aligned load. Expands to call to __misaligned_load.
|
||||
define i32 @align1(i32* %p) nounwind {
|
||||
|
@ -7,3 +13,19 @@ entry:
|
|||
%0 = load i32* %p, align 1 ; <i32> [#uses=1]
|
||||
ret i32 %0
|
||||
}
|
||||
|
||||
; Half word aligned load. Expands to two 16bit loads.
|
||||
; Returns the i32 read through %p; the load is only guaranteed 2-byte
; alignment, which is less than the size of the value being loaded.
define i32 @align2(i32* %p) nounwind {
|
||||
entry:
|
||||
%0 = load i32* %p, align 2 ; <i32> [#uses=1]
|
||||
ret i32 %0
|
||||
}
|
||||
|
||||
@a = global [5 x i8] zeroinitializer, align 4
|
||||
|
||||
; Constant offset from word aligned base. Expands to two 32bit loads.
|
||||
; Returns the i32 read from byte offset 1 of the 4-byte aligned global @a.
; The load itself is only marked align 1; the address is a constant
; expression (element 1 of the i8 array, bitcast to i32*).
define i32 @align3() nounwind {
|
||||
entry:
|
||||
%0 = load i32* bitcast (i8* getelementptr ([5 x i8]* @a, i32 0, i32 1) to i32*), align 1
|
||||
ret i32 %0
|
||||
}
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
; RUN: llvm-as < %s | llc -march=xcore > %t1.s
|
||||
; RUN: grep "bl __misaligned_store" %t1.s | count 1
|
||||
; RUN: grep st16 %t1.s | count 2
|
||||
; RUN: grep shr %t1.s | count 1
|
||||
|
||||
; Byte aligned store. Expands to call to __misaligned_store.
|
||||
define void @align1(i32* %p, i32 %val) nounwind {
|
||||
|
@ -7,3 +9,10 @@ entry:
|
|||
store i32 %val, i32* %p, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
; Half word aligned store. Expands to two 16bit stores.
|
||||
; Stores the i32 %val through %p; the store is only guaranteed 2-byte
; alignment, which is less than the size of the value being stored.
define void @align2(i32* %p, i32 %val) nounwind {
|
||||
entry:
|
||||
store i32 %val, i32* %p, align 2
|
||||
ret void
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue