forked from OSchip/llvm-project
Expand unaligned 32-bit loads from an address which is a constant
offset from a 32-bit aligned base as follows:

  ldw low, base[offset >> 2]
  ldw high, base[(offset >> 2) + 1]
  shr low_shifted, low, (offset & 0x3) * 8
  shl high_shifted, high, 32 - (offset & 0x3) * 8
  or result, low_shifted, high_shifted

Expand 32-bit loads / stores with 16-bit alignment into two 16-bit
loads / stores.

llvm-svn: 75902
This commit is contained in:
parent
25b33cb035
commit
bfdc557c8a
|
@ -325,6 +325,44 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG)
|
||||||
return DAG.getNode(XCoreISD::DPRelativeWrapper, dl, MVT::i32, JTI);
|
return DAG.getNode(XCoreISD::DPRelativeWrapper, dl, MVT::i32, JTI);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
IsWordAlignedBasePlusConstantOffset(SDValue Addr, SDValue &AlignedBase,
|
||||||
|
int64_t &Offset)
|
||||||
|
{
|
||||||
|
if (Addr.getOpcode() != ISD::ADD) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
ConstantSDNode *CN = 0;
|
||||||
|
if (!(CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
int64_t off = CN->getSExtValue();
|
||||||
|
const SDValue &Base = Addr.getOperand(0);
|
||||||
|
const SDValue *Root = &Base;
|
||||||
|
if (Base.getOpcode() == ISD::ADD &&
|
||||||
|
Base.getOperand(1).getOpcode() == ISD::SHL) {
|
||||||
|
ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Base.getOperand(1)
|
||||||
|
.getOperand(1));
|
||||||
|
if (CN && (CN->getSExtValue() >= 2)) {
|
||||||
|
Root = &Base.getOperand(0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (isa<FrameIndexSDNode>(*Root)) {
|
||||||
|
// All frame indicies are word aligned
|
||||||
|
AlignedBase = Base;
|
||||||
|
Offset = off;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if (Root->getOpcode() == XCoreISD::DPRelativeWrapper ||
|
||||||
|
Root->getOpcode() == XCoreISD::CPRelativeWrapper) {
|
||||||
|
// All dp / cp relative addresses are word aligned
|
||||||
|
AlignedBase = Base;
|
||||||
|
Offset = off;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
SDValue XCoreTargetLowering::
|
SDValue XCoreTargetLowering::
|
||||||
LowerLOAD(SDValue Op, SelectionDAG &DAG)
|
LowerLOAD(SDValue Op, SelectionDAG &DAG)
|
||||||
{
|
{
|
||||||
|
@ -344,6 +382,61 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG)
|
||||||
SDValue BasePtr = LD->getBasePtr();
|
SDValue BasePtr = LD->getBasePtr();
|
||||||
DebugLoc dl = Op.getDebugLoc();
|
DebugLoc dl = Op.getDebugLoc();
|
||||||
|
|
||||||
|
SDValue Base;
|
||||||
|
int64_t Offset;
|
||||||
|
if (!LD->isVolatile() &&
|
||||||
|
IsWordAlignedBasePlusConstantOffset(BasePtr, Base, Offset)) {
|
||||||
|
if (Offset % 4 == 0) {
|
||||||
|
// We've managed to infer better alignment information than the load
|
||||||
|
// already has. Use an aligned load.
|
||||||
|
return DAG.getLoad(getPointerTy(), dl, Chain, BasePtr, NULL, 4);
|
||||||
|
}
|
||||||
|
// Lower to
|
||||||
|
// ldw low, base[offset >> 2]
|
||||||
|
// ldw high, base[(offset >> 2) + 1]
|
||||||
|
// shr low_shifted, low, (offset & 0x3) * 8
|
||||||
|
// shl high_shifted, high, 32 - (offset & 0x3) * 8
|
||||||
|
// or result, low_shifted, high_shifted
|
||||||
|
SDValue LowOffset = DAG.getConstant(Offset & ~0x3, MVT::i32);
|
||||||
|
SDValue HighOffset = DAG.getConstant((Offset & ~0x3) + 4, MVT::i32);
|
||||||
|
SDValue LowShift = DAG.getConstant((Offset & 0x3) * 8, MVT::i32);
|
||||||
|
SDValue HighShift = DAG.getConstant(32 - (Offset & 0x3) * 8, MVT::i32);
|
||||||
|
|
||||||
|
SDValue LowAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Base, LowOffset);
|
||||||
|
SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Base, HighOffset);
|
||||||
|
|
||||||
|
SDValue Low = DAG.getLoad(getPointerTy(), dl, Chain,
|
||||||
|
LowAddr, NULL, 4);
|
||||||
|
SDValue High = DAG.getLoad(getPointerTy(), dl, Chain,
|
||||||
|
HighAddr, NULL, 4);
|
||||||
|
SDValue LowShifted = DAG.getNode(ISD::SRL, dl, MVT::i32, Low, LowShift);
|
||||||
|
SDValue HighShifted = DAG.getNode(ISD::SHL, dl, MVT::i32, High, HighShift);
|
||||||
|
SDValue Result = DAG.getNode(ISD::OR, dl, MVT::i32, LowShifted, HighShifted);
|
||||||
|
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Low.getValue(1),
|
||||||
|
High.getValue(1));
|
||||||
|
SDValue Ops[] = { Result, Chain };
|
||||||
|
return DAG.getMergeValues(Ops, 2, dl);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (LD->getAlignment() == 2) {
|
||||||
|
int SVOffset = LD->getSrcValueOffset();
|
||||||
|
SDValue Low = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, Chain,
|
||||||
|
BasePtr, LD->getSrcValue(), SVOffset, MVT::i16,
|
||||||
|
LD->isVolatile(), 2);
|
||||||
|
SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, BasePtr,
|
||||||
|
DAG.getConstant(1, MVT::i32));
|
||||||
|
SDValue High = DAG.getExtLoad(ISD::EXTLOAD, dl, MVT::i32, Chain,
|
||||||
|
HighAddr, LD->getSrcValue(), SVOffset + 2,
|
||||||
|
MVT::i16, LD->isVolatile(), 2);
|
||||||
|
SDValue HighShifted = DAG.getNode(ISD::SHL, dl, MVT::i32, High,
|
||||||
|
DAG.getConstant(16, MVT::i32));
|
||||||
|
SDValue Result = DAG.getNode(ISD::OR, dl, MVT::i32, Low, HighShifted);
|
||||||
|
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Low.getValue(1),
|
||||||
|
High.getValue(1));
|
||||||
|
SDValue Ops[] = { Result, Chain };
|
||||||
|
return DAG.getMergeValues(Ops, 2, dl);
|
||||||
|
}
|
||||||
|
|
||||||
// Lower to a call to __misaligned_load(BasePtr).
|
// Lower to a call to __misaligned_load(BasePtr).
|
||||||
const Type *IntPtrTy = getTargetData()->getIntPtrType();
|
const Type *IntPtrTy = getTargetData()->getIntPtrType();
|
||||||
TargetLowering::ArgListTy Args;
|
TargetLowering::ArgListTy Args;
|
||||||
|
@ -385,6 +478,22 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG)
|
||||||
SDValue Value = ST->getValue();
|
SDValue Value = ST->getValue();
|
||||||
DebugLoc dl = Op.getDebugLoc();
|
DebugLoc dl = Op.getDebugLoc();
|
||||||
|
|
||||||
|
if (ST->getAlignment() == 2) {
|
||||||
|
int SVOffset = ST->getSrcValueOffset();
|
||||||
|
SDValue Low = Value;
|
||||||
|
SDValue High = DAG.getNode(ISD::SRL, dl, MVT::i32, Value,
|
||||||
|
DAG.getConstant(16, MVT::i32));
|
||||||
|
SDValue StoreLow = DAG.getTruncStore(Chain, dl, Low, BasePtr,
|
||||||
|
ST->getSrcValue(), SVOffset, MVT::i16,
|
||||||
|
ST->isVolatile(), 2);
|
||||||
|
SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, BasePtr,
|
||||||
|
DAG.getConstant(1, MVT::i32));
|
||||||
|
SDValue StoreHigh = DAG.getTruncStore(Chain, dl, High, HighAddr,
|
||||||
|
ST->getSrcValue(), SVOffset + 2,
|
||||||
|
MVT::i16, ST->isVolatile(), 2);
|
||||||
|
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, StoreLow, StoreHigh);
|
||||||
|
}
|
||||||
|
|
||||||
// Lower to a call to __misaligned_store(BasePtr, Value).
|
// Lower to a call to __misaligned_store(BasePtr, Value).
|
||||||
const Type *IntPtrTy = getTargetData()->getIntPtrType();
|
const Type *IntPtrTy = getTargetData()->getIntPtrType();
|
||||||
TargetLowering::ArgListTy Args;
|
TargetLowering::ArgListTy Args;
|
||||||
|
|
|
@ -1,5 +1,11 @@
|
||||||
; RUN: llvm-as < %s | llc -march=xcore > %t1.s
|
; RUN: llvm-as < %s | llc -march=xcore > %t1.s
|
||||||
; RUN: grep "bl __misaligned_load" %t1.s | count 1
|
; RUN: grep "bl __misaligned_load" %t1.s | count 1
|
||||||
|
; RUN: grep ld16s %t1.s | count 2
|
||||||
|
; RUN: grep ldw %t1.s | count 2
|
||||||
|
; RUN: grep shl %t1.s | count 2
|
||||||
|
; RUN: grep shr %t1.s | count 1
|
||||||
|
; RUN: grep zext %t1.s | count 1
|
||||||
|
; RUN: grep "or " %t1.s | count 2
|
||||||
|
|
||||||
; Byte aligned load. Expands to call to __misaligned_load.
|
; Byte aligned load. Expands to call to __misaligned_load.
|
||||||
define i32 @align1(i32* %p) nounwind {
|
define i32 @align1(i32* %p) nounwind {
|
||||||
|
@ -7,3 +13,19 @@ entry:
|
||||||
%0 = load i32* %p, align 1 ; <i32> [#uses=1]
|
%0 = load i32* %p, align 1 ; <i32> [#uses=1]
|
||||||
ret i32 %0
|
ret i32 %0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; Half word aligned load. Expands to two 16bit loads.
|
||||||
|
define i32 @align2(i32* %p) nounwind {
|
||||||
|
entry:
|
||||||
|
%0 = load i32* %p, align 2 ; <i32> [#uses=1]
|
||||||
|
ret i32 %0
|
||||||
|
}
|
||||||
|
|
||||||
|
@a = global [5 x i8] zeroinitializer, align 4
|
||||||
|
|
||||||
|
; Constant offset from word aligned base. Expands to two 32bit loads.
|
||||||
|
define i32 @align3() nounwind {
|
||||||
|
entry:
|
||||||
|
%0 = load i32* bitcast (i8* getelementptr ([5 x i8]* @a, i32 0, i32 1) to i32*), align 1
|
||||||
|
ret i32 %0
|
||||||
|
}
|
||||||
|
|
|
@ -1,5 +1,7 @@
|
||||||
; RUN: llvm-as < %s | llc -march=xcore > %t1.s
|
; RUN: llvm-as < %s | llc -march=xcore > %t1.s
|
||||||
; RUN: grep "bl __misaligned_store" %t1.s | count 1
|
; RUN: grep "bl __misaligned_store" %t1.s | count 1
|
||||||
|
; RUN: grep st16 %t1.s | count 2
|
||||||
|
; RUN: grep shr %t1.s | count 1
|
||||||
|
|
||||||
; Byte aligned store. Expands to call to __misaligned_store.
|
; Byte aligned store. Expands to call to __misaligned_store.
|
||||||
define void @align1(i32* %p, i32 %val) nounwind {
|
define void @align1(i32* %p, i32 %val) nounwind {
|
||||||
|
@ -7,3 +9,10 @@ entry:
|
||||||
store i32 %val, i32* %p, align 1
|
store i32 %val, i32* %p, align 1
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; Half word aligned store. Expands to two 16bit stores.
|
||||||
|
define void @align2(i32* %p, i32 %val) nounwind {
|
||||||
|
entry:
|
||||||
|
store i32 %val, i32* %p, align 2
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue