forked from OSchip/llvm-project
Add support for alignment operands on VLD1-lane instructions.
This is another part of the fix for Radar 8599955. llvm-svn: 117976
This commit is contained in:
parent
7e57573844
commit
dd9fbaa9c0
|
@ -116,7 +116,7 @@ public:
|
|||
bool SelectAddrMode4(SDValue N, SDValue &Addr, SDValue &Mode);
|
||||
bool SelectAddrMode5(SDValue N, SDValue &Base,
|
||||
SDValue &Offset);
|
||||
bool SelectAddrMode6(SDValue N, SDValue &Addr, SDValue &Align);
|
||||
bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
|
||||
|
||||
bool SelectAddrModePC(SDValue N, SDValue &Offset,
|
||||
SDValue &Label);
|
||||
|
@ -222,6 +222,9 @@ private:
|
|||
SDNode *QuadSRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
|
||||
SDNode *QuadDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
|
||||
SDNode *QuadQRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
|
||||
|
||||
// Get the alignment operand for a NEON VLD or VST instruction.
|
||||
SDValue GetVLDSTAlign(SDValue Align, unsigned NumVecs, bool is64BitVector);
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -769,10 +772,26 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
|
|||
return true;
|
||||
}
|
||||
|
||||
bool ARMDAGToDAGISel::SelectAddrMode6(SDValue N, SDValue &Addr, SDValue &Align){
|
||||
bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
|
||||
SDValue &Align) {
|
||||
Addr = N;
|
||||
// Default to no alignment.
|
||||
Align = CurDAG->getTargetConstant(0, MVT::i32);
|
||||
|
||||
unsigned Alignment = 0;
|
||||
if (LSBaseSDNode *LSN = dyn_cast<LSBaseSDNode>(Parent)) {
|
||||
// This case occurs only for VLD1-lane/dup and VST1-lane instructions.
|
||||
// The maximum alignment is equal to the memory size being referenced.
|
||||
unsigned LSNAlign = LSN->getAlignment();
|
||||
unsigned MemSize = LSN->getMemoryVT().getSizeInBits() / 8;
|
||||
if (LSNAlign > MemSize && MemSize > 1)
|
||||
Alignment = MemSize;
|
||||
} else {
|
||||
// All other uses of addrmode6 are for intrinsics. For now just record
|
||||
// the raw alignment value; it will be refined later based on the legal
|
||||
// alignment operands for the intrinsic.
|
||||
Alignment = cast<MemIntrinsicSDNode>(Parent)->getAlignment();
|
||||
}
|
||||
|
||||
Align = CurDAG->getTargetConstant(Alignment, MVT::i32);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1261,19 +1280,23 @@ SDNode *ARMDAGToDAGISel::QuadQRegs(EVT VT, SDValue V0, SDValue V1,
|
|||
/// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
|
||||
/// of a NEON VLD or VST instruction. The supported values depend on the
|
||||
/// number of registers being loaded.
|
||||
static unsigned GetVLDSTAlign(SDNode *N, unsigned NumVecs, bool is64BitVector) {
|
||||
SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, unsigned NumVecs,
|
||||
bool is64BitVector) {
|
||||
unsigned NumRegs = NumVecs;
|
||||
if (!is64BitVector && NumVecs < 3)
|
||||
NumRegs *= 2;
|
||||
|
||||
unsigned Alignment = cast<MemIntrinsicSDNode>(N)->getAlignment();
|
||||
unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
|
||||
if (Alignment >= 32 && NumRegs == 4)
|
||||
return 32;
|
||||
if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
|
||||
return 16;
|
||||
if (Alignment >= 8)
|
||||
return 8;
|
||||
return 0;
|
||||
Alignment = 32;
|
||||
else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
|
||||
Alignment = 16;
|
||||
else if (Alignment >= 8)
|
||||
Alignment = 8;
|
||||
else
|
||||
Alignment = 0;
|
||||
|
||||
return CurDAG->getTargetConstant(Alignment, MVT::i32);
|
||||
}
|
||||
|
||||
SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
|
||||
|
@ -1283,15 +1306,13 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
|
|||
DebugLoc dl = N->getDebugLoc();
|
||||
|
||||
SDValue MemAddr, Align;
|
||||
if (!SelectAddrMode6(N->getOperand(2), MemAddr, Align))
|
||||
if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, Align))
|
||||
return NULL;
|
||||
|
||||
SDValue Chain = N->getOperand(0);
|
||||
EVT VT = N->getValueType(0);
|
||||
bool is64BitVector = VT.is64BitVector();
|
||||
|
||||
unsigned Alignment = GetVLDSTAlign(N, NumVecs, is64BitVector);
|
||||
Align = CurDAG->getTargetConstant(Alignment, MVT::i32);
|
||||
Align = GetVLDSTAlign(Align, NumVecs, is64BitVector);
|
||||
|
||||
unsigned OpcodeIndex;
|
||||
switch (VT.getSimpleVT().SimpleTy) {
|
||||
|
@ -1397,15 +1418,13 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
|
|||
DebugLoc dl = N->getDebugLoc();
|
||||
|
||||
SDValue MemAddr, Align;
|
||||
if (!SelectAddrMode6(N->getOperand(2), MemAddr, Align))
|
||||
if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, Align))
|
||||
return NULL;
|
||||
|
||||
SDValue Chain = N->getOperand(0);
|
||||
EVT VT = N->getOperand(3).getValueType();
|
||||
bool is64BitVector = VT.is64BitVector();
|
||||
|
||||
unsigned Alignment = GetVLDSTAlign(N, NumVecs, is64BitVector);
|
||||
Align = CurDAG->getTargetConstant(Alignment, MVT::i32);
|
||||
Align = GetVLDSTAlign(Align, NumVecs, is64BitVector);
|
||||
|
||||
unsigned OpcodeIndex;
|
||||
switch (VT.getSimpleVT().SimpleTy) {
|
||||
|
@ -1520,7 +1539,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
|
|||
DebugLoc dl = N->getDebugLoc();
|
||||
|
||||
SDValue MemAddr, Align;
|
||||
if (!SelectAddrMode6(N->getOperand(2), MemAddr, Align))
|
||||
if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, Align))
|
||||
return NULL;
|
||||
|
||||
SDValue Chain = N->getOperand(0);
|
||||
|
@ -1529,16 +1548,18 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
|
|||
EVT VT = IsLoad ? N->getValueType(0) : N->getOperand(3).getValueType();
|
||||
bool is64BitVector = VT.is64BitVector();
|
||||
|
||||
unsigned Alignment = 0;
|
||||
if (NumVecs != 3) {
|
||||
unsigned Alignment = cast<MemIntrinsicSDNode>(N)->getAlignment();
|
||||
Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
|
||||
unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8;
|
||||
if (Alignment > NumBytes)
|
||||
Alignment = NumBytes;
|
||||
// Alignment must be a power of two; make sure of that.
|
||||
Alignment = (Alignment & -Alignment);
|
||||
if (Alignment > 1)
|
||||
Align = CurDAG->getTargetConstant(Alignment, MVT::i32);
|
||||
if (Alignment == 1)
|
||||
Alignment = 0;
|
||||
}
|
||||
Align = CurDAG->getTargetConstant(Alignment, MVT::i32);
|
||||
|
||||
unsigned OpcodeIndex;
|
||||
switch (VT.getSimpleVT().SimpleTy) {
|
||||
|
|
|
@ -469,7 +469,7 @@ def addrmode5 : Operand<i32>,
|
|||
// addrmode6 := reg with optional writeback
|
||||
//
|
||||
def addrmode6 : Operand<i32>,
|
||||
ComplexPattern<i32, 2, "SelectAddrMode6", []> {
|
||||
ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{
|
||||
let PrintMethod = "printAddrMode6Operand";
|
||||
let MIOperandInfo = (ops GPR:$addr, i32imm);
|
||||
}
|
||||
|
|
|
@ -2,27 +2,30 @@
|
|||
|
||||
define <8 x i8> @vld1lanei8(i8* %A, <8 x i8>* %B) nounwind {
|
||||
;CHECK: vld1lanei8:
|
||||
;Check the (default) alignment value.
|
||||
;CHECK: vld1.8 {d16[3]}, [r0]
|
||||
%tmp1 = load <8 x i8>* %B
|
||||
%tmp2 = load i8* %A, align 1
|
||||
%tmp2 = load i8* %A, align 8
|
||||
%tmp3 = insertelement <8 x i8> %tmp1, i8 %tmp2, i32 3
|
||||
ret <8 x i8> %tmp3
|
||||
}
|
||||
|
||||
define <4 x i16> @vld1lanei16(i16* %A, <4 x i16>* %B) nounwind {
|
||||
;CHECK: vld1lanei16:
|
||||
;CHECK: vld1.16 {d16[2]}, [r0]
|
||||
;Check the alignment value. Max for this instruction is 16 bits:
|
||||
;CHECK: vld1.16 {d16[2]}, [r0, :16]
|
||||
%tmp1 = load <4 x i16>* %B
|
||||
%tmp2 = load i16* %A, align 2
|
||||
%tmp2 = load i16* %A, align 8
|
||||
%tmp3 = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 2
|
||||
ret <4 x i16> %tmp3
|
||||
}
|
||||
|
||||
define <2 x i32> @vld1lanei32(i32* %A, <2 x i32>* %B) nounwind {
|
||||
;CHECK: vld1lanei32:
|
||||
;CHECK: vld1.32 {d16[1]}, [r0]
|
||||
;Check the alignment value. Max for this instruction is 32 bits:
|
||||
;CHECK: vld1.32 {d16[1]}, [r0, :32]
|
||||
%tmp1 = load <2 x i32>* %B
|
||||
%tmp2 = load i32* %A, align 4
|
||||
%tmp2 = load i32* %A, align 8
|
||||
%tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
|
||||
ret <2 x i32> %tmp3
|
||||
}
|
||||
|
@ -31,25 +34,25 @@ define <16 x i8> @vld1laneQi8(i8* %A, <16 x i8>* %B) nounwind {
|
|||
;CHECK: vld1laneQi8:
|
||||
;CHECK: vld1.8 {d17[1]}, [r0]
|
||||
%tmp1 = load <16 x i8>* %B
|
||||
%tmp2 = load i8* %A, align 1
|
||||
%tmp2 = load i8* %A, align 8
|
||||
%tmp3 = insertelement <16 x i8> %tmp1, i8 %tmp2, i32 9
|
||||
ret <16 x i8> %tmp3
|
||||
}
|
||||
|
||||
define <8 x i16> @vld1laneQi16(i16* %A, <8 x i16>* %B) nounwind {
|
||||
;CHECK: vld1laneQi16:
|
||||
;CHECK: vld1.16 {d17[1]}, [r0]
|
||||
;CHECK: vld1.16 {d17[1]}, [r0, :16]
|
||||
%tmp1 = load <8 x i16>* %B
|
||||
%tmp2 = load i16* %A, align 2
|
||||
%tmp2 = load i16* %A, align 8
|
||||
%tmp3 = insertelement <8 x i16> %tmp1, i16 %tmp2, i32 5
|
||||
ret <8 x i16> %tmp3
|
||||
}
|
||||
|
||||
define <4 x i32> @vld1laneQi32(i32* %A, <4 x i32>* %B) nounwind {
|
||||
;CHECK: vld1laneQi32:
|
||||
;CHECK: vld1.32 {d17[1]}, [r0]
|
||||
;CHECK: vld1.32 {d17[1]}, [r0, :32]
|
||||
%tmp1 = load <4 x i32>* %B
|
||||
%tmp2 = load i32* %A, align 4
|
||||
%tmp2 = load i32* %A, align 8
|
||||
%tmp3 = insertelement <4 x i32> %tmp1, i32 %tmp2, i32 3
|
||||
ret <4 x i32> %tmp3
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue