forked from OSchip/llvm-project
Add support for Neon VEXT (vector extract) shuffles.
This is derived from a patch by Anton Korzh. I modified it to recognize the VEXT shuffles during legalization and lower them to a target-specific DAG node. llvm-svn: 79428
This commit is contained in:
parent
5c947db79c
commit
32cd8550ce
|
@ -487,6 +487,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||||
case ARMISD::VST2D: return "ARMISD::VST2D";
|
case ARMISD::VST2D: return "ARMISD::VST2D";
|
||||||
case ARMISD::VST3D: return "ARMISD::VST3D";
|
case ARMISD::VST3D: return "ARMISD::VST3D";
|
||||||
case ARMISD::VST4D: return "ARMISD::VST4D";
|
case ARMISD::VST4D: return "ARMISD::VST4D";
|
||||||
|
case ARMISD::VEXT: return "ARMISD::VEXT";
|
||||||
case ARMISD::VREV64: return "ARMISD::VREV64";
|
case ARMISD::VREV64: return "ARMISD::VREV64";
|
||||||
case ARMISD::VREV32: return "ARMISD::VREV32";
|
case ARMISD::VREV32: return "ARMISD::VREV32";
|
||||||
case ARMISD::VREV16: return "ARMISD::VREV16";
|
case ARMISD::VREV16: return "ARMISD::VREV16";
|
||||||
|
@ -2343,6 +2344,41 @@ SDValue ARM::getVMOVImm(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
|
||||||
SplatBitSize, DAG);
|
SplatBitSize, DAG);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// isVEXTMask - Check if a vector shuffle corresponds to a VEXT instruction,
/// i.e. whether its mask selects consecutive elements from the concatenation
/// of the two source vectors.
///
/// \param N            the shuffle node whose mask is examined.
/// \param ReverseVEXT  set to true when the run of indices wraps past the end
///                     of the second source vector, in which case the caller
///                     must swap the two source operands.
/// \param Imm          on success, the byte index to use as the VEXT
///                     immediate (already adjusted for swapped operands and
///                     scaled by the element size in bytes).
/// \returns true if the mask can be implemented with a single VEXT.
static bool isVEXTMask(ShuffleVectorSDNode *N, bool &ReverseVEXT,
                       unsigned &Imm) {
  EVT VT = N->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();
  ReverseVEXT = false;
  Imm = 0;

  // The first shuffle index determines the immediate, so it must be defined.
  // An undef index is reported as a negative value; converting it to unsigned
  // would yield a bogus starting position that could still "match" the
  // remaining indices after wrapping around.
  int FirstElt = N->getMaskElt(0);
  if (FirstElt < 0)
    return false;
  Imm = static_cast<unsigned>(FirstElt);

  // If this is a VEXT shuffle, the immediate value is the index of the first
  // element.  The other shuffle indices must be the successive elements after
  // the first one.
  unsigned ExpectedElt = Imm;
  for (unsigned i = 1; i < NumElts; ++i) {
    // Increment the expected index.  If it wraps around, it may still be
    // a VEXT but the source vectors must be swapped.
    ExpectedElt += 1;
    if (ExpectedElt == NumElts * 2) {
      ExpectedElt = 0;
      ReverseVEXT = true;
    }

    // Undef indices place no constraint on the result, so they are
    // compatible with whatever element VEXT would produce in that lane.
    int MaskElt = N->getMaskElt(i);
    if (MaskElt < 0)
      continue;

    if (ExpectedElt != static_cast<unsigned>(MaskElt))
      return false;
  }

  // Adjust the index value if the source operands will be swapped.
  if (ReverseVEXT)
    Imm -= NumElts;

  // VEXT only handles 8-bit elements so scale the index for larger elements.
  Imm *= VT.getVectorElementType().getSizeInBits() / 8;

  return true;
}
|
||||||
|
|
||||||
/// isVREVMask - Check if a vector shuffle corresponds to a VREV
|
/// isVREVMask - Check if a vector shuffle corresponds to a VREV
|
||||||
/// instruction with the specified blocksize. (The order of the elements
|
/// instruction with the specified blocksize. (The order of the elements
|
||||||
/// within each block of the vector is reversed.)
|
/// within each block of the vector is reversed.)
|
||||||
|
@ -2460,6 +2496,18 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
|
||||||
return DAG.getNode(ARMISD::VDUPLANE, dl, VT, SVN->getOperand(0),
|
return DAG.getNode(ARMISD::VDUPLANE, dl, VT, SVN->getOperand(0),
|
||||||
DAG.getConstant(Lane, MVT::i32));
|
DAG.getConstant(Lane, MVT::i32));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool ReverseVEXT;
|
||||||
|
unsigned Imm;
|
||||||
|
if (isVEXTMask(SVN, ReverseVEXT, Imm)) {
|
||||||
|
SDValue Op0 = SVN->getOperand(0);
|
||||||
|
SDValue Op1 = SVN->getOperand(1);
|
||||||
|
if (ReverseVEXT)
|
||||||
|
std::swap(Op0, Op1);
|
||||||
|
return DAG.getNode(ARMISD::VEXT, dl, VT, Op0, Op1,
|
||||||
|
DAG.getConstant(Imm, MVT::i32));
|
||||||
|
}
|
||||||
|
|
||||||
if (isVREVMask(SVN, 64))
|
if (isVREVMask(SVN, 64))
|
||||||
return DAG.getNode(ARMISD::VREV64, dl, VT, SVN->getOperand(0));
|
return DAG.getNode(ARMISD::VREV64, dl, VT, SVN->getOperand(0));
|
||||||
if (isVREVMask(SVN, 32))
|
if (isVREVMask(SVN, 32))
|
||||||
|
|
|
@ -128,6 +128,7 @@ namespace llvm {
|
||||||
VST4D,
|
VST4D,
|
||||||
|
|
||||||
// Vector shuffles:
|
// Vector shuffles:
|
||||||
|
VEXT, // extract
|
||||||
VREV64, // reverse elements within 64-bit doublewords
|
VREV64, // reverse elements within 64-bit doublewords
|
||||||
VREV32, // reverse elements within 32-bit words
|
VREV32, // reverse elements within 32-bit words
|
||||||
VREV16 // reverse elements within 16-bit halfwords
|
VREV16 // reverse elements within 16-bit halfwords
|
||||||
|
|
|
@ -100,6 +100,10 @@ def NEONvst3d : SDNode<"ARMISD::VST3D", SDTARMVST3,
|
||||||
def NEONvst4d : SDNode<"ARMISD::VST4D", SDTARMVST4,
|
def NEONvst4d : SDNode<"ARMISD::VST4D", SDTARMVST4,
|
||||||
[SDNPHasChain, SDNPMayStore]>;
|
[SDNPHasChain, SDNPMayStore]>;
|
||||||
|
|
||||||
|
// Type profile for the VEXT node: one vector result, two operands of the same
// type as the result, and a third operand of type i32.
def SDTARMVEXT : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
|
||||||
|
SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
|
||||||
|
// Selection DAG node bound to the ARMISD::VEXT opcode, using the profile above.
def NEONvext : SDNode<"ARMISD::VEXT", SDTARMVEXT>;
|
||||||
|
|
||||||
def SDTARMVSHUF : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
|
def SDTARMVSHUF : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
|
||||||
def NEONvrev64 : SDNode<"ARMISD::VREV64", SDTARMVSHUF>;
|
def NEONvrev64 : SDNode<"ARMISD::VREV64", SDTARMVSHUF>;
|
||||||
def NEONvrev32 : SDNode<"ARMISD::VREV32", SDTARMVSHUF>;
|
def NEONvrev32 : SDNode<"ARMISD::VREV32", SDTARMVSHUF>;
|
||||||
|
@ -1941,6 +1945,21 @@ class VREV16Q<bits<2> op19_18, string OpcodeStr, ValueType Ty>
|
||||||
def VREV16d8 : VREV16D<0b00, "vrev16.8", v8i8>;
|
def VREV16d8 : VREV16D<0b00, "vrev16.8", v8i8>;
|
||||||
def VREV16q8 : VREV16Q<0b00, "vrev16.8", v16i8>;
|
def VREV16q8 : VREV16Q<0b00, "vrev16.8", v16i8>;
|
||||||
|
|
||||||
|
// Other Vector Shuffles.
|
||||||
|
|
||||||
|
// VEXT : Vector Extract
|
||||||
|
|
||||||
|
// VEXT on D registers: selects a v8i8 NEONvext node and prints it as "vext.8"
// with the immediate $index as the last operand.
// NOTE(review): only v8i8 (and v16i8 below) patterns are defined, while the
// shuffle lowering scales the index for wider element types -- confirm that
// VEXT nodes with wider element types can still be selected.
def VEXTd : N3V<0,1,0b11,0b0000,0,0, (outs DPR:$dst),
|
||||||
|
(ins DPR:$lhs, DPR:$rhs, i32imm:$index), NoItinerary,
|
||||||
|
"vext.8\t$dst, $lhs, $rhs, $index", "",
|
||||||
|
[(set DPR:$dst, (v8i8 (NEONvext (v8i8 DPR:$lhs),
|
||||||
|
(v8i8 DPR:$rhs), imm:$index)))]>;
|
||||||
|
// VEXT on Q registers: same as VEXTd but selects a v16i8 NEONvext node.
def VEXTq : N3V<0,1,0b11,0b0000,1,0, (outs QPR:$dst),
|
||||||
|
(ins QPR:$lhs, QPR:$rhs, i32imm:$index), NoItinerary,
|
||||||
|
"vext.8\t$dst, $lhs, $rhs, $index", "",
|
||||||
|
[(set QPR:$dst, (v16i8 (NEONvext (v16i8 QPR:$lhs),
|
||||||
|
(v16i8 QPR:$rhs), imm:$index)))]>;
|
||||||
|
|
||||||
// VTRN : Vector Transpose
|
// VTRN : Vector Transpose
|
||||||
|
|
||||||
def VTRNd8 : N2VDShuffle<0b00, 0b00001, "vtrn.8">;
|
def VTRNd8 : N2VDShuffle<0b00, 0b00001, "vtrn.8">;
|
||||||
|
|
|
@ -0,0 +1,37 @@
|
||||||
|
; RUN: llvm-as < %s | llc -march=arm -mattr=+neon | FileCheck %s
|
||||||
|
|
||||||
|
; 64-bit case without wrap-around: the mask <3..10> takes elements 3-7 of the
; first vector followed by elements 0-2 of the second (indices 8-10), so the
; shuffle is expected to become a vext.
define arm_apcscc <8 x i8> @test_vextd(<8 x i8>* %A, <8 x i8>* %B) nounwind {
|
||||||
|
;CHECK: test_vextd:
|
||||||
|
;CHECK: vext
|
||||||
|
%tmp1 = load <8 x i8>* %A
|
||||||
|
%tmp2 = load <8 x i8>* %B
|
||||||
|
%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
|
||||||
|
ret <8 x i8> %tmp3
|
||||||
|
}
|
||||||
|
|
||||||
|
; 64-bit case with wrap-around: the mask starts at index 13 (element 5 of the
; second vector), runs to index 15 and then wraps to elements 0-4 of the first
; vector, which exercises the swapped-operand form of the vext match.
define arm_apcscc <8 x i8> @test_vextRd(<8 x i8>* %A, <8 x i8>* %B) nounwind {
|
||||||
|
;CHECK: test_vextRd:
|
||||||
|
;CHECK: vext
|
||||||
|
%tmp1 = load <8 x i8>* %A
|
||||||
|
%tmp2 = load <8 x i8>* %B
|
||||||
|
%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4>
|
||||||
|
ret <8 x i8> %tmp3
|
||||||
|
}
|
||||||
|
|
||||||
|
; 128-bit case without wrap-around: the mask <3..18> takes elements 3-15 of the
; first vector followed by elements 0-2 of the second (indices 16-18), so the
; shuffle is expected to become a vext.
define arm_apcscc <16 x i8> @test_vextq(<16 x i8>* %A, <16 x i8>* %B) nounwind {
|
||||||
|
;CHECK: test_vextq:
|
||||||
|
;CHECK: vext
|
||||||
|
%tmp1 = load <16 x i8>* %A
|
||||||
|
%tmp2 = load <16 x i8>* %B
|
||||||
|
%tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
|
||||||
|
ret <16 x i8> %tmp3
|
||||||
|
}
|
||||||
|
|
||||||
|
; 128-bit case with wrap-around: the mask starts at index 23 (element 7 of the
; second vector), runs to index 31 and then wraps to elements 0-6 of the first
; vector, which exercises the swapped-operand form of the vext match.
define arm_apcscc <16 x i8> @test_vextRq(<16 x i8>* %A, <16 x i8>* %B) nounwind {
|
||||||
|
;CHECK: test_vextRq:
|
||||||
|
;CHECK: vext
|
||||||
|
%tmp1 = load <16 x i8>* %A
|
||||||
|
%tmp2 = load <16 x i8>* %B
|
||||||
|
%tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
|
||||||
|
ret <16 x i8> %tmp3
|
||||||
|
}
|
Loading…
Reference in New Issue