forked from OSchip/llvm-project
[PowerPC] Avoid scalarization of vector truncate
The PowerPC code generator currently scalarizes vector truncates that would fit in a vector register, resulting in vector extracts, scalar operations, and vector merges. This patch custom lowers a vector truncate that would fit in a register to a vector shuffle instead. Differential Revision: https://reviews.llvm.org/D56507 llvm-svn: 353724
This commit is contained in:
parent
ebdb021031
commit
732fe22454
|
@ -118,6 +118,8 @@ STATISTIC(NumSiblingCalls, "Number of sibling calls");
|
|||
|
||||
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
|
||||
|
||||
static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
|
||||
|
||||
// FIXME: Remove this once the bug has been fixed!
|
||||
extern cl::opt<bool> ANDIGlueBug;
|
||||
|
||||
|
@ -639,6 +641,14 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
|
|||
// with merges, splats, etc.
|
||||
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
|
||||
|
||||
// Vector truncates to sub-word integers that fit in an Altivec/VSX register
|
||||
// are cheap, so handle them before they get expanded to scalar.
|
||||
setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
|
||||
setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
|
||||
setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
|
||||
setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
|
||||
setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
|
||||
|
||||
setOperationAction(ISD::AND , MVT::v4i32, Legal);
|
||||
setOperationAction(ISD::OR , MVT::v4i32, Legal);
|
||||
setOperationAction(ISD::XOR , MVT::v4i32, Legal);
|
||||
|
@ -6794,6 +6804,61 @@ SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
|
|||
Op.getOperand(0));
|
||||
}
|
||||
|
||||
/// Custom-lower a vector TRUNCATE whose source fits in one 128-bit
/// Altivec/VSX register into a single VECTOR_SHUFFLE.
///
/// \param Op  The TRUNCATE node. Its result type must be a vector and its
///            operand must be at most 128 bits wide (both are asserted).
/// \param DAG The DAG in which the replacement nodes are created.
/// \returns A VECTOR_SHUFFLE whose type is the 128-bit vector of the result
///          element type (i.e. wider than Op's own result type); the leading
///          lanes hold the truncated elements and the remaining lanes are
///          undefined.
SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
                                               SelectionDAG &DAG) const {
  // Implements a vector truncate that fits in a vector register as a shuffle.
  // We want to legalize vector truncates down to where the source fits in
  // a vector register (and target is therefore smaller than vector register
  // size).  At that point legalization will try to custom lower the sub-legal
  // result and get here - where we can contain the truncate as a single target
  // operation.

  // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
  //   <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
  //
  // We will implement it for big-endian ordering as this (where u denotes
  // undefined):
  //   < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
  //   < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
  //
  // The same operation in little-endian ordering will be:
  //   <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
  //   <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>

  assert(Op.getValueType().isVector() && "Vector type expected.");

  SDLoc DL(Op);
  SDValue N1 = Op.getOperand(0);
  unsigned SrcSize = N1.getValueType().getSizeInBits();
  assert(SrcSize <= 128 && "Source must fit in an Altivec/VSX vector");
  // NOTE(review): widenVec is defined elsewhere in this file; it is presumed
  // to pad a narrower source out to a full 128-bit vector, which the BITCAST
  // to WideVT below relies on - confirm against its definition.
  SDValue WideSrc = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);

  // WideVT is the 128-bit vector type whose lanes have the *result* element
  // width, so every source element covers SizeMult consecutive WideVT lanes.
  EVT TrgVT = Op.getValueType();
  unsigned TrgNumElts = TrgVT.getVectorNumElements();
  EVT EltVT = TrgVT.getVectorElementType();
  unsigned WideNumElts = 128 / EltVT.getSizeInBits();
  EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);

  // First list the elements we want to keep: the least-significant lane of
  // each source element, which is the first lane of each group on
  // little-endian targets and the last lane of each group on big-endian ones.
  unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
  SmallVector<int, 16> ShuffV;
  if (Subtarget.isLittleEndian())
    for (unsigned i = 0; i < TrgNumElts; ++i)
      ShuffV.push_back(i * SizeMult);
  else
    for (unsigned i = 1; i <= TrgNumElts; ++i)
      ShuffV.push_back(i * SizeMult - 1);

  // Populate the remaining elements with undefs. A mask entry of -1 is the
  // shuffle-mask encoding for "undefined lane"; this is what getVectorShuffle
  // canonicalizes any index into an UNDEF second operand to anyway.
  for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
    ShuffV.push_back(-1);

  SDValue Conv = DAG.getNode(ISD::BITCAST, DL, WideVT, WideSrc);
  return DAG.getVectorShuffle(WideVT, DL, Conv, DAG.getUNDEF(WideVT), ShuffV);
}
|
||||
|
||||
/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
|
||||
/// possible.
|
||||
SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
|
||||
|
@ -9641,6 +9706,14 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
|
|||
return;
|
||||
Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
|
||||
return;
|
||||
case ISD::TRUNCATE: {
|
||||
EVT TrgVT = N->getValueType(0);
|
||||
if (TrgVT.isVector() &&
|
||||
isOperationCustom(N->getOpcode(), TrgVT) &&
|
||||
N->getOperand(0).getValueType().getSizeInBits() <= 128)
|
||||
Results.push_back(LowerTRUNCATEVector(SDValue(N, 0), DAG));
|
||||
return;
|
||||
}
|
||||
case ISD::BITCAST:
|
||||
// Don't handle bitcast here.
|
||||
return;
|
||||
|
|
|
@ -952,6 +952,8 @@ namespace llvm {
|
|||
SDValue LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
|
||||
const SDLoc &dl) const;
|
||||
|
||||
SDValue LowerTRUNCATEVector(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const;
|
||||
SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const;
|
||||
|
||||
|
|
|
@ -10,90 +10,17 @@ define void @test8i8(<8 x i8>* nocapture %Sink, <8 x i16>* nocapture readonly %S
|
|||
; CHECK-LABEL: test8i8:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: lvx v2, 0, r4
|
||||
; CHECK-NEXT: mfvsrd r4, v2
|
||||
; CHECK-NEXT: xxswapd vs0, v2
|
||||
; CHECK-NEXT: clrldi r5, r4, 48
|
||||
; CHECK-NEXT: mtvsrd f1, r5
|
||||
; CHECK-NEXT: rldicl r5, r4, 48, 48
|
||||
; CHECK-NEXT: mtvsrd f2, r5
|
||||
; CHECK-NEXT: rldicl r5, r4, 32, 48
|
||||
; CHECK-NEXT: rldicl r4, r4, 16, 48
|
||||
; CHECK-NEXT: mtvsrd f3, r5
|
||||
; CHECK-NEXT: xxswapd v2, vs1
|
||||
; CHECK-NEXT: mfvsrd r5, f0
|
||||
; CHECK-NEXT: xxswapd v3, vs2
|
||||
; CHECK-NEXT: mtvsrd f0, r4
|
||||
; CHECK-NEXT: clrldi r4, r5, 48
|
||||
; CHECK-NEXT: mtvsrd f1, r4
|
||||
; CHECK-NEXT: rldicl r4, r5, 48, 48
|
||||
; CHECK-NEXT: xxswapd v4, vs0
|
||||
; CHECK-NEXT: mtvsrd f2, r4
|
||||
; CHECK-NEXT: rldicl r4, r5, 32, 48
|
||||
; CHECK-NEXT: rldicl r5, r5, 16, 48
|
||||
; CHECK-NEXT: vmrglb v2, v3, v2
|
||||
; CHECK-NEXT: xxswapd v3, vs3
|
||||
; CHECK-NEXT: mtvsrd f3, r4
|
||||
; CHECK-NEXT: xxswapd v5, vs1
|
||||
; CHECK-NEXT: mtvsrd f0, r5
|
||||
; CHECK-NEXT: xxswapd v0, vs2
|
||||
; CHECK-NEXT: xxswapd v1, vs3
|
||||
; CHECK-NEXT: vmrglb v3, v4, v3
|
||||
; CHECK-NEXT: xxswapd v6, vs0
|
||||
; CHECK-NEXT: vmrglb v4, v0, v5
|
||||
; CHECK-NEXT: vmrglb v5, v6, v1
|
||||
; CHECK-NEXT: vmrglh v2, v3, v2
|
||||
; CHECK-NEXT: vmrglh v3, v5, v4
|
||||
; CHECK-NEXT: vmrglw v2, v2, v3
|
||||
; CHECK-NEXT: vpkuhum v2, v2, v2
|
||||
; CHECK-NEXT: xxswapd vs0, v2
|
||||
; CHECK-NEXT: stfdx f0, 0, r3
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-LABEL: test8i8:
|
||||
; CHECK-BE: # %bb.0: # %entry
|
||||
; CHECK-BE-NEXT: lxvw4x vs0, 0, r4
|
||||
; CHECK-BE-NEXT: addi r4, r1, -32
|
||||
; CHECK-BE-NEXT: stxvw4x vs0, 0, r4
|
||||
; CHECK-BE-NEXT: lhz r4, -18(r1)
|
||||
; CHECK-BE-NEXT: stb r4, -48(r1)
|
||||
; CHECK-BE-NEXT: lhz r4, -20(r1)
|
||||
; CHECK-BE-NEXT: stb r4, -64(r1)
|
||||
; CHECK-BE-NEXT: lhz r4, -22(r1)
|
||||
; CHECK-BE-NEXT: stb r4, -80(r1)
|
||||
; CHECK-BE-NEXT: lhz r4, -24(r1)
|
||||
; CHECK-BE-NEXT: stb r4, -96(r1)
|
||||
; CHECK-BE-NEXT: lhz r4, -26(r1)
|
||||
; CHECK-BE-NEXT: stb r4, -112(r1)
|
||||
; CHECK-BE-NEXT: lhz r4, -28(r1)
|
||||
; CHECK-BE-NEXT: stb r4, -128(r1)
|
||||
; CHECK-BE-NEXT: lhz r4, -30(r1)
|
||||
; CHECK-BE-NEXT: stb r4, -144(r1)
|
||||
; CHECK-BE-NEXT: lhz r4, -32(r1)
|
||||
; CHECK-BE-NEXT: stb r4, -160(r1)
|
||||
; CHECK-BE-NEXT: addi r4, r1, -48
|
||||
; CHECK-BE-NEXT: lxvw4x v2, 0, r4
|
||||
; CHECK-BE-NEXT: addi r4, r1, -64
|
||||
; CHECK-BE-NEXT: lxvw4x v3, 0, r4
|
||||
; CHECK-BE-NEXT: addi r4, r1, -80
|
||||
; CHECK-BE-NEXT: lxvw4x v4, 0, r4
|
||||
; CHECK-BE-NEXT: addi r4, r1, -96
|
||||
; CHECK-BE-NEXT: lxvw4x v5, 0, r4
|
||||
; CHECK-BE-NEXT: addi r4, r1, -112
|
||||
; CHECK-BE-NEXT: lxvw4x v0, 0, r4
|
||||
; CHECK-BE-NEXT: addi r4, r1, -128
|
||||
; CHECK-BE-NEXT: lxvw4x v1, 0, r4
|
||||
; CHECK-BE-NEXT: addi r4, r1, -144
|
||||
; CHECK-BE-NEXT: lxvw4x v6, 0, r4
|
||||
; CHECK-BE-NEXT: addi r4, r1, -160
|
||||
; CHECK-BE-NEXT: lxvw4x v7, 0, r4
|
||||
; CHECK-BE-NEXT: vmrghb v2, v3, v2
|
||||
; CHECK-BE-NEXT: vmrghb v3, v5, v4
|
||||
; CHECK-BE-NEXT: vmrghb v4, v1, v0
|
||||
; CHECK-BE-NEXT: addi r4, r1, -16
|
||||
; CHECK-BE-NEXT: vmrghh v2, v3, v2
|
||||
; CHECK-BE-NEXT: vmrghb v5, v7, v6
|
||||
; CHECK-BE-NEXT: vmrghh v3, v5, v4
|
||||
; CHECK-BE-NEXT: vmrghw v2, v3, v2
|
||||
; CHECK-BE-NEXT: stxvd2x v2, 0, r4
|
||||
; CHECK-BE-NEXT: addi r5, r1, -16
|
||||
; CHECK-BE-NEXT: vpkuhum v2, v2, v2
|
||||
; CHECK-BE-NEXT: stxvd2x v2, 0, r5
|
||||
; CHECK-BE-NEXT: ld r4, -16(r1)
|
||||
; CHECK-BE-NEXT: std r4, 0(r3)
|
||||
; CHECK-BE-NEXT: blr
|
||||
|
@ -108,53 +35,17 @@ define void @test4i8(<4 x i8>* nocapture %Sink, <4 x i16>* nocapture readonly %S
|
|||
; CHECK-LABEL: test4i8:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: lvx v2, 0, r4
|
||||
; CHECK-NEXT: xxswapd vs0, v2
|
||||
; CHECK-NEXT: mfvsrd r4, f0
|
||||
; CHECK-NEXT: clrldi r5, r4, 48
|
||||
; CHECK-NEXT: mtvsrd f0, r5
|
||||
; CHECK-NEXT: rldicl r5, r4, 48, 48
|
||||
; CHECK-NEXT: mtvsrd f1, r5
|
||||
; CHECK-NEXT: rldicl r5, r4, 32, 48
|
||||
; CHECK-NEXT: rldicl r4, r4, 16, 48
|
||||
; CHECK-NEXT: mtvsrd f2, r5
|
||||
; CHECK-NEXT: xxswapd v2, vs0
|
||||
; CHECK-NEXT: mtvsrd f3, r4
|
||||
; CHECK-NEXT: xxswapd v3, vs1
|
||||
; CHECK-NEXT: xxswapd v4, vs2
|
||||
; CHECK-NEXT: xxswapd v5, vs3
|
||||
; CHECK-NEXT: vmrglb v2, v3, v2
|
||||
; CHECK-NEXT: vmrglb v3, v5, v4
|
||||
; CHECK-NEXT: vmrglh v2, v3, v2
|
||||
; CHECK-NEXT: vpkuhum v2, v2, v2
|
||||
; CHECK-NEXT: xxsldwi vs0, v2, v2, 2
|
||||
; CHECK-NEXT: stfiwx f0, 0, r3
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-LABEL: test4i8:
|
||||
; CHECK-BE: # %bb.0: # %entry
|
||||
; CHECK-BE-NEXT: lxvw4x vs0, 0, r4
|
||||
; CHECK-BE-NEXT: addi r4, r1, -32
|
||||
; CHECK-BE-NEXT: stxvw4x vs0, 0, r4
|
||||
; CHECK-BE-NEXT: lhz r4, -26(r1)
|
||||
; CHECK-BE-NEXT: stb r4, -48(r1)
|
||||
; CHECK-BE-NEXT: lhz r4, -28(r1)
|
||||
; CHECK-BE-NEXT: stb r4, -64(r1)
|
||||
; CHECK-BE-NEXT: lhz r4, -30(r1)
|
||||
; CHECK-BE-NEXT: stb r4, -80(r1)
|
||||
; CHECK-BE-NEXT: lhz r4, -32(r1)
|
||||
; CHECK-BE-NEXT: stb r4, -96(r1)
|
||||
; CHECK-BE-NEXT: addi r4, r1, -48
|
||||
; CHECK-BE-NEXT: lxvw4x v2, 0, r4
|
||||
; CHECK-BE-NEXT: addi r4, r1, -64
|
||||
; CHECK-BE-NEXT: lxvw4x v3, 0, r4
|
||||
; CHECK-BE-NEXT: addi r4, r1, -80
|
||||
; CHECK-BE-NEXT: lxvw4x v4, 0, r4
|
||||
; CHECK-BE-NEXT: addi r4, r1, -96
|
||||
; CHECK-BE-NEXT: lxvw4x v5, 0, r4
|
||||
; CHECK-BE-NEXT: vmrghb v2, v3, v2
|
||||
; CHECK-BE-NEXT: addi r4, r1, -16
|
||||
; CHECK-BE-NEXT: vmrghb v3, v5, v4
|
||||
; CHECK-BE-NEXT: vmrghh v2, v3, v2
|
||||
; CHECK-BE-NEXT: stxvw4x v2, 0, r4
|
||||
; CHECK-BE-NEXT: addi r5, r1, -16
|
||||
; CHECK-BE-NEXT: vpkuhum v2, v2, v2
|
||||
; CHECK-BE-NEXT: stxvw4x v2, 0, r5
|
||||
; CHECK-BE-NEXT: lwz r4, -16(r1)
|
||||
; CHECK-BE-NEXT: stw r4, 0(r3)
|
||||
; CHECK-BE-NEXT: blr
|
||||
|
@ -168,54 +59,23 @@ entry:
|
|||
define void @test4i8w(<4 x i8>* nocapture %Sink, <4 x i32>* nocapture readonly %SrcPtr) {
|
||||
; CHECK-LABEL: test4i8w:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: lvx v2, 0, r4
|
||||
; CHECK-NEXT: xxswapd vs0, v2
|
||||
; CHECK-NEXT: mfvsrwz r4, v2
|
||||
; CHECK-NEXT: xxsldwi vs1, v2, v2, 1
|
||||
; CHECK-NEXT: xxsldwi vs3, v2, v2, 3
|
||||
; CHECK-NEXT: mtvsrd f2, r4
|
||||
; CHECK-NEXT: mfvsrwz r4, f0
|
||||
; CHECK-NEXT: mfvsrwz r5, f1
|
||||
; CHECK-NEXT: xxswapd v4, vs2
|
||||
; CHECK-NEXT: mtvsrd f0, r4
|
||||
; CHECK-NEXT: mfvsrwz r4, f3
|
||||
; CHECK-NEXT: mtvsrd f1, r5
|
||||
; CHECK-NEXT: xxswapd v2, vs0
|
||||
; CHECK-NEXT: mtvsrd f3, r4
|
||||
; CHECK-NEXT: xxswapd v3, vs1
|
||||
; CHECK-NEXT: xxswapd v5, vs3
|
||||
; CHECK-NEXT: vmrglb v2, v3, v2
|
||||
; CHECK-NEXT: vmrglb v3, v5, v4
|
||||
; CHECK-NEXT: vmrglh v2, v3, v2
|
||||
; CHECK-NEXT: addis r5, r2, .LCPI2_0@toc@ha
|
||||
; CHECK-NEXT: lvx v3, 0, r4
|
||||
; CHECK-NEXT: addi r5, r5, .LCPI2_0@toc@l
|
||||
; CHECK-NEXT: lvx v2, 0, r5
|
||||
; CHECK-NEXT: vperm v2, v3, v3, v2
|
||||
; CHECK-NEXT: xxsldwi vs0, v2, v2, 2
|
||||
; CHECK-NEXT: stfiwx f0, 0, r3
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-LABEL: test4i8w:
|
||||
; CHECK-BE: # %bb.0: # %entry
|
||||
; CHECK-BE-NEXT: lxvw4x vs0, 0, r4
|
||||
; CHECK-BE-NEXT: addi r4, r1, -32
|
||||
; CHECK-BE-NEXT: stxvw4x vs0, 0, r4
|
||||
; CHECK-BE-NEXT: lwz r4, -20(r1)
|
||||
; CHECK-BE-NEXT: stb r4, -48(r1)
|
||||
; CHECK-BE-NEXT: lwz r4, -24(r1)
|
||||
; CHECK-BE-NEXT: stb r4, -64(r1)
|
||||
; CHECK-BE-NEXT: lwz r4, -28(r1)
|
||||
; CHECK-BE-NEXT: stb r4, -80(r1)
|
||||
; CHECK-BE-NEXT: lwz r4, -32(r1)
|
||||
; CHECK-BE-NEXT: stb r4, -96(r1)
|
||||
; CHECK-BE-NEXT: addi r4, r1, -48
|
||||
; CHECK-BE-NEXT: addis r5, r2, .LCPI2_0@toc@ha
|
||||
; CHECK-BE-NEXT: lxvw4x v2, 0, r4
|
||||
; CHECK-BE-NEXT: addi r4, r1, -64
|
||||
; CHECK-BE-NEXT: addi r4, r5, .LCPI2_0@toc@l
|
||||
; CHECK-BE-NEXT: lxvw4x v3, 0, r4
|
||||
; CHECK-BE-NEXT: addi r4, r1, -80
|
||||
; CHECK-BE-NEXT: lxvw4x v4, 0, r4
|
||||
; CHECK-BE-NEXT: addi r4, r1, -96
|
||||
; CHECK-BE-NEXT: lxvw4x v5, 0, r4
|
||||
; CHECK-BE-NEXT: vmrghb v2, v3, v2
|
||||
; CHECK-BE-NEXT: addi r4, r1, -16
|
||||
; CHECK-BE-NEXT: vmrghb v3, v5, v4
|
||||
; CHECK-BE-NEXT: vmrghh v2, v3, v2
|
||||
; CHECK-BE-NEXT: vperm v2, v2, v2, v3
|
||||
; CHECK-BE-NEXT: stxvw4x v2, 0, r4
|
||||
; CHECK-BE-NEXT: lwz r4, -16(r1)
|
||||
; CHECK-BE-NEXT: stw r4, 0(r3)
|
||||
|
@ -231,15 +91,7 @@ define void @test2i8(<2 x i8>* nocapture %Sink, <2 x i16>* nocapture readonly %S
|
|||
; CHECK-LABEL: test2i8:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: lvx v2, 0, r4
|
||||
; CHECK-NEXT: xxswapd vs0, v2
|
||||
; CHECK-NEXT: mfvsrd r4, f0
|
||||
; CHECK-NEXT: clrldi r5, r4, 48
|
||||
; CHECK-NEXT: rldicl r4, r4, 48, 48
|
||||
; CHECK-NEXT: mtvsrd f0, r5
|
||||
; CHECK-NEXT: mtvsrd f1, r4
|
||||
; CHECK-NEXT: xxswapd v2, vs0
|
||||
; CHECK-NEXT: xxswapd v3, vs1
|
||||
; CHECK-NEXT: vmrglb v2, v3, v2
|
||||
; CHECK-NEXT: vpkuhum v2, v2, v2
|
||||
; CHECK-NEXT: xxswapd vs0, v2
|
||||
; CHECK-NEXT: mfvsrd r4, f0
|
||||
; CHECK-NEXT: clrldi r4, r4, 48
|
||||
|
@ -248,20 +100,10 @@ define void @test2i8(<2 x i8>* nocapture %Sink, <2 x i16>* nocapture readonly %S
|
|||
;
|
||||
; CHECK-BE-LABEL: test2i8:
|
||||
; CHECK-BE: # %bb.0: # %entry
|
||||
; CHECK-BE-NEXT: lxvw4x vs0, 0, r4
|
||||
; CHECK-BE-NEXT: addi r4, r1, -32
|
||||
; CHECK-BE-NEXT: stxvw4x vs0, 0, r4
|
||||
; CHECK-BE-NEXT: lhz r4, -30(r1)
|
||||
; CHECK-BE-NEXT: stb r4, -48(r1)
|
||||
; CHECK-BE-NEXT: lhz r4, -32(r1)
|
||||
; CHECK-BE-NEXT: stb r4, -64(r1)
|
||||
; CHECK-BE-NEXT: addi r4, r1, -48
|
||||
; CHECK-BE-NEXT: lxvw4x v2, 0, r4
|
||||
; CHECK-BE-NEXT: addi r4, r1, -64
|
||||
; CHECK-BE-NEXT: lxvw4x v3, 0, r4
|
||||
; CHECK-BE-NEXT: addi r4, r1, -16
|
||||
; CHECK-BE-NEXT: vmrghb v2, v3, v2
|
||||
; CHECK-BE-NEXT: stxvw4x v2, 0, r4
|
||||
; CHECK-BE-NEXT: addi r5, r1, -16
|
||||
; CHECK-BE-NEXT: vpkuhum v2, v2, v2
|
||||
; CHECK-BE-NEXT: stxvw4x v2, 0, r5
|
||||
; CHECK-BE-NEXT: lhz r4, -16(r1)
|
||||
; CHECK-BE-NEXT: sth r4, 0(r3)
|
||||
; CHECK-BE-NEXT: blr
|
||||
|
@ -276,54 +118,17 @@ define void @test4i16(<4 x i16>* nocapture %Sink, <4 x i32>* nocapture readonly
|
|||
; CHECK-LABEL: test4i16:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: lvx v2, 0, r4
|
||||
; CHECK-NEXT: xxswapd vs0, v2
|
||||
; CHECK-NEXT: mfvsrwz r4, v2
|
||||
; CHECK-NEXT: xxsldwi vs1, v2, v2, 1
|
||||
; CHECK-NEXT: xxsldwi vs3, v2, v2, 3
|
||||
; CHECK-NEXT: mtvsrd f2, r4
|
||||
; CHECK-NEXT: mfvsrwz r4, f0
|
||||
; CHECK-NEXT: mfvsrwz r5, f1
|
||||
; CHECK-NEXT: xxswapd v4, vs2
|
||||
; CHECK-NEXT: mtvsrd f0, r4
|
||||
; CHECK-NEXT: mfvsrwz r4, f3
|
||||
; CHECK-NEXT: mtvsrd f1, r5
|
||||
; CHECK-NEXT: xxswapd v2, vs0
|
||||
; CHECK-NEXT: mtvsrd f3, r4
|
||||
; CHECK-NEXT: xxswapd v3, vs1
|
||||
; CHECK-NEXT: xxswapd v5, vs3
|
||||
; CHECK-NEXT: vmrglh v2, v3, v2
|
||||
; CHECK-NEXT: vmrglh v3, v5, v4
|
||||
; CHECK-NEXT: vmrglw v2, v3, v2
|
||||
; CHECK-NEXT: vpkuwum v2, v2, v2
|
||||
; CHECK-NEXT: xxswapd vs0, v2
|
||||
; CHECK-NEXT: stfdx f0, 0, r3
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-LABEL: test4i16:
|
||||
; CHECK-BE: # %bb.0: # %entry
|
||||
; CHECK-BE-NEXT: lxvw4x vs0, 0, r4
|
||||
; CHECK-BE-NEXT: addi r4, r1, -32
|
||||
; CHECK-BE-NEXT: stxvw4x vs0, 0, r4
|
||||
; CHECK-BE-NEXT: lwz r4, -20(r1)
|
||||
; CHECK-BE-NEXT: sth r4, -48(r1)
|
||||
; CHECK-BE-NEXT: lwz r4, -24(r1)
|
||||
; CHECK-BE-NEXT: sth r4, -64(r1)
|
||||
; CHECK-BE-NEXT: lwz r4, -28(r1)
|
||||
; CHECK-BE-NEXT: sth r4, -80(r1)
|
||||
; CHECK-BE-NEXT: lwz r4, -32(r1)
|
||||
; CHECK-BE-NEXT: sth r4, -96(r1)
|
||||
; CHECK-BE-NEXT: addi r4, r1, -48
|
||||
; CHECK-BE-NEXT: lxvw4x v2, 0, r4
|
||||
; CHECK-BE-NEXT: addi r4, r1, -64
|
||||
; CHECK-BE-NEXT: lxvw4x v3, 0, r4
|
||||
; CHECK-BE-NEXT: addi r4, r1, -80
|
||||
; CHECK-BE-NEXT: lxvw4x v4, 0, r4
|
||||
; CHECK-BE-NEXT: addi r4, r1, -96
|
||||
; CHECK-BE-NEXT: lxvw4x v5, 0, r4
|
||||
; CHECK-BE-NEXT: vmrghh v2, v3, v2
|
||||
; CHECK-BE-NEXT: addi r4, r1, -16
|
||||
; CHECK-BE-NEXT: vmrghh v3, v5, v4
|
||||
; CHECK-BE-NEXT: vmrghw v2, v3, v2
|
||||
; CHECK-BE-NEXT: stxvd2x v2, 0, r4
|
||||
; CHECK-BE-NEXT: addi r5, r1, -16
|
||||
; CHECK-BE-NEXT: vpkuwum v2, v2, v2
|
||||
; CHECK-BE-NEXT: stxvd2x v2, 0, r5
|
||||
; CHECK-BE-NEXT: ld r4, -16(r1)
|
||||
; CHECK-BE-NEXT: std r4, 0(r3)
|
||||
; CHECK-BE-NEXT: blr
|
||||
|
@ -338,35 +143,17 @@ define void @test2i16(<2 x i16>* nocapture %Sink, <2 x i32>* nocapture readonly
|
|||
; CHECK-LABEL: test2i16:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: lvx v2, 0, r4
|
||||
; CHECK-NEXT: xxswapd vs0, v2
|
||||
; CHECK-NEXT: xxsldwi vs1, v2, v2, 1
|
||||
; CHECK-NEXT: mfvsrwz r4, f0
|
||||
; CHECK-NEXT: mfvsrwz r5, f1
|
||||
; CHECK-NEXT: mtvsrd f0, r4
|
||||
; CHECK-NEXT: mtvsrd f1, r5
|
||||
; CHECK-NEXT: xxswapd v2, vs0
|
||||
; CHECK-NEXT: xxswapd v3, vs1
|
||||
; CHECK-NEXT: vmrglh v2, v3, v2
|
||||
; CHECK-NEXT: vpkuwum v2, v2, v2
|
||||
; CHECK-NEXT: xxsldwi vs0, v2, v2, 2
|
||||
; CHECK-NEXT: stfiwx f0, 0, r3
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-LABEL: test2i16:
|
||||
; CHECK-BE: # %bb.0: # %entry
|
||||
; CHECK-BE-NEXT: lxvw4x vs0, 0, r4
|
||||
; CHECK-BE-NEXT: addi r4, r1, -32
|
||||
; CHECK-BE-NEXT: stxvw4x vs0, 0, r4
|
||||
; CHECK-BE-NEXT: lwz r4, -28(r1)
|
||||
; CHECK-BE-NEXT: sth r4, -48(r1)
|
||||
; CHECK-BE-NEXT: lwz r4, -32(r1)
|
||||
; CHECK-BE-NEXT: sth r4, -64(r1)
|
||||
; CHECK-BE-NEXT: addi r4, r1, -48
|
||||
; CHECK-BE-NEXT: lxvw4x v2, 0, r4
|
||||
; CHECK-BE-NEXT: addi r4, r1, -64
|
||||
; CHECK-BE-NEXT: lxvw4x v3, 0, r4
|
||||
; CHECK-BE-NEXT: addi r4, r1, -16
|
||||
; CHECK-BE-NEXT: vmrghh v2, v3, v2
|
||||
; CHECK-BE-NEXT: stxvw4x v2, 0, r4
|
||||
; CHECK-BE-NEXT: addi r5, r1, -16
|
||||
; CHECK-BE-NEXT: vpkuwum v2, v2, v2
|
||||
; CHECK-BE-NEXT: stxvw4x v2, 0, r5
|
||||
; CHECK-BE-NEXT: lwz r4, -16(r1)
|
||||
; CHECK-BE-NEXT: stw r4, 0(r3)
|
||||
; CHECK-BE-NEXT: blr
|
||||
|
@ -381,33 +168,23 @@ define void @test2i16d(<2 x i16>* nocapture %Sink, <2 x i64>* nocapture readonly
|
|||
; CHECK-LABEL: test2i16d:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: lxvd2x vs0, 0, r4
|
||||
; CHECK-NEXT: xxswapd vs1, vs0
|
||||
; CHECK-NEXT: mfvsrwz r4, f0
|
||||
; CHECK-NEXT: mtvsrd f0, r4
|
||||
; CHECK-NEXT: mfvsrwz r5, f1
|
||||
; CHECK-NEXT: addis r5, r2, .LCPI6_0@toc@ha
|
||||
; CHECK-NEXT: addi r4, r5, .LCPI6_0@toc@l
|
||||
; CHECK-NEXT: lvx v3, 0, r4
|
||||
; CHECK-NEXT: xxswapd v2, vs0
|
||||
; CHECK-NEXT: mtvsrd f1, r5
|
||||
; CHECK-NEXT: xxswapd v3, vs1
|
||||
; CHECK-NEXT: vmrglh v2, v3, v2
|
||||
; CHECK-NEXT: vperm v2, v2, v2, v3
|
||||
; CHECK-NEXT: xxsldwi vs0, v2, v2, 2
|
||||
; CHECK-NEXT: stfiwx f0, 0, r3
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-LABEL: test2i16d:
|
||||
; CHECK-BE: # %bb.0: # %entry
|
||||
; CHECK-BE-NEXT: lxvd2x vs0, 0, r4
|
||||
; CHECK-BE-NEXT: addi r4, r1, -32
|
||||
; CHECK-BE-NEXT: stxvd2x vs0, 0, r4
|
||||
; CHECK-BE-NEXT: lwz r4, -20(r1)
|
||||
; CHECK-BE-NEXT: sth r4, -48(r1)
|
||||
; CHECK-BE-NEXT: lwz r4, -28(r1)
|
||||
; CHECK-BE-NEXT: sth r4, -64(r1)
|
||||
; CHECK-BE-NEXT: addi r4, r1, -48
|
||||
; CHECK-BE-NEXT: addis r5, r2, .LCPI6_0@toc@ha
|
||||
; CHECK-BE-NEXT: lxvw4x v2, 0, r4
|
||||
; CHECK-BE-NEXT: addi r4, r1, -64
|
||||
; CHECK-BE-NEXT: addi r4, r5, .LCPI6_0@toc@l
|
||||
; CHECK-BE-NEXT: lxvw4x v3, 0, r4
|
||||
; CHECK-BE-NEXT: addi r4, r1, -16
|
||||
; CHECK-BE-NEXT: vmrghh v2, v3, v2
|
||||
; CHECK-BE-NEXT: vperm v2, v2, v2, v3
|
||||
; CHECK-BE-NEXT: stxvw4x v2, 0, r4
|
||||
; CHECK-BE-NEXT: lwz r4, -16(r1)
|
||||
; CHECK-BE-NEXT: stw r4, 0(r3)
|
||||
|
|
Loading…
Reference in New Issue