forked from OSchip/llvm-project
[PowerPC] Keep vector int to fp conversions in vector domain
At present a v2i16 -> v2f64 conversion is implemented by extracting the elements to scalars, converting each scalar, and merging the results back into a vector. Use vector conversions instead, with the integer data permuted into the proper position and extended if necessary. Patch by RolandF. Differential revision: https://reviews.llvm.org/D53346 llvm-svn: 345361
This commit is contained in:
parent
98ac9984b0
commit
6a74bfba20
|
@ -792,6 +792,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
|
|||
setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
|
||||
setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
|
||||
|
||||
setOperationAction(ISD::UINT_TO_FP, MVT::v2i16, Custom);
|
||||
setOperationAction(ISD::SINT_TO_FP, MVT::v2i16, Custom);
|
||||
|
||||
setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
|
||||
setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
|
||||
setOperationAction(ISD::FABS, MVT::v4f32, Legal);
|
||||
|
@ -7265,10 +7268,75 @@ SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
|
|||
return FP;
|
||||
}
|
||||
|
||||
/// Pad a sub-128-bit vector out to 128 bits.
///
/// The result keeps the element type of \p Vec, holds \p Vec as its first
/// CONCAT_VECTORS operand, and has every remaining operand undefined.
static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
  EVT VecVT = Vec.getValueType();
  assert(VecVT.isVector() && "Expected a vector type.");
  assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");

  // Work out how many elements of this type fill 128 bits, and from that
  // how many copies of the narrow type must be concatenated.
  const EVT ScalarVT = VecVT.getVectorElementType();
  const unsigned FullNumElts = 128 / ScalarVT.getSizeInBits();
  const EVT FullVT = EVT::getVectorVT(*DAG.getContext(), ScalarVT, FullNumElts);
  const unsigned NumPieces = FullNumElts / VecVT.getVectorNumElements();

  // Every piece starts out undefined; only the first one carries real data.
  SmallVector<SDValue, 16> Pieces(NumPieces, DAG.getUNDEF(VecVT));
  Pieces[0] = Vec;

  return DAG.getNode(ISD::CONCAT_VECTORS, dl, FullVT, Pieces);
}
|
||||
|
||||
/// Lower a vector SINT_TO_FP / UINT_TO_FP that produces v2f64 without
/// leaving the vector domain.
///
/// The integer source is widened to 128 bits, its first two elements are
/// shuffled into one lane of each half of the wide vector, the result is
/// turned into a v2i64 (sign-extension node for signed, plain bitcast over
/// zero-filled lanes for unsigned), and the original conversion opcode is
/// then applied to that v2i64.
///
/// \returns the converted v2f64 value, or an empty SDValue for a signed
/// conversion when the subtarget lacks P9 Altivec.
SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op,
                                                SelectionDAG &DAG,
                                                const SDLoc &dl) const {
  unsigned Opc = Op.getOpcode();
  assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP) &&
         "Unexpected conversion type");
  assert(Op.getValueType() == MVT::v2f64 && "Supports v2f64 only.");

  // CPUs prior to P9 don't have a way to sign-extend in vectors.
  const bool IsSigned = (Opc == ISD::SINT_TO_FP);
  if (IsSigned && !Subtarget.hasP9Altivec())
    return SDValue();

  SDValue Widened = widenVec(DAG, Op.getOperand(0), dl);
  const EVT WidenedVT = Widened.getValueType();
  const unsigned NumWideElts = WidenedVT.getVectorNumElements();

  // By default every lane selects the matching lane of the second shuffle
  // operand (mask indices >= NumWideElts refer to that operand).
  SmallVector<int, 16> Mask(NumWideElts);
  for (unsigned Lane = 0; Lane != NumWideElts; ++Lane)
    Mask[Lane] = Lane + NumWideElts;

  // Route source elements 0 and 1 into one lane of each half: the first lane
  // of the half on little-endian, the last lane of the half on big-endian.
  const bool IsLE = Subtarget.isLittleEndian();
  const unsigned HalfElts = NumWideElts / 2;
  Mask[IsLE ? 0 : HalfElts - 1] = 0;
  Mask[IsLE ? HalfElts : NumWideElts - 1] = 1;

  // Unsigned conversions fill the remaining lanes with zeros so that a
  // bitcast to v2i64 suffices; signed conversions leave them undefined and
  // use the sign-extension node instead.
  SDValue Filler;
  if (IsSigned)
    Filler = DAG.getUNDEF(WidenedVT);
  else
    Filler = DAG.getConstant(0, dl, WidenedVT);
  SDValue Shuffled =
      DAG.getVectorShuffle(WidenedVT, dl, Widened, Filler, Mask);

  unsigned ExtOpc = ISD::BITCAST;
  if (IsSigned)
    ExtOpc = PPCISD::SExtVElems;
  SDValue Extended = DAG.getNode(ExtOpc, dl, MVT::v2i64, Shuffled);

  return DAG.getNode(Opc, dl, Op.getValueType(), Extended);
}
|
||||
|
||||
SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
SDLoc dl(Op);
|
||||
|
||||
if (Op.getValueType() == MVT::v2f64 &&
|
||||
Op.getOperand(0).getValueType() == MVT::v2i16)
|
||||
return LowerINT_TO_FPVector(Op, DAG, dl);
|
||||
|
||||
// Conversions to f128 are legal.
|
||||
if (EnableQuadPrecision && (Op.getValueType() == MVT::f128))
|
||||
return Op;
|
||||
|
|
|
@ -927,6 +927,9 @@ namespace llvm {
|
|||
SDValue LowerINT_TO_FPDirectMove(SDValue Op, SelectionDAG &DAG,
|
||||
const SDLoc &dl) const;
|
||||
|
||||
/// Lowers a vector SINT_TO_FP/UINT_TO_FP whose result type is v2f64 by
/// widening the integer source to 128 bits, shuffling its first two elements
/// into place, extending to v2i64 and converting in the vector domain.
/// Returns an empty SDValue for signed conversions when the subtarget does
/// not have P9 Altivec.
SDValue LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
|
||||
const SDLoc &dl) const;
|
||||
|
||||
SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const;
|
||||
SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const;
|
||||
|
||||
|
|
|
@ -0,0 +1,192 @@
|
|||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
|
||||
; RUN: -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
|
||||
; RUN: FileCheck %s --check-prefix=CHECK-P8
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
|
||||
; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
|
||||
; RUN: FileCheck %s --check-prefix=CHECK-P9
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
|
||||
; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
|
||||
; RUN: FileCheck %s --check-prefix=CHECK-BE
|
||||
|
||||
; Unsigned <8 x i16> -> <8 x double> conversion. On little-endian pwr8 and
; pwr9 this expects four permutes followed by four vector unsigned converts.
define void @test8(<8 x double>* nocapture %Sink, <8 x i16>* nocapture readonly %SrcPtr) {
entry:
  %src = load <8 x i16>, <8 x i16>* %SrcPtr, align 16
  %conv = uitofp <8 x i16> %src to <8 x double>
  store <8 x double> %conv, <8 x double>* %Sink, align 16
  ret void
; CHECK-P9-LABEL: @test8
; CHECK-P9: vperm
; CHECK-P9: vperm
; CHECK-P9: vperm
; CHECK-P9: vperm
; CHECK-P9: xvcvuxddp
; CHECK-P9: xvcvuxddp
; CHECK-P9: xvcvuxddp
; CHECK-P9: xvcvuxddp
; CHECK-P8-LABEL: @test8
; CHECK-P8: vperm
; CHECK-P8: vperm
; CHECK-P8: vperm
; CHECK-P8: vperm
; CHECK-P8: xvcvuxddp
; CHECK-P8: xvcvuxddp
; CHECK-P8: xvcvuxddp
; CHECK-P8: xvcvuxddp
}
|
||||
|
||||
; Unsigned <4 x i16> -> <4 x double> conversion. On little-endian pwr8 and
; pwr9 this expects two permutes followed by two vector unsigned converts.
define void @test4(<4 x double>* nocapture %Sink, <4 x i16>* nocapture readonly %SrcPtr) {
entry:
  %src = load <4 x i16>, <4 x i16>* %SrcPtr, align 16
  %conv = uitofp <4 x i16> %src to <4 x double>
  store <4 x double> %conv, <4 x double>* %Sink, align 16
  ret void
; CHECK-P9-LABEL: @test4
; CHECK-P9: vperm
; CHECK-P9: vperm
; CHECK-P9: xvcvuxddp
; CHECK-P9: xvcvuxddp
; CHECK-P8-LABEL: @test4
; CHECK-P8: vperm
; CHECK-P8: vperm
; CHECK-P8: xvcvuxddp
; CHECK-P8: xvcvuxddp
}
|
||||
|
||||
; Unsigned <2 x i16> -> <2 x double> conversion. The pwr9 little-endian and
; big-endian runs pin the exact permute-control constant as well as the
; permute/convert sequence; the pwr8 run only pins the permute feeding the
; convert.
define void @test2(<2 x double>* nocapture %Sink, <2 x i16>* nocapture readonly %SrcPtr) {
entry:
  %src = load <2 x i16>, <2 x i16>* %SrcPtr, align 16
  %conv = uitofp <2 x i16> %src to <2 x double>
  store <2 x double> %conv, <2 x double>* %Sink, align 16
  ret void
; CHECK-P9-LABEL: .LCPI2_0:
; CHECK-P9-NEXT: .byte 31
; CHECK-P9-NEXT: .byte 30
; CHECK-P9-NEXT: .byte 13
; CHECK-P9-NEXT: .byte 12
; CHECK-P9-NEXT: .byte 11
; CHECK-P9-NEXT: .byte 10
; CHECK-P9-NEXT: .byte 9
; CHECK-P9-NEXT: .byte 8
; CHECK-P9-NEXT: .byte 29
; CHECK-P9-NEXT: .byte 28
; CHECK-P9-NEXT: .byte 5
; CHECK-P9-NEXT: .byte 4
; CHECK-P9-NEXT: .byte 3
; CHECK-P9-NEXT: .byte 2
; CHECK-P9-NEXT: .byte 1
; CHECK-P9-NEXT: .byte 0
; CHECK-P9: addi [[REG1:r[0-9]+]], {{r[0-9]+}}, .LCPI2_0@toc@l
; CHECK-P9: lxvx [[REG2:v[0-9]+]], 0, [[REG1]]
; CHECK-P9: vperm [[REG3:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, [[REG2]]
; CHECK-P9: xvcvuxddp {{vs[0-9]+}}, [[REG3]]
; CHECK-P8-LABEL: @test2
; CHECK-P8: vperm [[REG1:v[0-9]+]]
; CHECK-P8: xvcvuxddp {{vs[0-9]+}}, [[REG1]]
; CHECK-BE-LABEL: .LCPI2_0:
; CHECK-BE-NEXT: .byte 16
; CHECK-BE-NEXT: .byte 17
; CHECK-BE-NEXT: .byte 18
; CHECK-BE-NEXT: .byte 19
; CHECK-BE-NEXT: .byte 20
; CHECK-BE-NEXT: .byte 21
; CHECK-BE-NEXT: .byte 0
; CHECK-BE-NEXT: .byte 1
; CHECK-BE-NEXT: .byte 24
; CHECK-BE-NEXT: .byte 25
; CHECK-BE-NEXT: .byte 26
; CHECK-BE-NEXT: .byte 27
; CHECK-BE-NEXT: .byte 28
; CHECK-BE-NEXT: .byte 29
; CHECK-BE-NEXT: .byte 2
; CHECK-BE-NEXT: .byte 3
; CHECK-BE: addi [[REG1:r[0-9]+]], {{r[0-9]+}}, .LCPI2_0@toc@l
; CHECK-BE: lxvx [[REG2:v[0-9]+]], 0, [[REG1]]
; CHECK-BE: vperm [[REG3:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, [[REG2]]
; CHECK-BE: xvcvuxddp {{vs[0-9]+}}, [[REG3]]
}
|
||||
|
||||
; Signed <8 x i16> -> <8 x double> conversion. Only the pwr9 little-endian
; run is checked: four permutes, four halfword-to-doubleword sign extends,
; then four vector signed converts.
define void @stest8(<8 x double>* nocapture %Sink, <8 x i16>* nocapture readonly %SrcPtr) {
entry:
  %src = load <8 x i16>, <8 x i16>* %SrcPtr, align 16
  %conv = sitofp <8 x i16> %src to <8 x double>
  store <8 x double> %conv, <8 x double>* %Sink, align 16
  ret void
; CHECK-P9-LABEL: @stest8
; CHECK-P9: vperm
; CHECK-P9: vperm
; CHECK-P9: vperm
; CHECK-P9: vperm
; CHECK-P9: vextsh2d
; CHECK-P9: vextsh2d
; CHECK-P9: vextsh2d
; CHECK-P9: vextsh2d
; CHECK-P9: xvcvsxddp
; CHECK-P9: xvcvsxddp
; CHECK-P9: xvcvsxddp
; CHECK-P9: xvcvsxddp
}
|
||||
|
||||
; Signed <4 x i16> -> <4 x double> conversion. Only the pwr9 little-endian
; run is checked: two permutes, two halfword-to-doubleword sign extends,
; then two vector signed converts.
define void @stest4(<4 x double>* nocapture %Sink, <4 x i16>* nocapture readonly %SrcPtr) {
entry:
  %src = load <4 x i16>, <4 x i16>* %SrcPtr, align 16
  %conv = sitofp <4 x i16> %src to <4 x double>
  store <4 x double> %conv, <4 x double>* %Sink, align 16
  ret void
; CHECK-P9-LABEL: @stest4
; CHECK-P9: vperm
; CHECK-P9: vperm
; CHECK-P9: vextsh2d
; CHECK-P9: vextsh2d
; CHECK-P9: xvcvsxddp
; CHECK-P9: xvcvsxddp
}
|
||||
|
||||
; Signed <2 x i16> -> <2 x double> conversion. The pwr9 little-endian and
; big-endian runs pin the permute-control constant and the
; permute / sign-extend / convert chain.
define void @stest2(<2 x double>* nocapture %Sink, <2 x i16>* nocapture readonly %SrcPtr) {
entry:
  %src = load <2 x i16>, <2 x i16>* %SrcPtr, align 16
  %conv = sitofp <2 x i16> %src to <2 x double>
  store <2 x double> %conv, <2 x double>* %Sink, align 16
  ret void
; CHECK-P9-LABEL: .LCPI5_0:
; CHECK-P9-NEXT: .byte 31
; CHECK-P9-NEXT: .byte 30
; CHECK-P9-NEXT: .byte 31
; CHECK-P9-NEXT: .byte 31
; CHECK-P9-NEXT: .byte 31
; CHECK-P9-NEXT: .byte 31
; CHECK-P9-NEXT: .byte 31
; CHECK-P9-NEXT: .byte 31
; CHECK-P9-NEXT: .byte 29
; CHECK-P9-NEXT: .byte 28
; CHECK-P9-NEXT: .byte 31
; CHECK-P9-NEXT: .byte 31
; CHECK-P9-NEXT: .byte 31
; CHECK-P9-NEXT: .byte 31
; CHECK-P9-NEXT: .byte 31
; CHECK-P9-NEXT: .byte 31
; CHECK-P9: vperm [[REG1:v[0-9]+]]
; CHECK-P9: vextsh2d [[REG2:v[0-9]+]], [[REG1]]
; CHECK-P9: xvcvsxddp {{vs[0-9]+}}, [[REG2]]
; CHECK-BE-LABEL: .LCPI5_0:
; CHECK-BE-NEXT: .byte 0
; CHECK-BE-NEXT: .byte 0
; CHECK-BE-NEXT: .byte 0
; CHECK-BE-NEXT: .byte 0
; CHECK-BE-NEXT: .byte 0
; CHECK-BE-NEXT: .byte 0
; CHECK-BE-NEXT: .byte 0
; CHECK-BE-NEXT: .byte 1
; CHECK-BE-NEXT: .byte 0
; CHECK-BE-NEXT: .byte 0
; CHECK-BE-NEXT: .byte 0
; CHECK-BE-NEXT: .byte 0
; CHECK-BE-NEXT: .byte 0
; CHECK-BE-NEXT: .byte 0
; CHECK-BE-NEXT: .byte 2
; CHECK-BE-NEXT: .byte 3
; CHECK-BE: addi [[REG1:r[0-9]+]], {{r[0-9]+}}, .LCPI5_0@toc@l
; CHECK-BE: lxvx [[REG2:v[0-9]+]], 0, [[REG1]]
; CHECK-BE: vperm [[REG3:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, [[REG2]]
; CHECK-BE: vextsh2d [[REG4:v[0-9]+]], [[REG3]]
; CHECK-BE: xvcvsxddp {{vs[0-9]+}}, [[REG4]]
}
|
Loading…
Reference in New Issue