forked from OSchip/llvm-project
[PPC] Generate positive FP zero using xor insn instead of loading from constant area
https://reviews.llvm.org/D23614 Currently, +0.0 is loaded from the constant area. It can instead be generated with an XOR instruction. llvm-svn: 284995
This commit is contained in:
parent
47f2616b6a
commit
c90b02cf50
|
@ -562,6 +562,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
|
|||
setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
|
||||
setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
|
||||
setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
|
||||
if (Subtarget.hasP8Altivec())
|
||||
setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
|
||||
if (Subtarget.hasVSX())
|
||||
setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
|
||||
|
||||
// Altivec does not contain unordered floating-point compare instructions
|
||||
setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
|
||||
|
@ -12367,3 +12371,20 @@ void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
|
|||
if (!Subtarget.isTargetLinux())
|
||||
return TargetLowering::insertSSPDeclarations(M);
|
||||
}
|
||||
|
||||
bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
|
||||
|
||||
if (!VT.isSimple() || !Subtarget.hasVSX())
|
||||
return false;
|
||||
|
||||
switch(VT.getSimpleVT().SimpleTy) {
|
||||
default:
|
||||
// For FP types that are currently not supported by PPC backend, return
|
||||
// false. Examples: f16, f80.
|
||||
return false;
|
||||
case MVT::f32:
|
||||
case MVT::f64:
|
||||
case MVT::ppcf128:
|
||||
return Imm.isPosZero();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -769,6 +769,7 @@ namespace llvm {
|
|||
bool useLoadStackGuardNode() const override;
|
||||
void insertSSPDeclarations(Module &M) const override;
|
||||
|
||||
bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
|
||||
private:
|
||||
struct ReuseLoadInfo {
|
||||
SDValue Ptr;
|
||||
|
|
|
@ -1076,6 +1076,13 @@ class XX3Form_Zero<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
|
|||
let XB = XT;
|
||||
}
|
||||
|
||||
// XX3-form variant in which both source operand fields (XA and XB) are tied
// to the target field XT, so the instruction only needs its output operand.
class XX3Form_SetZero<bits<6> opcode, bits<8> xo, dag OOL, dag IOL,
                      string asmstr, InstrItinClass itin, list<dag> pattern>
    : XX3Form<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
  let XA = XT;
  let XB = XT;
}
|
||||
|
||||
class XX3Form_1<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
|
||||
InstrItinClass itin, list<dag> pattern>
|
||||
: I<opcode, OOL, IOL, asmstr, itin> {
|
||||
|
|
|
@ -621,6 +621,9 @@ def s17imm : Operand<i32> {
|
|||
let ParserMatchClass = PPCS17ImmAsmOperand;
|
||||
let DecoderMethod = "decodeSImmOperand<16>";
|
||||
}
|
||||
|
||||
// Pattern leaf that accepts a floating-point immediate node only when
// N->isExactlyValue(+0.0) holds, i.e. the constant is positive zero.
def fpimm0 : PatLeaf<(fpimm), [{ return N->isExactlyValue(+0.0); }]>;
|
||||
|
||||
def PPCDirectBrAsmOperand : AsmOperandClass {
|
||||
let Name = "DirectBr"; let PredicateMethod = "isDirectBr";
|
||||
let RenderMethod = "addBranchTargetOperands";
|
||||
|
|
|
@ -791,6 +791,17 @@ let Uses = [RM] in {
|
|||
"xxlxor $XT, $XT, $XT", IIC_VecGeneral,
|
||||
[(set v4i32:$XT, (v4i32 immAllZerosV))]>;
|
||||
|
||||
// Code-gen-only forms of xxlxor that match the scalar floating-point
// immediate fpimm0. They take no inputs and XOR the target register with
// itself.
let isCodeGenOnly = 1 in {
  // f64 result, produced in a vsfrc register.
  def XXLXORdpz : XX3Form_SetZero<60, 154, (outs vsfrc:$XT), (ins),
                                  "xxlxor $XT, $XT, $XT", IIC_VecGeneral,
                                  [(set f64:$XT, (fpimm0))]>;
  // f32 result, produced in a vssrc register.
  def XXLXORspz : XX3Form_SetZero<60, 154, (outs vssrc:$XT), (ins),
                                  "xxlxor $XT, $XT, $XT", IIC_VecGeneral,
                                  [(set f32:$XT, (fpimm0))]>;
}
|
||||
|
||||
// Permutation Instructions
|
||||
def XXMRGHW : XX3Form<60, 18,
|
||||
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
|
||||
|
|
|
@ -0,0 +1,71 @@
|
|||
; RUN: llc -mtriple=powerpc-unknown-linux-gnu -mattr=+vsx < %s | \
|
||||
; RUN: FileCheck %s --implicit-check-not lxvd2x --implicit-check-not lfs
|
||||
; RUN: llc -mtriple=powerpc-unknown-linux-gnu -mattr=-vsx -mattr=-p8altivec < %s | \
|
||||
; RUN: FileCheck %s --check-prefix=CHECK-NVSXP8A --implicit-check-not xxlxor \
|
||||
; RUN: --implicit-check-not vxor
|
||||
|
||||
; float compared against +0.0: the VSX run expects the zero operand of fcmpu
; to come from xxlxor, the non-VSX run expects it to be loaded with lfs.
define signext i32 @t1(float %x) local_unnamed_addr #0 {
entry:
  %is_gt_zero = fcmp ogt float %x, 0.000000e+00
  %result = select i1 %is_gt_zero, i32 43, i32 11
  ret i32 %result

; CHECK-LABEL: t1:
; CHECK: xxlxor [[REG1:[0-9]+]], [[REG1]], [[REG1]]
; CHECK: fcmpu {{[0-9]+}}, {{[0-9]+}}, [[REG1]]
; CHECK: blr
; CHECK-NVSXP8A: lfs [[REG1:[0-9]+]]
; CHECK-NVSXP8A: fcmpu {{[0-9]+}}, {{[0-9]+}}, [[REG1]]
; CHECK-NVSXP8A: blr
}
|
||||
|
||||
; double compared against +0.0: the VSX run expects xxlxor feeding xscmpudp,
; the non-VSX run expects lfs feeding fcmpu.
define signext i32 @t2(double %x) local_unnamed_addr #0 {
entry:
  %is_gt_zero = fcmp ogt double %x, 0.000000e+00
  %result = select i1 %is_gt_zero, i32 43, i32 11
  ret i32 %result

; CHECK-LABEL: t2:
; CHECK: xxlxor [[REG2:[0-9]+]], [[REG2]], [[REG2]]
; CHECK: xscmpudp {{[0-9]+}}, {{[0-9]+}}, [[REG2]]
; CHECK: blr
; CHECK-NVSXP8A: lfs [[REG2:[0-9]+]]
; CHECK-NVSXP8A: fcmpu {{[0-9]+}}, {{[0-9]+}}, [[REG2]]
; CHECK-NVSXP8A: blr
}
|
||||
|
||||
; ppc_fp128 compared against +0.0: the VSX run expects a single xxlxor whose
; result is used by two fcmpu instructions, the non-VSX run expects lfs.
define signext i32 @t3(ppc_fp128 %x) local_unnamed_addr #0 {
entry:
  %is_gt_zero = fcmp ogt ppc_fp128 %x, 0xM00000000000000000000000000000000
  %result = select i1 %is_gt_zero, i32 43, i32 11
  ret i32 %result

; CHECK-LABEL: t3:
; CHECK: xxlxor [[REG3:[0-9]+]], [[REG3]], [[REG3]]
; CHECK: fcmpu {{[0-9]+}}, {{[0-9]+}}, [[REG3]]
; CHECK: fcmpu {{[0-9]+}}, {{[0-9]+}}, [[REG3]]
; CHECK: blr
; CHECK-NVSXP8A: lfs [[REG3:[0-9]+]]
; CHECK-NVSXP8A: fcmpu {{[0-9]+}}, {{[0-9]+}}, [[REG3]]
; CHECK-NVSXP8A: blr
}
|
||||
|
||||
; Returns an all-zero <2 x double>. The VSX run expects the vector to be
; zeroed with vxor. The non-VSX run expects one lfs of the zero followed by an
; fmr that copies the loaded register.
define <2 x double> @t4() local_unnamed_addr #0 {
  ret <2 x double> zeroinitializer

; CHECK-LABEL: t4:
; CHECK: vxor [[REG4:[0-9]+]], [[REG4]], [[REG4]]
; CHECK: blr
; CHECK-NVSXP8A: lfs [[REG4:[0-9]+]]
; The fmr source must be the register captured by the lfs line above, so use
; [[REG4]] here; writing [[REG4:[0-9]+]] would redefine the variable and match
; any register.
; CHECK-NVSXP8A: fmr {{[0-9]+}}, [[REG4]]
; CHECK-NVSXP8A: blr
}
|
||||
|
||||
; Returns an all-zero <2 x i64>: the VSX run expects vxor, the non-VSX run
; expects the value to be loaded with lvx.
define <2 x i64> @t5() local_unnamed_addr #0 {
entry:
  ret <2 x i64> zeroinitializer

; CHECK-LABEL: t5:
; CHECK: vxor [[REG5:[0-9]+]], [[REG5]], [[REG5]]
; CHECK: blr
; CHECK-NVSXP8A: lvx
; CHECK-NVSXP8A: blr
}
|
||||
|
|
@ -13,7 +13,7 @@ entry:
|
|||
; CHECK-LABEL: @test1
|
||||
; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
|
||||
; CHECK-DAG: li [[REG1:[0-9]+]], 1
|
||||
; CHECK-DAG: lfs [[REG2:[0-9]+]],
|
||||
; CHECK-DAG: xxlxor [[REG2:[0-9]+]], [[REG2]], [[REG2]]
|
||||
; CHECK-DAG: fcmpu {{[0-9]+}}, 2, [[REG2]]
|
||||
; CHECK: crnor
|
||||
; CHECK: crnor
|
||||
|
@ -33,7 +33,7 @@ entry:
|
|||
; CHECK-LABEL: @test2
|
||||
; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
|
||||
; CHECK-DAG: li [[REG1:[0-9]+]], 1
|
||||
; CHECK-DAG: lfs [[REG2:[0-9]+]],
|
||||
; CHECK-DAG: xxlxor [[REG2:[0-9]+]], [[REG2]], [[REG2]]
|
||||
; CHECK-DAG: fcmpu {{[0-9]+}}, 2, [[REG2]]
|
||||
; CHECK: crnor
|
||||
; CHECK: crnor
|
||||
|
@ -55,7 +55,7 @@ entry:
|
|||
; CHECK-LABEL: @test3
|
||||
; CHECK-DAG: fcmpu {{[0-9]+}}, 1, 2
|
||||
; CHECK-DAG: li [[REG1:[0-9]+]], 1
|
||||
; CHECK-DAG: lfs [[REG2:[0-9]+]],
|
||||
; CHECK-DAG: xxlxor [[REG2:[0-9]+]], [[REG2]], [[REG2]]
|
||||
; CHECK-DAG: fcmpu {{[0-9]+}}, 2, [[REG2]]
|
||||
; CHECK: crnor
|
||||
; CHECK: crnor
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
define i1 @TestULT(double %t0) {
|
||||
; CHECK-LABEL: TestULT:
|
||||
; CHECK: mcrf
|
||||
; CHECK: xscmpudp
|
||||
; CHECK: blr
|
||||
entry:
|
||||
%t1 = fcmp ult double %t0, 0.000000e+00
|
||||
|
@ -49,7 +49,7 @@ good:
|
|||
|
||||
define i1 @TestUEQ(double %t0) {
|
||||
; CHECK-LABEL: TestUEQ:
|
||||
; CHECK: mcrf
|
||||
; CHECK: xscmpudp
|
||||
; CHECK: blr
|
||||
entry:
|
||||
%t1 = fcmp ueq double %t0, 0.000000e+00
|
||||
|
@ -64,7 +64,7 @@ good:
|
|||
|
||||
define i1 @TestUGT(double %t0) {
|
||||
; CHECK-LABEL: TestUGT:
|
||||
; CHECK: mcrf
|
||||
; CHECK: xscmpudp
|
||||
; CHECK: blr
|
||||
entry:
|
||||
%t1 = fcmp ugt double %t0, 0.000000e+00
|
||||
|
@ -111,7 +111,7 @@ good:
|
|||
|
||||
define i1 @TestOLE(double %t0) {
|
||||
; CHECK-LABEL: TestOLE:
|
||||
; CHECK: mcrf
|
||||
; CHECK: xscmpudp
|
||||
; CHECK: blr
|
||||
entry:
|
||||
%t1 = fcmp ole double %t0, 0.000000e+00
|
||||
|
@ -126,7 +126,7 @@ good:
|
|||
|
||||
define i1 @TestONE(double %t0) {
|
||||
; CHECK-LABEL: TestONE:
|
||||
; CHECK: mcrf
|
||||
; CHECK: xscmpudp
|
||||
; CHECK: blr
|
||||
entry:
|
||||
%t1 = fcmp one double %t0, 0.000000e+00
|
||||
|
@ -173,7 +173,7 @@ good:
|
|||
|
||||
define i1 @TestOGE(double %t0) {
|
||||
; CHECK-LABEL: TestOGE:
|
||||
; CHECK: mcrf
|
||||
; CHECK: xscmpudp
|
||||
; CHECK: blr
|
||||
entry:
|
||||
%t1 = fcmp oge double %t0, 0.000000e+00
|
||||
|
|
|
@ -5,7 +5,7 @@ target triple = "powerpc64le-unknown-linux-gnu"
|
|||
; Check that the conditional return block of fmax_double3.exit was not
|
||||
; duplicated into the if.then.i block
|
||||
; CHECK: # %if.then.i
|
||||
; CHECK: lxvd2x
|
||||
; CHECK: xxlxor
|
||||
; CHECK: stxvd2x
|
||||
; CHECK-NOT: bclr
|
||||
; CHECK: {{^}}.LBB{{[0-9_]+}}:
|
||||
|
|
Loading…
Reference in New Issue