forked from OSchip/llvm-project
VFP single-precision arithmetic instructions can go down the NEON pipeline, but only on Cortex-A8.
llvm-svn: 126238
This commit is contained in:
parent
83a4ddd0cb
commit
04ad35b53f
|
@ -155,10 +155,11 @@ namespace ARMII {
|
||||||
//===------------------------------------------------------------------===//
|
//===------------------------------------------------------------------===//
|
||||||
// Code domain.
|
// Code domain.
|
||||||
DomainShift = 18,
|
DomainShift = 18,
|
||||||
DomainMask = 3 << DomainShift,
|
DomainMask = 7 << DomainShift,
|
||||||
DomainGeneral = 0 << DomainShift,
|
DomainGeneral = 0 << DomainShift,
|
||||||
DomainVFP = 1 << DomainShift,
|
DomainVFP = 1 << DomainShift,
|
||||||
DomainNEON = 2 << DomainShift,
|
DomainNEON = 2 << DomainShift,
|
||||||
|
DomainNEONA8 = 4 << DomainShift,
|
||||||
|
|
||||||
//===------------------------------------------------------------------===//
|
//===------------------------------------------------------------------===//
|
||||||
// Field shifts - such shifts are used to set field while generating
|
// Field shifts - such shifts are used to set field while generating
|
||||||
|
|
|
@ -21,17 +21,14 @@ static bool hasRAWHazard(MachineInstr *DefMI, MachineInstr *MI,
|
||||||
// FIXME: Detect integer instructions properly.
|
// FIXME: Detect integer instructions properly.
|
||||||
const TargetInstrDesc &TID = MI->getDesc();
|
const TargetInstrDesc &TID = MI->getDesc();
|
||||||
unsigned Domain = TID.TSFlags & ARMII::DomainMask;
|
unsigned Domain = TID.TSFlags & ARMII::DomainMask;
|
||||||
if (Domain == ARMII::DomainVFP) {
|
if (TID.mayStore())
|
||||||
unsigned Opcode = MI->getOpcode();
|
|
||||||
if (Opcode == ARM::VSTRS || Opcode == ARM::VSTRD ||
|
|
||||||
Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
|
|
||||||
return false;
|
return false;
|
||||||
} else if (Domain == ARMII::DomainNEON) {
|
unsigned Opcode = TID.getOpcode();
|
||||||
if (MI->getDesc().mayStore() || MI->getDesc().mayLoad())
|
if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
|
||||||
return false;
|
|
||||||
} else
|
|
||||||
return false;
|
return false;
|
||||||
|
if ((Domain & ARMII::DomainVFP) || (Domain & ARMII::DomainNEON))
|
||||||
return MI->readsRegister(DefMI->getOperand(0).getReg(), &TRI);
|
return MI->readsRegister(DefMI->getOperand(0).getReg(), &TRI);
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
ScheduleHazardRecognizer::HazardType
|
ScheduleHazardRecognizer::HazardType
|
||||||
|
|
|
@ -127,13 +127,14 @@ def IndexModePost : IndexMode<2>;
|
||||||
def IndexModeUpd : IndexMode<3>;
|
def IndexModeUpd : IndexMode<3>;
|
||||||
|
|
||||||
// Instruction execution domain.
|
// Instruction execution domain.
|
||||||
class Domain<bits<2> val> {
|
class Domain<bits<3> val> {
|
||||||
bits<2> Value = val;
|
bits<3> Value = val;
|
||||||
}
|
}
|
||||||
def GenericDomain : Domain<0>;
|
def GenericDomain : Domain<0>;
|
||||||
def VFPDomain : Domain<1>; // Instructions in VFP domain only
|
def VFPDomain : Domain<1>; // Instructions in VFP domain only
|
||||||
def NeonDomain : Domain<2>; // Instructions in Neon domain only
|
def NeonDomain : Domain<2>; // Instructions in Neon domain only
|
||||||
def VFPNeonDomain : Domain<3>; // Instructions in both VFP & Neon domains
|
def VFPNeonDomain : Domain<3>; // Instructions in both VFP & Neon domains
|
||||||
|
def VFPNeonA8Domain : Domain<7>; // Instructions in VFP & Neon under A8
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// ARM special operands.
|
// ARM special operands.
|
||||||
|
@ -249,7 +250,7 @@ class InstTemplate<AddrMode am, SizeFlagVal sz, IndexMode im,
|
||||||
let TSFlags{15-10} = Form;
|
let TSFlags{15-10} = Form;
|
||||||
let TSFlags{16} = isUnaryDataProc;
|
let TSFlags{16} = isUnaryDataProc;
|
||||||
let TSFlags{17} = canXformTo16Bit;
|
let TSFlags{17} = canXformTo16Bit;
|
||||||
let TSFlags{19-18} = D.Value;
|
let TSFlags{20-18} = D.Value;
|
||||||
|
|
||||||
let Constraints = cstr;
|
let Constraints = cstr;
|
||||||
let Itinerary = itin;
|
let Itinerary = itin;
|
||||||
|
|
|
@ -197,9 +197,9 @@ def VADDS : ASbIn<0b11100, 0b11, 0, 0,
|
||||||
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
|
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
|
||||||
IIC_fpALU32, "vadd", ".f32\t$Sd, $Sn, $Sm",
|
IIC_fpALU32, "vadd", ".f32\t$Sd, $Sn, $Sm",
|
||||||
[(set SPR:$Sd, (fadd SPR:$Sn, SPR:$Sm))]> {
|
[(set SPR:$Sd, (fadd SPR:$Sn, SPR:$Sm))]> {
|
||||||
// Some single precision VFP instructions may be executed on both NEON and VFP
|
// Some single precision VFP instructions may be executed on both NEON and
|
||||||
// pipelines.
|
// VFP pipelines on A8.
|
||||||
let D = VFPNeonDomain;
|
let D = VFPNeonA8Domain;
|
||||||
}
|
}
|
||||||
|
|
||||||
def VSUBD : ADbI<0b11100, 0b11, 1, 0,
|
def VSUBD : ADbI<0b11100, 0b11, 1, 0,
|
||||||
|
@ -211,9 +211,9 @@ def VSUBS : ASbIn<0b11100, 0b11, 1, 0,
|
||||||
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
|
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
|
||||||
IIC_fpALU32, "vsub", ".f32\t$Sd, $Sn, $Sm",
|
IIC_fpALU32, "vsub", ".f32\t$Sd, $Sn, $Sm",
|
||||||
[(set SPR:$Sd, (fsub SPR:$Sn, SPR:$Sm))]> {
|
[(set SPR:$Sd, (fsub SPR:$Sn, SPR:$Sm))]> {
|
||||||
// Some single precision VFP instructions may be executed on both NEON and VFP
|
// Some single precision VFP instructions may be executed on both NEON and
|
||||||
// pipelines.
|
// VFP pipelines on A8.
|
||||||
let D = VFPNeonDomain;
|
let D = VFPNeonA8Domain;
|
||||||
}
|
}
|
||||||
|
|
||||||
def VDIVD : ADbI<0b11101, 0b00, 0, 0,
|
def VDIVD : ADbI<0b11101, 0b00, 0, 0,
|
||||||
|
@ -235,9 +235,9 @@ def VMULS : ASbIn<0b11100, 0b10, 0, 0,
|
||||||
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
|
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
|
||||||
IIC_fpMUL32, "vmul", ".f32\t$Sd, $Sn, $Sm",
|
IIC_fpMUL32, "vmul", ".f32\t$Sd, $Sn, $Sm",
|
||||||
[(set SPR:$Sd, (fmul SPR:$Sn, SPR:$Sm))]> {
|
[(set SPR:$Sd, (fmul SPR:$Sn, SPR:$Sm))]> {
|
||||||
// Some single precision VFP instructions may be executed on both NEON and VFP
|
// Some single precision VFP instructions may be executed on both NEON and
|
||||||
// pipelines.
|
// VFP pipelines on A8.
|
||||||
let D = VFPNeonDomain;
|
let D = VFPNeonA8Domain;
|
||||||
}
|
}
|
||||||
|
|
||||||
def VNMULD : ADbI<0b11100, 0b10, 1, 0,
|
def VNMULD : ADbI<0b11100, 0b10, 1, 0,
|
||||||
|
@ -249,9 +249,9 @@ def VNMULS : ASbI<0b11100, 0b10, 1, 0,
|
||||||
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
|
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
|
||||||
IIC_fpMUL32, "vnmul", ".f32\t$Sd, $Sn, $Sm",
|
IIC_fpMUL32, "vnmul", ".f32\t$Sd, $Sn, $Sm",
|
||||||
[(set SPR:$Sd, (fneg (fmul SPR:$Sn, SPR:$Sm)))]> {
|
[(set SPR:$Sd, (fneg (fmul SPR:$Sn, SPR:$Sm)))]> {
|
||||||
// Some single precision VFP instructions may be executed on both NEON and VFP
|
// Some single precision VFP instructions may be executed on both NEON and
|
||||||
// pipelines.
|
// VFP pipelines on A8.
|
||||||
let D = VFPNeonDomain;
|
let D = VFPNeonA8Domain;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Match reassociated forms only if not sign dependent rounding.
|
// Match reassociated forms only if not sign dependent rounding.
|
||||||
|
@ -271,9 +271,9 @@ def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0,
|
||||||
(outs), (ins SPR:$Sd, SPR:$Sm),
|
(outs), (ins SPR:$Sd, SPR:$Sm),
|
||||||
IIC_fpCMP32, "vcmpe", ".f32\t$Sd, $Sm",
|
IIC_fpCMP32, "vcmpe", ".f32\t$Sd, $Sm",
|
||||||
[(arm_cmpfp SPR:$Sd, SPR:$Sm)]> {
|
[(arm_cmpfp SPR:$Sd, SPR:$Sm)]> {
|
||||||
// Some single precision VFP instructions may be executed on both NEON and VFP
|
// Some single precision VFP instructions may be executed on both NEON and
|
||||||
// pipelines.
|
// VFP pipelines on A8.
|
||||||
let D = VFPNeonDomain;
|
let D = VFPNeonA8Domain;
|
||||||
}
|
}
|
||||||
|
|
||||||
// FIXME: Verify encoding after integrated assembler is working.
|
// FIXME: Verify encoding after integrated assembler is working.
|
||||||
|
@ -286,9 +286,9 @@ def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0,
|
||||||
(outs), (ins SPR:$Sd, SPR:$Sm),
|
(outs), (ins SPR:$Sd, SPR:$Sm),
|
||||||
IIC_fpCMP32, "vcmp", ".f32\t$Sd, $Sm",
|
IIC_fpCMP32, "vcmp", ".f32\t$Sd, $Sm",
|
||||||
[/* For disassembly only; pattern left blank */]> {
|
[/* For disassembly only; pattern left blank */]> {
|
||||||
// Some single precision VFP instructions may be executed on both NEON and VFP
|
// Some single precision VFP instructions may be executed on both NEON and
|
||||||
// pipelines.
|
// VFP pipelines on A8.
|
||||||
let D = VFPNeonDomain;
|
let D = VFPNeonA8Domain;
|
||||||
}
|
}
|
||||||
} // Defs = [FPSCR]
|
} // Defs = [FPSCR]
|
||||||
|
|
||||||
|
@ -305,9 +305,9 @@ def VABSS : ASuIn<0b11101, 0b11, 0b0000, 0b11, 0,
|
||||||
(outs SPR:$Sd), (ins SPR:$Sm),
|
(outs SPR:$Sd), (ins SPR:$Sm),
|
||||||
IIC_fpUNA32, "vabs", ".f32\t$Sd, $Sm",
|
IIC_fpUNA32, "vabs", ".f32\t$Sd, $Sm",
|
||||||
[(set SPR:$Sd, (fabs SPR:$Sm))]> {
|
[(set SPR:$Sd, (fabs SPR:$Sm))]> {
|
||||||
// Some single precision VFP instructions may be executed on both NEON and VFP
|
// Some single precision VFP instructions may be executed on both NEON and
|
||||||
// pipelines.
|
// VFP pipelines on A8.
|
||||||
let D = VFPNeonDomain;
|
let D = VFPNeonA8Domain;
|
||||||
}
|
}
|
||||||
|
|
||||||
let Defs = [FPSCR] in {
|
let Defs = [FPSCR] in {
|
||||||
|
@ -326,9 +326,9 @@ def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0,
|
||||||
let Inst{3-0} = 0b0000;
|
let Inst{3-0} = 0b0000;
|
||||||
let Inst{5} = 0;
|
let Inst{5} = 0;
|
||||||
|
|
||||||
// Some single precision VFP instructions may be executed on both NEON and VFP
|
// Some single precision VFP instructions may be executed on both NEON and
|
||||||
// pipelines.
|
// VFP pipelines on A8.
|
||||||
let D = VFPNeonDomain;
|
let D = VFPNeonA8Domain;
|
||||||
}
|
}
|
||||||
|
|
||||||
// FIXME: Verify encoding after integrated assembler is working.
|
// FIXME: Verify encoding after integrated assembler is working.
|
||||||
|
@ -347,9 +347,9 @@ def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0,
|
||||||
let Inst{3-0} = 0b0000;
|
let Inst{3-0} = 0b0000;
|
||||||
let Inst{5} = 0;
|
let Inst{5} = 0;
|
||||||
|
|
||||||
// Some single precision VFP instructions may be executed on both NEON and VFP
|
// Some single precision VFP instructions may be executed on both NEON and
|
||||||
// pipelines.
|
// VFP pipelines on A8.
|
||||||
let D = VFPNeonDomain;
|
let D = VFPNeonA8Domain;
|
||||||
}
|
}
|
||||||
} // Defs = [FPSCR]
|
} // Defs = [FPSCR]
|
||||||
|
|
||||||
|
@ -423,9 +423,9 @@ def VNEGS : ASuIn<0b11101, 0b11, 0b0001, 0b01, 0,
|
||||||
(outs SPR:$Sd), (ins SPR:$Sm),
|
(outs SPR:$Sd), (ins SPR:$Sm),
|
||||||
IIC_fpUNA32, "vneg", ".f32\t$Sd, $Sm",
|
IIC_fpUNA32, "vneg", ".f32\t$Sd, $Sm",
|
||||||
[(set SPR:$Sd, (fneg SPR:$Sm))]> {
|
[(set SPR:$Sd, (fneg SPR:$Sm))]> {
|
||||||
// Some single precision VFP instructions may be executed on both NEON and VFP
|
// Some single precision VFP instructions may be executed on both NEON and
|
||||||
// pipelines.
|
// VFP pipelines on A8.
|
||||||
let D = VFPNeonDomain;
|
let D = VFPNeonA8Domain;
|
||||||
}
|
}
|
||||||
|
|
||||||
def VSQRTD : ADuI<0b11101, 0b11, 0b0001, 0b11, 0,
|
def VSQRTD : ADuI<0b11101, 0b11, 0b0001, 0b11, 0,
|
||||||
|
@ -598,9 +598,9 @@ def VSITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
|
||||||
[(set SPR:$Sd, (arm_sitof SPR:$Sm))]> {
|
[(set SPR:$Sd, (arm_sitof SPR:$Sm))]> {
|
||||||
let Inst{7} = 1; // s32
|
let Inst{7} = 1; // s32
|
||||||
|
|
||||||
// Some single precision VFP instructions may be executed on both NEON and VFP
|
// Some single precision VFP instructions may be executed on both NEON and
|
||||||
// pipelines.
|
// VFP pipelines on A8.
|
||||||
let D = VFPNeonDomain;
|
let D = VFPNeonA8Domain;
|
||||||
}
|
}
|
||||||
|
|
||||||
def VUITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
|
def VUITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
|
||||||
|
@ -616,9 +616,9 @@ def VUITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
|
||||||
[(set SPR:$Sd, (arm_uitof SPR:$Sm))]> {
|
[(set SPR:$Sd, (arm_uitof SPR:$Sm))]> {
|
||||||
let Inst{7} = 0; // u32
|
let Inst{7} = 0; // u32
|
||||||
|
|
||||||
// Some single precision VFP instructions may be executed on both NEON and VFP
|
// Some single precision VFP instructions may be executed on both NEON and
|
||||||
// pipelines.
|
// VFP pipelines on A8.
|
||||||
let D = VFPNeonDomain;
|
let D = VFPNeonA8Domain;
|
||||||
}
|
}
|
||||||
|
|
||||||
// FP -> Int:
|
// FP -> Int:
|
||||||
|
@ -671,9 +671,9 @@ def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
|
||||||
[(set SPR:$Sd, (arm_ftosi SPR:$Sm))]> {
|
[(set SPR:$Sd, (arm_ftosi SPR:$Sm))]> {
|
||||||
let Inst{7} = 1; // Z bit
|
let Inst{7} = 1; // Z bit
|
||||||
|
|
||||||
// Some single precision VFP instructions may be executed on both NEON and VFP
|
// Some single precision VFP instructions may be executed on both NEON and
|
||||||
// pipelines.
|
// VFP pipelines on A8.
|
||||||
let D = VFPNeonDomain;
|
let D = VFPNeonA8Domain;
|
||||||
}
|
}
|
||||||
|
|
||||||
def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
|
def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
|
||||||
|
@ -689,9 +689,9 @@ def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
|
||||||
[(set SPR:$Sd, (arm_ftoui SPR:$Sm))]> {
|
[(set SPR:$Sd, (arm_ftoui SPR:$Sm))]> {
|
||||||
let Inst{7} = 1; // Z bit
|
let Inst{7} = 1; // Z bit
|
||||||
|
|
||||||
// Some single precision VFP instructions may be executed on both NEON and VFP
|
// Some single precision VFP instructions may be executed on both NEON and
|
||||||
// pipelines.
|
// VFP pipelines on A8.
|
||||||
let D = VFPNeonDomain;
|
let D = VFPNeonA8Domain;
|
||||||
}
|
}
|
||||||
|
|
||||||
// And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR.
|
// And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR.
|
||||||
|
@ -743,36 +743,36 @@ def VTOSHS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 0,
|
||||||
(outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
|
(outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
|
||||||
IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits",
|
IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits",
|
||||||
[/* For disassembly only; pattern left blank */]> {
|
[/* For disassembly only; pattern left blank */]> {
|
||||||
// Some single precision VFP instructions may be executed on both NEON and VFP
|
// Some single precision VFP instructions may be executed on both NEON and
|
||||||
// pipelines.
|
// VFP pipelines on A8.
|
||||||
let D = VFPNeonDomain;
|
let D = VFPNeonA8Domain;
|
||||||
}
|
}
|
||||||
|
|
||||||
def VTOUHS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 0,
|
def VTOUHS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 0,
|
||||||
(outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
|
(outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
|
||||||
IIC_fpCVTSI, "vcvt", ".u16.f32\t$dst, $a, $fbits",
|
IIC_fpCVTSI, "vcvt", ".u16.f32\t$dst, $a, $fbits",
|
||||||
[/* For disassembly only; pattern left blank */]> {
|
[/* For disassembly only; pattern left blank */]> {
|
||||||
// Some single precision VFP instructions may be executed on both NEON and VFP
|
// Some single precision VFP instructions may be executed on both NEON and
|
||||||
// pipelines.
|
// VFP pipelines on A8.
|
||||||
let D = VFPNeonDomain;
|
let D = VFPNeonA8Domain;
|
||||||
}
|
}
|
||||||
|
|
||||||
def VTOSLS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 1,
|
def VTOSLS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 1,
|
||||||
(outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
|
(outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
|
||||||
IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a, $fbits",
|
IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a, $fbits",
|
||||||
[/* For disassembly only; pattern left blank */]> {
|
[/* For disassembly only; pattern left blank */]> {
|
||||||
// Some single precision VFP instructions may be executed on both NEON and VFP
|
// Some single precision VFP instructions may be executed on both NEON and
|
||||||
// pipelines.
|
// VFP pipelines on A8.
|
||||||
let D = VFPNeonDomain;
|
let D = VFPNeonA8Domain;
|
||||||
}
|
}
|
||||||
|
|
||||||
def VTOULS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 1,
|
def VTOULS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 1,
|
||||||
(outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
|
(outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
|
||||||
IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a, $fbits",
|
IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a, $fbits",
|
||||||
[/* For disassembly only; pattern left blank */]> {
|
[/* For disassembly only; pattern left blank */]> {
|
||||||
// Some single precision VFP instructions may be executed on both NEON and VFP
|
// Some single precision VFP instructions may be executed on both NEON and
|
||||||
// pipelines.
|
// VFP pipelines on A8.
|
||||||
let D = VFPNeonDomain;
|
let D = VFPNeonA8Domain;
|
||||||
}
|
}
|
||||||
|
|
||||||
def VTOSHD : AVConv1XI<0b11101, 0b11, 0b1110, 0b1011, 0,
|
def VTOSHD : AVConv1XI<0b11101, 0b11, 0b1110, 0b1011, 0,
|
||||||
|
@ -801,36 +801,36 @@ def VSHTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 0,
|
||||||
(outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
|
(outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
|
||||||
IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits",
|
IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits",
|
||||||
[/* For disassembly only; pattern left blank */]> {
|
[/* For disassembly only; pattern left blank */]> {
|
||||||
// Some single precision VFP instructions may be executed on both NEON and VFP
|
// Some single precision VFP instructions may be executed on both NEON and
|
||||||
// pipelines.
|
// VFP pipelines on A8.
|
||||||
let D = VFPNeonDomain;
|
let D = VFPNeonA8Domain;
|
||||||
}
|
}
|
||||||
|
|
||||||
def VUHTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 0,
|
def VUHTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 0,
|
||||||
(outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
|
(outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
|
||||||
IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits",
|
IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits",
|
||||||
[/* For disassembly only; pattern left blank */]> {
|
[/* For disassembly only; pattern left blank */]> {
|
||||||
// Some single precision VFP instructions may be executed on both NEON and VFP
|
// Some single precision VFP instructions may be executed on both NEON and
|
||||||
// pipelines.
|
// VFP pipelines on A8.
|
||||||
let D = VFPNeonDomain;
|
let D = VFPNeonA8Domain;
|
||||||
}
|
}
|
||||||
|
|
||||||
def VSLTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 1,
|
def VSLTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 1,
|
||||||
(outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
|
(outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
|
||||||
IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits",
|
IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits",
|
||||||
[/* For disassembly only; pattern left blank */]> {
|
[/* For disassembly only; pattern left blank */]> {
|
||||||
// Some single precision VFP instructions may be executed on both NEON and VFP
|
// Some single precision VFP instructions may be executed on both NEON and
|
||||||
// pipelines.
|
// VFP pipelines on A8.
|
||||||
let D = VFPNeonDomain;
|
let D = VFPNeonA8Domain;
|
||||||
}
|
}
|
||||||
|
|
||||||
def VULTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 1,
|
def VULTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 1,
|
||||||
(outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
|
(outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
|
||||||
IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits",
|
IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits",
|
||||||
[/* For disassembly only; pattern left blank */]> {
|
[/* For disassembly only; pattern left blank */]> {
|
||||||
// Some single precision VFP instructions may be executed on both NEON and VFP
|
// Some single precision VFP instructions may be executed on both NEON and
|
||||||
// pipelines.
|
// VFP pipelines on A8.
|
||||||
let D = VFPNeonDomain;
|
let D = VFPNeonA8Domain;
|
||||||
}
|
}
|
||||||
|
|
||||||
def VSHTOD : AVConv1XI<0b11101, 0b11, 0b1010, 0b1011, 0,
|
def VSHTOD : AVConv1XI<0b11101, 0b11, 0b1010, 0b1011, 0,
|
||||||
|
@ -874,9 +874,9 @@ def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
|
||||||
SPR:$Sdin))]>,
|
SPR:$Sdin))]>,
|
||||||
RegConstraint<"$Sdin = $Sd">,
|
RegConstraint<"$Sdin = $Sd">,
|
||||||
Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
|
Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
|
||||||
// Some single precision VFP instructions may be executed on both NEON and VFP
|
// Some single precision VFP instructions may be executed on both NEON and
|
||||||
// pipelines.
|
// VFP pipelines on A8.
|
||||||
let D = VFPNeonDomain;
|
let D = VFPNeonA8Domain;
|
||||||
}
|
}
|
||||||
|
|
||||||
def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
|
def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
|
||||||
|
@ -901,9 +901,9 @@ def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
|
||||||
SPR:$Sdin))]>,
|
SPR:$Sdin))]>,
|
||||||
RegConstraint<"$Sdin = $Sd">,
|
RegConstraint<"$Sdin = $Sd">,
|
||||||
Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
|
Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
|
||||||
// Some single precision VFP instructions may be executed on both NEON and VFP
|
// Some single precision VFP instructions may be executed on both NEON and
|
||||||
// pipelines.
|
// VFP pipelines on A8.
|
||||||
let D = VFPNeonDomain;
|
let D = VFPNeonA8Domain;
|
||||||
}
|
}
|
||||||
|
|
||||||
def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
|
def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
|
||||||
|
@ -928,9 +928,9 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
|
||||||
SPR:$Sdin))]>,
|
SPR:$Sdin))]>,
|
||||||
RegConstraint<"$Sdin = $Sd">,
|
RegConstraint<"$Sdin = $Sd">,
|
||||||
Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
|
Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
|
||||||
// Some single precision VFP instructions may be executed on both NEON and VFP
|
// Some single precision VFP instructions may be executed on both NEON and
|
||||||
// pipelines.
|
// VFP pipelines on A8.
|
||||||
let D = VFPNeonDomain;
|
let D = VFPNeonA8Domain;
|
||||||
}
|
}
|
||||||
|
|
||||||
def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
|
def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
|
||||||
|
@ -954,9 +954,9 @@ def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
|
||||||
[(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>,
|
[(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>,
|
||||||
RegConstraint<"$Sdin = $Sd">,
|
RegConstraint<"$Sdin = $Sd">,
|
||||||
Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
|
Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
|
||||||
// Some single precision VFP instructions may be executed on both NEON and VFP
|
// Some single precision VFP instructions may be executed on both NEON and
|
||||||
// pipelines.
|
// VFP pipelines on A8.
|
||||||
let D = VFPNeonDomain;
|
let D = VFPNeonA8Domain;
|
||||||
}
|
}
|
||||||
|
|
||||||
def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
|
def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
|
||||||
|
@ -995,9 +995,9 @@ def VNEGScc : ASuI<0b11101, 0b11, 0b0001, 0b01, 0,
|
||||||
IIC_fpUNA32, "vneg", ".f32\t$Sd, $Sm",
|
IIC_fpUNA32, "vneg", ".f32\t$Sd, $Sm",
|
||||||
[/*(set SPR:$Sd, (ARMcneg SPR:$Sn, SPR:$Sm, imm:$cc))*/]>,
|
[/*(set SPR:$Sd, (ARMcneg SPR:$Sn, SPR:$Sm, imm:$cc))*/]>,
|
||||||
RegConstraint<"$Sn = $Sd"> {
|
RegConstraint<"$Sn = $Sd"> {
|
||||||
// Some single precision VFP instructions may be executed on both NEON and VFP
|
// Some single precision VFP instructions may be executed on both NEON and
|
||||||
// pipelines.
|
// VFP pipelines on A8.
|
||||||
let D = VFPNeonDomain;
|
let D = VFPNeonA8Domain;
|
||||||
}
|
}
|
||||||
} // neverHasSideEffects
|
} // neverHasSideEffects
|
||||||
|
|
||||||
|
|
|
@ -132,21 +132,15 @@ unsigned MLxExpansion::getDefReg(MachineInstr *MI) const {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const {
|
bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const {
|
||||||
const TargetInstrDesc &TID = MI->getDesc();
|
|
||||||
// FIXME: Detect integer instructions properly.
|
// FIXME: Detect integer instructions properly.
|
||||||
|
const TargetInstrDesc &TID = MI->getDesc();
|
||||||
unsigned Domain = TID.TSFlags & ARMII::DomainMask;
|
unsigned Domain = TID.TSFlags & ARMII::DomainMask;
|
||||||
if (Domain == ARMII::DomainVFP) {
|
if (TID.mayStore())
|
||||||
|
return false;
|
||||||
unsigned Opcode = TID.getOpcode();
|
unsigned Opcode = TID.getOpcode();
|
||||||
if (Opcode == ARM::VSTRS || Opcode == ARM::VSTRD ||
|
if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
|
||||||
Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
|
|
||||||
return false;
|
return false;
|
||||||
} else if (Domain == ARMII::DomainNEON) {
|
if ((Domain & ARMII::DomainVFP) || (Domain & ARMII::DomainNEON))
|
||||||
if (TID.mayStore() || TID.mayLoad())
|
|
||||||
return false;
|
|
||||||
} else {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
return MI->readsRegister(Reg, TRI);
|
return MI->readsRegister(Reg, TRI);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
|
@ -35,6 +35,7 @@ namespace {
|
||||||
private:
|
private:
|
||||||
const TargetRegisterInfo *TRI;
|
const TargetRegisterInfo *TRI;
|
||||||
const ARMBaseInstrInfo *TII;
|
const ARMBaseInstrInfo *TII;
|
||||||
|
bool isA8;
|
||||||
|
|
||||||
typedef DenseMap<unsigned, const MachineInstr*> RegMap;
|
typedef DenseMap<unsigned, const MachineInstr*> RegMap;
|
||||||
|
|
||||||
|
@ -43,6 +44,16 @@ namespace {
|
||||||
char NEONMoveFixPass::ID = 0;
|
char NEONMoveFixPass::ID = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool inNEONDomain(unsigned Domain, bool isA8) {
|
||||||
|
if (Domain & ARMII::DomainNEON) {
|
||||||
|
// Some instructions only go down NEON pipeline when executed on CortexA8.
|
||||||
|
if (Domain & ARMII::DomainNEONA8)
|
||||||
|
return isA8;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) {
|
bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) {
|
||||||
RegMap Defs;
|
RegMap Defs;
|
||||||
bool Modified = false;
|
bool Modified = false;
|
||||||
|
@ -70,7 +81,7 @@ bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) {
|
||||||
Domain = ARMII::DomainNEON;
|
Domain = ARMII::DomainNEON;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (Domain & ARMII::DomainNEON) {
|
if (inNEONDomain(Domain, isA8)) {
|
||||||
// Convert VMOVD to VMOVDneon
|
// Convert VMOVD to VMOVDneon
|
||||||
unsigned DestReg = MI->getOperand(0).getReg();
|
unsigned DestReg = MI->getOperand(0).getReg();
|
||||||
|
|
||||||
|
@ -123,6 +134,7 @@ bool NEONMoveFixPass::runOnMachineFunction(MachineFunction &Fn) {
|
||||||
|
|
||||||
TRI = TM.getRegisterInfo();
|
TRI = TM.getRegisterInfo();
|
||||||
TII = static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo());
|
TII = static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo());
|
||||||
|
isA8 = TM.getSubtarget<ARMSubtarget>().isCortexA8();
|
||||||
|
|
||||||
bool Modified = false;
|
bool Modified = false;
|
||||||
for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
|
for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
|
||||||
|
|
Loading…
Reference in New Issue