forked from OSchip/llvm-project
Use RegisterTuples to generate pseudo-registers.
The QQ and QQQQ registers are not 'real', they are pseudo-registers used to model some vld and vst instructions. This makes the call clobber lists longer, but I intend to get rid of those soon. llvm-svn: 148151
This commit is contained in:
parent
cef42c30a7
commit
35545421c8
|
@ -1900,7 +1900,9 @@ let isCall = 1,
|
|||
// FIXME: Do we really need a non-predicated version? If so, it should
|
||||
// at least be a pseudo instruction expanding to the predicated version
|
||||
// at MC lowering time.
|
||||
Defs = [R0, R1, R2, R3, R12, LR, QQQQ0, QQQQ2, QQQQ3, CPSR, FPSCR],
|
||||
Defs = [R0, R1, R2, R3, R12, LR,
|
||||
Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15,
|
||||
CPSR, FPSCR],
|
||||
Uses = [SP] in {
|
||||
def BL : ABXI<0b1011, (outs), (ins bl_target:$func, variable_ops),
|
||||
IIC_Br, "bl\t$func",
|
||||
|
@ -1956,7 +1958,9 @@ let isCall = 1,
|
|||
// On IOS R9 is call-clobbered.
|
||||
// R7 is marked as a use to prevent frame-pointer assignments from being
|
||||
// moved above / below calls.
|
||||
Defs = [R0, R1, R2, R3, R9, R12, LR, QQQQ0, QQQQ2, QQQQ3, CPSR, FPSCR],
|
||||
Defs = [R0, R1, R2, R3, R9, R12, LR,
|
||||
Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15,
|
||||
CPSR, FPSCR],
|
||||
Uses = [R7, SP] in {
|
||||
def BLr9 : ARMPseudoExpand<(outs), (ins bl_target:$func, variable_ops),
|
||||
4, IIC_Br,
|
||||
|
@ -2061,7 +2065,8 @@ def BXJ : ABI<0b0001, (outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func",
|
|||
|
||||
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
|
||||
// IOS versions.
|
||||
let Defs = [R0, R1, R2, R3, R9, R12, QQQQ0, QQQQ2, QQQQ3, PC],
|
||||
let Defs = [R0, R1, R2, R3, R9, R12,
|
||||
Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15, PC],
|
||||
Uses = [SP] in {
|
||||
def TCRETURNdi : PseudoInst<(outs), (ins i32imm:$dst, variable_ops),
|
||||
IIC_Br, []>, Requires<[IsIOS]>;
|
||||
|
@ -2082,7 +2087,8 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
|
|||
}
|
||||
|
||||
// Non-IOS versions (the difference is R9).
|
||||
let Defs = [R0, R1, R2, R3, R12, QQQQ0, QQQQ2, QQQQ3, PC],
|
||||
let Defs = [R0, R1, R2, R3, R12,
|
||||
Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15, PC],
|
||||
Uses = [SP] in {
|
||||
def TCRETURNdiND : PseudoInst<(outs), (ins i32imm:$dst, variable_ops),
|
||||
IIC_Br, []>, Requires<[IsNotIOS]>;
|
||||
|
@ -4711,8 +4717,8 @@ let isCall = 1,
|
|||
// no encoding information is necessary.
|
||||
let Defs =
|
||||
[ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR,
|
||||
QQQQ0, QQQQ1, QQQQ2, QQQQ3 ], hasSideEffects = 1, isBarrier = 1,
|
||||
usesCustomInserter = 1 in {
|
||||
Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15 ],
|
||||
hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
|
||||
def Int_eh_sjlj_setjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$val),
|
||||
NoItinerary,
|
||||
[(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>,
|
||||
|
@ -4743,7 +4749,8 @@ def Int_eh_sjlj_longjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$scratch),
|
|||
// that need the instruction size).
|
||||
let Defs =
|
||||
[ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR,
|
||||
QQQQ0, QQQQ1, QQQQ2, QQQQ3 ], isBarrier = 1 in
|
||||
Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15 ],
|
||||
isBarrier = 1 in
|
||||
def Int_eh_sjlj_dispatchsetup : PseudoInst<(outs), (ins), NoItinerary, []>;
|
||||
|
||||
let Defs =
|
||||
|
|
|
@ -405,7 +405,9 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
|
|||
// potentially appearing dead.
|
||||
let isCall = 1,
|
||||
// On non-IOS platforms R9 is callee-saved.
|
||||
Defs = [R0, R1, R2, R3, R12, LR, QQQQ0, QQQQ2, QQQQ3, CPSR, FPSCR],
|
||||
Defs = [R0, R1, R2, R3, R12, LR,
|
||||
Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15,
|
||||
CPSR, FPSCR],
|
||||
Uses = [SP] in {
|
||||
// Also used for Thumb2
|
||||
def tBL : TIx2<0b11110, 0b11, 1,
|
||||
|
@ -457,7 +459,9 @@ let isCall = 1,
|
|||
// On IOS R9 is call-clobbered.
|
||||
// R7 is marked as a use to prevent frame-pointer assignments from being
|
||||
// moved above / below calls.
|
||||
Defs = [R0, R1, R2, R3, R9, R12, LR, QQQQ0, QQQQ2, QQQQ3, CPSR, FPSCR],
|
||||
Defs = [R0, R1, R2, R3, R9, R12, LR,
|
||||
Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15,
|
||||
CPSR, FPSCR],
|
||||
Uses = [R7, SP] in {
|
||||
// Also used for Thumb2
|
||||
def tBLr9 : tPseudoExpand<(outs), (ins pred:$p, t_bltarget:$func, variable_ops),
|
||||
|
@ -524,7 +528,8 @@ let isBranch = 1, isTerminator = 1 in
|
|||
// Tail calls
|
||||
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
|
||||
// IOS versions.
|
||||
let Defs = [R0, R1, R2, R3, R9, R12, QQQQ0, QQQQ2, QQQQ3, PC],
|
||||
let Defs = [R0, R1, R2, R3, R9, R12,
|
||||
Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15, PC],
|
||||
Uses = [SP] in {
|
||||
// tTAILJMPd: IOS version uses a Thumb2 branch (no Thumb1 tail calls
|
||||
// on IOS), so it's in ARMInstrThumb2.td.
|
||||
|
@ -534,7 +539,8 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
|
|||
Requires<[IsThumb, IsIOS]>;
|
||||
}
|
||||
// Non-IOS versions (the difference is R9).
|
||||
let Defs = [R0, R1, R2, R3, R12, QQQQ0, QQQQ2, QQQQ3, PC],
|
||||
let Defs = [R0, R1, R2, R3, R12,
|
||||
Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15, PC],
|
||||
Uses = [SP] in {
|
||||
def tTAILJMPdND : tPseudoExpand<(outs),
|
||||
(ins t_brtarget:$dst, pred:$p, variable_ops),
|
||||
|
|
|
@ -3096,7 +3096,7 @@ def t2CLREX : T2I<(outs), (ins), NoItinerary, "clrex", "", []>,
|
|||
// $val is a scratch register for our use.
|
||||
let Defs =
|
||||
[ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR,
|
||||
QQQQ0, QQQQ1, QQQQ2, QQQQ3 ],
|
||||
Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15],
|
||||
hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1,
|
||||
usesCustomInserter = 1 in {
|
||||
def t2Int_eh_sjlj_setjmp : Thumb2XI<(outs), (ins tGPR:$src, tGPR:$val),
|
||||
|
@ -3216,7 +3216,8 @@ def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br,
|
|||
// it goes here.
|
||||
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
|
||||
// IOS version.
|
||||
let Defs = [R0, R1, R2, R3, R9, R12, QQQQ0, QQQQ2, QQQQ3, PC],
|
||||
let Defs = [R0, R1, R2, R3, R9, R12, PC,
|
||||
Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15],
|
||||
Uses = [SP] in
|
||||
def tTAILJMPd: tPseudoExpand<(outs),
|
||||
(ins uncondbrtarget:$dst, pred:$p, variable_ops),
|
||||
|
|
|
@ -150,36 +150,6 @@ def Q14 : ARMReg<14, "q14", [D28, D29]>;
|
|||
def Q15 : ARMReg<15, "q15", [D30, D31]>;
|
||||
}
|
||||
|
||||
// Pseudo 256-bit registers to represent pairs of Q registers. These should
|
||||
// never be present in the emitted code.
|
||||
// These are used for NEON load / store instructions, e.g., vld4, vst3.
|
||||
// NOTE: It's possible to define more QQ registers since technically the
|
||||
// starting D register number doesn't have to be a multiple of 4, e.g.,
|
||||
// D1, D2, D3, D4 would be a legal quad, but that would make the subregister
|
||||
// stuff very messy.
|
||||
let SubRegIndices = [qsub_0, qsub_1],
|
||||
CompositeIndices = [(dsub_2 qsub_1, dsub_0), (dsub_3 qsub_1, dsub_1)] in {
|
||||
def QQ0 : ARMReg<0, "qq0", [Q0, Q1]>;
|
||||
def QQ1 : ARMReg<1, "qq1", [Q2, Q3]>;
|
||||
def QQ2 : ARMReg<2, "qq2", [Q4, Q5]>;
|
||||
def QQ3 : ARMReg<3, "qq3", [Q6, Q7]>;
|
||||
def QQ4 : ARMReg<4, "qq4", [Q8, Q9]>;
|
||||
def QQ5 : ARMReg<5, "qq5", [Q10, Q11]>;
|
||||
def QQ6 : ARMReg<6, "qq6", [Q12, Q13]>;
|
||||
def QQ7 : ARMReg<7, "qq7", [Q14, Q15]>;
|
||||
}
|
||||
|
||||
// Pseudo 512-bit registers to represent four consecutive Q registers.
|
||||
let SubRegIndices = [qqsub_0, qqsub_1],
|
||||
CompositeIndices = [(qsub_2 qqsub_1, qsub_0), (qsub_3 qqsub_1, qsub_1),
|
||||
(dsub_4 qqsub_1, dsub_0), (dsub_5 qqsub_1, dsub_1),
|
||||
(dsub_6 qqsub_1, dsub_2), (dsub_7 qqsub_1, dsub_3)] in {
|
||||
def QQQQ0 : ARMReg<0, "qqqq0", [QQ0, QQ1]>;
|
||||
def QQQQ1 : ARMReg<1, "qqqq1", [QQ2, QQ3]>;
|
||||
def QQQQ2 : ARMReg<2, "qqqq2", [QQ4, QQ5]>;
|
||||
def QQQQ3 : ARMReg<3, "qqqq3", [QQ6, QQ7]>;
|
||||
}
|
||||
|
||||
// Current Program Status Register.
|
||||
def CPSR : ARMReg<0, "cpsr">;
|
||||
def APSR : ARMReg<1, "apsr">;
|
||||
|
@ -316,9 +286,22 @@ def QPR_8 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
|
|||
(DPR_8 dsub_0, dsub_1)];
|
||||
}
|
||||
|
||||
// Pseudo 256-bit registers to represent pairs of Q registers. These should
|
||||
// never be present in the emitted code.
|
||||
// These are used for NEON load / store instructions, e.g., vld4, vst3.
|
||||
// NOTE: It's possible to define more QQ registers since technically the
|
||||
// starting D register number doesn't have to be a multiple of 4, e.g.,
|
||||
// D1, D2, D3, D4 would be a legal quad, but that would make the subregister
|
||||
// stuff very messy.
|
||||
// Tuples2Q synthesizes the QQ pseudo-registers as two-element tuples of Q
// registers. The first list supplies each tuple's qsub_0 sub-register and the
// second list supplies its qsub_1 sub-register.
// (decimate QPR, 2) keeps every second member of QPR starting with the first;
// (shl QPR, 1) drops the first member before decimating, so the second list
// is offset by one register. Assuming QPR enumerates Q0..Q15 in order (its
// definition is not shown here -- TODO confirm), the tuples are (Q0, Q1),
// (Q2, Q3), ..., (Q14, Q15).
def Tuples2Q : RegisterTuples<[qsub_0, qsub_1],
|
||||
[(decimate QPR, 2),
|
||||
(decimate (shl QPR, 1), 2)]> {
|
||||
// dsub_2 / dsub_3 of a tuple are reached as dsub_0 / dsub_1 of its qsub_1
// half.
let CompositeIndices = [(dsub_2 qsub_1, dsub_0), (dsub_3 qsub_1, dsub_1)];
|
||||
}
|
||||
|
||||
// Pseudo 256-bit vector register class to model pairs of Q registers
|
||||
// (4 consecutive D registers).
|
||||
def QQPR : RegisterClass<"ARM", [v4i64], 256, (sequence "QQ%u", 0, 7)> {
|
||||
def QQPR : RegisterClass<"ARM", [v4i64], 256, (add Tuples2Q)> {
|
||||
let SubRegClasses = [(DPR dsub_0, dsub_1, dsub_2, dsub_3),
|
||||
(QPR qsub_0, qsub_1)];
|
||||
// Allocate non-VFP2 aliases first.
|
||||
|
@ -326,9 +309,18 @@ def QQPR : RegisterClass<"ARM", [v4i64], 256, (sequence "QQ%u", 0, 7)> {
|
|||
let AltOrderSelect = [{ return 1; }];
|
||||
}
|
||||
|
||||
// Pseudo 512-bit registers to represent four consecutive Q registers.
|
||||
// Tuples2QQ synthesizes the QQQQ pseudo-registers as two-element tuples of QQ
// registers drawn from QQPR. The first list supplies each tuple's qqsub_0
// sub-register and the second list supplies its qqsub_1 sub-register.
// (decimate QQPR, 2) keeps every second member of QQPR starting with the
// first; (shl QQPR, 1) drops the first member before decimating, so each
// tuple pairs two adjacent members of QQPR.
def Tuples2QQ : RegisterTuples<[qqsub_0, qqsub_1],
|
||||
[(decimate QQPR, 2),
|
||||
(decimate (shl QQPR, 1), 2)]> {
|
||||
// The upper Q and D sub-registers of a tuple (qsub_2..3, dsub_4..7) are
// reached as the lower sub-registers (qsub_0..1, dsub_0..3) of its qqsub_1
// half.
let CompositeIndices = [(qsub_2 qqsub_1, qsub_0), (qsub_3 qqsub_1, qsub_1),
|
||||
(dsub_4 qqsub_1, dsub_0), (dsub_5 qqsub_1, dsub_1),
|
||||
(dsub_6 qqsub_1, dsub_2), (dsub_7 qqsub_1, dsub_3)];
|
||||
}
|
||||
|
||||
// Pseudo 512-bit vector register class to model 4 consecutive Q registers
|
||||
// (8 consecutive D registers).
|
||||
def QQQQPR : RegisterClass<"ARM", [v8i64], 256, (sequence "QQQQ%u", 0, 3)> {
|
||||
def QQQQPR : RegisterClass<"ARM", [v8i64], 256, (add Tuples2QQ)> {
|
||||
let SubRegClasses = [(DPR dsub_0, dsub_1, dsub_2, dsub_3,
|
||||
dsub_4, dsub_5, dsub_6, dsub_7),
|
||||
(QPR qsub_0, qsub_1, qsub_2, qsub_3)];
|
||||
|
|
Loading…
Reference in New Issue