From 2259d67a33a5ff55994420364d8eedfb980ecb21 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Wed, 29 Sep 2010 00:49:25 +0000 Subject: [PATCH] Separate itinerary classes for mvn from mov; for tst / teq from cmp / cmn. llvm-svn: 115010 --- llvm/lib/Target/ARM/ARMInstrInfo.td | 22 +++++++++++++------- llvm/lib/Target/ARM/ARMInstrThumb.td | 4 ++-- llvm/lib/Target/ARM/ARMInstrThumb2.td | 30 ++++++++++++++++++--------- llvm/lib/Target/ARM/ARMSchedule.td | 10 ++++++++- llvm/lib/Target/ARM/ARMScheduleA8.td | 16 ++++++++++++-- llvm/lib/Target/ARM/ARMScheduleA9.td | 18 +++++++++++++--- llvm/lib/Target/ARM/ARMScheduleV6.td | 12 +++++++++++ 7 files changed, 86 insertions(+), 26 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td index 7ebfa8e5f522..4980d8b602cb 100644 --- a/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -511,15 +511,16 @@ multiclass AI1_bin_s_irs opcod, string opc, /// patterns. Similar to AsI1_bin_irs except the instruction does not produce /// a explicit result, only implicitly set CPSR. let isCompare = 1, Defs = [CPSR] in { -multiclass AI1_cmp_irs opcod, string opc, PatFrag opnode, - bit Commutable = 0> { - def ri : AI1 opcod, string opc, + InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, + PatFrag opnode, bit Commutable = 0> { + def ri : AI1 { let Inst{20} = 1; let Inst{25} = 1; } - def rr : AI1 { let Inst{11-4} = 0b00000000; @@ -527,7 +528,7 @@ multiclass AI1_cmp_irs opcod, string opc, PatFrag opnode, let Inst{25} = 0; let isCommutable = Commutable; } - def rs : AI1 { let Inst{20} = 1; @@ -1896,20 +1897,20 @@ def BFI : I<(outs GPR:$dst), (ins GPR:$src, GPR:$val, bf_inv_mask_imm:$imm), let Inst{6-4} = 0b001; // Rn: Inst{3-0} != 15 } -def MVNr : AsI1<0b1111, (outs GPR:$dst), (ins GPR:$src), DPFrm, IIC_iMOVr, +def MVNr : AsI1<0b1111, (outs GPR:$dst), (ins GPR:$src), DPFrm, IIC_iMVNr, "mvn", "\t$dst, $src", [(set GPR:$dst, (not GPR:$src))]>, UnaryDP { let Inst{25} = 0; let Inst{11-4} = 0b00000000; } def MVNs : AsI1<0b1111, (outs GPR:$dst), (ins so_reg:$src), DPSoRegFrm, - IIC_iMOVsr, "mvn", "\t$dst, $src", + IIC_iMVNsr, "mvn", "\t$dst, $src", [(set GPR:$dst, (not so_reg:$src))]>, UnaryDP { let Inst{25} = 0; } let isReMaterializable = 1, isAsCheapAsAMove = 1 in def MVNi : AsI1<0b1111, (outs GPR:$dst), (ins so_imm:$imm), DPFrm, - IIC_iMOVi, "mvn", "\t$dst, $imm", + IIC_iMVNi, "mvn", "\t$dst, $imm", [(set GPR:$dst, so_imm_not:$imm)]>,UnaryDP { let Inst{25} = 1; } @@ -2319,6 +2320,7 @@ def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000), // defm CMP : AI1_cmp_irs<0b1010, "cmp", + IIC_iCMPi, IIC_iCMPr, IIC_iCMPsr, BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>; // FIXME: We have to be careful when using the CMN instruction and comparison @@ -2367,13 +2369,17 @@ defm CMP : AI1_cmp_irs<0b1010, "cmp", // Note that TST/TEQ don't set all the same flags that CMP does! defm TST : AI1_cmp_irs<0b1000, "tst", + IIC_iTSTi, IIC_iTSTr, IIC_iTSTsr, BinOpFrag<(ARMcmpZ (and node:$LHS, node:$RHS), 0)>, 1>; defm TEQ : AI1_cmp_irs<0b1001, "teq", + IIC_iTSTi, IIC_iTSTr, IIC_iTSTsr, BinOpFrag<(ARMcmpZ (xor node:$LHS, node:$RHS), 0)>, 1>; defm CMPz : AI1_cmp_irs<0b1010, "cmp", + IIC_iCMPi, IIC_iCMPr, IIC_iCMPsr, BinOpFrag<(ARMcmpZ node:$LHS, node:$RHS)>>; defm CMNz : AI1_cmp_irs<0b1011, "cmn", + IIC_iCMPi, IIC_iCMPr, IIC_iCMPsr, BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>>; //def : ARMPat<(ARMcmp GPR:$src, so_imm_neg:$imm), diff --git a/llvm/lib/Target/ARM/ARMInstrThumb.td b/llvm/lib/Target/ARM/ARMInstrThumb.td index 7c829da80402..15280139bcb8 100644 --- a/llvm/lib/Target/ARM/ARMInstrThumb.td +++ b/llvm/lib/Target/ARM/ARMInstrThumb.td @@ -744,7 +744,7 @@ def tMUL : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iMUL32, T1DataProcessing<0b1101>; // move inverse register -def tMVN : T1sI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iMOVr, +def tMVN : T1sI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iMVNr, "mvn", "\t$dst, $src", [(set tGPR:$dst, (not tGPR:$src))]>, T1DataProcessing<0b1111>; @@ -836,7 +836,7 @@ def tSXTH : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr, // test let isCompare = 1, isCommutable = 1, Defs = [CPSR] in -def tTST : T1pI<(outs), (ins tGPR:$lhs, tGPR:$rhs), IIC_iCMPr, +def tTST : T1pI<(outs), (ins tGPR:$lhs, tGPR:$rhs), IIC_iTSTr, "tst", "\t$lhs, $rhs", [(ARMcmpZ (and tGPR:$lhs, tGPR:$rhs), 0)]>, T1DataProcessing<0b1000>; diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td index 4fea0550bb52..5ca21aa91973 100644 --- a/llvm/lib/Target/ARM/ARMInstrThumb2.td +++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -170,10 +170,11 @@ def t2addrmode_so_reg : Operand, /// T2I_un_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a /// unary operation that produces a value. These are predicable and can be /// changed to modify CPSR. -multiclass T2I_un_irs opcod, string opc, PatFrag opnode, - bit Cheap = 0, bit ReMat = 0> { +multiclass T2I_un_irs opcod, string opc, + InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, + PatFrag opnode, bit Cheap = 0, bit ReMat = 0> { // shifted imm - def i : T2sI<(outs rGPR:$dst), (ins t2_so_imm:$src), IIC_iMOVi, + def i : T2sI<(outs rGPR:$dst), (ins t2_so_imm:$src), iii, opc, "\t$dst, $src", [(set rGPR:$dst, (opnode t2_so_imm:$src))]> { let isAsCheapAsAMove = Cheap; @@ -186,7 +187,7 @@ multiclass T2I_un_irs opcod, string opc, PatFrag opnode, let Inst{15} = 0; } // register - def r : T2sI<(outs rGPR:$dst), (ins rGPR:$src), IIC_iMOVr, + def r : T2sI<(outs rGPR:$dst), (ins rGPR:$src), iir, opc, ".w\t$dst, $src", [(set rGPR:$dst, (opnode rGPR:$src))]> { let Inst{31-27} = 0b11101; @@ -199,7 +200,7 @@ multiclass T2I_un_irs opcod, string opc, PatFrag opnode, let Inst{5-4} = 0b00; // type } // shifted register - def s : T2sI<(outs rGPR:$dst), (ins t2_so_reg:$src), IIC_iMOVsi, + def s : T2sI<(outs rGPR:$dst), (ins t2_so_reg:$src), iis, opc, ".w\t$dst, $src", [(set rGPR:$dst, (opnode t2_so_reg:$src))]> { let Inst{31-27} = 0b11101; @@ -531,9 +532,11 @@ multiclass T2I_sh_ir opcod, string opc, PatFrag opnode> { /// patterns. Similar to T2I_bin_irs except the instruction does not produce /// a explicit result, only implicitly set CPSR. let isCompare = 1, Defs = [CPSR] in { -multiclass T2I_cmp_irs opcod, string opc, PatFrag opnode> { +multiclass T2I_cmp_irs opcod, string opc, + InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, + PatFrag opnode> { // shifted imm - def ri : T2I<(outs), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iCMPi, + def ri : T2I<(outs), (ins GPR:$lhs, t2_so_imm:$rhs), iii, opc, ".w\t$lhs, $rhs", [(opnode GPR:$lhs, t2_so_imm:$rhs)]> { let Inst{31-27} = 0b11110; @@ -544,7 +547,7 @@ multiclass T2I_cmp_irs opcod, string opc, PatFrag opnode> { let Inst{11-8} = 0b1111; // Rd } // register - def rr : T2I<(outs), (ins GPR:$lhs, rGPR:$rhs), IIC_iCMPr, + def rr : T2I<(outs), (ins GPR:$lhs, rGPR:$rhs), iir, opc, ".w\t$lhs, $rhs", [(opnode GPR:$lhs, rGPR:$rhs)]> { let Inst{31-27} = 0b11101; @@ -557,7 +560,7 @@ multiclass T2I_cmp_irs opcod, string opc, PatFrag opnode> { let Inst{5-4} = 0b00; // type } // shifted register - def rs : T2I<(outs), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iCMPsi, + def rs : T2I<(outs), (ins GPR:$lhs, t2_so_reg:$rhs), iis, opc, ".w\t$lhs, $rhs", [(opnode GPR:$lhs, t2_so_reg:$rhs)]> { let Inst{31-27} = 0b11101; @@ -1694,7 +1697,9 @@ defm t2ORN : T2I_bin_irs<0b0011, "orn", // Prefer over of t2EORri ra, rb, -1 because mvn has 16-bit version let AddedComplexity = 1 in -defm t2MVN : T2I_un_irs <0b0011, "mvn", UnOpFrag<(not node:$Src)>, 1, 1>; +defm t2MVN : T2I_un_irs <0b0011, "mvn", + IIC_iMOVi, IIC_iMOVr, IIC_iMOVsi, + UnOpFrag<(not node:$Src)>, 1, 1>; let AddedComplexity = 1 in @@ -2155,8 +2160,10 @@ def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), // Comparison Instructions... // defm t2CMP : T2I_cmp_irs<0b1101, "cmp", + IIC_iCMPi, IIC_iCMPr, IIC_iCMPsi, BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>; defm t2CMPz : T2I_cmp_irs<0b1101, "cmp", + IIC_iCMPi, IIC_iCMPr, IIC_iCMPsi, BinOpFrag<(ARMcmpZ node:$LHS, node:$RHS)>>; //FIXME: Disable CMN, as CCodes are backwards from compare expectations @@ -2164,6 +2171,7 @@ defm t2CMPz : T2I_cmp_irs<0b1101, "cmp", //defm t2CMN : T2I_cmp_irs<0b1000, "cmn", // BinOpFrag<(ARMcmp node:$LHS,(ineg node:$RHS))>>; defm t2CMNz : T2I_cmp_irs<0b1000, "cmn", + IIC_iCMPi, IIC_iCMPr, IIC_iCMPsi, BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>>; //def : T2Pat<(ARMcmp GPR:$src, t2_so_imm_neg:$imm), @@ -2173,8 +2181,10 @@ def : T2Pat<(ARMcmpZ GPR:$src, t2_so_imm_neg:$imm), (t2CMNzri GPR:$src, t2_so_imm_neg:$imm)>; defm t2TST : T2I_cmp_irs<0b0000, "tst", + IIC_iTSTi, IIC_iTSTr, IIC_iTSTsi, BinOpFrag<(ARMcmpZ (and node:$LHS, node:$RHS), 0)>>; defm t2TEQ : T2I_cmp_irs<0b0100, "teq", + IIC_iTSTi, IIC_iTSTr, IIC_iTSTsi, BinOpFrag<(ARMcmpZ (xor node:$LHS, node:$RHS), 0)>>; // Conditional moves diff --git a/llvm/lib/Target/ARM/ARMSchedule.td b/llvm/lib/Target/ARM/ARMSchedule.td index e385ec7f6a26..133a81b195d8 100644 --- a/llvm/lib/Target/ARM/ARMSchedule.td +++ b/llvm/lib/Target/ARM/ARMSchedule.td @@ -28,11 +28,19 @@ def IIC_iCMPi : InstrItinClass; def IIC_iCMPr : InstrItinClass; def IIC_iCMPsi : InstrItinClass; def IIC_iCMPsr : InstrItinClass; +def IIC_iTSTi : InstrItinClass; +def IIC_iTSTr : InstrItinClass; +def IIC_iTSTsi : InstrItinClass; +def IIC_iTSTsr : InstrItinClass; def IIC_iMOVi : InstrItinClass; -def IIC_iMOVix2 : InstrItinClass; def IIC_iMOVr : InstrItinClass; def IIC_iMOVsi : InstrItinClass; def IIC_iMOVsr : InstrItinClass; +def IIC_iMOVix2 : InstrItinClass; +def IIC_iMVNi : InstrItinClass; +def IIC_iMVNr : InstrItinClass; +def IIC_iMVNsi : InstrItinClass; +def IIC_iMVNsr : InstrItinClass; def IIC_iCMOVi : InstrItinClass; def IIC_iCMOVr : InstrItinClass; def IIC_iCMOVsi : InstrItinClass; diff --git a/llvm/lib/Target/ARM/ARMScheduleA8.td b/llvm/lib/Target/ARM/ARMScheduleA8.td index 6b9b964e4246..e6b2beae0380 100644 --- a/llvm/lib/Target/ARM/ARMScheduleA8.td +++ b/llvm/lib/Target/ARM/ARMScheduleA8.td @@ -59,19 +59,31 @@ def CortexA8Itineraries : ProcessorItineraries< InstrItinData], [2, 1]>, InstrItinData], [2, 1, 1]>, // + // Test instructions + InstrItinData], [2]>, + InstrItinData], [2, 2]>, + InstrItinData], [2, 1]>, + InstrItinData], [2, 1, 1]>, + // // Move instructions, unconditional InstrItinData], [1]>, - InstrItinData, - InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>, InstrItinData], [1, 1]>, InstrItinData], [1, 1]>, InstrItinData], [1, 1, 1]>, + InstrItinData, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>, // // Move instructions, conditional InstrItinData], [2]>, InstrItinData], [2, 1]>, InstrItinData], [2, 1]>, InstrItinData], [2, 1, 1]>, + // + // MVN instructions + InstrItinData], [1]>, + InstrItinData], [1, 1]>, + InstrItinData], [1, 1]>, + InstrItinData], [1, 1, 1]>, // Integer multiply pipeline // Result written in E5, but that is relative to the last cycle of multicycle, diff --git a/llvm/lib/Target/ARM/ARMScheduleA9.td b/llvm/lib/Target/ARM/ARMScheduleA9.td index 056aa599dd6e..14197c824da8 100644 --- a/llvm/lib/Target/ARM/ARMScheduleA9.td +++ b/llvm/lib/Target/ARM/ARMScheduleA9.td @@ -28,15 +28,21 @@ def A9_DRegsN : FuncUnit; // FP register set, NEON side def CortexA9Itineraries : ProcessorItineraries< [A9_NPipe, A9_DRegsN, A9_DRegsVFP, A9_LSPipe, A9_Pipe0, A9_Pipe1], [], [ // Two fully-pipelined integer ALU pipelines - // FIXME: There are no operand latencies for these instructions at all! + // // Move instructions, unconditional InstrItinData], [1]>, - InstrItinData, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2]>, InstrItinData], [1, 1]>, InstrItinData], [1, 1]>, InstrItinData], [2, 2, 1]>, + InstrItinData, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2]>, + // + // MVN instructions + InstrItinData], [1]>, + InstrItinData], [1, 1]>, + InstrItinData], [1, 1]>, + InstrItinData], [2, 2, 1]>, // // No operand cycles InstrItinData]>, @@ -68,6 +74,12 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData], [2, 1]>, InstrItinData], [2, 1, 1]>, // + // Test instructions + InstrItinData], [2]>, + InstrItinData], [2, 2]>, + InstrItinData], [2, 1]>, + InstrItinData], [2, 1, 1]>, + // // Move instructions, conditional InstrItinData], [2]>, InstrItinData], [2, 1]>, diff --git a/llvm/lib/Target/ARM/ARMScheduleV6.td b/llvm/lib/Target/ARM/ARMScheduleV6.td index 51b915f4fdba..f19d1d13775d 100644 --- a/llvm/lib/Target/ARM/ARMScheduleV6.td +++ b/llvm/lib/Target/ARM/ARMScheduleV6.td @@ -51,6 +51,12 @@ def ARMV6Itineraries : ProcessorItineraries< InstrItinData], [2, 1]>, InstrItinData], [3, 2, 1]>, // + // Test instructions + InstrItinData], [2]>, + InstrItinData], [2, 2]>, + InstrItinData], [2, 1]>, + InstrItinData], [3, 2, 1]>, + // // Move instructions, unconditional InstrItinData], [2]>, InstrItinData], [2, 2]>, @@ -64,6 +70,12 @@ def ARMV6Itineraries : ProcessorItineraries< InstrItinData], [3, 2]>, InstrItinData], [3, 1]>, InstrItinData], [4, 2, 1]>, + // + // MVN instructions + InstrItinData], [2]>, + InstrItinData], [2, 2]>, + InstrItinData], [2, 1]>, + InstrItinData], [3, 2, 1]>, // Integer multiply pipeline //