forked from OSchip/llvm-project
[ARM] Cortex-M4 schedule
This patch adds a simple Cortex-M4 schedule, renaming the existing M3 schedule to M4 and filling in the latencies as per the Cortex-M4 TRM (https://developer.arm.com/docs/ddi0439/latest). Most of these latencies are 1, with the important exception of loads, which take 2 cycles. A few others are also higher, but I don't believe they make a large difference. I've repurposed the M3 schedule because the latencies are mostly the same between the two cores, with the M4 having more FP and DSP instructions. We also turn on MISched and UseAA for the cores that now use this model. The patch also adds schedule Writes to various instructions to make things simpler. Differential Revision: https://reviews.llvm.org/D54142 llvm-svn: 360768
This commit is contained in:
parent
9c7188a08a
commit
d2d0f46cd2
|
@ -978,21 +978,27 @@ def : ProcessorModel<"cortex-r8", CortexA8Model, [ARMv7r,
|
|||
FeatureHasSlowFPVMLx,
|
||||
FeatureAvoidPartialCPSR]>;
|
||||
|
||||
def : ProcessorModel<"cortex-m3", CortexM3Model, [ARMv7m,
|
||||
def : ProcessorModel<"cortex-m3", CortexM4Model, [ARMv7m,
|
||||
ProcM3,
|
||||
FeaturePrefLoopAlign32,
|
||||
FeatureUseMISched,
|
||||
FeatureUseAA,
|
||||
FeatureHasNoBranchPredictor]>;
|
||||
|
||||
def : ProcessorModel<"sc300", CortexM3Model, [ARMv7m,
|
||||
def : ProcessorModel<"sc300", CortexM4Model, [ARMv7m,
|
||||
ProcM3,
|
||||
FeatureUseMISched,
|
||||
FeatureUseAA,
|
||||
FeatureHasNoBranchPredictor]>;
|
||||
|
||||
def : ProcessorModel<"cortex-m4", CortexM3Model, [ARMv7em,
|
||||
def : ProcessorModel<"cortex-m4", CortexM4Model, [ARMv7em,
|
||||
FeatureVFP4,
|
||||
FeatureVFPOnlySP,
|
||||
FeatureD16,
|
||||
FeaturePrefLoopAlign32,
|
||||
FeatureHasSlowFPVMLx,
|
||||
FeatureUseMISched,
|
||||
FeatureUseAA,
|
||||
FeatureHasNoBranchPredictor]>;
|
||||
|
||||
def : ProcNoItin<"cortex-m7", [ARMv7em,
|
||||
|
@ -1002,22 +1008,26 @@ def : ProcNoItin<"cortex-m7", [ARMv7em,
|
|||
def : ProcNoItin<"cortex-m23", [ARMv8mBaseline,
|
||||
FeatureNoMovt]>;
|
||||
|
||||
def : ProcessorModel<"cortex-m33", CortexM3Model, [ARMv8mMainline,
|
||||
def : ProcessorModel<"cortex-m33", CortexM4Model, [ARMv8mMainline,
|
||||
FeatureDSP,
|
||||
FeatureFPARMv8,
|
||||
FeatureD16,
|
||||
FeatureVFPOnlySP,
|
||||
FeaturePrefLoopAlign32,
|
||||
FeatureHasSlowFPVMLx,
|
||||
FeatureUseMISched,
|
||||
FeatureUseAA,
|
||||
FeatureHasNoBranchPredictor]>;
|
||||
|
||||
def : ProcessorModel<"cortex-m35p", CortexM3Model, [ARMv8mMainline,
|
||||
def : ProcessorModel<"cortex-m35p", CortexM4Model, [ARMv8mMainline,
|
||||
FeatureDSP,
|
||||
FeatureFPARMv8,
|
||||
FeatureD16,
|
||||
FeatureVFPOnlySP,
|
||||
FeaturePrefLoopAlign32,
|
||||
FeatureHasSlowFPVMLx,
|
||||
FeatureUseMISched,
|
||||
FeatureUseAA,
|
||||
FeatureHasNoBranchPredictor]>;
|
||||
|
||||
|
||||
|
|
|
@ -663,7 +663,7 @@ let canFoldAsLoad = 1, isReMaterializable = 1, AddedComplexity = 10 in
|
|||
def tLDRpci : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i,
|
||||
"ldr", "\t$Rt, $addr",
|
||||
[(set tGPR:$Rt, (load (ARMWrapper tconstpool:$addr)))]>,
|
||||
T1Encoding<{0,1,0,0,1,?}> {
|
||||
T1Encoding<{0,1,0,0,1,?}>, Sched<[WriteLd]> {
|
||||
// A6.2 & A8.6.59
|
||||
bits<3> Rt;
|
||||
bits<8> addr;
|
||||
|
@ -677,7 +677,7 @@ let canFoldAsLoad = 1 in
|
|||
def tLDRspi : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_sp:$addr), IIC_iLoad_i,
|
||||
"ldr", "\t$Rt, $addr",
|
||||
[(set tGPR:$Rt, (load t_addrmode_sp:$addr))]>,
|
||||
T1LdStSP<{1,?,?}> {
|
||||
T1LdStSP<{1,?,?}>, Sched<[WriteLd]> {
|
||||
bits<3> Rt;
|
||||
bits<8> addr;
|
||||
let Inst{10-8} = Rt;
|
||||
|
@ -728,39 +728,39 @@ multiclass thumb_st_rr_ri_enc<bits<3> reg_opc, bits<4> imm_opc,
|
|||
defm tLDR : thumb_ld_rr_ri_enc<0b100, 0b0110, t_addrmode_rr,
|
||||
t_addrmode_is4, AddrModeT1_4,
|
||||
IIC_iLoad_r, IIC_iLoad_i, "ldr",
|
||||
load>;
|
||||
load>, Sched<[WriteLd]>;
|
||||
|
||||
// A8.6.64 & A8.6.61
|
||||
defm tLDRB : thumb_ld_rr_ri_enc<0b110, 0b0111, t_addrmode_rr,
|
||||
t_addrmode_is1, AddrModeT1_1,
|
||||
IIC_iLoad_bh_r, IIC_iLoad_bh_i, "ldrb",
|
||||
zextloadi8>;
|
||||
zextloadi8>, Sched<[WriteLd]>;
|
||||
|
||||
// A8.6.76 & A8.6.73
|
||||
defm tLDRH : thumb_ld_rr_ri_enc<0b101, 0b1000, t_addrmode_rr,
|
||||
t_addrmode_is2, AddrModeT1_2,
|
||||
IIC_iLoad_bh_r, IIC_iLoad_bh_i, "ldrh",
|
||||
zextloadi16>;
|
||||
zextloadi16>, Sched<[WriteLd]>;
|
||||
|
||||
let AddedComplexity = 10 in
|
||||
def tLDRSB : // A8.6.80
|
||||
T1pILdStEncode<0b011, (outs tGPR:$Rt), (ins t_addrmode_rr_sext:$addr),
|
||||
AddrModeT1_1, IIC_iLoad_bh_r,
|
||||
"ldrsb", "\t$Rt, $addr",
|
||||
[(set tGPR:$Rt, (sextloadi8 t_addrmode_rr_sext:$addr))]>;
|
||||
[(set tGPR:$Rt, (sextloadi8 t_addrmode_rr_sext:$addr))]>, Sched<[WriteLd]>;
|
||||
|
||||
let AddedComplexity = 10 in
|
||||
def tLDRSH : // A8.6.84
|
||||
T1pILdStEncode<0b111, (outs tGPR:$Rt), (ins t_addrmode_rr_sext:$addr),
|
||||
AddrModeT1_2, IIC_iLoad_bh_r,
|
||||
"ldrsh", "\t$Rt, $addr",
|
||||
[(set tGPR:$Rt, (sextloadi16 t_addrmode_rr_sext:$addr))]>;
|
||||
[(set tGPR:$Rt, (sextloadi16 t_addrmode_rr_sext:$addr))]>, Sched<[WriteLd]>;
|
||||
|
||||
|
||||
def tSTRspi : T1pIs<(outs), (ins tGPR:$Rt, t_addrmode_sp:$addr), IIC_iStore_i,
|
||||
"str", "\t$Rt, $addr",
|
||||
[(store tGPR:$Rt, t_addrmode_sp:$addr)]>,
|
||||
T1LdStSP<{0,?,?}> {
|
||||
T1LdStSP<{0,?,?}>, Sched<[WriteST]> {
|
||||
bits<3> Rt;
|
||||
bits<8> addr;
|
||||
let Inst{10-8} = Rt;
|
||||
|
@ -771,19 +771,19 @@ def tSTRspi : T1pIs<(outs), (ins tGPR:$Rt, t_addrmode_sp:$addr), IIC_iStore_i,
|
|||
defm tSTR : thumb_st_rr_ri_enc<0b000, 0b0110, t_addrmode_rr,
|
||||
t_addrmode_is4, AddrModeT1_4,
|
||||
IIC_iStore_r, IIC_iStore_i, "str",
|
||||
store>;
|
||||
store>, Sched<[WriteST]>;
|
||||
|
||||
// A8.6.197 & A8.6.195
|
||||
defm tSTRB : thumb_st_rr_ri_enc<0b010, 0b0111, t_addrmode_rr,
|
||||
t_addrmode_is1, AddrModeT1_1,
|
||||
IIC_iStore_bh_r, IIC_iStore_bh_i, "strb",
|
||||
truncstorei8>;
|
||||
truncstorei8>, Sched<[WriteST]>;
|
||||
|
||||
// A8.6.207 & A8.6.205
|
||||
defm tSTRH : thumb_st_rr_ri_enc<0b001, 0b1000, t_addrmode_rr,
|
||||
t_addrmode_is2, AddrModeT1_2,
|
||||
IIC_iStore_bh_r, IIC_iStore_bh_i, "strh",
|
||||
truncstorei16>;
|
||||
truncstorei16>, Sched<[WriteST]>;
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -843,7 +843,7 @@ let mayLoad = 1, Uses = [SP], Defs = [SP], hasExtraDefRegAllocReq = 1,
|
|||
def tPOP : T1I<(outs), (ins pred:$p, reglist:$regs, variable_ops),
|
||||
IIC_iPop,
|
||||
"pop${p}\t$regs", []>,
|
||||
T1Misc<{1,1,0,?,?,?,?}> {
|
||||
T1Misc<{1,1,0,?,?,?,?}>, Sched<[WriteLd]> {
|
||||
bits<16> regs;
|
||||
let Inst{8} = regs{15};
|
||||
let Inst{7-0} = regs{7-0};
|
||||
|
@ -853,7 +853,7 @@ let mayStore = 1, Uses = [SP], Defs = [SP], hasExtraSrcRegAllocReq = 1 in
|
|||
def tPUSH : T1I<(outs), (ins pred:$p, reglist:$regs, variable_ops),
|
||||
IIC_iStore_m,
|
||||
"push${p}\t$regs", []>,
|
||||
T1Misc<{0,1,0,?,?,?,?}> {
|
||||
T1Misc<{0,1,0,?,?,?,?}>, Sched<[WriteST]> {
|
||||
bits<16> regs;
|
||||
let Inst{8} = regs{14};
|
||||
let Inst{7-0} = regs{7-0};
|
||||
|
@ -1214,7 +1214,7 @@ def tMUL : // A8.6.105 T1
|
|||
Thumb1sI<(outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm), AddrModeNone, 2,
|
||||
IIC_iMUL32, "mul", "\t$Rd, $Rn, $Rm", "$Rm = $Rd",
|
||||
[(set tGPR:$Rd, (mul tGPR:$Rn, tGPR:$Rm))]>,
|
||||
T1DataProcessing<0b1101> {
|
||||
T1DataProcessing<0b1101>, Sched<[WriteMUL32, ReadMUL, ReadMUL]> {
|
||||
bits<3> Rd;
|
||||
bits<3> Rn;
|
||||
let Inst{5-3} = Rn;
|
||||
|
|
|
@ -1333,7 +1333,8 @@ def t2LDRB_PRE : T2Ipreldst<0, 0b00, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
|
|||
def t2LDRB_POST : T2Ipostldst<0, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
|
||||
(ins addr_offset_none:$Rn, t2am_imm8_offset:$offset),
|
||||
AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
|
||||
"ldrb", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>;
|
||||
"ldrb", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>,
|
||||
Sched<[WriteLd]>;
|
||||
|
||||
def t2LDRH_PRE : T2Ipreldst<0, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
|
||||
(ins t2addrmode_imm8_pre:$addr),
|
||||
|
@ -2331,14 +2332,14 @@ class T2SatI<dag iops, string opc, string asm>
|
|||
|
||||
def t2SSAT: T2SatI<(ins imm1_32:$sat_imm, rGPR:$Rn, t2_shift_imm:$sh),
|
||||
"ssat", "\t$Rd, $sat_imm, $Rn$sh">,
|
||||
Requires<[IsThumb2]> {
|
||||
Requires<[IsThumb2]>, Sched<[WriteALU]> {
|
||||
let Inst{23-22} = 0b00;
|
||||
let Inst{5} = 0;
|
||||
}
|
||||
|
||||
def t2SSAT16: T2SatI<(ins imm1_16:$sat_imm, rGPR:$Rn),
|
||||
"ssat16", "\t$Rd, $sat_imm, $Rn">,
|
||||
Requires<[IsThumb2, HasDSP]> {
|
||||
Requires<[IsThumb2, HasDSP]>, Sched<[WriteALU]> {
|
||||
let Inst{23-22} = 0b00;
|
||||
let sh = 0b100000;
|
||||
let Inst{4} = 0;
|
||||
|
@ -2346,13 +2347,13 @@ def t2SSAT16: T2SatI<(ins imm1_16:$sat_imm, rGPR:$Rn),
|
|||
|
||||
def t2USAT: T2SatI<(ins imm0_31:$sat_imm, rGPR:$Rn, t2_shift_imm:$sh),
|
||||
"usat", "\t$Rd, $sat_imm, $Rn$sh">,
|
||||
Requires<[IsThumb2]> {
|
||||
Requires<[IsThumb2]>, Sched<[WriteALU]> {
|
||||
let Inst{23-22} = 0b10;
|
||||
}
|
||||
|
||||
def t2USAT16: T2SatI<(ins imm0_15:$sat_imm, rGPR:$Rn),
|
||||
"usat16", "\t$Rd, $sat_imm, $Rn">,
|
||||
Requires<[IsThumb2, HasDSP]> {
|
||||
Requires<[IsThumb2, HasDSP]>, Sched<[WriteALU]> {
|
||||
let Inst{23-22} = 0b10;
|
||||
let sh = 0b100000;
|
||||
let Inst{4} = 0;
|
||||
|
@ -2476,7 +2477,7 @@ class T2TwoRegBitFI<dag oops, dag iops, InstrItinClass itin,
|
|||
let Constraints = "$src = $Rd" in
|
||||
def t2BFC : T2BitFI<(outs rGPR:$Rd), (ins rGPR:$src, bf_inv_mask_imm:$imm),
|
||||
IIC_iUNAsi, "bfc", "\t$Rd, $imm",
|
||||
[(set rGPR:$Rd, (and rGPR:$src, bf_inv_mask_imm:$imm))]> {
|
||||
[(set rGPR:$Rd, (and rGPR:$src, bf_inv_mask_imm:$imm))]>, Sched<[WriteALU]> {
|
||||
let Inst{31-27} = 0b11110;
|
||||
let Inst{26} = 0; // should be 0.
|
||||
let Inst{25} = 1;
|
||||
|
@ -2492,7 +2493,7 @@ def t2BFC : T2BitFI<(outs rGPR:$Rd), (ins rGPR:$src, bf_inv_mask_imm:$imm),
|
|||
|
||||
def t2SBFX: T2TwoRegBitFI<
|
||||
(outs rGPR:$Rd), (ins rGPR:$Rn, imm0_31:$lsb, imm1_32:$msb),
|
||||
IIC_iUNAsi, "sbfx", "\t$Rd, $Rn, $lsb, $msb", []> {
|
||||
IIC_iUNAsi, "sbfx", "\t$Rd, $Rn, $lsb, $msb", []>, Sched<[WriteALU]> {
|
||||
let Inst{31-27} = 0b11110;
|
||||
let Inst{25} = 1;
|
||||
let Inst{24-20} = 0b10100;
|
||||
|
@ -2501,7 +2502,7 @@ def t2SBFX: T2TwoRegBitFI<
|
|||
|
||||
def t2UBFX: T2TwoRegBitFI<
|
||||
(outs rGPR:$Rd), (ins rGPR:$Rn, imm0_31:$lsb, imm1_32:$msb),
|
||||
IIC_iUNAsi, "ubfx", "\t$Rd, $Rn, $lsb, $msb", []> {
|
||||
IIC_iUNAsi, "ubfx", "\t$Rd, $Rn, $lsb, $msb", []>, Sched<[WriteALU]> {
|
||||
let Inst{31-27} = 0b11110;
|
||||
let Inst{25} = 1;
|
||||
let Inst{24-20} = 0b11100;
|
||||
|
@ -2527,7 +2528,7 @@ let Constraints = "$src = $Rd" in {
|
|||
(ins rGPR:$src, rGPR:$Rn, bf_inv_mask_imm:$imm),
|
||||
IIC_iBITi, "bfi", "\t$Rd, $Rn, $imm",
|
||||
[(set rGPR:$Rd, (ARMbfi rGPR:$src, rGPR:$Rn,
|
||||
bf_inv_mask_imm:$imm))]> {
|
||||
bf_inv_mask_imm:$imm))]>, Sched<[WriteALU]> {
|
||||
let Inst{31-27} = 0b11110;
|
||||
let Inst{26} = 0; // should be 0.
|
||||
let Inst{25} = 1;
|
||||
|
@ -3281,17 +3282,17 @@ def t2LDREXB : T2I_ldrex<0b0100, (outs rGPR:$Rt), (ins addr_offset_none:$addr),
|
|||
AddrModeNone, 4, NoItinerary,
|
||||
"ldrexb", "\t$Rt, $addr", "",
|
||||
[(set rGPR:$Rt, (ldrex_1 addr_offset_none:$addr))]>,
|
||||
Requires<[IsThumb, HasV8MBaseline]>;
|
||||
Requires<[IsThumb, HasV8MBaseline]>, Sched<[WriteLd]>;
|
||||
def t2LDREXH : T2I_ldrex<0b0101, (outs rGPR:$Rt), (ins addr_offset_none:$addr),
|
||||
AddrModeNone, 4, NoItinerary,
|
||||
"ldrexh", "\t$Rt, $addr", "",
|
||||
[(set rGPR:$Rt, (ldrex_2 addr_offset_none:$addr))]>,
|
||||
Requires<[IsThumb, HasV8MBaseline]>;
|
||||
Requires<[IsThumb, HasV8MBaseline]>, Sched<[WriteLd]>;
|
||||
def t2LDREX : Thumb2I<(outs rGPR:$Rt), (ins t2addrmode_imm0_1020s4:$addr),
|
||||
AddrModeT2_ldrex, 4, NoItinerary,
|
||||
"ldrex", "\t$Rt, $addr", "",
|
||||
[(set rGPR:$Rt, (ldrex_4 t2addrmode_imm0_1020s4:$addr))]>,
|
||||
Requires<[IsThumb, HasV8MBaseline]> {
|
||||
Requires<[IsThumb, HasV8MBaseline]>, Sched<[WriteLd]> {
|
||||
bits<4> Rt;
|
||||
bits<12> addr;
|
||||
let Inst{31-27} = 0b11101;
|
||||
|
@ -3307,7 +3308,7 @@ def t2LDREXD : T2I_ldrex<0b0111, (outs rGPR:$Rt, rGPR:$Rt2),
|
|||
AddrModeNone, 4, NoItinerary,
|
||||
"ldrexd", "\t$Rt, $Rt2, $addr", "",
|
||||
[], {?, ?, ?, ?}>,
|
||||
Requires<[IsThumb2, IsNotMClass]> {
|
||||
Requires<[IsThumb2, IsNotMClass]>, Sched<[WriteLd]> {
|
||||
bits<4> Rt2;
|
||||
let Inst{11-8} = Rt2;
|
||||
}
|
||||
|
@ -3315,17 +3316,17 @@ def t2LDAEXB : T2I_ldrex<0b1100, (outs rGPR:$Rt), (ins addr_offset_none:$addr),
|
|||
AddrModeNone, 4, NoItinerary,
|
||||
"ldaexb", "\t$Rt, $addr", "",
|
||||
[(set rGPR:$Rt, (ldaex_1 addr_offset_none:$addr))]>,
|
||||
Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]>;
|
||||
Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]>, Sched<[WriteLd]>;
|
||||
def t2LDAEXH : T2I_ldrex<0b1101, (outs rGPR:$Rt), (ins addr_offset_none:$addr),
|
||||
AddrModeNone, 4, NoItinerary,
|
||||
"ldaexh", "\t$Rt, $addr", "",
|
||||
[(set rGPR:$Rt, (ldaex_2 addr_offset_none:$addr))]>,
|
||||
Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]>;
|
||||
Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]>, Sched<[WriteLd]>;
|
||||
def t2LDAEX : Thumb2I<(outs rGPR:$Rt), (ins addr_offset_none:$addr),
|
||||
AddrModeNone, 4, NoItinerary,
|
||||
"ldaex", "\t$Rt, $addr", "",
|
||||
[(set rGPR:$Rt, (ldaex_4 addr_offset_none:$addr))]>,
|
||||
Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]> {
|
||||
Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]>, Sched<[WriteLd]> {
|
||||
bits<4> Rt;
|
||||
bits<4> addr;
|
||||
let Inst{31-27} = 0b11101;
|
||||
|
@ -3341,7 +3342,7 @@ def t2LDAEXD : T2I_ldrex<0b1111, (outs rGPR:$Rt, rGPR:$Rt2),
|
|||
AddrModeNone, 4, NoItinerary,
|
||||
"ldaexd", "\t$Rt, $Rt2, $addr", "",
|
||||
[], {?, ?, ?, ?}>, Requires<[IsThumb,
|
||||
HasAcquireRelease, HasV7Clrex, IsNotMClass]> {
|
||||
HasAcquireRelease, HasV7Clrex, IsNotMClass]>, Sched<[WriteLd]> {
|
||||
bits<4> Rt2;
|
||||
let Inst{11-8} = Rt2;
|
||||
|
||||
|
@ -3356,14 +3357,14 @@ def t2STREXB : T2I_strex<0b0100, (outs rGPR:$Rd),
|
|||
"strexb", "\t$Rd, $Rt, $addr", "",
|
||||
[(set rGPR:$Rd,
|
||||
(strex_1 rGPR:$Rt, addr_offset_none:$addr))]>,
|
||||
Requires<[IsThumb, HasV8MBaseline]>;
|
||||
Requires<[IsThumb, HasV8MBaseline]>, Sched<[WriteST]>;
|
||||
def t2STREXH : T2I_strex<0b0101, (outs rGPR:$Rd),
|
||||
(ins rGPR:$Rt, addr_offset_none:$addr),
|
||||
AddrModeNone, 4, NoItinerary,
|
||||
"strexh", "\t$Rd, $Rt, $addr", "",
|
||||
[(set rGPR:$Rd,
|
||||
(strex_2 rGPR:$Rt, addr_offset_none:$addr))]>,
|
||||
Requires<[IsThumb, HasV8MBaseline]>;
|
||||
Requires<[IsThumb, HasV8MBaseline]>, Sched<[WriteST]>;
|
||||
|
||||
def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt,
|
||||
t2addrmode_imm0_1020s4:$addr),
|
||||
|
@ -3371,7 +3372,7 @@ def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt,
|
|||
"strex", "\t$Rd, $Rt, $addr", "",
|
||||
[(set rGPR:$Rd,
|
||||
(strex_4 rGPR:$Rt, t2addrmode_imm0_1020s4:$addr))]>,
|
||||
Requires<[IsThumb, HasV8MBaseline]> {
|
||||
Requires<[IsThumb, HasV8MBaseline]>, Sched<[WriteST]> {
|
||||
bits<4> Rd;
|
||||
bits<4> Rt;
|
||||
bits<12> addr;
|
||||
|
@ -3388,7 +3389,7 @@ def t2STREXD : T2I_strex<0b0111, (outs rGPR:$Rd),
|
|||
AddrModeNone, 4, NoItinerary,
|
||||
"strexd", "\t$Rd, $Rt, $Rt2, $addr", "", [],
|
||||
{?, ?, ?, ?}>,
|
||||
Requires<[IsThumb2, IsNotMClass]> {
|
||||
Requires<[IsThumb2, IsNotMClass]>, Sched<[WriteST]> {
|
||||
bits<4> Rt2;
|
||||
let Inst{11-8} = Rt2;
|
||||
}
|
||||
|
@ -3399,7 +3400,7 @@ def t2STLEXB : T2I_strex<0b1100, (outs rGPR:$Rd),
|
|||
[(set rGPR:$Rd,
|
||||
(stlex_1 rGPR:$Rt, addr_offset_none:$addr))]>,
|
||||
Requires<[IsThumb, HasAcquireRelease,
|
||||
HasV7Clrex]>;
|
||||
HasV7Clrex]>, Sched<[WriteST]>;
|
||||
|
||||
def t2STLEXH : T2I_strex<0b1101, (outs rGPR:$Rd),
|
||||
(ins rGPR:$Rt, addr_offset_none:$addr),
|
||||
|
@ -3408,7 +3409,7 @@ def t2STLEXH : T2I_strex<0b1101, (outs rGPR:$Rd),
|
|||
[(set rGPR:$Rd,
|
||||
(stlex_2 rGPR:$Rt, addr_offset_none:$addr))]>,
|
||||
Requires<[IsThumb, HasAcquireRelease,
|
||||
HasV7Clrex]>;
|
||||
HasV7Clrex]>, Sched<[WriteST]>;
|
||||
|
||||
def t2STLEX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt,
|
||||
addr_offset_none:$addr),
|
||||
|
@ -3416,7 +3417,8 @@ def t2STLEX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt,
|
|||
"stlex", "\t$Rd, $Rt, $addr", "",
|
||||
[(set rGPR:$Rd,
|
||||
(stlex_4 rGPR:$Rt, addr_offset_none:$addr))]>,
|
||||
Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]> {
|
||||
Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]>,
|
||||
Sched<[WriteST]> {
|
||||
bits<4> Rd;
|
||||
bits<4> Rt;
|
||||
bits<4> addr;
|
||||
|
@ -3433,7 +3435,7 @@ def t2STLEXD : T2I_strex<0b1111, (outs rGPR:$Rd),
|
|||
AddrModeNone, 4, NoItinerary,
|
||||
"stlexd", "\t$Rd, $Rt, $Rt2, $addr", "", [],
|
||||
{?, ?, ?, ?}>, Requires<[IsThumb, HasAcquireRelease,
|
||||
HasV7Clrex, IsNotMClass]> {
|
||||
HasV7Clrex, IsNotMClass]>, Sched<[WriteST]> {
|
||||
bits<4> Rt2;
|
||||
let Inst{11-8} = Rt2;
|
||||
}
|
||||
|
|
|
@ -424,4 +424,4 @@ include "ARMScheduleA9.td"
|
|||
include "ARMScheduleSwift.td"
|
||||
include "ARMScheduleR52.td"
|
||||
include "ARMScheduleA57.td"
|
||||
include "ARMScheduleM3.td"
|
||||
include "ARMScheduleM4.td"
|
||||
|
|
|
@ -1,20 +0,0 @@
|
|||
//=- ARMScheduleM3.td - ARM Cortex-M3 Scheduling Definitions -*- tablegen -*-=//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file defines the machine model for the ARM Cortex-M3 processor.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def CortexM3Model : SchedMachineModel {
|
||||
let IssueWidth = 1; // Only IT can be dual-issued, so assume single-issue
|
||||
let MicroOpBufferSize = 0; // In-order
|
||||
let LoadLatency = 2; // Latency when not pipelined, not pc-relative
|
||||
let MispredictPenalty = 2; // Best case branch taken cost
|
||||
|
||||
let CompleteModel = 0;
|
||||
}
|
|
@ -0,0 +1,119 @@
|
|||
//==- ARMScheduleM4.td - Cortex-M4 Scheduling Definitions -*- tablegen -*-====//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file defines the SchedRead/Write data for the ARM Cortex-M4 processor.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Machine model shared by the Cortex-M class cores that select it in ARM.td
// (in this change: cortex-m3, sc300, cortex-m4, cortex-m33 and cortex-m35p).
// The core is described as a single-issue, in-order pipeline.
def CortexM4Model : SchedMachineModel {
|
||||
let IssueWidth = 1; // Only IT can be dual-issued, so assume single-issue
|
||||
let MicroOpBufferSize = 0; // In-order
|
||||
let LoadLatency = 2; // Latency when not pipelined, not pc-relative
|
||||
let MispredictPenalty = 2; // Best case branch taken cost
|
||||
let PostRAScheduler = 1; // Also schedule after register allocation
|
||||
|
||||
let CompleteModel = 0; // Not every instruction is given scheduling info here
|
||||
}
|
||||
|
||||
|
||||
// We model the entire cpu as a single pipeline with a BufferSize = 0 since
|
||||
// Cortex-M4 is in-order.
|
||||
|
||||
def M4Unit : ProcResource<1> { let BufferSize = 0; }
|
||||
|
||||
|
||||
let SchedModel = CortexM4Model in {
|
||||
|
||||
// Some definitions of latencies we apply to different instructions
|
||||
|
||||
// M4UnitL<N><write>: bind the SchedWrite `write` to the single M4Unit
// resource with a result latency of N cycles.
class M4UnitL1<SchedWrite write> : WriteRes<write, [M4Unit]> { let Latency = 1; }
|
||||
class M4UnitL2<SchedWrite write> : WriteRes<write, [M4Unit]> { let Latency = 2; }
|
||||
class M4UnitL3<SchedWrite write> : WriteRes<write, [M4Unit]> { let Latency = 3; }
|
||||
class M4UnitL14<SchedWrite write> : WriteRes<write, [M4Unit]> { let Latency = 14; }
|
||||
// Stand-alone 1- and 2-cycle writes on M4Unit, used by the InstRW helpers
// that follow.
def M4UnitL1_wr : SchedWriteRes<[M4Unit]> { let Latency = 1; }
|
||||
def M4UnitL2_wr : SchedWriteRes<[M4Unit]> { let Latency = 2; }
|
||||
// M4UnitL<N>I<instr>: give the instructions matched by the `instr` dag
// (an `instrs` list or `instregex` patterns) the N-cycle write directly.
class M4UnitL1I<dag instr> : InstRW<[M4UnitL1_wr], instr>;
|
||||
class M4UnitL2I<dag instr> : InstRW<[M4UnitL2_wr], instr>;
|
||||
|
||||
|
||||
// Loads, MACs and DIV all get a higher latency of 2
|
||||
def : M4UnitL2<WriteLd>;
|
||||
def : M4UnitL2<WriteMAC32>;
|
||||
def : M4UnitL2<WriteMAC64Hi>;
|
||||
def : M4UnitL2<WriteMAC64Lo>;
|
||||
def : M4UnitL2<WriteMAC16>;
|
||||
def : M4UnitL2<WriteDIV>;
|
||||
|
||||
def : M4UnitL2I<(instregex "(t|t2)LDM")>;
|
||||
|
||||
|
||||
// For stores we use a latency of 1, as they have no outputs
|
||||
|
||||
def : M4UnitL1<WriteST>;
|
||||
def : M4UnitL1I<(instregex "(t|t2)STM")>;
|
||||
|
||||
|
||||
// Everything else has a Latency of 1
|
||||
|
||||
def : M4UnitL1<WriteALU>;
|
||||
def : M4UnitL1<WriteALUsi>;
|
||||
def : M4UnitL1<WriteALUsr>;
|
||||
def : M4UnitL1<WriteALUSsr>;
|
||||
def : M4UnitL1<WriteBr>;
|
||||
def : M4UnitL1<WriteBrL>;
|
||||
def : M4UnitL1<WriteBrTbl>;
|
||||
def : M4UnitL1<WriteCMPsi>;
|
||||
def : M4UnitL1<WriteCMPsr>;
|
||||
def : M4UnitL1<WriteCMP>;
|
||||
def : M4UnitL1<WriteMUL32>;
|
||||
def : M4UnitL1<WriteMUL64Hi>;
|
||||
def : M4UnitL1<WriteMUL64Lo>;
|
||||
def : M4UnitL1<WriteMUL16>;
|
||||
def : M4UnitL1<WriteNoop>;
|
||||
def : M4UnitL1<WritePreLd>;
|
||||
def : M4UnitL1I<(instregex "(t|t2)MOV")>;
|
||||
def : M4UnitL1I<(instrs COPY)>;
|
||||
def : M4UnitL1I<(instregex "t2IT")>;
|
||||
def : M4UnitL1I<(instregex "t2SEL", "t2USAD8",
|
||||
"t2(S|Q|SH|U|UQ|UH)(ADD16|ASX|SAX|SUB16|ADD8|SUB8)", "t2USADA8", "(t|t2)REV")>;
|
||||
|
||||
def : ReadAdvance<ReadALU, 0>;
|
||||
def : ReadAdvance<ReadALUsr, 0>;
|
||||
def : ReadAdvance<ReadMUL, 0>;
|
||||
def : ReadAdvance<ReadMAC, 0>;
|
||||
|
||||
// Most FP instructions have single-cycle latency, except MACs, Divs and Sqrts.
|
||||
// Loads still take 2 cycles.
|
||||
|
||||
def : M4UnitL1<WriteFPCVT>;
|
||||
def : M4UnitL1<WriteFPMOV>;
|
||||
def : M4UnitL1<WriteFPALU32>;
|
||||
def : M4UnitL1<WriteFPALU64>;
|
||||
def : M4UnitL1<WriteFPMUL32>;
|
||||
def : M4UnitL1<WriteFPMUL64>;
|
||||
def : M4UnitL2I<(instregex "VLD")>;
|
||||
def : M4UnitL1I<(instregex "VST")>;
|
||||
def : M4UnitL3<WriteFPMAC32>;
|
||||
def : M4UnitL3<WriteFPMAC64>;
|
||||
def : M4UnitL14<WriteFPDIV32>;
|
||||
def : M4UnitL14<WriteFPDIV64>;
|
||||
def : M4UnitL14<WriteFPSQRT32>;
|
||||
def : M4UnitL14<WriteFPSQRT64>;
|
||||
def : M4UnitL1<WriteVLD1>;
|
||||
def : M4UnitL1<WriteVLD2>;
|
||||
def : M4UnitL1<WriteVLD3>;
|
||||
def : M4UnitL1<WriteVLD4>;
|
||||
def : M4UnitL1<WriteVST1>;
|
||||
def : M4UnitL1<WriteVST2>;
|
||||
def : M4UnitL1<WriteVST3>;
|
||||
def : M4UnitL1<WriteVST4>;
|
||||
|
||||
def : ReadAdvance<ReadFPMUL, 0>;
|
||||
def : ReadAdvance<ReadFPMAC, 0>;
|
||||
|
||||
}
|
|
@ -5,10 +5,10 @@
|
|||
|
||||
; CHECK-LABEL: add_user
|
||||
; CHECK: %for.body
|
||||
; CHECK: ldr [[A:r[0-9]+]],{{.*}}, #2]!
|
||||
; CHECK: ldr [[B:r[0-9]+]],{{.*}}, #2]!
|
||||
; CHECK: smlad [[ACC:r[0-9]+]], [[B]], [[A]], [[ACC]]
|
||||
; CHECK: ldr [[A:[rl0-9]+]],{{.*}}, #2]!
|
||||
; CHECK: ldr [[B:[rl0-9]+]],{{.*}}, #2]!
|
||||
; CHECK: sxtah [[COUNT:r[0-9]+]], [[COUNT]], [[A]]
|
||||
; CHECK: smlad [[ACC:r[0-9]+]], [[B]], [[A]], [[ACC]]
|
||||
define i32 @add_user(i32 %arg, i32* nocapture readnone %arg1, i16* nocapture readonly %arg2, i16* nocapture readonly %arg3) {
|
||||
entry:
|
||||
%cmp24 = icmp sgt i32 %arg, 0
|
||||
|
@ -53,10 +53,10 @@ for.body:
|
|||
|
||||
; CHECK-LABEL: mul_bottom_user
|
||||
; CHECK: %for.body
|
||||
; CHECK: ldr [[A:r[0-9]+]],{{.*}}, #2]!
|
||||
; CHECK: ldr [[B:r[0-9]+]],{{.*}}, #2]!
|
||||
; CHECK: smlad [[ACC:r[0-9]+]], [[B]], [[A]], [[ACC]]
|
||||
; CHECK: ldr [[A:[rl0-9]+]],{{.*}}, #2]!
|
||||
; CHECK: ldr [[B:[rl0-9]+]],{{.*}}, #2]!
|
||||
; CHECK: sxth [[SXT:r[0-9]+]], [[A]]
|
||||
; CHECK: smlad [[ACC:r[0-9]+]], [[B]], [[A]], [[ACC]]
|
||||
; CHECK: mul [[COUNT:r[0-9]+]],{{.*}}[[SXT]]
|
||||
define i32 @mul_bottom_user(i32 %arg, i32* nocapture readnone %arg1, i16* nocapture readonly %arg2, i16* nocapture readonly %arg3) {
|
||||
entry:
|
||||
|
@ -104,8 +104,8 @@ for.body:
|
|||
; CHECK: %for.body
|
||||
; CHECK: ldr [[A:[rl0-9]+]],{{.*}}, #2]!
|
||||
; CHECK: ldr [[B:[rl0-9]+]],{{.*}}, #2]!
|
||||
; CHECK: smlad [[ACC:[rl0-9]+]], [[B]], [[A]], [[ACC]]
|
||||
; CHECK: asr.w [[ASR:[rl0-9]+]], [[B]], #16
|
||||
; CHECK: asrs [[ASR:[rl0-9]+]], [[A]], #16
|
||||
; CHECK: smlad [[ACC:[rl0-9]+]], [[A]], [[B]], [[ACC]]
|
||||
; CHECK: mul [[COUNT:[rl0-9]+]],{{.}}[[ASR]]
|
||||
define i32 @mul_top_user(i32 %arg, i32* nocapture readnone %arg1, i16* nocapture readonly %arg2, i16* nocapture readonly %arg3) {
|
||||
entry:
|
||||
|
@ -151,10 +151,10 @@ for.body:
|
|||
|
||||
; CHECK-LABEL: and_user
|
||||
; CHECK: %for.body
|
||||
; CHECK: ldr [[A:r[0-9]+]],{{.*}}, #2]!
|
||||
; CHECK: ldr [[B:r[0-9]+]],{{.*}}, #2]!
|
||||
; CHECK: smlad [[ACC:r[0-9]+]], [[B]], [[A]], [[ACC]]
|
||||
; CHECK: ldr [[A:[rl0-9]+]],{{.*}}, #2]!
|
||||
; CHECK: ldr [[B:[rl0-9]+]],{{.*}}, #2]!
|
||||
; CHECK: uxth [[UXT:r[0-9]+]], [[A]]
|
||||
; CHECK: smlad [[ACC:r[0-9]+]], [[B]], [[A]], [[ACC]]
|
||||
; CHECK: mul [[MUL:r[0-9]+]],{{.*}}[[UXT]]
|
||||
define i32 @and_user(i32 %arg, i32* nocapture readnone %arg1, i16* nocapture readonly %arg2, i16* nocapture readonly %arg3) {
|
||||
entry:
|
||||
|
@ -201,12 +201,12 @@ for.body:
|
|||
|
||||
; CHECK-LABEL: multi_uses
|
||||
; CHECK: %for.body
|
||||
; CHECK: ldr [[A:r[0-9]+]], [{{.*}}, #2]!
|
||||
; CHECK: ldr [[B:r[0-9]+]], [{{.*}}, #2]!
|
||||
; CHECK: smlad [[ACC:[rl0-9]+]], [[B]], [[A]], [[ACC]]
|
||||
; CHECK: ldr [[A:[rl0-9]+]], [{{.*}}, #2]!
|
||||
; CHECK: ldr [[B:[rl0-9]+]], [{{.*}}, #2]!
|
||||
; CHECK: sxth [[SXT:r[0-9]+]], [[A]]
|
||||
; CHECK: smlad [[ACC:[rl0-9]+]], [[B]], [[A]], [[ACC]]
|
||||
; CHECK: eor.w [[EOR:r[0-9]+]], [[SXT]], [[SHIFT:r[0-9]+]]
|
||||
; CHECK: mul [[MUL:r[0-9]+]],{{.*}}[[SXT]]
|
||||
; CHECK: muls [[MUL:r[0-9]+]],{{.*}}[[SXT]]
|
||||
; CHECK: lsl.w [[SHIFT]], [[MUL]], #16
|
||||
define i32 @multi_uses(i32 %arg, i32* nocapture readnone %arg1, i16* nocapture readonly %arg2, i16* nocapture readonly %arg3) {
|
||||
entry:
|
||||
|
|
|
@ -76,8 +76,8 @@ define arm_aapcs_vfpcc void @test_1double_nosplit([4 x float], [4 x double], [3
|
|||
|
||||
; CHECK-M4F-LABEL: test_1double_nosplit:
|
||||
; CHECK-M4F: movs [[ONEHI:r[0-9]+]], #0
|
||||
; CHECK-M4F: movs [[ONELO:r[0-9]+]], #0
|
||||
; CHECK-M4F: movt [[ONEHI]], #16368
|
||||
; CHECK-M4F: movs [[ONELO:r[0-9]+]], #0
|
||||
; CHECK-M4F: strd [[ONELO]], [[ONEHI]], [sp]
|
||||
; CHECK-M4F: bl test_1double_nosplit
|
||||
call arm_aapcs_vfpcc void @test_1double_nosplit([4 x float] undef, [4 x double] undef, [3 x float] undef, double 1.0)
|
||||
|
@ -97,8 +97,8 @@ define arm_aapcs_vfpcc void @test_1double_misaligned([4 x double], [4 x double],
|
|||
|
||||
; CHECK-M4F-LABEL: test_1double_misaligned:
|
||||
; CHECK-M4F: movs [[ONEHI:r[0-9]+]], #0
|
||||
; CHECK-M4F: movs [[ONELO:r[0-9]+]], #0
|
||||
; CHECK-M4F: movt [[ONEHI]], #16368
|
||||
; CHECK-M4F: movs [[ONELO:r[0-9]+]], #0
|
||||
; CHECK-M4F: strd [[ONELO]], [[ONEHI]], [sp, #8]
|
||||
; CHECK-M4F: bl test_1double_misaligned
|
||||
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-r52 | FileCheck %s --check-prefix=CHECK --check-prefix=USEAA
|
||||
; RUN: llc < %s -mtriple=armv7m-eabi -mcpu=cortex-m4 | FileCheck %s --check-prefix=CHECK --check-prefix=USEAA
|
||||
; RUN: llc < %s -mtriple=armv8m-eabi -mcpu=cortex-m33 | FileCheck %s --check-prefix=CHECK --check-prefix=USEAA
|
||||
; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=generic | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
|
||||
|
||||
; Check we use AA during codegen, so can interleave these loads/stores.
|
||||
|
|
|
@ -100,10 +100,10 @@ if.end:
|
|||
; CHECK-BP: str
|
||||
; CHECK-BP: b
|
||||
; CHECK-BP: str
|
||||
; CHECK-BP: ldr
|
||||
; CHECK-BP: add
|
||||
; CHECK-NOBP: ittee
|
||||
; CHECK-NOBP: streq
|
||||
; CHECK-NOBP: ldreq
|
||||
; CHECK-NOBP: addeq
|
||||
; CHECK-NOBP: strne
|
||||
; CHECK-NOBP: strne
|
||||
define i32 @diamond2(i32 %n, i32* %p, i32* %q) {
|
||||
|
@ -119,7 +119,7 @@ if.then:
|
|||
|
||||
if.else:
|
||||
store i32 %n, i32* %q, align 4
|
||||
%0 = load i32, i32* %p, align 4
|
||||
%0 = add i32 %n, 10
|
||||
br label %if.end
|
||||
|
||||
if.end:
|
||||
|
|
|
@ -0,0 +1,60 @@
|
|||
# RUN: llc %s -run-pass machine-scheduler -o - | FileCheck %s
|
||||
|
||||
# CHECK-LABEL: bb.0.
|
||||
# CHECK: t2LDRi12
|
||||
# CHECK-NEXT: t2LDRi12
|
||||
# CHECK-NEXT: t2ADDri
|
||||
# CHECK-NEXT: t2ADDri
|
||||
--- |
|
||||
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
|
||||
target triple = "thumbv7em-arm-none-eabi"
|
||||
|
||||
; Function Attrs: norecurse nounwind optsize readonly
|
||||
define dso_local i32 @test(i32* nocapture readonly %a, i32* nocapture readonly %b) local_unnamed_addr #0 {
|
||||
entry:
|
||||
%0 = load i32, i32* %a, align 4
|
||||
%add = add nsw i32 %0, 10
|
||||
%1 = load i32, i32* %b, align 4
|
||||
%add1 = add nsw i32 %1, 20
|
||||
%mul = mul nsw i32 %add1, %add
|
||||
ret i32 %mul
|
||||
}
|
||||
|
||||
attributes #0 = { "target-cpu"="cortex-m4" }
|
||||
|
||||
...
|
||||
---
|
||||
name: test
|
||||
alignment: 1
|
||||
exposesReturnsTwice: false
|
||||
legalized: false
|
||||
regBankSelected: false
|
||||
selected: false
|
||||
failedISel: false
|
||||
tracksRegLiveness: true
|
||||
registers:
|
||||
- { id: 0, class: gpr, preferred-register: '' }
|
||||
- { id: 1, class: gpr, preferred-register: '' }
|
||||
- { id: 2, class: gprnopc, preferred-register: '' }
|
||||
- { id: 3, class: rgpr, preferred-register: '' }
|
||||
- { id: 4, class: gprnopc, preferred-register: '' }
|
||||
- { id: 5, class: rgpr, preferred-register: '' }
|
||||
- { id: 6, class: rgpr, preferred-register: '' }
|
||||
liveins:
|
||||
- { reg: '$r0', virtual-reg: '%0' }
|
||||
- { reg: '$r1', virtual-reg: '%1' }
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $r0, $r1
|
||||
|
||||
%1:gpr = COPY $r1
|
||||
%0:gpr = COPY $r0
|
||||
%2:gprnopc = t2LDRi12 %0, 0, 14, $noreg :: (load 4 from %ir.a)
|
||||
%3:rgpr = nsw t2ADDri %2, 10, 14, $noreg, $noreg
|
||||
%4:gprnopc = t2LDRi12 %1, 0, 14, $noreg :: (load 4 from %ir.b)
|
||||
%5:rgpr = nsw t2ADDri %4, 20, 14, $noreg, $noreg
|
||||
%6:rgpr = nsw t2MUL %5, %3, 14, $noreg
|
||||
$r0 = COPY %6
|
||||
tBX_RET 14, $noreg, implicit $r0
|
||||
|
||||
...
|
|
@ -0,0 +1,52 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc %s -o - | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
|
||||
target triple = "thumbv7em-arm-none-eabi"
|
||||
|
||||
%struct.a = type { i32, %struct.b*, i8, i8, i8, i8, i8*, %struct.b*, i16, i16, i16, i16, i16, i16, i16, i16, i32, i32, i32, i32, i32, i32, i32 }
|
||||
%struct.b = type { i8, i8, i8, i8, i32, i16, i16, i32, i32, i32, i32, [16 x i8], [64 x i8], [128 x i8], i32, [68 x i8] }
|
||||
|
||||
define void @test(%struct.a* nocapture %dhcp, i16 zeroext %value) #0 {
|
||||
; CHECK-LABEL: test:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .save {r7, lr}
|
||||
; CHECK-NEXT: push {r7, lr}
|
||||
; CHECK-NEXT: ldrh r3, [r0, #20]
|
||||
; CHECK-NEXT: ldr.w lr, [r0, #16]
|
||||
; CHECK-NEXT: lsr.w r12, r1, #8
|
||||
; CHECK-NEXT: adds r2, r3, #1
|
||||
; CHECK-NEXT: strh r2, [r0, #20]
|
||||
; CHECK-NEXT: add.w r2, lr, r3
|
||||
; CHECK-NEXT: strb.w r12, [r2, #240]
|
||||
; CHECK-NEXT: ldrh r2, [r0, #20]
|
||||
; CHECK-NEXT: ldr.w r12, [r0, #16]
|
||||
; CHECK-NEXT: adds r3, r2, #1
|
||||
; CHECK-NEXT: strh r3, [r0, #20]
|
||||
; CHECK-NEXT: add.w r0, r12, r2
|
||||
; CHECK-NEXT: strb.w r1, [r0, #240]
|
||||
; CHECK-NEXT: pop {r7, pc}
|
||||
entry:
|
||||
%shr = lshr i16 %value, 8
|
||||
%conv1 = trunc i16 %shr to i8
|
||||
%msg_out = getelementptr inbounds %struct.a, %struct.a* %dhcp, i32 0, i32 7
|
||||
%0 = load %struct.b*, %struct.b** %msg_out, align 4
|
||||
%options_out_len = getelementptr inbounds %struct.a, %struct.a* %dhcp, i32 0, i32 8
|
||||
%1 = load i16, i16* %options_out_len, align 4
|
||||
%inc = add i16 %1, 1
|
||||
store i16 %inc, i16* %options_out_len, align 4
|
||||
%idxprom = zext i16 %1 to i32
|
||||
%arrayidx = getelementptr inbounds %struct.b, %struct.b* %0, i32 0, i32 15, i32 %idxprom
|
||||
store i8 %conv1, i8* %arrayidx, align 1
|
||||
%conv4 = trunc i16 %value to i8
|
||||
%2 = load %struct.b*, %struct.b** %msg_out, align 4
|
||||
%3 = load i16, i16* %options_out_len, align 4
|
||||
%inc8 = add i16 %3, 1
|
||||
store i16 %inc8, i16* %options_out_len, align 4
|
||||
%idxprom9 = zext i16 %3 to i32
|
||||
%arrayidx10 = getelementptr inbounds %struct.b, %struct.b* %2, i32 0, i32 15, i32 %idxprom9
|
||||
store i8 %conv4, i8* %arrayidx10, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { minsize optsize "target-cpu"="cortex-m4" }
|
Loading…
Reference in New Issue