2012-02-18 20:03:15 +08:00
|
|
|
//===-- X86InstrControl.td - Control Flow Instructions -----*- tablegen -*-===//
|
2011-01-26 10:03:37 +08:00
|
|
|
//
|
2010-10-05 14:04:14 +08:00
|
|
|
// The LLVM Compiler Infrastructure
|
|
|
|
//
|
|
|
|
// This file is distributed under the University of Illinois Open Source
|
|
|
|
// License. See LICENSE.TXT for details.
|
2011-01-26 10:03:37 +08:00
|
|
|
//
|
2010-10-05 14:04:14 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
// This file describes the X86 jump, return, call, and related instructions.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Control Flow Instructions.
|
|
|
|
//
|
|
|
|
|
|
|
|
// Return instructions.
|
2012-08-25 04:52:44 +08:00
|
|
|
//
|
|
|
|
// The X86retflag return instructions are variadic because we may add ST0 and
|
|
|
|
// ST1 arguments when returning values on the x87 stack.
|
2010-10-05 14:04:14 +08:00
|
|
|
// Return-family definitions. The let-statement below applies isTerminator,
// isReturn, isBarrier, hasCtrlDep, FPForm = SpecialFP and
// SchedRW = [WriteJumpLd] to every def up to the matching closing brace.
let isTerminator = 1, isReturn = 1, isBarrier = 1,
|
2013-03-27 02:24:17 +08:00
|
|
|
hasCtrlDep = 1, FPForm = SpecialFP, SchedRW = [WriteJumpLd] in {
|
2014-01-08 20:58:07 +08:00
|
|
|
// Opcode 0xC3 returns with no explicit operand. RETL and RETQ both carry
// OpSize32 and are told apart by their mode predicate (Not64BitMode vs.
// In64BitMode); RETW is the OpSize16 form. Only RETL/RETQ take variable_ops.
def RETL : I <0xC3, RawFrm, (outs), (ins variable_ops),
|
2016-03-05 06:56:17 +08:00
|
|
|
"ret{l}", [], IIC_RET>, OpSize32,
|
2014-02-18 16:18:29 +08:00
|
|
|
Requires<[Not64BitMode]>;
|
2014-01-08 20:58:07 +08:00
|
|
|
def RETQ : I <0xC3, RawFrm, (outs), (ins variable_ops),
|
2016-03-05 06:56:17 +08:00
|
|
|
"ret{q}", [], IIC_RET>, OpSize32,
|
2014-02-18 16:18:29 +08:00
|
|
|
Requires<[In64BitMode]>;
|
2012-07-05 07:53:27 +08:00
|
|
|
def RETW : I <0xC3, RawFrm, (outs), (ins),
|
2012-04-11 09:10:53 +08:00
|
|
|
"ret{w}",
|
2014-02-02 17:25:09 +08:00
|
|
|
[], IIC_RET>, OpSize16;
|
2014-01-13 22:05:59 +08:00
|
|
|
// Opcode 0xC2 returns taking a 16-bit immediate $amt. Same L/Q/W split as the
// 0xC3 forms above; these use the IIC_RET_IMM itinerary class.
def RETIL : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
|
2014-01-08 20:58:07 +08:00
|
|
|
"ret{l}\t$amt",
|
2016-03-05 06:56:17 +08:00
|
|
|
[], IIC_RET_IMM>, OpSize32,
|
2014-01-13 22:05:59 +08:00
|
|
|
Requires<[Not64BitMode]>;
|
|
|
|
def RETIQ : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
|
|
|
|
"ret{q}\t$amt",
|
2016-03-05 06:56:17 +08:00
|
|
|
[], IIC_RET_IMM>, OpSize32,
|
2014-01-13 22:05:59 +08:00
|
|
|
Requires<[In64BitMode]>;
|
2012-07-05 07:53:27 +08:00
|
|
|
def RETIW : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt),
|
2012-04-11 09:10:53 +08:00
|
|
|
"ret{w}\t$amt",
|
2014-02-02 17:25:09 +08:00
|
|
|
[], IIC_RET_IMM>, OpSize16;
|
2010-11-13 02:54:56 +08:00
|
|
|
// "lret"/"retf" forms: opcode 0xCB without an operand and 0xCA with a 16-bit
// immediate $amt. The Q variants use the RI/RIi16 classes and require
// In64BitMode; the L and W variants are distinguished by OpSize32/OpSize16.
def LRETL : I <0xCB, RawFrm, (outs), (ins),
|
2014-02-02 17:25:09 +08:00
|
|
|
"{l}ret{l|f}", [], IIC_RET>, OpSize32;
|
2014-01-13 22:05:59 +08:00
|
|
|
def LRETQ : RI <0xCB, RawFrm, (outs), (ins),
|
|
|
|
"{l}ret{|f}q", [], IIC_RET>, Requires<[In64BitMode]>;
|
2012-04-11 09:10:53 +08:00
|
|
|
def LRETW : I <0xCB, RawFrm, (outs), (ins),
|
2014-02-02 17:25:09 +08:00
|
|
|
"{l}ret{w|f}", [], IIC_RET>, OpSize16;
|
2014-01-13 22:05:59 +08:00
|
|
|
def LRETIL : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
|
2014-02-02 17:25:09 +08:00
|
|
|
"{l}ret{l|f}\t$amt", [], IIC_RET>, OpSize32;
|
2014-01-13 22:05:59 +08:00
|
|
|
def LRETIQ : RIi16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
|
|
|
|
"{l}ret{|f}q\t$amt", [], IIC_RET>, Requires<[In64BitMode]>;
|
2010-10-19 01:04:36 +08:00
|
|
|
def LRETIW : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
|
2014-02-02 17:25:09 +08:00
|
|
|
"{l}ret{w|f}\t$amt", [], IIC_RET>, OpSize16;
|
2015-12-21 22:07:14 +08:00
|
|
|
|
|
|
|
|
// The machine return from interrupt instruction, but sometimes we need to
|
|
|
|
// perform a post-epilogue stack adjustment. Codegen emits the pseudo form
|
|
|
|
// which expands to include an SP adjustment if necessary.
|
|
|
|
def IRET16 : I <0xcf, RawFrm, (outs), (ins), "iret{w}", [], IIC_IRET>,
|
|
|
|
OpSize16;
|
|
|
|
def IRET32 : I <0xcf, RawFrm, (outs), (ins), "iret{l|d}", [],
|
|
|
|
IIC_IRET>, OpSize32;
|
|
|
|
def IRET64 : RI <0xcf, RawFrm, (outs), (ins), "iretq", [],
|
|
|
|
IIC_IRET>, Requires<[In64BitMode]>;
|
|
|
|
// Pseudo forms selected from X86iret / X86retflag; $adj is the immediate that
// the patterns match with timm.
// NOTE(review): the 'let isCodeGenOnly = 1 in' below has no braces, so it
// scopes over the IRET def only and not over RET - confirm this is intended.
let isCodeGenOnly = 1 in
|
2016-03-05 06:56:17 +08:00
|
|
|
def IRET : PseudoI<(outs), (ins i32imm:$adj), [(X86iret timm:$adj)]>;
|
|
|
|
def RET : PseudoI<(outs), (ins i32imm:$adj, variable_ops), [(X86retflag timm:$adj)]>;
|
2010-10-05 14:04:14 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Unconditional branches.
|
2013-03-27 02:24:17 +08:00
|
|
|
// Direct pc-relative "jmp" in three displacement widths. Every def here is
// marked isBarrier, isBranch and isTerminator and scheduled as WriteJump.
let isBarrier = 1, isBranch = 1, isTerminator = 1, SchedRW = [WriteJump] in {
|
2010-10-05 14:04:14 +08:00
|
|
|
// 8-bit displacement form (opcode 0xEB); the only one of the three that
// carries a selection pattern, (br bb:$dst).
def JMP_1 : Ii8PCRel<0xEB, RawFrm, (outs), (ins brtarget8:$dst),
|
2015-01-06 12:23:53 +08:00
|
|
|
"jmp\t$dst", [(br bb:$dst)], IIC_JMP_REL>;
|
2015-01-06 12:23:57 +08:00
|
|
|
// 16- and 32-bit displacement forms share opcode 0xE9 and differ only in
// OpSize16 vs. OpSize32. They have empty patterns and are flagged
// isCodeGenOnly with ForceDisassemble = 1.
let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
|
2015-01-06 16:59:30 +08:00
|
|
|
def JMP_2 : Ii16PCRel<0xE9, RawFrm, (outs), (ins brtarget16:$dst),
|
2015-01-06 12:23:53 +08:00
|
|
|
"jmp\t$dst", [], IIC_JMP_REL>, OpSize16;
|
2015-01-06 16:59:30 +08:00
|
|
|
def JMP_4 : Ii32PCRel<0xE9, RawFrm, (outs), (ins brtarget32:$dst),
|
2015-01-06 12:23:53 +08:00
|
|
|
"jmp\t$dst", [], IIC_JMP_REL>, OpSize32;
|
|
|
|
}
|
2010-10-05 14:04:14 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Conditional Branches.
|
2013-03-27 02:24:17 +08:00
|
|
|
// ICBr: template for one conditional branch in three displacement widths.
// The enclosing let marks every generated def as isBranch/isTerminator, as a
// reader of EFLAGS, and as WriteJump for scheduling.
//   opc1 - opcode of the 8-bit displacement form (_1).
//   opc4 - opcode shared by the 16-bit (_2) and 32-bit (_4) forms, both TB.
//   asm  - assembly string used unchanged by all three defs.
//   Cond - condition PatFrag used in the X86brcond pattern of _1.
let isBranch = 1, isTerminator = 1, Uses = [EFLAGS], SchedRW = [WriteJump] in {
|
2010-10-05 14:04:14 +08:00
|
|
|
multiclass ICBr<bits<8> opc1, bits<8> opc4, string asm, PatFrag Cond> {
|
2015-01-06 12:23:53 +08:00
|
|
|
// Only the 8-bit form has a selection pattern.
def _1 : Ii8PCRel <opc1, RawFrm, (outs), (ins brtarget8:$dst), asm,
|
|
|
|
[(X86brcond bb:$dst, Cond, EFLAGS)], IIC_Jcc>;
|
2015-01-06 12:23:57 +08:00
|
|
|
// The wider forms have empty patterns and are isCodeGenOnly with
// ForceDisassemble = 1; they differ only in OpSize16 vs. OpSize32.
let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
|
2015-01-06 16:59:30 +08:00
|
|
|
def _2 : Ii16PCRel<opc4, RawFrm, (outs), (ins brtarget16:$dst), asm,
|
2015-01-06 12:23:53 +08:00
|
|
|
[], IIC_Jcc>, OpSize16, TB;
|
2015-01-06 16:59:30 +08:00
|
|
|
def _4 : Ii32PCRel<opc4, RawFrm, (outs), (ins brtarget32:$dst), asm,
|
2015-01-06 12:23:53 +08:00
|
|
|
[], IIC_Jcc>, TB, OpSize32;
|
|
|
|
}
|
2010-10-05 14:04:14 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// One ICBr instantiation per condition code. Each row gives the 8-bit-form
// opcode (0x70..0x7F), the wide-form opcode (0x80..0x8F, i.e. the first opcode
// plus 0x10 in every row), the assembly string, and the X86_COND_* fragment.
defm JO : ICBr<0x70, 0x80, "jo\t$dst" , X86_COND_O>;
|
2015-01-06 12:23:53 +08:00
|
|
|
defm JNO : ICBr<0x71, 0x81, "jno\t$dst", X86_COND_NO>;
|
2010-10-05 14:04:14 +08:00
|
|
|
defm JB : ICBr<0x72, 0x82, "jb\t$dst" , X86_COND_B>;
|
|
|
|
defm JAE : ICBr<0x73, 0x83, "jae\t$dst", X86_COND_AE>;
|
|
|
|
defm JE : ICBr<0x74, 0x84, "je\t$dst" , X86_COND_E>;
|
|
|
|
defm JNE : ICBr<0x75, 0x85, "jne\t$dst", X86_COND_NE>;
|
|
|
|
defm JBE : ICBr<0x76, 0x86, "jbe\t$dst", X86_COND_BE>;
|
|
|
|
defm JA : ICBr<0x77, 0x87, "ja\t$dst" , X86_COND_A>;
|
|
|
|
defm JS : ICBr<0x78, 0x88, "js\t$dst" , X86_COND_S>;
|
|
|
|
defm JNS : ICBr<0x79, 0x89, "jns\t$dst", X86_COND_NS>;
|
|
|
|
defm JP : ICBr<0x7A, 0x8A, "jp\t$dst" , X86_COND_P>;
|
|
|
|
defm JNP : ICBr<0x7B, 0x8B, "jnp\t$dst", X86_COND_NP>;
|
|
|
|
defm JL : ICBr<0x7C, 0x8C, "jl\t$dst" , X86_COND_L>;
|
|
|
|
defm JGE : ICBr<0x7D, 0x8D, "jge\t$dst", X86_COND_GE>;
|
|
|
|
defm JLE : ICBr<0x7E, 0x8E, "jle\t$dst", X86_COND_LE>;
|
|
|
|
defm JG : ICBr<0x7F, 0x8F, "jg\t$dst" , X86_COND_G>;
|
|
|
|
|
|
|
|
// jcx/jecx/jrcx instructions.
|
2013-09-03 11:56:17 +08:00
|
|
|
// Branches on the count register. All three defs share opcode 0xE3 with an
// 8-bit pc-relative target and have no selection pattern; they differ in the
// register listed in Uses (CX/ECX/RCX) and the address-size marker
// (AdSize16/AdSize32/AdSize64).
let isBranch = 1, isTerminator = 1, hasSideEffects = 0, SchedRW = [WriteJump] in {
|
2010-10-05 14:04:14 +08:00
|
|
|
// These are the 32-bit versions of this instruction for the asmparser. In
|
|
|
|
// 32-bit mode, the address-size-prefixed form is jcxz and the unprefixed form is
|
|
|
|
// jecxz.
|
|
|
|
let Uses = [CX] in
|
|
|
|
def JCXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
|
2015-07-04 08:01:07 +08:00
|
|
|
"jcxz\t$dst", [], IIC_JCXZ>, AdSize16,
|
|
|
|
Requires<[Not64BitMode]>;
|
2010-10-05 14:04:14 +08:00
|
|
|
// JECXZ is the only one of the three without a mode predicate.
let Uses = [ECX] in
|
2015-01-02 15:02:25 +08:00
|
|
|
def JECXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
|
|
|
|
"jecxz\t$dst", [], IIC_JCXZ>, AdSize32;
|
2010-10-05 14:04:14 +08:00
|
|
|
|
|
|
|
|
let Uses = [RCX] in
|
|
|
|
def JRCXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
|
2015-07-04 08:01:07 +08:00
|
|
|
"jrcxz\t$dst", [], IIC_JCXZ>, AdSize64,
|
|
|
|
Requires<[In64BitMode]>;
|
2010-10-05 14:04:14 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Indirect branches
|
|
|
|
// Indirect and far jumps. Every def inside the braces is marked isBranch,
// isTerminator, isBarrier and isIndirectBranch.
let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
|
2014-01-08 20:57:49 +08:00
|
|
|
// Near indirect jumps (opcode 0xFF). The *r forms jump through a register
// (MRM4r, Sched WriteJump); the *m forms jump through a loaded memory operand
// (MRM4m, Sched WriteJumpLd). All six carry a brind selection pattern. The
// 16- and 32-bit forms require Not64BitMode; the 64-bit forms require
// In64BitMode and carry no OpSize marker.
def JMP16r : I<0xFF, MRM4r, (outs), (ins GR16:$dst), "jmp{w}\t{*}$dst",
|
|
|
|
[(brind GR16:$dst)], IIC_JMP_REG>, Requires<[Not64BitMode]>,
|
2014-02-02 17:25:09 +08:00
|
|
|
OpSize16, Sched<[WriteJump]>;
|
2014-01-08 20:57:49 +08:00
|
|
|
def JMP16m : I<0xFF, MRM4m, (outs), (ins i16mem:$dst), "jmp{w}\t{*}$dst",
|
|
|
|
[(brind (loadi16 addr:$dst))], IIC_JMP_MEM>,
|
2014-02-02 17:25:09 +08:00
|
|
|
Requires<[Not64BitMode]>, OpSize16, Sched<[WriteJumpLd]>;
|
2014-01-08 20:57:49 +08:00
|
|
|
|
2010-10-05 14:04:14 +08:00
|
|
|
def JMP32r : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst",
|
2013-12-20 10:04:49 +08:00
|
|
|
[(brind GR32:$dst)], IIC_JMP_REG>, Requires<[Not64BitMode]>,
|
2014-02-02 17:25:09 +08:00
|
|
|
OpSize32, Sched<[WriteJump]>;
|
2010-10-05 14:04:14 +08:00
|
|
|
def JMP32m : I<0xFF, MRM4m, (outs), (ins i32mem:$dst), "jmp{l}\t{*}$dst",
|
2013-03-27 02:24:17 +08:00
|
|
|
[(brind (loadi32 addr:$dst))], IIC_JMP_MEM>,
|
2014-02-02 17:25:09 +08:00
|
|
|
Requires<[Not64BitMode]>, OpSize32, Sched<[WriteJumpLd]>;
|
2010-10-05 14:04:14 +08:00
|
|
|
|
|
|
|
|
def JMP64r : I<0xFF, MRM4r, (outs), (ins GR64:$dst), "jmp{q}\t{*}$dst",
|
2013-03-27 02:24:17 +08:00
|
|
|
[(brind GR64:$dst)], IIC_JMP_REG>, Requires<[In64BitMode]>,
|
|
|
|
Sched<[WriteJump]>;
|
2010-10-05 14:04:14 +08:00
|
|
|
def JMP64m : I<0xFF, MRM4m, (outs), (ins i64mem:$dst), "jmp{q}\t{*}$dst",
|
2013-03-27 02:24:17 +08:00
|
|
|
[(brind (loadi64 addr:$dst))], IIC_JMP_MEM>,
|
|
|
|
Requires<[In64BitMode]>, Sched<[WriteJumpLd]>;
|
2010-10-05 14:04:14 +08:00
|
|
|
|
2014-12-20 15:43:27 +08:00
|
|
|
// Far jumps to an immediate $seg/$off pair (opcode 0xEA, RawFrmImm16). Both
// are restricted to Not64BitMode through Predicates and have no pattern.
let Predicates = [Not64BitMode] in {
|
|
|
|
def FARJMP16i : Iseg16<0xEA, RawFrmImm16, (outs),
|
|
|
|
(ins i16imm:$off, i16imm:$seg),
|
2014-12-21 07:05:52 +08:00
|
|
|
"ljmp{w}\t$seg, $off", [],
|
2014-12-20 15:43:27 +08:00
|
|
|
IIC_JMP_FAR_PTR>, OpSize16, Sched<[WriteJump]>;
|
|
|
|
def FARJMP32i : Iseg32<0xEA, RawFrmImm16, (outs),
|
|
|
|
(ins i32imm:$off, i16imm:$seg),
|
2014-12-21 07:05:52 +08:00
|
|
|
"ljmp{l}\t$seg, $off", [],
|
2014-12-20 15:43:27 +08:00
|
|
|
IIC_JMP_FAR_PTR>, OpSize32, Sched<[WriteJump]>;
|
|
|
|
}
|
2010-10-05 14:04:14 +08:00
|
|
|
// Far jumps through a memory operand (opcode 0xFF, MRM5m); the operand is
// opaque80mem / opaque32mem / opaque48mem for the q / w / l forms.
// NOTE(review): FARJMP64 is scheduled as WriteJump whereas FARJMP16m and
// FARJMP32m use WriteJumpLd - confirm whether the difference is intended.
def FARJMP64 : RI<0xFF, MRM5m, (outs), (ins opaque80mem:$dst),
|
2013-03-27 02:24:17 +08:00
|
|
|
"ljmp{q}\t{*}$dst", [], IIC_JMP_FAR_MEM>,
|
|
|
|
Sched<[WriteJump]>;
|
2010-10-05 14:04:14 +08:00
|
|
|
|
2011-01-26 10:03:37 +08:00
|
|
|
def FARJMP16m : I<0xFF, MRM5m, (outs), (ins opaque32mem:$dst),
|
2014-02-02 17:25:09 +08:00
|
|
|
"ljmp{w}\t{*}$dst", [], IIC_JMP_FAR_MEM>, OpSize16,
|
2013-03-27 02:24:17 +08:00
|
|
|
Sched<[WriteJumpLd]>;
|
2010-10-05 14:04:14 +08:00
|
|
|
def FARJMP32m : I<0xFF, MRM5m, (outs), (ins opaque48mem:$dst),
|
2014-02-02 17:25:09 +08:00
|
|
|
"ljmp{l}\t{*}$dst", [], IIC_JMP_FAR_MEM>, OpSize32,
|
2013-03-27 02:24:17 +08:00
|
|
|
Sched<[WriteJumpLd]>;
|
2010-10-05 14:04:14 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Loop instructions
|
2013-03-27 02:24:17 +08:00
|
|
|
// loop / loope / loopne: opcodes 0xE2, 0xE1 and 0xE0, each with an 8-bit
// pc-relative target, no selection pattern, and its own itinerary class.
// The only property set by the enclosing let is SchedRW = [WriteJump].
let SchedRW = [WriteJump] in {
|
2012-02-02 07:20:51 +08:00
|
|
|
def LOOP : Ii8PCRel<0xE2, RawFrm, (outs), (ins brtarget8:$dst), "loop\t$dst", [], IIC_LOOP>;
|
|
|
|
def LOOPE : Ii8PCRel<0xE1, RawFrm, (outs), (ins brtarget8:$dst), "loope\t$dst", [], IIC_LOOPE>;
|
|
|
|
def LOOPNE : Ii8PCRel<0xE0, RawFrm, (outs), (ins brtarget8:$dst), "loopne\t$dst", [], IIC_LOOPNE>;
|
2013-03-27 02:24:17 +08:00
|
|
|
}
|
2010-10-05 14:04:14 +08:00
|
|
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Call Instructions...
|
|
|
|
//
|
|
|
|
// 16- and 32-bit call definitions. The brace-less 'let isCall = 1 in' applies
// to the braced 'let Uses = [ESP] in { ... }' statement further down, so every
// def inside those braces is both isCall and a user of ESP.
let isCall = 1 in
|
|
|
|
// All calls clobber the non-callee saved registers. ESP is marked as
|
|
|
|
// a use to prevent stack-pointer assignments that appear immediately
|
|
|
|
// before calls from potentially appearing dead. Uses for argument
|
|
|
|
// registers are added manually.
|
Enable register mask operands for x86 calls.
Call instructions no longer have a list of 43 call-clobbered registers.
Instead, they get a single register mask operand with a bit vector of
call-preserved registers.
This saves a lot of memory, 42 x 32 bytes = 1344 bytes per call
instruction, and it speeds up building call instructions because those
43 imp-def operands no longer need to be added to use-def lists. (And
removed and shifted and re-added for every explicit call operand).
Passes like LiveVariables, LiveIntervals, RAGreedy, PEI, and
BranchFolding are significantly faster because they can deal with call
clobbers in bulk.
Overall, clang -O2 is between 0% and 8% faster, uniformly distributed
depending on call density in the compiled code. Debug builds using
clang -O0 are 0% - 3% faster.
I have verified that this patch doesn't change the assembly generated
for the LLVM nightly test suite when building with -disable-copyprop
and -disable-branch-fold.
Branch folding behaves slightly differently in a few cases because call
instructions have different hash values now.
Copy propagation flushes its data structures when it crosses a register
mask operand. This causes it to leave a few dead copies behind, on the
order of 20 instruction across the entire nightly test suite, including
SPEC. Fixing this properly would require the pass to use different data
structures.
llvm-svn: 150638
2012-02-16 08:02:50 +08:00
|
|
|
let Uses = [ESP] in {
|
2010-10-05 14:04:14 +08:00
|
|
|
// Direct pc-relative calls (opcode 0xE8), no selection pattern. CALLpcrel32
// is OpSize32 and requires Not64BitMode.
def CALLpcrel32 : Ii32PCRel<0xE8, RawFrm,
|
2012-07-05 07:53:27 +08:00
|
|
|
(outs), (ins i32imm_pcrel:$dst),
|
2014-02-02 17:25:09 +08:00
|
|
|
"call{l}\t$dst", [], IIC_CALL_RI>, OpSize32,
|
2013-12-20 10:04:49 +08:00
|
|
|
Requires<[Not64BitMode]>, Sched<[WriteJump]>;
|
2014-12-22 04:05:06 +08:00
|
|
|
// CALLpcrel16 is the OpSize16 counterpart, marked hasSideEffects = 0.
// NOTE(review): unlike CALLpcrel32 it carries no mode predicate - confirm.
let hasSideEffects = 0 in
|
|
|
|
def CALLpcrel16 : Ii16PCRel<0xE8, RawFrm,
|
|
|
|
(outs), (ins i16imm_pcrel:$dst),
|
|
|
|
"call{w}\t$dst", [], IIC_CALL_RI>, OpSize16,
|
|
|
|
Sched<[WriteJump]>;
|
2014-01-08 20:57:49 +08:00
|
|
|
// Indirect calls (opcode 0xFF) through a register (MRM2r, WriteJump) or a
// loaded memory operand (MRM2m, WriteJumpLd). All four have an X86call
// selection pattern and require Not64BitMode; the memory forms additionally
// require FavorMemIndirectCall.
def CALL16r : I<0xFF, MRM2r, (outs), (ins GR16:$dst),
|
|
|
|
"call{w}\t{*}$dst", [(X86call GR16:$dst)], IIC_CALL_RI>,
|
2014-02-02 17:25:09 +08:00
|
|
|
OpSize16, Requires<[Not64BitMode]>, Sched<[WriteJump]>;
|
2014-01-08 20:57:49 +08:00
|
|
|
def CALL16m : I<0xFF, MRM2m, (outs), (ins i16mem:$dst),
|
|
|
|
"call{w}\t{*}$dst", [(X86call (loadi16 addr:$dst))],
|
2014-02-02 17:25:09 +08:00
|
|
|
IIC_CALL_MEM>, OpSize16,
|
2014-01-08 20:57:49 +08:00
|
|
|
Requires<[Not64BitMode,FavorMemIndirectCall]>,
|
|
|
|
Sched<[WriteJumpLd]>;
|
2012-07-05 07:53:27 +08:00
|
|
|
def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst),
|
2012-02-02 07:20:51 +08:00
|
|
|
"call{l}\t{*}$dst", [(X86call GR32:$dst)], IIC_CALL_RI>,
|
2014-02-02 17:25:09 +08:00
|
|
|
OpSize32, Requires<[Not64BitMode]>, Sched<[WriteJump]>;
|
2012-07-05 07:53:27 +08:00
|
|
|
def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst),
|
2013-03-27 02:24:17 +08:00
|
|
|
"call{l}\t{*}$dst", [(X86call (loadi32 addr:$dst))],
|
2014-02-02 17:25:09 +08:00
|
|
|
IIC_CALL_MEM>, OpSize32,
|
2013-12-20 10:04:49 +08:00
|
|
|
Requires<[Not64BitMode,FavorMemIndirectCall]>,
|
2013-03-29 07:13:21 +08:00
|
|
|
Sched<[WriteJumpLd]>;
|
2011-01-26 10:03:37 +08:00
|
|
|
|
2014-12-20 15:43:27 +08:00
|
|
|
// Far calls to an immediate $seg/$off pair (opcode 0x9A, RawFrmImm16),
// restricted to Not64BitMode through Predicates; no selection pattern.
let Predicates = [Not64BitMode] in {
|
|
|
|
def FARCALL16i : Iseg16<0x9A, RawFrmImm16, (outs),
|
|
|
|
(ins i16imm:$off, i16imm:$seg),
|
2014-12-21 07:05:52 +08:00
|
|
|
"lcall{w}\t$seg, $off", [],
|
2014-12-20 15:43:27 +08:00
|
|
|
IIC_CALL_FAR_PTR>, OpSize16, Sched<[WriteJump]>;
|
|
|
|
def FARCALL32i : Iseg32<0x9A, RawFrmImm16, (outs),
|
|
|
|
(ins i32imm:$off, i16imm:$seg),
|
2014-12-21 07:05:52 +08:00
|
|
|
"lcall{l}\t$seg, $off", [],
|
2014-12-20 15:43:27 +08:00
|
|
|
IIC_CALL_FAR_PTR>, OpSize32, Sched<[WriteJump]>;
|
|
|
|
}
|
2011-01-26 10:03:37 +08:00
|
|
|
|
2010-10-05 14:04:14 +08:00
|
|
|
// Far calls through a memory operand (opcode 0xFF, MRM3m), scheduled as
// WriteJumpLd; no selection pattern.
def FARCALL16m : I<0xFF, MRM3m, (outs), (ins opaque32mem:$dst),
|
2014-02-02 17:25:09 +08:00
|
|
|
"lcall{w}\t{*}$dst", [], IIC_CALL_FAR_MEM>, OpSize16,
|
2013-03-27 02:24:17 +08:00
|
|
|
Sched<[WriteJumpLd]>;
|
2010-10-05 14:04:14 +08:00
|
|
|
def FARCALL32m : I<0xFF, MRM3m, (outs), (ins opaque48mem:$dst),
|
2014-02-02 17:25:09 +08:00
|
|
|
"lcall{l}\t{*}$dst", [], IIC_CALL_FAR_MEM>, OpSize32,
|
2013-03-27 02:24:17 +08:00
|
|
|
Sched<[WriteJumpLd]>;
|
2010-10-05 14:04:14 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Tail call stuff.
|
|
|
|
|
|
|
|
// 32-bit tail-call definitions. The brace-less outer let applies to the braced
// 'let Uses = [ESP] in { ... }' statement below, so every def inside is marked
// isCall, isTerminator, isReturn, isBarrier and isCodeGenOnly, is scheduled as
// WriteJumpLd, and uses ESP.
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
|
2013-03-27 02:24:17 +08:00
|
|
|
isCodeGenOnly = 1, SchedRW = [WriteJumpLd] in
|
Enable register mask operands for x86 calls.
Call instructions no longer have a list of 43 call-clobbered registers.
Instead, they get a single register mask operand with a bit vector of
call-preserved registers.
This saves a lot of memory, 42 x 32 bytes = 1344 bytes per call
instruction, and it speeds up building call instructions because those
43 imp-def operands no longer need to be added to use-def lists. (And
removed and shifted and re-added for every explicit call operand).
Passes like LiveVariables, LiveIntervals, RAGreedy, PEI, and
BranchFolding are significantly faster because they can deal with call
clobbers in bulk.
Overall, clang -O2 is between 0% and 8% faster, uniformly distributed
depending on call density in the compiled code. Debug builds using
clang -O0 are 0% - 3% faster.
I have verified that this patch doesn't change the assembly generated
for the LLVM nightly test suite when building with -disable-copyprop
and -disable-branch-fold.
Branch folding behaves slightly differently in a few cases because call
instructions have different hash values now.
Copy propagation flushes its data structures when it crosses a register
mask operand. This causes it to leave a few dead copies behind, on the
order of 20 instruction across the entire nightly test suite, including
SPEC. Fixing this properly would require the pass to use different data
structures.
llvm-svn: 150638
2012-02-16 08:02:50 +08:00
|
|
|
let Uses = [ESP] in {
|
2011-01-26 10:03:37 +08:00
|
|
|
// TCRETURN* pseudos: a destination (pc-relative immediate, register, or
// memory operand) plus an i32imm $offset; TCRETURNdicc additionally takes an
// i32imm $cond. None carries a selection pattern here, and only the memory
// form is marked mayLoad.
def TCRETURNdi : PseudoI<(outs),
|
2012-07-05 07:53:27 +08:00
|
|
|
(ins i32imm_pcrel:$dst, i32imm:$offset), []>;
|
X86: Fold tail calls into conditional branches where possible (PR26302)
When branching to a block that immediately tail calls, it is possible to fold
the call directly into the branch if the call is direct and there is no stack
adjustment, saving one byte.
Example:
define void @f(i32 %x, i32 %y) {
entry:
%p = icmp eq i32 %x, %y
br i1 %p, label %bb1, label %bb2
bb1:
tail call void @foo()
ret void
bb2:
tail call void @bar()
ret void
}
before:
f:
movl 4(%esp), %eax
cmpl 8(%esp), %eax
jne .LBB0_2
jmp foo
.LBB0_2:
jmp bar
after:
f:
movl 4(%esp), %eax
cmpl 8(%esp), %eax
jne bar
.LBB0_1:
jmp foo
I don't expect any significant size savings from this (on a Clang bootstrap I
saw 288 bytes), but it does make the code a little tighter.
This patch only does 32-bit, but 64-bit would work similarly.
Differential Revision: https://reviews.llvm.org/D24108
llvm-svn: 280832
2016-09-08 01:52:14 +08:00
|
|
|
def TCRETURNdicc : PseudoI<(outs),
|
|
|
|
(ins i32imm_pcrel:$dst, i32imm:$offset, i32imm:$cond), []>;
|
2011-01-26 10:03:37 +08:00
|
|
|
def TCRETURNri : PseudoI<(outs),
|
2012-07-05 07:53:27 +08:00
|
|
|
(ins ptr_rc_tailcall:$dst, i32imm:$offset), []>;
|
2010-10-05 14:04:14 +08:00
|
|
|
let mayLoad = 1 in
|
2011-01-26 10:03:37 +08:00
|
|
|
def TCRETURNmi : PseudoI<(outs),
|
2012-07-05 07:53:27 +08:00
|
|
|
(ins i32mem_TC:$dst, i32imm:$offset), []>;
|
2010-10-05 14:04:14 +08:00
|
|
|
|
|
|
|
|
// FIXME: These should be pseudo instructions that are lowered when going to
|
|
|
|
// mcinst.
|
|
|
|
// TAILJMPd uses the same opcode (0xE9) and "jmp" assembly string as the
// ordinary 32-bit pc-relative jump form.
def TAILJMPd : Ii32PCRel<0xE9, RawFrm, (outs),
|
2012-07-05 07:53:27 +08:00
|
|
|
(ins i32imm_pcrel:$dst),
|
2015-01-31 05:03:31 +08:00
|
|
|
"jmp\t$dst",
|
2012-02-02 07:20:51 +08:00
|
|
|
[], IIC_JMP_REL>;
|
X86: Fold tail calls into conditional branches where possible (PR26302)
When branching to a block that immediately tail calls, it is possible to fold
the call directly into the branch if the call is direct and there is no stack
adjustment, saving one byte.
Example:
define void @f(i32 %x, i32 %y) {
entry:
%p = icmp eq i32 %x, %y
br i1 %p, label %bb1, label %bb2
bb1:
tail call void @foo()
ret void
bb2:
tail call void @bar()
ret void
}
before:
f:
movl 4(%esp), %eax
cmpl 8(%esp), %eax
jne .LBB0_2
jmp foo
.LBB0_2:
jmp bar
after:
f:
movl 4(%esp), %eax
cmpl 8(%esp), %eax
jne bar
.LBB0_1:
jmp foo
I don't expect any significant size savings from this (on a Clang bootstrap I
saw 288 bytes), but it does make the code a little tighter.
This patch only does 32-bit, but 64-bit would work similarly.
Differential Revision: https://reviews.llvm.org/D24108
llvm-svn: 280832
2016-09-08 01:52:14 +08:00
|
|
|
|
|
|
|
|
// This gets substituted to a conditional jump instruction in MC lowering.
|
|
|
|
// The assembly string is deliberately empty; $cond is carried as an operand.
def TAILJMPd_CC : Ii32PCRel<0x80, RawFrm, (outs),
|
|
|
|
(ins i32imm_pcrel:$dst, i32imm:$cond),
|
|
|
|
"",
|
|
|
|
[], IIC_JMP_REL>;
|
|
|
|
|
2012-07-05 07:53:27 +08:00
|
|
|
// Register and memory tail jumps (opcode 0xFF, MRM4r / MRM4m). The register
// form has an empty assembly string; the memory form is marked mayLoad.
def TAILJMPr : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst),
|
2012-02-02 07:20:51 +08:00
|
|
|
"", [], IIC_JMP_REG>; // FIXME: Remove encoding when JIT is dead.
|
2010-10-05 14:04:14 +08:00
|
|
|
let mayLoad = 1 in
|
2012-07-05 07:53:27 +08:00
|
|
|
def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem_TC:$dst),
|
2015-01-31 05:03:31 +08:00
|
|
|
"jmp{l}\t{*}$dst", [], IIC_JMP_MEM>;
|
2010-10-05 14:04:14 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// 64-bit Call Instructions...
|
|
|
|
//
|
|
|
|
|
2012-02-17 01:56:02 +08:00
|
|
|
// RSP is marked as a use to prevent stack-pointer assignments that appear
|
|
|
|
// immediately before calls from potentially appearing dead. Uses for argument
|
|
|
|
// registers are added manually.
|
2013-03-27 02:24:17 +08:00
|
|
|
// 64-bit call definitions. Every def inside the braces is marked isCall,
// uses RSP, and is scheduled as WriteJump.
let isCall = 1, Uses = [RSP], SchedRW = [WriteJump] in {
|
2012-02-17 01:56:02 +08:00
|
|
|
// NOTE: this pattern doesn't match "X86call imm", because we do not know
|
|
|
|
// that the offset between an arbitrary immediate and the call will fit in
|
|
|
|
// the 32-bit pcrel field that we have.
|
|
|
|
def CALL64pcrel32 : Ii32PCRel<0xE8, RawFrm,
|
2012-07-05 07:53:27 +08:00
|
|
|
(outs), (ins i64i32imm_pcrel:$dst),
|
2014-02-18 16:18:29 +08:00
|
|
|
"call{q}\t$dst", [], IIC_CALL_RI>, OpSize32,
|
2012-02-17 01:56:02 +08:00
|
|
|
Requires<[In64BitMode]>;
|
2012-07-05 07:53:27 +08:00
|
|
|
// Indirect calls (opcode 0xFF) through a 64-bit register (MRM2r) or a loaded
// 64-bit memory operand (MRM2m). Both have an X86call selection pattern and
// require In64BitMode; the memory form also requires FavorMemIndirectCall.
def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst),
|
2012-02-17 01:56:02 +08:00
|
|
|
"call{q}\t{*}$dst", [(X86call GR64:$dst)],
|
|
|
|
IIC_CALL_RI>,
|
|
|
|
Requires<[In64BitMode]>;
|
2012-07-05 07:53:27 +08:00
|
|
|
def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst),
|
2012-02-17 01:56:02 +08:00
|
|
|
"call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))],
|
|
|
|
IIC_CALL_MEM>,
|
2013-03-29 07:13:21 +08:00
|
|
|
Requires<[In64BitMode,FavorMemIndirectCall]>;
|
2012-02-17 01:56:02 +08:00
|
|
|
|
|
|
|
|
// Far call through an opaque80mem operand (opcode 0xFF, MRM3m, RI class); it
// has no selection pattern and no explicit Requires clause.
def FARCALL64 : RI<0xFF, MRM3m, (outs), (ins opaque80mem:$dst),
|
|
|
|
"lcall{q}\t{*}$dst", [], IIC_CALL_FAR_MEM>;
|
|
|
|
}
|
2010-10-05 14:04:14 +08:00
|
|
|
|
|
|
|
// 64-bit tail-call definitions. Every def inside the braces is marked isCall,
// isTerminator, isReturn, isBarrier and isCodeGenOnly, uses RSP, sets
// usesCustomInserter = 1, and is scheduled as WriteJump.
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
|
2013-03-27 02:24:17 +08:00
|
|
|
isCodeGenOnly = 1, Uses = [RSP], usesCustomInserter = 1,
|
|
|
|
SchedRW = [WriteJump] in {
|
2010-12-01 05:37:36 +08:00
|
|
|
// TCRETURN*64 pseudos: a destination (pc-relative immediate, register, or
// memory operand) plus an i32imm $offset, with no selection pattern. Only the
// memory form is marked mayLoad.
def TCRETURNdi64 : PseudoI<(outs),
|
2012-07-05 07:53:27 +08:00
|
|
|
(ins i64i32imm_pcrel:$dst, i32imm:$offset),
|
2010-12-01 05:37:36 +08:00
|
|
|
[]>;
|
|
|
|
def TCRETURNri64 : PseudoI<(outs),
|
2012-07-05 07:53:27 +08:00
|
|
|
(ins ptr_rc_tailcall:$dst, i32imm:$offset), []>;
|
2010-10-05 14:04:14 +08:00
|
|
|
let mayLoad = 1 in
|
2011-01-26 10:03:37 +08:00
|
|
|
def TCRETURNmi64 : PseudoI<(outs),
|
2012-07-05 07:53:27 +08:00
|
|
|
(ins i64mem_TC:$dst, i32imm:$offset), []>;
|
2010-10-05 14:04:14 +08:00
|
|
|
|
2015-01-31 05:03:31 +08:00
|
|
|
// Encoded tail jumps: direct (0xE9), register (0xFF MRM4r) and memory
// (0xFF MRM4m).
// NOTE(review): the register form TAILJMPr64 uses IIC_JMP_MEM, whereas the
// 32-bit TAILJMPr uses IIC_JMP_REG - confirm the itinerary class is intended.
def TAILJMPd64 : Ii32PCRel<0xE9, RawFrm, (outs), (ins i64i32imm_pcrel:$dst),
|
|
|
|
"jmp\t$dst", [], IIC_JMP_REL>;
|
2012-07-05 07:53:27 +08:00
|
|
|
def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst),
|
2015-01-31 05:03:31 +08:00
|
|
|
"jmp{q}\t{*}$dst", [], IIC_JMP_MEM>;
|
2010-10-05 14:04:14 +08:00
|
|
|
|
|
|
|
|
let mayLoad = 1 in
|
2012-07-05 07:53:27 +08:00
|
|
|
def TAILJMPm64 : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst),
|
2015-01-31 05:03:31 +08:00
|
|
|
"jmp{q}\t{*}$dst", [], IIC_JMP_MEM>;
|
|
|
|
|
|
2016-09-09 07:35:10 +08:00
|
|
|
// Win64 wants indirect jumps leaving the function to have a REX_W prefix.
|
2015-01-31 05:03:31 +08:00
|
|
|
// The _REX variants repeat the register and memory forms above with
// hasREX_WPrefix = 1 and a "rex64 " prefix in the assembly string.
let hasREX_WPrefix = 1 in {
|
|
|
|
def TAILJMPr64_REX : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst),
|
|
|
|
"rex64 jmp{q}\t{*}$dst", [], IIC_JMP_MEM>;
|
|
|
|
|
|
|
|
let mayLoad = 1 in
|
|
|
|
def TAILJMPm64_REX : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst),
|
|
|
|
"rex64 jmp{q}\t{*}$dst", [], IIC_JMP_MEM>;
|
|
|
|
}
|
2010-10-05 14:04:14 +08:00
|
|
|
}
|