//===- X86InstrInfo.cpp - X86 Instruction Information -----------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the X86 implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//
|
#include "X86InstrInfo.h"
|
2002-12-03 13:42:53 +08:00
|
|
|
#include "X86.h"
|
2006-05-31 05:45:53 +08:00
|
|
|
#include "X86GenInstrInfo.inc"
|
2005-01-02 10:37:07 +08:00
|
|
|
#include "X86InstrBuilder.h"
|
2008-01-05 07:57:37 +08:00
|
|
|
#include "X86MachineFunctionInfo.h"
|
2006-05-31 05:45:53 +08:00
|
|
|
#include "X86Subtarget.h"
|
|
|
|
#include "X86TargetMachine.h"
|
2009-01-06 01:59:02 +08:00
|
|
|
#include "llvm/DerivedTypes.h"
|
2009-07-13 12:09:18 +08:00
|
|
|
#include "llvm/LLVMContext.h"
|
2007-09-07 12:06:50 +08:00
|
|
|
#include "llvm/ADT/STLExtras.h"
|
2008-12-03 13:21:24 +08:00
|
|
|
#include "llvm/CodeGen/MachineConstantPool.h"
|
2008-01-05 07:57:37 +08:00
|
|
|
#include "llvm/CodeGen/MachineFrameInfo.h"
|
2003-05-24 08:09:50 +08:00
|
|
|
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
2007-12-31 12:13:23 +08:00
|
|
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
2006-12-02 05:52:41 +08:00
|
|
|
#include "llvm/CodeGen/LiveVariables.h"
|
2009-11-13 04:55:29 +08:00
|
|
|
#include "llvm/CodeGen/PseudoSourceValue.h"
|
2010-04-27 07:37:21 +08:00
|
|
|
#include "llvm/MC/MCInst.h"
|
2008-01-07 09:35:02 +08:00
|
|
|
#include "llvm/Support/CommandLine.h"
|
2010-01-05 09:29:29 +08:00
|
|
|
#include "llvm/Support/Debug.h"
|
2009-07-09 02:01:40 +08:00
|
|
|
#include "llvm/Support/ErrorHandling.h"
|
|
|
|
#include "llvm/Support/raw_ostream.h"
|
2007-09-25 09:57:46 +08:00
|
|
|
#include "llvm/Target/TargetOptions.h"
|
2009-08-23 04:48:53 +08:00
|
|
|
#include "llvm/MC/MCAsmInfo.h"
|
2009-11-13 04:55:29 +08:00
|
|
|
|
|
|
|
#include <limits>
|
|
|
|
|
2003-11-12 06:41:34 +08:00
|
|
|
using namespace llvm;

// Developer command-line knobs controlling the spill-code folding ("fusing")
// logic implemented in this file.

// Completely disable folding of spill loads/stores into instructions.
static cl::opt<bool>
NoFusing("disable-spill-fusing",
         cl::desc("Disable fusing of spill code into instructions"));

// Debugging aid: print each fold candidate the register allocator proposed
// but this backend had to reject.
static cl::opt<bool>
PrintFailedFusing("print-failed-fuse-candidates",
                  cl::desc("Print instructions that the allocator wants to"
                           " fuse, but the X86 backend currently can't"),
                  cl::Hidden);

// Allow re-materializing a load that goes through a PIC stub (off by default).
static cl::opt<bool>
ReMatPICStubLoad("remat-pic-stub-load",
                 cl::desc("Re-materialize load from stub in PIC mode"),
                 cl::init(false), cl::Hidden);
|
2008-01-07 09:35:02 +08:00
|
|
|
|
2006-05-31 05:45:53 +08:00
|
|
|
/// X86InstrInfo constructor - builds the register-form <-> memory-form opcode
/// folding tables used when fusing spill loads/stores into instructions.
///
/// Each OpTblN array maps a register-operand opcode to the equivalent opcode
/// with operand N replaced by a memory operand:
///   - OpTbl2Addr: two-address instructions; folding operand 0 (tied use/def)
///     yields a load-op-store form.  Columns: { reg-opc, mem-opc }.
///   - OpTbl0:     columns { reg-opc, mem-opc, isLoad, alignment } — isLoad
///     distinguishes a folded load from a folded store; alignment is the
///     required memory-operand alignment in bytes (0 = none).
///   - OpTbl1/2:   columns { reg-opc, mem-opc, alignment }.
///
/// The reverse map MemOp2RegOpTable packs, for each memory-form opcode, the
/// register-form opcode plus an AuxInfo word: bits 0-3 = index of the folded
/// operand, bit 4 = folds a load, bit 5 = folds a store.  Memory forms that
/// would map back ambiguously are collected in AmbEntries and rejected by the
/// assert at the end.
X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
  : TargetInstrInfoImpl(X86Insts, array_lengthof(X86Insts)),
    TM(tm), RI(tm, *this) {
  SmallVector<unsigned,16> AmbEntries;
  static const unsigned OpTbl2Addr[][2] = {
    { X86::ADC32ri,     X86::ADC32mi },
    { X86::ADC32ri8,    X86::ADC32mi8 },
    { X86::ADC32rr,     X86::ADC32mr },
    { X86::ADC64ri32,   X86::ADC64mi32 },
    { X86::ADC64ri8,    X86::ADC64mi8 },
    { X86::ADC64rr,     X86::ADC64mr },
    { X86::ADD16ri,     X86::ADD16mi },
    { X86::ADD16ri8,    X86::ADD16mi8 },
    { X86::ADD16rr,     X86::ADD16mr },
    { X86::ADD32ri,     X86::ADD32mi },
    { X86::ADD32ri8,    X86::ADD32mi8 },
    { X86::ADD32rr,     X86::ADD32mr },
    { X86::ADD64ri32,   X86::ADD64mi32 },
    { X86::ADD64ri8,    X86::ADD64mi8 },
    { X86::ADD64rr,     X86::ADD64mr },
    { X86::ADD8ri,      X86::ADD8mi },
    { X86::ADD8rr,      X86::ADD8mr },
    { X86::AND16ri,     X86::AND16mi },
    { X86::AND16ri8,    X86::AND16mi8 },
    { X86::AND16rr,     X86::AND16mr },
    { X86::AND32ri,     X86::AND32mi },
    { X86::AND32ri8,    X86::AND32mi8 },
    { X86::AND32rr,     X86::AND32mr },
    { X86::AND64ri32,   X86::AND64mi32 },
    { X86::AND64ri8,    X86::AND64mi8 },
    { X86::AND64rr,     X86::AND64mr },
    { X86::AND8ri,      X86::AND8mi },
    { X86::AND8rr,      X86::AND8mr },
    { X86::DEC16r,      X86::DEC16m },
    { X86::DEC32r,      X86::DEC32m },
    { X86::DEC64_16r,   X86::DEC64_16m },
    { X86::DEC64_32r,   X86::DEC64_32m },
    { X86::DEC64r,      X86::DEC64m },
    { X86::DEC8r,       X86::DEC8m },
    { X86::INC16r,      X86::INC16m },
    { X86::INC32r,      X86::INC32m },
    { X86::INC64_16r,   X86::INC64_16m },
    { X86::INC64_32r,   X86::INC64_32m },
    { X86::INC64r,      X86::INC64m },
    { X86::INC8r,       X86::INC8m },
    { X86::NEG16r,      X86::NEG16m },
    { X86::NEG32r,      X86::NEG32m },
    { X86::NEG64r,      X86::NEG64m },
    { X86::NEG8r,       X86::NEG8m },
    { X86::NOT16r,      X86::NOT16m },
    { X86::NOT32r,      X86::NOT32m },
    { X86::NOT64r,      X86::NOT64m },
    { X86::NOT8r,       X86::NOT8m },
    { X86::OR16ri,      X86::OR16mi },
    { X86::OR16ri8,     X86::OR16mi8 },
    { X86::OR16rr,      X86::OR16mr },
    { X86::OR32ri,      X86::OR32mi },
    { X86::OR32ri8,     X86::OR32mi8 },
    { X86::OR32rr,      X86::OR32mr },
    { X86::OR64ri32,    X86::OR64mi32 },
    { X86::OR64ri8,     X86::OR64mi8 },
    { X86::OR64rr,      X86::OR64mr },
    { X86::OR8ri,       X86::OR8mi },
    { X86::OR8rr,       X86::OR8mr },
    { X86::ROL16r1,     X86::ROL16m1 },
    { X86::ROL16rCL,    X86::ROL16mCL },
    { X86::ROL16ri,     X86::ROL16mi },
    { X86::ROL32r1,     X86::ROL32m1 },
    { X86::ROL32rCL,    X86::ROL32mCL },
    { X86::ROL32ri,     X86::ROL32mi },
    { X86::ROL64r1,     X86::ROL64m1 },
    { X86::ROL64rCL,    X86::ROL64mCL },
    { X86::ROL64ri,     X86::ROL64mi },
    { X86::ROL8r1,      X86::ROL8m1 },
    { X86::ROL8rCL,     X86::ROL8mCL },
    { X86::ROL8ri,      X86::ROL8mi },
    { X86::ROR16r1,     X86::ROR16m1 },
    { X86::ROR16rCL,    X86::ROR16mCL },
    { X86::ROR16ri,     X86::ROR16mi },
    { X86::ROR32r1,     X86::ROR32m1 },
    { X86::ROR32rCL,    X86::ROR32mCL },
    { X86::ROR32ri,     X86::ROR32mi },
    { X86::ROR64r1,     X86::ROR64m1 },
    { X86::ROR64rCL,    X86::ROR64mCL },
    { X86::ROR64ri,     X86::ROR64mi },
    { X86::ROR8r1,      X86::ROR8m1 },
    { X86::ROR8rCL,     X86::ROR8mCL },
    { X86::ROR8ri,      X86::ROR8mi },
    { X86::SAR16r1,     X86::SAR16m1 },
    { X86::SAR16rCL,    X86::SAR16mCL },
    { X86::SAR16ri,     X86::SAR16mi },
    { X86::SAR32r1,     X86::SAR32m1 },
    { X86::SAR32rCL,    X86::SAR32mCL },
    { X86::SAR32ri,     X86::SAR32mi },
    { X86::SAR64r1,     X86::SAR64m1 },
    { X86::SAR64rCL,    X86::SAR64mCL },
    { X86::SAR64ri,     X86::SAR64mi },
    { X86::SAR8r1,      X86::SAR8m1 },
    { X86::SAR8rCL,     X86::SAR8mCL },
    { X86::SAR8ri,      X86::SAR8mi },
    { X86::SBB32ri,     X86::SBB32mi },
    { X86::SBB32ri8,    X86::SBB32mi8 },
    { X86::SBB32rr,     X86::SBB32mr },
    { X86::SBB64ri32,   X86::SBB64mi32 },
    { X86::SBB64ri8,    X86::SBB64mi8 },
    { X86::SBB64rr,     X86::SBB64mr },
    { X86::SHL16rCL,    X86::SHL16mCL },
    { X86::SHL16ri,     X86::SHL16mi },
    { X86::SHL32rCL,    X86::SHL32mCL },
    { X86::SHL32ri,     X86::SHL32mi },
    { X86::SHL64rCL,    X86::SHL64mCL },
    { X86::SHL64ri,     X86::SHL64mi },
    { X86::SHL8rCL,     X86::SHL8mCL },
    { X86::SHL8ri,      X86::SHL8mi },
    { X86::SHLD16rrCL,  X86::SHLD16mrCL },
    { X86::SHLD16rri8,  X86::SHLD16mri8 },
    { X86::SHLD32rrCL,  X86::SHLD32mrCL },
    { X86::SHLD32rri8,  X86::SHLD32mri8 },
    { X86::SHLD64rrCL,  X86::SHLD64mrCL },
    { X86::SHLD64rri8,  X86::SHLD64mri8 },
    { X86::SHR16r1,     X86::SHR16m1 },
    { X86::SHR16rCL,    X86::SHR16mCL },
    { X86::SHR16ri,     X86::SHR16mi },
    { X86::SHR32r1,     X86::SHR32m1 },
    { X86::SHR32rCL,    X86::SHR32mCL },
    { X86::SHR32ri,     X86::SHR32mi },
    { X86::SHR64r1,     X86::SHR64m1 },
    { X86::SHR64rCL,    X86::SHR64mCL },
    { X86::SHR64ri,     X86::SHR64mi },
    { X86::SHR8r1,      X86::SHR8m1 },
    { X86::SHR8rCL,     X86::SHR8mCL },
    { X86::SHR8ri,      X86::SHR8mi },
    { X86::SHRD16rrCL,  X86::SHRD16mrCL },
    { X86::SHRD16rri8,  X86::SHRD16mri8 },
    { X86::SHRD32rrCL,  X86::SHRD32mrCL },
    { X86::SHRD32rri8,  X86::SHRD32mri8 },
    { X86::SHRD64rrCL,  X86::SHRD64mrCL },
    { X86::SHRD64rri8,  X86::SHRD64mri8 },
    { X86::SUB16ri,     X86::SUB16mi },
    { X86::SUB16ri8,    X86::SUB16mi8 },
    { X86::SUB16rr,     X86::SUB16mr },
    { X86::SUB32ri,     X86::SUB32mi },
    { X86::SUB32ri8,    X86::SUB32mi8 },
    { X86::SUB32rr,     X86::SUB32mr },
    { X86::SUB64ri32,   X86::SUB64mi32 },
    { X86::SUB64ri8,    X86::SUB64mi8 },
    { X86::SUB64rr,     X86::SUB64mr },
    { X86::SUB8ri,      X86::SUB8mi },
    { X86::SUB8rr,      X86::SUB8mr },
    { X86::XOR16ri,     X86::XOR16mi },
    { X86::XOR16ri8,    X86::XOR16mi8 },
    { X86::XOR16rr,     X86::XOR16mr },
    { X86::XOR32ri,     X86::XOR32mi },
    { X86::XOR32ri8,    X86::XOR32mi8 },
    { X86::XOR32rr,     X86::XOR32mr },
    { X86::XOR64ri32,   X86::XOR64mi32 },
    { X86::XOR64ri8,    X86::XOR64mi8 },
    { X86::XOR64rr,     X86::XOR64mr },
    { X86::XOR8ri,      X86::XOR8mi },
    { X86::XOR8rr,      X86::XOR8mr }
  };

  for (unsigned i = 0, e = array_lengthof(OpTbl2Addr); i != e; ++i) {
    unsigned RegOp = OpTbl2Addr[i][0];
    unsigned MemOp = OpTbl2Addr[i][1];
    // Opcodes are cast to pointers only to serve as map keys.
    if (!RegOp2MemOpTable2Addr.insert(std::make_pair((unsigned*)RegOp,
                                               std::make_pair(MemOp,0))).second)
      assert(false && "Duplicated entries?");
    // Index 0, folded load and store, no alignment requirement.
    unsigned AuxInfo = 0 | (1 << 4) | (1 << 5);
    if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
                                                std::make_pair(RegOp,
                                                              AuxInfo))).second)
      AmbEntries.push_back(MemOp);  // duplicate mem form: ambiguous unfold
  }

  // If the third value is 1, then it's folding either a load or a store.
  static const unsigned OpTbl0[][4] = {
    { X86::BT16ri8,     X86::BT16mi8, 1, 0 },
    { X86::BT32ri8,     X86::BT32mi8, 1, 0 },
    { X86::BT64ri8,     X86::BT64mi8, 1, 0 },
    { X86::CALL32r,     X86::CALL32m, 1, 0 },
    { X86::CALL64r,     X86::CALL64m, 1, 0 },
    { X86::WINCALL64r,  X86::WINCALL64m, 1, 0 },
    { X86::CMP16ri,     X86::CMP16mi, 1, 0 },
    { X86::CMP16ri8,    X86::CMP16mi8, 1, 0 },
    { X86::CMP16rr,     X86::CMP16mr, 1, 0 },
    { X86::CMP32ri,     X86::CMP32mi, 1, 0 },
    { X86::CMP32ri8,    X86::CMP32mi8, 1, 0 },
    { X86::CMP32rr,     X86::CMP32mr, 1, 0 },
    { X86::CMP64ri32,   X86::CMP64mi32, 1, 0 },
    { X86::CMP64ri8,    X86::CMP64mi8, 1, 0 },
    { X86::CMP64rr,     X86::CMP64mr, 1, 0 },
    { X86::CMP8ri,      X86::CMP8mi, 1, 0 },
    { X86::CMP8rr,      X86::CMP8mr, 1, 0 },
    { X86::DIV16r,      X86::DIV16m, 1, 0 },
    { X86::DIV32r,      X86::DIV32m, 1, 0 },
    { X86::DIV64r,      X86::DIV64m, 1, 0 },
    { X86::DIV8r,       X86::DIV8m, 1, 0 },
    { X86::EXTRACTPSrr, X86::EXTRACTPSmr, 0, 16 },
    { X86::FsMOVAPDrr,  X86::MOVSDmr, 0, 0 },
    { X86::FsMOVAPSrr,  X86::MOVSSmr, 0, 0 },
    { X86::IDIV16r,     X86::IDIV16m, 1, 0 },
    { X86::IDIV32r,     X86::IDIV32m, 1, 0 },
    { X86::IDIV64r,     X86::IDIV64m, 1, 0 },
    { X86::IDIV8r,      X86::IDIV8m, 1, 0 },
    { X86::IMUL16r,     X86::IMUL16m, 1, 0 },
    { X86::IMUL32r,     X86::IMUL32m, 1, 0 },
    { X86::IMUL64r,     X86::IMUL64m, 1, 0 },
    { X86::IMUL8r,      X86::IMUL8m, 1, 0 },
    { X86::JMP32r,      X86::JMP32m, 1, 0 },
    { X86::JMP64r,      X86::JMP64m, 1, 0 },
    { X86::MOV16ri,     X86::MOV16mi, 0, 0 },
    { X86::MOV16rr,     X86::MOV16mr, 0, 0 },
    { X86::MOV32ri,     X86::MOV32mi, 0, 0 },
    { X86::MOV32rr,     X86::MOV32mr, 0, 0 },
    { X86::MOV32rr_TC,  X86::MOV32mr_TC, 0, 0 },
    { X86::MOV64ri32,   X86::MOV64mi32, 0, 0 },
    { X86::MOV64rr,     X86::MOV64mr, 0, 0 },
    { X86::MOV8ri,      X86::MOV8mi, 0, 0 },
    { X86::MOV8rr,      X86::MOV8mr, 0, 0 },
    { X86::MOV8rr_NOREX, X86::MOV8mr_NOREX, 0, 0 },
    { X86::MOVAPDrr,    X86::MOVAPDmr, 0, 16 },
    { X86::MOVAPSrr,    X86::MOVAPSmr, 0, 16 },
    { X86::MOVDQArr,    X86::MOVDQAmr, 0, 16 },
    { X86::MOVPDI2DIrr, X86::MOVPDI2DImr, 0, 0 },
    { X86::MOVPQIto64rr,X86::MOVPQI2QImr, 0, 0 },
    { X86::MOVSDto64rr, X86::MOVSDto64mr, 0, 0 },
    { X86::MOVSS2DIrr,  X86::MOVSS2DImr, 0, 0 },
    { X86::MOVUPDrr,    X86::MOVUPDmr, 0, 0 },
    { X86::MOVUPSrr,    X86::MOVUPSmr, 0, 0 },
    { X86::MUL16r,      X86::MUL16m, 1, 0 },
    { X86::MUL32r,      X86::MUL32m, 1, 0 },
    { X86::MUL64r,      X86::MUL64m, 1, 0 },
    { X86::MUL8r,       X86::MUL8m, 1, 0 },
    { X86::SETAEr,      X86::SETAEm, 0, 0 },
    { X86::SETAr,       X86::SETAm, 0, 0 },
    { X86::SETBEr,      X86::SETBEm, 0, 0 },
    { X86::SETBr,       X86::SETBm, 0, 0 },
    { X86::SETEr,       X86::SETEm, 0, 0 },
    { X86::SETGEr,      X86::SETGEm, 0, 0 },
    { X86::SETGr,       X86::SETGm, 0, 0 },
    { X86::SETLEr,      X86::SETLEm, 0, 0 },
    { X86::SETLr,       X86::SETLm, 0, 0 },
    { X86::SETNEr,      X86::SETNEm, 0, 0 },
    { X86::SETNOr,      X86::SETNOm, 0, 0 },
    { X86::SETNPr,      X86::SETNPm, 0, 0 },
    { X86::SETNSr,      X86::SETNSm, 0, 0 },
    { X86::SETOr,       X86::SETOm, 0, 0 },
    { X86::SETPr,       X86::SETPm, 0, 0 },
    { X86::SETSr,       X86::SETSm, 0, 0 },
    { X86::TAILJMPr,    X86::TAILJMPm, 1, 0 },
    { X86::TAILJMPr64,  X86::TAILJMPm64, 1, 0 },
    { X86::TEST16ri,    X86::TEST16mi, 1, 0 },
    { X86::TEST32ri,    X86::TEST32mi, 1, 0 },
    { X86::TEST64ri32,  X86::TEST64mi32, 1, 0 },
    { X86::TEST8ri,     X86::TEST8mi, 1, 0 }
  };

  for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) {
    unsigned RegOp = OpTbl0[i][0];
    unsigned MemOp = OpTbl0[i][1];
    unsigned Align = OpTbl0[i][3];
    if (!RegOp2MemOpTable0.insert(std::make_pair((unsigned*)RegOp,
                                           std::make_pair(MemOp,Align))).second)
      assert(false && "Duplicated entries?");
    unsigned FoldedLoad = OpTbl0[i][2];
    // Index 0, folded load or store.
    unsigned AuxInfo = 0 | (FoldedLoad << 4) | ((FoldedLoad^1) << 5);
    // The Fs* pseudo-movs intentionally alias MOVSD/MOVSS mem forms, so they
    // are excluded from the (one-to-one) unfolding map.
    if (RegOp != X86::FsMOVAPDrr && RegOp != X86::FsMOVAPSrr)
      if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
                                     std::make_pair(RegOp, AuxInfo))).second)
        AmbEntries.push_back(MemOp);
  }

  static const unsigned OpTbl1[][3] = {
    { X86::CMP16rr,         X86::CMP16rm, 0 },
    { X86::CMP32rr,         X86::CMP32rm, 0 },
    { X86::CMP64rr,         X86::CMP64rm, 0 },
    { X86::CMP8rr,          X86::CMP8rm, 0 },
    { X86::CVTSD2SSrr,      X86::CVTSD2SSrm, 0 },
    { X86::CVTSI2SD64rr,    X86::CVTSI2SD64rm, 0 },
    { X86::CVTSI2SDrr,      X86::CVTSI2SDrm, 0 },
    { X86::CVTSI2SS64rr,    X86::CVTSI2SS64rm, 0 },
    { X86::CVTSI2SSrr,      X86::CVTSI2SSrm, 0 },
    { X86::CVTSS2SDrr,      X86::CVTSS2SDrm, 0 },
    { X86::CVTTSD2SI64rr,   X86::CVTTSD2SI64rm, 0 },
    { X86::CVTTSD2SIrr,     X86::CVTTSD2SIrm, 0 },
    { X86::CVTTSS2SI64rr,   X86::CVTTSS2SI64rm, 0 },
    { X86::CVTTSS2SIrr,     X86::CVTTSS2SIrm, 0 },
    { X86::FsMOVAPDrr,      X86::MOVSDrm, 0 },
    { X86::FsMOVAPSrr,      X86::MOVSSrm, 0 },
    { X86::IMUL16rri,       X86::IMUL16rmi, 0 },
    { X86::IMUL16rri8,      X86::IMUL16rmi8, 0 },
    { X86::IMUL32rri,       X86::IMUL32rmi, 0 },
    { X86::IMUL32rri8,      X86::IMUL32rmi8, 0 },
    { X86::IMUL64rri32,     X86::IMUL64rmi32, 0 },
    { X86::IMUL64rri8,      X86::IMUL64rmi8, 0 },
    { X86::Int_CMPSDrr,     X86::Int_CMPSDrm, 0 },
    { X86::Int_CMPSSrr,     X86::Int_CMPSSrm, 0 },
    { X86::Int_COMISDrr,    X86::Int_COMISDrm, 0 },
    { X86::Int_COMISSrr,    X86::Int_COMISSrm, 0 },
    { X86::Int_CVTDQ2PDrr,  X86::Int_CVTDQ2PDrm, 16 },
    { X86::Int_CVTDQ2PSrr,  X86::Int_CVTDQ2PSrm, 16 },
    { X86::Int_CVTPD2DQrr,  X86::Int_CVTPD2DQrm, 16 },
    { X86::Int_CVTPD2PSrr,  X86::Int_CVTPD2PSrm, 16 },
    { X86::Int_CVTPS2DQrr,  X86::Int_CVTPS2DQrm, 16 },
    { X86::Int_CVTPS2PDrr,  X86::Int_CVTPS2PDrm, 0 },
    { X86::Int_CVTSD2SI64rr,X86::Int_CVTSD2SI64rm, 0 },
    { X86::Int_CVTSD2SIrr,  X86::Int_CVTSD2SIrm, 0 },
    { X86::Int_CVTSD2SSrr,  X86::Int_CVTSD2SSrm, 0 },
    { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 },
    { X86::Int_CVTSI2SDrr,  X86::Int_CVTSI2SDrm, 0 },
    { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm, 0 },
    { X86::Int_CVTSI2SSrr,  X86::Int_CVTSI2SSrm, 0 },
    { X86::Int_CVTSS2SDrr,  X86::Int_CVTSS2SDrm, 0 },
    { X86::Int_CVTSS2SI64rr,X86::Int_CVTSS2SI64rm, 0 },
    { X86::Int_CVTSS2SIrr,  X86::Int_CVTSS2SIrm, 0 },
    { X86::Int_CVTTPD2DQrr, X86::Int_CVTTPD2DQrm, 16 },
    { X86::Int_CVTTPS2DQrr, X86::Int_CVTTPS2DQrm, 16 },
    { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm, 0 },
    { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm, 0 },
    { X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm, 0 },
    { X86::Int_CVTTSS2SIrr, X86::Int_CVTTSS2SIrm, 0 },
    { X86::Int_UCOMISDrr,   X86::Int_UCOMISDrm, 0 },
    { X86::Int_UCOMISSrr,   X86::Int_UCOMISSrm, 0 },
    { X86::MOV16rr,         X86::MOV16rm, 0 },
    { X86::MOV32rr,         X86::MOV32rm, 0 },
    { X86::MOV32rr_TC,      X86::MOV32rm_TC, 0 },
    { X86::MOV64rr,         X86::MOV64rm, 0 },
    { X86::MOV64toPQIrr,    X86::MOVQI2PQIrm, 0 },
    { X86::MOV64toSDrr,     X86::MOV64toSDrm, 0 },
    { X86::MOV8rr,          X86::MOV8rm, 0 },
    { X86::MOVAPDrr,        X86::MOVAPDrm, 16 },
    { X86::MOVAPSrr,        X86::MOVAPSrm, 16 },
    { X86::MOVDDUPrr,       X86::MOVDDUPrm, 0 },
    { X86::MOVDI2PDIrr,     X86::MOVDI2PDIrm, 0 },
    { X86::MOVDI2SSrr,      X86::MOVDI2SSrm, 0 },
    { X86::MOVDQArr,        X86::MOVDQArm, 16 },
    { X86::MOVSHDUPrr,      X86::MOVSHDUPrm, 16 },
    { X86::MOVSLDUPrr,      X86::MOVSLDUPrm, 16 },
    { X86::MOVSX16rr8,      X86::MOVSX16rm8, 0 },
    { X86::MOVSX32rr16,     X86::MOVSX32rm16, 0 },
    { X86::MOVSX32rr8,      X86::MOVSX32rm8, 0 },
    { X86::MOVSX64rr16,     X86::MOVSX64rm16, 0 },
    { X86::MOVSX64rr32,     X86::MOVSX64rm32, 0 },
    { X86::MOVSX64rr8,      X86::MOVSX64rm8, 0 },
    { X86::MOVUPDrr,        X86::MOVUPDrm, 16 },
    { X86::MOVUPSrr,        X86::MOVUPSrm, 0 },
    { X86::MOVZDI2PDIrr,    X86::MOVZDI2PDIrm, 0 },
    { X86::MOVZQI2PQIrr,    X86::MOVZQI2PQIrm, 0 },
    { X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm, 16 },
    { X86::MOVZX16rr8,      X86::MOVZX16rm8, 0 },
    { X86::MOVZX32rr16,     X86::MOVZX32rm16, 0 },
    { X86::MOVZX32_NOREXrr8, X86::MOVZX32_NOREXrm8, 0 },
    { X86::MOVZX32rr8,      X86::MOVZX32rm8, 0 },
    { X86::MOVZX64rr16,     X86::MOVZX64rm16, 0 },
    { X86::MOVZX64rr32,     X86::MOVZX64rm32, 0 },
    { X86::MOVZX64rr8,      X86::MOVZX64rm8, 0 },
    { X86::PSHUFDri,        X86::PSHUFDmi, 16 },
    { X86::PSHUFHWri,       X86::PSHUFHWmi, 16 },
    { X86::PSHUFLWri,       X86::PSHUFLWmi, 16 },
    { X86::RCPPSr,          X86::RCPPSm, 16 },
    { X86::RCPPSr_Int,      X86::RCPPSm_Int, 16 },
    { X86::RSQRTPSr,        X86::RSQRTPSm, 16 },
    { X86::RSQRTPSr_Int,    X86::RSQRTPSm_Int, 16 },
    { X86::RSQRTSSr,        X86::RSQRTSSm, 0 },
    { X86::RSQRTSSr_Int,    X86::RSQRTSSm_Int, 0 },
    { X86::SQRTPDr,         X86::SQRTPDm, 16 },
    { X86::SQRTPDr_Int,     X86::SQRTPDm_Int, 16 },
    { X86::SQRTPSr,         X86::SQRTPSm, 16 },
    { X86::SQRTPSr_Int,     X86::SQRTPSm_Int, 16 },
    { X86::SQRTSDr,         X86::SQRTSDm, 0 },
    { X86::SQRTSDr_Int,     X86::SQRTSDm_Int, 0 },
    { X86::SQRTSSr,         X86::SQRTSSm, 0 },
    { X86::SQRTSSr_Int,     X86::SQRTSSm_Int, 0 },
    { X86::TEST16rr,        X86::TEST16rm, 0 },
    { X86::TEST32rr,        X86::TEST32rm, 0 },
    { X86::TEST64rr,        X86::TEST64rm, 0 },
    { X86::TEST8rr,         X86::TEST8rm, 0 },
    // FIXME: TEST*rr EAX,EAX ---> CMP [mem], 0
    { X86::UCOMISDrr,       X86::UCOMISDrm, 0 },
    { X86::UCOMISSrr,       X86::UCOMISSrm, 0 }
  };

  for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) {
    unsigned RegOp = OpTbl1[i][0];
    unsigned MemOp = OpTbl1[i][1];
    unsigned Align = OpTbl1[i][2];
    if (!RegOp2MemOpTable1.insert(std::make_pair((unsigned*)RegOp,
                                           std::make_pair(MemOp,Align))).second)
      assert(false && "Duplicated entries?");
    // Index 1, folded load
    unsigned AuxInfo = 1 | (1 << 4);
    // Fs* pseudos alias MOVSD/MOVSS mem forms; keep them out of the reverse
    // map (same exclusion as in the OpTbl0 loop above).
    if (RegOp != X86::FsMOVAPDrr && RegOp != X86::FsMOVAPSrr)
      if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
                                     std::make_pair(RegOp, AuxInfo))).second)
        AmbEntries.push_back(MemOp);
  }

  static const unsigned OpTbl2[][3] = {
    { X86::ADC32rr,         X86::ADC32rm, 0 },
    { X86::ADC64rr,         X86::ADC64rm, 0 },
    { X86::ADD16rr,         X86::ADD16rm, 0 },
    { X86::ADD32rr,         X86::ADD32rm, 0 },
    { X86::ADD64rr,         X86::ADD64rm, 0 },
    { X86::ADD8rr,          X86::ADD8rm, 0 },
    { X86::ADDPDrr,         X86::ADDPDrm, 16 },
    { X86::ADDPSrr,         X86::ADDPSrm, 16 },
    { X86::ADDSDrr,         X86::ADDSDrm, 0 },
    { X86::ADDSSrr,         X86::ADDSSrm, 0 },
    { X86::ADDSUBPDrr,      X86::ADDSUBPDrm, 16 },
    { X86::ADDSUBPSrr,      X86::ADDSUBPSrm, 16 },
    { X86::AND16rr,         X86::AND16rm, 0 },
    { X86::AND32rr,         X86::AND32rm, 0 },
    { X86::AND64rr,         X86::AND64rm, 0 },
    { X86::AND8rr,          X86::AND8rm, 0 },
    { X86::ANDNPDrr,        X86::ANDNPDrm, 16 },
    { X86::ANDNPSrr,        X86::ANDNPSrm, 16 },
    { X86::ANDPDrr,         X86::ANDPDrm, 16 },
    { X86::ANDPSrr,         X86::ANDPSrm, 16 },
    { X86::CMOVA16rr,       X86::CMOVA16rm, 0 },
    { X86::CMOVA32rr,       X86::CMOVA32rm, 0 },
    { X86::CMOVA64rr,       X86::CMOVA64rm, 0 },
    { X86::CMOVAE16rr,      X86::CMOVAE16rm, 0 },
    { X86::CMOVAE32rr,      X86::CMOVAE32rm, 0 },
    { X86::CMOVAE64rr,      X86::CMOVAE64rm, 0 },
    { X86::CMOVB16rr,       X86::CMOVB16rm, 0 },
    { X86::CMOVB32rr,       X86::CMOVB32rm, 0 },
    { X86::CMOVB64rr,       X86::CMOVB64rm, 0 },
    { X86::CMOVBE16rr,      X86::CMOVBE16rm, 0 },
    { X86::CMOVBE32rr,      X86::CMOVBE32rm, 0 },
    { X86::CMOVBE64rr,      X86::CMOVBE64rm, 0 },
    { X86::CMOVE16rr,       X86::CMOVE16rm, 0 },
    { X86::CMOVE32rr,       X86::CMOVE32rm, 0 },
    { X86::CMOVE64rr,       X86::CMOVE64rm, 0 },
    { X86::CMOVG16rr,       X86::CMOVG16rm, 0 },
    { X86::CMOVG32rr,       X86::CMOVG32rm, 0 },
    { X86::CMOVG64rr,       X86::CMOVG64rm, 0 },
    { X86::CMOVGE16rr,      X86::CMOVGE16rm, 0 },
    { X86::CMOVGE32rr,      X86::CMOVGE32rm, 0 },
    { X86::CMOVGE64rr,      X86::CMOVGE64rm, 0 },
    { X86::CMOVL16rr,       X86::CMOVL16rm, 0 },
    { X86::CMOVL32rr,       X86::CMOVL32rm, 0 },
    { X86::CMOVL64rr,       X86::CMOVL64rm, 0 },
    { X86::CMOVLE16rr,      X86::CMOVLE16rm, 0 },
    { X86::CMOVLE32rr,      X86::CMOVLE32rm, 0 },
    { X86::CMOVLE64rr,      X86::CMOVLE64rm, 0 },
    { X86::CMOVNE16rr,      X86::CMOVNE16rm, 0 },
    { X86::CMOVNE32rr,      X86::CMOVNE32rm, 0 },
    { X86::CMOVNE64rr,      X86::CMOVNE64rm, 0 },
    { X86::CMOVNO16rr,      X86::CMOVNO16rm, 0 },
    { X86::CMOVNO32rr,      X86::CMOVNO32rm, 0 },
    { X86::CMOVNO64rr,      X86::CMOVNO64rm, 0 },
    { X86::CMOVNP16rr,      X86::CMOVNP16rm, 0 },
    { X86::CMOVNP32rr,      X86::CMOVNP32rm, 0 },
    { X86::CMOVNP64rr,      X86::CMOVNP64rm, 0 },
    { X86::CMOVNS16rr,      X86::CMOVNS16rm, 0 },
    { X86::CMOVNS32rr,      X86::CMOVNS32rm, 0 },
    { X86::CMOVNS64rr,      X86::CMOVNS64rm, 0 },
    { X86::CMOVO16rr,       X86::CMOVO16rm, 0 },
    { X86::CMOVO32rr,       X86::CMOVO32rm, 0 },
    { X86::CMOVO64rr,       X86::CMOVO64rm, 0 },
    { X86::CMOVP16rr,       X86::CMOVP16rm, 0 },
    { X86::CMOVP32rr,       X86::CMOVP32rm, 0 },
    { X86::CMOVP64rr,       X86::CMOVP64rm, 0 },
    { X86::CMOVS16rr,       X86::CMOVS16rm, 0 },
    { X86::CMOVS32rr,       X86::CMOVS32rm, 0 },
    { X86::CMOVS64rr,       X86::CMOVS64rm, 0 },
    { X86::CMPPDrri,        X86::CMPPDrmi, 16 },
    { X86::CMPPSrri,        X86::CMPPSrmi, 16 },
    { X86::CMPSDrr,         X86::CMPSDrm, 0 },
    { X86::CMPSSrr,         X86::CMPSSrm, 0 },
    { X86::DIVPDrr,         X86::DIVPDrm, 16 },
    { X86::DIVPSrr,         X86::DIVPSrm, 16 },
    { X86::DIVSDrr,         X86::DIVSDrm, 0 },
    { X86::DIVSSrr,         X86::DIVSSrm, 0 },
    { X86::FsANDNPDrr,      X86::FsANDNPDrm, 16 },
    { X86::FsANDNPSrr,      X86::FsANDNPSrm, 16 },
    { X86::FsANDPDrr,       X86::FsANDPDrm, 16 },
    { X86::FsANDPSrr,       X86::FsANDPSrm, 16 },
    { X86::FsORPDrr,        X86::FsORPDrm, 16 },
    { X86::FsORPSrr,        X86::FsORPSrm, 16 },
    { X86::FsXORPDrr,       X86::FsXORPDrm, 16 },
    { X86::FsXORPSrr,       X86::FsXORPSrm, 16 },
    { X86::HADDPDrr,        X86::HADDPDrm, 16 },
    { X86::HADDPSrr,        X86::HADDPSrm, 16 },
    { X86::HSUBPDrr,        X86::HSUBPDrm, 16 },
    { X86::HSUBPSrr,        X86::HSUBPSrm, 16 },
    { X86::IMUL16rr,        X86::IMUL16rm, 0 },
    { X86::IMUL32rr,        X86::IMUL32rm, 0 },
    { X86::IMUL64rr,        X86::IMUL64rm, 0 },
    { X86::MAXPDrr,         X86::MAXPDrm, 16 },
    { X86::MAXPDrr_Int,     X86::MAXPDrm_Int, 16 },
    { X86::MAXPSrr,         X86::MAXPSrm, 16 },
    { X86::MAXPSrr_Int,     X86::MAXPSrm_Int, 16 },
    { X86::MAXSDrr,         X86::MAXSDrm, 0 },
    { X86::MAXSDrr_Int,     X86::MAXSDrm_Int, 0 },
    { X86::MAXSSrr,         X86::MAXSSrm, 0 },
    { X86::MAXSSrr_Int,     X86::MAXSSrm_Int, 0 },
    { X86::MINPDrr,         X86::MINPDrm, 16 },
    { X86::MINPDrr_Int,     X86::MINPDrm_Int, 16 },
    { X86::MINPSrr,         X86::MINPSrm, 16 },
    { X86::MINPSrr_Int,     X86::MINPSrm_Int, 16 },
    { X86::MINSDrr,         X86::MINSDrm, 0 },
    { X86::MINSDrr_Int,     X86::MINSDrm_Int, 0 },
    { X86::MINSSrr,         X86::MINSSrm, 0 },
    { X86::MINSSrr_Int,     X86::MINSSrm_Int, 0 },
    { X86::MULPDrr,         X86::MULPDrm, 16 },
    { X86::MULPSrr,         X86::MULPSrm, 16 },
    { X86::MULSDrr,         X86::MULSDrm, 0 },
    { X86::MULSSrr,         X86::MULSSrm, 0 },
    { X86::OR16rr,          X86::OR16rm, 0 },
    { X86::OR32rr,          X86::OR32rm, 0 },
    { X86::OR64rr,          X86::OR64rm, 0 },
    { X86::OR8rr,           X86::OR8rm, 0 },
    { X86::ORPDrr,          X86::ORPDrm, 16 },
    { X86::ORPSrr,          X86::ORPSrm, 16 },
    { X86::PACKSSDWrr,      X86::PACKSSDWrm, 16 },
    { X86::PACKSSWBrr,      X86::PACKSSWBrm, 16 },
    { X86::PACKUSWBrr,      X86::PACKUSWBrm, 16 },
    { X86::PADDBrr,         X86::PADDBrm, 16 },
    { X86::PADDDrr,         X86::PADDDrm, 16 },
    { X86::PADDQrr,         X86::PADDQrm, 16 },
    { X86::PADDSBrr,        X86::PADDSBrm, 16 },
    { X86::PADDSWrr,        X86::PADDSWrm, 16 },
    { X86::PADDWrr,         X86::PADDWrm, 16 },
    { X86::PANDNrr,         X86::PANDNrm, 16 },
    { X86::PANDrr,          X86::PANDrm, 16 },
    { X86::PAVGBrr,         X86::PAVGBrm, 16 },
    { X86::PAVGWrr,         X86::PAVGWrm, 16 },
    { X86::PCMPEQBrr,       X86::PCMPEQBrm, 16 },
    { X86::PCMPEQDrr,       X86::PCMPEQDrm, 16 },
    { X86::PCMPEQWrr,       X86::PCMPEQWrm, 16 },
    { X86::PCMPGTBrr,       X86::PCMPGTBrm, 16 },
    { X86::PCMPGTDrr,       X86::PCMPGTDrm, 16 },
    { X86::PCMPGTWrr,       X86::PCMPGTWrm, 16 },
    { X86::PINSRWrri,       X86::PINSRWrmi, 16 },
    { X86::PMADDWDrr,       X86::PMADDWDrm, 16 },
    { X86::PMAXSWrr,        X86::PMAXSWrm, 16 },
    { X86::PMAXUBrr,        X86::PMAXUBrm, 16 },
    { X86::PMINSWrr,        X86::PMINSWrm, 16 },
    { X86::PMINUBrr,        X86::PMINUBrm, 16 },
    { X86::PMULDQrr,        X86::PMULDQrm, 16 },
    { X86::PMULHUWrr,       X86::PMULHUWrm, 16 },
    { X86::PMULHWrr,        X86::PMULHWrm, 16 },
    { X86::PMULLDrr,        X86::PMULLDrm, 16 },
    { X86::PMULLWrr,        X86::PMULLWrm, 16 },
    { X86::PMULUDQrr,       X86::PMULUDQrm, 16 },
    { X86::PORrr,           X86::PORrm, 16 },
    { X86::PSADBWrr,        X86::PSADBWrm, 16 },
    { X86::PSLLDrr,         X86::PSLLDrm, 16 },
    { X86::PSLLQrr,         X86::PSLLQrm, 16 },
    { X86::PSLLWrr,         X86::PSLLWrm, 16 },
    { X86::PSRADrr,         X86::PSRADrm, 16 },
    { X86::PSRAWrr,         X86::PSRAWrm, 16 },
    { X86::PSRLDrr,         X86::PSRLDrm, 16 },
    { X86::PSRLQrr,         X86::PSRLQrm, 16 },
    { X86::PSRLWrr,         X86::PSRLWrm, 16 },
    { X86::PSUBBrr,         X86::PSUBBrm, 16 },
    { X86::PSUBDrr,         X86::PSUBDrm, 16 },
    { X86::PSUBSBrr,        X86::PSUBSBrm, 16 },
    { X86::PSUBSWrr,        X86::PSUBSWrm, 16 },
    { X86::PSUBWrr,         X86::PSUBWrm, 16 },
    { X86::PUNPCKHBWrr,     X86::PUNPCKHBWrm, 16 },
    { X86::PUNPCKHDQrr,     X86::PUNPCKHDQrm, 16 },
    { X86::PUNPCKHQDQrr,    X86::PUNPCKHQDQrm, 16 },
    { X86::PUNPCKHWDrr,     X86::PUNPCKHWDrm, 16 },
    { X86::PUNPCKLBWrr,     X86::PUNPCKLBWrm, 16 },
    { X86::PUNPCKLDQrr,     X86::PUNPCKLDQrm, 16 },
    { X86::PUNPCKLQDQrr,    X86::PUNPCKLQDQrm, 16 },
    { X86::PUNPCKLWDrr,     X86::PUNPCKLWDrm, 16 },
    { X86::PXORrr,          X86::PXORrm, 16 },
    { X86::SBB32rr,         X86::SBB32rm, 0 },
    { X86::SBB64rr,         X86::SBB64rm, 0 },
    { X86::SHUFPDrri,       X86::SHUFPDrmi, 16 },
    { X86::SHUFPSrri,       X86::SHUFPSrmi, 16 },
    { X86::SUB16rr,         X86::SUB16rm, 0 },
    { X86::SUB32rr,         X86::SUB32rm, 0 },
    { X86::SUB64rr,         X86::SUB64rm, 0 },
    { X86::SUB8rr,          X86::SUB8rm, 0 },
    { X86::SUBPDrr,         X86::SUBPDrm, 16 },
    { X86::SUBPSrr,         X86::SUBPSrm, 16 },
    { X86::SUBSDrr,         X86::SUBSDrm, 0 },
    { X86::SUBSSrr,         X86::SUBSSrm, 0 },
    // FIXME: TEST*rr -> swapped operand of TEST*mr.
    { X86::UNPCKHPDrr,      X86::UNPCKHPDrm, 16 },
    { X86::UNPCKHPSrr,      X86::UNPCKHPSrm, 16 },
    { X86::UNPCKLPDrr,      X86::UNPCKLPDrm, 16 },
    { X86::UNPCKLPSrr,      X86::UNPCKLPSrm, 16 },
    { X86::XOR16rr,         X86::XOR16rm, 0 },
    { X86::XOR32rr,         X86::XOR32rm, 0 },
    { X86::XOR64rr,         X86::XOR64rm, 0 },
    { X86::XOR8rr,          X86::XOR8rm, 0 },
    { X86::XORPDrr,         X86::XORPDrm, 16 },
    { X86::XORPSrr,         X86::XORPSrm, 16 }
  };

  for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) {
    unsigned RegOp = OpTbl2[i][0];
    unsigned MemOp = OpTbl2[i][1];
    unsigned Align = OpTbl2[i][2];
    if (!RegOp2MemOpTable2.insert(std::make_pair((unsigned*)RegOp,
                                           std::make_pair(MemOp,Align))).second)
      assert(false && "Duplicated entries?");
    // Index 2, folded load
    unsigned AuxInfo = 2 | (1 << 4);
    if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
                                   std::make_pair(RegOp, AuxInfo))).second)
      AmbEntries.push_back(MemOp);
  }

  // Remove ambiguous entries.
  // NOTE: nothing is actually removed here; the tables are required to be
  // unambiguous, and any collision recorded above trips this assert.
  assert(AmbEntries.empty() && "Duplicated entries in unfolding maps?");
}
|
|
|
|
|
2010-01-12 08:09:37 +08:00
|
|
|
bool
|
2010-01-13 08:30:23 +08:00
|
|
|
X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
|
|
|
|
unsigned &SrcReg, unsigned &DstReg,
|
|
|
|
unsigned &SubIdx) const {
|
2010-01-12 08:09:37 +08:00
|
|
|
switch (MI.getOpcode()) {
|
|
|
|
default: break;
|
|
|
|
case X86::MOVSX16rr8:
|
|
|
|
case X86::MOVZX16rr8:
|
|
|
|
case X86::MOVSX32rr8:
|
|
|
|
case X86::MOVZX32rr8:
|
|
|
|
case X86::MOVSX64rr8:
|
|
|
|
case X86::MOVZX64rr8:
|
2010-01-13 16:01:32 +08:00
|
|
|
if (!TM.getSubtarget<X86Subtarget>().is64Bit())
|
|
|
|
// It's not always legal to reference the low 8-bit of the larger
|
|
|
|
// register in 32-bit mode.
|
|
|
|
return false;
|
2010-01-12 08:09:37 +08:00
|
|
|
case X86::MOVSX32rr16:
|
|
|
|
case X86::MOVZX32rr16:
|
|
|
|
case X86::MOVSX64rr16:
|
|
|
|
case X86::MOVZX64rr16:
|
|
|
|
case X86::MOVSX64rr32:
|
|
|
|
case X86::MOVZX64rr32: {
|
|
|
|
if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg())
|
|
|
|
// Be conservative.
|
|
|
|
return false;
|
|
|
|
SrcReg = MI.getOperand(1).getReg();
|
|
|
|
DstReg = MI.getOperand(0).getReg();
|
|
|
|
switch (MI.getOpcode()) {
|
|
|
|
default:
|
|
|
|
llvm_unreachable(0);
|
|
|
|
break;
|
|
|
|
case X86::MOVSX16rr8:
|
|
|
|
case X86::MOVZX16rr8:
|
|
|
|
case X86::MOVSX32rr8:
|
|
|
|
case X86::MOVZX32rr8:
|
|
|
|
case X86::MOVSX64rr8:
|
|
|
|
case X86::MOVZX64rr8:
|
2010-05-26 01:04:16 +08:00
|
|
|
SubIdx = X86::sub_8bit;
|
2010-01-12 08:09:37 +08:00
|
|
|
break;
|
|
|
|
case X86::MOVSX32rr16:
|
|
|
|
case X86::MOVZX32rr16:
|
|
|
|
case X86::MOVSX64rr16:
|
|
|
|
case X86::MOVZX64rr16:
|
2010-05-26 01:04:16 +08:00
|
|
|
SubIdx = X86::sub_16bit;
|
2010-01-12 08:09:37 +08:00
|
|
|
break;
|
|
|
|
case X86::MOVSX64rr32:
|
|
|
|
case X86::MOVZX64rr32:
|
2010-05-26 01:04:16 +08:00
|
|
|
SubIdx = X86::sub_32bit;
|
2010-01-12 08:09:37 +08:00
|
|
|
break;
|
|
|
|
}
|
2010-01-13 08:30:23 +08:00
|
|
|
return true;
|
2010-01-12 08:09:37 +08:00
|
|
|
}
|
|
|
|
}
|
2010-01-13 08:30:23 +08:00
|
|
|
return false;
|
2010-01-12 08:09:37 +08:00
|
|
|
}
|
|
|
|
|
2009-11-13 04:55:29 +08:00
|
|
|
/// isFrameOperand - Return true and the FrameIndex if the specified
|
|
|
|
/// operand and follow operands form a reference to the stack frame.
|
|
|
|
bool X86InstrInfo::isFrameOperand(const MachineInstr *MI, unsigned int Op,
|
|
|
|
int &FrameIndex) const {
|
|
|
|
if (MI->getOperand(Op).isFI() && MI->getOperand(Op+1).isImm() &&
|
|
|
|
MI->getOperand(Op+2).isReg() && MI->getOperand(Op+3).isImm() &&
|
|
|
|
MI->getOperand(Op+1).getImm() == 1 &&
|
|
|
|
MI->getOperand(Op+2).getReg() == 0 &&
|
|
|
|
MI->getOperand(Op+3).getImm() == 0) {
|
|
|
|
FrameIndex = MI->getOperand(Op).getIndex();
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2009-11-13 08:29:53 +08:00
|
|
|
static bool isFrameLoadOpcode(int Opcode) {
|
|
|
|
switch (Opcode) {
|
2006-02-03 04:12:32 +08:00
|
|
|
default: break;
|
|
|
|
case X86::MOV8rm:
|
|
|
|
case X86::MOV16rm:
|
|
|
|
case X86::MOV32rm:
|
2010-07-10 05:27:55 +08:00
|
|
|
case X86::MOV32rm_TC:
|
2006-09-08 14:48:29 +08:00
|
|
|
case X86::MOV64rm:
|
2010-07-10 05:27:55 +08:00
|
|
|
case X86::MOV64rm_TC:
|
2007-07-05 05:07:47 +08:00
|
|
|
case X86::LD_Fp64m:
|
2006-02-03 04:12:32 +08:00
|
|
|
case X86::MOVSSrm:
|
|
|
|
case X86::MOVSDrm:
|
2006-04-19 00:44:51 +08:00
|
|
|
case X86::MOVAPSrm:
|
|
|
|
case X86::MOVAPDrm:
|
2009-01-09 10:40:34 +08:00
|
|
|
case X86::MOVDQArm:
|
2007-04-03 14:00:37 +08:00
|
|
|
case X86::MMX_MOVD64rm:
|
|
|
|
case X86::MMX_MOVQ64rm:
|
2009-11-13 08:29:53 +08:00
|
|
|
return true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
static bool isFrameStoreOpcode(int Opcode) {
|
|
|
|
switch (Opcode) {
|
|
|
|
default: break;
|
|
|
|
case X86::MOV8mr:
|
|
|
|
case X86::MOV16mr:
|
|
|
|
case X86::MOV32mr:
|
2010-07-10 05:27:55 +08:00
|
|
|
case X86::MOV32mr_TC:
|
2009-11-13 08:29:53 +08:00
|
|
|
case X86::MOV64mr:
|
2010-07-10 05:27:55 +08:00
|
|
|
case X86::MOV64mr_TC:
|
2009-11-13 08:29:53 +08:00
|
|
|
case X86::ST_FpP64m:
|
|
|
|
case X86::MOVSSmr:
|
|
|
|
case X86::MOVSDmr:
|
|
|
|
case X86::MOVAPSmr:
|
|
|
|
case X86::MOVAPDmr:
|
|
|
|
case X86::MOVDQAmr:
|
|
|
|
case X86::MMX_MOVD64mr:
|
|
|
|
case X86::MMX_MOVQ64mr:
|
|
|
|
case X86::MMX_MOVNTQmr:
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
|
|
|
|
int &FrameIndex) const {
|
|
|
|
if (isFrameLoadOpcode(MI->getOpcode()))
|
2010-07-27 12:17:01 +08:00
|
|
|
if (MI->getOperand(0).getSubReg() == 0 && isFrameOperand(MI, 1, FrameIndex))
|
2006-02-03 04:12:32 +08:00
|
|
|
return MI->getOperand(0).getReg();
|
2009-11-13 08:29:53 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
|
|
|
|
int &FrameIndex) const {
|
|
|
|
if (isFrameLoadOpcode(MI->getOpcode())) {
|
|
|
|
unsigned Reg;
|
|
|
|
if ((Reg = isLoadFromStackSlot(MI, FrameIndex)))
|
|
|
|
return Reg;
|
2009-11-13 04:55:29 +08:00
|
|
|
// Check for post-frame index elimination operations
|
2009-12-05 06:38:46 +08:00
|
|
|
const MachineMemOperand *Dummy;
|
|
|
|
return hasLoadFromStackSlot(MI, Dummy, FrameIndex);
|
2006-02-03 04:12:32 +08:00
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2009-11-13 04:55:29 +08:00
|
|
|
bool X86InstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
|
2009-12-05 06:38:46 +08:00
|
|
|
const MachineMemOperand *&MMO,
|
2009-11-13 04:55:29 +08:00
|
|
|
int &FrameIndex) const {
|
|
|
|
for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
|
|
|
|
oe = MI->memoperands_end();
|
|
|
|
o != oe;
|
|
|
|
++o) {
|
|
|
|
if ((*o)->isLoad() && (*o)->getValue())
|
|
|
|
if (const FixedStackPseudoSourceValue *Value =
|
|
|
|
dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
|
|
|
|
FrameIndex = Value->getFrameIndex();
|
2009-12-05 06:38:46 +08:00
|
|
|
MMO = *o;
|
2009-11-13 04:55:29 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2008-11-19 03:49:32 +08:00
|
|
|
unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
|
2006-02-03 04:12:32 +08:00
|
|
|
int &FrameIndex) const {
|
2009-11-13 08:29:53 +08:00
|
|
|
if (isFrameStoreOpcode(MI->getOpcode()))
|
2010-07-27 12:17:01 +08:00
|
|
|
if (MI->getOperand(X86::AddrNumOperands).getSubReg() == 0 &&
|
|
|
|
isFrameOperand(MI, 0, FrameIndex))
|
2010-07-09 06:41:28 +08:00
|
|
|
return MI->getOperand(X86::AddrNumOperands).getReg();
|
2009-11-13 08:29:53 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI,
|
|
|
|
int &FrameIndex) const {
|
|
|
|
if (isFrameStoreOpcode(MI->getOpcode())) {
|
|
|
|
unsigned Reg;
|
|
|
|
if ((Reg = isStoreToStackSlot(MI, FrameIndex)))
|
|
|
|
return Reg;
|
2009-11-13 04:55:29 +08:00
|
|
|
// Check for post-frame index elimination operations
|
2009-12-05 06:38:46 +08:00
|
|
|
const MachineMemOperand *Dummy;
|
|
|
|
return hasStoreToStackSlot(MI, Dummy, FrameIndex);
|
2006-02-03 04:12:32 +08:00
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2009-11-13 04:55:29 +08:00
|
|
|
bool X86InstrInfo::hasStoreToStackSlot(const MachineInstr *MI,
|
2009-12-05 06:38:46 +08:00
|
|
|
const MachineMemOperand *&MMO,
|
2009-11-13 04:55:29 +08:00
|
|
|
int &FrameIndex) const {
|
|
|
|
for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
|
|
|
|
oe = MI->memoperands_end();
|
|
|
|
o != oe;
|
|
|
|
++o) {
|
|
|
|
if ((*o)->isStore() && (*o)->getValue())
|
|
|
|
if (const FixedStackPseudoSourceValue *Value =
|
|
|
|
dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
|
|
|
|
FrameIndex = Value->getFrameIndex();
|
2009-12-05 06:38:46 +08:00
|
|
|
MMO = *o;
|
2009-11-13 04:55:29 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2008-03-27 09:45:11 +08:00
|
|
|
/// regIsPICBase - Return true if register is PIC base (i.e.g defined by
|
|
|
|
/// X86::MOVPC32r.
|
2008-07-08 07:14:23 +08:00
|
|
|
static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) {
|
2008-03-27 09:45:11 +08:00
|
|
|
bool isPICBase = false;
|
|
|
|
for (MachineRegisterInfo::def_iterator I = MRI.def_begin(BaseReg),
|
|
|
|
E = MRI.def_end(); I != E; ++I) {
|
|
|
|
MachineInstr *DefMI = I.getOperand().getParent();
|
|
|
|
if (DefMI->getOpcode() != X86::MOVPC32r)
|
|
|
|
return false;
|
|
|
|
assert(!isPICBase && "More than one PIC base?");
|
|
|
|
isPICBase = true;
|
|
|
|
}
|
|
|
|
return isPICBase;
|
|
|
|
}
|
2008-03-31 15:54:19 +08:00
|
|
|
|
2008-05-13 04:54:26 +08:00
|
|
|
bool
|
2009-10-10 08:34:18 +08:00
|
|
|
X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
|
|
|
|
AliasAnalysis *AA) const {
|
2007-06-15 04:50:44 +08:00
|
|
|
switch (MI->getOpcode()) {
|
|
|
|
default: break;
|
2008-03-27 09:41:09 +08:00
|
|
|
case X86::MOV8rm:
|
|
|
|
case X86::MOV16rm:
|
|
|
|
case X86::MOV32rm:
|
|
|
|
case X86::MOV64rm:
|
|
|
|
case X86::LD_Fp64m:
|
|
|
|
case X86::MOVSSrm:
|
|
|
|
case X86::MOVSDrm:
|
|
|
|
case X86::MOVAPSrm:
|
2009-11-17 05:56:03 +08:00
|
|
|
case X86::MOVUPSrm:
|
2009-11-17 17:51:18 +08:00
|
|
|
case X86::MOVUPSrm_Int:
|
2008-03-27 09:41:09 +08:00
|
|
|
case X86::MOVAPDrm:
|
2009-01-09 10:40:34 +08:00
|
|
|
case X86::MOVDQArm:
|
2008-03-27 09:41:09 +08:00
|
|
|
case X86::MMX_MOVD64rm:
|
2009-11-17 17:51:18 +08:00
|
|
|
case X86::MMX_MOVQ64rm:
|
|
|
|
case X86::FsMOVAPSrm:
|
|
|
|
case X86::FsMOVAPDrm: {
|
2008-03-27 09:41:09 +08:00
|
|
|
// Loads from constant pools are trivially rematerializable.
|
2008-10-03 23:45:36 +08:00
|
|
|
if (MI->getOperand(1).isReg() &&
|
|
|
|
MI->getOperand(2).isImm() &&
|
|
|
|
MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 &&
|
2009-10-10 08:34:18 +08:00
|
|
|
MI->isInvariantLoad(AA)) {
|
2008-03-27 09:41:09 +08:00
|
|
|
unsigned BaseReg = MI->getOperand(1).getReg();
|
Reimplement rip-relative addressing in the X86-64 backend. The new
implementation primarily differs from the former in that the asmprinter
doesn't make a zillion decisions about whether or not something will be
RIP relative or not. Instead, those decisions are made by isel lowering
and propagated through to the asm printer. To achieve this, we:
1. Represent RIP relative addresses by setting the base of the X86 addr
mode to X86::RIP.
2. When ISel Lowering decides that it is safe to use RIP, it lowers to
X86ISD::WrapperRIP. When it is unsafe to use RIP, it lowers to
X86ISD::Wrapper as before.
3. This removes isRIPRel from X86ISelAddressMode, representing it with
a basereg of RIP instead.
4. The addressing mode matching logic in isel is greatly simplified.
5. The asmprinter is greatly simplified, notably the "NotRIPRel" predicate
passed through various printoperand routines is gone now.
6. The various symbol printing routines in asmprinter now no longer infer
when to emit (%rip), they just print the symbol.
I think this is a big improvement over the previous situation. It does have
two small caveats though: 1. I implemented a horrible "no-rip" modifier for
the inline asm "P" constraint modifier. This is a short term hack, there is
a much better, but more involved, solution. 2. I had to xfail an
-aggressive-remat testcase because it isn't handling the use of RIP in the
constant-pool reading instruction. This specific test is easy to fix without
-aggressive-remat, which I intend to do next.
llvm-svn: 74372
2009-06-27 12:16:01 +08:00
|
|
|
if (BaseReg == 0 || BaseReg == X86::RIP)
|
2008-03-27 09:41:09 +08:00
|
|
|
return true;
|
|
|
|
// Allow re-materialization of PIC load.
|
2008-10-03 23:45:36 +08:00
|
|
|
if (!ReMatPICStubLoad && MI->getOperand(4).isGlobal())
|
2008-04-02 07:26:12 +08:00
|
|
|
return false;
|
2008-07-08 07:14:23 +08:00
|
|
|
const MachineFunction &MF = *MI->getParent()->getParent();
|
|
|
|
const MachineRegisterInfo &MRI = MF.getRegInfo();
|
2008-03-27 09:41:09 +08:00
|
|
|
bool isPICBase = false;
|
|
|
|
for (MachineRegisterInfo::def_iterator I = MRI.def_begin(BaseReg),
|
|
|
|
E = MRI.def_end(); I != E; ++I) {
|
|
|
|
MachineInstr *DefMI = I.getOperand().getParent();
|
|
|
|
if (DefMI->getOpcode() != X86::MOVPC32r)
|
|
|
|
return false;
|
|
|
|
assert(!isPICBase && "More than one PIC base?");
|
|
|
|
isPICBase = true;
|
|
|
|
}
|
|
|
|
return isPICBase;
|
|
|
|
}
|
|
|
|
return false;
|
2008-02-22 17:25:47 +08:00
|
|
|
}
|
2008-03-27 09:41:09 +08:00
|
|
|
|
|
|
|
case X86::LEA32r:
|
|
|
|
case X86::LEA64r: {
|
2008-10-03 23:45:36 +08:00
|
|
|
if (MI->getOperand(2).isImm() &&
|
|
|
|
MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 &&
|
|
|
|
!MI->getOperand(4).isReg()) {
|
2008-03-27 09:41:09 +08:00
|
|
|
// lea fi#, lea GV, etc. are all rematerializable.
|
2008-10-03 23:45:36 +08:00
|
|
|
if (!MI->getOperand(1).isReg())
|
2008-09-27 05:30:20 +08:00
|
|
|
return true;
|
2008-03-27 09:41:09 +08:00
|
|
|
unsigned BaseReg = MI->getOperand(1).getReg();
|
|
|
|
if (BaseReg == 0)
|
|
|
|
return true;
|
|
|
|
// Allow re-materialization of lea PICBase + x.
|
2008-07-08 07:14:23 +08:00
|
|
|
const MachineFunction &MF = *MI->getParent()->getParent();
|
|
|
|
const MachineRegisterInfo &MRI = MF.getRegInfo();
|
2008-03-27 09:45:11 +08:00
|
|
|
return regIsPICBase(BaseReg, MRI);
|
2008-03-27 09:41:09 +08:00
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
2007-06-15 04:50:44 +08:00
|
|
|
}
|
2008-03-27 09:41:09 +08:00
|
|
|
|
2007-06-26 08:48:07 +08:00
|
|
|
// All other instructions marked M_REMATERIALIZABLE are always trivially
|
|
|
|
// rematerializable.
|
|
|
|
return true;
|
2007-06-15 04:50:44 +08:00
|
|
|
}
|
|
|
|
|
2008-06-24 15:10:51 +08:00
|
|
|
/// isSafeToClobberEFLAGS - Return true if it's safe insert an instruction that
|
|
|
|
/// would clobber the EFLAGS condition register. Note the result may be
|
|
|
|
/// conservative. If it cannot definitely determine the safety after visiting
|
2009-10-14 08:08:59 +08:00
|
|
|
/// a few instructions in each direction it assumes it's not safe.
|
2008-06-24 15:10:51 +08:00
|
|
|
static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
|
|
|
|
MachineBasicBlock::iterator I) {
|
2010-03-24 04:35:45 +08:00
|
|
|
MachineBasicBlock::iterator E = MBB.end();
|
|
|
|
|
2008-10-21 11:24:31 +08:00
|
|
|
// It's always safe to clobber EFLAGS at the end of a block.
|
2010-03-24 04:35:45 +08:00
|
|
|
if (I == E)
|
2008-10-21 11:24:31 +08:00
|
|
|
return true;
|
|
|
|
|
2008-06-24 15:10:51 +08:00
|
|
|
// For compile time consideration, if we are not able to determine the
|
2009-10-14 08:08:59 +08:00
|
|
|
// safety after visiting 4 instructions in each direction, we will assume
|
|
|
|
// it's not safe.
|
|
|
|
MachineBasicBlock::iterator Iter = I;
|
|
|
|
for (unsigned i = 0; i < 4; ++i) {
|
2008-06-24 15:10:51 +08:00
|
|
|
bool SeenDef = false;
|
2009-10-14 08:08:59 +08:00
|
|
|
for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) {
|
|
|
|
MachineOperand &MO = Iter->getOperand(j);
|
2008-10-03 23:45:36 +08:00
|
|
|
if (!MO.isReg())
|
2008-06-24 15:10:51 +08:00
|
|
|
continue;
|
|
|
|
if (MO.getReg() == X86::EFLAGS) {
|
|
|
|
if (MO.isUse())
|
|
|
|
return false;
|
|
|
|
SeenDef = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (SeenDef)
|
|
|
|
// This instruction defines EFLAGS, no need to look any further.
|
|
|
|
return true;
|
2009-10-14 08:08:59 +08:00
|
|
|
++Iter;
|
2010-03-24 04:35:45 +08:00
|
|
|
// Skip over DBG_VALUE.
|
|
|
|
while (Iter != E && Iter->isDebugValue())
|
|
|
|
++Iter;
|
2008-10-21 11:24:31 +08:00
|
|
|
|
|
|
|
// If we make it to the end of the block, it's safe to clobber EFLAGS.
|
2010-03-24 04:35:45 +08:00
|
|
|
if (Iter == E)
|
2009-10-14 08:08:59 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2010-03-24 04:35:45 +08:00
|
|
|
MachineBasicBlock::iterator B = MBB.begin();
|
2009-10-14 08:08:59 +08:00
|
|
|
Iter = I;
|
|
|
|
for (unsigned i = 0; i < 4; ++i) {
|
|
|
|
// If we make it to the beginning of the block, it's safe to clobber
|
|
|
|
// EFLAGS iff EFLAGS is not live-in.
|
2010-03-24 04:35:45 +08:00
|
|
|
if (Iter == B)
|
2009-10-14 08:08:59 +08:00
|
|
|
return !MBB.isLiveIn(X86::EFLAGS);
|
|
|
|
|
|
|
|
--Iter;
|
2010-03-24 04:35:45 +08:00
|
|
|
// Skip over DBG_VALUE.
|
|
|
|
while (Iter != B && Iter->isDebugValue())
|
|
|
|
--Iter;
|
|
|
|
|
2009-10-14 08:08:59 +08:00
|
|
|
bool SawKill = false;
|
|
|
|
for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) {
|
|
|
|
MachineOperand &MO = Iter->getOperand(j);
|
|
|
|
if (MO.isReg() && MO.getReg() == X86::EFLAGS) {
|
|
|
|
if (MO.isDef()) return MO.isDead();
|
|
|
|
if (MO.isKill()) SawKill = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (SawKill)
|
|
|
|
// This instruction kills EFLAGS and doesn't redefine it, so
|
|
|
|
// there's no need to look further.
|
2008-10-21 11:24:31 +08:00
|
|
|
return true;
|
2008-06-24 15:10:51 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Conservative answer.
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2008-04-01 04:40:39 +08:00
|
|
|
void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
|
|
|
|
MachineBasicBlock::iterator I,
|
2009-07-16 17:20:10 +08:00
|
|
|
unsigned DestReg, unsigned SubIdx,
|
2009-11-14 10:55:43 +08:00
|
|
|
const MachineInstr *Orig,
|
2010-06-03 06:47:25 +08:00
|
|
|
const TargetRegisterInfo &TRI) const {
|
2010-05-07 09:28:10 +08:00
|
|
|
DebugLoc DL = Orig->getDebugLoc();
|
2009-02-12 05:51:19 +08:00
|
|
|
|
2008-04-01 04:40:39 +08:00
|
|
|
// MOV32r0 etc. are implemented with xor which clobbers condition code.
|
|
|
|
// Re-materialize them as movri instructions to avoid side effects.
|
2009-07-16 17:20:10 +08:00
|
|
|
bool Clone = true;
|
|
|
|
unsigned Opc = Orig->getOpcode();
|
|
|
|
switch (Opc) {
|
2008-06-24 15:10:51 +08:00
|
|
|
default: break;
|
2008-04-01 04:40:39 +08:00
|
|
|
case X86::MOV8r0:
|
2010-01-12 12:42:54 +08:00
|
|
|
case X86::MOV16r0:
|
|
|
|
case X86::MOV32r0:
|
|
|
|
case X86::MOV64r0: {
|
2008-06-24 15:10:51 +08:00
|
|
|
if (!isSafeToClobberEFLAGS(MBB, I)) {
|
2009-07-16 17:20:10 +08:00
|
|
|
switch (Opc) {
|
2008-06-24 15:10:51 +08:00
|
|
|
default: break;
|
|
|
|
case X86::MOV8r0: Opc = X86::MOV8ri; break;
|
2010-01-12 12:42:54 +08:00
|
|
|
case X86::MOV16r0: Opc = X86::MOV16ri; break;
|
2008-06-24 15:10:51 +08:00
|
|
|
case X86::MOV32r0: Opc = X86::MOV32ri; break;
|
2010-02-27 00:49:27 +08:00
|
|
|
case X86::MOV64r0: Opc = X86::MOV64ri64i32; break;
|
2008-06-24 15:10:51 +08:00
|
|
|
}
|
2009-07-16 17:20:10 +08:00
|
|
|
Clone = false;
|
2008-06-24 15:10:51 +08:00
|
|
|
}
|
2008-04-01 04:40:39 +08:00
|
|
|
break;
|
2008-06-24 15:10:51 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-07-16 17:20:10 +08:00
|
|
|
if (Clone) {
|
2008-07-08 07:14:23 +08:00
|
|
|
MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
|
2008-04-01 04:40:39 +08:00
|
|
|
MBB.insert(I, MI);
|
2009-07-16 17:20:10 +08:00
|
|
|
} else {
|
2010-06-03 06:47:25 +08:00
|
|
|
BuildMI(MBB, I, DL, get(Opc)).addOperand(Orig->getOperand(0)).addImm(0);
|
2008-04-01 04:40:39 +08:00
|
|
|
}
|
2008-04-17 07:44:44 +08:00
|
|
|
|
2009-07-16 17:20:10 +08:00
|
|
|
MachineInstr *NewMI = prior(I);
|
2010-06-03 06:47:25 +08:00
|
|
|
NewMI->substituteRegister(Orig->getOperand(0).getReg(), DestReg, SubIdx, TRI);
|
2008-04-01 04:40:39 +08:00
|
|
|
}
|
|
|
|
|
2007-10-05 16:04:01 +08:00
|
|
|
/// hasLiveCondCodeDef - True if MI has a condition code def, e.g. EFLAGS, that
|
|
|
|
/// is not marked dead.
|
|
|
|
static bool hasLiveCondCodeDef(MachineInstr *MI) {
|
|
|
|
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
|
|
|
|
MachineOperand &MO = MI->getOperand(i);
|
2008-10-03 23:45:36 +08:00
|
|
|
if (MO.isReg() && MO.isDef() &&
|
2007-10-05 16:04:01 +08:00
|
|
|
MO.getReg() == X86::EFLAGS && !MO.isDead()) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2009-12-13 04:03:14 +08:00
|
|
|
/// convertToThreeAddressWithLEA - Helper for convertToThreeAddress when
|
2009-12-11 14:01:48 +08:00
|
|
|
/// 16-bit LEA is disabled, use 32-bit LEA to form 3-address code by promoting
|
|
|
|
/// to a 32-bit superregister and then truncating back down to a 16-bit
|
|
|
|
/// subregister.
|
|
|
|
MachineInstr *
|
|
|
|
X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
|
|
|
|
MachineFunction::iterator &MFI,
|
|
|
|
MachineBasicBlock::iterator &MBBI,
|
|
|
|
LiveVariables *LV) const {
|
|
|
|
MachineInstr *MI = MBBI;
|
|
|
|
unsigned Dest = MI->getOperand(0).getReg();
|
|
|
|
unsigned Src = MI->getOperand(1).getReg();
|
|
|
|
bool isDead = MI->getOperand(0).isDead();
|
|
|
|
bool isKill = MI->getOperand(1).isKill();
|
|
|
|
|
|
|
|
unsigned Opc = TM.getSubtarget<X86Subtarget>().is64Bit()
|
|
|
|
? X86::LEA64_32r : X86::LEA32r;
|
|
|
|
MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
|
|
|
|
unsigned leaInReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
|
|
|
|
unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
|
|
|
|
|
|
|
|
// Build and insert into an implicit UNDEF value. This is OK because
|
|
|
|
// well be shifting and then extracting the lower 16-bits.
|
2009-12-13 04:03:14 +08:00
|
|
|
// This has the potential to cause partial register stall. e.g.
|
2009-12-13 02:55:26 +08:00
|
|
|
// movw (%rbp,%rcx,2), %dx
|
|
|
|
// leal -65(%rdx), %esi
|
2009-12-13 04:03:14 +08:00
|
|
|
// But testing has shown this *does* help performance in 64-bit mode (at
|
|
|
|
// least on modern x86 machines).
|
2009-12-11 14:01:48 +08:00
|
|
|
BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg);
|
|
|
|
MachineInstr *InsMI =
|
2010-07-09 00:40:15 +08:00
|
|
|
BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(TargetOpcode::COPY))
|
|
|
|
.addReg(leaInReg, RegState::Define, X86::sub_16bit)
|
|
|
|
.addReg(Src, getKillRegState(isKill));
|
2009-12-11 14:01:48 +08:00
|
|
|
|
|
|
|
MachineInstrBuilder MIB = BuildMI(*MFI, MBBI, MI->getDebugLoc(),
|
|
|
|
get(Opc), leaOutReg);
|
|
|
|
switch (MIOpc) {
|
|
|
|
default:
|
|
|
|
llvm_unreachable(0);
|
|
|
|
break;
|
|
|
|
case X86::SHL16ri: {
|
|
|
|
unsigned ShAmt = MI->getOperand(2).getImm();
|
|
|
|
MIB.addReg(0).addImm(1 << ShAmt)
|
2010-07-09 07:46:44 +08:00
|
|
|
.addReg(leaInReg, RegState::Kill).addImm(0).addReg(0);
|
2009-12-11 14:01:48 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
case X86::INC16r:
|
|
|
|
case X86::INC64_16r:
|
2010-07-09 07:46:44 +08:00
|
|
|
addRegOffset(MIB, leaInReg, true, 1);
|
2009-12-11 14:01:48 +08:00
|
|
|
break;
|
|
|
|
case X86::DEC16r:
|
|
|
|
case X86::DEC64_16r:
|
2010-07-09 07:46:44 +08:00
|
|
|
addRegOffset(MIB, leaInReg, true, -1);
|
2009-12-11 14:01:48 +08:00
|
|
|
break;
|
|
|
|
case X86::ADD16ri:
|
|
|
|
case X86::ADD16ri8:
|
2010-07-09 07:46:44 +08:00
|
|
|
addRegOffset(MIB, leaInReg, true, MI->getOperand(2).getImm());
|
2009-12-11 14:01:48 +08:00
|
|
|
break;
|
|
|
|
case X86::ADD16rr: {
|
|
|
|
unsigned Src2 = MI->getOperand(2).getReg();
|
|
|
|
bool isKill2 = MI->getOperand(2).isKill();
|
|
|
|
unsigned leaInReg2 = 0;
|
|
|
|
MachineInstr *InsMI2 = 0;
|
|
|
|
if (Src == Src2) {
|
|
|
|
// ADD16rr %reg1028<kill>, %reg1028
|
|
|
|
// just a single insert_subreg.
|
|
|
|
addRegReg(MIB, leaInReg, true, leaInReg, false);
|
|
|
|
} else {
|
|
|
|
leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32RegClass);
|
|
|
|
// Build and insert into an implicit UNDEF value. This is OK because
|
|
|
|
// well be shifting and then extracting the lower 16-bits.
|
|
|
|
BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg2);
|
|
|
|
InsMI2 =
|
2010-07-09 00:40:15 +08:00
|
|
|
BuildMI(*MFI, MIB, MI->getDebugLoc(), get(TargetOpcode::COPY))
|
|
|
|
.addReg(leaInReg2, RegState::Define, X86::sub_16bit)
|
|
|
|
.addReg(Src2, getKillRegState(isKill2));
|
2009-12-11 14:01:48 +08:00
|
|
|
addRegReg(MIB, leaInReg, true, leaInReg2, true);
|
|
|
|
}
|
|
|
|
if (LV && isKill2 && InsMI2)
|
|
|
|
LV->replaceKillInstruction(Src2, MI, InsMI2);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
MachineInstr *NewMI = MIB;
|
|
|
|
MachineInstr *ExtMI =
|
2010-07-09 00:40:22 +08:00
|
|
|
BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(TargetOpcode::COPY))
|
2009-12-11 14:01:48 +08:00
|
|
|
.addReg(Dest, RegState::Define | getDeadRegState(isDead))
|
2010-07-09 00:40:22 +08:00
|
|
|
.addReg(leaOutReg, RegState::Kill, X86::sub_16bit);
|
2009-12-11 14:01:48 +08:00
|
|
|
|
|
|
|
if (LV) {
|
|
|
|
// Update live variables
|
|
|
|
LV->getVarInfo(leaInReg).Kills.push_back(NewMI);
|
|
|
|
LV->getVarInfo(leaOutReg).Kills.push_back(ExtMI);
|
|
|
|
if (isKill)
|
|
|
|
LV->replaceKillInstruction(Src, MI, InsMI);
|
|
|
|
if (isDead)
|
|
|
|
LV->replaceKillInstruction(Dest, MI, ExtMI);
|
|
|
|
}
|
|
|
|
|
|
|
|
return ExtMI;
|
|
|
|
}
|
|
|
|
|
2005-01-02 10:37:07 +08:00
|
|
|
/// convertToThreeAddress - This method must be implemented by targets that
|
|
|
|
/// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target
|
|
|
|
/// may be able to convert a two-address instruction into a true
|
|
|
|
/// three-address instruction on demand. This allows the X86 target (for
|
|
|
|
/// example) to convert ADD and SHL instructions into LEA instructions if they
|
|
|
|
/// would require register copies due to two-addressness.
|
|
|
|
///
|
|
|
|
/// This method returns a null pointer if the transformation cannot be
|
|
|
|
/// performed, otherwise it returns the new instruction.
|
|
|
|
///
|
2006-12-02 05:52:41 +08:00
|
|
|
MachineInstr *
|
|
|
|
X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
|
|
|
|
MachineBasicBlock::iterator &MBBI,
|
2008-07-03 07:41:07 +08:00
|
|
|
LiveVariables *LV) const {
|
2006-12-02 05:52:41 +08:00
|
|
|
MachineInstr *MI = MBBI;
|
2008-07-08 07:14:23 +08:00
|
|
|
MachineFunction &MF = *MI->getParent()->getParent();
|
2005-01-02 10:37:07 +08:00
|
|
|
// All instructions input are two-addr instructions. Get the known operands.
|
|
|
|
unsigned Dest = MI->getOperand(0).getReg();
|
|
|
|
unsigned Src = MI->getOperand(1).getReg();
|
2008-07-03 17:09:37 +08:00
|
|
|
bool isDead = MI->getOperand(0).isDead();
|
|
|
|
bool isKill = MI->getOperand(1).isKill();
|
2005-01-02 10:37:07 +08:00
|
|
|
|
2006-11-16 04:58:11 +08:00
|
|
|
MachineInstr *NewMI = NULL;
|
2006-12-02 05:52:41 +08:00
|
|
|
// FIXME: 16-bit LEA's are really slow on Athlons, but not bad on P4's. When
|
Two changes:
1) codegen a shift of a register as a shift, not an LEA.
2) teach the RA to convert a shift to an LEA instruction if it wants something
in three-address form.
This gives us asm diffs like:
- leal (,%eax,4), %eax
+ shll $2, %eax
which is faster on some processors and smaller on all of them.
and, more interestingly:
- movl 24(%esi), %eax
- leal (,%eax,4), %edi
+ movl 24(%esi), %edi
+ shll $2, %edi
Without #2, #1 was a significant pessimization in some cases.
This implements CodeGen/X86/shift-codegen.ll
llvm-svn: 35204
2007-03-20 14:08:29 +08:00
|
|
|
// we have better subtarget support, enable the 16-bit LEA generation here.
|
2009-12-13 04:03:14 +08:00
|
|
|
// 16-bit LEA is also slow on Core2.
|
2006-12-02 05:52:41 +08:00
|
|
|
bool DisableLEA16 = true;
|
2009-12-13 04:03:14 +08:00
|
|
|
bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
|
2006-12-02 05:52:41 +08:00
|
|
|
|
2007-10-06 04:34:26 +08:00
|
|
|
unsigned MIOpc = MI->getOpcode();
|
|
|
|
switch (MIOpc) {
|
2006-05-31 04:26:50 +08:00
|
|
|
case X86::SHUFPSrri: {
|
|
|
|
assert(MI->getNumOperands() == 4 && "Unknown shufps instruction!");
|
Two changes:
1) codegen a shift of a register as a shift, not an LEA.
2) teach the RA to convert a shift to an LEA instruction if it wants something
in three-address form.
This gives us asm diffs like:
- leal (,%eax,4), %eax
+ shll $2, %eax
which is faster on some processors and smaller on all of them.
and, more interestingly:
- movl 24(%esi), %eax
- leal (,%eax,4), %edi
+ movl 24(%esi), %edi
+ shll $2, %edi
Without #2, #1 was a significant pessimization in some cases.
This implements CodeGen/X86/shift-codegen.ll
llvm-svn: 35204
2007-03-20 14:08:29 +08:00
|
|
|
if (!TM.getSubtarget<X86Subtarget>().hasSSE2()) return 0;
|
|
|
|
|
2006-05-31 05:45:53 +08:00
|
|
|
unsigned B = MI->getOperand(1).getReg();
|
|
|
|
unsigned C = MI->getOperand(2).getReg();
|
Two changes:
1) codegen a shift of a register as a shift, not an LEA.
2) teach the RA to convert a shift to an LEA instruction if it wants something
in three-address form.
This gives us asm diffs like:
- leal (,%eax,4), %eax
+ shll $2, %eax
which is faster on some processors and smaller on all of them.
and, more interestingly:
- movl 24(%esi), %eax
- leal (,%eax,4), %edi
+ movl 24(%esi), %edi
+ shll $2, %edi
Without #2, #1 was a significant pessimization in some cases.
This implements CodeGen/X86/shift-codegen.ll
llvm-svn: 35204
2007-03-20 14:08:29 +08:00
|
|
|
if (B != C) return 0;
|
2008-07-03 17:09:37 +08:00
|
|
|
unsigned A = MI->getOperand(0).getReg();
|
|
|
|
unsigned M = MI->getOperand(3).getImm();
|
2009-02-12 05:51:19 +08:00
|
|
|
NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::PSHUFDri))
|
2009-05-14 05:33:08 +08:00
|
|
|
.addReg(A, RegState::Define | getDeadRegState(isDead))
|
|
|
|
.addReg(B, getKillRegState(isKill)).addImm(M);
|
Two changes:
1) codegen a shift of a register as a shift, not an LEA.
2) teach the RA to convert a shift to an LEA instruction if it wants something
in three-address form.
This gives us asm diffs like:
- leal (,%eax,4), %eax
+ shll $2, %eax
which is faster on some processors and smaller on all of them.
and, more interestingly:
- movl 24(%esi), %eax
- leal (,%eax,4), %edi
+ movl 24(%esi), %edi
+ shll $2, %edi
Without #2, #1 was a significant pessimization in some cases.
This implements CodeGen/X86/shift-codegen.ll
llvm-svn: 35204
2007-03-20 14:08:29 +08:00
|
|
|
break;
|
|
|
|
}
|
2007-03-29 02:12:31 +08:00
|
|
|
case X86::SHL64ri: {
|
2007-09-15 05:48:26 +08:00
|
|
|
assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!");
|
2007-03-29 02:12:31 +08:00
|
|
|
// NOTE: LEA doesn't produce flags like shift does, but LLVM never uses
|
|
|
|
// the flags produced by a shift yet, so this is safe.
|
|
|
|
unsigned ShAmt = MI->getOperand(2).getImm();
|
|
|
|
if (ShAmt == 0 || ShAmt >= 4) return 0;
|
2008-07-03 17:09:37 +08:00
|
|
|
|
2009-02-12 05:51:19 +08:00
|
|
|
NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
|
2009-05-14 05:33:08 +08:00
|
|
|
.addReg(Dest, RegState::Define | getDeadRegState(isDead))
|
|
|
|
.addReg(0).addImm(1 << ShAmt)
|
|
|
|
.addReg(Src, getKillRegState(isKill))
|
2010-07-09 07:46:44 +08:00
|
|
|
.addImm(0).addReg(0);
|
2007-03-29 02:12:31 +08:00
|
|
|
break;
|
|
|
|
}
|
Two changes:
1) codegen a shift of a register as a shift, not an LEA.
2) teach the RA to convert a shift to an LEA instruction if it wants something
in three-address form.
This gives us asm diffs like:
- leal (,%eax,4), %eax
+ shll $2, %eax
which is faster on some processors and smaller on all of them.
and, more interestingly:
- movl 24(%esi), %eax
- leal (,%eax,4), %edi
+ movl 24(%esi), %edi
+ shll $2, %edi
Without #2, #1 was a significant pessimization in some cases.
This implements CodeGen/X86/shift-codegen.ll
llvm-svn: 35204
2007-03-20 14:08:29 +08:00
|
|
|
case X86::SHL32ri: {
|
2007-09-15 05:48:26 +08:00
|
|
|
assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!");
|
Two changes:
1) codegen a shift of a register as a shift, not an LEA.
2) teach the RA to convert a shift to an LEA instruction if it wants something
in three-address form.
This gives us asm diffs like:
- leal (,%eax,4), %eax
+ shll $2, %eax
which is faster on some processors and smaller on all of them.
and, more interestingly:
- movl 24(%esi), %eax
- leal (,%eax,4), %edi
+ movl 24(%esi), %edi
+ shll $2, %edi
Without #2, #1 was a significant pessimization in some cases.
This implements CodeGen/X86/shift-codegen.ll
llvm-svn: 35204
2007-03-20 14:08:29 +08:00
|
|
|
// NOTE: LEA doesn't produce flags like shift does, but LLVM never uses
|
|
|
|
// the flags produced by a shift yet, so this is safe.
|
|
|
|
unsigned ShAmt = MI->getOperand(2).getImm();
|
|
|
|
if (ShAmt == 0 || ShAmt >= 4) return 0;
|
2008-07-03 17:09:37 +08:00
|
|
|
|
2009-12-13 04:03:14 +08:00
|
|
|
unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
|
2009-02-12 05:51:19 +08:00
|
|
|
NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc))
|
2009-05-14 05:33:08 +08:00
|
|
|
.addReg(Dest, RegState::Define | getDeadRegState(isDead))
|
2008-07-03 17:09:37 +08:00
|
|
|
.addReg(0).addImm(1 << ShAmt)
|
2010-07-09 07:46:44 +08:00
|
|
|
.addReg(Src, getKillRegState(isKill)).addImm(0).addReg(0);
|
Two changes:
1) codegen a shift of a register as a shift, not an LEA.
2) teach the RA to convert a shift to an LEA instruction if it wants something
in three-address form.
This gives us asm diffs like:
- leal (,%eax,4), %eax
+ shll $2, %eax
which is faster on some processors and smaller on all of them.
and, more interestingly:
- movl 24(%esi), %eax
- leal (,%eax,4), %edi
+ movl 24(%esi), %edi
+ shll $2, %edi
Without #2, #1 was a significant pessimization in some cases.
This implements CodeGen/X86/shift-codegen.ll
llvm-svn: 35204
2007-03-20 14:08:29 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
case X86::SHL16ri: {
|
2007-09-15 05:48:26 +08:00
|
|
|
assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!");
|
2007-09-06 08:14:41 +08:00
|
|
|
// NOTE: LEA doesn't produce flags like shift does, but LLVM never uses
|
|
|
|
// the flags produced by a shift yet, so this is safe.
|
|
|
|
unsigned ShAmt = MI->getOperand(2).getImm();
|
|
|
|
if (ShAmt == 0 || ShAmt >= 4) return 0;
|
2008-07-03 17:09:37 +08:00
|
|
|
|
2009-12-11 14:01:48 +08:00
|
|
|
if (DisableLEA16)
|
2009-12-13 04:03:14 +08:00
|
|
|
return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
|
2009-12-11 14:01:48 +08:00
|
|
|
NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
|
|
|
|
.addReg(Dest, RegState::Define | getDeadRegState(isDead))
|
|
|
|
.addReg(0).addImm(1 << ShAmt)
|
|
|
|
.addReg(Src, getKillRegState(isKill))
|
2010-07-09 07:46:44 +08:00
|
|
|
.addImm(0).addReg(0);
|
Two changes:
1) codegen a shift of a register as a shift, not an LEA.
2) teach the RA to convert a shift to an LEA instruction if it wants something
in three-address form.
This gives us asm diffs like:
- leal (,%eax,4), %eax
+ shll $2, %eax
which is faster on some processors and smaller on all of them.
and, more interestingly:
- movl 24(%esi), %eax
- leal (,%eax,4), %edi
+ movl 24(%esi), %edi
+ shll $2, %edi
Without #2, #1 was a significant pessimization in some cases.
This implements CodeGen/X86/shift-codegen.ll
llvm-svn: 35204
2007-03-20 14:08:29 +08:00
|
|
|
break;
|
2006-05-31 04:26:50 +08:00
|
|
|
}
|
2007-10-06 04:34:26 +08:00
|
|
|
default: {
|
|
|
|
// The following opcodes also sets the condition code register(s). Only
|
|
|
|
// convert them to equivalent lea if the condition code register def's
|
|
|
|
// are dead!
|
|
|
|
if (hasLiveCondCodeDef(MI))
|
|
|
|
return 0;
|
2006-05-31 04:26:50 +08:00
|
|
|
|
2007-10-06 04:34:26 +08:00
|
|
|
switch (MIOpc) {
|
|
|
|
default: return 0;
|
|
|
|
case X86::INC64r:
|
2009-01-07 07:34:46 +08:00
|
|
|
case X86::INC32r:
|
|
|
|
case X86::INC64_32r: {
|
2007-10-06 04:34:26 +08:00
|
|
|
assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!");
|
2007-10-09 15:14:53 +08:00
|
|
|
unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r
|
|
|
|
: (is64Bit ? X86::LEA64_32r : X86::LEA32r);
|
2010-07-09 07:46:44 +08:00
|
|
|
NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
|
2009-05-14 05:33:08 +08:00
|
|
|
.addReg(Dest, RegState::Define |
|
|
|
|
getDeadRegState(isDead)),
|
2009-04-09 05:14:34 +08:00
|
|
|
Src, isKill, 1);
|
2007-10-06 04:34:26 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
case X86::INC16r:
|
|
|
|
case X86::INC64_16r:
|
2009-12-11 14:01:48 +08:00
|
|
|
if (DisableLEA16)
|
2009-12-13 04:03:14 +08:00
|
|
|
return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
|
2007-10-06 04:34:26 +08:00
|
|
|
assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!");
|
2009-02-12 05:51:19 +08:00
|
|
|
NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
|
2009-05-14 05:33:08 +08:00
|
|
|
.addReg(Dest, RegState::Define |
|
|
|
|
getDeadRegState(isDead)),
|
2008-07-03 17:09:37 +08:00
|
|
|
Src, isKill, 1);
|
2007-10-06 04:34:26 +08:00
|
|
|
break;
|
|
|
|
case X86::DEC64r:
|
2009-01-07 07:34:46 +08:00
|
|
|
case X86::DEC32r:
|
|
|
|
case X86::DEC64_32r: {
|
2007-10-06 04:34:26 +08:00
|
|
|
assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!");
|
2007-10-09 15:14:53 +08:00
|
|
|
unsigned Opc = MIOpc == X86::DEC64r ? X86::LEA64r
|
|
|
|
: (is64Bit ? X86::LEA64_32r : X86::LEA32r);
|
2010-07-09 07:46:44 +08:00
|
|
|
NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
|
2009-05-14 05:33:08 +08:00
|
|
|
.addReg(Dest, RegState::Define |
|
|
|
|
getDeadRegState(isDead)),
|
2009-04-09 05:14:34 +08:00
|
|
|
Src, isKill, -1);
|
2007-10-06 04:34:26 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
case X86::DEC16r:
|
|
|
|
case X86::DEC64_16r:
|
2009-12-11 14:01:48 +08:00
|
|
|
if (DisableLEA16)
|
2009-12-13 04:03:14 +08:00
|
|
|
return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
|
2007-10-06 04:34:26 +08:00
|
|
|
assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!");
|
2009-02-12 05:51:19 +08:00
|
|
|
NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
|
2009-05-14 05:33:08 +08:00
|
|
|
.addReg(Dest, RegState::Define |
|
|
|
|
getDeadRegState(isDead)),
|
2008-07-03 17:09:37 +08:00
|
|
|
Src, isKill, -1);
|
2007-10-06 04:34:26 +08:00
|
|
|
break;
|
|
|
|
case X86::ADD64rr:
|
|
|
|
case X86::ADD32rr: {
|
|
|
|
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
|
2007-10-09 15:14:53 +08:00
|
|
|
unsigned Opc = MIOpc == X86::ADD64rr ? X86::LEA64r
|
|
|
|
: (is64Bit ? X86::LEA64_32r : X86::LEA32r);
|
2008-07-03 17:09:37 +08:00
|
|
|
unsigned Src2 = MI->getOperand(2).getReg();
|
|
|
|
bool isKill2 = MI->getOperand(2).isKill();
|
2009-02-12 05:51:19 +08:00
|
|
|
NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(Opc))
|
2009-05-14 05:33:08 +08:00
|
|
|
.addReg(Dest, RegState::Define |
|
|
|
|
getDeadRegState(isDead)),
|
2008-07-03 17:09:37 +08:00
|
|
|
Src, isKill, Src2, isKill2);
|
|
|
|
if (LV && isKill2)
|
|
|
|
LV->replaceKillInstruction(Src2, MI, NewMI);
|
2007-10-06 04:34:26 +08:00
|
|
|
break;
|
|
|
|
}
|
2008-07-03 17:09:37 +08:00
|
|
|
case X86::ADD16rr: {
|
2009-12-11 14:01:48 +08:00
|
|
|
if (DisableLEA16)
|
2009-12-13 04:03:14 +08:00
|
|
|
return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
|
2007-10-06 04:34:26 +08:00
|
|
|
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
|
2008-07-03 17:09:37 +08:00
|
|
|
unsigned Src2 = MI->getOperand(2).getReg();
|
|
|
|
bool isKill2 = MI->getOperand(2).isKill();
|
2009-02-12 05:51:19 +08:00
|
|
|
NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
|
2009-05-14 05:33:08 +08:00
|
|
|
.addReg(Dest, RegState::Define |
|
|
|
|
getDeadRegState(isDead)),
|
2008-07-03 17:09:37 +08:00
|
|
|
Src, isKill, Src2, isKill2);
|
|
|
|
if (LV && isKill2)
|
|
|
|
LV->replaceKillInstruction(Src2, MI, NewMI);
|
2007-10-06 04:34:26 +08:00
|
|
|
break;
|
2008-07-03 17:09:37 +08:00
|
|
|
}
|
2007-10-06 04:34:26 +08:00
|
|
|
case X86::ADD64ri32:
|
|
|
|
case X86::ADD64ri8:
|
|
|
|
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
|
2010-07-09 07:46:44 +08:00
|
|
|
NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
|
2009-12-11 14:01:48 +08:00
|
|
|
.addReg(Dest, RegState::Define |
|
|
|
|
getDeadRegState(isDead)),
|
|
|
|
Src, isKill, MI->getOperand(2).getImm());
|
2007-10-06 04:34:26 +08:00
|
|
|
break;
|
|
|
|
case X86::ADD32ri:
|
2009-12-11 14:01:48 +08:00
|
|
|
case X86::ADD32ri8: {
|
2007-10-06 04:34:26 +08:00
|
|
|
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
|
2009-12-11 14:01:48 +08:00
|
|
|
unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
|
2010-07-09 07:46:44 +08:00
|
|
|
NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
|
2009-12-11 14:01:48 +08:00
|
|
|
.addReg(Dest, RegState::Define |
|
|
|
|
getDeadRegState(isDead)),
|
2009-04-09 05:14:34 +08:00
|
|
|
Src, isKill, MI->getOperand(2).getImm());
|
2007-10-06 04:34:26 +08:00
|
|
|
break;
|
2009-12-11 14:01:48 +08:00
|
|
|
}
|
2007-10-06 04:34:26 +08:00
|
|
|
case X86::ADD16ri:
|
|
|
|
case X86::ADD16ri8:
|
2009-12-11 14:01:48 +08:00
|
|
|
if (DisableLEA16)
|
2009-12-13 04:03:14 +08:00
|
|
|
return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
|
2007-10-06 04:34:26 +08:00
|
|
|
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
|
2010-07-09 07:46:44 +08:00
|
|
|
NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
|
2009-12-11 14:01:48 +08:00
|
|
|
.addReg(Dest, RegState::Define |
|
|
|
|
getDeadRegState(isDead)),
|
|
|
|
Src, isKill, MI->getOperand(2).getImm());
|
2007-10-06 04:34:26 +08:00
|
|
|
break;
|
|
|
|
}
|
2005-01-02 10:37:07 +08:00
|
|
|
}
|
2006-12-02 05:52:41 +08:00
|
|
|
}
|
2007-10-06 04:34:26 +08:00
|
|
|
|
2008-02-07 16:29:53 +08:00
|
|
|
if (!NewMI) return 0;
|
|
|
|
|
2008-07-03 17:09:37 +08:00
|
|
|
if (LV) { // Update live variables
|
|
|
|
if (isKill)
|
|
|
|
LV->replaceKillInstruction(Src, MI, NewMI);
|
|
|
|
if (isDead)
|
|
|
|
LV->replaceKillInstruction(Dest, MI, NewMI);
|
|
|
|
}
|
|
|
|
|
2007-10-06 04:34:26 +08:00
|
|
|
MFI->insert(MBBI, NewMI); // Insert the new inst
|
2006-11-16 04:58:11 +08:00
|
|
|
return NewMI;
|
2005-01-02 10:37:07 +08:00
|
|
|
}
|
|
|
|
|
Teach the code generator that shrd/shld is commutable if it has an immediate.
This allows us to generate this:
foo:
mov %EAX, DWORD PTR [%ESP + 4]
mov %EDX, DWORD PTR [%ESP + 8]
shld %EDX, %EDX, 2
shl %EAX, 2
ret
instead of this:
foo:
mov %EAX, DWORD PTR [%ESP + 4]
mov %ECX, DWORD PTR [%ESP + 8]
mov %EDX, %EAX
shrd %EDX, %ECX, 30
shl %EAX, 2
ret
Note the magically transmogrifying immediate.
llvm-svn: 19686
2005-01-19 15:11:01 +08:00
|
|
|
/// commuteInstruction - We have a few instructions that must be hacked on to
|
|
|
|
/// commute them.
|
|
|
|
///
|
2008-06-16 15:33:11 +08:00
|
|
|
MachineInstr *
|
|
|
|
X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
|
Teach the code generator that shrd/shld is commutable if it has an immediate.
This allows us to generate this:
foo:
mov %EAX, DWORD PTR [%ESP + 4]
mov %EDX, DWORD PTR [%ESP + 8]
shld %EDX, %EDX, 2
shl %EAX, 2
ret
instead of this:
foo:
mov %EAX, DWORD PTR [%ESP + 4]
mov %ECX, DWORD PTR [%ESP + 8]
mov %EDX, %EAX
shrd %EDX, %ECX, 30
shl %EAX, 2
ret
Note the magically transmogrifying immediate.
llvm-svn: 19686
2005-01-19 15:11:01 +08:00
|
|
|
switch (MI->getOpcode()) {
|
2005-01-19 15:31:24 +08:00
|
|
|
case X86::SHRD16rri8: // A = SHRD16rri8 B, C, I -> A = SHLD16rri8 C, B, (16-I)
|
|
|
|
case X86::SHLD16rri8: // A = SHLD16rri8 B, C, I -> A = SHRD16rri8 C, B, (16-I)
|
Teach the code generator that shrd/shld is commutable if it has an immediate.
This allows us to generate this:
foo:
mov %EAX, DWORD PTR [%ESP + 4]
mov %EDX, DWORD PTR [%ESP + 8]
shld %EDX, %EDX, 2
shl %EAX, 2
ret
instead of this:
foo:
mov %EAX, DWORD PTR [%ESP + 4]
mov %ECX, DWORD PTR [%ESP + 8]
mov %EDX, %EAX
shrd %EDX, %ECX, 30
shl %EAX, 2
ret
Note the magically transmogrifying immediate.
llvm-svn: 19686
2005-01-19 15:11:01 +08:00
|
|
|
case X86::SHRD32rri8: // A = SHRD32rri8 B, C, I -> A = SHLD32rri8 C, B, (32-I)
|
2007-09-15 07:17:45 +08:00
|
|
|
case X86::SHLD32rri8: // A = SHLD32rri8 B, C, I -> A = SHRD32rri8 C, B, (32-I)
|
|
|
|
case X86::SHRD64rri8: // A = SHRD64rri8 B, C, I -> A = SHLD64rri8 C, B, (64-I)
|
|
|
|
case X86::SHLD64rri8:{// A = SHLD64rri8 B, C, I -> A = SHRD64rri8 C, B, (64-I)
|
2005-01-19 15:31:24 +08:00
|
|
|
unsigned Opc;
|
|
|
|
unsigned Size;
|
|
|
|
switch (MI->getOpcode()) {
|
2009-07-15 00:55:14 +08:00
|
|
|
default: llvm_unreachable("Unreachable!");
|
2005-01-19 15:31:24 +08:00
|
|
|
case X86::SHRD16rri8: Size = 16; Opc = X86::SHLD16rri8; break;
|
|
|
|
case X86::SHLD16rri8: Size = 16; Opc = X86::SHRD16rri8; break;
|
|
|
|
case X86::SHRD32rri8: Size = 32; Opc = X86::SHLD32rri8; break;
|
|
|
|
case X86::SHLD32rri8: Size = 32; Opc = X86::SHRD32rri8; break;
|
2007-09-15 07:17:45 +08:00
|
|
|
case X86::SHRD64rri8: Size = 64; Opc = X86::SHLD64rri8; break;
|
|
|
|
case X86::SHLD64rri8: Size = 64; Opc = X86::SHRD64rri8; break;
|
2005-01-19 15:31:24 +08:00
|
|
|
}
|
2007-12-31 04:49:49 +08:00
|
|
|
unsigned Amt = MI->getOperand(3).getImm();
|
2008-10-17 09:23:35 +08:00
|
|
|
if (NewMI) {
|
|
|
|
MachineFunction &MF = *MI->getParent()->getParent();
|
|
|
|
MI = MF.CloneMachineInstr(MI);
|
|
|
|
NewMI = false;
|
2008-02-13 10:46:49 +08:00
|
|
|
}
|
2008-10-17 09:23:35 +08:00
|
|
|
MI->setDesc(get(Opc));
|
|
|
|
MI->getOperand(3).setImm(Size-Amt);
|
|
|
|
return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
|
Teach the code generator that shrd/shld is commutable if it has an immediate.
This allows us to generate this:
foo:
mov %EAX, DWORD PTR [%ESP + 4]
mov %EDX, DWORD PTR [%ESP + 8]
shld %EDX, %EDX, 2
shl %EAX, 2
ret
instead of this:
foo:
mov %EAX, DWORD PTR [%ESP + 4]
mov %ECX, DWORD PTR [%ESP + 8]
mov %EDX, %EAX
shrd %EDX, %ECX, 30
shl %EAX, 2
ret
Note the magically transmogrifying immediate.
llvm-svn: 19686
2005-01-19 15:11:01 +08:00
|
|
|
}
|
2007-10-06 07:13:21 +08:00
|
|
|
case X86::CMOVB16rr:
|
|
|
|
case X86::CMOVB32rr:
|
|
|
|
case X86::CMOVB64rr:
|
|
|
|
case X86::CMOVAE16rr:
|
|
|
|
case X86::CMOVAE32rr:
|
|
|
|
case X86::CMOVAE64rr:
|
|
|
|
case X86::CMOVE16rr:
|
|
|
|
case X86::CMOVE32rr:
|
|
|
|
case X86::CMOVE64rr:
|
|
|
|
case X86::CMOVNE16rr:
|
|
|
|
case X86::CMOVNE32rr:
|
|
|
|
case X86::CMOVNE64rr:
|
|
|
|
case X86::CMOVBE16rr:
|
|
|
|
case X86::CMOVBE32rr:
|
|
|
|
case X86::CMOVBE64rr:
|
|
|
|
case X86::CMOVA16rr:
|
|
|
|
case X86::CMOVA32rr:
|
|
|
|
case X86::CMOVA64rr:
|
|
|
|
case X86::CMOVL16rr:
|
|
|
|
case X86::CMOVL32rr:
|
|
|
|
case X86::CMOVL64rr:
|
|
|
|
case X86::CMOVGE16rr:
|
|
|
|
case X86::CMOVGE32rr:
|
|
|
|
case X86::CMOVGE64rr:
|
|
|
|
case X86::CMOVLE16rr:
|
|
|
|
case X86::CMOVLE32rr:
|
|
|
|
case X86::CMOVLE64rr:
|
|
|
|
case X86::CMOVG16rr:
|
|
|
|
case X86::CMOVG32rr:
|
|
|
|
case X86::CMOVG64rr:
|
|
|
|
case X86::CMOVS16rr:
|
|
|
|
case X86::CMOVS32rr:
|
|
|
|
case X86::CMOVS64rr:
|
|
|
|
case X86::CMOVNS16rr:
|
|
|
|
case X86::CMOVNS32rr:
|
|
|
|
case X86::CMOVNS64rr:
|
|
|
|
case X86::CMOVP16rr:
|
|
|
|
case X86::CMOVP32rr:
|
|
|
|
case X86::CMOVP64rr:
|
|
|
|
case X86::CMOVNP16rr:
|
|
|
|
case X86::CMOVNP32rr:
|
2009-01-07 08:35:10 +08:00
|
|
|
case X86::CMOVNP64rr:
|
|
|
|
case X86::CMOVO16rr:
|
|
|
|
case X86::CMOVO32rr:
|
|
|
|
case X86::CMOVO64rr:
|
|
|
|
case X86::CMOVNO16rr:
|
|
|
|
case X86::CMOVNO32rr:
|
|
|
|
case X86::CMOVNO64rr: {
|
2007-10-06 07:13:21 +08:00
|
|
|
unsigned Opc = 0;
|
|
|
|
switch (MI->getOpcode()) {
|
|
|
|
default: break;
|
|
|
|
case X86::CMOVB16rr: Opc = X86::CMOVAE16rr; break;
|
|
|
|
case X86::CMOVB32rr: Opc = X86::CMOVAE32rr; break;
|
|
|
|
case X86::CMOVB64rr: Opc = X86::CMOVAE64rr; break;
|
|
|
|
case X86::CMOVAE16rr: Opc = X86::CMOVB16rr; break;
|
|
|
|
case X86::CMOVAE32rr: Opc = X86::CMOVB32rr; break;
|
|
|
|
case X86::CMOVAE64rr: Opc = X86::CMOVB64rr; break;
|
|
|
|
case X86::CMOVE16rr: Opc = X86::CMOVNE16rr; break;
|
|
|
|
case X86::CMOVE32rr: Opc = X86::CMOVNE32rr; break;
|
|
|
|
case X86::CMOVE64rr: Opc = X86::CMOVNE64rr; break;
|
|
|
|
case X86::CMOVNE16rr: Opc = X86::CMOVE16rr; break;
|
|
|
|
case X86::CMOVNE32rr: Opc = X86::CMOVE32rr; break;
|
|
|
|
case X86::CMOVNE64rr: Opc = X86::CMOVE64rr; break;
|
|
|
|
case X86::CMOVBE16rr: Opc = X86::CMOVA16rr; break;
|
|
|
|
case X86::CMOVBE32rr: Opc = X86::CMOVA32rr; break;
|
|
|
|
case X86::CMOVBE64rr: Opc = X86::CMOVA64rr; break;
|
|
|
|
case X86::CMOVA16rr: Opc = X86::CMOVBE16rr; break;
|
|
|
|
case X86::CMOVA32rr: Opc = X86::CMOVBE32rr; break;
|
|
|
|
case X86::CMOVA64rr: Opc = X86::CMOVBE64rr; break;
|
|
|
|
case X86::CMOVL16rr: Opc = X86::CMOVGE16rr; break;
|
|
|
|
case X86::CMOVL32rr: Opc = X86::CMOVGE32rr; break;
|
|
|
|
case X86::CMOVL64rr: Opc = X86::CMOVGE64rr; break;
|
|
|
|
case X86::CMOVGE16rr: Opc = X86::CMOVL16rr; break;
|
|
|
|
case X86::CMOVGE32rr: Opc = X86::CMOVL32rr; break;
|
|
|
|
case X86::CMOVGE64rr: Opc = X86::CMOVL64rr; break;
|
|
|
|
case X86::CMOVLE16rr: Opc = X86::CMOVG16rr; break;
|
|
|
|
case X86::CMOVLE32rr: Opc = X86::CMOVG32rr; break;
|
|
|
|
case X86::CMOVLE64rr: Opc = X86::CMOVG64rr; break;
|
|
|
|
case X86::CMOVG16rr: Opc = X86::CMOVLE16rr; break;
|
|
|
|
case X86::CMOVG32rr: Opc = X86::CMOVLE32rr; break;
|
|
|
|
case X86::CMOVG64rr: Opc = X86::CMOVLE64rr; break;
|
|
|
|
case X86::CMOVS16rr: Opc = X86::CMOVNS16rr; break;
|
|
|
|
case X86::CMOVS32rr: Opc = X86::CMOVNS32rr; break;
|
2009-04-18 13:16:01 +08:00
|
|
|
case X86::CMOVS64rr: Opc = X86::CMOVNS64rr; break;
|
2007-10-06 07:13:21 +08:00
|
|
|
case X86::CMOVNS16rr: Opc = X86::CMOVS16rr; break;
|
|
|
|
case X86::CMOVNS32rr: Opc = X86::CMOVS32rr; break;
|
|
|
|
case X86::CMOVNS64rr: Opc = X86::CMOVS64rr; break;
|
|
|
|
case X86::CMOVP16rr: Opc = X86::CMOVNP16rr; break;
|
|
|
|
case X86::CMOVP32rr: Opc = X86::CMOVNP32rr; break;
|
2009-04-18 13:16:01 +08:00
|
|
|
case X86::CMOVP64rr: Opc = X86::CMOVNP64rr; break;
|
2007-10-06 07:13:21 +08:00
|
|
|
case X86::CMOVNP16rr: Opc = X86::CMOVP16rr; break;
|
|
|
|
case X86::CMOVNP32rr: Opc = X86::CMOVP32rr; break;
|
|
|
|
case X86::CMOVNP64rr: Opc = X86::CMOVP64rr; break;
|
2009-01-07 08:35:10 +08:00
|
|
|
case X86::CMOVO16rr: Opc = X86::CMOVNO16rr; break;
|
|
|
|
case X86::CMOVO32rr: Opc = X86::CMOVNO32rr; break;
|
2009-04-18 13:16:01 +08:00
|
|
|
case X86::CMOVO64rr: Opc = X86::CMOVNO64rr; break;
|
2009-01-07 08:35:10 +08:00
|
|
|
case X86::CMOVNO16rr: Opc = X86::CMOVO16rr; break;
|
|
|
|
case X86::CMOVNO32rr: Opc = X86::CMOVO32rr; break;
|
|
|
|
case X86::CMOVNO64rr: Opc = X86::CMOVO64rr; break;
|
2007-10-06 07:13:21 +08:00
|
|
|
}
|
2008-10-17 09:23:35 +08:00
|
|
|
if (NewMI) {
|
|
|
|
MachineFunction &MF = *MI->getParent()->getParent();
|
|
|
|
MI = MF.CloneMachineInstr(MI);
|
|
|
|
NewMI = false;
|
|
|
|
}
|
2008-01-12 02:10:50 +08:00
|
|
|
MI->setDesc(get(Opc));
|
2007-10-06 07:13:21 +08:00
|
|
|
// Fallthrough intended.
|
|
|
|
}
|
Teach the code generator that shrd/shld is commutable if it has an immediate.
This allows us to generate this:
foo:
mov %EAX, DWORD PTR [%ESP + 4]
mov %EDX, DWORD PTR [%ESP + 8]
shld %EDX, %EDX, 2
shl %EAX, 2
ret
instead of this:
foo:
mov %EAX, DWORD PTR [%ESP + 4]
mov %ECX, DWORD PTR [%ESP + 8]
mov %EDX, %EAX
shrd %EDX, %ECX, 30
shl %EAX, 2
ret
Note the magically transmogrifying immediate.
llvm-svn: 19686
2005-01-19 15:11:01 +08:00
|
|
|
default:
|
2008-06-16 15:33:11 +08:00
|
|
|
return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
|
Teach the code generator that shrd/shld is commutable if it has an immediate.
This allows us to generate this:
foo:
mov %EAX, DWORD PTR [%ESP + 4]
mov %EDX, DWORD PTR [%ESP + 8]
shld %EDX, %EDX, 2
shl %EAX, 2
ret
instead of this:
foo:
mov %EAX, DWORD PTR [%ESP + 4]
mov %ECX, DWORD PTR [%ESP + 8]
mov %EDX, %EAX
shrd %EDX, %ECX, 30
shl %EAX, 2
ret
Note the magically transmogrifying immediate.
llvm-svn: 19686
2005-01-19 15:11:01 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2006-10-21 01:42:20 +08:00
|
|
|
/// GetCondFromBranchOpc - Map a 4-byte-relative conditional branch opcode to
/// the X86::CondCode it tests, or COND_INVALID for any opcode this table
/// does not recognize.
static X86::CondCode GetCondFromBranchOpc(unsigned BrOpc) {
  switch (BrOpc) {
  case X86::JE_4:  return X86::COND_E;
  case X86::JNE_4: return X86::COND_NE;
  case X86::JL_4:  return X86::COND_L;
  case X86::JLE_4: return X86::COND_LE;
  case X86::JG_4:  return X86::COND_G;
  case X86::JGE_4: return X86::COND_GE;
  case X86::JB_4:  return X86::COND_B;
  case X86::JBE_4: return X86::COND_BE;
  case X86::JA_4:  return X86::COND_A;
  case X86::JAE_4: return X86::COND_AE;
  case X86::JS_4:  return X86::COND_S;
  case X86::JNS_4: return X86::COND_NS;
  case X86::JP_4:  return X86::COND_P;
  case X86::JNP_4: return X86::COND_NP;
  case X86::JO_4:  return X86::COND_O;
  case X86::JNO_4: return X86::COND_NO;
  default:         return X86::COND_INVALID;
  }
}
|
|
|
|
|
|
|
|
/// GetCondBranchFromCond - Return the 4-byte-relative conditional branch
/// opcode that tests the given condition code. Asserts on condition codes
/// that have no corresponding branch.
unsigned X86::GetCondBranchFromCond(X86::CondCode CC) {
  switch (CC) {
  default: llvm_unreachable("Illegal condition code!");
  case X86::COND_E:  return X86::JE_4;
  case X86::COND_NE: return X86::JNE_4;
  case X86::COND_L:  return X86::JL_4;
  case X86::COND_LE: return X86::JLE_4;
  case X86::COND_G:  return X86::JG_4;
  case X86::COND_GE: return X86::JGE_4;
  case X86::COND_B:  return X86::JB_4;
  case X86::COND_BE: return X86::JBE_4;
  case X86::COND_A:  return X86::JA_4;
  case X86::COND_AE: return X86::JAE_4;
  case X86::COND_S:  return X86::JS_4;
  case X86::COND_NS: return X86::JNS_4;
  case X86::COND_P:  return X86::JP_4;
  case X86::COND_NP: return X86::JNP_4;
  case X86::COND_O:  return X86::JO_4;
  case X86::COND_NO: return X86::JNO_4;
  }
}
|
|
|
|
|
2006-10-21 13:52:40 +08:00
|
|
|
/// GetOppositeBranchCondition - Return the inverse of the specified condition,
|
|
|
|
/// e.g. turning COND_E to COND_NE.
|
|
|
|
X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) {
|
|
|
|
switch (CC) {
|
2009-07-15 00:55:14 +08:00
|
|
|
default: llvm_unreachable("Illegal condition code!");
|
2006-10-21 13:52:40 +08:00
|
|
|
case X86::COND_E: return X86::COND_NE;
|
|
|
|
case X86::COND_NE: return X86::COND_E;
|
|
|
|
case X86::COND_L: return X86::COND_GE;
|
|
|
|
case X86::COND_LE: return X86::COND_G;
|
|
|
|
case X86::COND_G: return X86::COND_LE;
|
|
|
|
case X86::COND_GE: return X86::COND_L;
|
|
|
|
case X86::COND_B: return X86::COND_AE;
|
|
|
|
case X86::COND_BE: return X86::COND_A;
|
|
|
|
case X86::COND_A: return X86::COND_BE;
|
|
|
|
case X86::COND_AE: return X86::COND_B;
|
|
|
|
case X86::COND_S: return X86::COND_NS;
|
|
|
|
case X86::COND_NS: return X86::COND_S;
|
|
|
|
case X86::COND_P: return X86::COND_NP;
|
|
|
|
case X86::COND_NP: return X86::COND_P;
|
|
|
|
case X86::COND_O: return X86::COND_NO;
|
|
|
|
case X86::COND_NO: return X86::COND_O;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2007-06-15 06:03:45 +08:00
|
|
|
bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
|
2008-01-07 15:27:27 +08:00
|
|
|
const TargetInstrDesc &TID = MI->getDesc();
|
|
|
|
if (!TID.isTerminator()) return false;
|
2008-01-07 09:56:04 +08:00
|
|
|
|
|
|
|
// Conditional branch is a special case.
|
2008-01-07 15:27:27 +08:00
|
|
|
if (TID.isBranch() && !TID.isBarrier())
|
2008-01-07 09:56:04 +08:00
|
|
|
return true;
|
2008-01-07 15:27:27 +08:00
|
|
|
if (!TID.isPredicable())
|
2008-01-07 09:56:04 +08:00
|
|
|
return true;
|
|
|
|
return !isPredicated(MI);
|
2007-06-15 06:03:45 +08:00
|
|
|
}
|
2006-10-21 13:52:40 +08:00
|
|
|
|
2006-10-21 01:42:20 +08:00
|
|
|
bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
|
|
|
|
MachineBasicBlock *&TBB,
|
|
|
|
MachineBasicBlock *&FBB,
|
2009-02-09 15:14:22 +08:00
|
|
|
SmallVectorImpl<MachineOperand> &Cond,
|
|
|
|
bool AllowModify) const {
|
Optimized FCMP_OEQ and FCMP_UNE for x86.
Where previously LLVM might emit code like this:
ucomisd %xmm1, %xmm0
setne %al
setp %cl
orb %al, %cl
jne .LBB4_2
it now emits this:
ucomisd %xmm1, %xmm0
jne .LBB4_2
jp .LBB4_2
It has fewer instructions and uses fewer registers, but it does
have more branches. And in the case that this code is followed by
a non-fallthrough edge, it may be followed by a jmp instruction,
resulting in three branch instructions in sequence. Some effort
is made to avoid this situation.
To achieve this, X86ISelLowering.cpp now recognizes FCMP_OEQ and
FCMP_UNE in lowered form, and replace them with code that emits
two branches, except in the case where it would require converting
a fall-through edge to an explicit branch.
Also, X86InstrInfo.cpp's branch analysis and transform code now
knows now to handle blocks with multiple conditional branches. It
uses loops instead of having fixed checks for up to two
instructions. It can now analyze and transform code generated
from FCMP_OEQ and FCMP_UNE.
llvm-svn: 57873
2008-10-21 11:29:32 +08:00
|
|
|
// Start from the bottom of the block and work up, examining the
|
|
|
|
// terminator instructions.
|
2006-10-21 01:42:20 +08:00
|
|
|
MachineBasicBlock::iterator I = MBB.end();
|
2010-04-14 02:50:27 +08:00
|
|
|
MachineBasicBlock::iterator UnCondBrIter = MBB.end();
|
Optimized FCMP_OEQ and FCMP_UNE for x86.
Where previously LLVM might emit code like this:
ucomisd %xmm1, %xmm0
setne %al
setp %cl
orb %al, %cl
jne .LBB4_2
it now emits this:
ucomisd %xmm1, %xmm0
jne .LBB4_2
jp .LBB4_2
It has fewer instructions and uses fewer registers, but it does
have more branches. And in the case that this code is followed by
a non-fallthrough edge, it may be followed by a jmp instruction,
resulting in three branch instructions in sequence. Some effort
is made to avoid this situation.
To achieve this, X86ISelLowering.cpp now recognizes FCMP_OEQ and
FCMP_UNE in lowered form, and replace them with code that emits
two branches, except in the case where it would require converting
a fall-through edge to an explicit branch.
Also, X86InstrInfo.cpp's branch analysis and transform code now
knows now to handle blocks with multiple conditional branches. It
uses loops instead of having fixed checks for up to two
instructions. It can now analyze and transform code generated
from FCMP_OEQ and FCMP_UNE.
llvm-svn: 57873
2008-10-21 11:29:32 +08:00
|
|
|
while (I != MBB.begin()) {
|
|
|
|
--I;
|
2010-04-02 09:38:09 +08:00
|
|
|
if (I->isDebugValue())
|
|
|
|
continue;
|
2009-12-14 14:51:19 +08:00
|
|
|
|
|
|
|
// Working from the bottom, when we see a non-terminator instruction, we're
|
|
|
|
// done.
|
2010-07-17 01:41:44 +08:00
|
|
|
if (!isUnpredicatedTerminator(I))
|
Optimized FCMP_OEQ and FCMP_UNE for x86.
Where previously LLVM might emit code like this:
ucomisd %xmm1, %xmm0
setne %al
setp %cl
orb %al, %cl
jne .LBB4_2
it now emits this:
ucomisd %xmm1, %xmm0
jne .LBB4_2
jp .LBB4_2
It has fewer instructions and uses fewer registers, but it does
have more branches. And in the case that this code is followed by
a non-fallthrough edge, it may be followed by a jmp instruction,
resulting in three branch instructions in sequence. Some effort
is made to avoid this situation.
To achieve this, X86ISelLowering.cpp now recognizes FCMP_OEQ and
FCMP_UNE in lowered form, and replace them with code that emits
two branches, except in the case where it would require converting
a fall-through edge to an explicit branch.
Also, X86InstrInfo.cpp's branch analysis and transform code now
knows now to handle blocks with multiple conditional branches. It
uses loops instead of having fixed checks for up to two
instructions. It can now analyze and transform code generated
from FCMP_OEQ and FCMP_UNE.
llvm-svn: 57873
2008-10-21 11:29:32 +08:00
|
|
|
break;
|
2009-12-14 14:51:19 +08:00
|
|
|
|
|
|
|
// A terminator that isn't a branch can't easily be handled by this
|
|
|
|
// analysis.
|
Optimized FCMP_OEQ and FCMP_UNE for x86.
Where previously LLVM might emit code like this:
ucomisd %xmm1, %xmm0
setne %al
setp %cl
orb %al, %cl
jne .LBB4_2
it now emits this:
ucomisd %xmm1, %xmm0
jne .LBB4_2
jp .LBB4_2
It has fewer instructions and uses fewer registers, but it does
have more branches. And in the case that this code is followed by
a non-fallthrough edge, it may be followed by a jmp instruction,
resulting in three branch instructions in sequence. Some effort
is made to avoid this situation.
To achieve this, X86ISelLowering.cpp now recognizes FCMP_OEQ and
FCMP_UNE in lowered form, and replace them with code that emits
two branches, except in the case where it would require converting
a fall-through edge to an explicit branch.
Also, X86InstrInfo.cpp's branch analysis and transform code now
knows now to handle blocks with multiple conditional branches. It
uses loops instead of having fixed checks for up to two
instructions. It can now analyze and transform code generated
from FCMP_OEQ and FCMP_UNE.
llvm-svn: 57873
2008-10-21 11:29:32 +08:00
|
|
|
if (!I->getDesc().isBranch())
|
2006-10-21 01:42:20 +08:00
|
|
|
return true;
|
2009-12-14 14:51:19 +08:00
|
|
|
|
Optimized FCMP_OEQ and FCMP_UNE for x86.
Where previously LLVM might emit code like this:
ucomisd %xmm1, %xmm0
setne %al
setp %cl
orb %al, %cl
jne .LBB4_2
it now emits this:
ucomisd %xmm1, %xmm0
jne .LBB4_2
jp .LBB4_2
It has fewer instructions and uses fewer registers, but it does
have more branches. And in the case that this code is followed by
a non-fallthrough edge, it may be followed by a jmp instruction,
resulting in three branch instructions in sequence. Some effort
is made to avoid this situation.
To achieve this, X86ISelLowering.cpp now recognizes FCMP_OEQ and
FCMP_UNE in lowered form, and replace them with code that emits
two branches, except in the case where it would require converting
a fall-through edge to an explicit branch.
Also, X86InstrInfo.cpp's branch analysis and transform code now
knows now to handle blocks with multiple conditional branches. It
uses loops instead of having fixed checks for up to two
instructions. It can now analyze and transform code generated
from FCMP_OEQ and FCMP_UNE.
llvm-svn: 57873
2008-10-21 11:29:32 +08:00
|
|
|
// Handle unconditional branches.
|
2010-02-12 03:25:55 +08:00
|
|
|
if (I->getOpcode() == X86::JMP_4) {
|
2010-04-14 02:50:27 +08:00
|
|
|
UnCondBrIter = I;
|
|
|
|
|
2009-02-09 15:14:22 +08:00
|
|
|
if (!AllowModify) {
|
|
|
|
TBB = I->getOperand(0).getMBB();
|
2009-05-08 14:34:09 +08:00
|
|
|
continue;
|
2009-02-09 15:14:22 +08:00
|
|
|
}
|
|
|
|
|
Optimized FCMP_OEQ and FCMP_UNE for x86.
Where previously LLVM might emit code like this:
ucomisd %xmm1, %xmm0
setne %al
setp %cl
orb %al, %cl
jne .LBB4_2
it now emits this:
ucomisd %xmm1, %xmm0
jne .LBB4_2
jp .LBB4_2
It has fewer instructions and uses fewer registers, but it does
have more branches. And in the case that this code is followed by
a non-fallthrough edge, it may be followed by a jmp instruction,
resulting in three branch instructions in sequence. Some effort
is made to avoid this situation.
To achieve this, X86ISelLowering.cpp now recognizes FCMP_OEQ and
FCMP_UNE in lowered form, and replace them with code that emits
two branches, except in the case where it would require converting
a fall-through edge to an explicit branch.
Also, X86InstrInfo.cpp's branch analysis and transform code now
knows now to handle blocks with multiple conditional branches. It
uses loops instead of having fixed checks for up to two
instructions. It can now analyze and transform code generated
from FCMP_OEQ and FCMP_UNE.
llvm-svn: 57873
2008-10-21 11:29:32 +08:00
|
|
|
// If the block has any instructions after a JMP, delete them.
|
2009-12-03 08:50:42 +08:00
|
|
|
while (llvm::next(I) != MBB.end())
|
|
|
|
llvm::next(I)->eraseFromParent();
|
2009-12-14 14:51:19 +08:00
|
|
|
|
Optimized FCMP_OEQ and FCMP_UNE for x86.
Where previously LLVM might emit code like this:
ucomisd %xmm1, %xmm0
setne %al
setp %cl
orb %al, %cl
jne .LBB4_2
it now emits this:
ucomisd %xmm1, %xmm0
jne .LBB4_2
jp .LBB4_2
It has fewer instructions and uses fewer registers, but it does
have more branches. And in the case that this code is followed by
a non-fallthrough edge, it may be followed by a jmp instruction,
resulting in three branch instructions in sequence. Some effort
is made to avoid this situation.
To achieve this, X86ISelLowering.cpp now recognizes FCMP_OEQ and
FCMP_UNE in lowered form, and replace them with code that emits
two branches, except in the case where it would require converting
a fall-through edge to an explicit branch.
Also, X86InstrInfo.cpp's branch analysis and transform code now
knows now to handle blocks with multiple conditional branches. It
uses loops instead of having fixed checks for up to two
instructions. It can now analyze and transform code generated
from FCMP_OEQ and FCMP_UNE.
llvm-svn: 57873
2008-10-21 11:29:32 +08:00
|
|
|
Cond.clear();
|
|
|
|
FBB = 0;
|
2009-12-14 14:51:19 +08:00
|
|
|
|
Optimized FCMP_OEQ and FCMP_UNE for x86.
Where previously LLVM might emit code like this:
ucomisd %xmm1, %xmm0
setne %al
setp %cl
orb %al, %cl
jne .LBB4_2
it now emits this:
ucomisd %xmm1, %xmm0
jne .LBB4_2
jp .LBB4_2
It has fewer instructions and uses fewer registers, but it does
have more branches. And in the case that this code is followed by
a non-fallthrough edge, it may be followed by a jmp instruction,
resulting in three branch instructions in sequence. Some effort
is made to avoid this situation.
To achieve this, X86ISelLowering.cpp now recognizes FCMP_OEQ and
FCMP_UNE in lowered form, and replace them with code that emits
two branches, except in the case where it would require converting
a fall-through edge to an explicit branch.
Also, X86InstrInfo.cpp's branch analysis and transform code now
knows now to handle blocks with multiple conditional branches. It
uses loops instead of having fixed checks for up to two
instructions. It can now analyze and transform code generated
from FCMP_OEQ and FCMP_UNE.
llvm-svn: 57873
2008-10-21 11:29:32 +08:00
|
|
|
// Delete the JMP if it's equivalent to a fall-through.
|
|
|
|
if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
|
|
|
|
TBB = 0;
|
|
|
|
I->eraseFromParent();
|
|
|
|
I = MBB.end();
|
2010-04-14 02:50:27 +08:00
|
|
|
UnCondBrIter = MBB.end();
|
Optimized FCMP_OEQ and FCMP_UNE for x86.
Where previously LLVM might emit code like this:
ucomisd %xmm1, %xmm0
setne %al
setp %cl
orb %al, %cl
jne .LBB4_2
it now emits this:
ucomisd %xmm1, %xmm0
jne .LBB4_2
jp .LBB4_2
It has fewer instructions and uses fewer registers, but it does
have more branches. And in the case that this code is followed by
a non-fallthrough edge, it may be followed by a jmp instruction,
resulting in three branch instructions in sequence. Some effort
is made to avoid this situation.
To achieve this, X86ISelLowering.cpp now recognizes FCMP_OEQ and
FCMP_UNE in lowered form, and replace them with code that emits
two branches, except in the case where it would require converting
a fall-through edge to an explicit branch.
Also, X86InstrInfo.cpp's branch analysis and transform code now
knows now to handle blocks with multiple conditional branches. It
uses loops instead of having fixed checks for up to two
instructions. It can now analyze and transform code generated
from FCMP_OEQ and FCMP_UNE.
llvm-svn: 57873
2008-10-21 11:29:32 +08:00
|
|
|
continue;
|
|
|
|
}
|
2009-12-14 14:51:19 +08:00
|
|
|
|
2010-04-14 02:50:27 +08:00
|
|
|
// TBB is used to indicate the unconditional destination.
|
Optimized FCMP_OEQ and FCMP_UNE for x86.
Where previously LLVM might emit code like this:
ucomisd %xmm1, %xmm0
setne %al
setp %cl
orb %al, %cl
jne .LBB4_2
it now emits this:
ucomisd %xmm1, %xmm0
jne .LBB4_2
jp .LBB4_2
It has fewer instructions and uses fewer registers, but it does
have more branches. And in the case that this code is followed by
a non-fallthrough edge, it may be followed by a jmp instruction,
resulting in three branch instructions in sequence. Some effort
is made to avoid this situation.
To achieve this, X86ISelLowering.cpp now recognizes FCMP_OEQ and
FCMP_UNE in lowered form, and replace them with code that emits
two branches, except in the case where it would require converting
a fall-through edge to an explicit branch.
Also, X86InstrInfo.cpp's branch analysis and transform code now
knows now to handle blocks with multiple conditional branches. It
uses loops instead of having fixed checks for up to two
instructions. It can now analyze and transform code generated
from FCMP_OEQ and FCMP_UNE.
llvm-svn: 57873
2008-10-21 11:29:32 +08:00
|
|
|
TBB = I->getOperand(0).getMBB();
|
|
|
|
continue;
|
2006-10-21 01:42:20 +08:00
|
|
|
}
|
2009-12-14 14:51:19 +08:00
|
|
|
|
Optimized FCMP_OEQ and FCMP_UNE for x86.
Where previously LLVM might emit code like this:
ucomisd %xmm1, %xmm0
setne %al
setp %cl
orb %al, %cl
jne .LBB4_2
it now emits this:
ucomisd %xmm1, %xmm0
jne .LBB4_2
jp .LBB4_2
It has fewer instructions and uses fewer registers, but it does
have more branches. And in the case that this code is followed by
a non-fallthrough edge, it may be followed by a jmp instruction,
resulting in three branch instructions in sequence. Some effort
is made to avoid this situation.
To achieve this, X86ISelLowering.cpp now recognizes FCMP_OEQ and
FCMP_UNE in lowered form, and replace them with code that emits
two branches, except in the case where it would require converting
a fall-through edge to an explicit branch.
Also, X86InstrInfo.cpp's branch analysis and transform code now
knows now to handle blocks with multiple conditional branches. It
uses loops instead of having fixed checks for up to two
instructions. It can now analyze and transform code generated
from FCMP_OEQ and FCMP_UNE.
llvm-svn: 57873
2008-10-21 11:29:32 +08:00
|
|
|
// Handle conditional branches.
|
|
|
|
X86::CondCode BranchCode = GetCondFromBranchOpc(I->getOpcode());
|
2006-10-21 01:42:20 +08:00
|
|
|
if (BranchCode == X86::COND_INVALID)
|
|
|
|
return true; // Can't handle indirect branch.
|
2009-12-14 14:51:19 +08:00
|
|
|
|
Optimized FCMP_OEQ and FCMP_UNE for x86.
Where previously LLVM might emit code like this:
ucomisd %xmm1, %xmm0
setne %al
setp %cl
orb %al, %cl
jne .LBB4_2
it now emits this:
ucomisd %xmm1, %xmm0
jne .LBB4_2
jp .LBB4_2
It has fewer instructions and uses fewer registers, but it does
have more branches. And in the case that this code is followed by
a non-fallthrough edge, it may be followed by a jmp instruction,
resulting in three branch instructions in sequence. Some effort
is made to avoid this situation.
To achieve this, X86ISelLowering.cpp now recognizes FCMP_OEQ and
FCMP_UNE in lowered form, and replace them with code that emits
two branches, except in the case where it would require converting
a fall-through edge to an explicit branch.
Also, X86InstrInfo.cpp's branch analysis and transform code now
knows now to handle blocks with multiple conditional branches. It
uses loops instead of having fixed checks for up to two
instructions. It can now analyze and transform code generated
from FCMP_OEQ and FCMP_UNE.
llvm-svn: 57873
2008-10-21 11:29:32 +08:00
|
|
|
// Working from the bottom, handle the first conditional branch.
|
|
|
|
if (Cond.empty()) {
|
2010-04-14 02:50:27 +08:00
|
|
|
MachineBasicBlock *TargetBB = I->getOperand(0).getMBB();
|
|
|
|
if (AllowModify && UnCondBrIter != MBB.end() &&
|
|
|
|
MBB.isLayoutSuccessor(TargetBB)) {
|
|
|
|
// If we can modify the code and it ends in something like:
|
|
|
|
//
|
|
|
|
// jCC L1
|
|
|
|
// jmp L2
|
|
|
|
// L1:
|
|
|
|
// ...
|
|
|
|
// L2:
|
|
|
|
//
|
|
|
|
// Then we can change this to:
|
|
|
|
//
|
|
|
|
// jnCC L2
|
|
|
|
// L1:
|
|
|
|
// ...
|
|
|
|
// L2:
|
|
|
|
//
|
|
|
|
// Which is a bit more efficient.
|
|
|
|
// We conditionally jump to the fall-through block.
|
|
|
|
BranchCode = GetOppositeBranchCondition(BranchCode);
|
|
|
|
unsigned JNCC = GetCondBranchFromCond(BranchCode);
|
|
|
|
MachineBasicBlock::iterator OldInst = I;
|
|
|
|
|
|
|
|
BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(JNCC))
|
|
|
|
.addMBB(UnCondBrIter->getOperand(0).getMBB());
|
|
|
|
BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(X86::JMP_4))
|
|
|
|
.addMBB(TargetBB);
|
|
|
|
MBB.addSuccessor(TargetBB);
|
|
|
|
|
|
|
|
OldInst->eraseFromParent();
|
|
|
|
UnCondBrIter->eraseFromParent();
|
|
|
|
|
|
|
|
// Restart the analysis.
|
|
|
|
UnCondBrIter = MBB.end();
|
|
|
|
I = MBB.end();
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
Optimized FCMP_OEQ and FCMP_UNE for x86.
Where previously LLVM might emit code like this:
ucomisd %xmm1, %xmm0
setne %al
setp %cl
orb %al, %cl
jne .LBB4_2
it now emits this:
ucomisd %xmm1, %xmm0
jne .LBB4_2
jp .LBB4_2
It has fewer instructions and uses fewer registers, but it does
have more branches. And in the case that this code is followed by
a non-fallthrough edge, it may be followed by a jmp instruction,
resulting in three branch instructions in sequence. Some effort
is made to avoid this situation.
To achieve this, X86ISelLowering.cpp now recognizes FCMP_OEQ and
FCMP_UNE in lowered form, and replace them with code that emits
two branches, except in the case where it would require converting
a fall-through edge to an explicit branch.
Also, X86InstrInfo.cpp's branch analysis and transform code now
knows now to handle blocks with multiple conditional branches. It
uses loops instead of having fixed checks for up to two
instructions. It can now analyze and transform code generated
from FCMP_OEQ and FCMP_UNE.
llvm-svn: 57873
2008-10-21 11:29:32 +08:00
|
|
|
FBB = TBB;
|
|
|
|
TBB = I->getOperand(0).getMBB();
|
|
|
|
Cond.push_back(MachineOperand::CreateImm(BranchCode));
|
|
|
|
continue;
|
|
|
|
}
|
2009-12-14 14:51:19 +08:00
|
|
|
|
|
|
|
// Handle subsequent conditional branches. Only handle the case where all
|
|
|
|
// conditional branches branch to the same destination and their condition
|
|
|
|
// opcodes fit one of the special multi-branch idioms.
|
Optimized FCMP_OEQ and FCMP_UNE for x86.
Where previously LLVM might emit code like this:
ucomisd %xmm1, %xmm0
setne %al
setp %cl
orb %al, %cl
jne .LBB4_2
it now emits this:
ucomisd %xmm1, %xmm0
jne .LBB4_2
jp .LBB4_2
It has fewer instructions and uses fewer registers, but it does
have more branches. And in the case that this code is followed by
a non-fallthrough edge, it may be followed by a jmp instruction,
resulting in three branch instructions in sequence. Some effort
is made to avoid this situation.
To achieve this, X86ISelLowering.cpp now recognizes FCMP_OEQ and
FCMP_UNE in lowered form, and replace them with code that emits
two branches, except in the case where it would require converting
a fall-through edge to an explicit branch.
Also, X86InstrInfo.cpp's branch analysis and transform code now
knows now to handle blocks with multiple conditional branches. It
uses loops instead of having fixed checks for up to two
instructions. It can now analyze and transform code generated
from FCMP_OEQ and FCMP_UNE.
llvm-svn: 57873
2008-10-21 11:29:32 +08:00
|
|
|
assert(Cond.size() == 1);
|
|
|
|
assert(TBB);
|
2009-12-14 14:51:19 +08:00
|
|
|
|
|
|
|
// Only handle the case where all conditional branches branch to the same
|
|
|
|
// destination.
|
Optimized FCMP_OEQ and FCMP_UNE for x86.
Where previously LLVM might emit code like this:
ucomisd %xmm1, %xmm0
setne %al
setp %cl
orb %al, %cl
jne .LBB4_2
it now emits this:
ucomisd %xmm1, %xmm0
jne .LBB4_2
jp .LBB4_2
It has fewer instructions and uses fewer registers, but it does
have more branches. And in the case that this code is followed by
a non-fallthrough edge, it may be followed by a jmp instruction,
resulting in three branch instructions in sequence. Some effort
is made to avoid this situation.
To achieve this, X86ISelLowering.cpp now recognizes FCMP_OEQ and
FCMP_UNE in lowered form, and replace them with code that emits
two branches, except in the case where it would require converting
a fall-through edge to an explicit branch.
Also, X86InstrInfo.cpp's branch analysis and transform code now
knows now to handle blocks with multiple conditional branches. It
uses loops instead of having fixed checks for up to two
instructions. It can now analyze and transform code generated
from FCMP_OEQ and FCMP_UNE.
llvm-svn: 57873
2008-10-21 11:29:32 +08:00
|
|
|
if (TBB != I->getOperand(0).getMBB())
|
|
|
|
return true;
|
2009-12-14 14:51:19 +08:00
|
|
|
|
Optimized FCMP_OEQ and FCMP_UNE for x86.
Where previously LLVM might emit code like this:
ucomisd %xmm1, %xmm0
setne %al
setp %cl
orb %al, %cl
jne .LBB4_2
it now emits this:
ucomisd %xmm1, %xmm0
jne .LBB4_2
jp .LBB4_2
It has fewer instructions and uses fewer registers, but it does
have more branches. And in the case that this code is followed by
a non-fallthrough edge, it may be followed by a jmp instruction,
resulting in three branch instructions in sequence. Some effort
is made to avoid this situation.
To achieve this, X86ISelLowering.cpp now recognizes FCMP_OEQ and
FCMP_UNE in lowered form, and replace them with code that emits
two branches, except in the case where it would require converting
a fall-through edge to an explicit branch.
Also, X86InstrInfo.cpp's branch analysis and transform code now
knows now to handle blocks with multiple conditional branches. It
uses loops instead of having fixed checks for up to two
instructions. It can now analyze and transform code generated
from FCMP_OEQ and FCMP_UNE.
llvm-svn: 57873
2008-10-21 11:29:32 +08:00
|
|
|
// If the conditions are the same, we can leave them alone.
|
2009-12-14 14:51:19 +08:00
|
|
|
X86::CondCode OldBranchCode = (X86::CondCode)Cond[0].getImm();
|
Optimized FCMP_OEQ and FCMP_UNE for x86.
Where previously LLVM might emit code like this:
ucomisd %xmm1, %xmm0
setne %al
setp %cl
orb %al, %cl
jne .LBB4_2
it now emits this:
ucomisd %xmm1, %xmm0
jne .LBB4_2
jp .LBB4_2
It has fewer instructions and uses fewer registers, but it does
have more branches. And in the case that this code is followed by
a non-fallthrough edge, it may be followed by a jmp instruction,
resulting in three branch instructions in sequence. Some effort
is made to avoid this situation.
To achieve this, X86ISelLowering.cpp now recognizes FCMP_OEQ and
FCMP_UNE in lowered form, and replace them with code that emits
two branches, except in the case where it would require converting
a fall-through edge to an explicit branch.
Also, X86InstrInfo.cpp's branch analysis and transform code now
knows now to handle blocks with multiple conditional branches. It
uses loops instead of having fixed checks for up to two
instructions. It can now analyze and transform code generated
from FCMP_OEQ and FCMP_UNE.
llvm-svn: 57873
2008-10-21 11:29:32 +08:00
|
|
|
if (OldBranchCode == BranchCode)
|
|
|
|
continue;
|
2009-12-14 14:51:19 +08:00
|
|
|
|
|
|
|
// If they differ, see if they fit one of the known patterns. Theoretically,
|
|
|
|
// we could handle more patterns here, but we shouldn't expect to see them
|
|
|
|
// if instruction selection has done a reasonable job.
|
Optimized FCMP_OEQ and FCMP_UNE for x86.
Where previously LLVM might emit code like this:
ucomisd %xmm1, %xmm0
setne %al
setp %cl
orb %al, %cl
jne .LBB4_2
it now emits this:
ucomisd %xmm1, %xmm0
jne .LBB4_2
jp .LBB4_2
It has fewer instructions and uses fewer registers, but it does
have more branches. And in the case that this code is followed by
a non-fallthrough edge, it may be followed by a jmp instruction,
resulting in three branch instructions in sequence. Some effort
is made to avoid this situation.
To achieve this, X86ISelLowering.cpp now recognizes FCMP_OEQ and
FCMP_UNE in lowered form, and replace them with code that emits
two branches, except in the case where it would require converting
a fall-through edge to an explicit branch.
Also, X86InstrInfo.cpp's branch analysis and transform code now
knows now to handle blocks with multiple conditional branches. It
uses loops instead of having fixed checks for up to two
instructions. It can now analyze and transform code generated
from FCMP_OEQ and FCMP_UNE.
llvm-svn: 57873
2008-10-21 11:29:32 +08:00
|
|
|
if ((OldBranchCode == X86::COND_NP &&
|
|
|
|
BranchCode == X86::COND_E) ||
|
|
|
|
(OldBranchCode == X86::COND_E &&
|
|
|
|
BranchCode == X86::COND_NP))
|
|
|
|
BranchCode = X86::COND_NP_OR_E;
|
|
|
|
else if ((OldBranchCode == X86::COND_P &&
|
|
|
|
BranchCode == X86::COND_NE) ||
|
|
|
|
(OldBranchCode == X86::COND_NE &&
|
|
|
|
BranchCode == X86::COND_P))
|
|
|
|
BranchCode = X86::COND_NE_OR_P;
|
|
|
|
else
|
|
|
|
return true;
|
2009-12-14 14:51:19 +08:00
|
|
|
|
Optimized FCMP_OEQ and FCMP_UNE for x86.
Where previously LLVM might emit code like this:
ucomisd %xmm1, %xmm0
setne %al
setp %cl
orb %al, %cl
jne .LBB4_2
it now emits this:
ucomisd %xmm1, %xmm0
jne .LBB4_2
jp .LBB4_2
It has fewer instructions and uses fewer registers, but it does
have more branches. And in the case that this code is followed by
a non-fallthrough edge, it may be followed by a jmp instruction,
resulting in three branch instructions in sequence. Some effort
is made to avoid this situation.
To achieve this, X86ISelLowering.cpp now recognizes FCMP_OEQ and
FCMP_UNE in lowered form, and replace them with code that emits
two branches, except in the case where it would require converting
a fall-through edge to an explicit branch.
Also, X86InstrInfo.cpp's branch analysis and transform code now
knows now to handle blocks with multiple conditional branches. It
uses loops instead of having fixed checks for up to two
instructions. It can now analyze and transform code generated
from FCMP_OEQ and FCMP_UNE.
llvm-svn: 57873
2008-10-21 11:29:32 +08:00
|
|
|
// Update the MachineOperand.
|
|
|
|
Cond[0].setImm(BranchCode);
|
2007-06-14 01:59:52 +08:00
|
|
|
}
|
|
|
|
|
Optimized FCMP_OEQ and FCMP_UNE for x86.
Where previously LLVM might emit code like this:
ucomisd %xmm1, %xmm0
setne %al
setp %cl
orb %al, %cl
jne .LBB4_2
it now emits this:
ucomisd %xmm1, %xmm0
jne .LBB4_2
jp .LBB4_2
It has fewer instructions and uses fewer registers, but it does
have more branches. And in the case that this code is followed by
a non-fallthrough edge, it may be followed by a jmp instruction,
resulting in three branch instructions in sequence. Some effort
is made to avoid this situation.
To achieve this, X86ISelLowering.cpp now recognizes FCMP_OEQ and
FCMP_UNE in lowered form, and replace them with code that emits
two branches, except in the case where it would require converting
a fall-through edge to an explicit branch.
Also, X86InstrInfo.cpp's branch analysis and transform code now
knows now to handle blocks with multiple conditional branches. It
uses loops instead of having fixed checks for up to two
instructions. It can now analyze and transform code generated
from FCMP_OEQ and FCMP_UNE.
llvm-svn: 57873
2008-10-21 11:29:32 +08:00
|
|
|
return false;
|
2006-10-21 01:42:20 +08:00
|
|
|
}
|
|
|
|
|
2007-05-18 08:18:17 +08:00
|
|
|
/// RemoveBranch - Strip the branch instructions (unconditional JMPs and
/// recognized conditional jumps) off the end of \p MBB.
///
/// Debug-value instructions interleaved with the terminators are skipped
/// over and left in place. Scanning stops at the first instruction from the
/// bottom that is neither a JMP_4 nor a conditional branch we understand.
///
/// \returns the number of branch instructions removed.
unsigned X86InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
  unsigned NumRemoved = 0;
  MachineBasicBlock::iterator I = MBB.end();

  while (I != MBB.begin()) {
    --I;
    // DBG_VALUEs may sit between branches; they are not terminators.
    if (I->isDebugValue())
      continue;
    // Stop once we hit something that isn't a branch we can delete.
    bool IsUncondJmp = I->getOpcode() == X86::JMP_4;
    bool IsCondJmp =
        GetCondFromBranchOpc(I->getOpcode()) != X86::COND_INVALID;
    if (!IsUncondJmp && !IsCondJmp)
      break;
    // Remove the branch and restart the backward scan from the block end.
    I->eraseFromParent();
    I = MBB.end();
    ++NumRemoved;
  }

  return NumRemoved;
}
|
|
|
|
|
2007-05-18 08:18:17 +08:00
|
|
|
unsigned
|
|
|
|
X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
|
|
|
|
MachineBasicBlock *FBB,
|
2010-06-18 06:43:56 +08:00
|
|
|
const SmallVectorImpl<MachineOperand> &Cond,
|
|
|
|
DebugLoc DL) const {
|
2006-10-21 01:42:20 +08:00
|
|
|
// Shouldn't be a fall through.
|
|
|
|
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
|
2006-10-21 13:34:23 +08:00
|
|
|
assert((Cond.size() == 1 || Cond.size() == 0) &&
|
|
|
|
"X86 branch conditions have one component!");
|
|
|
|
|
Optimized FCMP_OEQ and FCMP_UNE for x86.
Where previously LLVM might emit code like this:
ucomisd %xmm1, %xmm0
setne %al
setp %cl
orb %al, %cl
jne .LBB4_2
it now emits this:
ucomisd %xmm1, %xmm0
jne .LBB4_2
jp .LBB4_2
It has fewer instructions and uses fewer registers, but it does
have more branches. And in the case that this code is followed by
a non-fallthrough edge, it may be followed by a jmp instruction,
resulting in three branch instructions in sequence. Some effort
is made to avoid this situation.
To achieve this, X86ISelLowering.cpp now recognizes FCMP_OEQ and
FCMP_UNE in lowered form, and replace them with code that emits
two branches, except in the case where it would require converting
a fall-through edge to an explicit branch.
Also, X86InstrInfo.cpp's branch analysis and transform code now
knows now to handle blocks with multiple conditional branches. It
uses loops instead of having fixed checks for up to two
instructions. It can now analyze and transform code generated
from FCMP_OEQ and FCMP_UNE.
llvm-svn: 57873
2008-10-21 11:29:32 +08:00
|
|
|
if (Cond.empty()) {
|
|
|
|
// Unconditional branch?
|
|
|
|
assert(!FBB && "Unconditional branch with multiple successors!");
|
2010-06-18 06:43:56 +08:00
|
|
|
BuildMI(&MBB, DL, get(X86::JMP_4)).addMBB(TBB);
|
2007-05-18 08:18:17 +08:00
|
|
|
return 1;
|
2006-10-21 01:42:20 +08:00
|
|
|
}
|
Optimized FCMP_OEQ and FCMP_UNE for x86.
Where previously LLVM might emit code like this:
ucomisd %xmm1, %xmm0
setne %al
setp %cl
orb %al, %cl
jne .LBB4_2
it now emits this:
ucomisd %xmm1, %xmm0
jne .LBB4_2
jp .LBB4_2
It has fewer instructions and uses fewer registers, but it does
have more branches. And in the case that this code is followed by
a non-fallthrough edge, it may be followed by a jmp instruction,
resulting in three branch instructions in sequence. Some effort
is made to avoid this situation.
To achieve this, X86ISelLowering.cpp now recognizes FCMP_OEQ and
FCMP_UNE in lowered form, and replace them with code that emits
two branches, except in the case where it would require converting
a fall-through edge to an explicit branch.
Also, X86InstrInfo.cpp's branch analysis and transform code now
knows now to handle blocks with multiple conditional branches. It
uses loops instead of having fixed checks for up to two
instructions. It can now analyze and transform code generated
from FCMP_OEQ and FCMP_UNE.
llvm-svn: 57873
2008-10-21 11:29:32 +08:00
|
|
|
|
|
|
|
// Conditional branch.
|
|
|
|
unsigned Count = 0;
|
|
|
|
X86::CondCode CC = (X86::CondCode)Cond[0].getImm();
|
|
|
|
switch (CC) {
|
|
|
|
case X86::COND_NP_OR_E:
|
|
|
|
// Synthesize NP_OR_E with two branches.
|
2010-06-18 06:43:56 +08:00
|
|
|
BuildMI(&MBB, DL, get(X86::JNP_4)).addMBB(TBB);
|
2010-03-05 08:33:59 +08:00
|
|
|
++Count;
|
2010-06-18 06:43:56 +08:00
|
|
|
BuildMI(&MBB, DL, get(X86::JE_4)).addMBB(TBB);
|
2010-03-05 08:33:59 +08:00
|
|
|
++Count;
|
Optimized FCMP_OEQ and FCMP_UNE for x86.
Where previously LLVM might emit code like this:
ucomisd %xmm1, %xmm0
setne %al
setp %cl
orb %al, %cl
jne .LBB4_2
it now emits this:
ucomisd %xmm1, %xmm0
jne .LBB4_2
jp .LBB4_2
It has fewer instructions and uses fewer registers, but it does
have more branches. And in the case that this code is followed by
a non-fallthrough edge, it may be followed by a jmp instruction,
resulting in three branch instructions in sequence. Some effort
is made to avoid this situation.
To achieve this, X86ISelLowering.cpp now recognizes FCMP_OEQ and
FCMP_UNE in lowered form, and replace them with code that emits
two branches, except in the case where it would require converting
a fall-through edge to an explicit branch.
Also, X86InstrInfo.cpp's branch analysis and transform code now
knows now to handle blocks with multiple conditional branches. It
uses loops instead of having fixed checks for up to two
instructions. It can now analyze and transform code generated
from FCMP_OEQ and FCMP_UNE.
llvm-svn: 57873
2008-10-21 11:29:32 +08:00
|
|
|
break;
|
|
|
|
case X86::COND_NE_OR_P:
|
|
|
|
// Synthesize NE_OR_P with two branches.
|
2010-06-18 06:43:56 +08:00
|
|
|
BuildMI(&MBB, DL, get(X86::JNE_4)).addMBB(TBB);
|
2010-03-05 08:33:59 +08:00
|
|
|
++Count;
|
2010-06-18 06:43:56 +08:00
|
|
|
BuildMI(&MBB, DL, get(X86::JP_4)).addMBB(TBB);
|
2010-03-05 08:33:59 +08:00
|
|
|
++Count;
|
Optimized FCMP_OEQ and FCMP_UNE for x86.
Where previously LLVM might emit code like this:
ucomisd %xmm1, %xmm0
setne %al
setp %cl
orb %al, %cl
jne .LBB4_2
it now emits this:
ucomisd %xmm1, %xmm0
jne .LBB4_2
jp .LBB4_2
It has fewer instructions and uses fewer registers, but it does
have more branches. And in the case that this code is followed by
a non-fallthrough edge, it may be followed by a jmp instruction,
resulting in three branch instructions in sequence. Some effort
is made to avoid this situation.
To achieve this, X86ISelLowering.cpp now recognizes FCMP_OEQ and
FCMP_UNE in lowered form, and replace them with code that emits
two branches, except in the case where it would require converting
a fall-through edge to an explicit branch.
Also, X86InstrInfo.cpp's branch analysis and transform code now
knows now to handle blocks with multiple conditional branches. It
uses loops instead of having fixed checks for up to two
instructions. It can now analyze and transform code generated
from FCMP_OEQ and FCMP_UNE.
llvm-svn: 57873
2008-10-21 11:29:32 +08:00
|
|
|
break;
|
2010-03-05 08:33:59 +08:00
|
|
|
default: {
|
|
|
|
unsigned Opc = GetCondBranchFromCond(CC);
|
2010-06-18 06:43:56 +08:00
|
|
|
BuildMI(&MBB, DL, get(Opc)).addMBB(TBB);
|
2010-03-05 08:33:59 +08:00
|
|
|
++Count;
|
|
|
|
}
|
Optimized FCMP_OEQ and FCMP_UNE for x86.
Where previously LLVM might emit code like this:
ucomisd %xmm1, %xmm0
setne %al
setp %cl
orb %al, %cl
jne .LBB4_2
it now emits this:
ucomisd %xmm1, %xmm0
jne .LBB4_2
jp .LBB4_2
It has fewer instructions and uses fewer registers, but it does
have more branches. And in the case that this code is followed by
a non-fallthrough edge, it may be followed by a jmp instruction,
resulting in three branch instructions in sequence. Some effort
is made to avoid this situation.
To achieve this, X86ISelLowering.cpp now recognizes FCMP_OEQ and
FCMP_UNE in lowered form, and replace them with code that emits
two branches, except in the case where it would require converting
a fall-through edge to an explicit branch.
Also, X86InstrInfo.cpp's branch analysis and transform code now
knows now to handle blocks with multiple conditional branches. It
uses loops instead of having fixed checks for up to two
instructions. It can now analyze and transform code generated
from FCMP_OEQ and FCMP_UNE.
llvm-svn: 57873
2008-10-21 11:29:32 +08:00
|
|
|
}
|
|
|
|
if (FBB) {
|
|
|
|
// Two-way Conditional branch. Insert the second branch.
|
2010-06-18 06:43:56 +08:00
|
|
|
BuildMI(&MBB, DL, get(X86::JMP_4)).addMBB(FBB);
|
Optimized FCMP_OEQ and FCMP_UNE for x86.
Where previously LLVM might emit code like this:
ucomisd %xmm1, %xmm0
setne %al
setp %cl
orb %al, %cl
jne .LBB4_2
it now emits this:
ucomisd %xmm1, %xmm0
jne .LBB4_2
jp .LBB4_2
It has fewer instructions and uses fewer registers, but it does
have more branches. And in the case that this code is followed by
a non-fallthrough edge, it may be followed by a jmp instruction,
resulting in three branch instructions in sequence. Some effort
is made to avoid this situation.
To achieve this, X86ISelLowering.cpp now recognizes FCMP_OEQ and
FCMP_UNE in lowered form, and replace them with code that emits
two branches, except in the case where it would require converting
a fall-through edge to an explicit branch.
Also, X86InstrInfo.cpp's branch analysis and transform code now
knows now to handle blocks with multiple conditional branches. It
uses loops instead of having fixed checks for up to two
instructions. It can now analyze and transform code generated
from FCMP_OEQ and FCMP_UNE.
llvm-svn: 57873
2008-10-21 11:29:32 +08:00
|
|
|
++Count;
|
|
|
|
}
|
|
|
|
return Count;
|
2006-10-21 01:42:20 +08:00
|
|
|
}
|
|
|
|
|
2009-04-15 08:04:23 +08:00
|
|
|
/// isHReg - Test if the given register is a physical h register.
|
|
|
|
static bool isHReg(unsigned Reg) {
|
2009-04-28 00:41:36 +08:00
|
|
|
return X86::GR8_ABCD_HRegClass.contains(Reg);
|
2009-04-15 08:04:23 +08:00
|
|
|
}
|
|
|
|
|
2010-08-27 22:43:06 +08:00
|
|
|
// Try and copy between VR128/VR64 and GR64 registers.
|
|
|
|
static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg) {
|
|
|
|
// SrcReg(VR128) -> DestReg(GR64)
|
|
|
|
// SrcReg(VR64) -> DestReg(GR64)
|
|
|
|
// SrcReg(GR64) -> DestReg(VR128)
|
|
|
|
// SrcReg(GR64) -> DestReg(VR64)
|
|
|
|
|
|
|
|
if (X86::GR64RegClass.contains(DestReg)) {
|
|
|
|
if (X86::VR128RegClass.contains(SrcReg)) {
|
|
|
|
// Copy from a VR128 register to a GR64 register.
|
|
|
|
return X86::MOVPQIto64rr;
|
|
|
|
} else if (X86::VR64RegClass.contains(SrcReg)) {
|
|
|
|
// Copy from a VR64 register to a GR64 register.
|
|
|
|
return X86::MOVSDto64rr;
|
|
|
|
}
|
|
|
|
} else if (X86::GR64RegClass.contains(SrcReg)) {
|
|
|
|
// Copy from a GR64 register to a VR128 register.
|
|
|
|
if (X86::VR128RegClass.contains(DestReg))
|
|
|
|
return X86::MOV64toPQIrr;
|
|
|
|
// Copy from a GR64 register to a VR64 register.
|
|
|
|
else if (X86::VR64RegClass.contains(DestReg))
|
|
|
|
return X86::MOV64toSDrr;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2010-07-09 03:46:25 +08:00
|
|
|
/// copyPhysReg - Emit instructions to copy SrcReg into DestReg, inserting
/// them before MI in MBB.  Handles same-class GPR/vector copies, the
/// GR64<->VR128/VR64 asymmetric cases, and EFLAGS copies (via push/pop).
/// Aborts if no copy sequence is known for the register pair.
void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MI, DebugLoc DL,
                               unsigned DestReg, unsigned SrcReg,
                               bool KillSrc) const {
  // First deal with the normal symmetric copies.
  unsigned Opc = 0;
  if (X86::GR64RegClass.contains(DestReg, SrcReg))
    Opc = X86::MOV64rr;
  else if (X86::GR32RegClass.contains(DestReg, SrcReg))
    Opc = X86::MOV32rr;
  else if (X86::GR16RegClass.contains(DestReg, SrcReg))
    Opc = X86::MOV16rr;
  else if (X86::GR8RegClass.contains(DestReg, SrcReg)) {
    // Copying to or from a physical H register on x86-64 requires a NOREX
    // move.  Otherwise use a normal move.
    if ((isHReg(DestReg) || isHReg(SrcReg)) &&
        TM.getSubtarget<X86Subtarget>().is64Bit())
      Opc = X86::MOV8rr_NOREX;
    else
      Opc = X86::MOV8rr;
  } else if (X86::VR128RegClass.contains(DestReg, SrcReg))
    Opc = X86::MOVAPSrr;
  else if (X86::VR64RegClass.contains(DestReg, SrcReg))
    Opc = X86::MMX_MOVQ64rr;
  else
    // Not a symmetric copy; try the GR64 <-> VR128/VR64 cross-class moves.
    Opc = CopyToFromAsymmetricReg(DestReg, SrcReg);

  if (Opc) {
    // A single-instruction copy was found; emit it and we are done.
    BuildMI(MBB, MI, DL, get(Opc), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  // Moving EFLAGS to / from another register requires a push and a pop.
  if (SrcReg == X86::EFLAGS) {
    if (X86::GR64RegClass.contains(DestReg)) {
      BuildMI(MBB, MI, DL, get(X86::PUSHF64));
      BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg);
      return;
    } else if (X86::GR32RegClass.contains(DestReg)) {
      BuildMI(MBB, MI, DL, get(X86::PUSHF32));
      BuildMI(MBB, MI, DL, get(X86::POP32r), DestReg);
      return;
    }
  }
  if (DestReg == X86::EFLAGS) {
    if (X86::GR64RegClass.contains(SrcReg)) {
      BuildMI(MBB, MI, DL, get(X86::PUSH64r))
        .addReg(SrcReg, getKillRegState(KillSrc));
      BuildMI(MBB, MI, DL, get(X86::POPF64));
      return;
    } else if (X86::GR32RegClass.contains(SrcReg)) {
      BuildMI(MBB, MI, DL, get(X86::PUSH32r))
        .addReg(SrcReg, getKillRegState(KillSrc));
      BuildMI(MBB, MI, DL, get(X86::POPF32));
      return;
    }
  }

  // No known way to copy between these registers: this is a compiler bug.
  DEBUG(dbgs() << "Cannot copy " << RI.getName(SrcReg)
               << " to " << RI.getName(DestReg) << '\n');
  llvm_unreachable("Cannot emit physreg copy instruction");
}
|
|
|
|
|
2010-06-13 04:13:29 +08:00
|
|
|
static unsigned getLoadStoreRegOpcode(unsigned Reg,
|
|
|
|
const TargetRegisterClass *RC,
|
|
|
|
bool isStackAligned,
|
|
|
|
const TargetMachine &TM,
|
|
|
|
bool load) {
|
2010-07-12 11:43:04 +08:00
|
|
|
switch (RC->getID()) {
|
|
|
|
default:
|
|
|
|
llvm_unreachable("Unknown regclass");
|
|
|
|
case X86::GR64RegClassID:
|
|
|
|
case X86::GR64_NOSPRegClassID:
|
2010-06-13 04:13:29 +08:00
|
|
|
return load ? X86::MOV64rm : X86::MOV64mr;
|
2010-07-12 11:43:04 +08:00
|
|
|
case X86::GR32RegClassID:
|
|
|
|
case X86::GR32_NOSPRegClassID:
|
|
|
|
case X86::GR32_ADRegClassID:
|
2010-06-13 04:13:29 +08:00
|
|
|
return load ? X86::MOV32rm : X86::MOV32mr;
|
2010-07-12 11:43:04 +08:00
|
|
|
case X86::GR16RegClassID:
|
2010-06-13 04:13:29 +08:00
|
|
|
return load ? X86::MOV16rm : X86::MOV16mr;
|
2010-07-12 11:43:04 +08:00
|
|
|
case X86::GR8RegClassID:
|
2009-04-28 00:41:36 +08:00
|
|
|
// Copying to or from a physical H register on x86-64 requires a NOREX
|
|
|
|
// move. Otherwise use a normal move.
|
2010-06-13 04:13:29 +08:00
|
|
|
if (isHReg(Reg) &&
|
2009-04-28 00:41:36 +08:00
|
|
|
TM.getSubtarget<X86Subtarget>().is64Bit())
|
2010-06-13 04:13:29 +08:00
|
|
|
return load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX;
|
2009-04-28 00:41:36 +08:00
|
|
|
else
|
2010-06-13 04:13:29 +08:00
|
|
|
return load ? X86::MOV8rm : X86::MOV8mr;
|
2010-07-12 11:43:04 +08:00
|
|
|
case X86::GR64_ABCDRegClassID:
|
2010-06-13 04:13:29 +08:00
|
|
|
return load ? X86::MOV64rm : X86::MOV64mr;
|
2010-07-12 11:43:04 +08:00
|
|
|
case X86::GR32_ABCDRegClassID:
|
2010-06-13 04:13:29 +08:00
|
|
|
return load ? X86::MOV32rm : X86::MOV32mr;
|
2010-07-12 11:43:04 +08:00
|
|
|
case X86::GR16_ABCDRegClassID:
|
2010-06-13 04:13:29 +08:00
|
|
|
return load ? X86::MOV16rm : X86::MOV16mr;
|
2010-07-12 11:43:04 +08:00
|
|
|
case X86::GR8_ABCD_LRegClassID:
|
2010-06-13 04:13:29 +08:00
|
|
|
return load ? X86::MOV8rm :X86::MOV8mr;
|
2010-07-12 11:43:04 +08:00
|
|
|
case X86::GR8_ABCD_HRegClassID:
|
2009-04-28 00:41:36 +08:00
|
|
|
if (TM.getSubtarget<X86Subtarget>().is64Bit())
|
2010-06-13 04:13:29 +08:00
|
|
|
return load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX;
|
2009-04-28 00:41:36 +08:00
|
|
|
else
|
2010-06-13 04:13:29 +08:00
|
|
|
return load ? X86::MOV8rm : X86::MOV8mr;
|
2010-07-12 11:43:04 +08:00
|
|
|
case X86::GR64_NOREXRegClassID:
|
|
|
|
case X86::GR64_NOREX_NOSPRegClassID:
|
2010-06-13 04:13:29 +08:00
|
|
|
return load ? X86::MOV64rm : X86::MOV64mr;
|
2010-07-12 11:43:04 +08:00
|
|
|
case X86::GR32_NOREXRegClassID:
|
2010-06-13 04:13:29 +08:00
|
|
|
return load ? X86::MOV32rm : X86::MOV32mr;
|
2010-07-12 11:43:04 +08:00
|
|
|
case X86::GR16_NOREXRegClassID:
|
2010-06-13 04:13:29 +08:00
|
|
|
return load ? X86::MOV16rm : X86::MOV16mr;
|
2010-07-12 11:43:04 +08:00
|
|
|
case X86::GR8_NOREXRegClassID:
|
2010-06-13 04:13:29 +08:00
|
|
|
return load ? X86::MOV8rm : X86::MOV8mr;
|
2010-07-12 11:43:04 +08:00
|
|
|
case X86::GR64_TCRegClassID:
|
2010-06-13 04:13:29 +08:00
|
|
|
return load ? X86::MOV64rm_TC : X86::MOV64mr_TC;
|
2010-07-12 11:43:04 +08:00
|
|
|
case X86::GR32_TCRegClassID:
|
2010-06-13 04:13:29 +08:00
|
|
|
return load ? X86::MOV32rm_TC : X86::MOV32mr_TC;
|
2010-07-12 11:43:04 +08:00
|
|
|
case X86::RFP80RegClassID:
|
2010-06-13 04:13:29 +08:00
|
|
|
return load ? X86::LD_Fp80m : X86::ST_FpP80m;
|
2010-07-12 11:43:04 +08:00
|
|
|
case X86::RFP64RegClassID:
|
2010-06-13 04:13:29 +08:00
|
|
|
return load ? X86::LD_Fp64m : X86::ST_Fp64m;
|
2010-07-12 11:43:04 +08:00
|
|
|
case X86::RFP32RegClassID:
|
2010-06-13 04:13:29 +08:00
|
|
|
return load ? X86::LD_Fp32m : X86::ST_Fp32m;
|
2010-07-12 11:43:04 +08:00
|
|
|
case X86::FR32RegClassID:
|
2010-06-13 04:13:29 +08:00
|
|
|
return load ? X86::MOVSSrm : X86::MOVSSmr;
|
2010-07-12 11:43:04 +08:00
|
|
|
case X86::FR64RegClassID:
|
2010-06-13 04:13:29 +08:00
|
|
|
return load ? X86::MOVSDrm : X86::MOVSDmr;
|
2010-07-12 11:43:04 +08:00
|
|
|
case X86::VR128RegClassID:
|
2008-07-19 14:30:51 +08:00
|
|
|
// If stack is realigned we can use aligned stores.
|
2010-06-13 04:13:29 +08:00
|
|
|
if (isStackAligned)
|
|
|
|
return load ? X86::MOVAPSrm : X86::MOVAPSmr;
|
|
|
|
else
|
|
|
|
return load ? X86::MOVUPSrm : X86::MOVUPSmr;
|
2010-07-12 11:43:04 +08:00
|
|
|
case X86::VR64RegClassID:
|
2010-06-13 04:13:29 +08:00
|
|
|
return load ? X86::MMX_MOVQ64rm : X86::MMX_MOVQ64mr;
|
2008-01-02 05:11:32 +08:00
|
|
|
}
|
2010-06-13 04:13:29 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static unsigned getStoreRegOpcode(unsigned SrcReg,
|
|
|
|
const TargetRegisterClass *RC,
|
|
|
|
bool isStackAligned,
|
|
|
|
TargetMachine &TM) {
|
|
|
|
return getLoadStoreRegOpcode(SrcReg, RC, isStackAligned, TM, false);
|
|
|
|
}
|
|
|
|
|
2008-01-02 05:11:32 +08:00
|
|
|
|
2010-06-13 04:13:29 +08:00
|
|
|
static unsigned getLoadRegOpcode(unsigned DestReg,
|
|
|
|
const TargetRegisterClass *RC,
|
|
|
|
bool isStackAligned,
|
|
|
|
const TargetMachine &TM) {
|
|
|
|
return getLoadStoreRegOpcode(DestReg, RC, isStackAligned, TM, true);
|
2008-01-02 05:11:32 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/// storeRegToStackSlot - Emit a store of SrcReg to stack slot FrameIdx,
/// inserted before MI in MBB.
void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MI,
                                       unsigned SrcReg, bool isKill, int FrameIdx,
                                       const TargetRegisterClass *RC,
                                       const TargetRegisterInfo *TRI) const {
  const MachineFunction &MF = *MBB.getParent();
  assert(MF.getFrameInfo()->getObjectSize(FrameIdx) >= RC->getSize() &&
         "Stack slot too small for store");
  DebugLoc DL = MBB.findDebugLoc(MI);
  // Aligned stores are usable when the stack is 16-byte aligned or the
  // function may realign it.
  bool Aligned = RI.getStackAlignment() >= 16 || RI.canRealignStack(MF);
  unsigned StoreOpc = getStoreRegOpcode(SrcReg, RC, Aligned, TM);
  addFrameReference(BuildMI(MBB, MI, DL, get(StoreOpc)), FrameIdx)
    .addReg(SrcReg, getKillRegState(isKill));
}
|
|
|
|
|
|
|
|
void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
|
|
|
|
bool isKill,
|
|
|
|
SmallVectorImpl<MachineOperand> &Addr,
|
|
|
|
const TargetRegisterClass *RC,
|
2009-10-10 02:10:05 +08:00
|
|
|
MachineInstr::mmo_iterator MMOBegin,
|
|
|
|
MachineInstr::mmo_iterator MMOEnd,
|
2008-01-02 05:11:32 +08:00
|
|
|
SmallVectorImpl<MachineInstr*> &NewMIs) const {
|
2010-07-13 02:12:35 +08:00
|
|
|
bool isAligned = MMOBegin != MMOEnd && (*MMOBegin)->getAlignment() >= 16;
|
2009-04-28 00:41:36 +08:00
|
|
|
unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
|
2010-04-03 04:16:16 +08:00
|
|
|
DebugLoc DL;
|
2009-02-13 07:08:38 +08:00
|
|
|
MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc));
|
2008-01-02 05:11:32 +08:00
|
|
|
for (unsigned i = 0, e = Addr.size(); i != e; ++i)
|
2009-02-18 13:45:50 +08:00
|
|
|
MIB.addOperand(Addr[i]);
|
2009-05-14 05:33:08 +08:00
|
|
|
MIB.addReg(SrcReg, getKillRegState(isKill));
|
2009-10-10 02:10:05 +08:00
|
|
|
(*MIB).setMemRefs(MMOBegin, MMOEnd);
|
2008-01-02 05:11:32 +08:00
|
|
|
NewMIs.push_back(MIB);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
|
2008-07-19 14:30:51 +08:00
|
|
|
MachineBasicBlock::iterator MI,
|
|
|
|
unsigned DestReg, int FrameIdx,
|
2010-05-07 03:06:44 +08:00
|
|
|
const TargetRegisterClass *RC,
|
|
|
|
const TargetRegisterInfo *TRI) const {
|
2008-07-19 14:30:51 +08:00
|
|
|
const MachineFunction &MF = *MBB.getParent();
|
2010-01-20 02:31:11 +08:00
|
|
|
bool isAligned = (RI.getStackAlignment() >= 16) || RI.canRealignStack(MF);
|
2009-04-28 00:41:36 +08:00
|
|
|
unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
|
2010-01-26 08:03:12 +08:00
|
|
|
DebugLoc DL = MBB.findDebugLoc(MI);
|
2009-02-12 05:51:19 +08:00
|
|
|
addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx);
|
2008-01-02 05:11:32 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
|
2008-07-03 17:09:37 +08:00
|
|
|
SmallVectorImpl<MachineOperand> &Addr,
|
|
|
|
const TargetRegisterClass *RC,
|
2009-10-10 02:10:05 +08:00
|
|
|
MachineInstr::mmo_iterator MMOBegin,
|
|
|
|
MachineInstr::mmo_iterator MMOEnd,
|
2008-01-02 05:11:32 +08:00
|
|
|
SmallVectorImpl<MachineInstr*> &NewMIs) const {
|
2010-07-13 02:12:35 +08:00
|
|
|
bool isAligned = MMOBegin != MMOEnd && (*MMOBegin)->getAlignment() >= 16;
|
2009-04-28 00:41:36 +08:00
|
|
|
unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
|
2010-04-03 04:16:16 +08:00
|
|
|
DebugLoc DL;
|
2009-02-13 07:08:38 +08:00
|
|
|
MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
|
2008-01-02 05:11:32 +08:00
|
|
|
for (unsigned i = 0, e = Addr.size(); i != e; ++i)
|
2009-02-18 13:45:50 +08:00
|
|
|
MIB.addOperand(Addr[i]);
|
2009-10-10 02:10:05 +08:00
|
|
|
(*MIB).setMemRefs(MMOBegin, MMOEnd);
|
2008-01-02 05:11:32 +08:00
|
|
|
NewMIs.push_back(MIB);
|
|
|
|
}
|
|
|
|
|
2008-01-05 07:57:37 +08:00
|
|
|
bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
|
2009-02-12 05:51:19 +08:00
|
|
|
MachineBasicBlock::iterator MI,
|
2010-05-22 09:47:14 +08:00
|
|
|
const std::vector<CalleeSavedInfo> &CSI,
|
|
|
|
const TargetRegisterInfo *TRI) const {
|
2008-01-05 07:57:37 +08:00
|
|
|
if (CSI.empty())
|
|
|
|
return false;
|
|
|
|
|
2010-01-21 05:36:02 +08:00
|
|
|
DebugLoc DL = MBB.findDebugLoc(MI);
|
2009-02-12 05:51:19 +08:00
|
|
|
|
2008-09-27 03:14:21 +08:00
|
|
|
bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
|
2010-07-22 07:19:57 +08:00
|
|
|
bool isWin64 = TM.getSubtarget<X86Subtarget>().isTargetWin64();
|
2008-10-04 19:09:36 +08:00
|
|
|
unsigned SlotSize = is64Bit ? 8 : 4;
|
|
|
|
|
|
|
|
MachineFunction &MF = *MBB.getParent();
|
2009-07-09 14:53:48 +08:00
|
|
|
unsigned FPReg = RI.getFrameRegister(MF);
|
2008-10-04 19:09:36 +08:00
|
|
|
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
|
2009-06-04 10:32:04 +08:00
|
|
|
unsigned CalleeFrameSize = 0;
|
2008-10-04 19:09:36 +08:00
|
|
|
|
2008-01-05 07:57:37 +08:00
|
|
|
unsigned Opc = is64Bit ? X86::PUSH64r : X86::PUSH32r;
|
|
|
|
for (unsigned i = CSI.size(); i != 0; --i) {
|
|
|
|
unsigned Reg = CSI[i-1].getReg();
|
|
|
|
// Add the callee-saved register as live-in. It's killed at the spill.
|
|
|
|
MBB.addLiveIn(Reg);
|
2009-07-09 14:53:48 +08:00
|
|
|
if (Reg == FPReg)
|
|
|
|
// X86RegisterInfo::emitPrologue will handle spilling of frame register.
|
|
|
|
continue;
|
2010-07-22 07:19:57 +08:00
|
|
|
if (!X86::VR128RegClass.contains(Reg) && !isWin64) {
|
2009-06-04 10:32:04 +08:00
|
|
|
CalleeFrameSize += SlotSize;
|
2009-07-09 14:53:48 +08:00
|
|
|
BuildMI(MBB, MI, DL, get(Opc)).addReg(Reg, RegState::Kill);
|
2009-06-04 10:32:04 +08:00
|
|
|
} else {
|
2010-07-22 07:19:57 +08:00
|
|
|
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
|
2010-06-03 04:02:30 +08:00
|
|
|
storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(),
|
2010-07-22 07:19:57 +08:00
|
|
|
RC, &RI);
|
2009-06-04 10:32:04 +08:00
|
|
|
}
|
2008-01-05 07:57:37 +08:00
|
|
|
}
|
2009-06-04 10:32:04 +08:00
|
|
|
|
|
|
|
X86FI->setCalleeSavedFrameSize(CalleeFrameSize);
|
2008-01-05 07:57:37 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
|
2009-02-12 05:51:19 +08:00
|
|
|
MachineBasicBlock::iterator MI,
|
2010-05-22 09:47:14 +08:00
|
|
|
const std::vector<CalleeSavedInfo> &CSI,
|
|
|
|
const TargetRegisterInfo *TRI) const {
|
2008-01-05 07:57:37 +08:00
|
|
|
if (CSI.empty())
|
|
|
|
return false;
|
2009-02-12 05:51:19 +08:00
|
|
|
|
2010-01-21 05:36:02 +08:00
|
|
|
DebugLoc DL = MBB.findDebugLoc(MI);
|
2009-02-12 05:51:19 +08:00
|
|
|
|
2009-07-09 14:53:48 +08:00
|
|
|
MachineFunction &MF = *MBB.getParent();
|
|
|
|
unsigned FPReg = RI.getFrameRegister(MF);
|
2008-01-05 07:57:37 +08:00
|
|
|
bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
|
2010-07-22 07:19:57 +08:00
|
|
|
bool isWin64 = TM.getSubtarget<X86Subtarget>().isTargetWin64();
|
2008-01-05 07:57:37 +08:00
|
|
|
unsigned Opc = is64Bit ? X86::POP64r : X86::POP32r;
|
|
|
|
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
|
|
|
|
unsigned Reg = CSI[i].getReg();
|
2009-07-09 14:53:48 +08:00
|
|
|
if (Reg == FPReg)
|
|
|
|
// X86RegisterInfo::emitEpilogue will handle restoring of frame register.
|
|
|
|
continue;
|
2010-07-22 07:19:57 +08:00
|
|
|
if (!X86::VR128RegClass.contains(Reg) && !isWin64) {
|
2009-06-04 10:32:04 +08:00
|
|
|
BuildMI(MBB, MI, DL, get(Opc), Reg);
|
|
|
|
} else {
|
2010-07-22 07:19:57 +08:00
|
|
|
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
|
2010-06-03 04:02:30 +08:00
|
|
|
loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(),
|
2010-07-22 07:19:57 +08:00
|
|
|
RC, &RI);
|
2009-06-04 10:32:04 +08:00
|
|
|
}
|
2008-01-05 07:57:37 +08:00
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2010-04-26 15:38:55 +08:00
|
|
|
/// emitFrameIndexDebugValue - Build a DBG_VALUE describing a variable that
/// lives at Offset within the stack slot FrameIx.
MachineInstr*
X86InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
                                       int FrameIx, uint64_t Offset,
                                       const MDNode *MDPtr,
                                       DebugLoc DL) const {
  // Describe the location as a frame-index-based address.
  X86AddressMode AM;
  AM.BaseType = X86AddressMode::FrameIndexBase;
  AM.Base.FrameIndex = FrameIx;
  MachineInstrBuilder Builder = BuildMI(MF, DL, get(X86::DBG_VALUE));
  addFullAddress(Builder, AM).addImm(Offset).addMetadata(MDPtr);
  return &*Builder;
}
|
|
|
|
|
2008-07-08 07:14:23 +08:00
|
|
|
/// FuseTwoAddrInst - Build a new instruction with Opcode whose two tied
/// register operands (operands 0 and 1 of MI) are replaced by the memory
/// reference in MOs; all remaining operands of MI are copied over.
static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode,
                                   const SmallVectorImpl<MachineOperand> &MOs,
                                     MachineInstr *MI,
                                     const TargetInstrInfo &TII) {
  // Start the new instruction with the memory reference operands.
  MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode),
                                              MI->getDebugLoc(), true);
  MachineInstrBuilder MIB(NewMI);
  unsigned NumAddrOps = MOs.size();
  for (unsigned Idx = 0; Idx != NumAddrOps; ++Idx)
    MIB.addOperand(MOs[Idx]);
  if (NumAddrOps < 4) // FrameIndex only; synthesize a zero displacement.
    addOffset(MIB, 0);

  // Append the remaining explicit "ri" operands, skipping the two tied
  // register operands being replaced by the memory reference.
  unsigned NumOps = MI->getDesc().getNumOperands() - 2;
  for (unsigned Idx = 0; Idx != NumOps; ++Idx)
    MIB.addOperand(MI->getOperand(Idx + 2));

  // Finally copy over any remaining (e.g. implicit) operands.
  for (unsigned Idx = NumOps + 2, E = MI->getNumOperands(); Idx != E; ++Idx)
    MIB.addOperand(MI->getOperand(Idx));
  return MIB;
}
|
|
|
|
|
2008-07-08 07:14:23 +08:00
|
|
|
/// FuseInst - Build a new instruction with Opcode in which MI's register
/// operand OpNo is replaced by the memory reference in MOs; every other
/// operand of MI is copied unchanged, preserving operand order.
static MachineInstr *FuseInst(MachineFunction &MF,
                              unsigned Opcode, unsigned OpNo,
                              const SmallVectorImpl<MachineOperand> &MOs,
                              MachineInstr *MI, const TargetInstrInfo &TII) {
  MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode),
                                              MI->getDebugLoc(), true);
  MachineInstrBuilder MIB(NewMI);

  for (unsigned Idx = 0, E = MI->getNumOperands(); Idx != E; ++Idx) {
    MachineOperand &Op = MI->getOperand(Idx);
    if (Idx != OpNo) {
      MIB.addOperand(Op);
      continue;
    }
    // This is the operand being folded: splice in the address instead.
    assert(Op.isReg() && "Expected to fold into reg operand!");
    unsigned NumAddrOps = MOs.size();
    for (unsigned AddrIdx = 0; AddrIdx != NumAddrOps; ++AddrIdx)
      MIB.addOperand(MOs[AddrIdx]);
    if (NumAddrOps < 4) // FrameIndex only; synthesize a zero displacement.
      addOffset(MIB, 0);
  }
  return MIB;
}
|
|
|
|
|
|
|
|
/// MakeM0Inst - Build "<Opcode> <mem>, 0" at MI's source location, where
/// <mem> is the address described by MOs.  Used to fold register-zeroing
/// instructions into a store of immediate zero.
static MachineInstr *MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode,
                                const SmallVectorImpl<MachineOperand> &MOs,
                                MachineInstr *MI) {
  MachineFunction &MF = *MI->getParent()->getParent();
  MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), TII.get(Opcode));
  unsigned NumAddrOps = MOs.size();
  for (unsigned Idx = 0; Idx != NumAddrOps; ++Idx)
    MIB.addOperand(MOs[Idx]);
  if (NumAddrOps < 4) // FrameIndex only; synthesize a zero displacement.
    addOffset(MIB, 0);
  // The stored immediate is zero.
  return MIB.addImm(0);
}
|
|
|
|
|
|
|
|
/// foldMemoryOperandImpl - Attempt to fold the memory reference described by
/// MOs into operand i of MI, producing a new (load-folded) instruction.
/// Size/Align describe the memory object; folding is rejected when the
/// object is too small or under-aligned for the folded opcode.  Returns the
/// new instruction, or NULL if no fold is possible.
MachineInstr*
X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
                                    MachineInstr *MI, unsigned i,
                                    const SmallVectorImpl<MachineOperand> &MOs,
                                    unsigned Size, unsigned Align) const {
  const DenseMap<unsigned*, std::pair<unsigned,unsigned> > *OpcodeTablePtr=NULL;
  bool isTwoAddrFold = false;
  unsigned NumOps = MI->getDesc().getNumOperands();
  // Two-address instruction: operand 1 is tied to operand 0.
  bool isTwoAddr = NumOps > 1 &&
    MI->getDesc().getOperandConstraint(1, TOI::TIED_TO) != -1;

  MachineInstr *NewMI = NULL;
  // Folding a memory location into the two-address part of a two-address
  // instruction is different than folding it other places.  It requires
  // replacing the *two* registers with the memory location.
  if (isTwoAddr && NumOps >= 2 && i < 2 &&
      MI->getOperand(0).isReg() &&
      MI->getOperand(1).isReg() &&
      MI->getOperand(0).getReg() == MI->getOperand(1).getReg()) {
    OpcodeTablePtr = &RegOp2MemOpTable2Addr;
    isTwoAddrFold = true;
  } else if (i == 0) { // If operand 0
    // Special case: fold a register-zeroing pseudo into a store of zero.
    if (MI->getOpcode() == X86::MOV64r0)
      NewMI = MakeM0Inst(*this, X86::MOV64mi32, MOs, MI);
    else if (MI->getOpcode() == X86::MOV32r0)
      NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, MI);
    else if (MI->getOpcode() == X86::MOV16r0)
      NewMI = MakeM0Inst(*this, X86::MOV16mi, MOs, MI);
    else if (MI->getOpcode() == X86::MOV8r0)
      NewMI = MakeM0Inst(*this, X86::MOV8mi, MOs, MI);
    if (NewMI)
      return NewMI;

    OpcodeTablePtr = &RegOp2MemOpTable0;
  } else if (i == 1) {
    OpcodeTablePtr = &RegOp2MemOpTable1;
  } else if (i == 2) {
    OpcodeTablePtr = &RegOp2MemOpTable2;
  }

  // If table selected...
  if (OpcodeTablePtr) {
    // Find the Opcode to fuse
    DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
      OpcodeTablePtr->find((unsigned*)MI->getOpcode());
    if (I != OpcodeTablePtr->end()) {
      unsigned Opcode = I->second.first;
      unsigned MinAlign = I->second.second;
      // The table records a minimum required alignment for each fold.
      if (Align < MinAlign)
        return NULL;
      bool NarrowToMOV32rm = false;
      if (Size) {
        unsigned RCSize = MI->getDesc().OpInfo[i].getRegClass(&RI)->getSize();
        if (Size < RCSize) {
          // Check if it's safe to fold the load. If the size of the object is
          // narrower than the load width, then it's not.
          if (Opcode != X86::MOV64rm || RCSize != 8 || Size != 4)
            return NULL;
          // If this is a 64-bit load, but the spill slot is 32, then we can do
          // a 32-bit load which is implicitly zero-extended. This likely is due
          // to liveintervalanalysis remat'ing a load from stack slot.
          if (MI->getOperand(0).getSubReg() || MI->getOperand(1).getSubReg())
            return NULL;
          Opcode = X86::MOV32rm;
          NarrowToMOV32rm = true;
        }
      }

      if (isTwoAddrFold)
        NewMI = FuseTwoAddrInst(MF, Opcode, MOs, MI, *this);
      else
        NewMI = FuseInst(MF, Opcode, i, MOs, MI, *this);

      if (NarrowToMOV32rm) {
        // If this is the special case where we use a MOV32rm to load a 32-bit
        // value and zero-extend the top bits. Change the destination register
        // to a 32-bit one.
        unsigned DstReg = NewMI->getOperand(0).getReg();
        if (TargetRegisterInfo::isPhysicalRegister(DstReg))
          NewMI->getOperand(0).setReg(RI.getSubReg(DstReg,
                                                   X86::sub_32bit));
        else
          NewMI->getOperand(0).setSubReg(X86::sub_32bit);
      }
      return NewMI;
    }
  }

  // No fusion
  if (PrintFailedFusing && !MI->isCopy())
    dbgs() << "We failed to fuse operand " << i << " in " << *MI;
  return NULL;
}
|
|
|
|
|
|
|
|
|
2008-12-04 02:43:12 +08:00
|
|
|
/// foldMemoryOperandImpl - Frame-index flavor: try to fold a load/store of
/// stack slot FrameIndex into the operand(s) of MI listed in Ops.  As a
/// special case, TESTrr with both operands folded is first rewritten in
/// place to CMPri against 0.  Returns the folded instruction or NULL.
MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
                                                  MachineInstr *MI,
                                           const SmallVectorImpl<unsigned> &Ops,
                                                  int FrameIndex) const {
  // Check switch flag
  if (NoFusing) return NULL;

  // Unless optimizing for size, refuse to fold these opcodes.
  // NOTE(review): presumably because folding a load into these
  // partial-register-updating instructions hurts performance — confirm.
  if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
    switch (MI->getOpcode()) {
    case X86::CVTSD2SSrr:
    case X86::Int_CVTSD2SSrr:
    case X86::CVTSS2SDrr:
    case X86::Int_CVTSS2SDrr:
    case X86::RCPSSr:
    case X86::RCPSSr_Int:
    case X86::ROUNDSDr_Int:
    case X86::ROUNDSSr_Int:
    case X86::RSQRTSSr:
    case X86::RSQRTSSr_Int:
    case X86::SQRTSSr:
    case X86::SQRTSSr_Int:
      return 0;
    }

  const MachineFrameInfo *MFI = MF.getFrameInfo();
  unsigned Size = MFI->getObjectSize(FrameIndex);
  unsigned Alignment = MFI->getObjectAlignment(FrameIndex);
  if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
    // Both operands of a TESTrr requested: rewrite as CMPri against 0 so
    // only a single (memory) operand remains to fold.
    unsigned NewOpc = 0;
    unsigned RCSize = 0;
    switch (MI->getOpcode()) {
    default: return NULL;
    case X86::TEST8rr:  NewOpc = X86::CMP8ri; RCSize = 1; break;
    case X86::TEST16rr: NewOpc = X86::CMP16ri8; RCSize = 2; break;
    case X86::TEST32rr: NewOpc = X86::CMP32ri8; RCSize = 4; break;
    case X86::TEST64rr: NewOpc = X86::CMP64ri8; RCSize = 8; break;
    }
    // Check if it's safe to fold the load. If the size of the object is
    // narrower than the load width, then it's not.
    if (Size < RCSize)
      return NULL;
    // Change to CMPXXri r, 0 first.
    MI->setDesc(get(NewOpc));
    MI->getOperand(1).ChangeToImmediate(0);
  } else if (Ops.size() != 1)
    return NULL;

  // Delegate to the generic overload with the frame-index address operand.
  SmallVector<MachineOperand,4> MOs;
  MOs.push_back(MachineOperand::CreateFI(FrameIndex));
  return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, Size, Alignment);
}
|
|
|
|
|
2008-12-04 02:43:12 +08:00
|
|
|
MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
|
|
|
|
MachineInstr *MI,
|
2009-07-15 14:10:07 +08:00
|
|
|
const SmallVectorImpl<unsigned> &Ops,
|
2008-12-04 02:43:12 +08:00
|
|
|
MachineInstr *LoadMI) const {
|
2008-01-07 09:35:02 +08:00
|
|
|
// Check switch flag
|
|
|
|
if (NoFusing) return NULL;
|
|
|
|
|
2009-12-23 01:47:23 +08:00
|
|
|
if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
|
2009-12-18 15:40:29 +08:00
|
|
|
switch (MI->getOpcode()) {
|
|
|
|
case X86::CVTSD2SSrr:
|
|
|
|
case X86::Int_CVTSD2SSrr:
|
|
|
|
case X86::CVTSS2SDrr:
|
|
|
|
case X86::Int_CVTSS2SDrr:
|
|
|
|
case X86::RCPSSr:
|
|
|
|
case X86::RCPSSr_Int:
|
|
|
|
case X86::ROUNDSDr_Int:
|
|
|
|
case X86::ROUNDSSr_Int:
|
|
|
|
case X86::RSQRTSSr:
|
|
|
|
case X86::RSQRTSSr_Int:
|
|
|
|
case X86::SQRTSSr:
|
|
|
|
case X86::SQRTSSr_Int:
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2008-07-12 08:10:52 +08:00
|
|
|
// Determine the alignment of the load.
|
2008-02-09 05:20:40 +08:00
|
|
|
unsigned Alignment = 0;
|
2008-07-12 08:10:52 +08:00
|
|
|
if (LoadMI->hasOneMemOperand())
|
2009-09-26 04:36:54 +08:00
|
|
|
Alignment = (*LoadMI->memoperands_begin())->getAlignment();
|
2009-09-22 02:30:38 +08:00
|
|
|
else
|
|
|
|
switch (LoadMI->getOpcode()) {
|
2010-08-13 04:20:53 +08:00
|
|
|
case X86::AVX_SET0PSY:
|
|
|
|
case X86::AVX_SET0PDY:
|
|
|
|
Alignment = 32;
|
|
|
|
break;
|
2010-03-31 08:40:13 +08:00
|
|
|
case X86::V_SET0PS:
|
|
|
|
case X86::V_SET0PD:
|
|
|
|
case X86::V_SET0PI:
|
2009-09-22 02:30:38 +08:00
|
|
|
case X86::V_SETALLONES:
|
2010-08-13 04:20:53 +08:00
|
|
|
case X86::AVX_SET0PS:
|
|
|
|
case X86::AVX_SET0PD:
|
|
|
|
case X86::AVX_SET0PI:
|
2009-09-22 02:30:38 +08:00
|
|
|
Alignment = 16;
|
|
|
|
break;
|
|
|
|
case X86::FsFLD0SD:
|
|
|
|
Alignment = 8;
|
|
|
|
break;
|
|
|
|
case X86::FsFLD0SS:
|
|
|
|
Alignment = 4;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
llvm_unreachable("Don't know how to fold this instruction!");
|
|
|
|
}
|
2008-01-07 09:35:02 +08:00
|
|
|
if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
|
|
|
|
unsigned NewOpc = 0;
|
|
|
|
switch (MI->getOpcode()) {
|
|
|
|
default: return NULL;
|
|
|
|
case X86::TEST8rr: NewOpc = X86::CMP8ri; break;
|
2010-05-19 05:54:15 +08:00
|
|
|
case X86::TEST16rr: NewOpc = X86::CMP16ri8; break;
|
|
|
|
case X86::TEST32rr: NewOpc = X86::CMP32ri8; break;
|
|
|
|
case X86::TEST64rr: NewOpc = X86::CMP64ri8; break;
|
2008-01-07 09:35:02 +08:00
|
|
|
}
|
|
|
|
// Change to CMPXXri r, 0 first.
|
2008-01-12 02:10:50 +08:00
|
|
|
MI->setDesc(get(NewOpc));
|
2008-01-07 09:35:02 +08:00
|
|
|
MI->getOperand(1).ChangeToImmediate(0);
|
|
|
|
} else if (Ops.size() != 1)
|
|
|
|
return NULL;
|
|
|
|
|
2010-08-12 07:08:22 +08:00
|
|
|
// Make sure the subregisters match.
|
|
|
|
// Otherwise we risk changing the size of the load.
|
|
|
|
if (LoadMI->getOperand(0).getSubReg() != MI->getOperand(Ops[0]).getSubReg())
|
|
|
|
return NULL;
|
|
|
|
|
2010-07-09 06:41:28 +08:00
|
|
|
SmallVector<MachineOperand,X86::AddrNumOperands> MOs;
|
2009-09-22 02:30:38 +08:00
|
|
|
switch (LoadMI->getOpcode()) {
|
2010-03-31 08:40:13 +08:00
|
|
|
case X86::V_SET0PS:
|
|
|
|
case X86::V_SET0PD:
|
|
|
|
case X86::V_SET0PI:
|
2009-09-22 02:30:38 +08:00
|
|
|
case X86::V_SETALLONES:
|
2010-08-13 04:20:53 +08:00
|
|
|
case X86::AVX_SET0PS:
|
|
|
|
case X86::AVX_SET0PD:
|
|
|
|
case X86::AVX_SET0PI:
|
|
|
|
case X86::AVX_SET0PSY:
|
|
|
|
case X86::AVX_SET0PDY:
|
2009-09-22 02:30:38 +08:00
|
|
|
case X86::FsFLD0SD:
|
|
|
|
case X86::FsFLD0SS: {
|
2010-03-31 08:40:13 +08:00
|
|
|
// Folding a V_SET0P? or V_SETALLONES as a load, to ease register pressure.
|
2008-12-03 13:21:24 +08:00
|
|
|
// Create a constant-pool entry and operands to load from it.
|
|
|
|
|
2010-03-09 11:01:40 +08:00
|
|
|
// Medium and large mode can't fold loads this way.
|
|
|
|
if (TM.getCodeModel() != CodeModel::Small &&
|
|
|
|
TM.getCodeModel() != CodeModel::Kernel)
|
|
|
|
return NULL;
|
|
|
|
|
2008-12-03 13:21:24 +08:00
|
|
|
// x86-32 PIC requires a PIC base register for constant pools.
|
|
|
|
unsigned PICBase = 0;
|
2009-07-17 05:24:13 +08:00
|
|
|
if (TM.getRelocationModel() == Reloc::PIC_) {
|
2009-07-17 02:44:05 +08:00
|
|
|
if (TM.getSubtarget<X86Subtarget>().is64Bit())
|
|
|
|
PICBase = X86::RIP;
|
2009-07-17 05:24:13 +08:00
|
|
|
else
|
2010-07-10 17:00:22 +08:00
|
|
|
// FIXME: PICBase = getGlobalBaseReg(&MF);
|
2009-07-17 02:44:05 +08:00
|
|
|
// This doesn't work for several reasons.
|
|
|
|
// 1. GlobalBaseReg may have been spilled.
|
|
|
|
// 2. It may not be live at MI.
|
2009-09-22 02:30:38 +08:00
|
|
|
return NULL;
|
2009-07-17 05:24:13 +08:00
|
|
|
}
|
2008-12-03 13:21:24 +08:00
|
|
|
|
2009-09-22 02:30:38 +08:00
|
|
|
// Create a constant-pool entry.
|
2008-12-03 13:21:24 +08:00
|
|
|
MachineConstantPool &MCP = *MF.getConstantPool();
|
2009-09-22 02:30:38 +08:00
|
|
|
const Type *Ty;
|
2010-08-13 04:20:53 +08:00
|
|
|
unsigned Opc = LoadMI->getOpcode();
|
|
|
|
if (Opc == X86::FsFLD0SS)
|
2009-09-22 02:30:38 +08:00
|
|
|
Ty = Type::getFloatTy(MF.getFunction()->getContext());
|
2010-08-13 04:20:53 +08:00
|
|
|
else if (Opc == X86::FsFLD0SD)
|
2009-09-22 02:30:38 +08:00
|
|
|
Ty = Type::getDoubleTy(MF.getFunction()->getContext());
|
2010-08-13 04:20:53 +08:00
|
|
|
else if (Opc == X86::AVX_SET0PSY || Opc == X86::AVX_SET0PDY)
|
|
|
|
Ty = VectorType::get(Type::getFloatTy(MF.getFunction()->getContext()), 8);
|
2009-09-22 02:30:38 +08:00
|
|
|
else
|
|
|
|
Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4);
|
2010-04-15 09:51:59 +08:00
|
|
|
const Constant *C = LoadMI->getOpcode() == X86::V_SETALLONES ?
|
2009-09-22 02:30:38 +08:00
|
|
|
Constant::getAllOnesValue(Ty) :
|
|
|
|
Constant::getNullValue(Ty);
|
|
|
|
unsigned CPI = MCP.getConstantPoolIndex(C, Alignment);
|
2008-12-03 13:21:24 +08:00
|
|
|
|
|
|
|
// Create operands to load from the constant pool entry.
|
|
|
|
MOs.push_back(MachineOperand::CreateReg(PICBase, false));
|
|
|
|
MOs.push_back(MachineOperand::CreateImm(1));
|
|
|
|
MOs.push_back(MachineOperand::CreateReg(0, false));
|
|
|
|
MOs.push_back(MachineOperand::CreateCPI(CPI, 0));
|
2009-04-09 05:14:34 +08:00
|
|
|
MOs.push_back(MachineOperand::CreateReg(0, false));
|
2009-09-22 02:30:38 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
default: {
|
2008-12-03 13:21:24 +08:00
|
|
|
// Folding a normal load. Just copy the load's address operands.
|
|
|
|
unsigned NumOps = LoadMI->getDesc().getNumOperands();
|
2010-07-09 06:41:28 +08:00
|
|
|
for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i)
|
2008-12-03 13:21:24 +08:00
|
|
|
MOs.push_back(LoadMI->getOperand(i));
|
2009-09-22 02:30:38 +08:00
|
|
|
break;
|
|
|
|
}
|
2008-12-03 13:21:24 +08:00
|
|
|
}
|
2009-09-11 08:39:26 +08:00
|
|
|
return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, 0, Alignment);
|
2008-01-07 09:35:02 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2008-10-16 09:49:15 +08:00
|
|
|
bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
|
|
|
|
const SmallVectorImpl<unsigned> &Ops) const {
|
2008-01-07 09:35:02 +08:00
|
|
|
// Check switch flag
|
|
|
|
if (NoFusing) return 0;
|
|
|
|
|
|
|
|
if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
|
|
|
|
switch (MI->getOpcode()) {
|
|
|
|
default: return false;
|
|
|
|
case X86::TEST8rr:
|
|
|
|
case X86::TEST16rr:
|
|
|
|
case X86::TEST32rr:
|
|
|
|
case X86::TEST64rr:
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (Ops.size() != 1)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
unsigned OpNum = Ops[0];
|
|
|
|
unsigned Opc = MI->getOpcode();
|
2008-01-07 15:27:27 +08:00
|
|
|
unsigned NumOps = MI->getDesc().getNumOperands();
|
2008-01-07 09:35:02 +08:00
|
|
|
bool isTwoAddr = NumOps > 1 &&
|
2008-01-07 15:27:27 +08:00
|
|
|
MI->getDesc().getOperandConstraint(1, TOI::TIED_TO) != -1;
|
2008-01-07 09:35:02 +08:00
|
|
|
|
|
|
|
// Folding a memory location into the two-address part of a two-address
|
|
|
|
// instruction is different than folding it other places. It requires
|
|
|
|
// replacing the *two* registers with the memory location.
|
2009-07-15 14:10:07 +08:00
|
|
|
const DenseMap<unsigned*, std::pair<unsigned,unsigned> > *OpcodeTablePtr=NULL;
|
2008-01-07 09:35:02 +08:00
|
|
|
if (isTwoAddr && NumOps >= 2 && OpNum < 2) {
|
|
|
|
OpcodeTablePtr = &RegOp2MemOpTable2Addr;
|
|
|
|
} else if (OpNum == 0) { // If operand 0
|
|
|
|
switch (Opc) {
|
2009-07-15 04:19:57 +08:00
|
|
|
case X86::MOV8r0:
|
2010-01-12 12:42:54 +08:00
|
|
|
case X86::MOV16r0:
|
2008-01-07 09:35:02 +08:00
|
|
|
case X86::MOV32r0:
|
2010-01-12 12:42:54 +08:00
|
|
|
case X86::MOV64r0:
|
2008-01-07 09:35:02 +08:00
|
|
|
return true;
|
|
|
|
default: break;
|
|
|
|
}
|
|
|
|
OpcodeTablePtr = &RegOp2MemOpTable0;
|
|
|
|
} else if (OpNum == 1) {
|
|
|
|
OpcodeTablePtr = &RegOp2MemOpTable1;
|
|
|
|
} else if (OpNum == 2) {
|
|
|
|
OpcodeTablePtr = &RegOp2MemOpTable2;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (OpcodeTablePtr) {
|
|
|
|
// Find the Opcode to fuse
|
2009-11-10 09:02:17 +08:00
|
|
|
DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
|
2008-01-07 09:35:02 +08:00
|
|
|
OpcodeTablePtr->find((unsigned*)Opc);
|
|
|
|
if (I != OpcodeTablePtr->end())
|
|
|
|
return true;
|
|
|
|
}
|
2010-07-10 04:43:13 +08:00
|
|
|
return TargetInstrInfoImpl::canFoldMemoryOperand(MI, Ops);
|
2008-01-07 09:35:02 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/// unfoldMemoryOperand - Split a memory-form instruction MI into up to three
/// instructions appended to NewMIs: an optional load into Reg, the
/// register-form data-processing instruction, and an optional store of Reg.
/// Returns false (leaving NewMIs untouched) if MI has no register-form
/// equivalent or the requested unfold is not possible.
bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
                                unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
                                 SmallVectorImpl<MachineInstr*> &NewMIs) const {
  // Look up the register-form opcode for this memory-form instruction.
  DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
    MemOp2RegOpTable.find((unsigned*)MI->getOpcode());
  if (I == MemOp2RegOpTable.end())
    return false;
  unsigned Opc = I->second.first;
  // Table entry encoding: low 4 bits = index of the memory operand group,
  // bit 4 = the fold included a load, bit 5 = it included a store.
  unsigned Index = I->second.second & 0xf;
  bool FoldedLoad = I->second.second & (1 << 4);
  bool FoldedStore = I->second.second & (1 << 5);
  if (UnfoldLoad && !FoldedLoad)
    return false;
  UnfoldLoad &= FoldedLoad;
  if (UnfoldStore && !FoldedStore)
    return false;
  UnfoldStore &= FoldedStore;

  const TargetInstrDesc &TID = get(Opc);
  const TargetOperandInfo &TOI = TID.OpInfo[Index];
  const TargetRegisterClass *RC = TOI.getRegClass(&RI);
  if (!MI->hasOneMemOperand() &&
      RC == &X86::VR128RegClass &&
      !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast())
    // Without memoperands, loadRegFromAddr and storeRegToStackSlot will
    // conservatively assume the address is unaligned. That's bad for
    // performance.
    return false;
  // Partition MI's operands: the 5 address operands starting at Index,
  // implicit register operands, and the explicit operands before/after the
  // address group.
  SmallVector<MachineOperand, X86::AddrNumOperands> AddrOps;
  SmallVector<MachineOperand,2> BeforeOps;
  SmallVector<MachineOperand,2> AfterOps;
  SmallVector<MachineOperand,4> ImpOps;
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    MachineOperand &Op = MI->getOperand(i);
    if (i >= Index && i < Index + X86::AddrNumOperands)
      AddrOps.push_back(Op);
    else if (Op.isReg() && Op.isImplicit())
      ImpOps.push_back(Op);
    else if (i < Index)
      BeforeOps.push_back(Op);
    else if (i > Index)
      AfterOps.push_back(Op);
  }

  // Emit the load instruction.
  if (UnfoldLoad) {
    std::pair<MachineInstr::mmo_iterator,
              MachineInstr::mmo_iterator> MMOs =
      MF.extractLoadMemRefs(MI->memoperands_begin(),
                            MI->memoperands_end());
    loadRegFromAddr(MF, Reg, AddrOps, RC, MMOs.first, MMOs.second, NewMIs);
    if (UnfoldStore) {
      // Address operands cannot be marked isKill: the same address is reused
      // by the store emitted below.
      for (unsigned i = 1; i != 1 + X86::AddrNumOperands; ++i) {
        MachineOperand &MO = NewMIs[0]->getOperand(i);
        if (MO.isReg())
          MO.setIsKill(false);
      }
    }
  }

  // Emit the data processing instruction.
  MachineInstr *DataMI = MF.CreateMachineInstr(TID, MI->getDebugLoc(), true);
  MachineInstrBuilder MIB(DataMI);

  // The unfolded register takes the place of the memory operand group:
  // as a def if the fold was a store, as a use if it was a load.
  if (FoldedStore)
    MIB.addReg(Reg, RegState::Define);
  for (unsigned i = 0, e = BeforeOps.size(); i != e; ++i)
    MIB.addOperand(BeforeOps[i]);
  if (FoldedLoad)
    MIB.addReg(Reg);
  for (unsigned i = 0, e = AfterOps.size(); i != e; ++i)
    MIB.addOperand(AfterOps[i]);
  // Re-attach the implicit operands, preserving their flags.
  for (unsigned i = 0, e = ImpOps.size(); i != e; ++i) {
    MachineOperand &MO = ImpOps[i];
    MIB.addReg(MO.getReg(),
               getDefRegState(MO.isDef()) |
               RegState::Implicit |
               getKillRegState(MO.isKill()) |
               getDeadRegState(MO.isDead()) |
               getUndefRegState(MO.isUndef()));
  }
  // Change CMP32ri r, 0 back to TEST32rr r, r, etc.
  unsigned NewOpc = 0;
  switch (DataMI->getOpcode()) {
  default: break;
  case X86::CMP64ri32:
  case X86::CMP64ri8:
  case X86::CMP32ri:
  case X86::CMP32ri8:
  case X86::CMP16ri:
  case X86::CMP16ri8:
  case X86::CMP8ri: {
    MachineOperand &MO0 = DataMI->getOperand(0);
    MachineOperand &MO1 = DataMI->getOperand(1);
    if (MO1.getImm() == 0) {
      switch (DataMI->getOpcode()) {
      default: break;
      case X86::CMP64ri8:
      case X86::CMP64ri32: NewOpc = X86::TEST64rr; break;
      case X86::CMP32ri8:
      case X86::CMP32ri:   NewOpc = X86::TEST32rr; break;
      case X86::CMP16ri8:
      case X86::CMP16ri:   NewOpc = X86::TEST16rr; break;
      case X86::CMP8ri:    NewOpc = X86::TEST8rr; break;
      }
      DataMI->setDesc(get(NewOpc));
      // Replace the zero immediate with a second copy of the register.
      MO1.ChangeToRegister(MO0.getReg(), false);
    }
  }
  }
  NewMIs.push_back(DataMI);

  // Emit the store instruction.
  if (UnfoldStore) {
    const TargetRegisterClass *DstRC = TID.OpInfo[0].getRegClass(&RI);
    std::pair<MachineInstr::mmo_iterator,
              MachineInstr::mmo_iterator> MMOs =
      MF.extractStoreMemRefs(MI->memoperands_begin(),
                             MI->memoperands_end());
    storeRegToAddr(MF, Reg, true, AddrOps, DstRC, MMOs.first, MMOs.second, NewMIs);
  }

  return true;
}
|
|
|
|
|
|
|
|
bool
|
|
|
|
X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
|
2009-02-12 05:51:19 +08:00
|
|
|
SmallVectorImpl<SDNode*> &NewNodes) const {
|
2008-07-18 03:10:17 +08:00
|
|
|
if (!N->isMachineOpcode())
|
2008-01-07 09:35:02 +08:00
|
|
|
return false;
|
|
|
|
|
2009-11-10 09:02:17 +08:00
|
|
|
DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
|
2008-07-18 03:10:17 +08:00
|
|
|
MemOp2RegOpTable.find((unsigned*)N->getMachineOpcode());
|
2008-01-07 09:35:02 +08:00
|
|
|
if (I == MemOp2RegOpTable.end())
|
|
|
|
return false;
|
|
|
|
unsigned Opc = I->second.first;
|
|
|
|
unsigned Index = I->second.second & 0xf;
|
|
|
|
bool FoldedLoad = I->second.second & (1 << 4);
|
|
|
|
bool FoldedStore = I->second.second & (1 << 5);
|
2008-01-07 15:27:27 +08:00
|
|
|
const TargetInstrDesc &TID = get(Opc);
|
2009-07-30 05:10:12 +08:00
|
|
|
const TargetRegisterClass *RC = TID.OpInfo[Index].getRegClass(&RI);
|
2009-03-05 03:23:38 +08:00
|
|
|
unsigned NumDefs = TID.NumDefs;
|
2008-07-28 05:46:04 +08:00
|
|
|
std::vector<SDValue> AddrOps;
|
|
|
|
std::vector<SDValue> BeforeOps;
|
|
|
|
std::vector<SDValue> AfterOps;
|
2009-02-06 09:31:28 +08:00
|
|
|
DebugLoc dl = N->getDebugLoc();
|
2008-01-07 09:35:02 +08:00
|
|
|
unsigned NumOps = N->getNumOperands();
|
2009-09-26 04:36:54 +08:00
|
|
|
for (unsigned i = 0; i != NumOps-1; ++i) {
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue Op = N->getOperand(i);
|
2010-07-09 06:41:28 +08:00
|
|
|
if (i >= Index-NumDefs && i < Index-NumDefs + X86::AddrNumOperands)
|
2008-01-07 09:35:02 +08:00
|
|
|
AddrOps.push_back(Op);
|
2009-03-05 03:23:38 +08:00
|
|
|
else if (i < Index-NumDefs)
|
2008-01-07 09:35:02 +08:00
|
|
|
BeforeOps.push_back(Op);
|
2009-03-05 03:23:38 +08:00
|
|
|
else if (i > Index-NumDefs)
|
2008-01-07 09:35:02 +08:00
|
|
|
AfterOps.push_back(Op);
|
|
|
|
}
|
2008-07-28 05:46:04 +08:00
|
|
|
SDValue Chain = N->getOperand(NumOps-1);
|
2008-01-07 09:35:02 +08:00
|
|
|
AddrOps.push_back(Chain);
|
|
|
|
|
|
|
|
// Emit the load instruction.
|
|
|
|
SDNode *Load = 0;
|
2009-10-10 02:10:05 +08:00
|
|
|
MachineFunction &MF = DAG.getMachineFunction();
|
2008-01-07 09:35:02 +08:00
|
|
|
if (FoldedLoad) {
|
2009-08-11 06:56:29 +08:00
|
|
|
EVT VT = *RC->vt_begin();
|
2009-11-17 05:56:03 +08:00
|
|
|
std::pair<MachineInstr::mmo_iterator,
|
|
|
|
MachineInstr::mmo_iterator> MMOs =
|
|
|
|
MF.extractLoadMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
|
|
|
|
cast<MachineSDNode>(N)->memoperands_end());
|
2010-07-03 04:36:18 +08:00
|
|
|
if (!(*MMOs.first) &&
|
|
|
|
RC == &X86::VR128RegClass &&
|
|
|
|
!TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast())
|
|
|
|
// Do not introduce a slow unaligned load.
|
|
|
|
return false;
|
|
|
|
bool isAligned = (*MMOs.first) && (*MMOs.first)->getAlignment() >= 16;
|
2009-09-26 02:54:59 +08:00
|
|
|
Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, TM), dl,
|
|
|
|
VT, MVT::Other, &AddrOps[0], AddrOps.size());
|
2008-01-07 09:35:02 +08:00
|
|
|
NewNodes.push_back(Load);
|
2009-10-10 02:10:05 +08:00
|
|
|
|
|
|
|
// Preserve memory reference information.
|
|
|
|
cast<MachineSDNode>(Load)->setMemRefs(MMOs.first, MMOs.second);
|
2008-01-07 09:35:02 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Emit the data processing instruction.
|
2009-08-11 06:56:29 +08:00
|
|
|
std::vector<EVT> VTs;
|
2008-01-07 09:35:02 +08:00
|
|
|
const TargetRegisterClass *DstRC = 0;
|
2008-01-07 11:13:06 +08:00
|
|
|
if (TID.getNumDefs() > 0) {
|
2009-07-30 05:10:12 +08:00
|
|
|
DstRC = TID.OpInfo[0].getRegClass(&RI);
|
2008-01-07 09:35:02 +08:00
|
|
|
VTs.push_back(*DstRC->vt_begin());
|
|
|
|
}
|
|
|
|
for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
|
2009-08-11 06:56:29 +08:00
|
|
|
EVT VT = N->getValueType(i);
|
2009-08-12 04:47:22 +08:00
|
|
|
if (VT != MVT::Other && i >= (unsigned)TID.getNumDefs())
|
2008-01-07 09:35:02 +08:00
|
|
|
VTs.push_back(VT);
|
|
|
|
}
|
|
|
|
if (Load)
|
2008-07-28 05:46:04 +08:00
|
|
|
BeforeOps.push_back(SDValue(Load, 0));
|
2008-01-07 09:35:02 +08:00
|
|
|
std::copy(AfterOps.begin(), AfterOps.end(), std::back_inserter(BeforeOps));
|
2009-09-26 02:54:59 +08:00
|
|
|
SDNode *NewNode= DAG.getMachineNode(Opc, dl, VTs, &BeforeOps[0],
|
|
|
|
BeforeOps.size());
|
2008-01-07 09:35:02 +08:00
|
|
|
NewNodes.push_back(NewNode);
|
|
|
|
|
|
|
|
// Emit the store instruction.
|
|
|
|
if (FoldedStore) {
|
|
|
|
AddrOps.pop_back();
|
2008-07-28 05:46:04 +08:00
|
|
|
AddrOps.push_back(SDValue(NewNode, 0));
|
2008-01-07 09:35:02 +08:00
|
|
|
AddrOps.push_back(Chain);
|
2009-11-17 05:56:03 +08:00
|
|
|
std::pair<MachineInstr::mmo_iterator,
|
|
|
|
MachineInstr::mmo_iterator> MMOs =
|
|
|
|
MF.extractStoreMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
|
|
|
|
cast<MachineSDNode>(N)->memoperands_end());
|
2010-07-03 04:36:18 +08:00
|
|
|
if (!(*MMOs.first) &&
|
|
|
|
RC == &X86::VR128RegClass &&
|
|
|
|
!TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast())
|
|
|
|
// Do not introduce a slow unaligned store.
|
|
|
|
return false;
|
|
|
|
bool isAligned = (*MMOs.first) && (*MMOs.first)->getAlignment() >= 16;
|
2009-09-26 02:54:59 +08:00
|
|
|
SDNode *Store = DAG.getMachineNode(getStoreRegOpcode(0, DstRC,
|
|
|
|
isAligned, TM),
|
|
|
|
dl, MVT::Other,
|
|
|
|
&AddrOps[0], AddrOps.size());
|
2008-01-07 09:35:02 +08:00
|
|
|
NewNodes.push_back(Store);
|
2009-10-10 02:10:05 +08:00
|
|
|
|
|
|
|
// Preserve memory reference information.
|
|
|
|
cast<MachineSDNode>(Load)->setMemRefs(MMOs.first, MMOs.second);
|
2008-01-07 09:35:02 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// getOpcodeAfterMemoryUnfold - Return the register-form opcode that Opc
/// would become after unfolding its memory operand, or 0 if Opc has no
/// register form or the requested load/store unfold is not possible.
/// If LoadRegIndex is non-null, it receives the operand index of the
/// memory operand group.
unsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
                                      bool UnfoldLoad, bool UnfoldStore,
                                      unsigned *LoadRegIndex) const {
  DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
    MemOp2RegOpTable.find((unsigned*)Opc);
  if (I == MemOp2RegOpTable.end())
    return 0;
  // Entry encoding: bit 4 = fold included a load, bit 5 = a store,
  // low 4 bits = index of the memory operand group.
  bool FoldedLoad = I->second.second & (1 << 4);
  bool FoldedStore = I->second.second & (1 << 5);
  if (UnfoldLoad && !FoldedLoad)
    return 0;
  if (UnfoldStore && !FoldedStore)
    return 0;
  if (LoadRegIndex)
    *LoadRegIndex = I->second.second & 0xf;
  return I->second.first;
}
|
|
|
|
|
2010-01-22 11:34:51 +08:00
|
|
|
bool
|
|
|
|
X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
|
|
|
|
int64_t &Offset1, int64_t &Offset2) const {
|
|
|
|
if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
|
|
|
|
return false;
|
|
|
|
unsigned Opc1 = Load1->getMachineOpcode();
|
|
|
|
unsigned Opc2 = Load2->getMachineOpcode();
|
|
|
|
switch (Opc1) {
|
|
|
|
default: return false;
|
|
|
|
case X86::MOV8rm:
|
|
|
|
case X86::MOV16rm:
|
|
|
|
case X86::MOV32rm:
|
|
|
|
case X86::MOV64rm:
|
|
|
|
case X86::LD_Fp32m:
|
|
|
|
case X86::LD_Fp64m:
|
|
|
|
case X86::LD_Fp80m:
|
|
|
|
case X86::MOVSSrm:
|
|
|
|
case X86::MOVSDrm:
|
|
|
|
case X86::MMX_MOVD64rm:
|
|
|
|
case X86::MMX_MOVQ64rm:
|
|
|
|
case X86::FsMOVAPSrm:
|
|
|
|
case X86::FsMOVAPDrm:
|
|
|
|
case X86::MOVAPSrm:
|
|
|
|
case X86::MOVUPSrm:
|
|
|
|
case X86::MOVUPSrm_Int:
|
|
|
|
case X86::MOVAPDrm:
|
|
|
|
case X86::MOVDQArm:
|
|
|
|
case X86::MOVDQUrm:
|
|
|
|
case X86::MOVDQUrm_Int:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
switch (Opc2) {
|
|
|
|
default: return false;
|
|
|
|
case X86::MOV8rm:
|
|
|
|
case X86::MOV16rm:
|
|
|
|
case X86::MOV32rm:
|
|
|
|
case X86::MOV64rm:
|
|
|
|
case X86::LD_Fp32m:
|
|
|
|
case X86::LD_Fp64m:
|
|
|
|
case X86::LD_Fp80m:
|
|
|
|
case X86::MOVSSrm:
|
|
|
|
case X86::MOVSDrm:
|
|
|
|
case X86::MMX_MOVD64rm:
|
|
|
|
case X86::MMX_MOVQ64rm:
|
|
|
|
case X86::FsMOVAPSrm:
|
|
|
|
case X86::FsMOVAPDrm:
|
|
|
|
case X86::MOVAPSrm:
|
|
|
|
case X86::MOVUPSrm:
|
|
|
|
case X86::MOVUPSrm_Int:
|
|
|
|
case X86::MOVAPDrm:
|
|
|
|
case X86::MOVDQArm:
|
|
|
|
case X86::MOVDQUrm:
|
|
|
|
case X86::MOVDQUrm_Int:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Check if chain operands and base addresses match.
|
|
|
|
if (Load1->getOperand(0) != Load2->getOperand(0) ||
|
|
|
|
Load1->getOperand(5) != Load2->getOperand(5))
|
|
|
|
return false;
|
|
|
|
// Segment operands should match as well.
|
|
|
|
if (Load1->getOperand(4) != Load2->getOperand(4))
|
|
|
|
return false;
|
|
|
|
// Scale should be 1, Index should be Reg0.
|
|
|
|
if (Load1->getOperand(1) == Load2->getOperand(1) &&
|
|
|
|
Load1->getOperand(2) == Load2->getOperand(2)) {
|
|
|
|
if (cast<ConstantSDNode>(Load1->getOperand(1))->getZExtValue() != 1)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// Now let's examine the displacements.
|
|
|
|
if (isa<ConstantSDNode>(Load1->getOperand(3)) &&
|
|
|
|
isa<ConstantSDNode>(Load2->getOperand(3))) {
|
|
|
|
Offset1 = cast<ConstantSDNode>(Load1->getOperand(3))->getSExtValue();
|
|
|
|
Offset2 = cast<ConstantSDNode>(Load2->getOperand(3))->getSExtValue();
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// shouldScheduleLoadsNear - Heuristic for the scheduler: returns true if it
/// is profitable to cluster Load1 and Load2 (already known, via
/// areLoadsFromSameBasePtr, to share a base pointer with displacements
/// Offset1 < Offset2). NumLoads is the number of loads already clustered
/// with Load1.
bool X86InstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
                                           int64_t Offset1, int64_t Offset2,
                                           unsigned NumLoads) const {
  assert(Offset2 > Offset1);
  // Loads further apart than this are unlikely to share a cache line.
  if ((Offset2 - Offset1) / 8 > 64)
    return false;

  unsigned Opc1 = Load1->getMachineOpcode();
  unsigned Opc2 = Load2->getMachineOpcode();
  if (Opc1 != Opc2)
    return false;  // FIXME: overly conservative?

  switch (Opc1) {
  default: break;
  // x87 and MMX loads are never worth clustering.
  case X86::LD_Fp32m:
  case X86::LD_Fp64m:
  case X86::LD_Fp80m:
  case X86::MMX_MOVD64rm:
  case X86::MMX_MOVQ64rm:
    return false;
  }

  EVT VT = Load1->getValueType(0);
  switch (VT.getSimpleVT().SimpleTy) {
  default:
    // XMM registers. In 64-bit mode we can be a bit more aggressive since we
    // have 16 of them to play with.
    if (TM.getSubtargetImpl()->is64Bit()) {
      if (NumLoads >= 3)
        return false;
    } else if (NumLoads) {
      return false;
    }
    break;
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
  case MVT::f32:
  case MVT::f64:
    // Scalar loads: only cluster in pairs.
    if (NumLoads)
      return false;
    break;
  }

  return true;
}
|
|
|
|
|
|
|
|
|
2006-10-21 01:42:20 +08:00
|
|
|
bool X86InstrInfo::
|
2008-08-15 06:49:33 +08:00
|
|
|
ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
|
2006-10-21 13:52:40 +08:00
|
|
|
assert(Cond.size() == 1 && "Invalid X86 branch condition!");
|
2008-08-30 07:21:31 +08:00
|
|
|
X86::CondCode CC = static_cast<X86::CondCode>(Cond[0].getImm());
|
Optimized FCMP_OEQ and FCMP_UNE for x86.
Where previously LLVM might emit code like this:
ucomisd %xmm1, %xmm0
setne %al
setp %cl
orb %al, %cl
jne .LBB4_2
it now emits this:
ucomisd %xmm1, %xmm0
jne .LBB4_2
jp .LBB4_2
It has fewer instructions and uses fewer registers, but it does
have more branches. And in the case that this code is followed by
a non-fallthrough edge, it may be followed by a jmp instruction,
resulting in three branch instructions in sequence. Some effort
is made to avoid this situation.
To achieve this, X86ISelLowering.cpp now recognizes FCMP_OEQ and
FCMP_UNE in lowered form, and replace them with code that emits
two branches, except in the case where it would require converting
a fall-through edge to an explicit branch.
Also, X86InstrInfo.cpp's branch analysis and transform code now
knows now to handle blocks with multiple conditional branches. It
uses loops instead of having fixed checks for up to two
instructions. It can now analyze and transform code generated
from FCMP_OEQ and FCMP_UNE.
llvm-svn: 57873
2008-10-21 11:29:32 +08:00
|
|
|
if (CC == X86::COND_NE_OR_P || CC == X86::COND_NP_OR_E)
|
|
|
|
return true;
|
2008-08-30 07:21:31 +08:00
|
|
|
Cond[0].setImm(GetOppositeBranchCondition(CC));
|
2006-10-21 13:52:40 +08:00
|
|
|
return false;
|
2006-10-21 01:42:20 +08:00
|
|
|
}
|
|
|
|
|
2008-10-27 15:14:50 +08:00
|
|
|
bool X86InstrInfo::
|
2009-02-07 01:17:30 +08:00
|
|
|
isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
|
|
|
|
// FIXME: Return false for x87 stack register classes for now. We can't
|
2008-10-27 15:14:50 +08:00
|
|
|
// allow any loads of these registers before FpGet_ST0_80.
|
2009-02-07 01:17:30 +08:00
|
|
|
return !(RC == &X86::CCRRegClass || RC == &X86::RFP32RegClass ||
|
|
|
|
RC == &X86::RFP64RegClass || RC == &X86::RFP80RegClass);
|
2008-10-27 15:14:50 +08:00
|
|
|
}
|
|
|
|
|
2008-04-17 04:10:13 +08:00
|
|
|
|
2010-02-06 06:10:22 +08:00
|
|
|
/// isX86_64ExtendedReg - Is the MachineOperand a x86-64 extended (r8 or higher)
/// register?  e.g. r8, xmm8, xmm13, etc.
/// Extended registers require a REX prefix when encoded.
bool X86InstrInfo::isX86_64ExtendedReg(unsigned RegNo) {
  switch (RegNo) {
  default: break;
  // GPRs r8-r15 in all four widths.
  case X86::R8:    case X86::R9:    case X86::R10:   case X86::R11:
  case X86::R12:   case X86::R13:   case X86::R14:   case X86::R15:
  case X86::R8D:   case X86::R9D:   case X86::R10D:  case X86::R11D:
  case X86::R12D:  case X86::R13D:  case X86::R14D:  case X86::R15D:
  case X86::R8W:   case X86::R9W:   case X86::R10W:  case X86::R11W:
  case X86::R12W:  case X86::R13W:  case X86::R14W:  case X86::R15W:
  case X86::R8B:   case X86::R9B:   case X86::R10B:  case X86::R11B:
  case X86::R12B:  case X86::R13B:  case X86::R14B:  case X86::R15B:
  // Upper SSE/AVX vector registers.
  case X86::XMM8:  case X86::XMM9:  case X86::XMM10: case X86::XMM11:
  case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15:
  case X86::YMM8:  case X86::YMM9:  case X86::YMM10: case X86::YMM11:
  case X86::YMM12: case X86::YMM13: case X86::YMM14: case X86::YMM15:
  // Upper control registers.
  case X86::CR8:   case X86::CR9:   case X86::CR10:  case X86::CR11:
  case X86::CR12:  case X86::CR13:  case X86::CR14:  case X86::CR15:
    return true;
  }
  return false;
}
|
|
|
|
|
2008-09-30 08:58:23 +08:00
|
|
|
/// getGlobalBaseReg - Return a virtual register initialized with the
/// the global base register value. Output instructions required to
/// initialize the register in the function entry block, if necessary.
///
/// TODO: Eliminate this and move the code to X86MachineFunctionInfo.
///
unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
  assert(!TM.getSubtarget<X86Subtarget>().is64Bit() &&
         "X86-64 PIC uses RIP relative addressing");

  // Reuse a previously-created base register if one exists for this function.
  X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>();
  unsigned GlobalBaseReg = X86FI->getGlobalBaseReg();
  if (GlobalBaseReg != 0)
    return GlobalBaseReg;

  // Create the register. The code to initialize it is inserted
  // later, by the CGBR pass (below).
  MachineRegisterInfo &RegInfo = MF->getRegInfo();
  GlobalBaseReg = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
  X86FI->setGlobalBaseReg(GlobalBaseReg);
  return GlobalBaseReg;
}
|
2010-03-26 01:25:00 +08:00
|
|
|
|
2010-03-30 07:24:21 +08:00
|
|
|
// These are the replaceable SSE instructions. Some of these have Int variants
// that we don't include here. We don't want to replace instructions selected
// by intrinsics.
// Each row gives the equivalent opcode in each of the three SSE execution
// domains; SetSSEDomain swaps between columns of a row.
static const unsigned ReplaceableInstrs[][3] = {
  //PackedSingle   PackedDouble    PackedInt
  { X86::MOVAPSmr,   X86::MOVAPDmr,  X86::MOVDQAmr  },
  { X86::MOVAPSrm,   X86::MOVAPDrm,  X86::MOVDQArm  },
  { X86::MOVAPSrr,   X86::MOVAPDrr,  X86::MOVDQArr  },
  { X86::MOVUPSmr,   X86::MOVUPDmr,  X86::MOVDQUmr  },
  { X86::MOVUPSrm,   X86::MOVUPDrm,  X86::MOVDQUrm  },
  { X86::MOVNTPSmr,  X86::MOVNTPDmr, X86::MOVNTDQmr },
  { X86::ANDNPSrm,   X86::ANDNPDrm,  X86::PANDNrm   },
  { X86::ANDNPSrr,   X86::ANDNPDrr,  X86::PANDNrr   },
  { X86::ANDPSrm,    X86::ANDPDrm,   X86::PANDrm    },
  { X86::ANDPSrr,    X86::ANDPDrr,   X86::PANDrr    },
  { X86::ORPSrm,     X86::ORPDrm,    X86::PORrm     },
  { X86::ORPSrr,     X86::ORPDrr,    X86::PORrr     },
  { X86::V_SET0PS,   X86::V_SET0PD,  X86::V_SET0PI  },
  { X86::XORPSrm,    X86::XORPDrm,   X86::PXORrm    },
  { X86::XORPSrr,    X86::XORPDrr,   X86::PXORrr    },
  // AVX 128-bit support
  { X86::VMOVAPSmr,  X86::VMOVAPDmr,  X86::VMOVDQAmr  },
  { X86::VMOVAPSrm,  X86::VMOVAPDrm,  X86::VMOVDQArm  },
  { X86::VMOVAPSrr,  X86::VMOVAPDrr,  X86::VMOVDQArr  },
  { X86::VMOVUPSmr,  X86::VMOVUPDmr,  X86::VMOVDQUmr  },
  { X86::VMOVUPSrm,  X86::VMOVUPDrm,  X86::VMOVDQUrm  },
  { X86::VMOVNTPSmr, X86::VMOVNTPDmr, X86::VMOVNTDQmr },
  { X86::VANDNPSrm,  X86::VANDNPDrm,  X86::VPANDNrm   },
  { X86::VANDNPSrr,  X86::VANDNPDrr,  X86::VPANDNrr   },
  { X86::VANDPSrm,   X86::VANDPDrm,   X86::VPANDrm    },
  { X86::VANDPSrr,   X86::VANDPDrr,   X86::VPANDrr    },
  { X86::VORPSrm,    X86::VORPDrm,    X86::VPORrm     },
  { X86::VORPSrr,    X86::VORPDrr,    X86::VPORrr     },
  { X86::AVX_SET0PS, X86::AVX_SET0PD, X86::AVX_SET0PI },
  { X86::VXORPSrm,   X86::VXORPDrm,   X86::VPXORrm    },
  { X86::VXORPSrr,   X86::VXORPDrr,   X86::VPXORrr    },
};
|
|
|
|
|
|
|
|
// FIXME: Some shuffle and unpack instructions have equivalents in different
|
|
|
|
// domains, but they require a bit more work than just switching opcodes.
|
|
|
|
|
|
|
|
static const unsigned *lookup(unsigned opcode, unsigned domain) {
|
|
|
|
for (unsigned i = 0, e = array_lengthof(ReplaceableInstrs); i != e; ++i)
|
|
|
|
if (ReplaceableInstrs[i][domain-1] == opcode)
|
|
|
|
return ReplaceableInstrs[i];
|
|
|
|
return 0;
|
|
|
|
}
|
2010-03-26 01:25:00 +08:00
|
|
|
|
2010-03-30 07:24:21 +08:00
|
|
|
/// GetSSEDomain - Return the current execution domain of MI (extracted from
/// its TSFlags) and a bitmask of the domains it could be switched to
/// (0xe = domains 1-3 when a ReplaceableInstrs entry exists, else 0).
std::pair<uint16_t, uint16_t>
X86InstrInfo::GetSSEDomain(const MachineInstr *MI) const {
  uint16_t domain = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
  return std::make_pair(domain,
                        domain && lookup(MI->getOpcode(), domain) ? 0xe : 0);
}
|
2010-03-26 01:25:00 +08:00
|
|
|
|
2010-03-30 07:24:21 +08:00
|
|
|
/// SetSSEDomain - Rewrite MI in place to the equivalent opcode in execution
/// domain Domain (1-3). MI must be an SSE instruction with an entry in
/// ReplaceableInstrs; asserts otherwise.
void X86InstrInfo::SetSSEDomain(MachineInstr *MI, unsigned Domain) const {
  assert(Domain>0 && Domain<4 && "Invalid execution domain");
  uint16_t dom = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
  assert(dom && "Not an SSE instruction");
  const unsigned *table = lookup(MI->getOpcode(), dom);
  assert(table && "Cannot change domain");
  // Swap to the same row's opcode in the requested domain column.
  MI->setDesc(get(table[Domain-1]));
}
|
2010-04-27 07:37:21 +08:00
|
|
|
|
|
|
|
/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
void X86InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
  NopInst.setOpcode(X86::NOOP);
}
|
2010-07-10 17:00:22 +08:00
|
|
|
|
|
|
|
namespace {
  /// CGBR - Create Global Base Reg pass. This initializes the PIC
  /// global base register for x86-32.
  /// Runs after instruction selection and materializes the base register
  /// (requested earlier via X86InstrInfo::getGlobalBaseReg) at the top of
  /// the entry block.
  struct CGBR : public MachineFunctionPass {
    static char ID;
    CGBR() : MachineFunctionPass(ID) {}

    virtual bool runOnMachineFunction(MachineFunction &MF) {
      const X86TargetMachine *TM =
        static_cast<const X86TargetMachine *>(&MF.getTarget());

      assert(!TM->getSubtarget<X86Subtarget>().is64Bit() &&
             "X86-64 PIC uses RIP relative addressing");

      // Only emit a global base reg in PIC mode.
      if (TM->getRelocationModel() != Reloc::PIC_)
        return false;

      X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
      unsigned GlobalBaseReg = X86FI->getGlobalBaseReg();

      // If we didn't need a GlobalBaseReg, don't insert code.
      if (GlobalBaseReg == 0)
        return false;

      // Insert the set of GlobalBaseReg into the first MBB of the function
      MachineBasicBlock &FirstMBB = MF.front();
      MachineBasicBlock::iterator MBBI = FirstMBB.begin();
      DebugLoc DL = FirstMBB.findDebugLoc(MBBI);
      MachineRegisterInfo &RegInfo = MF.getRegInfo();
      const X86InstrInfo *TII = TM->getInstrInfo();

      // For GOT-style PIC the pc value lands in a scratch register and the
      // GOT offset is added into GlobalBaseReg below; otherwise the pc value
      // goes straight into GlobalBaseReg.
      unsigned PC;
      if (TM->getSubtarget<X86Subtarget>().isPICStyleGOT())
        PC = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
      else
        PC = GlobalBaseReg;

      // Operand of MovePCtoStack is completely ignored by asm printer. It's
      // only used in JIT code emission as displacement to pc.
      BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC).addImm(0);

      // If we're using vanilla 'GOT' PIC style, we should use relative addressing
      // not to pc, but to _GLOBAL_OFFSET_TABLE_ external.
      if (TM->getSubtarget<X86Subtarget>().isPICStyleGOT()) {
        // Generate addl $__GLOBAL_OFFSET_TABLE_ + [.-piclabel], %some_register
        BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg)
          .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_",
                                        X86II::MO_GOT_ABSOLUTE_ADDRESS);
      }

      return true;
    }

    virtual const char *getPassName() const {
      return "X86 PIC Global Base Reg Initialization";
    }

    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.setPreservesCFG();
      MachineFunctionPass::getAnalysisUsage(AU);
    }
  };
}
|
|
|
|
|
|
|
|
char CGBR::ID = 0;
// Factory used by the X86 target to schedule the CGBR pass.
FunctionPass*
llvm::createGlobalBaseRegPass() { return new CGBR(); }
|