2015-02-02 00:15:07 +08:00
|
|
|
//===-- X86FastISel.cpp - X86 FastISel implementation ---------------------===//
|
|
|
|
//
|
2019-01-19 16:50:56 +08:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2015-02-02 00:15:07 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
// This file defines the X86-specific support for the FastISel class. Much
|
|
|
|
// of the target-specific code is generated by tablegen in the file
|
|
|
|
// X86GenFastISel.inc, which is #included here.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#include "X86.h"
|
|
|
|
#include "X86CallingConv.h"
|
|
|
|
#include "X86InstrBuilder.h"
|
|
|
|
#include "X86InstrInfo.h"
|
|
|
|
#include "X86MachineFunctionInfo.h"
|
|
|
|
#include "X86RegisterInfo.h"
|
|
|
|
#include "X86Subtarget.h"
|
|
|
|
#include "X86TargetMachine.h"
|
|
|
|
#include "llvm/Analysis/BranchProbabilityInfo.h"
|
|
|
|
#include "llvm/CodeGen/FastISel.h"
|
|
|
|
#include "llvm/CodeGen/FunctionLoweringInfo.h"
|
|
|
|
#include "llvm/CodeGen/MachineConstantPool.h"
|
|
|
|
#include "llvm/CodeGen/MachineFrameInfo.h"
|
|
|
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
|
|
|
#include "llvm/IR/CallSite.h"
|
|
|
|
#include "llvm/IR/CallingConv.h"
|
2016-04-15 02:29:59 +08:00
|
|
|
#include "llvm/IR/DebugInfo.h"
|
2015-02-02 00:15:07 +08:00
|
|
|
#include "llvm/IR/DerivedTypes.h"
|
|
|
|
#include "llvm/IR/GetElementPtrTypeIterator.h"
|
|
|
|
#include "llvm/IR/GlobalAlias.h"
|
|
|
|
#include "llvm/IR/GlobalVariable.h"
|
|
|
|
#include "llvm/IR/Instructions.h"
|
|
|
|
#include "llvm/IR/IntrinsicInst.h"
|
|
|
|
#include "llvm/IR/Operator.h"
|
2015-02-11 06:00:34 +08:00
|
|
|
#include "llvm/MC/MCAsmInfo.h"
|
2015-06-23 20:21:54 +08:00
|
|
|
#include "llvm/MC/MCSymbol.h"
|
2015-02-02 00:15:07 +08:00
|
|
|
#include "llvm/Support/ErrorHandling.h"
|
|
|
|
#include "llvm/Target/TargetOptions.h"
|
|
|
|
using namespace llvm;
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
|
|
|
|
/// X86-specific FastISel implementation.  Most selection routines are only
/// declared here; the tablegen-generated helpers are pulled in from
/// X86GenFastISel.inc in the public section.
class X86FastISel final : public FastISel {
  /// Subtarget - Cached pointer to the X86Subtarget of the function being
  /// selected, so that target-feature queries are cheap.
  const X86Subtarget *Subtarget;

  /// X86ScalarSSEf64, X86ScalarSSEf32 - Select between SSE and x87 for
  /// scalar floating point: f64 uses SSE when SSE2 is available, f32 uses
  /// SSE when SSE1 is available.
  bool X86ScalarSSEf64;
  bool X86ScalarSSEf32;

public:
  explicit X86FastISel(FunctionLoweringInfo &funcInfo,
                       const TargetLibraryInfo *libInfo)
      : FastISel(funcInfo, libInfo),
        Subtarget(&funcInfo.MF->getSubtarget<X86Subtarget>()),
        X86ScalarSSEf64(Subtarget->hasSSE2()),
        X86ScalarSSEf32(Subtarget->hasSSE1()) {}

  bool fastSelectInstruction(const Instruction *I) override;

  /// The specified machine instr operand is a vreg, and that vreg is being
  /// provided by the specified load instruction.  If possible, try to fold
  /// the load as an operand to the instruction, returning true if possible.
  bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
                           const LoadInst *LI) override;

  bool fastLowerArguments() override;
  bool fastLowerCall(CallLoweringInfo &CLI) override;
  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;

#include "X86GenFastISel.inc"

private:
  bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT,
                          const DebugLoc &DL);

  bool X86FastEmitLoad(EVT VT, X86AddressMode &AM, MachineMemOperand *MMO,
                       unsigned &ResultReg, unsigned Alignment = 1);

  bool X86FastEmitStore(EVT VT, const Value *Val, X86AddressMode &AM,
                        MachineMemOperand *MMO = nullptr, bool Aligned = false);
  bool X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
                        X86AddressMode &AM,
                        MachineMemOperand *MMO = nullptr, bool Aligned = false);

  bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
                         unsigned &ResultReg);

  bool X86SelectAddress(const Value *V, X86AddressMode &AM);
  bool X86SelectCallAddress(const Value *V, X86AddressMode &AM);

  bool X86SelectLoad(const Instruction *I);

  bool X86SelectStore(const Instruction *I);

  bool X86SelectRet(const Instruction *I);

  bool X86SelectCmp(const Instruction *I);

  bool X86SelectZExt(const Instruction *I);

  bool X86SelectSExt(const Instruction *I);

  bool X86SelectBranch(const Instruction *I);

  bool X86SelectShift(const Instruction *I);

  bool X86SelectDivRem(const Instruction *I);

  bool X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I);

  bool X86FastEmitSSESelect(MVT RetVT, const Instruction *I);

  bool X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I);

  bool X86SelectSelect(const Instruction *I);

  bool X86SelectTrunc(const Instruction *I);

  bool X86SelectFPExtOrFPTrunc(const Instruction *I, unsigned Opc,
                               const TargetRegisterClass *RC);

  bool X86SelectFPExt(const Instruction *I);
  bool X86SelectFPTrunc(const Instruction *I);
  bool X86SelectSIToFP(const Instruction *I);
  bool X86SelectUIToFP(const Instruction *I);
  bool X86SelectIntToFP(const Instruction *I, bool IsSigned);

  /// Convenience accessor for the subtarget's instruction info.
  const X86InstrInfo *getInstrInfo() const {
    return Subtarget->getInstrInfo();
  }

  /// Convenience accessor that views the inherited TM as an
  /// X86TargetMachine.
  const X86TargetMachine *getTargetMachine() const {
    return static_cast<const X86TargetMachine *>(&TM);
  }

  bool handleConstantAddresses(const Value *V, X86AddressMode &AM);

  unsigned X86MaterializeInt(const ConstantInt *CI, MVT VT);
  unsigned X86MaterializeFP(const ConstantFP *CFP, MVT VT);
  unsigned X86MaterializeGV(const GlobalValue *GV, MVT VT);
  unsigned fastMaterializeConstant(const Constant *C) override;

  unsigned fastMaterializeAlloca(const AllocaInst *C) override;

  unsigned fastMaterializeFloatZero(const ConstantFP *CF) override;

  /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
  /// computed in an SSE register, not on the X87 floating point stack.
  bool isScalarFPTypeInSSEReg(EVT VT) const {
    if (VT == MVT::f64)
      return X86ScalarSSEf64; // f64 is in SSE registers when SSE2 is present.
    if (VT == MVT::f32)
      return X86ScalarSSEf32; // f32 is in SSE registers when SSE1 is present.
    return false;
  }

  bool isTypeLegal(Type *Ty, MVT &VT, bool AllowI1 = false);

  bool IsMemcpySmall(uint64_t Len);

  bool TryEmitSmallMemcpy(X86AddressMode DestAM,
                          X86AddressMode SrcAM, uint64_t Len);

  bool foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
                            const Value *Cond);

  const MachineInstrBuilder &addFullAddress(const MachineInstrBuilder &MIB,
                                            X86AddressMode &AM);

  unsigned fastEmitInst_rrrr(unsigned MachineInstOpcode,
                             const TargetRegisterClass *RC, unsigned Op0,
                             bool Op0IsKill, unsigned Op1, bool Op1IsKill,
                             unsigned Op2, bool Op2IsKill, unsigned Op3,
                             bool Op3IsKill);
};
|
|
|
|
|
|
|
|
} // end anonymous namespace.
|
|
|
|
|
|
|
|
static std::pair<unsigned, bool>
|
|
|
|
getX86SSEConditionCode(CmpInst::Predicate Predicate) {
|
|
|
|
unsigned CC;
|
|
|
|
bool NeedSwap = false;
|
|
|
|
|
|
|
|
// SSE Condition code mapping:
|
|
|
|
// 0 - EQ
|
|
|
|
// 1 - LT
|
|
|
|
// 2 - LE
|
|
|
|
// 3 - UNORD
|
|
|
|
// 4 - NEQ
|
|
|
|
// 5 - NLT
|
|
|
|
// 6 - NLE
|
|
|
|
// 7 - ORD
|
|
|
|
switch (Predicate) {
|
|
|
|
default: llvm_unreachable("Unexpected predicate");
|
|
|
|
case CmpInst::FCMP_OEQ: CC = 0; break;
|
2016-08-17 13:10:15 +08:00
|
|
|
case CmpInst::FCMP_OGT: NeedSwap = true; LLVM_FALLTHROUGH;
|
2015-02-02 00:15:07 +08:00
|
|
|
case CmpInst::FCMP_OLT: CC = 1; break;
|
2016-08-17 13:10:15 +08:00
|
|
|
case CmpInst::FCMP_OGE: NeedSwap = true; LLVM_FALLTHROUGH;
|
2015-02-02 00:15:07 +08:00
|
|
|
case CmpInst::FCMP_OLE: CC = 2; break;
|
|
|
|
case CmpInst::FCMP_UNO: CC = 3; break;
|
|
|
|
case CmpInst::FCMP_UNE: CC = 4; break;
|
2016-08-17 13:10:15 +08:00
|
|
|
case CmpInst::FCMP_ULE: NeedSwap = true; LLVM_FALLTHROUGH;
|
2015-02-02 00:15:07 +08:00
|
|
|
case CmpInst::FCMP_UGE: CC = 5; break;
|
2016-08-17 13:10:15 +08:00
|
|
|
case CmpInst::FCMP_ULT: NeedSwap = true; LLVM_FALLTHROUGH;
|
2015-02-02 00:15:07 +08:00
|
|
|
case CmpInst::FCMP_UGT: CC = 6; break;
|
|
|
|
case CmpInst::FCMP_ORD: CC = 7; break;
|
2017-10-09 09:05:15 +08:00
|
|
|
case CmpInst::FCMP_UEQ: CC = 8; break;
|
|
|
|
case CmpInst::FCMP_ONE: CC = 12; break;
|
2015-02-02 00:15:07 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
return std::make_pair(CC, NeedSwap);
|
|
|
|
}
|
|
|
|
|
2018-05-01 23:54:18 +08:00
|
|
|
/// Adds a complex addressing mode to the given machine instr builder.
|
2015-05-06 07:41:53 +08:00
|
|
|
/// Note, this will constrain the index register. If its not possible to
|
|
|
|
/// constrain the given index register, then a new one will be created. The
|
|
|
|
/// IndexReg field of the addressing mode will be updated to match in this case.
|
|
|
|
const MachineInstrBuilder &
|
|
|
|
X86FastISel::addFullAddress(const MachineInstrBuilder &MIB,
|
|
|
|
X86AddressMode &AM) {
|
|
|
|
// First constrain the index register. It needs to be a GR64_NOSP.
|
|
|
|
AM.IndexReg = constrainOperandRegClass(MIB->getDesc(), AM.IndexReg,
|
|
|
|
MIB->getNumOperands() +
|
|
|
|
X86::AddrIndexReg);
|
|
|
|
return ::addFullAddress(MIB, AM);
|
|
|
|
}
|
|
|
|
|
2018-05-01 23:54:18 +08:00
|
|
|
/// Check if it is possible to fold the condition from the XALU intrinsic
|
2015-02-02 00:15:07 +08:00
|
|
|
/// into the user. The condition code will only be updated on success.
|
|
|
|
bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
|
|
|
|
const Value *Cond) {
|
|
|
|
if (!isa<ExtractValueInst>(Cond))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
const auto *EV = cast<ExtractValueInst>(Cond);
|
|
|
|
if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
|
|
|
|
MVT RetVT;
|
|
|
|
const Function *Callee = II->getCalledFunction();
|
|
|
|
Type *RetTy =
|
|
|
|
cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
|
|
|
|
if (!isTypeLegal(RetTy, RetVT))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
if (RetVT != MVT::i32 && RetVT != MVT::i64)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
X86::CondCode TmpCC;
|
|
|
|
switch (II->getIntrinsicID()) {
|
|
|
|
default: return false;
|
|
|
|
case Intrinsic::sadd_with_overflow:
|
|
|
|
case Intrinsic::ssub_with_overflow:
|
|
|
|
case Intrinsic::smul_with_overflow:
|
|
|
|
case Intrinsic::umul_with_overflow: TmpCC = X86::COND_O; break;
|
|
|
|
case Intrinsic::uadd_with_overflow:
|
|
|
|
case Intrinsic::usub_with_overflow: TmpCC = X86::COND_B; break;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Check if both instructions are in the same basic block.
|
|
|
|
if (II->getParent() != I->getParent())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// Make sure nothing is in the way
|
2015-10-20 05:48:29 +08:00
|
|
|
BasicBlock::const_iterator Start(I);
|
|
|
|
BasicBlock::const_iterator End(II);
|
2015-02-02 00:15:07 +08:00
|
|
|
for (auto Itr = std::prev(Start); Itr != End; --Itr) {
|
|
|
|
// We only expect extractvalue instructions between the intrinsic and the
|
|
|
|
// instruction to be selected.
|
|
|
|
if (!isa<ExtractValueInst>(Itr))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// Check that the extractvalue operand comes from the intrinsic.
|
|
|
|
const auto *EVI = cast<ExtractValueInst>(Itr);
|
|
|
|
if (EVI->getAggregateOperand() != II)
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
CC = TmpCC;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Decide whether fast instruction selection can handle values of type Ty.
///
/// \param Ty      the IR type to classify.
/// \param VT      [out] set to the simple value type of \p Ty whenever that
///                type is simple and known, even if the function then
///                returns false.
/// \param AllowI1 when true, i1 is accepted without asking the target
///                lowering whether it is legal.
/// \returns true if the type can be handled.
bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
  const EVT ValueTy = TLI.getValueType(DL, Ty, /*HandleUnknown=*/true);
  // Unhandled type. Halt "fast" selection and bail.
  if (!ValueTy.isSimple() || ValueTy == MVT::Other)
    return false;

  VT = ValueTy.getSimpleVT();
  switch (VT.SimpleTy) {
  // For now, require SSE/SSE2 for performing floating-point operations,
  // since x87 requires additional work.
  case MVT::f64:
    if (!X86ScalarSSEf64)
      return false;
    break;
  case MVT::f32:
    if (!X86ScalarSSEf32)
      return false;
    break;
  // Similarly, no f80 support yet.
  case MVT::f80:
    return false;
  case MVT::i1:
    if (AllowI1)
      return true;
    break;
  default:
    break;
  }

  // We only handle legal types. For example, on x86-32 the instruction
  // selector contains all of the 64-bit instructions from x86-64,
  // under the assumption that i64 won't be used if the target doesn't
  // support it.
  return TLI.isTypeLegal(VT);
}
|
|
|
|
|
|
|
|
/// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
|
|
|
|
/// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV.
|
|
|
|
/// Return true and the result register by reference if it is possible.
|
2015-05-06 07:41:53 +08:00
|
|
|
bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
|
2015-03-26 19:29:02 +08:00
|
|
|
MachineMemOperand *MMO, unsigned &ResultReg,
|
|
|
|
unsigned Alignment) {
|
2016-06-07 21:47:23 +08:00
|
|
|
bool HasSSE41 = Subtarget->hasSSE41();
|
2016-06-02 12:19:45 +08:00
|
|
|
bool HasAVX = Subtarget->hasAVX();
|
2016-06-07 21:47:23 +08:00
|
|
|
bool HasAVX2 = Subtarget->hasAVX2();
|
2016-09-06 07:58:40 +08:00
|
|
|
bool HasAVX512 = Subtarget->hasAVX512();
|
|
|
|
bool HasVLX = Subtarget->hasVLX();
|
2016-06-07 21:47:23 +08:00
|
|
|
bool IsNonTemporal = MMO && MMO->isNonTemporal();
|
|
|
|
|
2015-02-02 00:15:07 +08:00
|
|
|
// Get opcode and regclass of the output for the given load instruction.
|
|
|
|
unsigned Opc = 0;
|
|
|
|
const TargetRegisterClass *RC = nullptr;
|
|
|
|
switch (VT.getSimpleVT().SimpleTy) {
|
|
|
|
default: return false;
|
|
|
|
case MVT::i1:
|
|
|
|
case MVT::i8:
|
|
|
|
Opc = X86::MOV8rm;
|
|
|
|
RC = &X86::GR8RegClass;
|
|
|
|
break;
|
|
|
|
case MVT::i16:
|
|
|
|
Opc = X86::MOV16rm;
|
|
|
|
RC = &X86::GR16RegClass;
|
|
|
|
break;
|
|
|
|
case MVT::i32:
|
|
|
|
Opc = X86::MOV32rm;
|
|
|
|
RC = &X86::GR32RegClass;
|
|
|
|
break;
|
|
|
|
case MVT::i64:
|
|
|
|
// Must be in x86-64 mode.
|
|
|
|
Opc = X86::MOV64rm;
|
|
|
|
RC = &X86::GR64RegClass;
|
|
|
|
break;
|
|
|
|
case MVT::f32:
|
|
|
|
if (X86ScalarSSEf32) {
|
2016-09-06 07:58:40 +08:00
|
|
|
Opc = HasAVX512 ? X86::VMOVSSZrm : HasAVX ? X86::VMOVSSrm : X86::MOVSSrm;
|
2017-10-29 13:14:26 +08:00
|
|
|
RC = HasAVX512 ? &X86::FR32XRegClass : &X86::FR32RegClass;
|
2015-02-02 00:15:07 +08:00
|
|
|
} else {
|
|
|
|
Opc = X86::LD_Fp32m;
|
|
|
|
RC = &X86::RFP32RegClass;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case MVT::f64:
|
|
|
|
if (X86ScalarSSEf64) {
|
2016-09-06 07:58:40 +08:00
|
|
|
Opc = HasAVX512 ? X86::VMOVSDZrm : HasAVX ? X86::VMOVSDrm : X86::MOVSDrm;
|
2017-10-29 13:14:26 +08:00
|
|
|
RC = HasAVX512 ? &X86::FR64XRegClass : &X86::FR64RegClass;
|
2015-02-02 00:15:07 +08:00
|
|
|
} else {
|
|
|
|
Opc = X86::LD_Fp64m;
|
|
|
|
RC = &X86::RFP64RegClass;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case MVT::f80:
|
|
|
|
// No f80 support yet.
|
|
|
|
return false;
|
2015-03-26 19:29:02 +08:00
|
|
|
case MVT::v4f32:
|
2016-06-07 21:47:23 +08:00
|
|
|
if (IsNonTemporal && Alignment >= 16 && HasSSE41)
|
2016-09-06 07:58:40 +08:00
|
|
|
Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
|
|
|
|
HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
|
2016-06-07 21:47:23 +08:00
|
|
|
else if (Alignment >= 16)
|
2016-09-06 07:58:40 +08:00
|
|
|
Opc = HasVLX ? X86::VMOVAPSZ128rm :
|
|
|
|
HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm;
|
2015-03-26 19:29:02 +08:00
|
|
|
else
|
2016-09-06 07:58:40 +08:00
|
|
|
Opc = HasVLX ? X86::VMOVUPSZ128rm :
|
|
|
|
HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm;
|
2017-10-29 13:14:26 +08:00
|
|
|
RC = HasVLX ? &X86::VR128XRegClass : &X86::VR128RegClass;
|
2015-03-26 19:29:02 +08:00
|
|
|
break;
|
|
|
|
case MVT::v2f64:
|
2016-06-07 21:47:23 +08:00
|
|
|
if (IsNonTemporal && Alignment >= 16 && HasSSE41)
|
2016-09-06 07:58:40 +08:00
|
|
|
Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
|
|
|
|
HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
|
2016-06-07 21:47:23 +08:00
|
|
|
else if (Alignment >= 16)
|
2016-09-06 07:58:40 +08:00
|
|
|
Opc = HasVLX ? X86::VMOVAPDZ128rm :
|
|
|
|
HasAVX ? X86::VMOVAPDrm : X86::MOVAPDrm;
|
2015-03-26 19:29:02 +08:00
|
|
|
else
|
2016-09-06 07:58:40 +08:00
|
|
|
Opc = HasVLX ? X86::VMOVUPDZ128rm :
|
|
|
|
HasAVX ? X86::VMOVUPDrm : X86::MOVUPDrm;
|
2017-10-29 13:14:26 +08:00
|
|
|
RC = HasVLX ? &X86::VR128XRegClass : &X86::VR128RegClass;
|
2015-03-26 19:29:02 +08:00
|
|
|
break;
|
|
|
|
case MVT::v4i32:
|
|
|
|
case MVT::v2i64:
|
|
|
|
case MVT::v8i16:
|
|
|
|
case MVT::v16i8:
|
2016-06-07 21:47:23 +08:00
|
|
|
if (IsNonTemporal && Alignment >= 16)
|
2016-09-06 07:58:40 +08:00
|
|
|
Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
|
|
|
|
HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
|
2016-06-07 21:47:23 +08:00
|
|
|
else if (Alignment >= 16)
|
2016-09-06 07:58:40 +08:00
|
|
|
Opc = HasVLX ? X86::VMOVDQA64Z128rm :
|
|
|
|
HasAVX ? X86::VMOVDQArm : X86::MOVDQArm;
|
2015-03-26 19:29:02 +08:00
|
|
|
else
|
2016-09-06 07:58:40 +08:00
|
|
|
Opc = HasVLX ? X86::VMOVDQU64Z128rm :
|
|
|
|
HasAVX ? X86::VMOVDQUrm : X86::MOVDQUrm;
|
2017-10-29 13:14:26 +08:00
|
|
|
RC = HasVLX ? &X86::VR128XRegClass : &X86::VR128RegClass;
|
2015-03-26 19:29:02 +08:00
|
|
|
break;
|
2016-06-02 12:19:45 +08:00
|
|
|
case MVT::v8f32:
|
|
|
|
assert(HasAVX);
|
2016-06-07 21:47:23 +08:00
|
|
|
if (IsNonTemporal && Alignment >= 32 && HasAVX2)
|
2016-09-06 07:58:40 +08:00
|
|
|
Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
|
2017-06-06 22:18:39 +08:00
|
|
|
else if (IsNonTemporal && Alignment >= 16)
|
|
|
|
return false; // Force split for X86::VMOVNTDQArm
|
2016-09-06 07:58:40 +08:00
|
|
|
else if (Alignment >= 32)
|
|
|
|
Opc = HasVLX ? X86::VMOVAPSZ256rm : X86::VMOVAPSYrm;
|
2016-06-07 21:47:23 +08:00
|
|
|
else
|
2016-09-06 07:58:40 +08:00
|
|
|
Opc = HasVLX ? X86::VMOVUPSZ256rm : X86::VMOVUPSYrm;
|
2017-10-29 13:14:26 +08:00
|
|
|
RC = HasVLX ? &X86::VR256XRegClass : &X86::VR256RegClass;
|
2016-06-02 12:19:45 +08:00
|
|
|
break;
|
|
|
|
case MVT::v4f64:
|
|
|
|
assert(HasAVX);
|
2016-06-07 21:47:23 +08:00
|
|
|
if (IsNonTemporal && Alignment >= 32 && HasAVX2)
|
2017-10-28 04:13:10 +08:00
|
|
|
Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
|
2017-06-06 22:18:39 +08:00
|
|
|
else if (IsNonTemporal && Alignment >= 16)
|
|
|
|
return false; // Force split for X86::VMOVNTDQArm
|
2016-09-06 07:58:40 +08:00
|
|
|
else if (Alignment >= 32)
|
|
|
|
Opc = HasVLX ? X86::VMOVAPDZ256rm : X86::VMOVAPDYrm;
|
2016-06-07 21:47:23 +08:00
|
|
|
else
|
2016-09-06 07:58:40 +08:00
|
|
|
Opc = HasVLX ? X86::VMOVUPDZ256rm : X86::VMOVUPDYrm;
|
2017-10-29 13:14:26 +08:00
|
|
|
RC = HasVLX ? &X86::VR256XRegClass : &X86::VR256RegClass;
|
2016-06-02 12:19:45 +08:00
|
|
|
break;
|
|
|
|
case MVT::v8i32:
|
|
|
|
case MVT::v4i64:
|
|
|
|
case MVT::v16i16:
|
|
|
|
case MVT::v32i8:
|
|
|
|
assert(HasAVX);
|
2016-06-07 21:47:23 +08:00
|
|
|
if (IsNonTemporal && Alignment >= 32 && HasAVX2)
|
2017-10-28 04:13:10 +08:00
|
|
|
Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
|
2017-06-06 22:18:39 +08:00
|
|
|
else if (IsNonTemporal && Alignment >= 16)
|
|
|
|
return false; // Force split for X86::VMOVNTDQArm
|
2016-09-06 07:58:40 +08:00
|
|
|
else if (Alignment >= 32)
|
|
|
|
Opc = HasVLX ? X86::VMOVDQA64Z256rm : X86::VMOVDQAYrm;
|
2016-06-07 21:47:23 +08:00
|
|
|
else
|
2016-09-06 07:58:40 +08:00
|
|
|
Opc = HasVLX ? X86::VMOVDQU64Z256rm : X86::VMOVDQUYrm;
|
2017-10-29 13:14:26 +08:00
|
|
|
RC = HasVLX ? &X86::VR256XRegClass : &X86::VR256RegClass;
|
2016-06-02 12:19:45 +08:00
|
|
|
break;
|
2016-06-02 12:51:37 +08:00
|
|
|
case MVT::v16f32:
|
2016-09-06 07:58:40 +08:00
|
|
|
assert(HasAVX512);
|
2016-06-07 21:47:23 +08:00
|
|
|
if (IsNonTemporal && Alignment >= 64)
|
|
|
|
Opc = X86::VMOVNTDQAZrm;
|
|
|
|
else
|
|
|
|
Opc = (Alignment >= 64) ? X86::VMOVAPSZrm : X86::VMOVUPSZrm;
|
2016-06-02 12:51:37 +08:00
|
|
|
RC = &X86::VR512RegClass;
|
|
|
|
break;
|
|
|
|
case MVT::v8f64:
|
2016-09-06 07:58:40 +08:00
|
|
|
assert(HasAVX512);
|
2016-06-07 21:47:23 +08:00
|
|
|
if (IsNonTemporal && Alignment >= 64)
|
|
|
|
Opc = X86::VMOVNTDQAZrm;
|
|
|
|
else
|
|
|
|
Opc = (Alignment >= 64) ? X86::VMOVAPDZrm : X86::VMOVUPDZrm;
|
2016-06-02 12:51:37 +08:00
|
|
|
RC = &X86::VR512RegClass;
|
|
|
|
break;
|
|
|
|
case MVT::v8i64:
|
|
|
|
case MVT::v16i32:
|
|
|
|
case MVT::v32i16:
|
|
|
|
case MVT::v64i8:
|
2016-09-06 07:58:40 +08:00
|
|
|
assert(HasAVX512);
|
2016-06-02 12:51:37 +08:00
|
|
|
// Note: There are a lot more choices based on type with AVX-512, but
|
|
|
|
// there's really no advantage when the load isn't masked.
|
2016-06-07 21:47:23 +08:00
|
|
|
if (IsNonTemporal && Alignment >= 64)
|
|
|
|
Opc = X86::VMOVNTDQAZrm;
|
|
|
|
else
|
|
|
|
Opc = (Alignment >= 64) ? X86::VMOVDQA64Zrm : X86::VMOVDQU64Zrm;
|
2016-06-02 12:51:37 +08:00
|
|
|
RC = &X86::VR512RegClass;
|
|
|
|
break;
|
2015-02-02 00:15:07 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
ResultReg = createResultReg(RC);
|
|
|
|
MachineInstrBuilder MIB =
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
|
|
|
|
addFullAddress(MIB, AM);
|
|
|
|
if (MMO)
|
|
|
|
MIB->addMemOperand(*FuncInfo.MF, MMO);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// X86FastEmitStore - Emit a machine instruction to store a value Val of
|
|
|
|
/// type VT. The address is either pre-computed, consisted of a base ptr, Ptr
|
|
|
|
/// and a displacement offset, or a GlobalAddress,
|
|
|
|
/// i.e. V. Return true if it is possible.
|
|
|
|
bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
|
2015-05-06 07:41:53 +08:00
|
|
|
X86AddressMode &AM,
|
2015-02-02 00:15:07 +08:00
|
|
|
MachineMemOperand *MMO, bool Aligned) {
|
2017-04-11 00:58:07 +08:00
|
|
|
bool HasSSE1 = Subtarget->hasSSE1();
|
2015-10-14 18:03:13 +08:00
|
|
|
bool HasSSE2 = Subtarget->hasSSE2();
|
2015-10-17 21:04:42 +08:00
|
|
|
bool HasSSE4A = Subtarget->hasSSE4A();
|
2015-10-14 18:03:13 +08:00
|
|
|
bool HasAVX = Subtarget->hasAVX();
|
2016-09-06 07:58:40 +08:00
|
|
|
bool HasAVX512 = Subtarget->hasAVX512();
|
|
|
|
bool HasVLX = Subtarget->hasVLX();
|
2015-10-14 18:03:13 +08:00
|
|
|
bool IsNonTemporal = MMO && MMO->isNonTemporal();
|
|
|
|
|
2015-02-02 00:15:07 +08:00
|
|
|
// Get opcode and regclass of the output for the given store instruction.
|
|
|
|
unsigned Opc = 0;
|
|
|
|
switch (VT.getSimpleVT().SimpleTy) {
|
|
|
|
case MVT::f80: // No f80 support yet.
|
|
|
|
default: return false;
|
|
|
|
case MVT::i1: {
|
|
|
|
// Mask out all but lowest bit.
|
|
|
|
unsigned AndResult = createResultReg(&X86::GR8RegClass);
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
|
|
|
TII.get(X86::AND8ri), AndResult)
|
|
|
|
.addReg(ValReg, getKillRegState(ValIsKill)).addImm(1);
|
|
|
|
ValReg = AndResult;
|
2016-08-17 13:10:15 +08:00
|
|
|
LLVM_FALLTHROUGH; // handle i1 as i8.
|
2015-02-02 00:15:07 +08:00
|
|
|
}
|
|
|
|
case MVT::i8: Opc = X86::MOV8mr; break;
|
|
|
|
case MVT::i16: Opc = X86::MOV16mr; break;
|
2015-10-14 18:03:13 +08:00
|
|
|
case MVT::i32:
|
|
|
|
Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTImr : X86::MOV32mr;
|
|
|
|
break;
|
|
|
|
case MVT::i64:
|
|
|
|
// Must be in x86-64 mode.
|
|
|
|
Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTI_64mr : X86::MOV64mr;
|
|
|
|
break;
|
2015-02-02 00:15:07 +08:00
|
|
|
case MVT::f32:
|
2015-10-17 21:04:42 +08:00
|
|
|
if (X86ScalarSSEf32) {
|
|
|
|
if (IsNonTemporal && HasSSE4A)
|
|
|
|
Opc = X86::MOVNTSS;
|
|
|
|
else
|
2016-09-06 07:58:40 +08:00
|
|
|
Opc = HasAVX512 ? X86::VMOVSSZmr :
|
|
|
|
HasAVX ? X86::VMOVSSmr : X86::MOVSSmr;
|
2015-10-17 21:04:42 +08:00
|
|
|
} else
|
|
|
|
Opc = X86::ST_Fp32m;
|
2015-02-02 00:15:07 +08:00
|
|
|
break;
|
|
|
|
case MVT::f64:
|
2015-10-17 21:04:42 +08:00
|
|
|
if (X86ScalarSSEf32) {
|
|
|
|
if (IsNonTemporal && HasSSE4A)
|
|
|
|
Opc = X86::MOVNTSD;
|
|
|
|
else
|
2016-09-06 07:58:40 +08:00
|
|
|
Opc = HasAVX512 ? X86::VMOVSDZmr :
|
|
|
|
HasAVX ? X86::VMOVSDmr : X86::MOVSDmr;
|
2015-10-17 21:04:42 +08:00
|
|
|
} else
|
|
|
|
Opc = X86::ST_Fp64m;
|
2015-02-02 00:15:07 +08:00
|
|
|
break;
|
2017-04-11 00:58:07 +08:00
|
|
|
case MVT::x86mmx:
|
|
|
|
Opc = (IsNonTemporal && HasSSE1) ? X86::MMX_MOVNTQmr : X86::MMX_MOVQ64mr;
|
|
|
|
break;
|
2015-02-02 00:15:07 +08:00
|
|
|
case MVT::v4f32:
|
2015-10-14 18:03:13 +08:00
|
|
|
if (Aligned) {
|
|
|
|
if (IsNonTemporal)
|
2016-09-06 07:58:40 +08:00
|
|
|
Opc = HasVLX ? X86::VMOVNTPSZ128mr :
|
|
|
|
HasAVX ? X86::VMOVNTPSmr : X86::MOVNTPSmr;
|
2015-10-14 18:03:13 +08:00
|
|
|
else
|
2016-09-06 07:58:40 +08:00
|
|
|
Opc = HasVLX ? X86::VMOVAPSZ128mr :
|
|
|
|
HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr;
|
2015-10-14 18:03:13 +08:00
|
|
|
} else
|
2016-09-06 07:58:40 +08:00
|
|
|
Opc = HasVLX ? X86::VMOVUPSZ128mr :
|
|
|
|
HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr;
|
2015-02-02 00:15:07 +08:00
|
|
|
break;
|
|
|
|
case MVT::v2f64:
|
2015-10-14 18:03:13 +08:00
|
|
|
if (Aligned) {
|
|
|
|
if (IsNonTemporal)
|
2016-09-06 07:58:40 +08:00
|
|
|
Opc = HasVLX ? X86::VMOVNTPDZ128mr :
|
|
|
|
HasAVX ? X86::VMOVNTPDmr : X86::MOVNTPDmr;
|
2015-10-14 18:03:13 +08:00
|
|
|
else
|
2016-09-06 07:58:40 +08:00
|
|
|
Opc = HasVLX ? X86::VMOVAPDZ128mr :
|
|
|
|
HasAVX ? X86::VMOVAPDmr : X86::MOVAPDmr;
|
2015-10-14 18:03:13 +08:00
|
|
|
} else
|
2016-09-06 07:58:40 +08:00
|
|
|
Opc = HasVLX ? X86::VMOVUPDZ128mr :
|
|
|
|
HasAVX ? X86::VMOVUPDmr : X86::MOVUPDmr;
|
2015-02-02 00:15:07 +08:00
|
|
|
break;
|
|
|
|
case MVT::v4i32:
|
|
|
|
case MVT::v2i64:
|
|
|
|
case MVT::v8i16:
|
|
|
|
case MVT::v16i8:
|
2015-10-14 18:03:13 +08:00
|
|
|
if (Aligned) {
|
|
|
|
if (IsNonTemporal)
|
2016-09-06 07:58:40 +08:00
|
|
|
Opc = HasVLX ? X86::VMOVNTDQZ128mr :
|
|
|
|
HasAVX ? X86::VMOVNTDQmr : X86::MOVNTDQmr;
|
2015-10-14 18:03:13 +08:00
|
|
|
else
|
2016-09-06 07:58:40 +08:00
|
|
|
Opc = HasVLX ? X86::VMOVDQA64Z128mr :
|
|
|
|
HasAVX ? X86::VMOVDQAmr : X86::MOVDQAmr;
|
2015-10-14 18:03:13 +08:00
|
|
|
} else
|
2016-09-06 07:58:40 +08:00
|
|
|
Opc = HasVLX ? X86::VMOVDQU64Z128mr :
|
|
|
|
HasAVX ? X86::VMOVDQUmr : X86::MOVDQUmr;
|
2016-06-02 12:19:45 +08:00
|
|
|
break;
|
|
|
|
case MVT::v8f32:
|
|
|
|
assert(HasAVX);
|
2016-09-06 07:58:40 +08:00
|
|
|
if (Aligned) {
|
|
|
|
if (IsNonTemporal)
|
|
|
|
Opc = HasVLX ? X86::VMOVNTPSZ256mr : X86::VMOVNTPSYmr;
|
|
|
|
else
|
|
|
|
Opc = HasVLX ? X86::VMOVAPSZ256mr : X86::VMOVAPSYmr;
|
|
|
|
} else
|
|
|
|
Opc = HasVLX ? X86::VMOVUPSZ256mr : X86::VMOVUPSYmr;
|
2016-06-02 12:19:45 +08:00
|
|
|
break;
|
|
|
|
case MVT::v4f64:
|
|
|
|
assert(HasAVX);
|
|
|
|
if (Aligned) {
|
2016-09-06 07:58:40 +08:00
|
|
|
if (IsNonTemporal)
|
|
|
|
Opc = HasVLX ? X86::VMOVNTPDZ256mr : X86::VMOVNTPDYmr;
|
|
|
|
else
|
|
|
|
Opc = HasVLX ? X86::VMOVAPDZ256mr : X86::VMOVAPDYmr;
|
2016-06-02 12:19:45 +08:00
|
|
|
} else
|
2016-09-06 07:58:40 +08:00
|
|
|
Opc = HasVLX ? X86::VMOVUPDZ256mr : X86::VMOVUPDYmr;
|
2016-06-02 12:19:45 +08:00
|
|
|
break;
|
|
|
|
case MVT::v8i32:
|
|
|
|
case MVT::v4i64:
|
|
|
|
case MVT::v16i16:
|
|
|
|
case MVT::v32i8:
|
|
|
|
assert(HasAVX);
|
2016-09-06 07:58:40 +08:00
|
|
|
if (Aligned) {
|
|
|
|
if (IsNonTemporal)
|
|
|
|
Opc = HasVLX ? X86::VMOVNTDQZ256mr : X86::VMOVNTDQYmr;
|
|
|
|
else
|
|
|
|
Opc = HasVLX ? X86::VMOVDQA64Z256mr : X86::VMOVDQAYmr;
|
|
|
|
} else
|
|
|
|
Opc = HasVLX ? X86::VMOVDQU64Z256mr : X86::VMOVDQUYmr;
|
2015-02-02 00:15:07 +08:00
|
|
|
break;
|
2016-06-02 12:51:37 +08:00
|
|
|
case MVT::v16f32:
|
2016-09-06 07:58:40 +08:00
|
|
|
assert(HasAVX512);
|
2016-06-02 12:51:37 +08:00
|
|
|
if (Aligned)
|
|
|
|
Opc = IsNonTemporal ? X86::VMOVNTPSZmr : X86::VMOVAPSZmr;
|
|
|
|
else
|
|
|
|
Opc = X86::VMOVUPSZmr;
|
|
|
|
break;
|
|
|
|
case MVT::v8f64:
|
2016-09-06 07:58:40 +08:00
|
|
|
assert(HasAVX512);
|
2016-06-02 12:51:37 +08:00
|
|
|
if (Aligned) {
|
|
|
|
Opc = IsNonTemporal ? X86::VMOVNTPDZmr : X86::VMOVAPDZmr;
|
|
|
|
} else
|
|
|
|
Opc = X86::VMOVUPDZmr;
|
|
|
|
break;
|
|
|
|
case MVT::v8i64:
|
|
|
|
case MVT::v16i32:
|
|
|
|
case MVT::v32i16:
|
|
|
|
case MVT::v64i8:
|
2016-09-06 07:58:40 +08:00
|
|
|
assert(HasAVX512);
|
2016-06-02 12:51:37 +08:00
|
|
|
// Note: There are a lot more choices based on type with AVX-512, but
|
|
|
|
// there's really no advantage when the store isn't masked.
|
|
|
|
if (Aligned)
|
|
|
|
Opc = IsNonTemporal ? X86::VMOVNTDQZmr : X86::VMOVDQA64Zmr;
|
|
|
|
else
|
|
|
|
Opc = X86::VMOVDQU64Zmr;
|
|
|
|
break;
|
2015-02-02 00:15:07 +08:00
|
|
|
}
|
|
|
|
|
2016-04-28 06:33:42 +08:00
|
|
|
const MCInstrDesc &Desc = TII.get(Opc);
|
|
|
|
// Some of the instructions in the previous switch use FR128 instead
|
|
|
|
// of FR32 for ValReg. Make sure the register we feed the instruction
|
|
|
|
// matches its register class constraints.
|
|
|
|
// Note: This is fine to do a copy from FR32 to FR128, this is the
|
|
|
|
// same registers behind the scene and actually why it did not trigger
|
|
|
|
// any bugs before.
|
|
|
|
ValReg = constrainOperandRegClass(Desc, ValReg, Desc.getNumOperands() - 1);
|
2015-02-02 00:15:07 +08:00
|
|
|
MachineInstrBuilder MIB =
|
2016-04-28 06:33:42 +08:00
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, Desc);
|
2015-02-02 00:15:07 +08:00
|
|
|
addFullAddress(MIB, AM).addReg(ValReg, getKillRegState(ValIsKill));
|
|
|
|
if (MMO)
|
|
|
|
MIB->addMemOperand(*FuncInfo.MF, MMO);
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
|
2015-05-06 07:41:53 +08:00
|
|
|
X86AddressMode &AM,
|
2015-02-02 00:15:07 +08:00
|
|
|
MachineMemOperand *MMO, bool Aligned) {
|
|
|
|
// Handle 'null' like i32/i64 0.
|
|
|
|
if (isa<ConstantPointerNull>(Val))
|
|
|
|
Val = Constant::getNullValue(DL.getIntPtrType(Val->getContext()));
|
|
|
|
|
|
|
|
// If this is a store of a simple constant, fold the constant into the store.
|
|
|
|
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
|
|
|
|
unsigned Opc = 0;
|
|
|
|
bool Signed = true;
|
|
|
|
switch (VT.getSimpleVT().SimpleTy) {
|
|
|
|
default: break;
|
2016-08-17 13:10:15 +08:00
|
|
|
case MVT::i1:
|
|
|
|
Signed = false;
|
|
|
|
LLVM_FALLTHROUGH; // Handle as i8.
|
2015-02-02 00:15:07 +08:00
|
|
|
case MVT::i8: Opc = X86::MOV8mi; break;
|
|
|
|
case MVT::i16: Opc = X86::MOV16mi; break;
|
|
|
|
case MVT::i32: Opc = X86::MOV32mi; break;
|
|
|
|
case MVT::i64:
|
|
|
|
// Must be a 32-bit sign extended value.
|
|
|
|
if (isInt<32>(CI->getSExtValue()))
|
|
|
|
Opc = X86::MOV64mi32;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (Opc) {
|
|
|
|
MachineInstrBuilder MIB =
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
|
|
|
|
addFullAddress(MIB, AM).addImm(Signed ? (uint64_t) CI->getSExtValue()
|
|
|
|
: CI->getZExtValue());
|
|
|
|
if (MMO)
|
|
|
|
MIB->addMemOperand(*FuncInfo.MF, MMO);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
unsigned ValReg = getRegForValue(Val);
|
|
|
|
if (ValReg == 0)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
bool ValKill = hasTrivialKill(Val);
|
|
|
|
return X86FastEmitStore(VT, ValReg, ValKill, AM, MMO, Aligned);
|
|
|
|
}
|
|
|
|
|
|
|
|
/// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
|
|
|
|
/// type SrcVT to type DstVT using the specified extension opcode Opc (e.g.
|
|
|
|
/// ISD::SIGN_EXTEND).
|
|
|
|
bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
|
|
|
|
unsigned Src, EVT SrcVT,
|
|
|
|
unsigned &ResultReg) {
|
|
|
|
unsigned RR = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
|
|
|
|
Src, /*TODO: Kill=*/false);
|
|
|
|
if (RR == 0)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
ResultReg = RR;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) {
|
|
|
|
// Handle constant address.
|
|
|
|
if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
|
|
|
|
// Can't handle alternate code models yet.
|
|
|
|
if (TM.getCodeModel() != CodeModel::Small)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// Can't handle TLS yet.
|
|
|
|
if (GV->isThreadLocal())
|
|
|
|
return false;
|
|
|
|
|
2018-08-02 01:44:37 +08:00
|
|
|
// Can't handle !absolute_symbol references yet.
|
|
|
|
if (GV->isAbsoluteSymbolRef())
|
|
|
|
return false;
|
|
|
|
|
2015-02-02 00:15:07 +08:00
|
|
|
// RIP-relative addresses can't have additional register operands, so if
|
|
|
|
// we've already folded stuff into the addressing mode, just force the
|
|
|
|
// global value into its own register, which we can use as the basereg.
|
|
|
|
if (!Subtarget->isPICStyleRIPRel() ||
|
|
|
|
(AM.Base.Reg == 0 && AM.IndexReg == 0)) {
|
|
|
|
// Okay, we've committed to selecting this global. Set up the address.
|
|
|
|
AM.GV = GV;
|
|
|
|
|
|
|
|
// Allow the subtarget to classify the global.
|
2016-05-20 06:07:57 +08:00
|
|
|
unsigned char GVFlags = Subtarget->classifyGlobalReference(GV);
|
2015-02-02 00:15:07 +08:00
|
|
|
|
|
|
|
// If this reference is relative to the pic base, set it now.
|
|
|
|
if (isGlobalRelativeToPICBase(GVFlags)) {
|
|
|
|
// FIXME: How do we know Base.Reg is free??
|
|
|
|
AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Unless the ABI requires an extra load, return a direct reference to
|
|
|
|
// the global.
|
|
|
|
if (!isGlobalStubReference(GVFlags)) {
|
|
|
|
if (Subtarget->isPICStyleRIPRel()) {
|
|
|
|
// Use rip-relative addressing if we can. Above we verified that the
|
|
|
|
// base and index registers are unused.
|
|
|
|
assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
|
|
|
|
AM.Base.Reg = X86::RIP;
|
|
|
|
}
|
|
|
|
AM.GVOpFlags = GVFlags;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Ok, we need to do a load from a stub. If we've already loaded from
|
|
|
|
// this stub, reuse the loaded pointer, otherwise emit the load now.
|
|
|
|
DenseMap<const Value *, unsigned>::iterator I = LocalValueMap.find(V);
|
|
|
|
unsigned LoadReg;
|
|
|
|
if (I != LocalValueMap.end() && I->second != 0) {
|
|
|
|
LoadReg = I->second;
|
|
|
|
} else {
|
|
|
|
// Issue load from stub.
|
|
|
|
unsigned Opc = 0;
|
|
|
|
const TargetRegisterClass *RC = nullptr;
|
|
|
|
X86AddressMode StubAM;
|
|
|
|
StubAM.Base.Reg = AM.Base.Reg;
|
|
|
|
StubAM.GV = GV;
|
|
|
|
StubAM.GVOpFlags = GVFlags;
|
|
|
|
|
|
|
|
// Prepare for inserting code in the local-value area.
|
|
|
|
SavePoint SaveInsertPt = enterLocalValueArea();
|
|
|
|
|
2015-07-09 10:09:04 +08:00
|
|
|
if (TLI.getPointerTy(DL) == MVT::i64) {
|
2015-02-02 00:15:07 +08:00
|
|
|
Opc = X86::MOV64rm;
|
|
|
|
RC = &X86::GR64RegClass;
|
|
|
|
|
|
|
|
if (Subtarget->isPICStyleRIPRel())
|
|
|
|
StubAM.Base.Reg = X86::RIP;
|
|
|
|
} else {
|
|
|
|
Opc = X86::MOV32rm;
|
|
|
|
RC = &X86::GR32RegClass;
|
|
|
|
}
|
|
|
|
|
|
|
|
LoadReg = createResultReg(RC);
|
|
|
|
MachineInstrBuilder LoadMI =
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), LoadReg);
|
|
|
|
addFullAddress(LoadMI, StubAM);
|
|
|
|
|
|
|
|
// Ok, back to normal mode.
|
|
|
|
leaveLocalValueArea(SaveInsertPt);
|
|
|
|
|
|
|
|
// Prevent loading GV stub multiple times in same MBB.
|
|
|
|
LocalValueMap[V] = LoadReg;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Now construct the final address. Note that the Disp, Scale,
|
|
|
|
// and Index values may already be set here.
|
|
|
|
AM.Base.Reg = LoadReg;
|
|
|
|
AM.GV = nullptr;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// If all else fails, try to materialize the value in a register.
|
|
|
|
if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
|
|
|
|
if (AM.Base.Reg == 0) {
|
|
|
|
AM.Base.Reg = getRegForValue(V);
|
|
|
|
return AM.Base.Reg != 0;
|
|
|
|
}
|
|
|
|
if (AM.IndexReg == 0) {
|
|
|
|
assert(AM.Scale == 1 && "Scale with no index!");
|
|
|
|
AM.IndexReg = getRegForValue(V);
|
|
|
|
return AM.IndexReg != 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// X86SelectAddress - Attempt to fill in an address from the given value.
|
|
|
|
///
|
|
|
|
bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
|
|
|
|
SmallVector<const Value *, 32> GEPs;
|
|
|
|
redo_gep:
|
|
|
|
const User *U = nullptr;
|
|
|
|
unsigned Opcode = Instruction::UserOp1;
|
|
|
|
if (const Instruction *I = dyn_cast<Instruction>(V)) {
|
|
|
|
// Don't walk into other basic blocks; it's possible we haven't
|
|
|
|
// visited them yet, so the instructions may not yet be assigned
|
|
|
|
// virtual registers.
|
|
|
|
if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(V)) ||
|
|
|
|
FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
|
|
|
|
Opcode = I->getOpcode();
|
|
|
|
U = I;
|
|
|
|
}
|
|
|
|
} else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
|
|
|
|
Opcode = C->getOpcode();
|
|
|
|
U = C;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (PointerType *Ty = dyn_cast<PointerType>(V->getType()))
|
|
|
|
if (Ty->getAddressSpace() > 255)
|
|
|
|
// Fast instruction selection doesn't support the special
|
|
|
|
// address spaces.
|
|
|
|
return false;
|
|
|
|
|
|
|
|
switch (Opcode) {
|
|
|
|
default: break;
|
|
|
|
case Instruction::BitCast:
|
|
|
|
// Look past bitcasts.
|
|
|
|
return X86SelectAddress(U->getOperand(0), AM);
|
|
|
|
|
|
|
|
case Instruction::IntToPtr:
|
|
|
|
// Look past no-op inttoptrs.
|
2015-07-09 10:09:04 +08:00
|
|
|
if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
|
|
|
|
TLI.getPointerTy(DL))
|
2015-02-02 00:15:07 +08:00
|
|
|
return X86SelectAddress(U->getOperand(0), AM);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case Instruction::PtrToInt:
|
|
|
|
// Look past no-op ptrtoints.
|
2015-07-09 10:09:04 +08:00
|
|
|
if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
|
2015-02-02 00:15:07 +08:00
|
|
|
return X86SelectAddress(U->getOperand(0), AM);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case Instruction::Alloca: {
|
|
|
|
// Do static allocas.
|
|
|
|
const AllocaInst *A = cast<AllocaInst>(V);
|
|
|
|
DenseMap<const AllocaInst *, int>::iterator SI =
|
|
|
|
FuncInfo.StaticAllocaMap.find(A);
|
|
|
|
if (SI != FuncInfo.StaticAllocaMap.end()) {
|
|
|
|
AM.BaseType = X86AddressMode::FrameIndexBase;
|
|
|
|
AM.Base.FrameIndex = SI->second;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
case Instruction::Add: {
|
|
|
|
// Adds of constants are common and easy enough.
|
|
|
|
if (const ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
|
|
|
|
uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue();
|
|
|
|
// They have to fit in the 32-bit signed displacement field though.
|
|
|
|
if (isInt<32>(Disp)) {
|
|
|
|
AM.Disp = (uint32_t)Disp;
|
|
|
|
return X86SelectAddress(U->getOperand(0), AM);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
case Instruction::GetElementPtr: {
|
|
|
|
X86AddressMode SavedAM = AM;
|
|
|
|
|
|
|
|
// Pattern-match simple GEPs.
|
|
|
|
uint64_t Disp = (int32_t)AM.Disp;
|
|
|
|
unsigned IndexReg = AM.IndexReg;
|
|
|
|
unsigned Scale = AM.Scale;
|
|
|
|
gep_type_iterator GTI = gep_type_begin(U);
|
|
|
|
// Iterate through the indices, folding what we can. Constants can be
|
|
|
|
// folded, and one dynamic index can be handled, if the scale is supported.
|
|
|
|
for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
|
|
|
|
i != e; ++i, ++GTI) {
|
|
|
|
const Value *Op = *i;
|
2016-12-02 10:24:42 +08:00
|
|
|
if (StructType *STy = GTI.getStructTypeOrNull()) {
|
2015-02-02 00:15:07 +08:00
|
|
|
const StructLayout *SL = DL.getStructLayout(STy);
|
|
|
|
Disp += SL->getElementOffset(cast<ConstantInt>(Op)->getZExtValue());
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
// A array/variable index is always of the form i*S where S is the
|
|
|
|
// constant scale size. See if we can push the scale into immediates.
|
|
|
|
uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
|
|
|
|
for (;;) {
|
|
|
|
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
|
|
|
|
// Constant-offset addressing.
|
|
|
|
Disp += CI->getSExtValue() * S;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (canFoldAddIntoGEP(U, Op)) {
|
|
|
|
// A compatible add with a constant operand. Fold the constant.
|
|
|
|
ConstantInt *CI =
|
|
|
|
cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
|
|
|
|
Disp += CI->getSExtValue() * S;
|
|
|
|
// Iterate on the other operand.
|
|
|
|
Op = cast<AddOperator>(Op)->getOperand(0);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (IndexReg == 0 &&
|
|
|
|
(!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
|
|
|
|
(S == 1 || S == 2 || S == 4 || S == 8)) {
|
|
|
|
// Scaled-index addressing.
|
|
|
|
Scale = S;
|
|
|
|
IndexReg = getRegForGEPIndex(Op).first;
|
|
|
|
if (IndexReg == 0)
|
|
|
|
return false;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
// Unsupported.
|
|
|
|
goto unsupported_gep;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Check for displacement overflow.
|
|
|
|
if (!isInt<32>(Disp))
|
|
|
|
break;
|
|
|
|
|
|
|
|
AM.IndexReg = IndexReg;
|
|
|
|
AM.Scale = Scale;
|
|
|
|
AM.Disp = (uint32_t)Disp;
|
|
|
|
GEPs.push_back(V);
|
|
|
|
|
|
|
|
if (const GetElementPtrInst *GEP =
|
|
|
|
dyn_cast<GetElementPtrInst>(U->getOperand(0))) {
|
|
|
|
// Ok, the GEP indices were covered by constant-offset and scaled-index
|
|
|
|
// addressing. Update the address state and move on to examining the base.
|
|
|
|
V = GEP;
|
|
|
|
goto redo_gep;
|
|
|
|
} else if (X86SelectAddress(U->getOperand(0), AM)) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
// If we couldn't merge the gep value into this addr mode, revert back to
|
|
|
|
// our address and just match the value instead of completely failing.
|
|
|
|
AM = SavedAM;
|
|
|
|
|
2016-06-24 12:05:21 +08:00
|
|
|
for (const Value *I : reverse(GEPs))
|
|
|
|
if (handleConstantAddresses(I, AM))
|
2015-02-02 00:15:07 +08:00
|
|
|
return true;
|
|
|
|
|
|
|
|
return false;
|
|
|
|
unsupported_gep:
|
|
|
|
// Ok, the GEP indices weren't all covered.
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return handleConstantAddresses(V, AM);
|
|
|
|
}
|
|
|
|
|
|
|
|
/// X86SelectCallAddress - Attempt to fill in an address from the given value.
|
|
|
|
///
|
|
|
|
bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
|
|
|
|
const User *U = nullptr;
|
|
|
|
unsigned Opcode = Instruction::UserOp1;
|
|
|
|
const Instruction *I = dyn_cast<Instruction>(V);
|
|
|
|
// Record if the value is defined in the same basic block.
|
|
|
|
//
|
|
|
|
// This information is crucial to know whether or not folding an
|
|
|
|
// operand is valid.
|
|
|
|
// Indeed, FastISel generates or reuses a virtual register for all
|
|
|
|
// operands of all instructions it selects. Obviously, the definition and
|
|
|
|
// its uses must use the same virtual register otherwise the produced
|
|
|
|
// code is incorrect.
|
|
|
|
// Before instruction selection, FunctionLoweringInfo::set sets the virtual
|
|
|
|
// registers for values that are alive across basic blocks. This ensures
|
|
|
|
// that the values are consistently set between across basic block, even
|
|
|
|
// if different instruction selection mechanisms are used (e.g., a mix of
|
|
|
|
// SDISel and FastISel).
|
|
|
|
// For values local to a basic block, the instruction selection process
|
|
|
|
// generates these virtual registers with whatever method is appropriate
|
|
|
|
// for its needs. In particular, FastISel and SDISel do not share the way
|
|
|
|
// local virtual registers are set.
|
|
|
|
// Therefore, this is impossible (or at least unsafe) to share values
|
|
|
|
// between basic blocks unless they use the same instruction selection
|
|
|
|
// method, which is not guarantee for X86.
|
|
|
|
// Moreover, things like hasOneUse could not be used accurately, if we
|
|
|
|
// allow to reference values across basic blocks whereas they are not
|
|
|
|
// alive across basic blocks initially.
|
|
|
|
bool InMBB = true;
|
|
|
|
if (I) {
|
|
|
|
Opcode = I->getOpcode();
|
|
|
|
U = I;
|
|
|
|
InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
|
|
|
|
} else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
|
|
|
|
Opcode = C->getOpcode();
|
|
|
|
U = C;
|
|
|
|
}
|
|
|
|
|
|
|
|
switch (Opcode) {
|
|
|
|
default: break;
|
|
|
|
case Instruction::BitCast:
|
|
|
|
// Look past bitcasts if its operand is in the same BB.
|
|
|
|
if (InMBB)
|
|
|
|
return X86SelectCallAddress(U->getOperand(0), AM);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case Instruction::IntToPtr:
|
|
|
|
// Look past no-op inttoptrs if its operand is in the same BB.
|
|
|
|
if (InMBB &&
|
2015-07-09 10:09:04 +08:00
|
|
|
TLI.getValueType(DL, U->getOperand(0)->getType()) ==
|
|
|
|
TLI.getPointerTy(DL))
|
2015-02-02 00:15:07 +08:00
|
|
|
return X86SelectCallAddress(U->getOperand(0), AM);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case Instruction::PtrToInt:
|
|
|
|
// Look past no-op ptrtoints if its operand is in the same BB.
|
2015-07-09 10:09:04 +08:00
|
|
|
if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
|
2015-02-02 00:15:07 +08:00
|
|
|
return X86SelectCallAddress(U->getOperand(0), AM);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Handle constant address.
|
|
|
|
if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
|
|
|
|
// Can't handle alternate code models yet.
|
|
|
|
if (TM.getCodeModel() != CodeModel::Small)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// RIP-relative addresses can't have additional register operands.
|
|
|
|
if (Subtarget->isPICStyleRIPRel() &&
|
|
|
|
(AM.Base.Reg != 0 || AM.IndexReg != 0))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// Can't handle TLS.
|
|
|
|
if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
|
|
|
|
if (GVar->isThreadLocal())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// Okay, we've committed to selecting this global. Set up the basic address.
|
|
|
|
AM.GV = GV;
|
|
|
|
|
2017-08-05 08:10:43 +08:00
|
|
|
// Return a direct reference to the global. Fastisel can handle calls to
|
|
|
|
// functions that require loads, such as dllimport and nonlazybind
|
|
|
|
// functions.
|
2015-02-02 00:15:07 +08:00
|
|
|
if (Subtarget->isPICStyleRIPRel()) {
|
|
|
|
// Use rip-relative addressing if we can. Above we verified that the
|
|
|
|
// base and index registers are unused.
|
|
|
|
assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
|
|
|
|
AM.Base.Reg = X86::RIP;
|
2016-05-20 20:20:10 +08:00
|
|
|
} else {
|
|
|
|
AM.GVOpFlags = Subtarget->classifyLocalReference(nullptr);
|
2015-02-02 00:15:07 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
// If all else fails, try to materialize the value in a register.
|
|
|
|
if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
|
|
|
|
if (AM.Base.Reg == 0) {
|
|
|
|
AM.Base.Reg = getRegForValue(V);
|
|
|
|
return AM.Base.Reg != 0;
|
|
|
|
}
|
|
|
|
if (AM.IndexReg == 0) {
|
|
|
|
assert(AM.Scale == 1 && "Scale with no index!");
|
|
|
|
AM.IndexReg = getRegForValue(V);
|
|
|
|
return AM.IndexReg != 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// X86SelectStore - Select and emit code to implement store instructions.
|
|
|
|
bool X86FastISel::X86SelectStore(const Instruction *I) {
|
|
|
|
// Atomic stores need special handling.
|
|
|
|
const StoreInst *S = cast<StoreInst>(I);
|
|
|
|
|
|
|
|
if (S->isAtomic())
|
|
|
|
return false;
|
|
|
|
|
2016-04-12 05:08:06 +08:00
|
|
|
const Value *PtrV = I->getOperand(1);
|
|
|
|
if (TLI.supportSwiftError()) {
|
|
|
|
// Swifterror values can come from either a function parameter with
|
|
|
|
// swifterror attribute or an alloca with swifterror attribute.
|
|
|
|
if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
|
|
|
|
if (Arg->hasSwiftErrorAttr())
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
|
|
|
|
if (Alloca->isSwiftError())
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-02-02 00:15:07 +08:00
|
|
|
const Value *Val = S->getValueOperand();
|
|
|
|
const Value *Ptr = S->getPointerOperand();
|
|
|
|
|
|
|
|
MVT VT;
|
|
|
|
if (!isTypeLegal(Val->getType(), VT, /*AllowI1=*/true))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
unsigned Alignment = S->getAlignment();
|
|
|
|
unsigned ABIAlignment = DL.getABITypeAlignment(Val->getType());
|
|
|
|
if (Alignment == 0) // Ensure that codegen never sees alignment 0
|
|
|
|
Alignment = ABIAlignment;
|
|
|
|
bool Aligned = Alignment >= ABIAlignment;
|
|
|
|
|
|
|
|
X86AddressMode AM;
|
|
|
|
if (!X86SelectAddress(Ptr, AM))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
return X86FastEmitStore(VT, Val, AM, createMachineMemOperandFor(I), Aligned);
|
|
|
|
}
|
|
|
|
|
|
|
|
/// X86SelectRet - Select and emit code to implement ret instructions.
|
|
|
|
bool X86FastISel::X86SelectRet(const Instruction *I) {
|
|
|
|
const ReturnInst *Ret = cast<ReturnInst>(I);
|
|
|
|
const Function &F = *I->getParent()->getParent();
|
|
|
|
const X86MachineFunctionInfo *X86MFInfo =
|
|
|
|
FuncInfo.MF->getInfo<X86MachineFunctionInfo>();
|
|
|
|
|
|
|
|
if (!FuncInfo.CanLowerReturn)
|
|
|
|
return false;
|
|
|
|
|
2016-04-12 05:08:06 +08:00
|
|
|
if (TLI.supportSwiftError() &&
|
|
|
|
F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
|
|
|
|
return false;
|
|
|
|
|
2016-01-12 09:08:46 +08:00
|
|
|
if (TLI.supportSplitCSR(FuncInfo.MF))
|
|
|
|
return false;
|
|
|
|
|
2015-02-02 00:15:07 +08:00
|
|
|
CallingConv::ID CC = F.getCallingConv();
|
|
|
|
if (CC != CallingConv::C &&
|
|
|
|
CC != CallingConv::Fast &&
|
|
|
|
CC != CallingConv::X86_FastCall &&
|
2016-07-14 21:54:26 +08:00
|
|
|
CC != CallingConv::X86_StdCall &&
|
2016-07-12 09:30:35 +08:00
|
|
|
CC != CallingConv::X86_ThisCall &&
|
2016-07-16 04:18:37 +08:00
|
|
|
CC != CallingConv::X86_64_SysV &&
|
2017-07-18 04:05:19 +08:00
|
|
|
CC != CallingConv::Win64)
|
2015-02-02 00:15:07 +08:00
|
|
|
return false;
|
|
|
|
|
2016-07-12 09:30:35 +08:00
|
|
|
// Don't handle popping bytes if they don't fit the ret's immediate.
|
|
|
|
if (!isUInt<16>(X86MFInfo->getBytesToPopOnReturn()))
|
2015-02-02 00:15:07 +08:00
|
|
|
return false;
|
|
|
|
|
|
|
|
// fastcc with -tailcallopt is intended to provide a guaranteed
|
|
|
|
// tail call optimization. Fastisel doesn't know how to do that.
|
|
|
|
if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// Let SDISel handle vararg functions.
|
|
|
|
if (F.isVarArg())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// Build a list of return value registers.
|
|
|
|
SmallVector<unsigned, 4> RetRegs;
|
|
|
|
|
|
|
|
if (Ret->getNumOperands() > 0) {
|
|
|
|
SmallVector<ISD::OutputArg, 4> Outs;
|
2018-07-28 21:25:19 +08:00
|
|
|
GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
|
2015-02-02 00:15:07 +08:00
|
|
|
|
|
|
|
// Analyze operands of the call, assigning locations to each operand.
|
|
|
|
SmallVector<CCValAssign, 16> ValLocs;
|
|
|
|
CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
|
|
|
|
CCInfo.AnalyzeReturn(Outs, RetCC_X86);
|
|
|
|
|
|
|
|
const Value *RV = Ret->getOperand(0);
|
|
|
|
unsigned Reg = getRegForValue(RV);
|
|
|
|
if (Reg == 0)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// Only handle a single return value for now.
|
|
|
|
if (ValLocs.size() != 1)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
CCValAssign &VA = ValLocs[0];
|
|
|
|
|
|
|
|
// Don't bother handling odd stuff for now.
|
|
|
|
if (VA.getLocInfo() != CCValAssign::Full)
|
|
|
|
return false;
|
|
|
|
// Only handle register returns for now.
|
|
|
|
if (!VA.isRegLoc())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// The calling-convention tables for x87 returns don't tell
|
|
|
|
// the whole story.
|
|
|
|
if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
unsigned SrcReg = Reg + VA.getValNo();
|
2015-07-09 10:09:04 +08:00
|
|
|
EVT SrcVT = TLI.getValueType(DL, RV->getType());
|
2015-02-02 00:15:07 +08:00
|
|
|
EVT DstVT = VA.getValVT();
|
|
|
|
// Special handling for extended integers.
|
|
|
|
if (SrcVT != DstVT) {
|
|
|
|
if (SrcVT != MVT::i1 && SrcVT != MVT::i8 && SrcVT != MVT::i16)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
assert(DstVT == MVT::i32 && "X86 should always ext to i32");
|
|
|
|
|
|
|
|
if (SrcVT == MVT::i1) {
|
|
|
|
if (Outs[0].Flags.isSExt())
|
|
|
|
return false;
|
|
|
|
SrcReg = fastEmitZExtFromI1(MVT::i8, SrcReg, /*TODO: Kill=*/false);
|
|
|
|
SrcVT = MVT::i8;
|
|
|
|
}
|
|
|
|
unsigned Op = Outs[0].Flags.isZExt() ? ISD::ZERO_EXTEND :
|
|
|
|
ISD::SIGN_EXTEND;
|
|
|
|
SrcReg = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Op,
|
|
|
|
SrcReg, /*TODO: Kill=*/false);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Make the copy.
|
|
|
|
unsigned DstReg = VA.getLocReg();
|
|
|
|
const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
|
|
|
|
// Avoid a cross-class copy. This is very unlikely.
|
|
|
|
if (!SrcRC->contains(DstReg))
|
|
|
|
return false;
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
|
|
|
TII.get(TargetOpcode::COPY), DstReg).addReg(SrcReg);
|
|
|
|
|
|
|
|
// Add register to return instruction.
|
|
|
|
RetRegs.push_back(VA.getLocReg());
|
|
|
|
}
|
|
|
|
|
2016-04-27 02:08:06 +08:00
|
|
|
// Swift calling convention does not require we copy the sret argument
|
|
|
|
// into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
|
|
|
|
|
2016-01-04 01:22:03 +08:00
|
|
|
// All x86 ABIs require that for returning structs by value we copy
|
|
|
|
// the sret argument into %rax/%eax (depending on ABI) for the return.
|
|
|
|
// We saved the argument into a virtual register in the entry block,
|
2015-12-28 22:39:21 +08:00
|
|
|
// so now we copy the value out and into %rax/%eax.
|
2016-04-27 02:08:06 +08:00
|
|
|
if (F.hasStructRetAttr() && CC != CallingConv::Swift) {
|
2015-02-02 00:15:07 +08:00
|
|
|
unsigned Reg = X86MFInfo->getSRetReturnReg();
|
|
|
|
assert(Reg &&
|
|
|
|
"SRetReturnReg should have been set in LowerFormalArguments()!");
|
2018-09-12 01:57:23 +08:00
|
|
|
unsigned RetReg = Subtarget->isTarget64BitLP64() ? X86::RAX : X86::EAX;
|
2015-02-02 00:15:07 +08:00
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
|
|
|
TII.get(TargetOpcode::COPY), RetReg).addReg(Reg);
|
|
|
|
RetRegs.push_back(RetReg);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Now emit the RET.
|
2016-07-12 09:30:35 +08:00
|
|
|
MachineInstrBuilder MIB;
|
|
|
|
if (X86MFInfo->getBytesToPopOnReturn()) {
|
|
|
|
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
|
|
|
TII.get(Subtarget->is64Bit() ? X86::RETIQ : X86::RETIL))
|
|
|
|
.addImm(X86MFInfo->getBytesToPopOnReturn());
|
|
|
|
} else {
|
|
|
|
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
|
|
|
TII.get(Subtarget->is64Bit() ? X86::RETQ : X86::RETL));
|
|
|
|
}
|
2015-02-02 00:15:07 +08:00
|
|
|
for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
|
|
|
|
MIB.addReg(RetRegs[i], RegState::Implicit);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// X86SelectLoad - Select and emit code to implement load instructions.
|
|
|
|
///
|
|
|
|
bool X86FastISel::X86SelectLoad(const Instruction *I) {
|
|
|
|
const LoadInst *LI = cast<LoadInst>(I);
|
|
|
|
|
|
|
|
// Atomic loads need special handling.
|
|
|
|
if (LI->isAtomic())
|
|
|
|
return false;
|
|
|
|
|
2016-04-12 05:08:06 +08:00
|
|
|
const Value *SV = I->getOperand(0);
|
|
|
|
if (TLI.supportSwiftError()) {
|
|
|
|
// Swifterror values can come from either a function parameter with
|
|
|
|
// swifterror attribute or an alloca with swifterror attribute.
|
|
|
|
if (const Argument *Arg = dyn_cast<Argument>(SV)) {
|
|
|
|
if (Arg->hasSwiftErrorAttr())
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
|
|
|
|
if (Alloca->isSwiftError())
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-02-02 00:15:07 +08:00
|
|
|
MVT VT;
|
|
|
|
if (!isTypeLegal(LI->getType(), VT, /*AllowI1=*/true))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
const Value *Ptr = LI->getPointerOperand();
|
|
|
|
|
|
|
|
X86AddressMode AM;
|
|
|
|
if (!X86SelectAddress(Ptr, AM))
|
|
|
|
return false;
|
|
|
|
|
2015-03-26 19:29:02 +08:00
|
|
|
unsigned Alignment = LI->getAlignment();
|
|
|
|
unsigned ABIAlignment = DL.getABITypeAlignment(LI->getType());
|
|
|
|
if (Alignment == 0) // Ensure that codegen never sees alignment 0
|
|
|
|
Alignment = ABIAlignment;
|
|
|
|
|
2015-02-02 00:15:07 +08:00
|
|
|
unsigned ResultReg = 0;
|
2015-03-26 19:29:02 +08:00
|
|
|
if (!X86FastEmitLoad(VT, AM, createMachineMemOperandFor(LI), ResultReg,
|
|
|
|
Alignment))
|
2015-02-02 00:15:07 +08:00
|
|
|
return false;
|
|
|
|
|
|
|
|
updateValueMap(I, ResultReg);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Return the register-register compare opcode for values of type \p VT, or
/// 0 when there is none. Integer types map to CMPrr. f32 and f64 map to the
/// UCOMISS/UCOMISD variant chosen from the AVX-512/AVX feature bits, and
/// only when SSE1 (for f32) or SSE2 (for f64) is available.
static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
  const bool UseEVEX = Subtarget->hasAVX512();
  const bool UseVEX = Subtarget->hasAVX();
  const bool HasScalarF32 = Subtarget->hasSSE1();
  const bool HasScalarF64 = Subtarget->hasSSE2();

  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::i8:
    return X86::CMP8rr;
  case MVT::i16:
    return X86::CMP16rr;
  case MVT::i32:
    return X86::CMP32rr;
  case MVT::i64:
    return X86::CMP64rr;
  case MVT::f32:
    if (!HasScalarF32)
      return 0;
    if (UseEVEX)
      return X86::VUCOMISSZrr;
    if (UseVEX)
      return X86::VUCOMISSrr;
    return X86::UCOMISSrr;
  case MVT::f64:
    if (!HasScalarF64)
      return 0;
    if (UseEVEX)
      return X86::VUCOMISDZrr;
    if (UseVEX)
      return X86::VUCOMISDrr;
    return X86::UCOMISDrr;
  default:
    return 0;
  }
}
|
|
|
|
|
2015-03-16 22:05:49 +08:00
|
|
|
/// If we have a comparison with RHS as the RHS of the comparison, return an
|
|
|
|
/// opcode that works for the compare (e.g. CMP32ri) otherwise return 0.
|
2015-02-02 00:15:07 +08:00
|
|
|
static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) {
|
2015-03-16 22:25:08 +08:00
|
|
|
int64_t Val = RHSC->getSExtValue();
|
2015-02-02 00:15:07 +08:00
|
|
|
switch (VT.getSimpleVT().SimpleTy) {
|
|
|
|
// Otherwise, we can't fold the immediate into this comparison.
|
2015-03-16 22:05:49 +08:00
|
|
|
default:
|
|
|
|
return 0;
|
|
|
|
case MVT::i8:
|
|
|
|
return X86::CMP8ri;
|
|
|
|
case MVT::i16:
|
2015-03-16 22:25:08 +08:00
|
|
|
if (isInt<8>(Val))
|
|
|
|
return X86::CMP16ri8;
|
2015-03-16 22:05:49 +08:00
|
|
|
return X86::CMP16ri;
|
|
|
|
case MVT::i32:
|
2015-03-16 22:25:08 +08:00
|
|
|
if (isInt<8>(Val))
|
|
|
|
return X86::CMP32ri8;
|
2015-03-16 22:05:49 +08:00
|
|
|
return X86::CMP32ri;
|
2015-02-02 00:15:07 +08:00
|
|
|
case MVT::i64:
|
2015-03-16 22:25:08 +08:00
|
|
|
if (isInt<8>(Val))
|
|
|
|
return X86::CMP64ri8;
|
2015-02-02 00:15:07 +08:00
|
|
|
// 64-bit comparisons are only valid if the immediate fits in a 32-bit sext
|
|
|
|
// field.
|
2015-03-16 22:25:08 +08:00
|
|
|
if (isInt<32>(Val))
|
2015-02-02 00:15:07 +08:00
|
|
|
return X86::CMP64ri32;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-06-12 23:39:02 +08:00
|
|
|
bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1, EVT VT,
|
|
|
|
const DebugLoc &CurDbgLoc) {
|
2015-02-02 00:15:07 +08:00
|
|
|
unsigned Op0Reg = getRegForValue(Op0);
|
|
|
|
if (Op0Reg == 0) return false;
|
|
|
|
|
|
|
|
// Handle 'null' like i32/i64 0.
|
|
|
|
if (isa<ConstantPointerNull>(Op1))
|
|
|
|
Op1 = Constant::getNullValue(DL.getIntPtrType(Op0->getContext()));
|
|
|
|
|
|
|
|
// We have two options: compare with register or immediate. If the RHS of
|
|
|
|
// the compare is an immediate that we can fold into this compare, use
|
|
|
|
// CMPri, otherwise use CMPrr.
|
|
|
|
if (const ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
|
|
|
|
if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) {
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurDbgLoc, TII.get(CompareImmOpc))
|
|
|
|
.addReg(Op0Reg)
|
|
|
|
.addImm(Op1C->getSExtValue());
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
unsigned CompareOpc = X86ChooseCmpOpcode(VT, Subtarget);
|
|
|
|
if (CompareOpc == 0) return false;
|
|
|
|
|
|
|
|
unsigned Op1Reg = getRegForValue(Op1);
|
|
|
|
if (Op1Reg == 0) return false;
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurDbgLoc, TII.get(CompareOpc))
|
|
|
|
.addReg(Op0Reg)
|
|
|
|
.addReg(Op1Reg);
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool X86FastISel::X86SelectCmp(const Instruction *I) {
|
|
|
|
const CmpInst *CI = cast<CmpInst>(I);
|
|
|
|
|
|
|
|
MVT VT;
|
|
|
|
if (!isTypeLegal(I->getOperand(0)->getType(), VT))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// Try to optimize or fold the cmp.
|
|
|
|
CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
|
|
|
|
unsigned ResultReg = 0;
|
|
|
|
switch (Predicate) {
|
|
|
|
default: break;
|
|
|
|
case CmpInst::FCMP_FALSE: {
|
|
|
|
ResultReg = createResultReg(&X86::GR32RegClass);
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV32r0),
|
|
|
|
ResultReg);
|
|
|
|
ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultReg, /*Kill=*/true,
|
|
|
|
X86::sub_8bit);
|
|
|
|
if (!ResultReg)
|
|
|
|
return false;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case CmpInst::FCMP_TRUE: {
|
|
|
|
ResultReg = createResultReg(&X86::GR8RegClass);
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri),
|
|
|
|
ResultReg).addImm(1);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (ResultReg) {
|
|
|
|
updateValueMap(I, ResultReg);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
const Value *LHS = CI->getOperand(0);
|
|
|
|
const Value *RHS = CI->getOperand(1);
|
|
|
|
|
|
|
|
// The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
|
|
|
|
// We don't have to materialize a zero constant for this case and can just use
|
|
|
|
// %x again on the RHS.
|
|
|
|
if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
|
|
|
|
const auto *RHSC = dyn_cast<ConstantFP>(RHS);
|
|
|
|
if (RHSC && RHSC->isNullValue())
|
|
|
|
RHS = LHS;
|
|
|
|
}
|
|
|
|
|
|
|
|
// FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
|
2016-09-05 15:14:21 +08:00
|
|
|
static const uint16_t SETFOpcTable[2][3] = {
|
2015-02-02 00:15:07 +08:00
|
|
|
{ X86::SETEr, X86::SETNPr, X86::AND8rr },
|
|
|
|
{ X86::SETNEr, X86::SETPr, X86::OR8rr }
|
|
|
|
};
|
2016-09-05 15:14:21 +08:00
|
|
|
const uint16_t *SETFOpc = nullptr;
|
2015-02-02 00:15:07 +08:00
|
|
|
switch (Predicate) {
|
|
|
|
default: break;
|
|
|
|
case CmpInst::FCMP_OEQ: SETFOpc = &SETFOpcTable[0][0]; break;
|
|
|
|
case CmpInst::FCMP_UNE: SETFOpc = &SETFOpcTable[1][0]; break;
|
|
|
|
}
|
|
|
|
|
|
|
|
ResultReg = createResultReg(&X86::GR8RegClass);
|
|
|
|
if (SETFOpc) {
|
|
|
|
if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc()))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
unsigned FlagReg1 = createResultReg(&X86::GR8RegClass);
|
|
|
|
unsigned FlagReg2 = createResultReg(&X86::GR8RegClass);
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[0]),
|
|
|
|
FlagReg1);
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[1]),
|
|
|
|
FlagReg2);
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[2]),
|
|
|
|
ResultReg).addReg(FlagReg1).addReg(FlagReg2);
|
|
|
|
updateValueMap(I, ResultReg);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
X86::CondCode CC;
|
|
|
|
bool SwapArgs;
|
2017-05-11 14:36:37 +08:00
|
|
|
std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate);
|
2015-02-02 00:15:07 +08:00
|
|
|
assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
|
|
|
|
unsigned Opc = X86::getSETFromCond(CC);
|
|
|
|
|
|
|
|
if (SwapArgs)
|
|
|
|
std::swap(LHS, RHS);
|
|
|
|
|
|
|
|
// Emit a compare of LHS/RHS.
|
|
|
|
if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc()))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
|
|
|
|
updateValueMap(I, ResultReg);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool X86FastISel::X86SelectZExt(const Instruction *I) {
|
2015-07-09 10:09:04 +08:00
|
|
|
EVT DstVT = TLI.getValueType(DL, I->getType());
|
2015-02-02 00:15:07 +08:00
|
|
|
if (!TLI.isTypeLegal(DstVT))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
unsigned ResultReg = getRegForValue(I->getOperand(0));
|
|
|
|
if (ResultReg == 0)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// Handle zero-extension from i1 to i8, which is common.
|
2015-07-09 10:09:04 +08:00
|
|
|
MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
|
2016-12-05 14:09:55 +08:00
|
|
|
if (SrcVT == MVT::i1) {
|
2015-02-02 00:15:07 +08:00
|
|
|
// Set the high bits to zero.
|
|
|
|
ResultReg = fastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
|
|
|
|
SrcVT = MVT::i8;
|
|
|
|
|
|
|
|
if (ResultReg == 0)
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (DstVT == MVT::i64) {
|
|
|
|
// Handle extension to 64-bits via sub-register shenanigans.
|
|
|
|
unsigned MovInst;
|
|
|
|
|
|
|
|
switch (SrcVT.SimpleTy) {
|
|
|
|
case MVT::i8: MovInst = X86::MOVZX32rr8; break;
|
|
|
|
case MVT::i16: MovInst = X86::MOVZX32rr16; break;
|
|
|
|
case MVT::i32: MovInst = X86::MOV32rr; break;
|
|
|
|
default: llvm_unreachable("Unexpected zext to i64 source type");
|
|
|
|
}
|
|
|
|
|
|
|
|
unsigned Result32 = createResultReg(&X86::GR32RegClass);
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovInst), Result32)
|
|
|
|
.addReg(ResultReg);
|
|
|
|
|
|
|
|
ResultReg = createResultReg(&X86::GR64RegClass);
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::SUBREG_TO_REG),
|
|
|
|
ResultReg)
|
|
|
|
.addImm(0).addReg(Result32).addImm(X86::sub_32bit);
|
2017-09-03 02:53:46 +08:00
|
|
|
} else if (DstVT == MVT::i16) {
|
|
|
|
// i8->i16 doesn't exist in the autogenerated isel table. Need to zero
|
|
|
|
// extend to 32-bits and then extract down to 16-bits.
|
|
|
|
unsigned Result32 = createResultReg(&X86::GR32RegClass);
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOVZX32rr8),
|
|
|
|
Result32).addReg(ResultReg);
|
|
|
|
|
|
|
|
ResultReg = fastEmitInst_extractsubreg(MVT::i16, Result32, /*Kill=*/true,
|
|
|
|
X86::sub_16bit);
|
2015-02-02 00:15:07 +08:00
|
|
|
} else if (DstVT != MVT::i8) {
|
|
|
|
ResultReg = fastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND,
|
|
|
|
ResultReg, /*Kill=*/true);
|
|
|
|
if (ResultReg == 0)
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
updateValueMap(I, ResultReg);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2017-09-03 02:53:46 +08:00
|
|
|
bool X86FastISel::X86SelectSExt(const Instruction *I) {
|
|
|
|
EVT DstVT = TLI.getValueType(DL, I->getType());
|
|
|
|
if (!TLI.isTypeLegal(DstVT))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
unsigned ResultReg = getRegForValue(I->getOperand(0));
|
|
|
|
if (ResultReg == 0)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// Handle sign-extension from i1 to i8.
|
|
|
|
MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
|
|
|
|
if (SrcVT == MVT::i1) {
|
|
|
|
// Set the high bits to zero.
|
|
|
|
unsigned ZExtReg = fastEmitZExtFromI1(MVT::i8, ResultReg,
|
|
|
|
/*TODO: Kill=*/false);
|
|
|
|
if (ZExtReg == 0)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// Negate the result to make an 8-bit sign extended value.
|
|
|
|
ResultReg = createResultReg(&X86::GR8RegClass);
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::NEG8r),
|
|
|
|
ResultReg).addReg(ZExtReg);
|
|
|
|
|
|
|
|
SrcVT = MVT::i8;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (DstVT == MVT::i16) {
|
|
|
|
// i8->i16 doesn't exist in the autogenerated isel table. Need to sign
|
|
|
|
// extend to 32-bits and then extract down to 16-bits.
|
|
|
|
unsigned Result32 = createResultReg(&X86::GR32RegClass);
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOVSX32rr8),
|
|
|
|
Result32).addReg(ResultReg);
|
|
|
|
|
|
|
|
ResultReg = fastEmitInst_extractsubreg(MVT::i16, Result32, /*Kill=*/true,
|
|
|
|
X86::sub_16bit);
|
|
|
|
} else if (DstVT != MVT::i8) {
|
|
|
|
ResultReg = fastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::SIGN_EXTEND,
|
|
|
|
ResultReg, /*Kill=*/true);
|
|
|
|
if (ResultReg == 0)
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
updateValueMap(I, ResultReg);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2015-02-02 00:15:07 +08:00
|
|
|
bool X86FastISel::X86SelectBranch(const Instruction *I) {
|
|
|
|
// Unconditional branches are selected by tablegen-generated code.
|
|
|
|
// Handle a conditional branch.
|
|
|
|
const BranchInst *BI = cast<BranchInst>(I);
|
|
|
|
MachineBasicBlock *TrueMBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
|
|
|
|
MachineBasicBlock *FalseMBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
|
|
|
|
|
|
|
|
// Fold the common case of a conditional branch with a comparison
|
|
|
|
// in the same block (values defined on other blocks may not have
|
|
|
|
// initialized registers).
|
|
|
|
X86::CondCode CC;
|
|
|
|
if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
|
|
|
|
if (CI->hasOneUse() && CI->getParent() == I->getParent()) {
|
2015-07-09 10:09:04 +08:00
|
|
|
EVT VT = TLI.getValueType(DL, CI->getOperand(0)->getType());
|
2015-02-02 00:15:07 +08:00
|
|
|
|
|
|
|
// Try to optimize or fold the cmp.
|
|
|
|
CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
|
|
|
|
switch (Predicate) {
|
|
|
|
default: break;
|
|
|
|
case CmpInst::FCMP_FALSE: fastEmitBranch(FalseMBB, DbgLoc); return true;
|
|
|
|
case CmpInst::FCMP_TRUE: fastEmitBranch(TrueMBB, DbgLoc); return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
const Value *CmpLHS = CI->getOperand(0);
|
|
|
|
const Value *CmpRHS = CI->getOperand(1);
|
|
|
|
|
|
|
|
// The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x,
|
|
|
|
// 0.0.
|
|
|
|
// We don't have to materialize a zero constant for this case and can just
|
|
|
|
// use %x again on the RHS.
|
|
|
|
if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
|
|
|
|
const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
|
|
|
|
if (CmpRHSC && CmpRHSC->isNullValue())
|
|
|
|
CmpRHS = CmpLHS;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Try to take advantage of fallthrough opportunities.
|
|
|
|
if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
|
|
|
|
std::swap(TrueMBB, FalseMBB);
|
|
|
|
Predicate = CmpInst::getInversePredicate(Predicate);
|
|
|
|
}
|
|
|
|
|
|
|
|
// FCMP_OEQ and FCMP_UNE cannot be expressed with a single flag/condition
|
|
|
|
// code check. Instead two branch instructions are required to check all
|
|
|
|
// the flags. First we change the predicate to a supported condition code,
|
|
|
|
// which will be the first branch. Later one we will emit the second
|
|
|
|
// branch.
|
|
|
|
bool NeedExtraBranch = false;
|
|
|
|
switch (Predicate) {
|
|
|
|
default: break;
|
|
|
|
case CmpInst::FCMP_OEQ:
|
2016-08-17 13:10:15 +08:00
|
|
|
std::swap(TrueMBB, FalseMBB);
|
|
|
|
LLVM_FALLTHROUGH;
|
2015-02-02 00:15:07 +08:00
|
|
|
case CmpInst::FCMP_UNE:
|
|
|
|
NeedExtraBranch = true;
|
|
|
|
Predicate = CmpInst::FCMP_ONE;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool SwapArgs;
|
|
|
|
unsigned BranchOpc;
|
2017-05-11 14:36:37 +08:00
|
|
|
std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate);
|
2015-02-02 00:15:07 +08:00
|
|
|
assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
|
|
|
|
|
|
|
|
BranchOpc = X86::GetCondBranchFromCond(CC);
|
|
|
|
if (SwapArgs)
|
|
|
|
std::swap(CmpLHS, CmpRHS);
|
|
|
|
|
|
|
|
// Emit a compare of the LHS and RHS, setting the flags.
|
|
|
|
if (!X86FastEmitCompare(CmpLHS, CmpRHS, VT, CI->getDebugLoc()))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BranchOpc))
|
|
|
|
.addMBB(TrueMBB);
|
|
|
|
|
|
|
|
// X86 requires a second branch to handle UNE (and OEQ, which is mapped
|
|
|
|
// to UNE above).
|
|
|
|
if (NeedExtraBranch) {
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JP_1))
|
|
|
|
.addMBB(TrueMBB);
|
|
|
|
}
|
|
|
|
|
2015-08-26 09:38:00 +08:00
|
|
|
finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
|
2015-02-02 00:15:07 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
} else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
|
|
|
|
// Handle things like "%cond = trunc i32 %X to i1 / br i1 %cond", which
|
|
|
|
// typically happen for _Bool and C++ bools.
|
|
|
|
MVT SourceVT;
|
|
|
|
if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
|
|
|
|
isTypeLegal(TI->getOperand(0)->getType(), SourceVT)) {
|
|
|
|
unsigned TestOpc = 0;
|
|
|
|
switch (SourceVT.SimpleTy) {
|
|
|
|
default: break;
|
|
|
|
case MVT::i8: TestOpc = X86::TEST8ri; break;
|
|
|
|
case MVT::i16: TestOpc = X86::TEST16ri; break;
|
|
|
|
case MVT::i32: TestOpc = X86::TEST32ri; break;
|
|
|
|
case MVT::i64: TestOpc = X86::TEST64ri32; break;
|
|
|
|
}
|
|
|
|
if (TestOpc) {
|
|
|
|
unsigned OpReg = getRegForValue(TI->getOperand(0));
|
|
|
|
if (OpReg == 0) return false;
|
2016-08-21 16:02:27 +08:00
|
|
|
|
2015-02-02 00:15:07 +08:00
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TestOpc))
|
|
|
|
.addReg(OpReg).addImm(1);
|
|
|
|
|
|
|
|
unsigned JmpOpc = X86::JNE_1;
|
|
|
|
if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
|
|
|
|
std::swap(TrueMBB, FalseMBB);
|
|
|
|
JmpOpc = X86::JE_1;
|
|
|
|
}
|
|
|
|
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(JmpOpc))
|
|
|
|
.addMBB(TrueMBB);
|
2015-08-26 09:38:00 +08:00
|
|
|
|
|
|
|
finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
|
2015-02-02 00:15:07 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else if (foldX86XALUIntrinsic(CC, BI, BI->getCondition())) {
|
|
|
|
// Fake request the condition, otherwise the intrinsic might be completely
|
|
|
|
// optimized away.
|
|
|
|
unsigned TmpReg = getRegForValue(BI->getCondition());
|
|
|
|
if (TmpReg == 0)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
unsigned BranchOpc = X86::GetCondBranchFromCond(CC);
|
|
|
|
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BranchOpc))
|
|
|
|
.addMBB(TrueMBB);
|
2015-08-26 09:38:00 +08:00
|
|
|
finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
|
2015-02-02 00:15:07 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Otherwise do a clumsy setcc and re-test it.
|
|
|
|
// Note that i1 essentially gets ANY_EXTEND'ed to i8 where it isn't used
|
|
|
|
// in an explicit cast, so make sure to handle that correctly.
|
|
|
|
unsigned OpReg = getRegForValue(BI->getCondition());
|
|
|
|
if (OpReg == 0) return false;
|
|
|
|
|
2016-09-28 19:22:17 +08:00
|
|
|
// In case OpReg is a K register, COPY to a GPR
|
|
|
|
if (MRI.getRegClass(OpReg) == &X86::VK1RegClass) {
|
|
|
|
unsigned KOpReg = OpReg;
|
2017-03-29 00:35:29 +08:00
|
|
|
OpReg = createResultReg(&X86::GR32RegClass);
|
2016-09-28 19:22:17 +08:00
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
|
|
|
TII.get(TargetOpcode::COPY), OpReg)
|
|
|
|
.addReg(KOpReg);
|
2017-03-29 00:35:29 +08:00
|
|
|
OpReg = fastEmitInst_extractsubreg(MVT::i8, OpReg, /*Kill=*/true,
|
|
|
|
X86::sub_8bit);
|
2016-09-28 19:22:17 +08:00
|
|
|
}
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
|
|
|
|
.addReg(OpReg)
|
|
|
|
.addImm(1);
|
2015-02-02 00:15:07 +08:00
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JNE_1))
|
|
|
|
.addMBB(TrueMBB);
|
2015-08-26 09:38:00 +08:00
|
|
|
finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
|
2015-02-02 00:15:07 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool X86FastISel::X86SelectShift(const Instruction *I) {
|
|
|
|
unsigned CReg = 0, OpReg = 0;
|
|
|
|
const TargetRegisterClass *RC = nullptr;
|
2018-03-15 01:57:19 +08:00
|
|
|
if (I->getType()->isIntegerTy(8)) {
|
|
|
|
CReg = X86::CL;
|
|
|
|
RC = &X86::GR8RegClass;
|
|
|
|
switch (I->getOpcode()) {
|
|
|
|
case Instruction::LShr: OpReg = X86::SHR8rCL; break;
|
|
|
|
case Instruction::AShr: OpReg = X86::SAR8rCL; break;
|
|
|
|
case Instruction::Shl: OpReg = X86::SHL8rCL; break;
|
|
|
|
default: return false;
|
|
|
|
}
|
|
|
|
} else if (I->getType()->isIntegerTy(16)) {
|
2015-02-02 00:15:07 +08:00
|
|
|
CReg = X86::CX;
|
|
|
|
RC = &X86::GR16RegClass;
|
|
|
|
switch (I->getOpcode()) {
|
2017-10-29 03:56:56 +08:00
|
|
|
default: llvm_unreachable("Unexpected shift opcode");
|
2015-02-02 00:15:07 +08:00
|
|
|
case Instruction::LShr: OpReg = X86::SHR16rCL; break;
|
|
|
|
case Instruction::AShr: OpReg = X86::SAR16rCL; break;
|
|
|
|
case Instruction::Shl: OpReg = X86::SHL16rCL; break;
|
|
|
|
}
|
|
|
|
} else if (I->getType()->isIntegerTy(32)) {
|
|
|
|
CReg = X86::ECX;
|
|
|
|
RC = &X86::GR32RegClass;
|
|
|
|
switch (I->getOpcode()) {
|
2017-10-29 03:56:56 +08:00
|
|
|
default: llvm_unreachable("Unexpected shift opcode");
|
2015-02-02 00:15:07 +08:00
|
|
|
case Instruction::LShr: OpReg = X86::SHR32rCL; break;
|
|
|
|
case Instruction::AShr: OpReg = X86::SAR32rCL; break;
|
|
|
|
case Instruction::Shl: OpReg = X86::SHL32rCL; break;
|
|
|
|
}
|
|
|
|
} else if (I->getType()->isIntegerTy(64)) {
|
|
|
|
CReg = X86::RCX;
|
|
|
|
RC = &X86::GR64RegClass;
|
|
|
|
switch (I->getOpcode()) {
|
2017-10-29 03:56:56 +08:00
|
|
|
default: llvm_unreachable("Unexpected shift opcode");
|
2015-02-02 00:15:07 +08:00
|
|
|
case Instruction::LShr: OpReg = X86::SHR64rCL; break;
|
|
|
|
case Instruction::AShr: OpReg = X86::SAR64rCL; break;
|
|
|
|
case Instruction::Shl: OpReg = X86::SHL64rCL; break;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
MVT VT;
|
|
|
|
if (!isTypeLegal(I->getType(), VT))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
unsigned Op0Reg = getRegForValue(I->getOperand(0));
|
|
|
|
if (Op0Reg == 0) return false;
|
|
|
|
|
|
|
|
unsigned Op1Reg = getRegForValue(I->getOperand(1));
|
|
|
|
if (Op1Reg == 0) return false;
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
|
|
|
|
CReg).addReg(Op1Reg);
|
|
|
|
|
|
|
|
// The shift instruction uses X86::CL. If we defined a super-register
|
|
|
|
// of X86::CL, emit a subreg KILL to precisely describe what we're doing here.
|
2018-03-15 01:57:19 +08:00
|
|
|
if (CReg != X86::CL)
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
|
|
|
TII.get(TargetOpcode::KILL), X86::CL)
|
|
|
|
.addReg(CReg, RegState::Kill);
|
2015-02-02 00:15:07 +08:00
|
|
|
|
|
|
|
unsigned ResultReg = createResultReg(RC);
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(OpReg), ResultReg)
|
|
|
|
.addReg(Op0Reg);
|
|
|
|
updateValueMap(I, ResultReg);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool X86FastISel::X86SelectDivRem(const Instruction *I) {
|
|
|
|
const static unsigned NumTypes = 4; // i8, i16, i32, i64
|
|
|
|
const static unsigned NumOps = 4; // SDiv, SRem, UDiv, URem
|
|
|
|
const static bool S = true; // IsSigned
|
|
|
|
const static bool U = false; // !IsSigned
|
|
|
|
const static unsigned Copy = TargetOpcode::COPY;
|
|
|
|
// For the X86 DIV/IDIV instruction, in most cases the dividend
|
|
|
|
// (numerator) must be in a specific register pair highreg:lowreg,
|
|
|
|
// producing the quotient in lowreg and the remainder in highreg.
|
|
|
|
// For most data types, to set up the instruction, the dividend is
|
|
|
|
// copied into lowreg, and lowreg is sign-extended or zero-extended
|
|
|
|
// into highreg. The exception is i8, where the dividend is defined
|
|
|
|
// as a single register rather than a register pair, and we
|
|
|
|
// therefore directly sign-extend or zero-extend the dividend into
|
|
|
|
// lowreg, instead of copying, and ignore the highreg.
|
|
|
|
const static struct DivRemEntry {
|
|
|
|
// The following portion depends only on the data type.
|
|
|
|
const TargetRegisterClass *RC;
|
|
|
|
unsigned LowInReg; // low part of the register pair
|
|
|
|
unsigned HighInReg; // high part of the register pair
|
|
|
|
// The following portion depends on both the data type and the operation.
|
|
|
|
struct DivRemResult {
|
|
|
|
unsigned OpDivRem; // The specific DIV/IDIV opcode to use.
|
|
|
|
unsigned OpSignExtend; // Opcode for sign-extending lowreg into
|
|
|
|
// highreg, or copying a zero into highreg.
|
|
|
|
unsigned OpCopy; // Opcode for copying dividend into lowreg, or
|
|
|
|
// zero/sign-extending into lowreg for i8.
|
|
|
|
unsigned DivRemResultReg; // Register containing the desired result.
|
|
|
|
bool IsOpSigned; // Whether to use signed or unsigned form.
|
|
|
|
} ResultTable[NumOps];
|
|
|
|
} OpTable[NumTypes] = {
|
|
|
|
{ &X86::GR8RegClass, X86::AX, 0, {
|
|
|
|
{ X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AL, S }, // SDiv
|
|
|
|
{ X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AH, S }, // SRem
|
|
|
|
{ X86::DIV8r, 0, X86::MOVZX16rr8, X86::AL, U }, // UDiv
|
|
|
|
{ X86::DIV8r, 0, X86::MOVZX16rr8, X86::AH, U }, // URem
|
|
|
|
}
|
|
|
|
}, // i8
|
|
|
|
{ &X86::GR16RegClass, X86::AX, X86::DX, {
|
|
|
|
{ X86::IDIV16r, X86::CWD, Copy, X86::AX, S }, // SDiv
|
|
|
|
{ X86::IDIV16r, X86::CWD, Copy, X86::DX, S }, // SRem
|
|
|
|
{ X86::DIV16r, X86::MOV32r0, Copy, X86::AX, U }, // UDiv
|
|
|
|
{ X86::DIV16r, X86::MOV32r0, Copy, X86::DX, U }, // URem
|
|
|
|
}
|
|
|
|
}, // i16
|
|
|
|
{ &X86::GR32RegClass, X86::EAX, X86::EDX, {
|
|
|
|
{ X86::IDIV32r, X86::CDQ, Copy, X86::EAX, S }, // SDiv
|
|
|
|
{ X86::IDIV32r, X86::CDQ, Copy, X86::EDX, S }, // SRem
|
|
|
|
{ X86::DIV32r, X86::MOV32r0, Copy, X86::EAX, U }, // UDiv
|
|
|
|
{ X86::DIV32r, X86::MOV32r0, Copy, X86::EDX, U }, // URem
|
|
|
|
}
|
|
|
|
}, // i32
|
|
|
|
{ &X86::GR64RegClass, X86::RAX, X86::RDX, {
|
|
|
|
{ X86::IDIV64r, X86::CQO, Copy, X86::RAX, S }, // SDiv
|
|
|
|
{ X86::IDIV64r, X86::CQO, Copy, X86::RDX, S }, // SRem
|
2018-11-01 05:53:24 +08:00
|
|
|
{ X86::DIV64r, X86::MOV32r0, Copy, X86::RAX, U }, // UDiv
|
|
|
|
{ X86::DIV64r, X86::MOV32r0, Copy, X86::RDX, U }, // URem
|
2015-02-02 00:15:07 +08:00
|
|
|
}
|
|
|
|
}, // i64
|
|
|
|
};
|
|
|
|
|
|
|
|
MVT VT;
|
|
|
|
if (!isTypeLegal(I->getType(), VT))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
unsigned TypeIndex, OpIndex;
|
|
|
|
switch (VT.SimpleTy) {
|
|
|
|
default: return false;
|
|
|
|
case MVT::i8: TypeIndex = 0; break;
|
|
|
|
case MVT::i16: TypeIndex = 1; break;
|
|
|
|
case MVT::i32: TypeIndex = 2; break;
|
|
|
|
case MVT::i64: TypeIndex = 3;
|
|
|
|
if (!Subtarget->is64Bit())
|
|
|
|
return false;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
switch (I->getOpcode()) {
|
|
|
|
default: llvm_unreachable("Unexpected div/rem opcode");
|
|
|
|
case Instruction::SDiv: OpIndex = 0; break;
|
|
|
|
case Instruction::SRem: OpIndex = 1; break;
|
|
|
|
case Instruction::UDiv: OpIndex = 2; break;
|
|
|
|
case Instruction::URem: OpIndex = 3; break;
|
|
|
|
}
|
|
|
|
|
|
|
|
const DivRemEntry &TypeEntry = OpTable[TypeIndex];
|
|
|
|
const DivRemEntry::DivRemResult &OpEntry = TypeEntry.ResultTable[OpIndex];
|
|
|
|
unsigned Op0Reg = getRegForValue(I->getOperand(0));
|
|
|
|
if (Op0Reg == 0)
|
|
|
|
return false;
|
|
|
|
unsigned Op1Reg = getRegForValue(I->getOperand(1));
|
|
|
|
if (Op1Reg == 0)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// Move op0 into low-order input register.
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
|
|
|
TII.get(OpEntry.OpCopy), TypeEntry.LowInReg).addReg(Op0Reg);
|
|
|
|
// Zero-extend or sign-extend into high-order input register.
|
|
|
|
if (OpEntry.OpSignExtend) {
|
|
|
|
if (OpEntry.IsOpSigned)
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
|
|
|
TII.get(OpEntry.OpSignExtend));
|
|
|
|
else {
|
2018-11-01 05:53:24 +08:00
|
|
|
unsigned Zero32 = createResultReg(&X86::GR32RegClass);
|
2015-02-02 00:15:07 +08:00
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
2018-11-01 05:53:24 +08:00
|
|
|
TII.get(X86::MOV32r0), Zero32);
|
2015-02-02 00:15:07 +08:00
|
|
|
|
|
|
|
// Copy the zero into the appropriate sub/super/identical physical
|
|
|
|
// register. Unfortunately the operations needed are not uniform enough
|
|
|
|
// to fit neatly into the table above.
|
2018-11-01 05:53:24 +08:00
|
|
|
if (VT == MVT::i16) {
|
2015-02-02 00:15:07 +08:00
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
|
|
|
TII.get(Copy), TypeEntry.HighInReg)
|
2018-11-01 05:53:24 +08:00
|
|
|
.addReg(Zero32, 0, X86::sub_16bit);
|
|
|
|
} else if (VT == MVT::i32) {
|
2015-02-02 00:15:07 +08:00
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
|
|
|
TII.get(Copy), TypeEntry.HighInReg)
|
2018-11-01 05:53:24 +08:00
|
|
|
.addReg(Zero32);
|
|
|
|
} else if (VT == MVT::i64) {
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
|
|
|
TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg)
|
|
|
|
.addImm(0).addReg(Zero32).addImm(X86::sub_32bit);
|
|
|
|
}
|
2015-02-02 00:15:07 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
// Generate the DIV/IDIV instruction.
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
|
|
|
TII.get(OpEntry.OpDivRem)).addReg(Op1Reg);
|
2017-11-29 01:15:09 +08:00
|
|
|
// For i8 remainder, we can't reference ah directly, as we'll end
|
|
|
|
// up with bogus copies like %r9b = COPY %ah. Reference ax
|
|
|
|
// instead to prevent ah references in a rex instruction.
|
2015-02-02 00:15:07 +08:00
|
|
|
//
|
|
|
|
// The current assumption of the fast register allocator is that isel
|
2017-09-27 05:35:11 +08:00
|
|
|
// won't generate explicit references to the GR8_NOREX registers. If
|
2015-02-02 00:15:07 +08:00
|
|
|
// the allocator and/or the backend get enhanced to be more robust in
|
|
|
|
// that regard, this can be, and should be, removed.
|
|
|
|
unsigned ResultReg = 0;
|
|
|
|
if ((I->getOpcode() == Instruction::SRem ||
|
|
|
|
I->getOpcode() == Instruction::URem) &&
|
|
|
|
OpEntry.DivRemResultReg == X86::AH && Subtarget->is64Bit()) {
|
|
|
|
unsigned SourceSuperReg = createResultReg(&X86::GR16RegClass);
|
|
|
|
unsigned ResultSuperReg = createResultReg(&X86::GR16RegClass);
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
|
|
|
TII.get(Copy), SourceSuperReg).addReg(X86::AX);
|
|
|
|
|
|
|
|
// Shift AX right by 8 bits instead of using AH.
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SHR16ri),
|
|
|
|
ResultSuperReg).addReg(SourceSuperReg).addImm(8);
|
|
|
|
|
|
|
|
// Now reference the 8-bit subreg of the result.
|
|
|
|
ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultSuperReg,
|
|
|
|
/*Kill=*/true, X86::sub_8bit);
|
|
|
|
}
|
|
|
|
// Copy the result out of the physreg if we haven't already.
|
|
|
|
if (!ResultReg) {
|
|
|
|
ResultReg = createResultReg(TypeEntry.RC);
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Copy), ResultReg)
|
|
|
|
.addReg(OpEntry.DivRemResultReg);
|
|
|
|
}
|
|
|
|
updateValueMap(I, ResultReg);
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2018-05-01 23:54:18 +08:00
|
|
|
/// Emit a conditional move instruction (if the are supported) to lower
|
2015-02-02 00:15:07 +08:00
|
|
|
/// the select.
|
|
|
|
bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
|
|
|
|
// Check if the subtarget supports these instructions.
|
|
|
|
if (!Subtarget->hasCMov())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// FIXME: Add support for i8.
|
|
|
|
if (RetVT < MVT::i16 || RetVT > MVT::i64)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
const Value *Cond = I->getOperand(0);
|
|
|
|
const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
|
|
|
|
bool NeedTest = true;
|
|
|
|
X86::CondCode CC = X86::COND_NE;
|
|
|
|
|
|
|
|
// Optimize conditions coming from a compare if both instructions are in the
|
|
|
|
// same basic block (values defined in other basic blocks may not have
|
|
|
|
// initialized registers).
|
|
|
|
const auto *CI = dyn_cast<CmpInst>(Cond);
|
|
|
|
if (CI && (CI->getParent() == I->getParent())) {
|
|
|
|
CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
|
|
|
|
|
|
|
|
// FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
|
2016-09-05 15:14:21 +08:00
|
|
|
static const uint16_t SETFOpcTable[2][3] = {
|
2015-02-02 00:15:07 +08:00
|
|
|
{ X86::SETNPr, X86::SETEr , X86::TEST8rr },
|
|
|
|
{ X86::SETPr, X86::SETNEr, X86::OR8rr }
|
|
|
|
};
|
2016-09-05 15:14:21 +08:00
|
|
|
const uint16_t *SETFOpc = nullptr;
|
2015-02-02 00:15:07 +08:00
|
|
|
switch (Predicate) {
|
|
|
|
default: break;
|
|
|
|
case CmpInst::FCMP_OEQ:
|
|
|
|
SETFOpc = &SETFOpcTable[0][0];
|
|
|
|
Predicate = CmpInst::ICMP_NE;
|
|
|
|
break;
|
|
|
|
case CmpInst::FCMP_UNE:
|
|
|
|
SETFOpc = &SETFOpcTable[1][0];
|
|
|
|
Predicate = CmpInst::ICMP_NE;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool NeedSwap;
|
2017-05-11 14:36:37 +08:00
|
|
|
std::tie(CC, NeedSwap) = X86::getX86ConditionCode(Predicate);
|
2015-02-02 00:15:07 +08:00
|
|
|
assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
|
|
|
|
|
|
|
|
const Value *CmpLHS = CI->getOperand(0);
|
|
|
|
const Value *CmpRHS = CI->getOperand(1);
|
|
|
|
if (NeedSwap)
|
|
|
|
std::swap(CmpLHS, CmpRHS);
|
|
|
|
|
2015-07-09 10:09:04 +08:00
|
|
|
EVT CmpVT = TLI.getValueType(DL, CmpLHS->getType());
|
2015-02-02 00:15:07 +08:00
|
|
|
// Emit a compare of the LHS and RHS, setting the flags.
|
|
|
|
if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
if (SETFOpc) {
|
|
|
|
unsigned FlagReg1 = createResultReg(&X86::GR8RegClass);
|
|
|
|
unsigned FlagReg2 = createResultReg(&X86::GR8RegClass);
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[0]),
|
|
|
|
FlagReg1);
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[1]),
|
|
|
|
FlagReg2);
|
|
|
|
auto const &II = TII.get(SETFOpc[2]);
|
|
|
|
if (II.getNumDefs()) {
|
|
|
|
unsigned TmpReg = createResultReg(&X86::GR8RegClass);
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, TmpReg)
|
|
|
|
.addReg(FlagReg2).addReg(FlagReg1);
|
|
|
|
} else {
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
|
|
|
|
.addReg(FlagReg2).addReg(FlagReg1);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
NeedTest = false;
|
|
|
|
} else if (foldX86XALUIntrinsic(CC, I, Cond)) {
|
|
|
|
// Fake request the condition, otherwise the intrinsic might be completely
|
|
|
|
// optimized away.
|
|
|
|
unsigned TmpReg = getRegForValue(Cond);
|
|
|
|
if (TmpReg == 0)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
NeedTest = false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (NeedTest) {
|
|
|
|
// Selects operate on i1, however, CondReg is 8 bits width and may contain
|
|
|
|
// garbage. Indeed, only the less significant bit is supposed to be
|
|
|
|
// accurate. If we read more than the lsb, we may see non-zero values
|
|
|
|
// whereas lsb is zero. Therefore, we have to truncate Op0Reg to i1 for
|
|
|
|
// the select. This is achieved by performing TEST against 1.
|
|
|
|
unsigned CondReg = getRegForValue(Cond);
|
|
|
|
if (CondReg == 0)
|
|
|
|
return false;
|
|
|
|
bool CondIsKill = hasTrivialKill(Cond);
|
|
|
|
|
2016-09-28 19:22:17 +08:00
|
|
|
// In case OpReg is a K register, COPY to a GPR
|
|
|
|
if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
|
|
|
|
unsigned KCondReg = CondReg;
|
2017-03-29 00:35:29 +08:00
|
|
|
CondReg = createResultReg(&X86::GR32RegClass);
|
2016-08-21 16:02:27 +08:00
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
2016-09-28 19:22:17 +08:00
|
|
|
TII.get(TargetOpcode::COPY), CondReg)
|
|
|
|
.addReg(KCondReg, getKillRegState(CondIsKill));
|
2017-03-29 00:35:29 +08:00
|
|
|
CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, /*Kill=*/true,
|
|
|
|
X86::sub_8bit);
|
2016-09-28 19:22:17 +08:00
|
|
|
}
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
|
|
|
|
.addReg(CondReg, getKillRegState(CondIsKill))
|
|
|
|
.addImm(1);
|
2015-02-02 00:15:07 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
const Value *LHS = I->getOperand(1);
|
|
|
|
const Value *RHS = I->getOperand(2);
|
|
|
|
|
|
|
|
unsigned RHSReg = getRegForValue(RHS);
|
|
|
|
bool RHSIsKill = hasTrivialKill(RHS);
|
|
|
|
|
|
|
|
unsigned LHSReg = getRegForValue(LHS);
|
|
|
|
bool LHSIsKill = hasTrivialKill(LHS);
|
|
|
|
|
|
|
|
if (!LHSReg || !RHSReg)
|
|
|
|
return false;
|
|
|
|
|
2017-04-25 02:55:33 +08:00
|
|
|
const TargetRegisterInfo &TRI = *Subtarget->getRegisterInfo();
|
|
|
|
unsigned Opc = X86::getCMovFromCond(CC, TRI.getRegSizeInBits(*RC)/8);
|
2015-02-02 00:15:07 +08:00
|
|
|
unsigned ResultReg = fastEmitInst_rr(Opc, RC, RHSReg, RHSIsKill,
|
|
|
|
LHSReg, LHSIsKill);
|
|
|
|
updateValueMap(I, ResultReg);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2018-05-01 23:54:18 +08:00
|
|
|
/// Emit SSE or AVX instructions to lower the select.
|
2015-02-02 00:15:07 +08:00
|
|
|
///
|
|
|
|
/// Try to use SSE1/SSE2 instructions to simulate a select without branches.
|
|
|
|
/// This lowers fp selects into a CMP/AND/ANDN/OR sequence when the necessary
|
2015-03-06 05:46:54 +08:00
|
|
|
/// SSE instructions are available. If AVX is available, try to use a VBLENDV.
|
2015-02-02 00:15:07 +08:00
|
|
|
bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
|
|
|
|
// Optimize conditions coming from a compare if both instructions are in the
|
|
|
|
// same basic block (values defined in other basic blocks may not have
|
|
|
|
// initialized registers).
|
|
|
|
const auto *CI = dyn_cast<FCmpInst>(I->getOperand(0));
|
|
|
|
if (!CI || (CI->getParent() != I->getParent()))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
if (I->getType() != CI->getOperand(0)->getType() ||
|
|
|
|
!((Subtarget->hasSSE1() && RetVT == MVT::f32) ||
|
|
|
|
(Subtarget->hasSSE2() && RetVT == MVT::f64)))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
const Value *CmpLHS = CI->getOperand(0);
|
|
|
|
const Value *CmpRHS = CI->getOperand(1);
|
|
|
|
CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
|
|
|
|
|
|
|
|
// The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
|
|
|
|
// We don't have to materialize a zero constant for this case and can just use
|
|
|
|
// %x again on the RHS.
|
|
|
|
if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
|
|
|
|
const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
|
|
|
|
if (CmpRHSC && CmpRHSC->isNullValue())
|
|
|
|
CmpRHS = CmpLHS;
|
|
|
|
}
|
|
|
|
|
|
|
|
unsigned CC;
|
|
|
|
bool NeedSwap;
|
|
|
|
std::tie(CC, NeedSwap) = getX86SSEConditionCode(Predicate);
|
2017-10-09 09:05:15 +08:00
|
|
|
if (CC > 7 && !Subtarget->hasAVX())
|
2015-02-02 00:15:07 +08:00
|
|
|
return false;
|
|
|
|
|
|
|
|
if (NeedSwap)
|
|
|
|
std::swap(CmpLHS, CmpRHS);
|
|
|
|
|
2015-03-06 05:46:54 +08:00
|
|
|
// Choose the SSE instruction sequence based on data type (float or double).
|
2016-09-05 15:14:21 +08:00
|
|
|
static const uint16_t OpcTable[2][4] = {
|
2016-12-06 12:58:39 +08:00
|
|
|
{ X86::CMPSSrr, X86::ANDPSrr, X86::ANDNPSrr, X86::ORPSrr },
|
|
|
|
{ X86::CMPSDrr, X86::ANDPDrr, X86::ANDNPDrr, X86::ORPDrr }
|
2015-02-02 00:15:07 +08:00
|
|
|
};
|
|
|
|
|
2016-09-05 15:14:21 +08:00
|
|
|
const uint16_t *Opc = nullptr;
|
2015-02-02 00:15:07 +08:00
|
|
|
switch (RetVT.SimpleTy) {
|
|
|
|
default: return false;
|
2015-03-06 05:46:54 +08:00
|
|
|
case MVT::f32: Opc = &OpcTable[0][0]; break;
|
|
|
|
case MVT::f64: Opc = &OpcTable[1][0]; break;
|
2015-02-02 00:15:07 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
const Value *LHS = I->getOperand(1);
|
|
|
|
const Value *RHS = I->getOperand(2);
|
|
|
|
|
|
|
|
unsigned LHSReg = getRegForValue(LHS);
|
|
|
|
bool LHSIsKill = hasTrivialKill(LHS);
|
|
|
|
|
|
|
|
unsigned RHSReg = getRegForValue(RHS);
|
|
|
|
bool RHSIsKill = hasTrivialKill(RHS);
|
|
|
|
|
|
|
|
unsigned CmpLHSReg = getRegForValue(CmpLHS);
|
|
|
|
bool CmpLHSIsKill = hasTrivialKill(CmpLHS);
|
|
|
|
|
|
|
|
unsigned CmpRHSReg = getRegForValue(CmpRHS);
|
|
|
|
bool CmpRHSIsKill = hasTrivialKill(CmpRHS);
|
|
|
|
|
|
|
|
if (!LHSReg || !RHSReg || !CmpLHS || !CmpRHS)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
|
2015-03-06 05:46:54 +08:00
|
|
|
unsigned ResultReg;
|
2016-12-05 12:51:31 +08:00
|
|
|
|
|
|
|
if (Subtarget->hasAVX512()) {
|
|
|
|
// If we have AVX512 we can use a mask compare and masked movss/sd.
|
|
|
|
const TargetRegisterClass *VR128X = &X86::VR128XRegClass;
|
|
|
|
const TargetRegisterClass *VK1 = &X86::VK1RegClass;
|
|
|
|
|
|
|
|
unsigned CmpOpcode =
|
2016-12-05 14:09:55 +08:00
|
|
|
(RetVT == MVT::f32) ? X86::VCMPSSZrr : X86::VCMPSDZrr;
|
2016-12-05 12:51:31 +08:00
|
|
|
unsigned CmpReg = fastEmitInst_rri(CmpOpcode, VK1, CmpLHSReg, CmpLHSIsKill,
|
|
|
|
CmpRHSReg, CmpRHSIsKill, CC);
|
|
|
|
|
|
|
|
// Need an IMPLICIT_DEF for the input that is used to generate the upper
|
|
|
|
// bits of the result register since its not based on any of the inputs.
|
|
|
|
unsigned ImplicitDefReg = createResultReg(VR128X);
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
|
|
|
TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
|
|
|
|
|
|
|
|
// Place RHSReg is the passthru of the masked movss/sd operation and put
|
|
|
|
// LHS in the input. The mask input comes from the compare.
|
|
|
|
unsigned MovOpcode =
|
2016-12-05 14:09:55 +08:00
|
|
|
(RetVT == MVT::f32) ? X86::VMOVSSZrrk : X86::VMOVSDZrrk;
|
2016-12-05 12:51:31 +08:00
|
|
|
unsigned MovReg = fastEmitInst_rrrr(MovOpcode, VR128X, RHSReg, RHSIsKill,
|
|
|
|
CmpReg, true, ImplicitDefReg, true,
|
|
|
|
LHSReg, LHSIsKill);
|
|
|
|
|
|
|
|
ResultReg = createResultReg(RC);
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
|
|
|
TII.get(TargetOpcode::COPY), ResultReg).addReg(MovReg);
|
|
|
|
|
|
|
|
} else if (Subtarget->hasAVX()) {
|
2015-09-01 02:25:11 +08:00
|
|
|
const TargetRegisterClass *VR128 = &X86::VR128RegClass;
|
|
|
|
|
2015-03-06 05:46:54 +08:00
|
|
|
// If we have AVX, create 1 blendv instead of 3 logic instructions.
|
|
|
|
// Blendv was introduced with SSE 4.1, but the 2 register form implicitly
|
|
|
|
// uses XMM0 as the selection register. That may need just as many
|
|
|
|
// instructions as the AND/ANDN/OR sequence due to register moves, so
|
|
|
|
// don't bother.
|
|
|
|
unsigned CmpOpcode =
|
2016-12-05 14:09:55 +08:00
|
|
|
(RetVT == MVT::f32) ? X86::VCMPSSrr : X86::VCMPSDrr;
|
2015-03-06 05:46:54 +08:00
|
|
|
unsigned BlendOpcode =
|
2016-12-05 14:09:55 +08:00
|
|
|
(RetVT == MVT::f32) ? X86::VBLENDVPSrr : X86::VBLENDVPDrr;
|
|
|
|
|
2016-12-05 12:51:31 +08:00
|
|
|
unsigned CmpReg = fastEmitInst_rri(CmpOpcode, RC, CmpLHSReg, CmpLHSIsKill,
|
2015-03-06 05:46:54 +08:00
|
|
|
CmpRHSReg, CmpRHSIsKill, CC);
|
2015-09-01 02:25:11 +08:00
|
|
|
unsigned VBlendReg = fastEmitInst_rrr(BlendOpcode, VR128, RHSReg, RHSIsKill,
|
|
|
|
LHSReg, LHSIsKill, CmpReg, true);
|
|
|
|
ResultReg = createResultReg(RC);
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
|
|
|
TII.get(TargetOpcode::COPY), ResultReg).addReg(VBlendReg);
|
2015-03-06 05:46:54 +08:00
|
|
|
} else {
|
2016-12-06 12:58:39 +08:00
|
|
|
const TargetRegisterClass *VR128 = &X86::VR128RegClass;
|
2015-03-06 05:46:54 +08:00
|
|
|
unsigned CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill,
|
|
|
|
CmpRHSReg, CmpRHSIsKill, CC);
|
2016-12-06 12:58:39 +08:00
|
|
|
unsigned AndReg = fastEmitInst_rr(Opc[1], VR128, CmpReg, /*IsKill=*/false,
|
2015-03-06 05:46:54 +08:00
|
|
|
LHSReg, LHSIsKill);
|
2016-12-06 12:58:39 +08:00
|
|
|
unsigned AndNReg = fastEmitInst_rr(Opc[2], VR128, CmpReg, /*IsKill=*/true,
|
2015-03-06 05:46:54 +08:00
|
|
|
RHSReg, RHSIsKill);
|
2016-12-06 12:58:39 +08:00
|
|
|
unsigned OrReg = fastEmitInst_rr(Opc[3], VR128, AndNReg, /*IsKill=*/true,
|
|
|
|
AndReg, /*IsKill=*/true);
|
|
|
|
ResultReg = createResultReg(RC);
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
|
|
|
TII.get(TargetOpcode::COPY), ResultReg).addReg(OrReg);
|
2015-03-06 05:46:54 +08:00
|
|
|
}
|
2015-02-02 00:15:07 +08:00
|
|
|
updateValueMap(I, ResultReg);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Lower a select to one of the CMOV_* pseudo instructions. These pseudos
/// are expanded into control flow at a later stage.
///
/// Returns false when RetVT has no matching pseudo, when the condition code
/// is not valid, or when an operand cannot be placed in a register.
bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) {
  // Map the result type onto its pseudo CMOV opcode.
  unsigned PseudoOpc;
  switch (RetVT.SimpleTy) {
  case MVT::i8:
    PseudoOpc = X86::CMOV_GR8;
    break;
  case MVT::i16:
    PseudoOpc = X86::CMOV_GR16;
    break;
  case MVT::i32:
    PseudoOpc = X86::CMOV_GR32;
    break;
  case MVT::f32:
    PseudoOpc = X86::CMOV_FR32;
    break;
  case MVT::f64:
    PseudoOpc = X86::CMOV_FR64;
    break;
  default:
    return false;
  }

  const Value *SelCond = I->getOperand(0);
  X86::CondCode CC = X86::COND_NE;

  // A compare can only be folded when it lives in the same basic block as
  // the select (values defined in other basic blocks may not have
  // initialized registers).
  const auto *Cmp = dyn_cast<CmpInst>(SelCond);
  const bool FoldCompare = Cmp && (Cmp->getParent() == I->getParent());

  if (FoldCompare) {
    bool SwapOperands;
    std::tie(CC, SwapOperands) = X86::getX86ConditionCode(Cmp->getPredicate());
    if (CC > X86::LAST_VALID_COND)
      return false;

    const Value *First = Cmp->getOperand(0);
    const Value *Second = Cmp->getOperand(1);
    if (SwapOperands)
      std::swap(First, Second);

    EVT CmpVT = TLI.getValueType(DL, First->getType());
    // Emit the comparison itself.
    if (!X86FastEmitCompare(First, Second, CmpVT, Cmp->getDebugLoc()))
      return false;
  } else {
    unsigned CondReg = getRegForValue(SelCond);
    if (!CondReg)
      return false;
    const bool CondIsKill = hasTrivialKill(SelCond);

    // In case CondReg is a K register, COPY to a GPR and take its low byte.
    if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
      const unsigned MaskReg = CondReg;
      CondReg = createResultReg(&X86::GR32RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::COPY), CondReg)
          .addReg(MaskReg, getKillRegState(CondIsKill));
      CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, /*Kill=*/true,
                                           X86::sub_8bit);
    }

    // Test bit 0 of the condition; CC stays at COND_NE for this path.
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
        .addReg(CondReg, getKillRegState(CondIsKill))
        .addImm(1);
  }

  const Value *Op1 = I->getOperand(1);
  const Value *Op2 = I->getOperand(2);

  const unsigned Op1Reg = getRegForValue(Op1);
  const bool Op1IsKill = hasTrivialKill(Op1);

  const unsigned Op2Reg = getRegForValue(Op2);
  const bool Op2IsKill = hasTrivialKill(Op2);

  if (!Op1Reg || !Op2Reg)
    return false;

  // The pseudo takes operand 2 of the select first, then operand 1.
  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
  const unsigned ResultReg = fastEmitInst_rri(PseudoOpc, RC, Op2Reg, Op2IsKill,
                                              Op1Reg, Op1IsKill, CC);
  updateValueMap(I, ResultReg);
  return true;
}
|
|
|
|
|
|
|
|
bool X86FastISel::X86SelectSelect(const Instruction *I) {
|
|
|
|
MVT RetVT;
|
|
|
|
if (!isTypeLegal(I->getType(), RetVT))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// Check if we can fold the select.
|
|
|
|
if (const auto *CI = dyn_cast<CmpInst>(I->getOperand(0))) {
|
|
|
|
CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
|
|
|
|
const Value *Opnd = nullptr;
|
|
|
|
switch (Predicate) {
|
|
|
|
default: break;
|
|
|
|
case CmpInst::FCMP_FALSE: Opnd = I->getOperand(2); break;
|
|
|
|
case CmpInst::FCMP_TRUE: Opnd = I->getOperand(1); break;
|
|
|
|
}
|
|
|
|
// No need for a select anymore - this is an unconditional move.
|
|
|
|
if (Opnd) {
|
|
|
|
unsigned OpReg = getRegForValue(Opnd);
|
|
|
|
if (OpReg == 0)
|
|
|
|
return false;
|
|
|
|
bool OpIsKill = hasTrivialKill(Opnd);
|
|
|
|
const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
|
|
|
|
unsigned ResultReg = createResultReg(RC);
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
|
|
|
TII.get(TargetOpcode::COPY), ResultReg)
|
|
|
|
.addReg(OpReg, getKillRegState(OpIsKill));
|
|
|
|
updateValueMap(I, ResultReg);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// First try to use real conditional move instructions.
|
|
|
|
if (X86FastEmitCMoveSelect(RetVT, I))
|
|
|
|
return true;
|
|
|
|
|
|
|
|
// Try to use a sequence of SSE instructions to simulate a conditional move.
|
|
|
|
if (X86FastEmitSSESelect(RetVT, I))
|
|
|
|
return true;
|
|
|
|
|
|
|
|
// Fall-back to pseudo conditional move instructions, which will be later
|
|
|
|
// converted to control-flow.
|
|
|
|
if (X86FastEmitPseudoSelect(RetVT, I))
|
|
|
|
return true;
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2018-07-14 06:09:30 +08:00
|
|
|
// Common code for X86SelectSIToFP and X86SelectUIToFP.
|
|
|
|
bool X86FastISel::X86SelectIntToFP(const Instruction *I, bool IsSigned) {
|
2015-04-20 19:56:59 +08:00
|
|
|
// The target-independent selection algorithm in FastISel already knows how
|
|
|
|
// to select a SINT_TO_FP if the target is SSE but not AVX.
|
|
|
|
// Early exit if the subtarget doesn't have AVX.
|
2018-07-14 06:09:30 +08:00
|
|
|
// Unsigned conversion requires avx512.
|
|
|
|
bool HasAVX512 = Subtarget->hasAVX512();
|
|
|
|
if (!Subtarget->hasAVX() || (!IsSigned && !HasAVX512))
|
2015-04-20 19:56:59 +08:00
|
|
|
return false;
|
|
|
|
|
2018-07-14 05:03:43 +08:00
|
|
|
// TODO: We could sign extend narrower types.
|
|
|
|
MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
|
|
|
|
if (SrcVT != MVT::i32 && SrcVT != MVT::i64)
|
2015-02-18 07:40:58 +08:00
|
|
|
return false;
|
|
|
|
|
|
|
|
// Select integer to float/double conversion.
|
|
|
|
unsigned OpReg = getRegForValue(I->getOperand(0));
|
|
|
|
if (OpReg == 0)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
unsigned Opcode;
|
|
|
|
|
2018-07-14 06:09:30 +08:00
|
|
|
static const uint16_t SCvtOpc[2][2][2] = {
|
2018-07-14 05:03:43 +08:00
|
|
|
{ { X86::VCVTSI2SSrr, X86::VCVTSI642SSrr },
|
|
|
|
{ X86::VCVTSI2SDrr, X86::VCVTSI642SDrr } },
|
|
|
|
{ { X86::VCVTSI2SSZrr, X86::VCVTSI642SSZrr },
|
|
|
|
{ X86::VCVTSI2SDZrr, X86::VCVTSI642SDZrr } },
|
|
|
|
};
|
2018-07-14 06:09:30 +08:00
|
|
|
static const uint16_t UCvtOpc[2][2] = {
|
|
|
|
{ X86::VCVTUSI2SSZrr, X86::VCVTUSI642SSZrr },
|
|
|
|
{ X86::VCVTUSI2SDZrr, X86::VCVTUSI642SDZrr },
|
|
|
|
};
|
2018-07-14 05:03:43 +08:00
|
|
|
bool Is64Bit = SrcVT == MVT::i64;
|
|
|
|
|
2015-03-04 22:23:25 +08:00
|
|
|
if (I->getType()->isDoubleTy()) {
|
2018-07-14 06:09:30 +08:00
|
|
|
// s/uitofp int -> double
|
|
|
|
Opcode = IsSigned ? SCvtOpc[HasAVX512][1][Is64Bit] : UCvtOpc[1][Is64Bit];
|
2015-03-04 22:23:25 +08:00
|
|
|
} else if (I->getType()->isFloatTy()) {
|
2018-07-14 06:09:30 +08:00
|
|
|
// s/uitofp int -> float
|
|
|
|
Opcode = IsSigned ? SCvtOpc[HasAVX512][0][Is64Bit] : UCvtOpc[0][Is64Bit];
|
2015-02-18 07:40:58 +08:00
|
|
|
} else
|
|
|
|
return false;
|
|
|
|
|
2018-07-14 05:03:43 +08:00
|
|
|
MVT DstVT = TLI.getValueType(DL, I->getType()).getSimpleVT();
|
|
|
|
const TargetRegisterClass *RC = TLI.getRegClassFor(DstVT);
|
2015-03-04 22:23:25 +08:00
|
|
|
unsigned ImplicitDefReg = createResultReg(RC);
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
|
|
|
TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
|
|
|
|
unsigned ResultReg =
|
|
|
|
fastEmitInst_rr(Opcode, RC, ImplicitDefReg, true, OpReg, false);
|
2015-02-18 07:40:58 +08:00
|
|
|
updateValueMap(I, ResultReg);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2018-07-14 06:09:30 +08:00
|
|
|
bool X86FastISel::X86SelectSIToFP(const Instruction *I) {
|
|
|
|
return X86SelectIntToFP(I, /*IsSigned*/true);
|
|
|
|
}
|
|
|
|
|
|
|
|
bool X86FastISel::X86SelectUIToFP(const Instruction *I) {
|
|
|
|
return X86SelectIntToFP(I, /*IsSigned*/false);
|
|
|
|
}
|
|
|
|
|
2015-02-10 20:04:41 +08:00
|
|
|
// Helper method used by X86SelectFPExt and X86SelectFPTrunc.
|
|
|
|
bool X86FastISel::X86SelectFPExtOrFPTrunc(const Instruction *I,
|
|
|
|
unsigned TargetOpc,
|
|
|
|
const TargetRegisterClass *RC) {
|
|
|
|
assert((I->getOpcode() == Instruction::FPExt ||
|
|
|
|
I->getOpcode() == Instruction::FPTrunc) &&
|
|
|
|
"Instruction must be an FPExt or FPTrunc!");
|
|
|
|
|
|
|
|
unsigned OpReg = getRegForValue(I->getOperand(0));
|
|
|
|
if (OpReg == 0)
|
|
|
|
return false;
|
|
|
|
|
2017-03-01 18:20:48 +08:00
|
|
|
unsigned ImplicitDefReg;
|
|
|
|
if (Subtarget->hasAVX()) {
|
|
|
|
ImplicitDefReg = createResultReg(RC);
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
|
|
|
TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
2015-02-10 20:04:41 +08:00
|
|
|
unsigned ResultReg = createResultReg(RC);
|
|
|
|
MachineInstrBuilder MIB;
|
|
|
|
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpc),
|
|
|
|
ResultReg);
|
2017-02-23 21:15:44 +08:00
|
|
|
|
2017-03-01 18:20:48 +08:00
|
|
|
if (Subtarget->hasAVX())
|
2017-02-23 21:15:44 +08:00
|
|
|
MIB.addReg(ImplicitDefReg);
|
2017-03-01 18:20:48 +08:00
|
|
|
|
2015-02-10 20:04:41 +08:00
|
|
|
MIB.addReg(OpReg);
|
|
|
|
updateValueMap(I, ResultReg);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2015-02-02 00:15:07 +08:00
|
|
|
bool X86FastISel::X86SelectFPExt(const Instruction *I) {
|
2015-02-10 20:04:41 +08:00
|
|
|
if (X86ScalarSSEf64 && I->getType()->isDoubleTy() &&
|
|
|
|
I->getOperand(0)->getType()->isFloatTy()) {
|
2017-10-29 10:50:31 +08:00
|
|
|
bool HasAVX512 = Subtarget->hasAVX512();
|
2015-02-10 20:04:41 +08:00
|
|
|
// fpext from float to double.
|
2017-10-29 10:50:31 +08:00
|
|
|
unsigned Opc =
|
|
|
|
HasAVX512 ? X86::VCVTSS2SDZrr
|
|
|
|
: Subtarget->hasAVX() ? X86::VCVTSS2SDrr : X86::CVTSS2SDrr;
|
|
|
|
return X86SelectFPExtOrFPTrunc(
|
|
|
|
I, Opc, HasAVX512 ? &X86::FR64XRegClass : &X86::FR64RegClass);
|
2015-02-02 00:15:07 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
|
2015-02-10 20:04:41 +08:00
|
|
|
if (X86ScalarSSEf64 && I->getType()->isFloatTy() &&
|
|
|
|
I->getOperand(0)->getType()->isDoubleTy()) {
|
2017-10-29 10:50:31 +08:00
|
|
|
bool HasAVX512 = Subtarget->hasAVX512();
|
2015-02-10 20:04:41 +08:00
|
|
|
// fptrunc from double to float.
|
2017-10-29 10:50:31 +08:00
|
|
|
unsigned Opc =
|
|
|
|
HasAVX512 ? X86::VCVTSD2SSZrr
|
|
|
|
: Subtarget->hasAVX() ? X86::VCVTSD2SSrr : X86::CVTSD2SSrr;
|
|
|
|
return X86SelectFPExtOrFPTrunc(
|
|
|
|
I, Opc, HasAVX512 ? &X86::FR32XRegClass : &X86::FR32RegClass);
|
2015-02-02 00:15:07 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool X86FastISel::X86SelectTrunc(const Instruction *I) {
|
2015-07-09 10:09:04 +08:00
|
|
|
EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
|
|
|
|
EVT DstVT = TLI.getValueType(DL, I->getType());
|
2015-02-02 00:15:07 +08:00
|
|
|
|
|
|
|
// This code only handles truncation to byte.
|
2017-08-31 02:08:58 +08:00
|
|
|
if (DstVT != MVT::i8 && DstVT != MVT::i1)
|
2015-02-02 00:15:07 +08:00
|
|
|
return false;
|
|
|
|
if (!TLI.isTypeLegal(SrcVT))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
unsigned InputReg = getRegForValue(I->getOperand(0));
|
|
|
|
if (!InputReg)
|
|
|
|
// Unhandled operand. Halt "fast" selection and bail.
|
|
|
|
return false;
|
|
|
|
|
|
|
|
if (SrcVT == MVT::i8) {
|
|
|
|
// Truncate from i8 to i1; no code needed.
|
|
|
|
updateValueMap(I, InputReg);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Issue an extract_subreg.
|
|
|
|
unsigned ResultReg = fastEmitInst_extractsubreg(MVT::i8,
|
2017-09-19 03:21:21 +08:00
|
|
|
InputReg, false,
|
2015-02-02 00:15:07 +08:00
|
|
|
X86::sub_8bit);
|
|
|
|
if (!ResultReg)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
updateValueMap(I, ResultReg);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool X86FastISel::IsMemcpySmall(uint64_t Len) {
|
|
|
|
return Len <= (Subtarget->is64Bit() ? 32 : 16);
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Expand a small memcpy inline as a sequence of integer load/store pairs.
///
/// Each iteration copies the widest chunk that still fits in the remaining
/// length (8 bytes on 64-bit subtargets, then 4, 2, 1) and advances both
/// address displacements by the chunk size.
///
/// \param DestAM address mode of the destination (taken by value; its
///               displacement is advanced locally).
/// \param SrcAM  address mode of the source (taken by value likewise).
/// \param Len    number of bytes to copy.
/// \returns false without emitting anything if Len exceeds the
///          IsMemcpySmall limit, true otherwise.
bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM,
                                     X86AddressMode SrcAM, uint64_t Len) {

  // Make sure we don't bloat code by inlining very large memcpy's.
  if (!IsMemcpySmall(Len))
    return false;

  bool i64Legal = Subtarget->is64Bit();

  // We don't care about alignment here since we just emit integer accesses.
  while (Len) {
    MVT VT;
    if (Len >= 8 && i64Legal)
      VT = MVT::i64;
    else if (Len >= 4)
      VT = MVT::i32;
    else if (Len >= 2)
      VT = MVT::i16;
    else
      VT = MVT::i8;

    // Zero-initialized so the store never reads an indeterminate value
    // should the load fail in a build without assertions.
    unsigned Reg = 0;
    bool RV = X86FastEmitLoad(VT, SrcAM, nullptr, Reg);
    RV &= X86FastEmitStore(VT, Reg, /*Kill=*/true, DestAM);
    assert(RV && "Failed to emit load or store??");
    // RV is only read by the assert above; silence the set-but-unused
    // warning in NDEBUG builds.
    (void)RV;

    unsigned Size = VT.getSizeInBits()/8;
    Len -= Size;
    DestAM.Disp += Size;
    SrcAM.Disp += Size;
  }

  return true;
}
|
|
|
|
|
|
|
|
bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
|
|
|
|
// FIXME: Handle more intrinsics.
|
|
|
|
switch (II->getIntrinsicID()) {
|
|
|
|
default: return false;
|
2015-02-21 03:37:14 +08:00
|
|
|
case Intrinsic::convert_from_fp16:
|
|
|
|
case Intrinsic::convert_to_fp16: {
|
2015-05-12 09:26:05 +08:00
|
|
|
if (Subtarget->useSoftFloat() || !Subtarget->hasF16C())
|
2015-02-21 03:37:14 +08:00
|
|
|
return false;
|
|
|
|
|
|
|
|
const Value *Op = II->getArgOperand(0);
|
|
|
|
unsigned InputReg = getRegForValue(Op);
|
|
|
|
if (InputReg == 0)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// F16C only allows converting from float to half and from half to float.
|
|
|
|
bool IsFloatToHalf = II->getIntrinsicID() == Intrinsic::convert_to_fp16;
|
|
|
|
if (IsFloatToHalf) {
|
|
|
|
if (!Op->getType()->isFloatTy())
|
|
|
|
return false;
|
|
|
|
} else {
|
|
|
|
if (!II->getType()->isFloatTy())
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
unsigned ResultReg = 0;
|
|
|
|
const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::v8i16);
|
|
|
|
if (IsFloatToHalf) {
|
|
|
|
// 'InputReg' is implicitly promoted from register class FR32 to
|
|
|
|
// register class VR128 by method 'constrainOperandRegClass' which is
|
|
|
|
// directly called by 'fastEmitInst_ri'.
|
|
|
|
// Instruction VCVTPS2PHrr takes an extra immediate operand which is
|
2016-02-02 09:44:03 +08:00
|
|
|
// used to provide rounding control: use MXCSR.RC, encoded as 0b100.
|
|
|
|
// It's consistent with the other FP instructions, which are usually
|
|
|
|
// controlled by MXCSR.
|
|
|
|
InputReg = fastEmitInst_ri(X86::VCVTPS2PHrr, RC, InputReg, false, 4);
|
2015-02-21 03:37:14 +08:00
|
|
|
|
|
|
|
// Move the lower 32-bits of ResultReg to another register of class GR32.
|
|
|
|
ResultReg = createResultReg(&X86::GR32RegClass);
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
|
|
|
TII.get(X86::VMOVPDI2DIrr), ResultReg)
|
|
|
|
.addReg(InputReg, RegState::Kill);
|
2018-07-31 03:41:25 +08:00
|
|
|
|
2015-02-21 03:37:14 +08:00
|
|
|
// The result value is in the lower 16-bits of ResultReg.
|
|
|
|
unsigned RegIdx = X86::sub_16bit;
|
|
|
|
ResultReg = fastEmitInst_extractsubreg(MVT::i16, ResultReg, true, RegIdx);
|
|
|
|
} else {
|
|
|
|
assert(Op->getType()->isIntegerTy(16) && "Expected a 16-bit integer!");
|
|
|
|
// Explicitly sign-extend the input to 32-bit.
|
|
|
|
InputReg = fastEmit_r(MVT::i16, MVT::i32, ISD::SIGN_EXTEND, InputReg,
|
|
|
|
/*Kill=*/false);
|
|
|
|
|
|
|
|
// The following SCALAR_TO_VECTOR will be expanded into a VMOVDI2PDIrr.
|
|
|
|
InputReg = fastEmit_r(MVT::i32, MVT::v4i32, ISD::SCALAR_TO_VECTOR,
|
|
|
|
InputReg, /*Kill=*/true);
|
|
|
|
|
|
|
|
InputReg = fastEmitInst_r(X86::VCVTPH2PSrr, RC, InputReg, /*Kill=*/true);
|
|
|
|
|
|
|
|
// The result value is in the lower 32-bits of ResultReg.
|
|
|
|
// Emit an explicit copy from register class VR128 to register class FR32.
|
|
|
|
ResultReg = createResultReg(&X86::FR32RegClass);
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
|
|
|
TII.get(TargetOpcode::COPY), ResultReg)
|
|
|
|
.addReg(InputReg, RegState::Kill);
|
|
|
|
}
|
|
|
|
|
|
|
|
updateValueMap(II, ResultReg);
|
|
|
|
return true;
|
|
|
|
}
|
2015-02-02 00:15:07 +08:00
|
|
|
case Intrinsic::frameaddress: {
|
2015-02-11 06:00:34 +08:00
|
|
|
MachineFunction *MF = FuncInfo.MF;
|
|
|
|
if (MF->getTarget().getMCAsmInfo()->usesWindowsCFI())
|
|
|
|
return false;
|
|
|
|
|
2015-02-02 00:15:07 +08:00
|
|
|
Type *RetTy = II->getCalledFunction()->getReturnType();
|
|
|
|
|
|
|
|
MVT VT;
|
|
|
|
if (!isTypeLegal(RetTy, VT))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
unsigned Opc;
|
|
|
|
const TargetRegisterClass *RC = nullptr;
|
|
|
|
|
|
|
|
switch (VT.SimpleTy) {
|
|
|
|
default: llvm_unreachable("Invalid result type for frameaddress.");
|
|
|
|
case MVT::i32: Opc = X86::MOV32rm; RC = &X86::GR32RegClass; break;
|
|
|
|
case MVT::i64: Opc = X86::MOV64rm; RC = &X86::GR64RegClass; break;
|
|
|
|
}
|
|
|
|
|
|
|
|
// This needs to be set before we call getPtrSizedFrameRegister, otherwise
|
|
|
|
// we get the wrong frame register.
|
2016-07-29 02:40:00 +08:00
|
|
|
MachineFrameInfo &MFI = MF->getFrameInfo();
|
|
|
|
MFI.setFrameAddressIsTaken(true);
|
2015-02-02 00:15:07 +08:00
|
|
|
|
2015-02-03 07:03:45 +08:00
|
|
|
const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
|
2015-02-11 06:00:34 +08:00
|
|
|
unsigned FrameReg = RegInfo->getPtrSizedFrameRegister(*MF);
|
2015-02-02 00:15:07 +08:00
|
|
|
assert(((FrameReg == X86::RBP && VT == MVT::i64) ||
|
|
|
|
(FrameReg == X86::EBP && VT == MVT::i32)) &&
|
|
|
|
"Invalid Frame Register!");
|
|
|
|
|
2018-01-29 13:17:03 +08:00
|
|
|
// Always make a copy of the frame register to a vreg first, so that we
|
2015-02-02 00:15:07 +08:00
|
|
|
// never directly reference the frame register (the TwoAddressInstruction-
|
|
|
|
// Pass doesn't like that).
|
|
|
|
unsigned SrcReg = createResultReg(RC);
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
|
|
|
TII.get(TargetOpcode::COPY), SrcReg).addReg(FrameReg);
|
|
|
|
|
|
|
|
// Now recursively load from the frame address.
|
|
|
|
// movq (%rbp), %rax
|
|
|
|
// movq (%rax), %rax
|
|
|
|
// movq (%rax), %rax
|
|
|
|
// ...
|
|
|
|
unsigned DestReg;
|
|
|
|
unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
|
|
|
|
while (Depth--) {
|
|
|
|
DestReg = createResultReg(RC);
|
|
|
|
addDirectMem(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
|
|
|
TII.get(Opc), DestReg), SrcReg);
|
|
|
|
SrcReg = DestReg;
|
|
|
|
}
|
|
|
|
|
|
|
|
updateValueMap(II, SrcReg);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
case Intrinsic::memcpy: {
|
|
|
|
const MemCpyInst *MCI = cast<MemCpyInst>(II);
|
|
|
|
// Don't handle volatile or variable length memcpys.
|
|
|
|
if (MCI->isVolatile())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
if (isa<ConstantInt>(MCI->getLength())) {
|
|
|
|
// Small memcpy's are common enough that we want to do them
|
|
|
|
// without a call if possible.
|
|
|
|
uint64_t Len = cast<ConstantInt>(MCI->getLength())->getZExtValue();
|
|
|
|
if (IsMemcpySmall(Len)) {
|
|
|
|
X86AddressMode DestAM, SrcAM;
|
|
|
|
if (!X86SelectAddress(MCI->getRawDest(), DestAM) ||
|
|
|
|
!X86SelectAddress(MCI->getRawSource(), SrcAM))
|
|
|
|
return false;
|
|
|
|
TryEmitSmallMemcpy(DestAM, SrcAM, Len);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
|
|
|
|
if (!MCI->getLength()->getType()->isIntegerTy(SizeWidth))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
if (MCI->getSourceAddressSpace() > 255 || MCI->getDestAddressSpace() > 255)
|
|
|
|
return false;
|
|
|
|
|
Remove alignment argument from memcpy/memmove/memset in favour of alignment attributes (Step 1)
Summary:
This is a resurrection of work first proposed and discussed in Aug 2015:
http://lists.llvm.org/pipermail/llvm-dev/2015-August/089384.html
and initially landed (but then backed out) in Nov 2015:
http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20151109/312083.html
The @llvm.memcpy/memmove/memset intrinsics currently have an explicit argument
which is required to be a constant integer. It represents the alignment of the
dest (and source), and so must be the minimum of the actual alignment of the
two.
This change is the first in a series that allows source and dest to each
have their own alignments by using the alignment attribute on their arguments.
In this change we:
1) Remove the alignment argument.
2) Add alignment attributes to the source & dest arguments. We, temporarily,
require that the alignments for source & dest be equal.
For example, code which used to read:
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 100, i32 4, i1 false)
will now read
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 100, i1 false)
Downstream users may have to update their lit tests that check for
@llvm.memcpy/memmove/memset call/declaration patterns. The following extended sed script
may help with updating the majority of your tests, but it does not catch all possible
patterns so some manual checking and updating will be required.
s~declare void @llvm\.mem(set|cpy|move)\.p([^(]*)\((.*), i32, i1\)~declare void @llvm.mem\1.p\2(\3, i1)~g
s~call void @llvm\.memset\.p([^(]*)i8\(i8([^*]*)\* (.*), i8 (.*), i8 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i8(i8\2* \3, i8 \4, i8 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i16\(i8([^*]*)\* (.*), i8 (.*), i16 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i16(i8\2* \3, i8 \4, i16 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i32\(i8([^*]*)\* (.*), i8 (.*), i32 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i32(i8\2* \3, i8 \4, i32 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i64\(i8([^*]*)\* (.*), i8 (.*), i64 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i64(i8\2* \3, i8 \4, i64 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i128\(i8([^*]*)\* (.*), i8 (.*), i128 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i128(i8\2* \3, i8 \4, i128 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i8\(i8([^*]*)\* (.*), i8 (.*), i8 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i8(i8\2* align \6 \3, i8 \4, i8 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i16\(i8([^*]*)\* (.*), i8 (.*), i16 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i16(i8\2* align \6 \3, i8 \4, i16 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i32\(i8([^*]*)\* (.*), i8 (.*), i32 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i32(i8\2* align \6 \3, i8 \4, i32 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i64\(i8([^*]*)\* (.*), i8 (.*), i64 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i64(i8\2* align \6 \3, i8 \4, i64 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i128\(i8([^*]*)\* (.*), i8 (.*), i128 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i128(i8\2* align \6 \3, i8 \4, i128 \5, i1 \7)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i8\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i8 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i8(i8\3* \4, i8\5* \6, i8 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i16\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i16 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i16(i8\3* \4, i8\5* \6, i16 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i32\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i32 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i32(i8\3* \4, i8\5* \6, i32 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i64\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i64 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i64(i8\3* \4, i8\5* \6, i64 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i128\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i128 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i128(i8\3* \4, i8\5* \6, i128 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i8\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i8 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i8(i8\3* align \8 \4, i8\5* align \8 \6, i8 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i16\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i16 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i16(i8\3* align \8 \4, i8\5* align \8 \6, i16 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i32\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i32 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i32(i8\3* align \8 \4, i8\5* align \8 \6, i32 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i64\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i64 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i64(i8\3* align \8 \4, i8\5* align \8 \6, i64 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i128\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i128 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i128(i8\3* align \8 \4, i8\5* align \8 \6, i128 \7, i1 \9)~g
The remaining changes in the series will:
Step 2) Expand the IRBuilder API to allow creation of memcpy/memmove with differing
source and dest alignments.
Step 3) Update Clang to use the new IRBuilder API.
Step 4) Update Polly to use the new IRBuilder API.
Step 5) Update LLVM passes that create memcpy/memmove calls to use the new IRBuilder API,
and those that use MemIntrinsicInst::[get|set]Alignment() to use
getDestAlignment() and getSourceAlignment() instead.
Step 6) Remove the single-alignment IRBuilder API for memcpy/memmove, and the
MemIntrinsicInst::[get|set]Alignment() methods.
Reviewers: pete, hfinkel, lhames, reames, bollu
Reviewed By: reames
Subscribers: niosHD, reames, jholewinski, qcolombet, jfb, sanjoy, arsenm, dschuff, dylanmckay, mehdi_amini, sdardis, nemanjai, david2050, nhaehnle, javed.absar, sbc100, jgravelle-google, eraman, aheejin, kbarton, JDevlieghere, asb, rbar, johnrusso, simoncook, jordy.potman.lists, apazos, sabuasal, llvm-commits
Differential Revision: https://reviews.llvm.org/D41675
llvm-svn: 322965
2018-01-20 01:13:12 +08:00
|
|
|
return lowerCallTo(II, "memcpy", II->getNumArgOperands() - 1);
|
2015-02-02 00:15:07 +08:00
|
|
|
}
|
|
|
|
case Intrinsic::memset: {
|
|
|
|
const MemSetInst *MSI = cast<MemSetInst>(II);
|
|
|
|
|
|
|
|
if (MSI->isVolatile())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
|
|
|
|
if (!MSI->getLength()->getType()->isIntegerTy(SizeWidth))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
if (MSI->getDestAddressSpace() > 255)
|
|
|
|
return false;
|
|
|
|
|
Remove alignment argument from memcpy/memmove/memset in favour of alignment attributes (Step 1)
Summary:
This is a resurrection of work first proposed and discussed in Aug 2015:
http://lists.llvm.org/pipermail/llvm-dev/2015-August/089384.html
and initially landed (but then backed out) in Nov 2015:
http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20151109/312083.html
The @llvm.memcpy/memmove/memset intrinsics currently have an explicit argument
which is required to be a constant integer. It represents the alignment of the
dest (and source), and so must be the minimum of the actual alignment of the
two.
This change is the first in a series that allows source and dest to each
have their own alignments by using the alignment attribute on their arguments.
In this change we:
1) Remove the alignment argument.
2) Add alignment attributes to the source & dest arguments. We, temporarily,
require that the alignments for source & dest be equal.
For example, code which used to read:
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 100, i32 4, i1 false)
will now read
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 100, i1 false)
Downstream users may have to update their lit tests that check for
@llvm.memcpy/memmove/memset call/declaration patterns. The following extended sed script
may help with updating the majority of your tests, but it does not catch all possible
patterns so some manual checking and updating will be required.
s~declare void @llvm\.mem(set|cpy|move)\.p([^(]*)\((.*), i32, i1\)~declare void @llvm.mem\1.p\2(\3, i1)~g
s~call void @llvm\.memset\.p([^(]*)i8\(i8([^*]*)\* (.*), i8 (.*), i8 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i8(i8\2* \3, i8 \4, i8 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i16\(i8([^*]*)\* (.*), i8 (.*), i16 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i16(i8\2* \3, i8 \4, i16 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i32\(i8([^*]*)\* (.*), i8 (.*), i32 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i32(i8\2* \3, i8 \4, i32 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i64\(i8([^*]*)\* (.*), i8 (.*), i64 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i64(i8\2* \3, i8 \4, i64 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i128\(i8([^*]*)\* (.*), i8 (.*), i128 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i128(i8\2* \3, i8 \4, i128 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i8\(i8([^*]*)\* (.*), i8 (.*), i8 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i8(i8\2* align \6 \3, i8 \4, i8 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i16\(i8([^*]*)\* (.*), i8 (.*), i16 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i16(i8\2* align \6 \3, i8 \4, i16 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i32\(i8([^*]*)\* (.*), i8 (.*), i32 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i32(i8\2* align \6 \3, i8 \4, i32 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i64\(i8([^*]*)\* (.*), i8 (.*), i64 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i64(i8\2* align \6 \3, i8 \4, i64 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i128\(i8([^*]*)\* (.*), i8 (.*), i128 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i128(i8\2* align \6 \3, i8 \4, i128 \5, i1 \7)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i8\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i8 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i8(i8\3* \4, i8\5* \6, i8 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i16\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i16 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i16(i8\3* \4, i8\5* \6, i16 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i32\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i32 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i32(i8\3* \4, i8\5* \6, i32 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i64\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i64 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i64(i8\3* \4, i8\5* \6, i64 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i128\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i128 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i128(i8\3* \4, i8\5* \6, i128 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i8\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i8 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i8(i8\3* align \8 \4, i8\5* align \8 \6, i8 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i16\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i16 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i16(i8\3* align \8 \4, i8\5* align \8 \6, i16 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i32\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i32 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i32(i8\3* align \8 \4, i8\5* align \8 \6, i32 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i64\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i64 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i64(i8\3* align \8 \4, i8\5* align \8 \6, i64 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i128\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i128 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i128(i8\3* align \8 \4, i8\5* align \8 \6, i128 \7, i1 \9)~g
The remaining changes in the series will:
Step 2) Expand the IRBuilder API to allow creation of memcpy/memmove with differing
source and dest alignments.
Step 3) Update Clang to use the new IRBuilder API.
Step 4) Update Polly to use the new IRBuilder API.
Step 5) Update LLVM passes that create memcpy/memmove calls to use the new IRBuilder API,
and those that use MemIntrinsicInst::[get|set]Alignment() to use
getDestAlignment() and getSourceAlignment() instead.
Step 6) Remove the single-alignment IRBuilder API for memcpy/memmove, and the
MemIntrinsicInst::[get|set]Alignment() methods.
Reviewers: pete, hfinkel, lhames, reames, bollu
Reviewed By: reames
Subscribers: niosHD, reames, jholewinski, qcolombet, jfb, sanjoy, arsenm, dschuff, dylanmckay, mehdi_amini, sdardis, nemanjai, david2050, nhaehnle, javed.absar, sbc100, jgravelle-google, eraman, aheejin, kbarton, JDevlieghere, asb, rbar, johnrusso, simoncook, jordy.potman.lists, apazos, sabuasal, llvm-commits
Differential Revision: https://reviews.llvm.org/D41675
llvm-svn: 322965
2018-01-20 01:13:12 +08:00
|
|
|
return lowerCallTo(II, "memset", II->getNumArgOperands() - 1);
|
2015-02-02 00:15:07 +08:00
|
|
|
}
|
|
|
|
case Intrinsic::stackprotector: {
|
|
|
|
// Emit code to store the stack guard onto the stack.
|
2015-07-09 10:09:04 +08:00
|
|
|
EVT PtrTy = TLI.getPointerTy(DL);
|
2015-02-02 00:15:07 +08:00
|
|
|
|
|
|
|
const Value *Op1 = II->getArgOperand(0); // The guard's value.
|
|
|
|
const AllocaInst *Slot = cast<AllocaInst>(II->getArgOperand(1));
|
|
|
|
|
|
|
|
MFI.setStackProtectorIndex(FuncInfo.StaticAllocaMap[Slot]);
|
|
|
|
|
|
|
|
// Grab the frame index.
|
|
|
|
X86AddressMode AM;
|
|
|
|
if (!X86SelectAddress(Slot, AM)) return false;
|
|
|
|
if (!X86FastEmitStore(PtrTy, Op1, AM)) return false;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
case Intrinsic::dbg_declare: {
|
|
|
|
const DbgDeclareInst *DI = cast<DbgDeclareInst>(II);
|
|
|
|
X86AddressMode AM;
|
|
|
|
assert(DI->getAddress() && "Null address should be checked earlier!");
|
|
|
|
if (!X86SelectAddress(DI->getAddress(), AM))
|
|
|
|
return false;
|
|
|
|
const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
|
|
|
|
// FIXME may need to add RegState::Debug to any registers produced,
|
|
|
|
// although ESP/EBP should be the only ones at the moment.
|
2015-04-04 03:20:26 +08:00
|
|
|
assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) &&
|
|
|
|
"Expected inlined-at fields to agree");
|
2015-02-02 00:15:07 +08:00
|
|
|
addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II), AM)
|
|
|
|
.addImm(0)
|
|
|
|
.addMetadata(DI->getVariable())
|
|
|
|
.addMetadata(DI->getExpression());
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
case Intrinsic::trap: {
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TRAP));
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
case Intrinsic::sqrt: {
|
|
|
|
if (!Subtarget->hasSSE1())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
Type *RetTy = II->getCalledFunction()->getReturnType();
|
|
|
|
|
|
|
|
MVT VT;
|
|
|
|
if (!isTypeLegal(RetTy, VT))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// Unfortunately we can't use fastEmit_r, because the AVX version of FSQRT
|
|
|
|
// is not generated by FastISel yet.
|
|
|
|
// FIXME: Update this code once tablegen can handle it.
|
2018-07-13 03:58:06 +08:00
|
|
|
static const uint16_t SqrtOpc[3][2] = {
|
|
|
|
{ X86::SQRTSSr, X86::SQRTSDr },
|
|
|
|
{ X86::VSQRTSSr, X86::VSQRTSDr },
|
|
|
|
{ X86::VSQRTSSZr, X86::VSQRTSDZr },
|
2015-02-02 00:15:07 +08:00
|
|
|
};
|
2018-07-13 03:58:06 +08:00
|
|
|
unsigned AVXLevel = Subtarget->hasAVX512() ? 2 :
|
|
|
|
Subtarget->hasAVX() ? 1 :
|
|
|
|
0;
|
2015-02-02 00:15:07 +08:00
|
|
|
unsigned Opc;
|
|
|
|
switch (VT.SimpleTy) {
|
|
|
|
default: return false;
|
2018-07-13 03:58:06 +08:00
|
|
|
case MVT::f32: Opc = SqrtOpc[AVXLevel][0]; break;
|
|
|
|
case MVT::f64: Opc = SqrtOpc[AVXLevel][1]; break;
|
2015-02-02 00:15:07 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
const Value *SrcVal = II->getArgOperand(0);
|
|
|
|
unsigned SrcReg = getRegForValue(SrcVal);
|
|
|
|
|
|
|
|
if (SrcReg == 0)
|
|
|
|
return false;
|
|
|
|
|
2018-07-13 03:58:06 +08:00
|
|
|
const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
|
2015-02-02 00:15:07 +08:00
|
|
|
unsigned ImplicitDefReg = 0;
|
2018-07-13 03:58:06 +08:00
|
|
|
if (AVXLevel > 0) {
|
2015-02-02 00:15:07 +08:00
|
|
|
ImplicitDefReg = createResultReg(RC);
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
|
|
|
TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
|
|
|
|
}
|
|
|
|
|
|
|
|
unsigned ResultReg = createResultReg(RC);
|
|
|
|
MachineInstrBuilder MIB;
|
|
|
|
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
|
|
|
|
ResultReg);
|
|
|
|
|
|
|
|
if (ImplicitDefReg)
|
|
|
|
MIB.addReg(ImplicitDefReg);
|
|
|
|
|
|
|
|
MIB.addReg(SrcReg);
|
|
|
|
|
|
|
|
updateValueMap(II, ResultReg);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
case Intrinsic::sadd_with_overflow:
|
|
|
|
case Intrinsic::uadd_with_overflow:
|
|
|
|
case Intrinsic::ssub_with_overflow:
|
|
|
|
case Intrinsic::usub_with_overflow:
|
|
|
|
case Intrinsic::smul_with_overflow:
|
|
|
|
case Intrinsic::umul_with_overflow: {
|
|
|
|
// This implements the basic lowering of the xalu with overflow intrinsics
|
|
|
|
// into add/sub/mul followed by either seto or setb.
|
|
|
|
const Function *Callee = II->getCalledFunction();
|
|
|
|
auto *Ty = cast<StructType>(Callee->getReturnType());
|
|
|
|
Type *RetTy = Ty->getTypeAtIndex(0U);
|
2016-11-15 21:50:35 +08:00
|
|
|
assert(Ty->getTypeAtIndex(1)->isIntegerTy() &&
|
|
|
|
Ty->getTypeAtIndex(1)->getScalarSizeInBits() == 1 &&
|
|
|
|
"Overflow value expected to be an i1");
|
2015-02-02 00:15:07 +08:00
|
|
|
|
|
|
|
MVT VT;
|
|
|
|
if (!isTypeLegal(RetTy, VT))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
if (VT < MVT::i8 || VT > MVT::i64)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
const Value *LHS = II->getArgOperand(0);
|
|
|
|
const Value *RHS = II->getArgOperand(1);
|
|
|
|
|
|
|
|
// Canonicalize immediate to the RHS.
|
|
|
|
if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
|
|
|
|
isCommutativeIntrinsic(II))
|
|
|
|
std::swap(LHS, RHS);
|
|
|
|
|
|
|
|
unsigned BaseOpc, CondOpc;
|
|
|
|
switch (II->getIntrinsicID()) {
|
|
|
|
default: llvm_unreachable("Unexpected intrinsic!");
|
|
|
|
case Intrinsic::sadd_with_overflow:
|
2019-01-03 03:01:05 +08:00
|
|
|
BaseOpc = ISD::ADD; CondOpc = X86::SETOr; break;
|
2015-02-02 00:15:07 +08:00
|
|
|
case Intrinsic::uadd_with_overflow:
|
|
|
|
BaseOpc = ISD::ADD; CondOpc = X86::SETBr; break;
|
|
|
|
case Intrinsic::ssub_with_overflow:
|
2019-01-03 03:01:05 +08:00
|
|
|
BaseOpc = ISD::SUB; CondOpc = X86::SETOr; break;
|
2015-02-02 00:15:07 +08:00
|
|
|
case Intrinsic::usub_with_overflow:
|
|
|
|
BaseOpc = ISD::SUB; CondOpc = X86::SETBr; break;
|
|
|
|
case Intrinsic::smul_with_overflow:
|
|
|
|
BaseOpc = X86ISD::SMUL; CondOpc = X86::SETOr; break;
|
|
|
|
case Intrinsic::umul_with_overflow:
|
|
|
|
BaseOpc = X86ISD::UMUL; CondOpc = X86::SETOr; break;
|
|
|
|
}
|
|
|
|
|
|
|
|
unsigned LHSReg = getRegForValue(LHS);
|
|
|
|
if (LHSReg == 0)
|
|
|
|
return false;
|
|
|
|
bool LHSIsKill = hasTrivialKill(LHS);
|
|
|
|
|
|
|
|
unsigned ResultReg = 0;
|
|
|
|
// Check if we have an immediate version.
|
|
|
|
if (const auto *CI = dyn_cast<ConstantInt>(RHS)) {
|
2016-06-02 12:19:42 +08:00
|
|
|
static const uint16_t Opc[2][4] = {
|
2015-02-02 00:15:07 +08:00
|
|
|
{ X86::INC8r, X86::INC16r, X86::INC32r, X86::INC64r },
|
|
|
|
{ X86::DEC8r, X86::DEC16r, X86::DEC32r, X86::DEC64r }
|
|
|
|
};
|
|
|
|
|
2019-01-03 03:01:05 +08:00
|
|
|
if (CI->isOne() && (BaseOpc == ISD::ADD || BaseOpc == ISD::SUB) &&
|
|
|
|
CondOpc == X86::SETOr) {
|
|
|
|
// We can use INC/DEC.
|
2015-02-02 00:15:07 +08:00
|
|
|
ResultReg = createResultReg(TLI.getRegClassFor(VT));
|
2019-01-03 03:01:05 +08:00
|
|
|
bool IsDec = BaseOpc == ISD::SUB;
|
2015-02-02 00:15:07 +08:00
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
|
|
|
TII.get(Opc[IsDec][VT.SimpleTy-MVT::i8]), ResultReg)
|
|
|
|
.addReg(LHSReg, getKillRegState(LHSIsKill));
|
|
|
|
} else
|
|
|
|
ResultReg = fastEmit_ri(VT, VT, BaseOpc, LHSReg, LHSIsKill,
|
|
|
|
CI->getZExtValue());
|
|
|
|
}
|
|
|
|
|
|
|
|
unsigned RHSReg;
|
|
|
|
bool RHSIsKill;
|
|
|
|
if (!ResultReg) {
|
|
|
|
RHSReg = getRegForValue(RHS);
|
|
|
|
if (RHSReg == 0)
|
|
|
|
return false;
|
|
|
|
RHSIsKill = hasTrivialKill(RHS);
|
|
|
|
ResultReg = fastEmit_rr(VT, VT, BaseOpc, LHSReg, LHSIsKill, RHSReg,
|
|
|
|
RHSIsKill);
|
|
|
|
}
|
|
|
|
|
|
|
|
// FastISel doesn't have a pattern for all X86::MUL*r and X86::IMUL*r. Emit
|
|
|
|
// it manually.
|
|
|
|
if (BaseOpc == X86ISD::UMUL && !ResultReg) {
|
2016-03-02 12:42:31 +08:00
|
|
|
static const uint16_t MULOpc[] =
|
2015-02-02 00:15:07 +08:00
|
|
|
{ X86::MUL8r, X86::MUL16r, X86::MUL32r, X86::MUL64r };
|
2016-03-02 12:42:31 +08:00
|
|
|
static const MCPhysReg Reg[] = { X86::AL, X86::AX, X86::EAX, X86::RAX };
|
2015-02-02 00:15:07 +08:00
|
|
|
// First copy the first operand into RAX, which is an implicit input to
|
|
|
|
// the X86::MUL*r instruction.
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
|
|
|
TII.get(TargetOpcode::COPY), Reg[VT.SimpleTy-MVT::i8])
|
|
|
|
.addReg(LHSReg, getKillRegState(LHSIsKill));
|
|
|
|
ResultReg = fastEmitInst_r(MULOpc[VT.SimpleTy-MVT::i8],
|
|
|
|
TLI.getRegClassFor(VT), RHSReg, RHSIsKill);
|
|
|
|
} else if (BaseOpc == X86ISD::SMUL && !ResultReg) {
|
2016-03-02 12:42:31 +08:00
|
|
|
static const uint16_t MULOpc[] =
|
2015-02-02 00:15:07 +08:00
|
|
|
{ X86::IMUL8r, X86::IMUL16rr, X86::IMUL32rr, X86::IMUL64rr };
|
|
|
|
if (VT == MVT::i8) {
|
|
|
|
// Copy the first operand into AL, which is an implicit input to the
|
|
|
|
// X86::IMUL8r instruction.
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
|
|
|
TII.get(TargetOpcode::COPY), X86::AL)
|
|
|
|
.addReg(LHSReg, getKillRegState(LHSIsKill));
|
|
|
|
ResultReg = fastEmitInst_r(MULOpc[0], TLI.getRegClassFor(VT), RHSReg,
|
|
|
|
RHSIsKill);
|
|
|
|
} else
|
|
|
|
ResultReg = fastEmitInst_rr(MULOpc[VT.SimpleTy-MVT::i8],
|
|
|
|
TLI.getRegClassFor(VT), LHSReg, LHSIsKill,
|
|
|
|
RHSReg, RHSIsKill);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!ResultReg)
|
|
|
|
return false;
|
|
|
|
|
2016-11-15 21:29:23 +08:00
|
|
|
// Assign to a GPR since the overflow return value is lowered to a SETcc.
|
|
|
|
unsigned ResultReg2 = createResultReg(&X86::GR8RegClass);
|
2015-02-02 00:15:07 +08:00
|
|
|
assert((ResultReg+1) == ResultReg2 && "Nonconsecutive result registers.");
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CondOpc),
|
|
|
|
ResultReg2);
|
|
|
|
|
|
|
|
updateValueMap(II, ResultReg, 2);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
case Intrinsic::x86_sse_cvttss2si:
|
|
|
|
case Intrinsic::x86_sse_cvttss2si64:
|
|
|
|
case Intrinsic::x86_sse2_cvttsd2si:
|
|
|
|
case Intrinsic::x86_sse2_cvttsd2si64: {
|
|
|
|
bool IsInputDouble;
|
|
|
|
switch (II->getIntrinsicID()) {
|
|
|
|
default: llvm_unreachable("Unexpected intrinsic.");
|
|
|
|
case Intrinsic::x86_sse_cvttss2si:
|
|
|
|
case Intrinsic::x86_sse_cvttss2si64:
|
|
|
|
if (!Subtarget->hasSSE1())
|
|
|
|
return false;
|
|
|
|
IsInputDouble = false;
|
|
|
|
break;
|
|
|
|
case Intrinsic::x86_sse2_cvttsd2si:
|
|
|
|
case Intrinsic::x86_sse2_cvttsd2si64:
|
|
|
|
if (!Subtarget->hasSSE2())
|
|
|
|
return false;
|
|
|
|
IsInputDouble = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
Type *RetTy = II->getCalledFunction()->getReturnType();
|
|
|
|
MVT VT;
|
|
|
|
if (!isTypeLegal(RetTy, VT))
|
|
|
|
return false;
|
|
|
|
|
2018-07-13 02:03:56 +08:00
|
|
|
static const uint16_t CvtOpc[3][2][2] = {
|
|
|
|
{ { X86::CVTTSS2SIrr, X86::CVTTSS2SI64rr },
|
|
|
|
{ X86::CVTTSD2SIrr, X86::CVTTSD2SI64rr } },
|
|
|
|
{ { X86::VCVTTSS2SIrr, X86::VCVTTSS2SI64rr },
|
|
|
|
{ X86::VCVTTSD2SIrr, X86::VCVTTSD2SI64rr } },
|
|
|
|
{ { X86::VCVTTSS2SIZrr, X86::VCVTTSS2SI64Zrr },
|
|
|
|
{ X86::VCVTTSD2SIZrr, X86::VCVTTSD2SI64Zrr } },
|
2015-02-02 00:15:07 +08:00
|
|
|
};
|
2018-07-13 02:03:56 +08:00
|
|
|
unsigned AVXLevel = Subtarget->hasAVX512() ? 2 :
|
|
|
|
Subtarget->hasAVX() ? 1 :
|
|
|
|
0;
|
2015-02-02 00:15:07 +08:00
|
|
|
unsigned Opc;
|
|
|
|
switch (VT.SimpleTy) {
|
|
|
|
default: llvm_unreachable("Unexpected result type.");
|
2018-07-13 02:03:56 +08:00
|
|
|
case MVT::i32: Opc = CvtOpc[AVXLevel][IsInputDouble][0]; break;
|
|
|
|
case MVT::i64: Opc = CvtOpc[AVXLevel][IsInputDouble][1]; break;
|
2015-02-02 00:15:07 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Check if we can fold insertelement instructions into the convert.
|
|
|
|
const Value *Op = II->getArgOperand(0);
|
|
|
|
while (auto *IE = dyn_cast<InsertElementInst>(Op)) {
|
|
|
|
const Value *Index = IE->getOperand(2);
|
|
|
|
if (!isa<ConstantInt>(Index))
|
|
|
|
break;
|
|
|
|
unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
|
|
|
|
|
|
|
|
if (Idx == 0) {
|
|
|
|
Op = IE->getOperand(1);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
Op = IE->getOperand(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
unsigned Reg = getRegForValue(Op);
|
|
|
|
if (Reg == 0)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
|
|
|
|
.addReg(Reg);
|
|
|
|
|
|
|
|
updateValueMap(II, ResultReg);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
bool X86FastISel::fastLowerArguments() {
|
|
|
|
if (!FuncInfo.CanLowerReturn)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
const Function *F = FuncInfo.Fn;
|
|
|
|
if (F->isVarArg())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
CallingConv::ID CC = F->getCallingConv();
|
|
|
|
if (CC != CallingConv::C)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
if (Subtarget->isCallingConvWin64(CC))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
if (!Subtarget->is64Bit())
|
|
|
|
return false;
|
|
|
|
|
2017-07-12 23:26:06 +08:00
|
|
|
if (Subtarget->useSoftFloat())
|
|
|
|
return false;
|
|
|
|
|
2015-02-02 00:15:07 +08:00
|
|
|
// Only handle simple cases. i.e. Up to 6 i32/i64 scalar arguments.
|
|
|
|
unsigned GPRCnt = 0;
|
|
|
|
unsigned FPRCnt = 0;
|
|
|
|
for (auto const &Arg : F->args()) {
|
2017-04-29 02:37:16 +08:00
|
|
|
if (Arg.hasAttribute(Attribute::ByVal) ||
|
|
|
|
Arg.hasAttribute(Attribute::InReg) ||
|
|
|
|
Arg.hasAttribute(Attribute::StructRet) ||
|
|
|
|
Arg.hasAttribute(Attribute::SwiftSelf) ||
|
|
|
|
Arg.hasAttribute(Attribute::SwiftError) ||
|
|
|
|
Arg.hasAttribute(Attribute::Nest))
|
2015-02-02 00:15:07 +08:00
|
|
|
return false;
|
|
|
|
|
|
|
|
Type *ArgTy = Arg.getType();
|
|
|
|
if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
|
|
|
|
return false;
|
|
|
|
|
2015-07-09 10:09:04 +08:00
|
|
|
EVT ArgVT = TLI.getValueType(DL, ArgTy);
|
2015-02-02 00:15:07 +08:00
|
|
|
if (!ArgVT.isSimple()) return false;
|
|
|
|
switch (ArgVT.getSimpleVT().SimpleTy) {
|
|
|
|
default: return false;
|
|
|
|
case MVT::i32:
|
|
|
|
case MVT::i64:
|
|
|
|
++GPRCnt;
|
|
|
|
break;
|
|
|
|
case MVT::f32:
|
|
|
|
case MVT::f64:
|
|
|
|
if (!Subtarget->hasSSE1())
|
|
|
|
return false;
|
|
|
|
++FPRCnt;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (GPRCnt > 6)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
if (FPRCnt > 8)
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
static const MCPhysReg GPR32ArgRegs[] = {
|
|
|
|
X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D
|
|
|
|
};
|
|
|
|
static const MCPhysReg GPR64ArgRegs[] = {
|
|
|
|
X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8 , X86::R9
|
|
|
|
};
|
|
|
|
static const MCPhysReg XMMArgRegs[] = {
|
|
|
|
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
|
|
|
|
X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
|
|
|
|
};
|
|
|
|
|
|
|
|
unsigned GPRIdx = 0;
|
|
|
|
unsigned FPRIdx = 0;
|
|
|
|
for (auto const &Arg : F->args()) {
|
2015-07-09 10:09:04 +08:00
|
|
|
MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
|
2015-02-02 00:15:07 +08:00
|
|
|
const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
|
|
|
|
unsigned SrcReg;
|
|
|
|
switch (VT.SimpleTy) {
|
|
|
|
default: llvm_unreachable("Unexpected value type.");
|
|
|
|
case MVT::i32: SrcReg = GPR32ArgRegs[GPRIdx++]; break;
|
|
|
|
case MVT::i64: SrcReg = GPR64ArgRegs[GPRIdx++]; break;
|
2016-08-18 04:30:52 +08:00
|
|
|
case MVT::f32: LLVM_FALLTHROUGH;
|
2015-02-02 00:15:07 +08:00
|
|
|
case MVT::f64: SrcReg = XMMArgRegs[FPRIdx++]; break;
|
|
|
|
}
|
|
|
|
unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
|
|
|
|
// FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
|
|
|
|
// Without this, EmitLiveInCopies may eliminate the livein if its only
|
|
|
|
// use is a bitcast (which isn't turned into an instruction).
|
|
|
|
unsigned ResultReg = createResultReg(RC);
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
|
|
|
TII.get(TargetOpcode::COPY), ResultReg)
|
|
|
|
.addReg(DstReg, getKillRegState(true));
|
|
|
|
updateValueMap(&Arg, ResultReg);
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2016-07-14 09:52:51 +08:00
|
|
|
static unsigned computeBytesPoppedByCalleeForSRet(const X86Subtarget *Subtarget,
|
|
|
|
CallingConv::ID CC,
|
|
|
|
ImmutableCallSite *CS) {
|
2015-02-02 00:15:07 +08:00
|
|
|
if (Subtarget->is64Bit())
|
|
|
|
return 0;
|
|
|
|
if (Subtarget->getTargetTriple().isOSMSVCRT())
|
|
|
|
return 0;
|
|
|
|
if (CC == CallingConv::Fast || CC == CallingConv::GHC ||
|
|
|
|
CC == CallingConv::HiPE)
|
|
|
|
return 0;
|
2015-11-05 04:33:45 +08:00
|
|
|
|
|
|
|
if (CS)
|
2017-04-15 04:19:02 +08:00
|
|
|
if (CS->arg_empty() || !CS->paramHasAttr(0, Attribute::StructRet) ||
|
|
|
|
CS->paramHasAttr(0, Attribute::InReg) || Subtarget->isTargetMCU())
|
2015-11-05 04:33:45 +08:00
|
|
|
return 0;
|
|
|
|
|
2015-02-02 00:15:07 +08:00
|
|
|
return 4;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
|
|
|
|
auto &OutVals = CLI.OutVals;
|
|
|
|
auto &OutFlags = CLI.OutFlags;
|
|
|
|
auto &OutRegs = CLI.OutRegs;
|
|
|
|
auto &Ins = CLI.Ins;
|
|
|
|
auto &InRegs = CLI.InRegs;
|
|
|
|
CallingConv::ID CC = CLI.CallConv;
|
|
|
|
bool &IsTailCall = CLI.IsTailCall;
|
|
|
|
bool IsVarArg = CLI.IsVarArg;
|
|
|
|
const Value *Callee = CLI.Callee;
|
2015-06-23 20:21:54 +08:00
|
|
|
MCSymbol *Symbol = CLI.Symbol;
|
2015-02-02 00:15:07 +08:00
|
|
|
|
|
|
|
bool Is64Bit = Subtarget->is64Bit();
|
|
|
|
bool IsWin64 = Subtarget->isCallingConvWin64(CC);
|
|
|
|
|
2017-05-03 21:07:19 +08:00
|
|
|
const CallInst *CI =
|
|
|
|
CLI.CS ? dyn_cast<CallInst>(CLI.CS->getInstruction()) : nullptr;
|
|
|
|
const Function *CalledFn = CI ? CI->getCalledFunction() : nullptr;
|
|
|
|
|
2018-03-17 21:29:46 +08:00
|
|
|
// Call / invoke instructions with NoCfCheck attribute require special
|
|
|
|
// handling.
|
|
|
|
const auto *II =
|
|
|
|
CLI.CS ? dyn_cast<InvokeInst>(CLI.CS->getInstruction()) : nullptr;
|
|
|
|
if ((CI && CI->doesNoCfCheck()) || (II && II->doesNoCfCheck()))
|
|
|
|
return false;
|
|
|
|
|
2017-05-03 21:07:19 +08:00
|
|
|
// Functions with no_caller_saved_registers that need special handling.
|
|
|
|
if ((CI && CI->hasFnAttr("no_caller_saved_registers")) ||
|
|
|
|
(CalledFn && CalledFn->hasFnAttribute("no_caller_saved_registers")))
|
|
|
|
return false;
|
|
|
|
|
2018-08-23 14:06:38 +08:00
|
|
|
// Functions using retpoline for indirect calls need to use SDISel.
|
|
|
|
if (Subtarget->useRetpolineIndirectCalls())
|
Introduce the "retpoline" x86 mitigation technique for variant #2 of the speculative execution vulnerabilities disclosed today, specifically identified by CVE-2017-5715, "Branch Target Injection", which is one of the two halves of Spectre.
Summary:
First, we need to explain the core of the vulnerability. Note that this
is a very incomplete description, please see the Project Zero blog post
for details:
https://googleprojectzero.blogspot.com/2018/01/reading-privileged-memory-with-side.html
The basis for branch target injection is to direct speculative execution
of the processor to some "gadget" of executable code by poisoning the
prediction of indirect branches with the address of that gadget. The
gadget in turn contains an operation that provides a side channel for
reading data. Most commonly, this will look like a load of secret data
followed by a branch on the loaded value and then a load of some
predictable cache line. The attacker then uses timing of the processor's
cache to determine which direction the branch took *in the speculative
execution*, and in turn what one bit of the loaded value was. Due to the
nature of these timing side channels and the branch predictor on Intel
processors, this allows an attacker to leak data only accessible to
a privileged domain (like the kernel) back into an unprivileged domain.
The goal is simple: avoid generating code which contains an indirect
branch that could have its prediction poisoned by an attacker. In many
cases, the compiler can simply use directed conditional branches and
a small search tree. LLVM already has support for lowering switches in
this way and the first step of this patch is to disable jump-table
lowering of switches and introduce a pass to rewrite explicit indirectbr
sequences into a switch over integers.
However, there is no fully general alternative to indirect calls. We
introduce a new construct we call a "retpoline" to implement indirect
calls in a non-speculatable way. It can be thought of loosely as
a trampoline for indirect calls which uses the RET instruction on x86.
Further, we arrange for a specific call->ret sequence which ensures the
processor predicts the return to go to a controlled, known location. The
retpoline then "smashes" the return address pushed onto the stack by the
call with the desired target of the original indirect call. The result
is a predicted return to the next instruction after a call (which can be
used to trap speculative execution within an infinite loop) and an
actual indirect branch to an arbitrary address.
On 64-bit x86 ABIs, this is especially easily done in the compiler by
using a guaranteed scratch register to pass the target into this device.
For 32-bit ABIs there isn't a guaranteed scratch register and so several
different retpoline variants are introduced to use a scratch register if
one is available in the calling convention and to otherwise use direct
stack push/pop sequences to pass the target address.
This "retpoline" mitigation is fully described in the following blog
post: https://support.google.com/faqs/answer/7625886
We also support a target feature that disables emission of the retpoline
thunk by the compiler to allow for custom thunks if users want them.
These are particularly useful in environments like kernels that
routinely do hot-patching on boot and want to hot-patch their thunk to
different code sequences. They can write this custom thunk and use
`-mretpoline-external-thunk` *in addition* to `-mretpoline`. In this
case, on x86-64 the thunk names must be:
```
__llvm_external_retpoline_r11
```
or on 32-bit:
```
__llvm_external_retpoline_eax
__llvm_external_retpoline_ecx
__llvm_external_retpoline_edx
__llvm_external_retpoline_push
```
And the target of the retpoline is passed in the named register, or in
the case of the `push` suffix on the top of the stack via a `pushl`
instruction.
There is one other important source of indirect branches in x86 ELF
binaries: the PLT. These patches also include support for LLD to
generate PLT entries that perform a retpoline-style indirection.
The only other indirect branches remaining that we are aware of are from
precompiled runtimes (such as crt0.o and similar). The ones we have
found are not really attackable, and so we have not focused on them
here, but eventually these runtimes should also be replicated for
retpoline-ed configurations for completeness.
For kernels or other freestanding or fully static executables, the
compiler switch `-mretpoline` is sufficient to fully mitigate this
particular attack. For dynamic executables, you must compile *all*
libraries with `-mretpoline` and additionally link the dynamic
executable and all shared libraries with LLD and pass `-z retpolineplt`
(or use similar functionality from some other linker). We strongly
recommend also using `-z now` as non-lazy binding allows the
retpoline-mitigated PLT to be substantially smaller.
When manually applying transformations similar to `-mretpoline` to the
Linux kernel we observed very small performance hits to applications
running typical workloads, and relatively minor hits (approximately 2%)
even for extremely syscall-heavy applications. This is largely due to
the small number of indirect branches that occur in performance
sensitive paths of the kernel.
When using these patches on statically linked applications, especially
C++ applications, you should expect to see a much more dramatic
performance hit. For microbenchmarks that are switch, indirect-, or
virtual-call heavy we have seen overheads ranging from 10% to 50%.
However, real-world workloads exhibit substantially lower performance
impact. Notably, techniques such as PGO and ThinLTO dramatically reduce
the impact of hot indirect calls (by speculatively promoting them to
direct calls) and allow optimized search trees to be used to lower
switches. If you need to deploy these techniques in C++ applications, we
*strongly* recommend that you ensure all hot call targets are statically
linked (avoiding PLT indirection) and use both PGO and ThinLTO. Well
tuned servers using all of these techniques saw 5% - 10% overhead from
the use of retpoline.
We will add detailed documentation covering these components in
subsequent patches, but wanted to make the core functionality available
as soon as possible. Happy for more code review, but we'd really like to
get these patches landed and backported ASAP for obvious reasons. We're
planning to backport this to both 6.0 and 5.0 release streams and get
a 5.0 release with just this cherry picked ASAP for distros and vendors.
This patch is the work of a number of people over the past month: Eric, Reid,
Rui, and myself. I'm mailing it out as a single commit due to the time
sensitive nature of landing this and the need to backport it. Huge thanks to
everyone who helped out here, and everyone at Intel who helped out in
discussions about how to craft this. Also, credit goes to Paul Turner (at
Google, but not an LLVM contributor) for much of the underlying retpoline
design.
Reviewers: echristo, rnk, ruiu, craig.topper, DavidKreitzer
Subscribers: sanjoy, emaste, mcrosier, mgorny, mehdi_amini, hiraditya, llvm-commits
Differential Revision: https://reviews.llvm.org/D41723
llvm-svn: 323155
2018-01-23 06:05:25 +08:00
|
|
|
return false;
|
|
|
|
|
2015-02-02 00:15:07 +08:00
|
|
|
// Handle only C, fastcc, and webkit_js calling conventions for now.
|
|
|
|
switch (CC) {
|
|
|
|
default: return false;
|
|
|
|
case CallingConv::C:
|
|
|
|
case CallingConv::Fast:
|
|
|
|
case CallingConv::WebKit_JS:
|
2016-04-06 06:41:47 +08:00
|
|
|
case CallingConv::Swift:
|
2015-02-02 00:15:07 +08:00
|
|
|
case CallingConv::X86_FastCall:
|
2016-07-14 21:54:26 +08:00
|
|
|
case CallingConv::X86_StdCall:
|
2016-07-14 09:52:51 +08:00
|
|
|
case CallingConv::X86_ThisCall:
|
2017-07-18 04:05:19 +08:00
|
|
|
case CallingConv::Win64:
|
2015-02-02 00:15:07 +08:00
|
|
|
case CallingConv::X86_64_SysV:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Allow SelectionDAG isel to handle tail calls.
|
|
|
|
if (IsTailCall)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// fastcc with -tailcallopt is intended to provide a guaranteed
|
|
|
|
// tail call optimization. Fastisel doesn't know how to do that.
|
|
|
|
if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// Don't know how to handle Win64 varargs yet. Nothing special needed for
|
|
|
|
// x86-32. Special handling for x86-64 is implemented.
|
|
|
|
if (IsVarArg && IsWin64)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// Don't know about inalloca yet.
|
|
|
|
if (CLI.CS && CLI.CS->hasInAllocaArgument())
|
|
|
|
return false;
|
|
|
|
|
2016-04-12 05:08:06 +08:00
|
|
|
for (auto Flag : CLI.OutFlags)
|
|
|
|
if (Flag.isSwiftError())
|
|
|
|
return false;
|
|
|
|
|
2015-02-02 00:15:07 +08:00
|
|
|
SmallVector<MVT, 16> OutVTs;
|
|
|
|
SmallVector<unsigned, 16> ArgRegs;
|
|
|
|
|
|
|
|
// If this is a constant i1/i8/i16 argument, promote to i32 to avoid an extra
|
|
|
|
// instruction. This is safe because it is common to all FastISel supported
|
|
|
|
// calling conventions on x86.
|
|
|
|
for (int i = 0, e = OutVals.size(); i != e; ++i) {
|
|
|
|
Value *&Val = OutVals[i];
|
|
|
|
ISD::ArgFlagsTy Flags = OutFlags[i];
|
|
|
|
if (auto *CI = dyn_cast<ConstantInt>(Val)) {
|
|
|
|
if (CI->getBitWidth() < 32) {
|
|
|
|
if (Flags.isSExt())
|
|
|
|
Val = ConstantExpr::getSExt(CI, Type::getInt32Ty(CI->getContext()));
|
|
|
|
else
|
|
|
|
Val = ConstantExpr::getZExt(CI, Type::getInt32Ty(CI->getContext()));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Passing bools around ends up doing a trunc to i1 and passing it.
|
|
|
|
// Codegen this as an argument + "and 1".
|
|
|
|
MVT VT;
|
|
|
|
auto *TI = dyn_cast<TruncInst>(Val);
|
|
|
|
unsigned ResultReg;
|
|
|
|
if (TI && TI->getType()->isIntegerTy(1) && CLI.CS &&
|
|
|
|
(TI->getParent() == CLI.CS->getInstruction()->getParent()) &&
|
|
|
|
TI->hasOneUse()) {
|
|
|
|
Value *PrevVal = TI->getOperand(0);
|
|
|
|
ResultReg = getRegForValue(PrevVal);
|
|
|
|
|
|
|
|
if (!ResultReg)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
if (!isTypeLegal(PrevVal->getType(), VT))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
ResultReg =
|
|
|
|
fastEmit_ri(VT, VT, ISD::AND, ResultReg, hasTrivialKill(PrevVal), 1);
|
|
|
|
} else {
|
|
|
|
if (!isTypeLegal(Val->getType(), VT))
|
|
|
|
return false;
|
|
|
|
ResultReg = getRegForValue(Val);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!ResultReg)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
ArgRegs.push_back(ResultReg);
|
|
|
|
OutVTs.push_back(VT);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Analyze operands of the call, assigning locations to each operand.
|
|
|
|
SmallVector<CCValAssign, 16> ArgLocs;
|
|
|
|
CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, CLI.RetTy->getContext());
|
|
|
|
|
|
|
|
// Allocate shadow area for Win64
|
|
|
|
if (IsWin64)
|
|
|
|
CCInfo.AllocateStack(32, 8);
|
|
|
|
|
|
|
|
CCInfo.AnalyzeCallOperands(OutVTs, OutFlags, CC_X86);
|
|
|
|
|
|
|
|
// Get a count of how many bytes are to be pushed on the stack.
|
Arguments spilled on the stack before a function call may have
alignment requirements, for example in the case of vectors.
These requirements are exploited by the code generator by using
move instructions that have similar alignment requirements, e.g.,
movaps on x86.
Although the code generator properly aligns the arguments with
respect to the displacement of the stack pointer it computes,
the displacement itself may cause misalignment. For example if
we have
%3 = load <16 x float>, <16 x float>* %1, align 64
call void @bar(<16 x float> %3, i32 0)
the x86 back-end emits:
movaps 32(%ecx), %xmm2
movaps (%ecx), %xmm0
movaps 16(%ecx), %xmm1
movaps 48(%ecx), %xmm3
subl $20, %esp <-- if %esp was 16-byte aligned before this instruction, it no longer will be afterwards
movaps %xmm3, (%esp) <-- movaps requires 16-byte alignment, while %esp is not aligned as such.
movl $0, 16(%esp)
calll __bar
To solve this, we need to make sure that the computed value with which
the stack pointer is changed is a multiple of the maximal alignment seen
during its computation. With this change we get proper alignment:
subl $32, %esp
movaps %xmm3, (%esp)
Differential Revision: http://reviews.llvm.org/D12337
llvm-svn: 248786
2015-09-29 18:12:57 +08:00
|
|
|
unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
|
2015-02-02 00:15:07 +08:00
|
|
|
|
|
|
|
// Issue CALLSEQ_START
|
|
|
|
unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
|
2017-05-09 21:35:13 +08:00
|
|
|
.addImm(NumBytes).addImm(0).addImm(0);
|
2015-02-02 00:15:07 +08:00
|
|
|
|
|
|
|
// Walk the register/memloc assignments, inserting copies/loads.
|
2015-02-03 07:03:45 +08:00
|
|
|
const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
|
2015-02-02 00:15:07 +08:00
|
|
|
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
|
|
|
|
CCValAssign const &VA = ArgLocs[i];
|
|
|
|
const Value *ArgVal = OutVals[VA.getValNo()];
|
|
|
|
MVT ArgVT = OutVTs[VA.getValNo()];
|
|
|
|
|
|
|
|
if (ArgVT == MVT::x86mmx)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
unsigned ArgReg = ArgRegs[VA.getValNo()];
|
|
|
|
|
|
|
|
// Promote the value if needed.
|
|
|
|
switch (VA.getLocInfo()) {
|
|
|
|
case CCValAssign::Full: break;
|
|
|
|
case CCValAssign::SExt: {
|
|
|
|
assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
|
|
|
|
"Unexpected extend");
|
2016-05-04 08:22:23 +08:00
|
|
|
|
2016-12-05 14:09:55 +08:00
|
|
|
if (ArgVT == MVT::i1)
|
2016-05-04 08:22:23 +08:00
|
|
|
return false;
|
|
|
|
|
2015-02-02 00:15:07 +08:00
|
|
|
bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg,
|
|
|
|
ArgVT, ArgReg);
|
|
|
|
assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
|
|
|
|
ArgVT = VA.getLocVT();
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case CCValAssign::ZExt: {
|
|
|
|
assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
|
|
|
|
"Unexpected extend");
|
2016-05-04 08:22:23 +08:00
|
|
|
|
|
|
|
// Handle zero-extension from i1 to i8, which is common.
|
2016-12-05 14:09:55 +08:00
|
|
|
if (ArgVT == MVT::i1) {
|
2016-05-04 08:22:23 +08:00
|
|
|
// Set the high bits to zero.
|
|
|
|
ArgReg = fastEmitZExtFromI1(MVT::i8, ArgReg, /*TODO: Kill=*/false);
|
|
|
|
ArgVT = MVT::i8;
|
|
|
|
|
|
|
|
if (ArgReg == 0)
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2015-02-02 00:15:07 +08:00
|
|
|
bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg,
|
|
|
|
ArgVT, ArgReg);
|
|
|
|
assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
|
|
|
|
ArgVT = VA.getLocVT();
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case CCValAssign::AExt: {
|
|
|
|
assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
|
|
|
|
"Unexpected extend");
|
|
|
|
bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(), ArgReg,
|
|
|
|
ArgVT, ArgReg);
|
|
|
|
if (!Emitted)
|
|
|
|
Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg,
|
|
|
|
ArgVT, ArgReg);
|
|
|
|
if (!Emitted)
|
|
|
|
Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg,
|
|
|
|
ArgVT, ArgReg);
|
|
|
|
|
|
|
|
assert(Emitted && "Failed to emit a aext!"); (void)Emitted;
|
|
|
|
ArgVT = VA.getLocVT();
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case CCValAssign::BCvt: {
|
|
|
|
ArgReg = fastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, ArgReg,
|
|
|
|
/*TODO: Kill=*/false);
|
|
|
|
assert(ArgReg && "Failed to emit a bitcast!");
|
|
|
|
ArgVT = VA.getLocVT();
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case CCValAssign::VExt:
|
|
|
|
// VExt has not been implemented, so this should be impossible to reach
|
|
|
|
// for now. However, fallback to Selection DAG isel once implemented.
|
|
|
|
return false;
|
|
|
|
case CCValAssign::AExtUpper:
|
|
|
|
case CCValAssign::SExtUpper:
|
|
|
|
case CCValAssign::ZExtUpper:
|
|
|
|
case CCValAssign::FPExt:
|
|
|
|
llvm_unreachable("Unexpected loc info!");
|
|
|
|
case CCValAssign::Indirect:
|
|
|
|
// FIXME: Indirect doesn't need extending, but fast-isel doesn't fully
|
|
|
|
// support this.
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (VA.isRegLoc()) {
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
|
|
|
TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
|
|
|
|
OutRegs.push_back(VA.getLocReg());
|
|
|
|
} else {
|
|
|
|
assert(VA.isMemLoc());
|
|
|
|
|
|
|
|
// Don't emit stores for undef values.
|
|
|
|
if (isa<UndefValue>(ArgVal))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
unsigned LocMemOffset = VA.getLocMemOffset();
|
|
|
|
X86AddressMode AM;
|
|
|
|
AM.Base.Reg = RegInfo->getStackRegister();
|
|
|
|
AM.Disp = LocMemOffset;
|
|
|
|
ISD::ArgFlagsTy Flags = OutFlags[VA.getValNo()];
|
|
|
|
unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
|
|
|
|
MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
|
2015-08-12 07:09:45 +08:00
|
|
|
MachinePointerInfo::getStack(*FuncInfo.MF, LocMemOffset),
|
|
|
|
MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
|
2015-02-02 00:15:07 +08:00
|
|
|
if (Flags.isByVal()) {
|
|
|
|
X86AddressMode SrcAM;
|
|
|
|
SrcAM.Base.Reg = ArgReg;
|
|
|
|
if (!TryEmitSmallMemcpy(AM, SrcAM, Flags.getByValSize()))
|
|
|
|
return false;
|
|
|
|
} else if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal)) {
|
|
|
|
// If this is a really simple value, emit this with the Value* version
|
|
|
|
// of X86FastEmitStore. If it isn't simple, we don't want to do this,
|
|
|
|
// as it can cause us to reevaluate the argument.
|
|
|
|
if (!X86FastEmitStore(ArgVT, ArgVal, AM, MMO))
|
|
|
|
return false;
|
|
|
|
} else {
|
|
|
|
bool ValIsKill = hasTrivialKill(ArgVal);
|
|
|
|
if (!X86FastEmitStore(ArgVT, ArgReg, ValIsKill, AM, MMO))
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// ELF / PIC requires GOT in the EBX register before function calls via PLT
|
|
|
|
// GOT pointer.
|
|
|
|
if (Subtarget->isPICStyleGOT()) {
|
|
|
|
unsigned Base = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
|
|
|
TII.get(TargetOpcode::COPY), X86::EBX).addReg(Base);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (Is64Bit && IsVarArg && !IsWin64) {
|
|
|
|
// From AMD64 ABI document:
|
|
|
|
// For calls that may call functions that use varargs or stdargs
|
|
|
|
// (prototype-less calls or calls to functions containing ellipsis (...) in
|
|
|
|
// the declaration) %al is used as hidden argument to specify the number
|
|
|
|
// of SSE registers used. The contents of %al do not need to match exactly
|
|
|
|
// the number of registers, but must be an ubound on the number of SSE
|
|
|
|
// registers used and is in the range 0 - 8 inclusive.
|
|
|
|
|
|
|
|
// Count the number of XMM registers allocated.
|
|
|
|
static const MCPhysReg XMMArgRegs[] = {
|
|
|
|
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
|
|
|
|
X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
|
|
|
|
};
|
2015-02-21 10:11:17 +08:00
|
|
|
unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
|
2015-02-02 00:15:07 +08:00
|
|
|
assert((Subtarget->hasSSE1() || !NumXMMRegs)
|
|
|
|
&& "SSE registers cannot be used when SSE is disabled");
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri),
|
|
|
|
X86::AL).addImm(NumXMMRegs);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Materialize callee address in a register. FIXME: GV address can be
|
|
|
|
// handled with a CALLpcrel32 instead.
|
|
|
|
X86AddressMode CalleeAM;
|
|
|
|
if (!X86SelectCallAddress(Callee, CalleeAM))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
unsigned CalleeOp = 0;
|
|
|
|
const GlobalValue *GV = nullptr;
|
|
|
|
if (CalleeAM.GV != nullptr) {
|
|
|
|
GV = CalleeAM.GV;
|
|
|
|
} else if (CalleeAM.Base.Reg != 0) {
|
|
|
|
CalleeOp = CalleeAM.Base.Reg;
|
|
|
|
} else
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// Issue the call.
|
|
|
|
MachineInstrBuilder MIB;
|
|
|
|
if (CalleeOp) {
|
|
|
|
// Register-indirect call.
|
|
|
|
unsigned CallOpc = Is64Bit ? X86::CALL64r : X86::CALL32r;
|
|
|
|
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc))
|
|
|
|
.addReg(CalleeOp);
|
|
|
|
} else {
|
|
|
|
// Direct call.
|
|
|
|
assert(GV && "Not a direct call");
|
|
|
|
// See if we need any target-specific flags on the GV operand.
|
2016-05-20 02:49:29 +08:00
|
|
|
unsigned char OpFlags = Subtarget->classifyGlobalFunctionReference(GV);
|
2015-02-02 00:15:07 +08:00
|
|
|
|
2017-08-05 08:10:43 +08:00
|
|
|
// This will be a direct call, or an indirect call through memory for
|
|
|
|
// NonLazyBind calls or dllimport calls.
|
2017-12-15 08:32:09 +08:00
|
|
|
bool NeedLoad =
|
|
|
|
OpFlags == X86II::MO_DLLIMPORT || OpFlags == X86II::MO_GOTPCREL;
|
2017-08-05 08:10:43 +08:00
|
|
|
unsigned CallOpc = NeedLoad
|
|
|
|
? (Is64Bit ? X86::CALL64m : X86::CALL32m)
|
|
|
|
: (Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32);
|
|
|
|
|
2015-02-02 00:15:07 +08:00
|
|
|
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc));
|
2017-08-05 08:10:43 +08:00
|
|
|
if (NeedLoad)
|
|
|
|
MIB.addReg(Is64Bit ? X86::RIP : 0).addImm(1).addReg(0);
|
2015-06-23 20:21:54 +08:00
|
|
|
if (Symbol)
|
|
|
|
MIB.addSym(Symbol, OpFlags);
|
2015-02-02 00:15:07 +08:00
|
|
|
else
|
|
|
|
MIB.addGlobalAddress(GV, 0, OpFlags);
|
2017-08-05 08:10:43 +08:00
|
|
|
if (NeedLoad)
|
|
|
|
MIB.addReg(0);
|
2015-02-02 00:15:07 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Add a register mask operand representing the call-preserved registers.
|
|
|
|
// Proper defs for return values will be added by setPhysRegsDeadExcept().
|
2015-03-12 06:42:13 +08:00
|
|
|
MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
|
2015-02-02 00:15:07 +08:00
|
|
|
|
|
|
|
// Add an implicit use GOT pointer in EBX.
|
|
|
|
if (Subtarget->isPICStyleGOT())
|
|
|
|
MIB.addReg(X86::EBX, RegState::Implicit);
|
|
|
|
|
|
|
|
if (Is64Bit && IsVarArg && !IsWin64)
|
|
|
|
MIB.addReg(X86::AL, RegState::Implicit);
|
|
|
|
|
|
|
|
// Add implicit physical register uses to the call.
|
|
|
|
for (auto Reg : OutRegs)
|
|
|
|
MIB.addReg(Reg, RegState::Implicit);
|
|
|
|
|
|
|
|
// Issue CALLSEQ_END
|
|
|
|
unsigned NumBytesForCalleeToPop =
|
2016-07-14 09:52:51 +08:00
|
|
|
X86::isCalleePop(CC, Subtarget->is64Bit(), IsVarArg,
|
|
|
|
TM.Options.GuaranteedTailCallOpt)
|
|
|
|
? NumBytes // Callee pops everything.
|
|
|
|
: computeBytesPoppedByCalleeForSRet(Subtarget, CC, CLI.CS);
|
2015-02-02 00:15:07 +08:00
|
|
|
unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
|
|
|
|
.addImm(NumBytes).addImm(NumBytesForCalleeToPop);
|
|
|
|
|
|
|
|
// Now handle call return values.
|
|
|
|
SmallVector<CCValAssign, 16> RVLocs;
|
|
|
|
CCState CCRetInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs,
|
|
|
|
CLI.RetTy->getContext());
|
|
|
|
CCRetInfo.AnalyzeCallResult(Ins, RetCC_X86);
|
|
|
|
|
|
|
|
// Copy all of the result registers out of their specified physreg.
|
|
|
|
unsigned ResultReg = FuncInfo.CreateRegs(CLI.RetTy);
|
|
|
|
for (unsigned i = 0; i != RVLocs.size(); ++i) {
|
|
|
|
CCValAssign &VA = RVLocs[i];
|
|
|
|
EVT CopyVT = VA.getValVT();
|
|
|
|
unsigned CopyReg = ResultReg + i;
|
2017-03-31 05:02:52 +08:00
|
|
|
unsigned SrcReg = VA.getLocReg();
|
2015-02-02 00:15:07 +08:00
|
|
|
|
|
|
|
// If this is x86-64, and we disabled SSE, we can't return FP values
|
|
|
|
if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
|
|
|
|
((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
|
|
|
|
report_fatal_error("SSE register return with SSE disabled");
|
|
|
|
}
|
|
|
|
|
|
|
|
// If we prefer to use the value in xmm registers, copy it out as f80 and
|
|
|
|
// use a truncate to move it from fp stack reg to xmm reg.
|
2017-03-31 05:02:52 +08:00
|
|
|
if ((SrcReg == X86::FP0 || SrcReg == X86::FP1) &&
|
2015-02-02 00:15:07 +08:00
|
|
|
isScalarFPTypeInSSEReg(VA.getValVT())) {
|
|
|
|
CopyVT = MVT::f80;
|
|
|
|
CopyReg = createResultReg(&X86::RFP80RegClass);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Copy out the result.
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
2017-03-31 05:02:52 +08:00
|
|
|
TII.get(TargetOpcode::COPY), CopyReg).addReg(SrcReg);
|
2015-02-02 00:15:07 +08:00
|
|
|
InRegs.push_back(VA.getLocReg());
|
|
|
|
|
|
|
|
// Round the f80 to the right size, which also moves it to the appropriate
|
|
|
|
// xmm register. This is accomplished by storing the f80 value in memory
|
|
|
|
// and then loading it back.
|
|
|
|
if (CopyVT != VA.getValVT()) {
|
|
|
|
EVT ResVT = VA.getValVT();
|
|
|
|
unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
|
|
|
|
unsigned MemSize = ResVT.getSizeInBits()/8;
|
|
|
|
int FI = MFI.CreateStackObject(MemSize, MemSize, false);
|
|
|
|
addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
|
|
|
TII.get(Opc)), FI)
|
|
|
|
.addReg(CopyReg);
|
|
|
|
Opc = ResVT == MVT::f32 ? X86::MOVSSrm : X86::MOVSDrm;
|
|
|
|
addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
|
|
|
TII.get(Opc), ResultReg + i), FI);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
CLI.ResultReg = ResultReg;
|
|
|
|
CLI.NumResultRegs = RVLocs.size();
|
|
|
|
CLI.Call = MIB;
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool
|
|
|
|
X86FastISel::fastSelectInstruction(const Instruction *I) {
|
|
|
|
switch (I->getOpcode()) {
|
|
|
|
default: break;
|
|
|
|
case Instruction::Load:
|
|
|
|
return X86SelectLoad(I);
|
|
|
|
case Instruction::Store:
|
|
|
|
return X86SelectStore(I);
|
|
|
|
case Instruction::Ret:
|
|
|
|
return X86SelectRet(I);
|
|
|
|
case Instruction::ICmp:
|
|
|
|
case Instruction::FCmp:
|
|
|
|
return X86SelectCmp(I);
|
|
|
|
case Instruction::ZExt:
|
|
|
|
return X86SelectZExt(I);
|
2017-09-03 02:53:46 +08:00
|
|
|
case Instruction::SExt:
|
|
|
|
return X86SelectSExt(I);
|
2015-02-02 00:15:07 +08:00
|
|
|
case Instruction::Br:
|
|
|
|
return X86SelectBranch(I);
|
|
|
|
case Instruction::LShr:
|
|
|
|
case Instruction::AShr:
|
|
|
|
case Instruction::Shl:
|
|
|
|
return X86SelectShift(I);
|
|
|
|
case Instruction::SDiv:
|
|
|
|
case Instruction::UDiv:
|
|
|
|
case Instruction::SRem:
|
|
|
|
case Instruction::URem:
|
|
|
|
return X86SelectDivRem(I);
|
|
|
|
case Instruction::Select:
|
|
|
|
return X86SelectSelect(I);
|
|
|
|
case Instruction::Trunc:
|
|
|
|
return X86SelectTrunc(I);
|
|
|
|
case Instruction::FPExt:
|
|
|
|
return X86SelectFPExt(I);
|
|
|
|
case Instruction::FPTrunc:
|
|
|
|
return X86SelectFPTrunc(I);
|
2015-02-18 07:40:58 +08:00
|
|
|
case Instruction::SIToFP:
|
|
|
|
return X86SelectSIToFP(I);
|
2018-07-14 06:09:30 +08:00
|
|
|
case Instruction::UIToFP:
|
|
|
|
return X86SelectUIToFP(I);
|
2015-02-02 00:15:07 +08:00
|
|
|
case Instruction::IntToPtr: // Deliberate fall-through.
|
|
|
|
case Instruction::PtrToInt: {
|
2015-07-09 10:09:04 +08:00
|
|
|
EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
|
|
|
|
EVT DstVT = TLI.getValueType(DL, I->getType());
|
2015-02-02 00:15:07 +08:00
|
|
|
if (DstVT.bitsGT(SrcVT))
|
|
|
|
return X86SelectZExt(I);
|
|
|
|
if (DstVT.bitsLT(SrcVT))
|
|
|
|
return X86SelectTrunc(I);
|
|
|
|
unsigned Reg = getRegForValue(I->getOperand(0));
|
|
|
|
if (Reg == 0) return false;
|
|
|
|
updateValueMap(I, Reg);
|
|
|
|
return true;
|
|
|
|
}
|
2015-10-03 00:08:05 +08:00
|
|
|
case Instruction::BitCast: {
|
|
|
|
// Select SSE2/AVX bitcasts between 128/256 bit vector types.
|
|
|
|
if (!Subtarget->hasSSE2())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
|
|
|
|
EVT DstVT = TLI.getValueType(DL, I->getType());
|
|
|
|
|
|
|
|
if (!SrcVT.isSimple() || !DstVT.isSimple())
|
|
|
|
return false;
|
|
|
|
|
2016-12-05 13:50:51 +08:00
|
|
|
MVT SVT = SrcVT.getSimpleVT();
|
|
|
|
MVT DVT = DstVT.getSimpleVT();
|
|
|
|
|
|
|
|
if (!SVT.is128BitVector() &&
|
|
|
|
!(Subtarget->hasAVX() && SVT.is256BitVector()) &&
|
|
|
|
!(Subtarget->hasAVX512() && SVT.is512BitVector() &&
|
|
|
|
(Subtarget->hasBWI() || (SVT.getScalarSizeInBits() >= 32 &&
|
|
|
|
DVT.getScalarSizeInBits() >= 32))))
|
2015-10-03 00:08:05 +08:00
|
|
|
return false;
|
|
|
|
|
|
|
|
unsigned Reg = getRegForValue(I->getOperand(0));
|
|
|
|
if (Reg == 0)
|
|
|
|
return false;
|
2018-07-31 03:41:25 +08:00
|
|
|
|
2015-10-03 00:08:05 +08:00
|
|
|
// No instruction is needed for conversion. Reuse the register used by
|
|
|
|
// the fist operand.
|
|
|
|
updateValueMap(I, Reg);
|
|
|
|
return true;
|
|
|
|
}
|
2015-02-02 00:15:07 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) {
|
|
|
|
if (VT > MVT::i64)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
uint64_t Imm = CI->getZExtValue();
|
|
|
|
if (Imm == 0) {
|
|
|
|
unsigned SrcReg = fastEmitInst_(X86::MOV32r0, &X86::GR32RegClass);
|
|
|
|
switch (VT.SimpleTy) {
|
|
|
|
default: llvm_unreachable("Unexpected value type");
|
|
|
|
case MVT::i1:
|
|
|
|
case MVT::i8:
|
|
|
|
return fastEmitInst_extractsubreg(MVT::i8, SrcReg, /*Kill=*/true,
|
|
|
|
X86::sub_8bit);
|
|
|
|
case MVT::i16:
|
|
|
|
return fastEmitInst_extractsubreg(MVT::i16, SrcReg, /*Kill=*/true,
|
|
|
|
X86::sub_16bit);
|
|
|
|
case MVT::i32:
|
|
|
|
return SrcReg;
|
2018-11-01 05:53:24 +08:00
|
|
|
case MVT::i64: {
|
|
|
|
unsigned ResultReg = createResultReg(&X86::GR64RegClass);
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
|
|
|
TII.get(TargetOpcode::SUBREG_TO_REG), ResultReg)
|
|
|
|
.addImm(0).addReg(SrcReg).addImm(X86::sub_32bit);
|
|
|
|
return ResultReg;
|
|
|
|
}
|
2015-02-02 00:15:07 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
unsigned Opc = 0;
|
|
|
|
switch (VT.SimpleTy) {
|
|
|
|
default: llvm_unreachable("Unexpected value type");
|
2017-03-29 00:35:29 +08:00
|
|
|
case MVT::i1:
|
|
|
|
VT = MVT::i8;
|
|
|
|
LLVM_FALLTHROUGH;
|
2015-02-02 00:15:07 +08:00
|
|
|
case MVT::i8: Opc = X86::MOV8ri; break;
|
|
|
|
case MVT::i16: Opc = X86::MOV16ri; break;
|
|
|
|
case MVT::i32: Opc = X86::MOV32ri; break;
|
|
|
|
case MVT::i64: {
|
|
|
|
if (isUInt<32>(Imm))
|
2018-09-22 07:14:05 +08:00
|
|
|
Opc = X86::MOV32ri64;
|
2015-02-02 00:15:07 +08:00
|
|
|
else if (isInt<32>(Imm))
|
|
|
|
Opc = X86::MOV64ri32;
|
|
|
|
else
|
|
|
|
Opc = X86::MOV64ri;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
|
|
|
|
}
|
|
|
|
|
|
|
|
unsigned X86FastISel::X86MaterializeFP(const ConstantFP *CFP, MVT VT) {
|
|
|
|
if (CFP->isNullValue())
|
|
|
|
return fastMaterializeFloatZero(CFP);
|
|
|
|
|
|
|
|
// Can't handle alternate code models yet.
|
|
|
|
CodeModel::Model CM = TM.getCodeModel();
|
|
|
|
if (CM != CodeModel::Small && CM != CodeModel::Large)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
// Get opcode and regclass of the output for the given load instruction.
|
|
|
|
unsigned Opc = 0;
|
|
|
|
const TargetRegisterClass *RC = nullptr;
|
|
|
|
switch (VT.SimpleTy) {
|
|
|
|
default: return 0;
|
|
|
|
case MVT::f32:
|
|
|
|
if (X86ScalarSSEf32) {
|
2017-10-29 10:18:41 +08:00
|
|
|
Opc = Subtarget->hasAVX512()
|
|
|
|
? X86::VMOVSSZrm
|
|
|
|
: Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm;
|
2017-10-29 13:14:26 +08:00
|
|
|
RC = Subtarget->hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
|
2015-02-02 00:15:07 +08:00
|
|
|
} else {
|
|
|
|
Opc = X86::LD_Fp32m;
|
|
|
|
RC = &X86::RFP32RegClass;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case MVT::f64:
|
|
|
|
if (X86ScalarSSEf64) {
|
2017-10-29 10:18:41 +08:00
|
|
|
Opc = Subtarget->hasAVX512()
|
|
|
|
? X86::VMOVSDZrm
|
|
|
|
: Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm;
|
2017-10-29 13:14:26 +08:00
|
|
|
RC = Subtarget->hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
|
2015-02-02 00:15:07 +08:00
|
|
|
} else {
|
|
|
|
Opc = X86::LD_Fp64m;
|
|
|
|
RC = &X86::RFP64RegClass;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case MVT::f80:
|
|
|
|
// No f80 support yet.
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
// MachineConstantPool wants an explicit alignment.
|
|
|
|
unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
|
|
|
|
if (Align == 0) {
|
|
|
|
// Alignment of vector types. FIXME!
|
|
|
|
Align = DL.getTypeAllocSize(CFP->getType());
|
|
|
|
}
|
|
|
|
|
|
|
|
// x86-32 PIC requires a PIC base register for constant pools.
|
|
|
|
unsigned PICBase = 0;
|
2016-05-20 20:20:10 +08:00
|
|
|
unsigned char OpFlag = Subtarget->classifyLocalReference(nullptr);
|
|
|
|
if (OpFlag == X86II::MO_PIC_BASE_OFFSET)
|
2015-02-02 00:15:07 +08:00
|
|
|
PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
|
2016-05-20 20:20:10 +08:00
|
|
|
else if (OpFlag == X86II::MO_GOTOFF)
|
2015-02-02 00:15:07 +08:00
|
|
|
PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
|
2016-05-20 20:20:10 +08:00
|
|
|
else if (Subtarget->is64Bit() && TM.getCodeModel() == CodeModel::Small)
|
2015-02-02 00:15:07 +08:00
|
|
|
PICBase = X86::RIP;
|
|
|
|
|
|
|
|
// Create the load from the constant pool.
|
|
|
|
unsigned CPI = MCP.getConstantPoolIndex(CFP, Align);
|
|
|
|
unsigned ResultReg = createResultReg(RC);
|
|
|
|
|
|
|
|
if (CM == CodeModel::Large) {
|
|
|
|
unsigned AddrReg = createResultReg(&X86::GR64RegClass);
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri),
|
|
|
|
AddrReg)
|
|
|
|
.addConstantPoolIndex(CPI, 0, OpFlag);
|
|
|
|
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
|
|
|
TII.get(Opc), ResultReg);
|
|
|
|
addDirectMem(MIB, AddrReg);
|
|
|
|
MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
|
2015-08-12 07:09:45 +08:00
|
|
|
MachinePointerInfo::getConstantPool(*FuncInfo.MF),
|
|
|
|
MachineMemOperand::MOLoad, DL.getPointerSize(), Align);
|
2015-02-02 00:15:07 +08:00
|
|
|
MIB->addMemOperand(*FuncInfo.MF, MMO);
|
|
|
|
return ResultReg;
|
|
|
|
}
|
|
|
|
|
|
|
|
addConstantPoolReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
|
|
|
TII.get(Opc), ResultReg),
|
|
|
|
CPI, PICBase, OpFlag);
|
|
|
|
return ResultReg;
|
|
|
|
}
|
|
|
|
|
|
|
|
unsigned X86FastISel::X86MaterializeGV(const GlobalValue *GV, MVT VT) {
|
|
|
|
// Can't handle alternate code models yet.
|
|
|
|
if (TM.getCodeModel() != CodeModel::Small)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
// Materialize addresses with LEA/MOV instructions.
|
|
|
|
X86AddressMode AM;
|
|
|
|
if (X86SelectAddress(GV, AM)) {
|
|
|
|
// If the expression is just a basereg, then we're done, otherwise we need
|
|
|
|
// to emit an LEA.
|
|
|
|
if (AM.BaseType == X86AddressMode::RegBase &&
|
|
|
|
AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == nullptr)
|
|
|
|
return AM.Base.Reg;
|
|
|
|
|
|
|
|
unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
|
|
|
|
if (TM.getRelocationModel() == Reloc::Static &&
|
2015-07-09 10:09:04 +08:00
|
|
|
TLI.getPointerTy(DL) == MVT::i64) {
|
2015-02-02 00:15:07 +08:00
|
|
|
// The displacement code could be more than 32 bits away so we need to use
|
|
|
|
// an instruction with a 64 bit immediate
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri),
|
|
|
|
ResultReg)
|
|
|
|
.addGlobalAddress(GV);
|
|
|
|
} else {
|
2015-07-09 10:09:04 +08:00
|
|
|
unsigned Opc =
|
|
|
|
TLI.getPointerTy(DL) == MVT::i32
|
|
|
|
? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r)
|
|
|
|
: X86::LEA64r;
|
2015-02-02 00:15:07 +08:00
|
|
|
addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
|
|
|
TII.get(Opc), ResultReg), AM);
|
|
|
|
}
|
|
|
|
return ResultReg;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
unsigned X86FastISel::fastMaterializeConstant(const Constant *C) {
|
2015-07-09 10:09:04 +08:00
|
|
|
EVT CEVT = TLI.getValueType(DL, C->getType(), true);
|
2015-02-02 00:15:07 +08:00
|
|
|
|
|
|
|
// Only handle simple types.
|
|
|
|
if (!CEVT.isSimple())
|
|
|
|
return 0;
|
|
|
|
MVT VT = CEVT.getSimpleVT();
|
|
|
|
|
|
|
|
if (const auto *CI = dyn_cast<ConstantInt>(C))
|
|
|
|
return X86MaterializeInt(CI, VT);
|
|
|
|
else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
|
|
|
|
return X86MaterializeFP(CFP, VT);
|
|
|
|
else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
|
|
|
|
return X86MaterializeGV(GV, VT);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
unsigned X86FastISel::fastMaterializeAlloca(const AllocaInst *C) {
|
|
|
|
// Fail on dynamic allocas. At this point, getRegForValue has already
|
|
|
|
// checked its CSE maps, so if we're here trying to handle a dynamic
|
|
|
|
// alloca, we're not going to succeed. X86SelectAddress has a
|
|
|
|
// check for dynamic allocas, because it's called directly from
|
|
|
|
// various places, but targetMaterializeAlloca also needs a check
|
|
|
|
// in order to avoid recursion between getRegForValue,
|
|
|
|
// X86SelectAddrss, and targetMaterializeAlloca.
|
|
|
|
if (!FuncInfo.StaticAllocaMap.count(C))
|
|
|
|
return 0;
|
|
|
|
assert(C->isStaticAlloca() && "dynamic alloca in the static alloca map?");
|
|
|
|
|
|
|
|
X86AddressMode AM;
|
|
|
|
if (!X86SelectAddress(C, AM))
|
|
|
|
return 0;
|
2015-07-09 10:09:04 +08:00
|
|
|
unsigned Opc =
|
|
|
|
TLI.getPointerTy(DL) == MVT::i32
|
|
|
|
? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r)
|
|
|
|
: X86::LEA64r;
|
|
|
|
const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy(DL));
|
2015-02-02 00:15:07 +08:00
|
|
|
unsigned ResultReg = createResultReg(RC);
|
|
|
|
addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
|
|
|
TII.get(Opc), ResultReg), AM);
|
|
|
|
return ResultReg;
|
|
|
|
}
|
|
|
|
|
|
|
|
unsigned X86FastISel::fastMaterializeFloatZero(const ConstantFP *CF) {
|
|
|
|
MVT VT;
|
|
|
|
if (!isTypeLegal(CF->getType(), VT))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
// Get opcode and regclass for the given zero.
|
2017-11-01 08:47:45 +08:00
|
|
|
bool HasAVX512 = Subtarget->hasAVX512();
|
2015-02-02 00:15:07 +08:00
|
|
|
unsigned Opc = 0;
|
|
|
|
const TargetRegisterClass *RC = nullptr;
|
|
|
|
switch (VT.SimpleTy) {
|
|
|
|
default: return 0;
|
|
|
|
case MVT::f32:
|
|
|
|
if (X86ScalarSSEf32) {
|
2017-11-01 08:47:45 +08:00
|
|
|
Opc = HasAVX512 ? X86::AVX512_FsFLD0SS : X86::FsFLD0SS;
|
|
|
|
RC = HasAVX512 ? &X86::FR32XRegClass : &X86::FR32RegClass;
|
2015-02-02 00:15:07 +08:00
|
|
|
} else {
|
|
|
|
Opc = X86::LD_Fp032;
|
|
|
|
RC = &X86::RFP32RegClass;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case MVT::f64:
|
|
|
|
if (X86ScalarSSEf64) {
|
2017-11-01 08:47:45 +08:00
|
|
|
Opc = HasAVX512 ? X86::AVX512_FsFLD0SD : X86::FsFLD0SD;
|
|
|
|
RC = HasAVX512 ? &X86::FR64XRegClass : &X86::FR64RegClass;
|
2015-02-02 00:15:07 +08:00
|
|
|
} else {
|
|
|
|
Opc = X86::LD_Fp064;
|
|
|
|
RC = &X86::RFP64RegClass;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case MVT::f80:
|
|
|
|
// No f80 support yet.
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
unsigned ResultReg = createResultReg(RC);
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
|
|
|
|
return ResultReg;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Try to fold the load \p LI into operand \p OpNo of \p MI.
///
/// On success the folded instruction receives the load's memory operand,
/// \p MI is removed, and true is returned. Returns false when the load
/// address cannot be selected or the fold is rejected.
bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
                                      const LoadInst *LI) {
  X86AddressMode AM;
  if (!X86SelectAddress(LI->getPointerOperand(), AM))
    return false;

  const auto &XII = static_cast<const X86InstrInfo &>(TII);

  Type *LoadTy = LI->getType();
  unsigned Size = DL.getTypeAllocSize(LoadTy);

  // Ensure that codegen never sees alignment 0.
  unsigned Alignment = LI->getAlignment();
  if (Alignment == 0)
    Alignment = DL.getABITypeAlignment(LoadTy);

  SmallVector<MachineOperand, 8> AddrOps;
  AM.getFullAddress(AddrOps);

  MachineInstr *Folded = XII.foldMemoryOperandImpl(
      *FuncInfo.MF, *MI, OpNo, AddrOps, FuncInfo.InsertPt, Size, Alignment,
      /*AllowCommute=*/true);
  if (Folded == nullptr)
    return false;

  // The index register could be in the wrong register class. Because
  // foldMemoryOperandImpl may have commuted the instruction, looking at
  // OpNo plus the offset of the index register is not enough: scan every
  // operand for the index register and constrain it where necessary.
  for (unsigned Idx = 0, NumOps = Folded->getNumOperands(); Idx != NumOps;
       ++Idx) {
    MachineOperand &MO = Folded->getOperand(Idx);
    const bool IsIndexRegUse =
        MO.isReg() && !MO.isDef() && MO.getReg() == AM.IndexReg;
    if (!IsIndexRegUse)
      continue;

    // Found the index register; rewrite it only if constraining changed it.
    unsigned Constrained =
        constrainOperandRegClass(Folded->getDesc(), MO.getReg(), Idx);
    if (Constrained != MO.getReg())
      MO.setReg(Constrained);
  }

  Folded->addMemOperand(*FuncInfo.MF, createMachineMemOperandFor(LI));

  // Remove the original instruction.
  MachineBasicBlock::iterator OldMI(MI);
  removeDeadCode(OldMI, std::next(OldMI));
  return true;
}
|
|
|
|
|
2016-12-05 12:51:31 +08:00
|
|
|
unsigned X86FastISel::fastEmitInst_rrrr(unsigned MachineInstOpcode,
|
|
|
|
const TargetRegisterClass *RC,
|
|
|
|
unsigned Op0, bool Op0IsKill,
|
|
|
|
unsigned Op1, bool Op1IsKill,
|
|
|
|
unsigned Op2, bool Op2IsKill,
|
|
|
|
unsigned Op3, bool Op3IsKill) {
|
|
|
|
const MCInstrDesc &II = TII.get(MachineInstOpcode);
|
|
|
|
|
|
|
|
unsigned ResultReg = createResultReg(RC);
|
|
|
|
Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
|
|
|
|
Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1);
|
|
|
|
Op2 = constrainOperandRegClass(II, Op2, II.getNumDefs() + 2);
|
2017-10-02 13:46:53 +08:00
|
|
|
Op3 = constrainOperandRegClass(II, Op3, II.getNumDefs() + 3);
|
2016-12-05 12:51:31 +08:00
|
|
|
|
|
|
|
if (II.getNumDefs() >= 1)
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
|
|
|
|
.addReg(Op0, getKillRegState(Op0IsKill))
|
|
|
|
.addReg(Op1, getKillRegState(Op1IsKill))
|
|
|
|
.addReg(Op2, getKillRegState(Op2IsKill))
|
|
|
|
.addReg(Op3, getKillRegState(Op3IsKill));
|
|
|
|
else {
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
|
|
|
|
.addReg(Op0, getKillRegState(Op0IsKill))
|
|
|
|
.addReg(Op1, getKillRegState(Op1IsKill))
|
|
|
|
.addReg(Op2, getKillRegState(Op2IsKill))
|
|
|
|
.addReg(Op3, getKillRegState(Op3IsKill));
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
|
|
|
TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
|
|
|
|
}
|
|
|
|
return ResultReg;
|
|
|
|
}
|
|
|
|
|
2015-02-02 00:15:07 +08:00
|
|
|
|
|
|
|
namespace llvm {
|
|
|
|
FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo,
|
|
|
|
const TargetLibraryInfo *libInfo) {
|
|
|
|
return new X86FastISel(funcInfo, libInfo);
|
|
|
|
}
|
|
|
|
}
|