2017-07-26 07:51:02 +08:00
|
|
|
//===- AArch64FastISel.cpp - AArch64 FastISel implementation --------------===//
|
2014-03-29 18:18:08 +08:00
|
|
|
//
|
|
|
|
// The LLVM Compiler Infrastructure
|
|
|
|
//
|
|
|
|
// This file is distributed under the University of Illinois Open Source
|
|
|
|
// License. See LICENSE.TXT for details.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
2014-05-24 20:50:23 +08:00
|
|
|
// This file defines the AArch64-specific support for the FastISel class. Some
|
2014-03-29 18:18:08 +08:00
|
|
|
// of the target-specific code is generated by tablegen in the file
|
2014-05-24 20:50:23 +08:00
|
|
|
// AArch64GenFastISel.inc, which is #included here.
|
2014-03-29 18:18:08 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2014-05-24 20:50:23 +08:00
|
|
|
#include "AArch64.h"
|
2014-11-28 05:02:42 +08:00
|
|
|
#include "AArch64CallingConvention.h"
|
2017-01-25 08:29:26 +08:00
|
|
|
#include "AArch64RegisterInfo.h"
|
2014-05-24 20:50:23 +08:00
|
|
|
#include "AArch64Subtarget.h"
|
|
|
|
#include "MCTargetDesc/AArch64AddressingModes.h"
|
2017-01-25 08:29:26 +08:00
|
|
|
#include "Utils/AArch64BaseInfo.h"
|
|
|
|
#include "llvm/ADT/APFloat.h"
|
|
|
|
#include "llvm/ADT/APInt.h"
|
|
|
|
#include "llvm/ADT/DenseMap.h"
|
|
|
|
#include "llvm/ADT/SmallVector.h"
|
2014-08-02 02:39:24 +08:00
|
|
|
#include "llvm/Analysis/BranchProbabilityInfo.h"
|
2014-03-29 18:18:08 +08:00
|
|
|
#include "llvm/CodeGen/CallingConvLower.h"
|
|
|
|
#include "llvm/CodeGen/FastISel.h"
|
|
|
|
#include "llvm/CodeGen/FunctionLoweringInfo.h"
|
2017-01-25 08:29:26 +08:00
|
|
|
#include "llvm/CodeGen/ISDOpcodes.h"
|
|
|
|
#include "llvm/CodeGen/MachineBasicBlock.h"
|
2014-03-29 18:18:08 +08:00
|
|
|
#include "llvm/CodeGen/MachineConstantPool.h"
|
|
|
|
#include "llvm/CodeGen/MachineFrameInfo.h"
|
2017-01-25 08:29:26 +08:00
|
|
|
#include "llvm/CodeGen/MachineInstr.h"
|
2014-03-29 18:18:08 +08:00
|
|
|
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
2017-01-25 08:29:26 +08:00
|
|
|
#include "llvm/CodeGen/MachineMemOperand.h"
|
2014-03-29 18:18:08 +08:00
|
|
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
2017-01-25 08:29:26 +08:00
|
|
|
#include "llvm/CodeGen/RuntimeLibcalls.h"
|
2018-03-30 01:21:10 +08:00
|
|
|
#include "llvm/CodeGen/ValueTypes.h"
|
2017-01-25 08:29:26 +08:00
|
|
|
#include "llvm/IR/Argument.h"
|
|
|
|
#include "llvm/IR/Attributes.h"
|
|
|
|
#include "llvm/IR/BasicBlock.h"
|
2014-03-29 18:18:08 +08:00
|
|
|
#include "llvm/IR/CallingConv.h"
|
2017-01-25 08:29:26 +08:00
|
|
|
#include "llvm/IR/Constant.h"
|
|
|
|
#include "llvm/IR/Constants.h"
|
2014-03-29 18:18:08 +08:00
|
|
|
#include "llvm/IR/DataLayout.h"
|
|
|
|
#include "llvm/IR/DerivedTypes.h"
|
|
|
|
#include "llvm/IR/Function.h"
|
|
|
|
#include "llvm/IR/GetElementPtrTypeIterator.h"
|
2017-01-25 08:29:26 +08:00
|
|
|
#include "llvm/IR/GlobalValue.h"
|
|
|
|
#include "llvm/IR/InstrTypes.h"
|
|
|
|
#include "llvm/IR/Instruction.h"
|
2014-03-29 18:18:08 +08:00
|
|
|
#include "llvm/IR/Instructions.h"
|
|
|
|
#include "llvm/IR/IntrinsicInst.h"
|
2017-07-26 07:51:02 +08:00
|
|
|
#include "llvm/IR/Intrinsics.h"
|
2014-03-29 18:18:08 +08:00
|
|
|
#include "llvm/IR/Operator.h"
|
2017-01-25 08:29:26 +08:00
|
|
|
#include "llvm/IR/Type.h"
|
|
|
|
#include "llvm/IR/User.h"
|
|
|
|
#include "llvm/IR/Value.h"
|
|
|
|
#include "llvm/MC/MCInstrDesc.h"
|
|
|
|
#include "llvm/MC/MCRegisterInfo.h"
|
2015-06-23 20:21:54 +08:00
|
|
|
#include "llvm/MC/MCSymbol.h"
|
2017-01-25 08:29:26 +08:00
|
|
|
#include "llvm/Support/AtomicOrdering.h"
|
|
|
|
#include "llvm/Support/Casting.h"
|
|
|
|
#include "llvm/Support/CodeGen.h"
|
2017-07-26 07:51:02 +08:00
|
|
|
#include "llvm/Support/Compiler.h"
|
2017-01-25 08:29:26 +08:00
|
|
|
#include "llvm/Support/ErrorHandling.h"
|
2018-03-24 07:58:25 +08:00
|
|
|
#include "llvm/Support/MachineValueType.h"
|
2017-01-25 08:29:26 +08:00
|
|
|
#include "llvm/Support/MathExtras.h"
|
|
|
|
#include <algorithm>
|
|
|
|
#include <cassert>
|
|
|
|
#include <cstdint>
|
|
|
|
#include <iterator>
|
|
|
|
#include <utility>
|
|
|
|
|
2014-03-29 18:18:08 +08:00
|
|
|
using namespace llvm;
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
|
2014-09-16 06:33:11 +08:00
|
|
|
class AArch64FastISel final : public FastISel {
|
2014-03-29 18:18:08 +08:00
|
|
|
class Address {
|
|
|
|
public:
|
2017-07-26 07:51:02 +08:00
|
|
|
using BaseKind = enum {
|
2014-03-29 18:18:08 +08:00
|
|
|
RegBase,
|
|
|
|
FrameIndexBase
|
2017-07-26 07:51:02 +08:00
|
|
|
};
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
private:
|
2017-01-25 08:29:26 +08:00
|
|
|
BaseKind Kind = RegBase;
|
|
|
|
AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
|
2014-03-29 18:18:08 +08:00
|
|
|
union {
|
|
|
|
unsigned Reg;
|
|
|
|
int FI;
|
|
|
|
} Base;
|
2017-01-25 08:29:26 +08:00
|
|
|
unsigned OffsetReg = 0;
|
|
|
|
unsigned Shift = 0;
|
|
|
|
int64_t Offset = 0;
|
|
|
|
const GlobalValue *GV = nullptr;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
public:
|
2017-01-25 08:29:26 +08:00
|
|
|
Address() { Base.Reg = 0; }
|
|
|
|
|
2014-03-29 18:18:08 +08:00
|
|
|
void setKind(BaseKind K) { Kind = K; }
|
|
|
|
BaseKind getKind() const { return Kind; }
|
Reapply [FastISel][AArch64] Add support for more addressing modes (r215597).
Note: This was originally reverted to track down a buildbot error. Reapply
without any modifications.
Original commit message:
FastISel didn't take much advantage of the different addressing modes available
to it on AArch64. This commit allows the ComputeAddress method to recognize more
addressing modes that allows shifts and sign-/zero-extensions to be folded into
the memory operation itself.
For Example:
lsl x1, x1, #3 --> ldr x0, [x0, x1, lsl #3]
ldr x0, [x0, x1]
sxtw x1, w1
lsl x1, x1, #3 --> ldr x0, [x0, x1, sxtw #3]
ldr x0, [x0, x1]
llvm-svn: 216013
2014-08-20 03:44:17 +08:00
|
|
|
void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
|
|
|
|
AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
|
2014-03-29 18:18:08 +08:00
|
|
|
bool isRegBase() const { return Kind == RegBase; }
|
|
|
|
bool isFIBase() const { return Kind == FrameIndexBase; }
|
2017-01-25 08:29:26 +08:00
|
|
|
|
2014-03-29 18:18:08 +08:00
|
|
|
void setReg(unsigned Reg) {
|
|
|
|
assert(isRegBase() && "Invalid base register access!");
|
|
|
|
Base.Reg = Reg;
|
|
|
|
}
|
2017-01-25 08:29:26 +08:00
|
|
|
|
2014-03-29 18:18:08 +08:00
|
|
|
unsigned getReg() const {
|
|
|
|
assert(isRegBase() && "Invalid base register access!");
|
|
|
|
return Base.Reg;
|
|
|
|
}
|
2017-01-25 08:29:26 +08:00
|
|
|
|
Reapply [FastISel][AArch64] Add support for more addressing modes (r215597).
Note: This was originally reverted to track down a buildbot error. Reapply
without any modifications.
Original commit message:
FastISel didn't take much advantage of the different addressing modes available
to it on AArch64. This commit allows the ComputeAddress method to recognize more
addressing modes that allows shifts and sign-/zero-extensions to be folded into
the memory operation itself.
For Example:
lsl x1, x1, #3 --> ldr x0, [x0, x1, lsl #3]
ldr x0, [x0, x1]
sxtw x1, w1
lsl x1, x1, #3 --> ldr x0, [x0, x1, sxtw #3]
ldr x0, [x0, x1]
llvm-svn: 216013
2014-08-20 03:44:17 +08:00
|
|
|
void setOffsetReg(unsigned Reg) {
|
|
|
|
OffsetReg = Reg;
|
|
|
|
}
|
2017-01-25 08:29:26 +08:00
|
|
|
|
Reapply [FastISel][AArch64] Add support for more addressing modes (r215597).
Note: This was originally reverted to track down a buildbot error. Reapply
without any modifications.
Original commit message:
FastISel didn't take much advantage of the different addressing modes available
to it on AArch64. This commit allows the ComputeAddress method to recognize more
addressing modes that allows shifts and sign-/zero-extensions to be folded into
the memory operation itself.
For Example:
lsl x1, x1, #3 --> ldr x0, [x0, x1, lsl #3]
ldr x0, [x0, x1]
sxtw x1, w1
lsl x1, x1, #3 --> ldr x0, [x0, x1, sxtw #3]
ldr x0, [x0, x1]
llvm-svn: 216013
2014-08-20 03:44:17 +08:00
|
|
|
unsigned getOffsetReg() const {
|
|
|
|
return OffsetReg;
|
|
|
|
}
|
2017-01-25 08:29:26 +08:00
|
|
|
|
2014-03-29 18:18:08 +08:00
|
|
|
void setFI(unsigned FI) {
|
|
|
|
assert(isFIBase() && "Invalid base frame index access!");
|
|
|
|
Base.FI = FI;
|
|
|
|
}
|
2017-01-25 08:29:26 +08:00
|
|
|
|
2014-03-29 18:18:08 +08:00
|
|
|
unsigned getFI() const {
|
|
|
|
assert(isFIBase() && "Invalid base frame index access!");
|
|
|
|
return Base.FI;
|
|
|
|
}
|
2017-01-25 08:29:26 +08:00
|
|
|
|
2014-03-29 18:18:08 +08:00
|
|
|
void setOffset(int64_t O) { Offset = O; }
|
|
|
|
int64_t getOffset() { return Offset; }
|
Reapply [FastISel][AArch64] Add support for more addressing modes (r215597).
Note: This was originally reverted to track down a buildbot error. Reapply
without any modifications.
Original commit message:
FastISel didn't take much advantage of the different addressing modes available
to it on AArch64. This commit allows the ComputeAddress method to recognize more
addressing modes that allows shifts and sign-/zero-extensions to be folded into
the memory operation itself.
For Example:
lsl x1, x1, #3 --> ldr x0, [x0, x1, lsl #3]
ldr x0, [x0, x1]
sxtw x1, w1
lsl x1, x1, #3 --> ldr x0, [x0, x1, sxtw #3]
ldr x0, [x0, x1]
llvm-svn: 216013
2014-08-20 03:44:17 +08:00
|
|
|
void setShift(unsigned S) { Shift = S; }
|
|
|
|
unsigned getShift() { return Shift; }
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2014-07-31 12:10:40 +08:00
|
|
|
void setGlobalValue(const GlobalValue *G) { GV = G; }
|
|
|
|
const GlobalValue *getGlobalValue() { return GV; }
|
2014-03-29 18:18:08 +08:00
|
|
|
};
|
|
|
|
|
2014-05-24 20:50:23 +08:00
|
|
|
/// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
|
2014-03-29 18:18:08 +08:00
|
|
|
/// make the right decision when generating code for different targets.
|
2014-05-24 20:50:23 +08:00
|
|
|
const AArch64Subtarget *Subtarget;
|
2014-03-29 18:18:08 +08:00
|
|
|
LLVMContext *Context;
|
|
|
|
|
2014-09-04 04:56:52 +08:00
|
|
|
bool fastLowerArguments() override;
|
|
|
|
bool fastLowerCall(CallLoweringInfo &CLI) override;
|
|
|
|
bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
|
2014-07-23 07:14:58 +08:00
|
|
|
|
2014-03-29 18:18:08 +08:00
|
|
|
private:
|
|
|
|
// Selection routines.
|
2014-09-03 09:38:36 +08:00
|
|
|
bool selectAddSub(const Instruction *I);
|
2014-09-16 05:27:56 +08:00
|
|
|
bool selectLogicalOp(const Instruction *I);
|
2014-09-16 07:20:17 +08:00
|
|
|
bool selectLoad(const Instruction *I);
|
|
|
|
bool selectStore(const Instruction *I);
|
|
|
|
bool selectBranch(const Instruction *I);
|
|
|
|
bool selectIndirectBr(const Instruction *I);
|
|
|
|
bool selectCmp(const Instruction *I);
|
|
|
|
bool selectSelect(const Instruction *I);
|
|
|
|
bool selectFPExt(const Instruction *I);
|
|
|
|
bool selectFPTrunc(const Instruction *I);
|
|
|
|
bool selectFPToInt(const Instruction *I, bool Signed);
|
|
|
|
bool selectIntToFP(const Instruction *I, bool Signed);
|
|
|
|
bool selectRem(const Instruction *I, unsigned ISDOpcode);
|
|
|
|
bool selectRet(const Instruction *I);
|
|
|
|
bool selectTrunc(const Instruction *I);
|
|
|
|
bool selectIntExt(const Instruction *I);
|
|
|
|
bool selectMul(const Instruction *I);
|
|
|
|
bool selectShift(const Instruction *I);
|
|
|
|
bool selectBitCast(const Instruction *I);
|
2014-09-16 06:07:49 +08:00
|
|
|
bool selectFRem(const Instruction *I);
|
2014-09-18 05:55:55 +08:00
|
|
|
bool selectSDiv(const Instruction *I);
|
2014-10-16 02:58:07 +08:00
|
|
|
bool selectGetElementPtr(const Instruction *I);
|
2016-07-21 05:12:32 +08:00
|
|
|
bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
// Utility helper routines.
|
|
|
|
bool isTypeLegal(Type *Ty, MVT &VT);
|
2014-09-16 05:27:54 +08:00
|
|
|
bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
|
2014-08-29 08:19:21 +08:00
|
|
|
bool isValueAvailable(const Value *V) const;
|
2014-09-16 07:20:17 +08:00
|
|
|
bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
|
|
|
|
bool computeCallAddress(const Value *V, Address &Addr);
|
|
|
|
bool simplifyAddress(Address &Addr, MVT VT);
|
|
|
|
void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
|
2016-07-16 02:26:59 +08:00
|
|
|
MachineMemOperand::Flags Flags,
|
|
|
|
unsigned ScaleFactor, MachineMemOperand *MMO);
|
2014-09-16 07:20:17 +08:00
|
|
|
bool isMemCpySmall(uint64_t Len, unsigned Alignment);
|
|
|
|
bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
|
2014-03-29 18:18:08 +08:00
|
|
|
unsigned Alignment);
|
2014-07-31 06:04:34 +08:00
|
|
|
bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
|
|
|
|
const Value *Cond);
|
2014-10-15 04:36:02 +08:00
|
|
|
bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
|
2014-11-13 08:36:46 +08:00
|
|
|
bool optimizeSelect(const SelectInst *SI);
|
2014-11-14 04:50:44 +08:00
|
|
|
std::pair<unsigned, bool> getRegForGEPIndex(const Value *Idx);
|
2014-07-31 06:04:34 +08:00
|
|
|
|
2014-08-20 06:29:55 +08:00
|
|
|
// Emit helper routines.
|
2014-09-03 09:38:36 +08:00
|
|
|
unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
|
|
|
|
const Value *RHS, bool SetFlags = false,
|
|
|
|
bool WantResult = true, bool IsZExt = false);
|
|
|
|
unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
|
|
|
|
bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
|
|
|
|
bool SetFlags = false, bool WantResult = true);
|
|
|
|
unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
|
|
|
|
bool LHSIsKill, uint64_t Imm, bool SetFlags = false,
|
|
|
|
bool WantResult = true);
|
2014-08-27 08:58:30 +08:00
|
|
|
unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
|
|
|
|
bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
|
|
|
|
AArch64_AM::ShiftExtendType ShiftType,
|
2014-09-03 09:38:36 +08:00
|
|
|
uint64_t ShiftImm, bool SetFlags = false,
|
|
|
|
bool WantResult = true);
|
2014-08-27 08:58:30 +08:00
|
|
|
unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
|
|
|
|
bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
|
|
|
|
AArch64_AM::ShiftExtendType ExtType,
|
2014-09-03 09:38:36 +08:00
|
|
|
uint64_t ShiftImm, bool SetFlags = false,
|
|
|
|
bool WantResult = true);
|
2014-08-20 06:29:55 +08:00
|
|
|
|
2014-03-29 18:18:08 +08:00
|
|
|
// Emit functions.
|
2014-10-01 03:59:35 +08:00
|
|
|
bool emitCompareAndBranch(const BranchInst *BI);
|
2014-08-20 06:29:55 +08:00
|
|
|
bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
|
|
|
|
bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
|
|
|
|
bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
|
|
|
|
bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
|
2014-10-15 04:36:02 +08:00
|
|
|
unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
|
|
|
|
MachineMemOperand *MMO = nullptr);
|
2014-09-16 07:20:17 +08:00
|
|
|
bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
|
Reapply [FastISel][AArch64] Add support for more addressing modes (r215597).
Note: This was originally reverted to track down a buildbot error. Reapply
without any modifications.
Original commit message:
FastISel didn't take much advantage of the different addressing modes available
to it on AArch64. This commit allows the ComputeAddress method to recognize more
addressing modes that allows shifts and sign-/zero-extensions to be folded into
the memory operation itself.
For Example:
lsl x1, x1, #3 --> ldr x0, [x0, x1, lsl #3]
ldr x0, [x0, x1]
sxtw x1, w1
lsl x1, x1, #3 --> ldr x0, [x0, x1, sxtw #3]
ldr x0, [x0, x1]
llvm-svn: 216013
2014-08-20 03:44:17 +08:00
|
|
|
MachineMemOperand *MMO = nullptr);
|
2016-07-21 05:12:27 +08:00
|
|
|
bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
|
|
|
|
MachineMemOperand *MMO = nullptr);
|
2014-09-16 07:20:17 +08:00
|
|
|
unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
|
|
|
|
unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
|
2014-09-03 09:38:36 +08:00
|
|
|
unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
|
|
|
|
bool SetFlags = false, bool WantResult = true,
|
|
|
|
bool IsZExt = false);
|
2014-10-16 02:58:02 +08:00
|
|
|
unsigned emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill, int64_t Imm);
|
2014-09-03 09:38:36 +08:00
|
|
|
unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
|
|
|
|
bool SetFlags = false, bool WantResult = true,
|
|
|
|
bool IsZExt = false);
|
2014-08-20 06:29:55 +08:00
|
|
|
unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
|
|
|
|
unsigned RHSReg, bool RHSIsKill, bool WantResult = true);
|
|
|
|
unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
|
|
|
|
unsigned RHSReg, bool RHSIsKill,
|
|
|
|
AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
|
|
|
|
bool WantResult = true);
|
2014-09-04 09:29:18 +08:00
|
|
|
unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
|
|
|
|
const Value *RHS);
|
|
|
|
unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
|
|
|
|
bool LHSIsKill, uint64_t Imm);
|
|
|
|
unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
|
|
|
|
bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
|
|
|
|
uint64_t ShiftImm);
|
|
|
|
unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
|
2014-09-16 07:20:17 +08:00
|
|
|
unsigned emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
|
|
|
|
unsigned Op1, bool Op1IsKill);
|
|
|
|
unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
|
|
|
|
unsigned Op1, bool Op1IsKill);
|
|
|
|
unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
|
|
|
|
unsigned Op1, bool Op1IsKill);
|
2014-08-22 07:06:07 +08:00
|
|
|
unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
|
|
|
|
unsigned Op1Reg, bool Op1IsKill);
|
2014-08-27 08:58:26 +08:00
|
|
|
unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
|
|
|
|
uint64_t Imm, bool IsZExt = true);
|
2014-08-22 07:06:07 +08:00
|
|
|
unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
|
|
|
|
unsigned Op1Reg, bool Op1IsKill);
|
2014-08-27 08:58:26 +08:00
|
|
|
unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
|
|
|
|
uint64_t Imm, bool IsZExt = true);
|
2014-08-22 07:06:07 +08:00
|
|
|
unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
|
|
|
|
unsigned Op1Reg, bool Op1IsKill);
|
2014-08-27 08:58:26 +08:00
|
|
|
unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
|
|
|
|
uint64_t Imm, bool IsZExt = false);
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2014-09-16 07:20:17 +08:00
|
|
|
unsigned materializeInt(const ConstantInt *CI, MVT VT);
|
|
|
|
unsigned materializeFP(const ConstantFP *CFP, MVT VT);
|
|
|
|
unsigned materializeGV(const GlobalValue *GV);
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
// Call handling routines.
|
|
|
|
private:
|
|
|
|
CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
|
2014-09-16 07:20:17 +08:00
|
|
|
bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
|
2014-03-29 18:18:08 +08:00
|
|
|
unsigned &NumBytes);
|
2014-09-16 07:20:17 +08:00
|
|
|
bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
public:
|
|
|
|
// Backend specific FastISel code.
|
2014-09-04 04:56:52 +08:00
|
|
|
unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
|
|
|
|
unsigned fastMaterializeConstant(const Constant *C) override;
|
|
|
|
unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2014-09-03 05:32:54 +08:00
|
|
|
explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
|
2015-01-30 09:10:24 +08:00
|
|
|
const TargetLibraryInfo *LibInfo)
|
2014-09-03 05:32:54 +08:00
|
|
|
: FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
|
2015-01-30 09:10:24 +08:00
|
|
|
Subtarget =
|
|
|
|
&static_cast<const AArch64Subtarget &>(FuncInfo.MF->getSubtarget());
|
2014-09-03 05:32:54 +08:00
|
|
|
Context = &FuncInfo.Fn->getContext();
|
2014-03-29 18:18:08 +08:00
|
|
|
}
|
|
|
|
|
2014-09-04 04:56:52 +08:00
|
|
|
bool fastSelectInstruction(const Instruction *I) override;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2014-05-24 20:50:23 +08:00
|
|
|
#include "AArch64GenFastISel.inc"
|
2014-03-29 18:18:08 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
} // end anonymous namespace
|
|
|
|
|
2014-05-24 20:50:23 +08:00
|
|
|
#include "AArch64GenCallingConv.inc"
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2018-05-01 23:54:18 +08:00
|
|
|
/// Check if the sign-/zero-extend will be a noop.
|
2014-09-30 08:49:58 +08:00
|
|
|
static bool isIntExtFree(const Instruction *I) {
|
|
|
|
assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
|
|
|
|
"Unexpected integer extend instruction.");
|
2014-10-07 11:39:59 +08:00
|
|
|
assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
|
|
|
|
"Unexpected value type.");
|
2014-09-30 08:49:58 +08:00
|
|
|
bool IsZExt = isa<ZExtInst>(I);
|
|
|
|
|
|
|
|
if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
|
|
|
|
if (LI->hasOneUse())
|
|
|
|
return true;
|
|
|
|
|
|
|
|
if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
|
|
|
|
if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
|
|
|
|
return true;
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2018-05-01 23:54:18 +08:00
|
|
|
/// Determine the implicit scale factor that is applied by a memory
|
2014-09-30 08:49:54 +08:00
|
|
|
/// operation for a given value type.
|
|
|
|
static unsigned getImplicitScaleFactor(MVT VT) {
|
|
|
|
switch (VT.SimpleTy) {
|
|
|
|
default:
|
|
|
|
return 0; // invalid
|
|
|
|
case MVT::i1: // fall-through
|
|
|
|
case MVT::i8:
|
|
|
|
return 1;
|
|
|
|
case MVT::i16:
|
|
|
|
return 2;
|
|
|
|
case MVT::i32: // fall-through
|
|
|
|
case MVT::f32:
|
|
|
|
return 4;
|
|
|
|
case MVT::i64: // fall-through
|
|
|
|
case MVT::f64:
|
|
|
|
return 8;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-05-24 20:50:23 +08:00
|
|
|
/// Pick the calling-convention assignment function for \p CC: dedicated
/// handlers for WebKit_JS and GHC, otherwise DarwinPCS on Darwin targets and
/// AAPCS everywhere else.
CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
  switch (CC) {
  case CallingConv::WebKit_JS:
    return CC_AArch64_WebKit_JS;
  case CallingConv::GHC:
    return CC_AArch64_GHC;
  default:
    break;
  }

  if (Subtarget->isTargetDarwin())
    return CC_AArch64_DarwinPCS;
  return CC_AArch64_AAPCS;
}
|
|
|
|
|
2014-09-04 04:56:52 +08:00
|
|
|
unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
|
2015-07-09 10:09:04 +08:00
|
|
|
assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
|
2014-03-29 18:18:08 +08:00
|
|
|
"Alloca should always return a pointer.");
|
|
|
|
|
|
|
|
// Don't handle dynamic allocas.
|
|
|
|
if (!FuncInfo.StaticAllocaMap.count(AI))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
DenseMap<const AllocaInst *, int>::iterator SI =
|
|
|
|
FuncInfo.StaticAllocaMap.find(AI);
|
|
|
|
|
|
|
|
if (SI != FuncInfo.StaticAllocaMap.end()) {
|
2014-08-22 04:57:57 +08:00
|
|
|
unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
|
2014-05-24 20:50:23 +08:00
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
|
2014-03-29 18:18:08 +08:00
|
|
|
ResultReg)
|
|
|
|
.addFrameIndex(SI->second)
|
|
|
|
.addImm(0)
|
|
|
|
.addImm(0);
|
|
|
|
return ResultReg;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-09-16 07:20:17 +08:00
|
|
|
unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
|
2014-08-16 02:55:52 +08:00
|
|
|
if (VT > MVT::i64)
|
|
|
|
return 0;
|
2014-08-20 03:44:02 +08:00
|
|
|
|
|
|
|
if (!CI->isZero())
|
2014-09-04 04:56:59 +08:00
|
|
|
return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
|
2014-08-20 03:44:02 +08:00
|
|
|
|
|
|
|
// Create a copy from the zero register to materialize a "0" value.
|
|
|
|
const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
|
|
|
|
: &AArch64::GPR32RegClass;
|
|
|
|
unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
|
|
|
|
unsigned ResultReg = createResultReg(RC);
|
2014-08-22 04:57:57 +08:00
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
|
|
|
|
ResultReg).addReg(ZeroReg, getKillRegState(true));
|
2014-08-20 03:44:02 +08:00
|
|
|
return ResultReg;
|
2014-08-16 02:55:52 +08:00
|
|
|
}
|
|
|
|
|
2014-09-16 07:20:17 +08:00
|
|
|
unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
|
2014-08-26 03:58:05 +08:00
|
|
|
// Positive zero (+0.0) has to be materialized with a fmov from the zero
|
|
|
|
// register, because the immediate version of fmov cannot encode zero.
|
|
|
|
if (CFP->isNullValue())
|
2014-09-04 04:56:52 +08:00
|
|
|
return fastMaterializeFloatZero(CFP);
|
2014-08-26 03:58:05 +08:00
|
|
|
|
2014-04-30 23:29:57 +08:00
|
|
|
if (VT != MVT::f32 && VT != MVT::f64)
|
|
|
|
return 0;
|
|
|
|
|
2014-03-29 18:18:08 +08:00
|
|
|
const APFloat Val = CFP->getValueAPF();
|
2014-08-16 02:55:52 +08:00
|
|
|
bool Is64Bit = (VT == MVT::f64);
|
2014-03-29 18:18:08 +08:00
|
|
|
// This checks to see if we can use FMOV instructions to materialize
|
|
|
|
// a constant, otherwise we have to materialize via the constant pool.
|
|
|
|
if (TLI.isFPImmLegal(Val, VT)) {
|
2014-08-26 03:58:05 +08:00
|
|
|
int Imm =
|
|
|
|
Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
|
|
|
|
assert((Imm != -1) && "Cannot encode floating-point constant.");
|
2014-08-16 02:55:52 +08:00
|
|
|
unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
|
2014-09-04 04:56:59 +08:00
|
|
|
return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
|
2014-03-29 18:18:08 +08:00
|
|
|
}
|
|
|
|
|
2014-12-11 03:43:32 +08:00
|
|
|
// For the MachO large code model materialize the FP constant in code.
|
|
|
|
if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
|
|
|
|
unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
|
|
|
|
const TargetRegisterClass *RC = Is64Bit ?
|
|
|
|
&AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
|
|
|
|
|
|
|
|
unsigned TmpReg = createResultReg(RC);
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc1), TmpReg)
|
|
|
|
.addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
|
|
|
|
|
|
|
|
unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
|
|
|
TII.get(TargetOpcode::COPY), ResultReg)
|
|
|
|
.addReg(TmpReg, getKillRegState(true));
|
|
|
|
|
|
|
|
return ResultReg;
|
|
|
|
}
|
|
|
|
|
2014-03-29 18:18:08 +08:00
|
|
|
// Materialize via constant pool. MachineConstantPool wants an explicit
|
|
|
|
// alignment.
|
|
|
|
unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
|
|
|
|
if (Align == 0)
|
|
|
|
Align = DL.getTypeAllocSize(CFP->getType());
|
|
|
|
|
2014-08-16 02:55:52 +08:00
|
|
|
unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
|
2014-05-24 20:50:23 +08:00
|
|
|
unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
|
2014-08-26 03:58:05 +08:00
|
|
|
ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2014-08-16 02:55:52 +08:00
|
|
|
unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
|
2014-03-29 18:18:08 +08:00
|
|
|
unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
|
2014-08-26 03:58:05 +08:00
|
|
|
.addReg(ADRPReg)
|
|
|
|
.addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
|
2014-03-29 18:18:08 +08:00
|
|
|
return ResultReg;
|
|
|
|
}
|
|
|
|
|
2014-09-16 07:20:17 +08:00
|
|
|
unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
|
2014-05-29 02:15:43 +08:00
|
|
|
// We can't handle thread-local variables quickly yet.
|
|
|
|
if (GV->isThreadLocal())
|
|
|
|
return 0;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2014-05-25 03:45:41 +08:00
|
|
|
// MachO still uses GOT for large code-model accesses, but ELF requires
|
|
|
|
// movz/movk sequences, which FastISel doesn't handle yet.
|
2017-04-05 03:51:53 +08:00
|
|
|
if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
|
2014-05-25 03:45:41 +08:00
|
|
|
return 0;
|
|
|
|
|
2014-03-29 18:18:08 +08:00
|
|
|
unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
|
|
|
|
|
2015-07-09 10:09:04 +08:00
|
|
|
EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
|
2014-03-29 18:18:08 +08:00
|
|
|
if (!DestEVT.isSimple())
|
|
|
|
return 0;
|
|
|
|
|
2014-05-24 20:50:23 +08:00
|
|
|
unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
|
2014-04-15 21:59:53 +08:00
|
|
|
unsigned ResultReg;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2014-05-24 20:50:23 +08:00
|
|
|
if (OpFlags & AArch64II::MO_GOT) {
|
2014-03-29 18:18:08 +08:00
|
|
|
// ADRP + LDRX
|
2014-05-24 20:50:23 +08:00
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
|
2014-03-29 18:18:08 +08:00
|
|
|
ADRPReg)
|
2018-01-31 03:50:51 +08:00
|
|
|
.addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
|
2014-04-15 21:59:53 +08:00
|
|
|
|
2014-05-24 20:50:23 +08:00
|
|
|
ResultReg = createResultReg(&AArch64::GPR64RegClass);
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
|
2014-03-29 18:18:08 +08:00
|
|
|
ResultReg)
|
2018-01-31 03:50:51 +08:00
|
|
|
.addReg(ADRPReg)
|
|
|
|
.addGlobalAddress(GV, 0,
|
|
|
|
AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags);
|
2014-03-29 18:18:08 +08:00
|
|
|
} else {
|
|
|
|
// ADRP + ADDX
|
2014-05-24 20:50:23 +08:00
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
|
2014-08-16 02:55:52 +08:00
|
|
|
ADRPReg)
|
2018-01-31 03:50:51 +08:00
|
|
|
.addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
|
2014-04-15 21:59:53 +08:00
|
|
|
|
2014-05-24 20:50:23 +08:00
|
|
|
ResultReg = createResultReg(&AArch64::GPR64spRegClass);
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
|
2014-03-29 18:18:08 +08:00
|
|
|
ResultReg)
|
2018-01-31 03:50:51 +08:00
|
|
|
.addReg(ADRPReg)
|
|
|
|
.addGlobalAddress(GV, 0,
|
|
|
|
AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)
|
|
|
|
.addImm(0);
|
2014-03-29 18:18:08 +08:00
|
|
|
}
|
|
|
|
return ResultReg;
|
|
|
|
}
|
|
|
|
|
2014-09-04 04:56:52 +08:00
|
|
|
unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
|
2015-07-09 10:09:04 +08:00
|
|
|
EVT CEVT = TLI.getValueType(DL, C->getType(), true);
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
// Only handle simple types.
|
|
|
|
if (!CEVT.isSimple())
|
|
|
|
return 0;
|
|
|
|
MVT VT = CEVT.getSimpleVT();
|
|
|
|
|
2014-08-16 02:55:52 +08:00
|
|
|
if (const auto *CI = dyn_cast<ConstantInt>(C))
|
2014-09-16 07:20:17 +08:00
|
|
|
return materializeInt(CI, VT);
|
2014-08-16 02:55:52 +08:00
|
|
|
else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
|
2014-09-16 07:20:17 +08:00
|
|
|
return materializeFP(CFP, VT);
|
2014-03-29 18:18:08 +08:00
|
|
|
else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
|
2014-09-16 07:20:17 +08:00
|
|
|
return materializeGV(GV);
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-09-04 04:56:52 +08:00
|
|
|
unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
|
2014-08-26 03:58:05 +08:00
|
|
|
assert(CFP->isNullValue() &&
|
|
|
|
"Floating-point constant is not a positive zero.");
|
|
|
|
MVT VT;
|
|
|
|
if (!isTypeLegal(CFP->getType(), VT))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (VT != MVT::f32 && VT != MVT::f64)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
bool Is64Bit = (VT == MVT::f64);
|
|
|
|
unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
|
|
|
|
unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
|
2014-09-04 04:56:59 +08:00
|
|
|
return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
|
2014-08-26 03:58:05 +08:00
|
|
|
}
|
|
|
|
|
2018-05-01 23:54:18 +08:00
|
|
|
/// Check if the multiply is by a power-of-2 constant.
|
2014-09-18 03:19:31 +08:00
|
|
|
static bool isMulPowOf2(const Value *I) {
|
|
|
|
if (const auto *MI = dyn_cast<MulOperator>(I)) {
|
|
|
|
if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
|
|
|
|
if (C->getValue().isPowerOf2())
|
|
|
|
return true;
|
|
|
|
if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
|
|
|
|
if (C->getValue().isPowerOf2())
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2014-03-29 18:18:08 +08:00
|
|
|
// Computes the address to get to an object.
|
2014-09-16 07:20:17 +08:00
|
|
|
bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
|
2014-08-28 07:09:40 +08:00
|
|
|
{
|
2014-04-25 13:30:21 +08:00
|
|
|
const User *U = nullptr;
|
2014-03-29 18:18:08 +08:00
|
|
|
unsigned Opcode = Instruction::UserOp1;
|
|
|
|
if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
|
|
|
|
// Don't walk into other basic blocks unless the object is an alloca from
|
|
|
|
// another block, otherwise it may not have a virtual register assigned.
|
|
|
|
if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
|
|
|
|
FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
|
|
|
|
Opcode = I->getOpcode();
|
|
|
|
U = I;
|
|
|
|
}
|
|
|
|
} else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
|
|
|
|
Opcode = C->getOpcode();
|
|
|
|
U = C;
|
|
|
|
}
|
|
|
|
|
2015-08-02 06:20:21 +08:00
|
|
|
if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
|
2014-03-29 18:18:08 +08:00
|
|
|
if (Ty->getAddressSpace() > 255)
|
|
|
|
// Fast instruction selection doesn't support the special
|
|
|
|
// address spaces.
|
|
|
|
return false;
|
|
|
|
|
|
|
|
switch (Opcode) {
|
|
|
|
default:
|
|
|
|
break;
|
2017-01-25 08:29:26 +08:00
|
|
|
case Instruction::BitCast:
|
2014-03-29 18:18:08 +08:00
|
|
|
// Look through bitcasts.
|
2014-09-16 07:20:17 +08:00
|
|
|
return computeAddress(U->getOperand(0), Addr, Ty);
|
2017-01-25 08:29:26 +08:00
|
|
|
|
|
|
|
case Instruction::IntToPtr:
|
2014-03-29 18:18:08 +08:00
|
|
|
// Look past no-op inttoptrs.
|
2015-07-09 10:09:04 +08:00
|
|
|
if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
|
|
|
|
TLI.getPointerTy(DL))
|
2014-09-16 07:20:17 +08:00
|
|
|
return computeAddress(U->getOperand(0), Addr, Ty);
|
2014-03-29 18:18:08 +08:00
|
|
|
break;
|
2017-01-25 08:29:26 +08:00
|
|
|
|
|
|
|
case Instruction::PtrToInt:
|
2014-08-28 07:09:40 +08:00
|
|
|
// Look past no-op ptrtoints.
|
2015-07-09 10:09:04 +08:00
|
|
|
if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
|
2014-09-16 07:20:17 +08:00
|
|
|
return computeAddress(U->getOperand(0), Addr, Ty);
|
2014-03-29 18:18:08 +08:00
|
|
|
break;
|
2017-01-25 08:29:26 +08:00
|
|
|
|
2014-03-29 18:18:08 +08:00
|
|
|
case Instruction::GetElementPtr: {
|
|
|
|
Address SavedAddr = Addr;
|
|
|
|
uint64_t TmpOffset = Addr.getOffset();
|
|
|
|
|
|
|
|
// Iterate through the GEP folding the constants into offsets where
|
|
|
|
// we can.
|
2016-01-20 08:26:52 +08:00
|
|
|
for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
|
|
|
|
GTI != E; ++GTI) {
|
|
|
|
const Value *Op = GTI.getOperand();
|
2016-12-02 10:24:42 +08:00
|
|
|
if (StructType *STy = GTI.getStructTypeOrNull()) {
|
2014-03-29 18:18:08 +08:00
|
|
|
const StructLayout *SL = DL.getStructLayout(STy);
|
|
|
|
unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
|
|
|
|
TmpOffset += SL->getElementOffset(Idx);
|
|
|
|
} else {
|
|
|
|
uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
|
2017-01-25 08:29:26 +08:00
|
|
|
while (true) {
|
2014-03-29 18:18:08 +08:00
|
|
|
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
|
|
|
|
// Constant-offset addressing.
|
|
|
|
TmpOffset += CI->getSExtValue() * S;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (canFoldAddIntoGEP(U, Op)) {
|
|
|
|
// A compatible add with a constant operand. Fold the constant.
|
|
|
|
ConstantInt *CI =
|
|
|
|
cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
|
|
|
|
TmpOffset += CI->getSExtValue() * S;
|
|
|
|
// Iterate on the other operand.
|
|
|
|
Op = cast<AddOperator>(Op)->getOperand(0);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
// Unsupported
|
|
|
|
goto unsupported_gep;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Try to grab the base operand now.
|
|
|
|
Addr.setOffset(TmpOffset);
|
2014-09-16 07:20:17 +08:00
|
|
|
if (computeAddress(U->getOperand(0), Addr, Ty))
|
2014-03-29 18:18:08 +08:00
|
|
|
return true;
|
|
|
|
|
|
|
|
// We failed, restore everything and try the other options.
|
|
|
|
Addr = SavedAddr;
|
|
|
|
|
|
|
|
unsupported_gep:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case Instruction::Alloca: {
|
|
|
|
const AllocaInst *AI = cast<AllocaInst>(Obj);
|
|
|
|
DenseMap<const AllocaInst *, int>::iterator SI =
|
|
|
|
FuncInfo.StaticAllocaMap.find(AI);
|
|
|
|
if (SI != FuncInfo.StaticAllocaMap.end()) {
|
|
|
|
Addr.setKind(Address::FrameIndexBase);
|
|
|
|
Addr.setFI(SI->second);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
Reapply [FastISel][AArch64] Add support for more addressing modes (r215597).
Note: This was originally reverted to track down a buildbot error. Reapply
without any modifications.
Original commit message:
FastISel didn't take much advantage of the different addressing modes available
to it on AArch64. This commit allows the ComputeAddress method to recognize more
addressing modes that allows shifts and sign-/zero-extensions to be folded into
the memory operation itself.
For Example:
lsl x1, x1, #3 --> ldr x0, [x0, x1, lsl #3]
ldr x0, [x0, x1]
sxtw x1, w1
lsl x1, x1, #3 --> ldr x0, [x0, x1, sxtw #3]
ldr x0, [x0, x1]
llvm-svn: 216013
2014-08-20 03:44:17 +08:00
|
|
|
case Instruction::Add: {
|
2014-08-02 03:40:16 +08:00
|
|
|
// Adds of constants are common and easy enough.
|
Reapply [FastISel][AArch64] Add support for more addressing modes (r215597).
Note: This was originally reverted to track down a buildbot error. Reapply
without any modifications.
Original commit message:
FastISel didn't take much advantage of the different addressing modes available
to it on AArch64. This commit allows the ComputeAddress method to recognize more
addressing modes that allows shifts and sign-/zero-extensions to be folded into
the memory operation itself.
For Example:
lsl x1, x1, #3 --> ldr x0, [x0, x1, lsl #3]
ldr x0, [x0, x1]
sxtw x1, w1
lsl x1, x1, #3 --> ldr x0, [x0, x1, sxtw #3]
ldr x0, [x0, x1]
llvm-svn: 216013
2014-08-20 03:44:17 +08:00
|
|
|
const Value *LHS = U->getOperand(0);
|
|
|
|
const Value *RHS = U->getOperand(1);
|
|
|
|
|
|
|
|
if (isa<ConstantInt>(LHS))
|
|
|
|
std::swap(LHS, RHS);
|
|
|
|
|
|
|
|
if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
|
2014-10-07 11:40:03 +08:00
|
|
|
Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
|
2014-09-16 07:20:17 +08:00
|
|
|
return computeAddress(LHS, Addr, Ty);
|
2014-08-02 03:40:16 +08:00
|
|
|
}
|
Reapply [FastISel][AArch64] Add support for more addressing modes (r215597).
Note: This was originally reverted to track down a buildbot error. Reapply
without any modifications.
Original commit message:
FastISel didn't take much advantage of the different addressing modes available
to it on AArch64. This commit allows the ComputeAddress method to recognize more
addressing modes that allows shifts and sign-/zero-extensions to be folded into
the memory operation itself.
For Example:
lsl x1, x1, #3 --> ldr x0, [x0, x1, lsl #3]
ldr x0, [x0, x1]
sxtw x1, w1
lsl x1, x1, #3 --> ldr x0, [x0, x1, sxtw #3]
ldr x0, [x0, x1]
llvm-svn: 216013
2014-08-20 03:44:17 +08:00
|
|
|
|
|
|
|
Address Backup = Addr;
|
2014-09-16 07:20:17 +08:00
|
|
|
if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
|
Reapply [FastISel][AArch64] Add support for more addressing modes (r215597).
Note: This was originally reverted to track down a buildbot error. Reapply
without any modifications.
Original commit message:
FastISel didn't take much advantage of the different addressing modes available
to it on AArch64. This commit allows the ComputeAddress method to recognize more
addressing modes that allows shifts and sign-/zero-extensions to be folded into
the memory operation itself.
For Example:
lsl x1, x1, #3 --> ldr x0, [x0, x1, lsl #3]
ldr x0, [x0, x1]
sxtw x1, w1
lsl x1, x1, #3 --> ldr x0, [x0, x1, sxtw #3]
ldr x0, [x0, x1]
llvm-svn: 216013
2014-08-20 03:44:17 +08:00
|
|
|
return true;
|
|
|
|
Addr = Backup;
|
|
|
|
|
2014-08-02 03:40:16 +08:00
|
|
|
break;
|
2014-03-29 18:18:08 +08:00
|
|
|
}
|
2014-10-07 11:40:03 +08:00
|
|
|
case Instruction::Sub: {
|
|
|
|
// Subs of constants are common and easy enough.
|
|
|
|
const Value *LHS = U->getOperand(0);
|
|
|
|
const Value *RHS = U->getOperand(1);
|
|
|
|
|
|
|
|
if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
|
|
|
|
Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
|
|
|
|
return computeAddress(LHS, Addr, Ty);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
2014-09-20 06:23:46 +08:00
|
|
|
case Instruction::Shl: {
|
Reapply [FastISel][AArch64] Add support for more addressing modes (r215597).
Note: This was originally reverted to track down a buildbot error. Reapply
without any modifications.
Original commit message:
FastISel didn't take much advantage of the different addressing modes available
to it on AArch64. This commit allows the ComputeAddress method to recognize more
addressing modes that allows shifts and sign-/zero-extensions to be folded into
the memory operation itself.
For Example:
lsl x1, x1, #3 --> ldr x0, [x0, x1, lsl #3]
ldr x0, [x0, x1]
sxtw x1, w1
lsl x1, x1, #3 --> ldr x0, [x0, x1, sxtw #3]
ldr x0, [x0, x1]
llvm-svn: 216013
2014-08-20 03:44:17 +08:00
|
|
|
if (Addr.getOffsetReg())
|
|
|
|
break;
|
|
|
|
|
2014-09-30 08:49:58 +08:00
|
|
|
const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
|
|
|
|
if (!CI)
|
|
|
|
break;
|
Reapply [FastISel][AArch64] Add support for more addressing modes (r215597).
Note: This was originally reverted to track down a buildbot error. Reapply
without any modifications.
Original commit message:
FastISel didn't take much advantage of the different addressing modes available
to it on AArch64. This commit allows the ComputeAddress method to recognize more
addressing modes that allows shifts and sign-/zero-extensions to be folded into
the memory operation itself.
For Example:
lsl x1, x1, #3 --> ldr x0, [x0, x1, lsl #3]
ldr x0, [x0, x1]
sxtw x1, w1
lsl x1, x1, #3 --> ldr x0, [x0, x1, sxtw #3]
ldr x0, [x0, x1]
llvm-svn: 216013
2014-08-20 03:44:17 +08:00
|
|
|
|
2014-09-30 08:49:58 +08:00
|
|
|
unsigned Val = CI->getZExtValue();
|
|
|
|
if (Val < 1 || Val > 3)
|
|
|
|
break;
|
Reapply [FastISel][AArch64] Add support for more addressing modes (r215597).
Note: This was originally reverted to track down a buildbot error. Reapply
without any modifications.
Original commit message:
FastISel didn't take much advantage of the different addressing modes available
to it on AArch64. This commit allows the ComputeAddress method to recognize more
addressing modes that allows shifts and sign-/zero-extensions to be folded into
the memory operation itself.
For Example:
lsl x1, x1, #3 --> ldr x0, [x0, x1, lsl #3]
ldr x0, [x0, x1]
sxtw x1, w1
lsl x1, x1, #3 --> ldr x0, [x0, x1, sxtw #3]
ldr x0, [x0, x1]
llvm-svn: 216013
2014-08-20 03:44:17 +08:00
|
|
|
|
2014-09-30 08:49:58 +08:00
|
|
|
uint64_t NumBytes = 0;
|
|
|
|
if (Ty && Ty->isSized()) {
|
|
|
|
uint64_t NumBits = DL.getTypeSizeInBits(Ty);
|
|
|
|
NumBytes = NumBits / 8;
|
|
|
|
if (!isPowerOf2_64(NumBits))
|
|
|
|
NumBytes = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (NumBytes != (1ULL << Val))
|
|
|
|
break;
|
Reapply [FastISel][AArch64] Add support for more addressing modes (r215597).
Note: This was originally reverted to track down a buildbot error. Reapply
without any modifications.
Original commit message:
FastISel didn't take much advantage of the different addressing modes available
to it on AArch64. This commit allows the ComputeAddress method to recognize more
addressing modes that allows shifts and sign-/zero-extensions to be folded into
the memory operation itself.
For Example:
lsl x1, x1, #3 --> ldr x0, [x0, x1, lsl #3]
ldr x0, [x0, x1]
sxtw x1, w1
lsl x1, x1, #3 --> ldr x0, [x0, x1, sxtw #3]
ldr x0, [x0, x1]
llvm-svn: 216013
2014-08-20 03:44:17 +08:00
|
|
|
|
2014-09-30 08:49:58 +08:00
|
|
|
Addr.setShift(Val);
|
|
|
|
Addr.setExtendType(AArch64_AM::LSL);
|
Reapply [FastISel][AArch64] Add support for more addressing modes (r215597).
Note: This was originally reverted to track down a buildbot error. Reapply
without any modifications.
Original commit message:
FastISel didn't take much advantage of the different addressing modes available
to it on AArch64. This commit allows the ComputeAddress method to recognize more
addressing modes that allows shifts and sign-/zero-extensions to be folded into
the memory operation itself.
For Example:
lsl x1, x1, #3 --> ldr x0, [x0, x1, lsl #3]
ldr x0, [x0, x1]
sxtw x1, w1
lsl x1, x1, #3 --> ldr x0, [x0, x1, sxtw #3]
ldr x0, [x0, x1]
llvm-svn: 216013
2014-08-20 03:44:17 +08:00
|
|
|
|
2014-09-30 08:49:58 +08:00
|
|
|
const Value *Src = U->getOperand(0);
|
2015-05-08 03:21:36 +08:00
|
|
|
if (const auto *I = dyn_cast<Instruction>(Src)) {
|
|
|
|
if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
|
|
|
|
// Fold the zext or sext when it won't become a noop.
|
|
|
|
if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
|
|
|
|
if (!isIntExtFree(ZE) &&
|
|
|
|
ZE->getOperand(0)->getType()->isIntegerTy(32)) {
|
|
|
|
Addr.setExtendType(AArch64_AM::UXTW);
|
|
|
|
Src = ZE->getOperand(0);
|
|
|
|
}
|
|
|
|
} else if (const auto *SE = dyn_cast<SExtInst>(I)) {
|
|
|
|
if (!isIntExtFree(SE) &&
|
|
|
|
SE->getOperand(0)->getType()->isIntegerTy(32)) {
|
|
|
|
Addr.setExtendType(AArch64_AM::SXTW);
|
|
|
|
Src = SE->getOperand(0);
|
|
|
|
}
|
|
|
|
}
|
2014-09-30 08:49:58 +08:00
|
|
|
}
|
|
|
|
}
|
Reapply [FastISel][AArch64] Add support for more addressing modes (r215597).
Note: This was originally reverted to track down a buildbot error. Reapply
without any modifications.
Original commit message:
FastISel didn't take much advantage of the different addressing modes available
to it on AArch64. This commit allows the ComputeAddress method to recognize more
addressing modes that allows shifts and sign-/zero-extensions to be folded into
the memory operation itself.
For Example:
lsl x1, x1, #3 --> ldr x0, [x0, x1, lsl #3]
ldr x0, [x0, x1]
sxtw x1, w1
lsl x1, x1, #3 --> ldr x0, [x0, x1, sxtw #3]
ldr x0, [x0, x1]
llvm-svn: 216013
2014-08-20 03:44:17 +08:00
|
|
|
|
2014-09-30 08:49:58 +08:00
|
|
|
if (const auto *AI = dyn_cast<BinaryOperator>(Src))
|
|
|
|
if (AI->getOpcode() == Instruction::And) {
|
|
|
|
const Value *LHS = AI->getOperand(0);
|
|
|
|
const Value *RHS = AI->getOperand(1);
|
2014-09-18 13:40:41 +08:00
|
|
|
|
2014-09-30 08:49:58 +08:00
|
|
|
if (const auto *C = dyn_cast<ConstantInt>(LHS))
|
|
|
|
if (C->getValue() == 0xffffffff)
|
|
|
|
std::swap(LHS, RHS);
|
|
|
|
|
|
|
|
if (const auto *C = dyn_cast<ConstantInt>(RHS))
|
|
|
|
if (C->getValue() == 0xffffffff) {
|
|
|
|
Addr.setExtendType(AArch64_AM::UXTW);
|
|
|
|
unsigned Reg = getRegForValue(LHS);
|
|
|
|
if (!Reg)
|
|
|
|
return false;
|
|
|
|
bool RegIsKill = hasTrivialKill(LHS);
|
|
|
|
Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
|
|
|
|
AArch64::sub_32);
|
|
|
|
Addr.setOffsetReg(Reg);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
unsigned Reg = getRegForValue(Src);
|
|
|
|
if (!Reg)
|
|
|
|
return false;
|
|
|
|
Addr.setOffsetReg(Reg);
|
|
|
|
return true;
|
2014-09-20 06:23:46 +08:00
|
|
|
}
|
2014-09-18 03:19:31 +08:00
|
|
|
case Instruction::Mul: {
|
|
|
|
if (Addr.getOffsetReg())
|
|
|
|
break;
|
|
|
|
|
|
|
|
if (!isMulPowOf2(U))
|
|
|
|
break;
|
|
|
|
|
|
|
|
const Value *LHS = U->getOperand(0);
|
|
|
|
const Value *RHS = U->getOperand(1);
|
|
|
|
|
|
|
|
// Canonicalize power-of-2 value to the RHS.
|
|
|
|
if (const auto *C = dyn_cast<ConstantInt>(LHS))
|
|
|
|
if (C->getValue().isPowerOf2())
|
|
|
|
std::swap(LHS, RHS);
|
|
|
|
|
|
|
|
assert(isa<ConstantInt>(RHS) && "Expected an ConstantInt.");
|
|
|
|
const auto *C = cast<ConstantInt>(RHS);
|
|
|
|
unsigned Val = C->getValue().logBase2();
|
|
|
|
if (Val < 1 || Val > 3)
|
|
|
|
break;
|
|
|
|
|
|
|
|
uint64_t NumBytes = 0;
|
|
|
|
if (Ty && Ty->isSized()) {
|
|
|
|
uint64_t NumBits = DL.getTypeSizeInBits(Ty);
|
|
|
|
NumBytes = NumBits / 8;
|
|
|
|
if (!isPowerOf2_64(NumBits))
|
|
|
|
NumBytes = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (NumBytes != (1ULL << Val))
|
|
|
|
break;
|
|
|
|
|
|
|
|
Addr.setShift(Val);
|
|
|
|
Addr.setExtendType(AArch64_AM::LSL);
|
|
|
|
|
2014-09-20 06:23:46 +08:00
|
|
|
const Value *Src = LHS;
|
2015-05-08 03:21:36 +08:00
|
|
|
if (const auto *I = dyn_cast<Instruction>(Src)) {
|
|
|
|
if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
|
|
|
|
// Fold the zext or sext when it won't become a noop.
|
|
|
|
if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
|
|
|
|
if (!isIntExtFree(ZE) &&
|
|
|
|
ZE->getOperand(0)->getType()->isIntegerTy(32)) {
|
|
|
|
Addr.setExtendType(AArch64_AM::UXTW);
|
|
|
|
Src = ZE->getOperand(0);
|
|
|
|
}
|
|
|
|
} else if (const auto *SE = dyn_cast<SExtInst>(I)) {
|
|
|
|
if (!isIntExtFree(SE) &&
|
|
|
|
SE->getOperand(0)->getType()->isIntegerTy(32)) {
|
|
|
|
Addr.setExtendType(AArch64_AM::SXTW);
|
|
|
|
Src = SE->getOperand(0);
|
|
|
|
}
|
|
|
|
}
|
2014-09-18 03:19:31 +08:00
|
|
|
}
|
2014-09-20 06:23:46 +08:00
|
|
|
}
|
2014-09-18 03:19:31 +08:00
|
|
|
|
2014-09-20 06:23:46 +08:00
|
|
|
unsigned Reg = getRegForValue(Src);
|
2014-09-18 03:19:31 +08:00
|
|
|
if (!Reg)
|
|
|
|
return false;
|
|
|
|
Addr.setOffsetReg(Reg);
|
|
|
|
return true;
|
Reapply [FastISel][AArch64] Add support for more addressing modes (r215597).
Note: This was originally reverted to track down a buildbot error. Reapply
without any modifications.
Original commit message:
FastISel didn't take much advantage of the different addressing modes available
to it on AArch64. This commit allows the ComputeAddress method to recognize more
addressing modes that allows shifts and sign-/zero-extensions to be folded into
the memory operation itself.
For Example:
lsl x1, x1, #3 --> ldr x0, [x0, x1, lsl #3]
ldr x0, [x0, x1]
sxtw x1, w1
lsl x1, x1, #3 --> ldr x0, [x0, x1, sxtw #3]
ldr x0, [x0, x1]
llvm-svn: 216013
2014-08-20 03:44:17 +08:00
|
|
|
}
|
2014-09-18 13:40:41 +08:00
|
|
|
case Instruction::And: {
|
|
|
|
if (Addr.getOffsetReg())
|
|
|
|
break;
|
|
|
|
|
2014-12-10 03:44:38 +08:00
|
|
|
if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
|
2014-09-18 13:40:41 +08:00
|
|
|
break;
|
|
|
|
|
|
|
|
const Value *LHS = U->getOperand(0);
|
|
|
|
const Value *RHS = U->getOperand(1);
|
|
|
|
|
|
|
|
if (const auto *C = dyn_cast<ConstantInt>(LHS))
|
|
|
|
if (C->getValue() == 0xffffffff)
|
|
|
|
std::swap(LHS, RHS);
|
|
|
|
|
2014-09-20 06:23:46 +08:00
|
|
|
if (const auto *C = dyn_cast<ConstantInt>(RHS))
|
2014-09-18 13:40:41 +08:00
|
|
|
if (C->getValue() == 0xffffffff) {
|
|
|
|
Addr.setShift(0);
|
|
|
|
Addr.setExtendType(AArch64_AM::LSL);
|
|
|
|
Addr.setExtendType(AArch64_AM::UXTW);
|
|
|
|
|
|
|
|
unsigned Reg = getRegForValue(LHS);
|
|
|
|
if (!Reg)
|
|
|
|
return false;
|
|
|
|
bool RegIsKill = hasTrivialKill(LHS);
|
|
|
|
Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
|
|
|
|
AArch64::sub_32);
|
|
|
|
Addr.setOffsetReg(Reg);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
2014-10-07 11:40:06 +08:00
|
|
|
case Instruction::SExt:
|
|
|
|
case Instruction::ZExt: {
|
|
|
|
if (!Addr.getReg() || Addr.getOffsetReg())
|
|
|
|
break;
|
|
|
|
|
|
|
|
const Value *Src = nullptr;
|
|
|
|
// Fold the zext or sext when it won't become a noop.
|
|
|
|
if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
|
|
|
|
if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
|
|
|
|
Addr.setExtendType(AArch64_AM::UXTW);
|
|
|
|
Src = ZE->getOperand(0);
|
|
|
|
}
|
|
|
|
} else if (const auto *SE = dyn_cast<SExtInst>(U)) {
|
|
|
|
if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
|
|
|
|
Addr.setExtendType(AArch64_AM::SXTW);
|
|
|
|
Src = SE->getOperand(0);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!Src)
|
|
|
|
break;
|
|
|
|
|
|
|
|
Addr.setShift(0);
|
|
|
|
unsigned Reg = getRegForValue(Src);
|
|
|
|
if (!Reg)
|
|
|
|
return false;
|
|
|
|
Addr.setOffsetReg(Reg);
|
|
|
|
return true;
|
|
|
|
}
|
2014-09-18 03:19:31 +08:00
|
|
|
} // end switch
|
Reapply [FastISel][AArch64] Add support for more addressing modes (r215597).
Note: This was originally reverted to track down a buildbot error. Reapply
without any modifications.
Original commit message:
FastISel didn't take much advantage of the different addressing modes available
to it on AArch64. This commit allows the ComputeAddress method to recognize more
addressing modes that allows shifts and sign-/zero-extensions to be folded into
the memory operation itself.
For Example:
lsl x1, x1, #3 --> ldr x0, [x0, x1, lsl #3]
ldr x0, [x0, x1]
sxtw x1, w1
lsl x1, x1, #3 --> ldr x0, [x0, x1, sxtw #3]
ldr x0, [x0, x1]
llvm-svn: 216013
2014-08-20 03:44:17 +08:00
|
|
|
|
2014-10-28 02:21:58 +08:00
|
|
|
if (Addr.isRegBase() && !Addr.getReg()) {
|
|
|
|
unsigned Reg = getRegForValue(Obj);
|
|
|
|
if (!Reg)
|
|
|
|
return false;
|
|
|
|
Addr.setReg(Reg);
|
|
|
|
return true;
|
Reapply [FastISel][AArch64] Add support for more addressing modes (r215597).
Note: This was originally reverted to track down a buildbot error. Reapply
without any modifications.
Original commit message:
FastISel didn't take much advantage of the different addressing modes available
to it on AArch64. This commit allows the ComputeAddress method to recognize more
addressing modes that allows shifts and sign-/zero-extensions to be folded into
the memory operation itself.
For Example:
lsl x1, x1, #3 --> ldr x0, [x0, x1, lsl #3]
ldr x0, [x0, x1]
sxtw x1, w1
lsl x1, x1, #3 --> ldr x0, [x0, x1, sxtw #3]
ldr x0, [x0, x1]
llvm-svn: 216013
2014-08-20 03:44:17 +08:00
|
|
|
}
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2014-10-28 02:21:58 +08:00
|
|
|
if (!Addr.getOffsetReg()) {
|
|
|
|
unsigned Reg = getRegForValue(Obj);
|
|
|
|
if (!Reg)
|
|
|
|
return false;
|
|
|
|
Addr.setOffsetReg(Reg);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
2014-03-29 18:18:08 +08:00
|
|
|
}
|
|
|
|
|
2014-09-16 07:20:17 +08:00
|
|
|
bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
|
2014-07-31 12:10:40 +08:00
|
|
|
const User *U = nullptr;
|
|
|
|
unsigned Opcode = Instruction::UserOp1;
|
|
|
|
bool InMBB = true;
|
|
|
|
|
|
|
|
if (const auto *I = dyn_cast<Instruction>(V)) {
|
|
|
|
Opcode = I->getOpcode();
|
|
|
|
U = I;
|
|
|
|
InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
|
|
|
|
} else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
|
|
|
|
Opcode = C->getOpcode();
|
|
|
|
U = C;
|
|
|
|
}
|
|
|
|
|
|
|
|
switch (Opcode) {
|
|
|
|
default: break;
|
|
|
|
case Instruction::BitCast:
|
|
|
|
// Look past bitcasts if its operand is in the same BB.
|
|
|
|
if (InMBB)
|
2014-09-16 07:20:17 +08:00
|
|
|
return computeCallAddress(U->getOperand(0), Addr);
|
2014-07-31 12:10:40 +08:00
|
|
|
break;
|
|
|
|
case Instruction::IntToPtr:
|
|
|
|
// Look past no-op inttoptrs if its operand is in the same BB.
|
|
|
|
if (InMBB &&
|
2015-07-09 10:09:04 +08:00
|
|
|
TLI.getValueType(DL, U->getOperand(0)->getType()) ==
|
|
|
|
TLI.getPointerTy(DL))
|
2014-09-16 07:20:17 +08:00
|
|
|
return computeCallAddress(U->getOperand(0), Addr);
|
2014-07-31 12:10:40 +08:00
|
|
|
break;
|
|
|
|
case Instruction::PtrToInt:
|
|
|
|
// Look past no-op ptrtoints if its operand is in the same BB.
|
2015-07-09 10:09:04 +08:00
|
|
|
if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
|
2014-09-16 07:20:17 +08:00
|
|
|
return computeCallAddress(U->getOperand(0), Addr);
|
2014-07-31 12:10:40 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
|
|
|
|
Addr.setGlobalValue(GV);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
// If all else fails, try to materialize the value in a register.
|
|
|
|
if (!Addr.getGlobalValue()) {
|
|
|
|
Addr.setReg(getRegForValue(V));
|
|
|
|
return Addr.getReg() != 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2014-05-24 20:50:23 +08:00
|
|
|
/// Check whether \p Ty maps to a legal value type that fast-isel handles.
///
/// \param Ty The IR type to classify.
/// \param VT Set to the simple value type of \p Ty whenever one exists, even
///           if the function then returns false (f128 or an illegal type).
///           Left untouched when \p Ty has no simple value type.
/// \returns true if \p VT is legal for the target and is not f128.
bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
  const EVT ValueTy = TLI.getValueType(DL, Ty, true);

  // Only handle simple types.
  if (ValueTy == MVT::Other || !ValueTy.isSimple())
    return false;
  VT = ValueTy.getSimpleVT();

  // f128 is a legal type, but it's not something we handle in fast-isel.
  // Every other legal type, i.e. one a register can hold directly, is fine.
  return !(VT == MVT::f128) && TLI.isTypeLegal(VT);
}
|
|
|
|
|
2018-05-01 23:54:18 +08:00
|
|
|
/// Determine if the value type is supported by FastISel.
|
2014-09-03 06:33:53 +08:00
|
|
|
///
|
|
|
|
/// FastISel for AArch64 can handle more value types than are legal. This adds
|
|
|
|
/// simple value type such as i1, i8, and i16.
|
2014-09-16 05:27:54 +08:00
|
|
|
bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
|
|
|
|
if (Ty->isVectorTy() && !IsVectorAllowed)
|
2014-09-03 06:33:53 +08:00
|
|
|
return false;
|
|
|
|
|
|
|
|
if (isTypeLegal(Ty, VT))
|
|
|
|
return true;
|
|
|
|
|
|
|
|
// If this is a type than can be sign or zero-extended to a basic operation
|
|
|
|
// go ahead and accept it now.
|
|
|
|
if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
|
|
|
|
return true;
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2014-08-29 08:19:21 +08:00
|
|
|
bool AArch64FastISel::isValueAvailable(const Value *V) const {
|
|
|
|
if (!isa<Instruction>(V))
|
|
|
|
return true;
|
|
|
|
|
|
|
|
const auto *I = cast<Instruction>(V);
|
2016-03-01 06:50:49 +08:00
|
|
|
return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
|
2014-08-29 08:19:21 +08:00
|
|
|
}
|
|
|
|
|
2014-09-16 07:20:17 +08:00
|
|
|
bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
|
2014-09-30 08:49:54 +08:00
|
|
|
unsigned ScaleFactor = getImplicitScaleFactor(VT);
|
|
|
|
if (!ScaleFactor)
|
|
|
|
return false;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
Reapply [FastISel][AArch64] Add support for more addressing modes (r215597).
Note: This was originally reverted to track down a buildbot error. Reapply
without any modifications.
Original commit message:
FastISel didn't take much advantage of the different addressing modes available
to it on AArch64. This commit allows the ComputeAddress method to recognize more
addressing modes that allows shifts and sign-/zero-extensions to be folded into
the memory operation itself.
For Example:
lsl x1, x1, #3 --> ldr x0, [x0, x1, lsl #3]
ldr x0, [x0, x1]
sxtw x1, w1
lsl x1, x1, #3 --> ldr x0, [x0, x1, sxtw #3]
ldr x0, [x0, x1]
llvm-svn: 216013
2014-08-20 03:44:17 +08:00
|
|
|
bool ImmediateOffsetNeedsLowering = false;
|
|
|
|
bool RegisterOffsetNeedsLowering = false;
|
|
|
|
int64_t Offset = Addr.getOffset();
|
|
|
|
if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
|
|
|
|
ImmediateOffsetNeedsLowering = true;
|
|
|
|
else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
|
|
|
|
!isUInt<12>(Offset / ScaleFactor))
|
|
|
|
ImmediateOffsetNeedsLowering = true;
|
|
|
|
|
|
|
|
// Cannot encode an offset register and an immediate offset in the same
|
|
|
|
// instruction. Fold the immediate offset into the load/store instruction and
|
2015-08-09 02:27:36 +08:00
|
|
|
// emit an additional add to take care of the offset register.
|
2014-10-28 02:21:58 +08:00
|
|
|
if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
|
Reapply [FastISel][AArch64] Add support for more addressing modes (r215597).
Note: This was originally reverted to track down a buildbot error. Reapply
without any modifications.
Original commit message:
FastISel didn't take much advantage of the different addressing modes available
to it on AArch64. This commit allows the ComputeAddress method to recognize more
addressing modes that allows shifts and sign-/zero-extensions to be folded into
the memory operation itself.
For Example:
lsl x1, x1, #3 --> ldr x0, [x0, x1, lsl #3]
ldr x0, [x0, x1]
sxtw x1, w1
lsl x1, x1, #3 --> ldr x0, [x0, x1, sxtw #3]
ldr x0, [x0, x1]
llvm-svn: 216013
2014-08-20 03:44:17 +08:00
|
|
|
RegisterOffsetNeedsLowering = true;
|
|
|
|
|
2014-08-28 05:38:33 +08:00
|
|
|
// Cannot encode zero register as base.
|
|
|
|
if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
|
|
|
|
RegisterOffsetNeedsLowering = true;
|
|
|
|
|
Reapply [FastISel][AArch64] Add support for more addressing modes (r215597).
Note: This was originally reverted to track down a buildbot error. Reapply
without any modifications.
Original commit message:
FastISel didn't take much advantage of the different addressing modes available
to it on AArch64. This commit allows the ComputeAddress method to recognize more
addressing modes that allows shifts and sign-/zero-extensions to be folded into
the memory operation itself.
For Example:
lsl x1, x1, #3 --> ldr x0, [x0, x1, lsl #3]
ldr x0, [x0, x1]
sxtw x1, w1
lsl x1, x1, #3 --> ldr x0, [x0, x1, sxtw #3]
ldr x0, [x0, x1]
llvm-svn: 216013
2014-08-20 03:44:17 +08:00
|
|
|
// If this is a stack pointer and the offset needs to be simplified then put
|
2014-06-10 17:52:44 +08:00
|
|
|
// the alloca address into a register, set the base type back to register and
|
|
|
|
// continue. This should almost never happen.
|
2014-10-28 02:21:58 +08:00
|
|
|
if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
|
|
|
|
{
|
2014-08-22 04:57:57 +08:00
|
|
|
unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
|
2014-06-10 17:52:44 +08:00
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
|
|
|
|
ResultReg)
|
Reapply [FastISel][AArch64] Add support for more addressing modes (r215597).
Note: This was originally reverted to track down a buildbot error. Reapply
without any modifications.
Original commit message:
FastISel didn't take much advantage of the different addressing modes available
to it on AArch64. This commit allows the ComputeAddress method to recognize more
addressing modes that allows shifts and sign-/zero-extensions to be folded into
the memory operation itself.
For Example:
lsl x1, x1, #3 --> ldr x0, [x0, x1, lsl #3]
ldr x0, [x0, x1]
sxtw x1, w1
lsl x1, x1, #3 --> ldr x0, [x0, x1, sxtw #3]
ldr x0, [x0, x1]
llvm-svn: 216013
2014-08-20 03:44:17 +08:00
|
|
|
.addFrameIndex(Addr.getFI())
|
|
|
|
.addImm(0)
|
|
|
|
.addImm(0);
|
2014-06-10 17:52:44 +08:00
|
|
|
Addr.setKind(Address::RegBase);
|
|
|
|
Addr.setReg(ResultReg);
|
2014-03-29 18:18:08 +08:00
|
|
|
}
|
|
|
|
|
Reapply [FastISel][AArch64] Add support for more addressing modes (r215597).
Note: This was originally reverted to track down a buildbot error. Reapply
without any modifications.
Original commit message:
FastISel didn't take much advantage of the different addressing modes available
to it on AArch64. This commit allows the ComputeAddress method to recognize more
addressing modes that allows shifts and sign-/zero-extensions to be folded into
the memory operation itself.
For Example:
lsl x1, x1, #3 --> ldr x0, [x0, x1, lsl #3]
ldr x0, [x0, x1]
sxtw x1, w1
lsl x1, x1, #3 --> ldr x0, [x0, x1, sxtw #3]
ldr x0, [x0, x1]
llvm-svn: 216013
2014-08-20 03:44:17 +08:00
|
|
|
if (RegisterOffsetNeedsLowering) {
|
|
|
|
unsigned ResultReg = 0;
|
2014-08-27 08:58:30 +08:00
|
|
|
if (Addr.getReg()) {
|
|
|
|
if (Addr.getExtendType() == AArch64_AM::SXTW ||
|
|
|
|
Addr.getExtendType() == AArch64_AM::UXTW )
|
|
|
|
ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
|
|
|
|
/*TODO:IsKill=*/false, Addr.getOffsetReg(),
|
|
|
|
/*TODO:IsKill=*/false, Addr.getExtendType(),
|
|
|
|
Addr.getShift());
|
|
|
|
else
|
|
|
|
ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
|
|
|
|
/*TODO:IsKill=*/false, Addr.getOffsetReg(),
|
|
|
|
/*TODO:IsKill=*/false, AArch64_AM::LSL,
|
|
|
|
Addr.getShift());
|
|
|
|
} else {
|
|
|
|
if (Addr.getExtendType() == AArch64_AM::UXTW)
|
|
|
|
ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
|
|
|
|
/*Op0IsKill=*/false, Addr.getShift(),
|
|
|
|
/*IsZExt=*/true);
|
|
|
|
else if (Addr.getExtendType() == AArch64_AM::SXTW)
|
|
|
|
ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
|
|
|
|
/*Op0IsKill=*/false, Addr.getShift(),
|
|
|
|
/*IsZExt=*/false);
|
|
|
|
else
|
|
|
|
ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
|
|
|
|
/*Op0IsKill=*/false, Addr.getShift());
|
|
|
|
}
|
Reapply [FastISel][AArch64] Add support for more addressing modes (r215597).
Note: This was originally reverted to track down a buildbot error. Reapply
without any modifications.
Original commit message:
FastISel didn't take much advantage of the different addressing modes available
to it on AArch64. This commit allows the ComputeAddress method to recognize more
addressing modes that allows shifts and sign-/zero-extensions to be folded into
the memory operation itself.
For Example:
lsl x1, x1, #3 --> ldr x0, [x0, x1, lsl #3]
ldr x0, [x0, x1]
sxtw x1, w1
lsl x1, x1, #3 --> ldr x0, [x0, x1, sxtw #3]
ldr x0, [x0, x1]
llvm-svn: 216013
2014-08-20 03:44:17 +08:00
|
|
|
if (!ResultReg)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
Addr.setReg(ResultReg);
|
|
|
|
Addr.setOffsetReg(0);
|
|
|
|
Addr.setShift(0);
|
2014-08-27 08:58:30 +08:00
|
|
|
Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
|
Reapply [FastISel][AArch64] Add support for more addressing modes (r215597).
Note: This was originally reverted to track down a buildbot error. Reapply
without any modifications.
Original commit message:
FastISel didn't take much advantage of the different addressing modes available
to it on AArch64. This commit allows the ComputeAddress method to recognize more
addressing modes that allows shifts and sign-/zero-extensions to be folded into
the memory operation itself.
For Example:
lsl x1, x1, #3 --> ldr x0, [x0, x1, lsl #3]
ldr x0, [x0, x1]
sxtw x1, w1
lsl x1, x1, #3 --> ldr x0, [x0, x1, sxtw #3]
ldr x0, [x0, x1]
llvm-svn: 216013
2014-08-20 03:44:17 +08:00
|
|
|
}
|
|
|
|
|
2014-03-29 18:18:08 +08:00
|
|
|
// Since the offset is too large for the load/store instruction get the
|
|
|
|
// reg+offset into a register.
|
Reapply [FastISel][AArch64] Add support for more addressing modes (r215597).
Note: This was originally reverted to track down a buildbot error. Reapply
without any modifications.
Original commit message:
FastISel didn't take much advantage of the different addressing modes available
to it on AArch64. This commit allows the ComputeAddress method to recognize more
addressing modes that allows shifts and sign-/zero-extensions to be folded into
the memory operation itself.
For Example:
lsl x1, x1, #3 --> ldr x0, [x0, x1, lsl #3]
ldr x0, [x0, x1]
sxtw x1, w1
lsl x1, x1, #3 --> ldr x0, [x0, x1, sxtw #3]
ldr x0, [x0, x1]
llvm-svn: 216013
2014-08-20 03:44:17 +08:00
|
|
|
if (ImmediateOffsetNeedsLowering) {
|
2014-09-18 15:04:49 +08:00
|
|
|
unsigned ResultReg;
|
2014-10-16 02:58:02 +08:00
|
|
|
if (Addr.getReg())
|
2014-09-18 13:40:47 +08:00
|
|
|
// Try to fold the immediate into the add instruction.
|
2014-10-16 02:58:02 +08:00
|
|
|
ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), /*IsKill=*/false, Offset);
|
|
|
|
else
|
2014-09-04 04:56:59 +08:00
|
|
|
ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
|
Reapply [FastISel][AArch64] Add support for more addressing modes (r215597).
Note: This was originally reverted to track down a buildbot error. Reapply
without any modifications.
Original commit message:
FastISel didn't take much advantage of the different addressing modes available
to it on AArch64. This commit allows the ComputeAddress method to recognize more
addressing modes that allows shifts and sign-/zero-extensions to be folded into
the memory operation itself.
For Example:
lsl x1, x1, #3 --> ldr x0, [x0, x1, lsl #3]
ldr x0, [x0, x1]
sxtw x1, w1
lsl x1, x1, #3 --> ldr x0, [x0, x1, sxtw #3]
ldr x0, [x0, x1]
llvm-svn: 216013
2014-08-20 03:44:17 +08:00
|
|
|
|
|
|
|
if (!ResultReg)
|
2014-03-29 18:18:08 +08:00
|
|
|
return false;
|
|
|
|
Addr.setReg(ResultReg);
|
|
|
|
Addr.setOffset(0);
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2014-09-16 07:20:17 +08:00
|
|
|
void AArch64FastISel::addLoadStoreOperands(Address &Addr,
|
2014-05-24 20:50:23 +08:00
|
|
|
const MachineInstrBuilder &MIB,
|
2016-07-16 02:26:59 +08:00
|
|
|
MachineMemOperand::Flags Flags,
|
Reapply [FastISel][AArch64] Add support for more addressing modes (r215597).
Note: This was originally reverted to track down a buildbot error. Reapply
without any modifications.
Original commit message:
FastISel didn't take much advantage of the different addressing modes available
to it on AArch64. This commit allows the ComputeAddress method to recognize more
addressing modes that allows shifts and sign-/zero-extensions to be folded into
the memory operation itself.
For Example:
lsl x1, x1, #3 --> ldr x0, [x0, x1, lsl #3]
ldr x0, [x0, x1]
sxtw x1, w1
lsl x1, x1, #3 --> ldr x0, [x0, x1, sxtw #3]
ldr x0, [x0, x1]
llvm-svn: 216013
2014-08-20 03:44:17 +08:00
|
|
|
unsigned ScaleFactor,
|
|
|
|
MachineMemOperand *MMO) {
|
|
|
|
int64_t Offset = Addr.getOffset() / ScaleFactor;
|
2014-03-29 18:18:08 +08:00
|
|
|
// Frame base works a bit differently. Handle it separately.
|
Reapply [FastISel][AArch64] Add support for more addressing modes (r215597).
Note: This was originally reverted to track down a buildbot error. Reapply
without any modifications.
Original commit message:
FastISel didn't take much advantage of the different addressing modes available
to it on AArch64. This commit allows the ComputeAddress method to recognize more
addressing modes that allows shifts and sign-/zero-extensions to be folded into
the memory operation itself.
For Example:
lsl x1, x1, #3 --> ldr x0, [x0, x1, lsl #3]
ldr x0, [x0, x1]
sxtw x1, w1
lsl x1, x1, #3 --> ldr x0, [x0, x1, sxtw #3]
ldr x0, [x0, x1]
llvm-svn: 216013
2014-08-20 03:44:17 +08:00
|
|
|
if (Addr.isFIBase()) {
|
2014-03-29 18:18:08 +08:00
|
|
|
int FI = Addr.getFI();
|
|
|
|
// FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
|
|
|
|
// and alignment should be based on the VT.
|
2014-08-09 01:24:10 +08:00
|
|
|
MMO = FuncInfo.MF->getMachineMemOperand(
|
2015-08-12 07:09:45 +08:00
|
|
|
MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
|
|
|
|
MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
|
2014-03-29 18:18:08 +08:00
|
|
|
// Now add the rest of the operands.
|
2014-08-09 01:24:10 +08:00
|
|
|
MIB.addFrameIndex(FI).addImm(Offset);
|
2014-03-29 18:18:08 +08:00
|
|
|
} else {
|
Reapply [FastISel][AArch64] Add support for more addressing modes (r215597).
Note: This was originally reverted to track down a buildbot error. Reapply
without any modifications.
Original commit message:
FastISel didn't take much advantage of the different addressing modes available
to it on AArch64. This commit allows the ComputeAddress method to recognize more
addressing modes that allows shifts and sign-/zero-extensions to be folded into
the memory operation itself.
For Example:
lsl x1, x1, #3 --> ldr x0, [x0, x1, lsl #3]
ldr x0, [x0, x1]
sxtw x1, w1
lsl x1, x1, #3 --> ldr x0, [x0, x1, sxtw #3]
ldr x0, [x0, x1]
llvm-svn: 216013
2014-08-20 03:44:17 +08:00
|
|
|
assert(Addr.isRegBase() && "Unexpected address kind.");
|
2014-08-22 04:57:57 +08:00
|
|
|
const MCInstrDesc &II = MIB->getDesc();
|
|
|
|
unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
|
|
|
|
Addr.setReg(
|
|
|
|
constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
|
|
|
|
Addr.setOffsetReg(
|
|
|
|
constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
|
Reapply [FastISel][AArch64] Add support for more addressing modes (r215597).
Note: This was originally reverted to track down a buildbot error. Reapply
without any modifications.
Original commit message:
FastISel didn't take much advantage of the different addressing modes available
to it on AArch64. This commit allows the ComputeAddress method to recognize more
addressing modes that allows shifts and sign-/zero-extensions to be folded into
the memory operation itself.
For Example:
lsl x1, x1, #3 --> ldr x0, [x0, x1, lsl #3]
ldr x0, [x0, x1]
sxtw x1, w1
lsl x1, x1, #3 --> ldr x0, [x0, x1, sxtw #3]
ldr x0, [x0, x1]
llvm-svn: 216013
2014-08-20 03:44:17 +08:00
|
|
|
if (Addr.getOffsetReg()) {
|
|
|
|
assert(Addr.getOffset() == 0 && "Unexpected offset");
|
|
|
|
bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
|
|
|
|
Addr.getExtendType() == AArch64_AM::SXTX;
|
|
|
|
MIB.addReg(Addr.getReg());
|
|
|
|
MIB.addReg(Addr.getOffsetReg());
|
|
|
|
MIB.addImm(IsSigned);
|
|
|
|
MIB.addImm(Addr.getShift() != 0);
|
2014-10-28 02:21:58 +08:00
|
|
|
} else
|
|
|
|
MIB.addReg(Addr.getReg()).addImm(Offset);
|
2014-03-29 18:18:08 +08:00
|
|
|
}
|
2014-08-09 01:24:10 +08:00
|
|
|
|
|
|
|
if (MMO)
|
|
|
|
MIB.addMemOperand(MMO);
|
2014-03-29 18:18:08 +08:00
|
|
|
}
|
|
|
|
|
2014-09-03 09:38:36 +08:00
|
|
|
unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
|
|
|
|
const Value *RHS, bool SetFlags,
|
|
|
|
bool WantResult, bool IsZExt) {
|
2014-08-20 06:29:55 +08:00
|
|
|
AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
|
2014-08-21 00:34:15 +08:00
|
|
|
bool NeedExtend = false;
|
2014-08-20 06:29:55 +08:00
|
|
|
switch (RetVT.SimpleTy) {
|
2014-08-21 00:34:15 +08:00
|
|
|
default:
|
|
|
|
return 0;
|
2014-08-20 06:29:55 +08:00
|
|
|
case MVT::i1:
|
2014-08-21 00:34:15 +08:00
|
|
|
NeedExtend = true;
|
|
|
|
break;
|
2014-08-20 06:29:55 +08:00
|
|
|
case MVT::i8:
|
2014-08-21 00:34:15 +08:00
|
|
|
NeedExtend = true;
|
|
|
|
ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
|
2014-08-20 06:29:55 +08:00
|
|
|
break;
|
|
|
|
case MVT::i16:
|
2014-08-21 00:34:15 +08:00
|
|
|
NeedExtend = true;
|
|
|
|
ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
|
|
|
|
break;
|
|
|
|
case MVT::i32: // fall-through
|
|
|
|
case MVT::i64:
|
2014-08-20 06:29:55 +08:00
|
|
|
break;
|
|
|
|
}
|
2014-08-21 00:34:15 +08:00
|
|
|
MVT SrcVT = RetVT;
|
|
|
|
RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
|
2014-08-20 06:29:55 +08:00
|
|
|
|
|
|
|
// Canonicalize immediates to the RHS first.
|
2014-10-28 03:58:36 +08:00
|
|
|
if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
|
2014-08-20 06:29:55 +08:00
|
|
|
std::swap(LHS, RHS);
|
|
|
|
|
2014-09-18 03:51:38 +08:00
|
|
|
// Canonicalize mul by power of 2 to the RHS.
|
|
|
|
if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
|
|
|
|
if (isMulPowOf2(LHS))
|
|
|
|
std::swap(LHS, RHS);
|
|
|
|
|
2014-08-20 06:29:55 +08:00
|
|
|
// Canonicalize shift immediate to the RHS.
|
2014-09-18 03:51:38 +08:00
|
|
|
if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
|
2014-08-20 06:29:55 +08:00
|
|
|
if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
|
|
|
|
if (isa<ConstantInt>(SI->getOperand(1)))
|
|
|
|
if (SI->getOpcode() == Instruction::Shl ||
|
|
|
|
SI->getOpcode() == Instruction::LShr ||
|
|
|
|
SI->getOpcode() == Instruction::AShr )
|
|
|
|
std::swap(LHS, RHS);
|
|
|
|
|
|
|
|
unsigned LHSReg = getRegForValue(LHS);
|
|
|
|
if (!LHSReg)
|
|
|
|
return 0;
|
|
|
|
bool LHSIsKill = hasTrivialKill(LHS);
|
|
|
|
|
2014-08-21 00:34:15 +08:00
|
|
|
if (NeedExtend)
|
2014-09-16 07:20:17 +08:00
|
|
|
LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
|
2014-08-20 06:29:55 +08:00
|
|
|
|
|
|
|
unsigned ResultReg = 0;
|
|
|
|
if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
|
|
|
|
uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
|
|
|
|
if (C->isNegative())
|
2014-09-03 09:38:36 +08:00
|
|
|
ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, LHSIsKill, -Imm,
|
|
|
|
SetFlags, WantResult);
|
2014-08-20 06:29:55 +08:00
|
|
|
else
|
2014-09-03 09:38:36 +08:00
|
|
|
ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, Imm, SetFlags,
|
|
|
|
WantResult);
|
2014-10-28 03:58:36 +08:00
|
|
|
} else if (const auto *C = dyn_cast<Constant>(RHS))
|
|
|
|
if (C->isNullValue())
|
|
|
|
ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, 0, SetFlags,
|
|
|
|
WantResult);
|
|
|
|
|
2014-08-20 06:29:55 +08:00
|
|
|
if (ResultReg)
|
|
|
|
return ResultReg;
|
|
|
|
|
2014-08-21 00:34:15 +08:00
|
|
|
// Only extend the RHS within the instruction if there is a valid extend type.
|
2014-09-18 03:51:38 +08:00
|
|
|
if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
|
|
|
|
isValueAvailable(RHS)) {
|
2014-08-20 06:29:55 +08:00
|
|
|
if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
|
|
|
|
if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
|
|
|
|
if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
|
|
|
|
unsigned RHSReg = getRegForValue(SI->getOperand(0));
|
|
|
|
if (!RHSReg)
|
|
|
|
return 0;
|
|
|
|
bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
|
2014-09-03 09:38:36 +08:00
|
|
|
return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
|
|
|
|
RHSIsKill, ExtendType, C->getZExtValue(),
|
|
|
|
SetFlags, WantResult);
|
2014-08-20 06:29:55 +08:00
|
|
|
}
|
|
|
|
unsigned RHSReg = getRegForValue(RHS);
|
|
|
|
if (!RHSReg)
|
|
|
|
return 0;
|
|
|
|
bool RHSIsKill = hasTrivialKill(RHS);
|
2014-09-03 09:38:36 +08:00
|
|
|
return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
|
|
|
|
ExtendType, 0, SetFlags, WantResult);
|
2014-08-20 06:29:55 +08:00
|
|
|
}
|
|
|
|
|
2014-09-18 03:51:38 +08:00
|
|
|
// Check if the mul can be folded into the instruction.
|
2015-08-20 04:52:55 +08:00
|
|
|
if (RHS->hasOneUse() && isValueAvailable(RHS)) {
|
2014-09-18 03:51:38 +08:00
|
|
|
if (isMulPowOf2(RHS)) {
|
|
|
|
const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
|
|
|
|
const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
|
|
|
|
|
|
|
|
if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
|
|
|
|
if (C->getValue().isPowerOf2())
|
|
|
|
std::swap(MulLHS, MulRHS);
|
|
|
|
|
|
|
|
assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
|
|
|
|
uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
|
|
|
|
unsigned RHSReg = getRegForValue(MulLHS);
|
|
|
|
if (!RHSReg)
|
|
|
|
return 0;
|
|
|
|
bool RHSIsKill = hasTrivialKill(MulLHS);
|
2015-08-20 04:52:55 +08:00
|
|
|
ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
|
|
|
|
RHSIsKill, AArch64_AM::LSL, ShiftVal, SetFlags,
|
|
|
|
WantResult);
|
|
|
|
if (ResultReg)
|
|
|
|
return ResultReg;
|
2014-09-18 03:51:38 +08:00
|
|
|
}
|
2015-08-20 04:52:55 +08:00
|
|
|
}
|
2014-09-18 03:51:38 +08:00
|
|
|
|
2014-08-20 06:29:55 +08:00
|
|
|
// Check if the shift can be folded into the instruction.
|
2015-08-20 04:52:55 +08:00
|
|
|
if (RHS->hasOneUse() && isValueAvailable(RHS)) {
|
2014-08-29 08:19:21 +08:00
|
|
|
if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
|
|
|
|
if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
|
|
|
|
AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
|
|
|
|
switch (SI->getOpcode()) {
|
|
|
|
default: break;
|
|
|
|
case Instruction::Shl: ShiftType = AArch64_AM::LSL; break;
|
|
|
|
case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
|
|
|
|
case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
|
|
|
|
}
|
|
|
|
uint64_t ShiftVal = C->getZExtValue();
|
|
|
|
if (ShiftType != AArch64_AM::InvalidShiftExtend) {
|
|
|
|
unsigned RHSReg = getRegForValue(SI->getOperand(0));
|
|
|
|
if (!RHSReg)
|
|
|
|
return 0;
|
|
|
|
bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
|
2015-08-20 04:52:55 +08:00
|
|
|
ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
|
|
|
|
RHSIsKill, ShiftType, ShiftVal, SetFlags,
|
|
|
|
WantResult);
|
|
|
|
if (ResultReg)
|
|
|
|
return ResultReg;
|
2014-08-29 08:19:21 +08:00
|
|
|
}
|
2014-08-20 06:29:55 +08:00
|
|
|
}
|
|
|
|
}
|
2015-08-20 04:52:55 +08:00
|
|
|
}
|
2014-08-20 06:29:55 +08:00
|
|
|
|
|
|
|
unsigned RHSReg = getRegForValue(RHS);
|
|
|
|
if (!RHSReg)
|
|
|
|
return 0;
|
|
|
|
bool RHSIsKill = hasTrivialKill(RHS);
|
2014-08-21 00:34:15 +08:00
|
|
|
|
|
|
|
if (NeedExtend)
|
2014-09-16 07:20:17 +08:00
|
|
|
RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
|
2014-08-21 00:34:15 +08:00
|
|
|
|
2014-09-03 09:38:36 +08:00
|
|
|
return emitAddSub_rr(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
|
|
|
|
SetFlags, WantResult);
|
2014-08-20 06:29:55 +08:00
|
|
|
}
|
|
|
|
|
2014-09-03 09:38:36 +08:00
|
|
|
unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
|
|
|
|
bool LHSIsKill, unsigned RHSReg,
|
|
|
|
bool RHSIsKill, bool SetFlags,
|
|
|
|
bool WantResult) {
|
2014-08-20 06:29:55 +08:00
|
|
|
assert(LHSReg && RHSReg && "Invalid register number.");
|
|
|
|
|
2017-06-13 04:49:53 +08:00
|
|
|
if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
|
|
|
|
RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
|
|
|
|
return 0;
|
|
|
|
|
2014-08-20 06:29:55 +08:00
|
|
|
if (RetVT != MVT::i32 && RetVT != MVT::i64)
|
|
|
|
return 0;
|
|
|
|
|
2014-09-03 09:38:36 +08:00
|
|
|
static const unsigned OpcTable[2][2][2] = {
|
|
|
|
{ { AArch64::SUBWrr, AArch64::SUBXrr },
|
|
|
|
{ AArch64::ADDWrr, AArch64::ADDXrr } },
|
|
|
|
{ { AArch64::SUBSWrr, AArch64::SUBSXrr },
|
|
|
|
{ AArch64::ADDSWrr, AArch64::ADDSXrr } }
|
2014-08-20 06:29:55 +08:00
|
|
|
};
|
2014-09-03 09:38:36 +08:00
|
|
|
bool Is64Bit = RetVT == MVT::i64;
|
|
|
|
unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
|
|
|
|
const TargetRegisterClass *RC =
|
|
|
|
Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
|
2014-08-20 06:29:55 +08:00
|
|
|
unsigned ResultReg;
|
2014-09-03 09:38:36 +08:00
|
|
|
if (WantResult)
|
2014-08-22 04:57:57 +08:00
|
|
|
ResultReg = createResultReg(RC);
|
2014-09-03 09:38:36 +08:00
|
|
|
else
|
|
|
|
ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
|
2014-08-20 06:29:55 +08:00
|
|
|
|
2014-08-22 04:57:57 +08:00
|
|
|
const MCInstrDesc &II = TII.get(Opc);
|
|
|
|
LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
|
|
|
|
RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
|
2014-08-20 06:29:55 +08:00
|
|
|
.addReg(LHSReg, getKillRegState(LHSIsKill))
|
|
|
|
.addReg(RHSReg, getKillRegState(RHSIsKill));
|
|
|
|
return ResultReg;
|
|
|
|
}
|
|
|
|
|
2014-09-03 09:38:36 +08:00
|
|
|
unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
|
|
|
|
bool LHSIsKill, uint64_t Imm,
|
|
|
|
bool SetFlags, bool WantResult) {
|
2014-08-20 06:29:55 +08:00
|
|
|
assert(LHSReg && "Invalid register number.");
|
|
|
|
|
|
|
|
if (RetVT != MVT::i32 && RetVT != MVT::i64)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
unsigned ShiftImm;
|
|
|
|
if (isUInt<12>(Imm))
|
|
|
|
ShiftImm = 0;
|
|
|
|
else if ((Imm & 0xfff000) == Imm) {
|
|
|
|
ShiftImm = 12;
|
|
|
|
Imm >>= 12;
|
|
|
|
} else
|
|
|
|
return 0;
|
|
|
|
|
2014-09-03 09:38:36 +08:00
|
|
|
static const unsigned OpcTable[2][2][2] = {
|
|
|
|
{ { AArch64::SUBWri, AArch64::SUBXri },
|
|
|
|
{ AArch64::ADDWri, AArch64::ADDXri } },
|
|
|
|
{ { AArch64::SUBSWri, AArch64::SUBSXri },
|
|
|
|
{ AArch64::ADDSWri, AArch64::ADDSXri } }
|
2014-08-20 06:29:55 +08:00
|
|
|
};
|
2014-09-03 09:38:36 +08:00
|
|
|
bool Is64Bit = RetVT == MVT::i64;
|
|
|
|
unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
|
|
|
|
const TargetRegisterClass *RC;
|
|
|
|
if (SetFlags)
|
|
|
|
RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
|
|
|
|
else
|
|
|
|
RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
|
2014-08-20 06:29:55 +08:00
|
|
|
unsigned ResultReg;
|
2014-09-03 09:38:36 +08:00
|
|
|
if (WantResult)
|
2014-08-22 04:57:57 +08:00
|
|
|
ResultReg = createResultReg(RC);
|
2014-09-03 09:38:36 +08:00
|
|
|
else
|
|
|
|
ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
|
2014-08-20 06:29:55 +08:00
|
|
|
|
2014-08-22 04:57:57 +08:00
|
|
|
const MCInstrDesc &II = TII.get(Opc);
|
|
|
|
LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
|
2014-08-20 06:29:55 +08:00
|
|
|
.addReg(LHSReg, getKillRegState(LHSIsKill))
|
|
|
|
.addImm(Imm)
|
|
|
|
.addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
|
|
|
|
return ResultReg;
|
|
|
|
}
|
|
|
|
|
2014-09-03 09:38:36 +08:00
|
|
|
unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
|
|
|
|
bool LHSIsKill, unsigned RHSReg,
|
|
|
|
bool RHSIsKill,
|
2014-08-27 08:58:30 +08:00
|
|
|
AArch64_AM::ShiftExtendType ShiftType,
|
2014-09-03 09:38:36 +08:00
|
|
|
uint64_t ShiftImm, bool SetFlags,
|
|
|
|
bool WantResult) {
|
2014-08-20 06:29:55 +08:00
|
|
|
assert(LHSReg && RHSReg && "Invalid register number.");
|
2017-06-13 04:49:53 +08:00
|
|
|
assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
|
|
|
|
RHSReg != AArch64::SP && RHSReg != AArch64::WSP);
|
2014-08-20 06:29:55 +08:00
|
|
|
|
|
|
|
if (RetVT != MVT::i32 && RetVT != MVT::i64)
|
|
|
|
return 0;
|
|
|
|
|
2015-08-20 04:52:55 +08:00
|
|
|
// Don't deal with undefined shifts.
|
|
|
|
if (ShiftImm >= RetVT.getSizeInBits())
|
|
|
|
return 0;
|
|
|
|
|
2014-09-03 09:38:36 +08:00
|
|
|
static const unsigned OpcTable[2][2][2] = {
|
|
|
|
{ { AArch64::SUBWrs, AArch64::SUBXrs },
|
|
|
|
{ AArch64::ADDWrs, AArch64::ADDXrs } },
|
|
|
|
{ { AArch64::SUBSWrs, AArch64::SUBSXrs },
|
|
|
|
{ AArch64::ADDSWrs, AArch64::ADDSXrs } }
|
2014-08-20 06:29:55 +08:00
|
|
|
};
|
2014-09-03 09:38:36 +08:00
|
|
|
bool Is64Bit = RetVT == MVT::i64;
|
|
|
|
unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
|
|
|
|
const TargetRegisterClass *RC =
|
|
|
|
Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
|
2014-08-20 06:29:55 +08:00
|
|
|
unsigned ResultReg;
|
2014-09-03 09:38:36 +08:00
|
|
|
if (WantResult)
|
2014-08-22 04:57:57 +08:00
|
|
|
ResultReg = createResultReg(RC);
|
2014-09-03 09:38:36 +08:00
|
|
|
else
|
|
|
|
ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
|
2014-08-20 06:29:55 +08:00
|
|
|
|
2014-08-22 04:57:57 +08:00
|
|
|
const MCInstrDesc &II = TII.get(Opc);
|
|
|
|
LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
|
|
|
|
RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
|
2014-08-20 06:29:55 +08:00
|
|
|
.addReg(LHSReg, getKillRegState(LHSIsKill))
|
|
|
|
.addReg(RHSReg, getKillRegState(RHSIsKill))
|
|
|
|
.addImm(getShifterImm(ShiftType, ShiftImm));
|
|
|
|
return ResultReg;
|
|
|
|
}
|
|
|
|
|
2014-09-03 09:38:36 +08:00
|
|
|
unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
|
|
|
|
bool LHSIsKill, unsigned RHSReg,
|
|
|
|
bool RHSIsKill,
|
2014-08-27 08:58:30 +08:00
|
|
|
AArch64_AM::ShiftExtendType ExtType,
|
2014-09-03 09:38:36 +08:00
|
|
|
uint64_t ShiftImm, bool SetFlags,
|
|
|
|
bool WantResult) {
|
2014-08-27 08:58:30 +08:00
|
|
|
assert(LHSReg && RHSReg && "Invalid register number.");
|
2017-06-13 04:49:53 +08:00
|
|
|
assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
|
|
|
|
RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
|
2014-08-27 08:58:30 +08:00
|
|
|
|
|
|
|
if (RetVT != MVT::i32 && RetVT != MVT::i64)
|
|
|
|
return 0;
|
|
|
|
|
2015-08-20 04:52:55 +08:00
|
|
|
if (ShiftImm >= 4)
|
|
|
|
return 0;
|
|
|
|
|
2014-09-03 09:38:36 +08:00
|
|
|
static const unsigned OpcTable[2][2][2] = {
|
|
|
|
{ { AArch64::SUBWrx, AArch64::SUBXrx },
|
|
|
|
{ AArch64::ADDWrx, AArch64::ADDXrx } },
|
|
|
|
{ { AArch64::SUBSWrx, AArch64::SUBSXrx },
|
|
|
|
{ AArch64::ADDSWrx, AArch64::ADDSXrx } }
|
2014-08-27 08:58:30 +08:00
|
|
|
};
|
2014-09-03 09:38:36 +08:00
|
|
|
bool Is64Bit = RetVT == MVT::i64;
|
|
|
|
unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
|
|
|
|
const TargetRegisterClass *RC = nullptr;
|
|
|
|
if (SetFlags)
|
|
|
|
RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
|
|
|
|
else
|
|
|
|
RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
|
2014-08-20 06:29:55 +08:00
|
|
|
unsigned ResultReg;
|
2014-09-03 09:38:36 +08:00
|
|
|
if (WantResult)
|
2014-08-22 04:57:57 +08:00
|
|
|
ResultReg = createResultReg(RC);
|
2014-09-03 09:38:36 +08:00
|
|
|
else
|
|
|
|
ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
|
2014-08-20 06:29:55 +08:00
|
|
|
|
2014-08-22 04:57:57 +08:00
|
|
|
const MCInstrDesc &II = TII.get(Opc);
|
|
|
|
LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
|
|
|
|
RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
|
2014-08-20 06:29:55 +08:00
|
|
|
.addReg(LHSReg, getKillRegState(LHSIsKill))
|
|
|
|
.addReg(RHSReg, getKillRegState(RHSIsKill))
|
|
|
|
.addImm(getArithExtendImm(ExtType, ShiftImm));
|
|
|
|
return ResultReg;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
|
|
|
|
Type *Ty = LHS->getType();
|
2015-07-09 10:09:04 +08:00
|
|
|
EVT EVT = TLI.getValueType(DL, Ty, true);
|
2014-08-20 06:29:55 +08:00
|
|
|
if (!EVT.isSimple())
|
|
|
|
return false;
|
|
|
|
MVT VT = EVT.getSimpleVT();
|
|
|
|
|
|
|
|
switch (VT.SimpleTy) {
|
|
|
|
default:
|
|
|
|
return false;
|
|
|
|
case MVT::i1:
|
|
|
|
case MVT::i8:
|
|
|
|
case MVT::i16:
|
|
|
|
case MVT::i32:
|
|
|
|
case MVT::i64:
|
|
|
|
return emitICmp(VT, LHS, RHS, IsZExt);
|
|
|
|
case MVT::f32:
|
|
|
|
case MVT::f64:
|
|
|
|
return emitFCmp(VT, LHS, RHS);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Emit an integer compare as a flag-setting subtract whose result is not
/// wanted.
///
/// \returns true if emitSub reported success (a non-zero register).
bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
                               bool IsZExt) {
  const unsigned Reg = emitSub(RetVT, LHS, RHS, /*SetFlags=*/true,
                               /*WantResult=*/false, IsZExt);
  return Reg != 0;
}
|
|
|
|
|
|
|
|
/// Emit an integer compare of \p LHSReg against the immediate \p Imm as a
/// flag-setting subtract whose result is not wanted.
///
/// \returns true if emitAddSub_ri reported success (a non-zero register).
bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
                                  uint64_t Imm) {
  const unsigned Reg =
      emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, Imm,
                    /*SetFlags=*/true, /*WantResult=*/false);
  return Reg != 0;
}
|
|
|
|
|
|
|
|
/// Emit a floating-point compare of \p LHS and \p RHS for f32 or f64.
///
/// When \p RHS is the constant +0.0 the single-register FCMP*ri opcode is used
/// and no register is requested for \p RHS.
///
/// \returns false if \p RetVT is neither f32 nor f64 or an operand register
/// could not be obtained; true once the compare has been emitted.
bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
  const bool IsDouble = RetVT == MVT::f64;
  if (!IsDouble && RetVT != MVT::f32)
    return false;

  // Check to see if the 2nd operand is a constant that we can encode directly
  // in the compare.
  const auto *CFP = dyn_cast<ConstantFP>(RHS);
  const bool UseImm = CFP && CFP->isZero() && !CFP->isNegative();

  const unsigned LHSReg = getRegForValue(LHS);
  if (!LHSReg)
    return false;
  const bool LHSIsKill = hasTrivialKill(LHS);

  if (UseImm) {
    const unsigned ImmOpc = IsDouble ? AArch64::FCMPDri : AArch64::FCMPSri;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ImmOpc))
        .addReg(LHSReg, getKillRegState(LHSIsKill));
    return true;
  }

  const unsigned RHSReg = getRegForValue(RHS);
  if (!RHSReg)
    return false;
  const bool RHSIsKill = hasTrivialKill(RHS);

  const unsigned RegOpc = IsDouble ? AArch64::FCMPDrr : AArch64::FCMPSrr;
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(RegOpc))
      .addReg(LHSReg, getKillRegState(LHSIsKill))
      .addReg(RHSReg, getKillRegState(RHSIsKill));
  return true;
}
|
|
|
|
|
2014-09-03 09:38:36 +08:00
|
|
|
/// Emit an integer add of \p LHS and \p RHS by forwarding to emitAddSub with
/// UseAdd set.
///
/// \returns whatever emitAddSub returns (0 on failure).
unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
                                  bool SetFlags, bool WantResult, bool IsZExt) {
  const bool UseAdd = true;
  return emitAddSub(UseAdd, RetVT, LHS, RHS, SetFlags, WantResult, IsZExt);
}
|
|
|
|
|
2018-05-01 23:54:18 +08:00
|
|
|
/// This method is a wrapper to simplify add emission.
|
2014-10-16 02:58:02 +08:00
|
|
|
///
|
|
|
|
/// First try to emit an add with an immediate operand using emitAddSub_ri. If
|
|
|
|
/// that fails, then try to materialize the immediate into a register and use
|
|
|
|
/// emitAddSub_rr instead.
|
|
|
|
unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill,
|
|
|
|
int64_t Imm) {
|
|
|
|
unsigned ResultReg;
|
|
|
|
if (Imm < 0)
|
|
|
|
ResultReg = emitAddSub_ri(false, VT, Op0, Op0IsKill, -Imm);
|
|
|
|
else
|
|
|
|
ResultReg = emitAddSub_ri(true, VT, Op0, Op0IsKill, Imm);
|
|
|
|
|
|
|
|
if (ResultReg)
|
|
|
|
return ResultReg;
|
|
|
|
|
|
|
|
unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
|
|
|
|
if (!CReg)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
ResultReg = emitAddSub_rr(true, VT, Op0, Op0IsKill, CReg, true);
|
|
|
|
return ResultReg;
|
|
|
|
}
|
|
|
|
|
2014-09-03 09:38:36 +08:00
|
|
|
/// Emit an integer subtract of \p RHS from \p LHS by forwarding to emitAddSub
/// with UseAdd cleared.
///
/// \returns whatever emitAddSub returns (0 on failure).
unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
                                  bool SetFlags, bool WantResult, bool IsZExt) {
  const bool UseAdd = false;
  return emitAddSub(UseAdd, RetVT, LHS, RHS, SetFlags, WantResult, IsZExt);
}
|
|
|
|
|
|
|
|
/// Emit a flag-setting register-register subtract by forwarding to
/// emitAddSub_rr with UseAdd cleared and SetFlags set.
///
/// \returns whatever emitAddSub_rr returns (0 on failure).
unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
                                      bool LHSIsKill, unsigned RHSReg,
                                      bool RHSIsKill, bool WantResult) {
  const bool UseAdd = false;
  const bool SetFlags = true;
  return emitAddSub_rr(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
                       SetFlags, WantResult);
}
|
|
|
|
|
|
|
|
/// Emit a flag-setting shifted-register subtract by forwarding to
/// emitAddSub_rs with UseAdd cleared and SetFlags set.
///
/// \returns whatever emitAddSub_rs returns (0 on failure).
unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
                                      bool LHSIsKill, unsigned RHSReg,
                                      bool RHSIsKill,
                                      AArch64_AM::ShiftExtendType ShiftType,
                                      uint64_t ShiftImm, bool WantResult) {
  const bool UseAdd = false;
  const bool SetFlags = true;
  return emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
                       ShiftType, ShiftImm, SetFlags, WantResult);
}
|
|
|
|
|
2014-09-04 09:29:18 +08:00
|
|
|
unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
|
|
|
|
const Value *LHS, const Value *RHS) {
|
|
|
|
// Canonicalize immediates to the RHS first.
|
|
|
|
if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
|
|
|
|
std::swap(LHS, RHS);
|
|
|
|
|
2014-09-18 03:51:38 +08:00
|
|
|
// Canonicalize mul by power-of-2 to the RHS.
|
|
|
|
if (LHS->hasOneUse() && isValueAvailable(LHS))
|
|
|
|
if (isMulPowOf2(LHS))
|
|
|
|
std::swap(LHS, RHS);
|
|
|
|
|
2014-09-04 09:29:18 +08:00
|
|
|
// Canonicalize shift immediate to the RHS.
|
2014-09-18 03:51:38 +08:00
|
|
|
if (LHS->hasOneUse() && isValueAvailable(LHS))
|
|
|
|
if (const auto *SI = dyn_cast<ShlOperator>(LHS))
|
2014-09-04 09:29:18 +08:00
|
|
|
if (isa<ConstantInt>(SI->getOperand(1)))
|
2014-09-18 03:51:38 +08:00
|
|
|
std::swap(LHS, RHS);
|
2014-09-04 09:29:18 +08:00
|
|
|
|
|
|
|
unsigned LHSReg = getRegForValue(LHS);
|
|
|
|
if (!LHSReg)
|
|
|
|
return 0;
|
|
|
|
bool LHSIsKill = hasTrivialKill(LHS);
|
|
|
|
|
|
|
|
unsigned ResultReg = 0;
|
|
|
|
if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
|
|
|
|
uint64_t Imm = C->getZExtValue();
|
|
|
|
ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, LHSIsKill, Imm);
|
|
|
|
}
|
|
|
|
if (ResultReg)
|
|
|
|
return ResultReg;
|
|
|
|
|
2014-09-18 03:51:38 +08:00
|
|
|
// Check if the mul can be folded into the instruction.
|
2015-08-20 04:52:55 +08:00
|
|
|
if (RHS->hasOneUse() && isValueAvailable(RHS)) {
|
2014-09-18 03:51:38 +08:00
|
|
|
if (isMulPowOf2(RHS)) {
|
|
|
|
const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
|
|
|
|
const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
|
|
|
|
|
|
|
|
if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
|
|
|
|
if (C->getValue().isPowerOf2())
|
|
|
|
std::swap(MulLHS, MulRHS);
|
|
|
|
|
|
|
|
assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
|
|
|
|
uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
|
|
|
|
|
|
|
|
unsigned RHSReg = getRegForValue(MulLHS);
|
|
|
|
if (!RHSReg)
|
|
|
|
return 0;
|
|
|
|
bool RHSIsKill = hasTrivialKill(MulLHS);
|
2015-08-20 04:52:55 +08:00
|
|
|
ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
|
|
|
|
RHSIsKill, ShiftVal);
|
|
|
|
if (ResultReg)
|
|
|
|
return ResultReg;
|
2014-09-18 03:51:38 +08:00
|
|
|
}
|
2015-08-20 04:52:55 +08:00
|
|
|
}
|
2014-09-18 03:51:38 +08:00
|
|
|
|
2014-09-04 09:29:18 +08:00
|
|
|
// Check if the shift can be folded into the instruction.
|
2015-08-20 04:52:55 +08:00
|
|
|
if (RHS->hasOneUse() && isValueAvailable(RHS)) {
|
2014-09-18 03:51:38 +08:00
|
|
|
if (const auto *SI = dyn_cast<ShlOperator>(RHS))
|
|
|
|
if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
|
|
|
|
uint64_t ShiftVal = C->getZExtValue();
|
|
|
|
unsigned RHSReg = getRegForValue(SI->getOperand(0));
|
|
|
|
if (!RHSReg)
|
|
|
|
return 0;
|
|
|
|
bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
|
2015-08-20 04:52:55 +08:00
|
|
|
ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
|
|
|
|
RHSIsKill, ShiftVal);
|
|
|
|
if (ResultReg)
|
|
|
|
return ResultReg;
|
2014-09-18 03:51:38 +08:00
|
|
|
}
|
2015-08-20 04:52:55 +08:00
|
|
|
}
|
2014-09-04 09:29:18 +08:00
|
|
|
|
|
|
|
unsigned RHSReg = getRegForValue(RHS);
|
|
|
|
if (!RHSReg)
|
|
|
|
return 0;
|
|
|
|
bool RHSIsKill = hasTrivialKill(RHS);
|
|
|
|
|
2014-09-14 07:46:28 +08:00
|
|
|
MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
|
|
|
|
ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
|
|
|
|
if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
|
|
|
|
uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
|
|
|
|
ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
|
|
|
|
}
|
|
|
|
return ResultReg;
|
2014-09-04 09:29:18 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
|
|
|
|
unsigned LHSReg, bool LHSIsKill,
|
|
|
|
uint64_t Imm) {
|
2016-05-27 19:36:04 +08:00
|
|
|
static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
|
|
|
|
"ISD nodes are not consecutive!");
|
2014-09-04 09:29:18 +08:00
|
|
|
static const unsigned OpcTable[3][2] = {
|
|
|
|
{ AArch64::ANDWri, AArch64::ANDXri },
|
|
|
|
{ AArch64::ORRWri, AArch64::ORRXri },
|
|
|
|
{ AArch64::EORWri, AArch64::EORXri }
|
|
|
|
};
|
|
|
|
const TargetRegisterClass *RC;
|
|
|
|
unsigned Opc;
|
|
|
|
unsigned RegSize;
|
2014-08-22 02:02:25 +08:00
|
|
|
switch (RetVT.SimpleTy) {
|
|
|
|
default:
|
|
|
|
return 0;
|
2014-09-14 07:46:28 +08:00
|
|
|
case MVT::i1:
|
|
|
|
case MVT::i8:
|
|
|
|
case MVT::i16:
|
2014-09-04 09:29:18 +08:00
|
|
|
case MVT::i32: {
|
|
|
|
unsigned Idx = ISDOpc - ISD::AND;
|
|
|
|
Opc = OpcTable[Idx][0];
|
2014-08-22 02:02:25 +08:00
|
|
|
RC = &AArch64::GPR32spRegClass;
|
|
|
|
RegSize = 32;
|
|
|
|
break;
|
2014-09-04 09:29:18 +08:00
|
|
|
}
|
2014-08-22 02:02:25 +08:00
|
|
|
case MVT::i64:
|
2014-09-04 09:29:18 +08:00
|
|
|
Opc = OpcTable[ISDOpc - ISD::AND][1];
|
2014-08-22 02:02:25 +08:00
|
|
|
RC = &AArch64::GPR64spRegClass;
|
|
|
|
RegSize = 64;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
|
|
|
|
return 0;
|
|
|
|
|
2014-09-14 07:46:28 +08:00
|
|
|
unsigned ResultReg =
|
|
|
|
fastEmitInst_ri(Opc, RC, LHSReg, LHSIsKill,
|
|
|
|
AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
|
|
|
|
if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
|
|
|
|
uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
|
|
|
|
ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
|
|
|
|
}
|
|
|
|
return ResultReg;
|
2014-08-22 02:02:25 +08:00
|
|
|
}
|
|
|
|
|
2014-09-04 09:29:18 +08:00
|
|
|
unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
|
|
|
|
unsigned LHSReg, bool LHSIsKill,
|
|
|
|
unsigned RHSReg, bool RHSIsKill,
|
|
|
|
uint64_t ShiftImm) {
|
2016-05-27 19:36:04 +08:00
|
|
|
static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
|
|
|
|
"ISD nodes are not consecutive!");
|
2014-09-04 09:29:18 +08:00
|
|
|
static const unsigned OpcTable[3][2] = {
|
|
|
|
{ AArch64::ANDWrs, AArch64::ANDXrs },
|
|
|
|
{ AArch64::ORRWrs, AArch64::ORRXrs },
|
|
|
|
{ AArch64::EORWrs, AArch64::EORXrs }
|
|
|
|
};
|
2015-08-20 04:52:55 +08:00
|
|
|
|
|
|
|
// Don't deal with undefined shifts.
|
|
|
|
if (ShiftImm >= RetVT.getSizeInBits())
|
|
|
|
return 0;
|
|
|
|
|
2014-09-04 09:29:18 +08:00
|
|
|
const TargetRegisterClass *RC;
|
|
|
|
unsigned Opc;
|
|
|
|
switch (RetVT.SimpleTy) {
|
2014-09-14 07:46:28 +08:00
|
|
|
default:
|
|
|
|
return 0;
|
|
|
|
case MVT::i1:
|
|
|
|
case MVT::i8:
|
|
|
|
case MVT::i16:
|
|
|
|
case MVT::i32:
|
|
|
|
Opc = OpcTable[ISDOpc - ISD::AND][0];
|
|
|
|
RC = &AArch64::GPR32RegClass;
|
|
|
|
break;
|
|
|
|
case MVT::i64:
|
|
|
|
Opc = OpcTable[ISDOpc - ISD::AND][1];
|
|
|
|
RC = &AArch64::GPR64RegClass;
|
|
|
|
break;
|
2014-09-04 09:29:18 +08:00
|
|
|
}
|
2014-09-14 07:46:28 +08:00
|
|
|
unsigned ResultReg =
|
|
|
|
fastEmitInst_rri(Opc, RC, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
|
|
|
|
AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
|
|
|
|
if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
|
|
|
|
uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
|
|
|
|
ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
|
|
|
|
}
|
|
|
|
return ResultReg;
|
2014-09-04 09:29:18 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Emit an AND of \p LHSReg with the immediate \p Imm.
///
/// Convenience wrapper around emitLogicalOp_ri with the opcode fixed to
/// ISD::AND; its return value is passed back unchanged.
unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
                                     uint64_t Imm) {
  const unsigned AndOpc = ISD::AND;
  return emitLogicalOp_ri(AndOpc, RetVT, LHSReg, LHSIsKill, Imm);
}
|
|
|
|
|
2014-10-15 04:36:02 +08:00
|
|
|
unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
|
|
|
|
bool WantZExt, MachineMemOperand *MMO) {
|
2015-07-02 01:58:53 +08:00
|
|
|
if (!TLI.allowsMisalignedMemoryAccesses(VT))
|
2015-06-15 23:48:44 +08:00
|
|
|
return 0;
|
|
|
|
|
Reapply [FastISel][AArch64] Add support for more addressing modes (r215597).
Note: This was originally reverted to track down a buildbot error. Reapply
without any modifications.
Original commit message:
FastISel didn't take much advantage of the different addressing modes available
to it on AArch64. This commit allows the ComputeAddress method to recognize more
addressing modes that allows shifts and sign-/zero-extensions to be folded into
the memory operation itself.
For Example:
lsl x1, x1, #3 --> ldr x0, [x0, x1, lsl #3]
ldr x0, [x0, x1]
sxtw x1, w1
lsl x1, x1, #3 --> ldr x0, [x0, x1, sxtw #3]
ldr x0, [x0, x1]
llvm-svn: 216013
2014-08-20 03:44:17 +08:00
|
|
|
// Simplify this down to something we can handle.
|
2014-09-16 07:20:17 +08:00
|
|
|
if (!simplifyAddress(Addr, VT))
|
2014-10-15 04:36:02 +08:00
|
|
|
return 0;
|
Reapply [FastISel][AArch64] Add support for more addressing modes (r215597).
Note: This was originally reverted to track down a buildbot error. Reapply
without any modifications.
Original commit message:
FastISel didn't take much advantage of the different addressing modes available
to it on AArch64. This commit allows the ComputeAddress method to recognize more
addressing modes that allows shifts and sign-/zero-extensions to be folded into
the memory operation itself.
For Example:
lsl x1, x1, #3 --> ldr x0, [x0, x1, lsl #3]
ldr x0, [x0, x1]
sxtw x1, w1
lsl x1, x1, #3 --> ldr x0, [x0, x1, sxtw #3]
ldr x0, [x0, x1]
llvm-svn: 216013
2014-08-20 03:44:17 +08:00
|
|
|
|
2014-09-30 08:49:54 +08:00
|
|
|
unsigned ScaleFactor = getImplicitScaleFactor(VT);
|
|
|
|
if (!ScaleFactor)
|
|
|
|
llvm_unreachable("Unexpected value type.");
|
Reapply [FastISel][AArch64] Add support for more addressing modes (r215597).
Note: This was originally reverted to track down a buildbot error. Reapply
without any modifications.
Original commit message:
FastISel didn't take much advantage of the different addressing modes available
to it on AArch64. This commit allows the ComputeAddress method to recognize more
addressing modes that allows shifts and sign-/zero-extensions to be folded into
the memory operation itself.
For Example:
lsl x1, x1, #3 --> ldr x0, [x0, x1, lsl #3]
ldr x0, [x0, x1]
sxtw x1, w1
lsl x1, x1, #3 --> ldr x0, [x0, x1, sxtw #3]
ldr x0, [x0, x1]
llvm-svn: 216013
2014-08-20 03:44:17 +08:00
|
|
|
|
2014-03-29 18:18:08 +08:00
|
|
|
// Negative offsets require unscaled, 9-bit, signed immediate offsets.
|
|
|
|
// Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
|
Reapply [FastISel][AArch64] Add support for more addressing modes (r215597).
Note: This was originally reverted to track down a buildbot error. Reapply
without any modifications.
Original commit message:
FastISel didn't take much advantage of the different addressing modes available
to it on AArch64. This commit allows the ComputeAddress method to recognize more
addressing modes that allows shifts and sign-/zero-extensions to be folded into
the memory operation itself.
For Example:
lsl x1, x1, #3 --> ldr x0, [x0, x1, lsl #3]
ldr x0, [x0, x1]
sxtw x1, w1
lsl x1, x1, #3 --> ldr x0, [x0, x1, sxtw #3]
ldr x0, [x0, x1]
llvm-svn: 216013
2014-08-20 03:44:17 +08:00
|
|
|
bool UseScaled = true;
|
|
|
|
if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
|
|
|
|
UseScaled = false;
|
|
|
|
ScaleFactor = 1;
|
|
|
|
}
|
|
|
|
|
2014-10-07 11:39:59 +08:00
|
|
|
static const unsigned GPOpcTable[2][8][4] = {
|
2014-09-30 08:49:58 +08:00
|
|
|
// Sign-extend.
|
2014-10-07 11:39:59 +08:00
|
|
|
{ { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi,
|
|
|
|
AArch64::LDURXi },
|
|
|
|
{ AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi,
|
2014-09-30 08:49:58 +08:00
|
|
|
AArch64::LDURXi },
|
2014-10-07 11:39:59 +08:00
|
|
|
{ AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui,
|
2014-09-30 08:49:58 +08:00
|
|
|
AArch64::LDRXui },
|
2014-10-07 11:39:59 +08:00
|
|
|
{ AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui,
|
|
|
|
AArch64::LDRXui },
|
|
|
|
{ AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
|
|
|
|
AArch64::LDRXroX },
|
|
|
|
{ AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
|
2014-09-30 08:49:58 +08:00
|
|
|
AArch64::LDRXroX },
|
2014-10-07 11:39:59 +08:00
|
|
|
{ AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
|
2014-09-30 08:49:58 +08:00
|
|
|
AArch64::LDRXroW },
|
2014-10-07 11:39:59 +08:00
|
|
|
{ AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
|
|
|
|
AArch64::LDRXroW }
|
2014-09-30 08:49:58 +08:00
|
|
|
},
|
|
|
|
// Zero-extend.
|
|
|
|
{ { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
|
|
|
|
AArch64::LDURXi },
|
2014-10-07 11:39:59 +08:00
|
|
|
{ AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
|
|
|
|
AArch64::LDURXi },
|
2014-09-30 08:49:58 +08:00
|
|
|
{ AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
|
|
|
|
AArch64::LDRXui },
|
2014-10-07 11:39:59 +08:00
|
|
|
{ AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
|
|
|
|
AArch64::LDRXui },
|
|
|
|
{ AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
|
|
|
|
AArch64::LDRXroX },
|
2014-09-30 08:49:58 +08:00
|
|
|
{ AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
|
|
|
|
AArch64::LDRXroX },
|
2014-10-07 11:39:59 +08:00
|
|
|
{ AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
|
|
|
|
AArch64::LDRXroW },
|
2014-09-30 08:49:58 +08:00
|
|
|
{ AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
|
|
|
|
AArch64::LDRXroW }
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
static const unsigned FPOpcTable[4][2] = {
|
|
|
|
{ AArch64::LDURSi, AArch64::LDURDi },
|
|
|
|
{ AArch64::LDRSui, AArch64::LDRDui },
|
|
|
|
{ AArch64::LDRSroX, AArch64::LDRDroX },
|
|
|
|
{ AArch64::LDRSroW, AArch64::LDRDroW }
|
Reapply [FastISel][AArch64] Add support for more addressing modes (r215597).
Note: This was originally reverted to track down a buildbot error. Reapply
without any modifications.
Original commit message:
FastISel didn't take much advantage of the different addressing modes available
to it on AArch64. This commit allows the ComputeAddress method to recognize more
addressing modes that allows shifts and sign-/zero-extensions to be folded into
the memory operation itself.
For Example:
lsl x1, x1, #3 --> ldr x0, [x0, x1, lsl #3]
ldr x0, [x0, x1]
sxtw x1, w1
lsl x1, x1, #3 --> ldr x0, [x0, x1, sxtw #3]
ldr x0, [x0, x1]
llvm-svn: 216013
2014-08-20 03:44:17 +08:00
|
|
|
};
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
unsigned Opc;
|
|
|
|
const TargetRegisterClass *RC;
|
Reapply [FastISel][AArch64] Add support for more addressing modes (r215597).
Note: This was originally reverted to track down a buildbot error. Reapply
without any modifications.
Original commit message:
FastISel didn't take much advantage of the different addressing modes available
to it on AArch64. This commit allows the ComputeAddress method to recognize more
addressing modes that allows shifts and sign-/zero-extensions to be folded into
the memory operation itself.
For Example:
lsl x1, x1, #3 --> ldr x0, [x0, x1, lsl #3]
ldr x0, [x0, x1]
sxtw x1, w1
lsl x1, x1, #3 --> ldr x0, [x0, x1, sxtw #3]
ldr x0, [x0, x1]
llvm-svn: 216013
2014-08-20 03:44:17 +08:00
|
|
|
bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
|
|
|
|
Addr.getOffsetReg();
|
|
|
|
unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
|
|
|
|
if (Addr.getExtendType() == AArch64_AM::UXTW ||
|
|
|
|
Addr.getExtendType() == AArch64_AM::SXTW)
|
|
|
|
Idx++;
|
|
|
|
|
2014-10-07 11:39:59 +08:00
|
|
|
bool IsRet64Bit = RetVT == MVT::i64;
|
2014-03-29 18:18:08 +08:00
|
|
|
switch (VT.SimpleTy) {
|
2014-09-30 08:49:58 +08:00
|
|
|
default:
|
|
|
|
llvm_unreachable("Unexpected value type.");
|
|
|
|
case MVT::i1: // Intentional fall-through.
|
|
|
|
case MVT::i8:
|
2014-10-07 11:39:59 +08:00
|
|
|
Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
|
|
|
|
RC = (IsRet64Bit && !WantZExt) ?
|
|
|
|
&AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
|
2014-09-30 08:49:58 +08:00
|
|
|
break;
|
|
|
|
case MVT::i16:
|
2014-10-07 11:39:59 +08:00
|
|
|
Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
|
|
|
|
RC = (IsRet64Bit && !WantZExt) ?
|
|
|
|
&AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
|
2014-09-30 08:49:58 +08:00
|
|
|
break;
|
|
|
|
case MVT::i32:
|
2014-10-07 11:39:59 +08:00
|
|
|
Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
|
|
|
|
RC = (IsRet64Bit && !WantZExt) ?
|
|
|
|
&AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
|
2014-09-30 08:49:58 +08:00
|
|
|
break;
|
|
|
|
case MVT::i64:
|
2014-10-07 11:39:59 +08:00
|
|
|
Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
|
2014-09-30 08:49:58 +08:00
|
|
|
RC = &AArch64::GPR64RegClass;
|
|
|
|
break;
|
|
|
|
case MVT::f32:
|
|
|
|
Opc = FPOpcTable[Idx][0];
|
|
|
|
RC = &AArch64::FPR32RegClass;
|
|
|
|
break;
|
|
|
|
case MVT::f64:
|
|
|
|
Opc = FPOpcTable[Idx][1];
|
|
|
|
RC = &AArch64::FPR64RegClass;
|
|
|
|
break;
|
2014-03-29 18:18:08 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Create the base instruction, then add the operands.
|
2014-10-15 04:36:02 +08:00
|
|
|
unsigned ResultReg = createResultReg(RC);
|
2014-03-29 18:18:08 +08:00
|
|
|
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
|
|
|
TII.get(Opc), ResultReg);
|
2014-09-16 07:20:17 +08:00
|
|
|
addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2014-10-15 04:36:02 +08:00
|
|
|
// Loading an i1 requires special handling.
|
|
|
|
if (VT == MVT::i1) {
|
|
|
|
unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1);
|
|
|
|
assert(ANDReg && "Unexpected AND instruction emission failure.");
|
|
|
|
ResultReg = ANDReg;
|
|
|
|
}
|
|
|
|
|
2014-10-07 11:39:59 +08:00
|
|
|
// For zero-extending loads to 64bit we emit a 32bit load and then convert
|
2014-10-15 04:36:02 +08:00
|
|
|
// the 32bit reg to a 64bit reg.
|
2014-10-07 11:39:59 +08:00
|
|
|
if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
|
|
|
|
unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
|
|
|
TII.get(AArch64::SUBREG_TO_REG), Reg64)
|
|
|
|
.addImm(0)
|
|
|
|
.addReg(ResultReg, getKillRegState(true))
|
|
|
|
.addImm(AArch64::sub_32);
|
|
|
|
ResultReg = Reg64;
|
|
|
|
}
|
2014-10-15 04:36:02 +08:00
|
|
|
return ResultReg;
|
2014-03-29 18:18:08 +08:00
|
|
|
}
|
|
|
|
|
2014-09-03 09:38:36 +08:00
|
|
|
bool AArch64FastISel::selectAddSub(const Instruction *I) {
|
|
|
|
MVT VT;
|
2014-09-16 05:27:56 +08:00
|
|
|
if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
|
2014-09-03 09:38:36 +08:00
|
|
|
return false;
|
|
|
|
|
2014-09-16 05:27:56 +08:00
|
|
|
if (VT.isVector())
|
|
|
|
return selectOperator(I, I->getOpcode());
|
|
|
|
|
2014-09-03 09:38:36 +08:00
|
|
|
unsigned ResultReg;
|
2014-09-16 05:27:56 +08:00
|
|
|
switch (I->getOpcode()) {
|
|
|
|
default:
|
|
|
|
llvm_unreachable("Unexpected instruction.");
|
|
|
|
case Instruction::Add:
|
2014-09-03 09:38:36 +08:00
|
|
|
ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
|
2014-09-16 05:27:56 +08:00
|
|
|
break;
|
|
|
|
case Instruction::Sub:
|
2014-09-03 09:38:36 +08:00
|
|
|
ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
|
2014-09-16 05:27:56 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (!ResultReg)
|
|
|
|
return false;
|
2014-09-03 09:38:36 +08:00
|
|
|
|
2014-09-04 04:56:52 +08:00
|
|
|
updateValueMap(I, ResultReg);
|
2014-09-03 09:38:36 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2014-09-16 05:27:56 +08:00
|
|
|
bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
|
2014-09-04 09:29:18 +08:00
|
|
|
MVT VT;
|
2014-09-16 05:27:56 +08:00
|
|
|
if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
|
2014-09-04 09:29:18 +08:00
|
|
|
return false;
|
|
|
|
|
2014-09-16 05:27:56 +08:00
|
|
|
if (VT.isVector())
|
|
|
|
return selectOperator(I, I->getOpcode());
|
|
|
|
|
|
|
|
unsigned ResultReg;
|
|
|
|
switch (I->getOpcode()) {
|
|
|
|
default:
|
|
|
|
llvm_unreachable("Unexpected instruction.");
|
|
|
|
case Instruction::And:
|
|
|
|
ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
|
|
|
|
break;
|
|
|
|
case Instruction::Or:
|
|
|
|
ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
|
|
|
|
break;
|
|
|
|
case Instruction::Xor:
|
|
|
|
ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
|
|
|
|
break;
|
|
|
|
}
|
2014-09-04 09:29:18 +08:00
|
|
|
if (!ResultReg)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
updateValueMap(I, ResultReg);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2014-09-16 07:20:17 +08:00
|
|
|
bool AArch64FastISel::selectLoad(const Instruction *I) {
|
2014-03-29 18:18:08 +08:00
|
|
|
MVT VT;
|
|
|
|
// Verify we have a legal type before going any further. Currently, we handle
|
|
|
|
// simple types that will directly fit in a register (i32/f32/i64/f64) or
|
|
|
|
// those that can be sign or zero-extended to a basic operation (i1/i8/i16).
|
2014-09-16 05:27:54 +08:00
|
|
|
if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
|
|
|
|
cast<LoadInst>(I)->isAtomic())
|
2014-03-29 18:18:08 +08:00
|
|
|
return false;
|
|
|
|
|
2016-04-12 05:08:06 +08:00
|
|
|
const Value *SV = I->getOperand(0);
|
|
|
|
if (TLI.supportSwiftError()) {
|
|
|
|
// Swifterror values can come from either a function parameter with
|
|
|
|
// swifterror attribute or an alloca with swifterror attribute.
|
|
|
|
if (const Argument *Arg = dyn_cast<Argument>(SV)) {
|
|
|
|
if (Arg->hasSwiftErrorAttr())
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
|
|
|
|
if (Alloca->isSwiftError())
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-03-29 18:18:08 +08:00
|
|
|
// See if we can handle this address.
|
|
|
|
Address Addr;
|
2014-09-16 07:20:17 +08:00
|
|
|
if (!computeAddress(I->getOperand(0), Addr, I->getType()))
|
2014-03-29 18:18:08 +08:00
|
|
|
return false;
|
|
|
|
|
2014-10-15 04:36:02 +08:00
|
|
|
// Fold the following sign-/zero-extend into the load instruction.
|
2014-09-30 08:49:58 +08:00
|
|
|
bool WantZExt = true;
|
2014-10-07 11:39:59 +08:00
|
|
|
MVT RetVT = VT;
|
2014-10-15 04:36:02 +08:00
|
|
|
const Value *IntExtVal = nullptr;
|
2014-10-07 11:39:59 +08:00
|
|
|
if (I->hasOneUse()) {
|
|
|
|
if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
|
2014-10-15 04:36:02 +08:00
|
|
|
if (isTypeSupported(ZE->getType(), RetVT))
|
|
|
|
IntExtVal = ZE;
|
|
|
|
else
|
2014-10-07 11:39:59 +08:00
|
|
|
RetVT = VT;
|
|
|
|
} else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
|
2014-10-15 04:36:02 +08:00
|
|
|
if (isTypeSupported(SE->getType(), RetVT))
|
|
|
|
IntExtVal = SE;
|
|
|
|
else
|
2014-10-07 11:39:59 +08:00
|
|
|
RetVT = VT;
|
|
|
|
WantZExt = false;
|
|
|
|
}
|
|
|
|
}
|
2014-09-30 08:49:58 +08:00
|
|
|
|
2014-10-15 04:36:02 +08:00
|
|
|
unsigned ResultReg =
|
|
|
|
emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
|
|
|
|
if (!ResultReg)
|
2014-03-29 18:18:08 +08:00
|
|
|
return false;
|
|
|
|
|
2014-10-15 04:36:02 +08:00
|
|
|
// There are a few different cases we have to handle, because the load or the
|
|
|
|
// sign-/zero-extend might not be selected by FastISel if we fall-back to
|
|
|
|
// SelectionDAG. There is also an ordering issue when both instructions are in
|
|
|
|
// different basic blocks.
|
|
|
|
// 1.) The load instruction is selected by FastISel, but the integer extend
|
|
|
|
// not. This usually happens when the integer extend is in a different
|
|
|
|
// basic block and SelectionDAG took over for that basic block.
|
|
|
|
// 2.) The load instruction is selected before the integer extend. This only
|
|
|
|
// happens when the integer extend is in a different basic block.
|
|
|
|
// 3.) The load instruction is selected by SelectionDAG and the integer extend
|
|
|
|
// by FastISel. This happens if there are instructions between the load
|
|
|
|
// and the integer extend that couldn't be selected by FastISel.
|
|
|
|
if (IntExtVal) {
|
|
|
|
// The integer extend hasn't been emitted yet. FastISel or SelectionDAG
|
|
|
|
// could select it. Emit a copy to subreg if necessary. FastISel will remove
|
|
|
|
// it when it selects the integer extend.
|
|
|
|
unsigned Reg = lookUpRegForValue(IntExtVal);
|
2015-04-10 04:00:46 +08:00
|
|
|
auto *MI = MRI.getUniqueVRegDef(Reg);
|
|
|
|
if (!MI) {
|
2014-10-15 04:36:02 +08:00
|
|
|
if (RetVT == MVT::i64 && VT <= MVT::i32) {
|
|
|
|
if (WantZExt) {
|
|
|
|
// Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
|
2018-12-18 01:25:53 +08:00
|
|
|
MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt));
|
|
|
|
ResultReg = std::prev(I)->getOperand(0).getReg();
|
|
|
|
removeDeadCode(I, std::next(I));
|
2014-10-15 04:36:02 +08:00
|
|
|
} else
|
|
|
|
ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
|
|
|
|
/*IsKill=*/true,
|
|
|
|
AArch64::sub_32);
|
|
|
|
}
|
|
|
|
updateValueMap(I, ResultReg);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
// The integer extend has already been emitted - delete all the instructions
|
|
|
|
// that have been emitted by the integer extend lowering code and use the
|
|
|
|
// result from the load instruction directly.
|
2015-04-10 04:00:46 +08:00
|
|
|
while (MI) {
|
2014-10-15 04:36:02 +08:00
|
|
|
Reg = 0;
|
|
|
|
for (auto &Opnd : MI->uses()) {
|
|
|
|
if (Opnd.isReg()) {
|
|
|
|
Reg = Opnd.getReg();
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2018-12-18 01:25:53 +08:00
|
|
|
MachineBasicBlock::iterator I(MI);
|
|
|
|
removeDeadCode(I, std::next(I));
|
2015-04-10 04:00:46 +08:00
|
|
|
MI = nullptr;
|
|
|
|
if (Reg)
|
|
|
|
MI = MRI.getUniqueVRegDef(Reg);
|
2014-10-15 04:36:02 +08:00
|
|
|
}
|
|
|
|
updateValueMap(IntExtVal, ResultReg);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2014-09-04 04:56:52 +08:00
|
|
|
updateValueMap(I, ResultReg);
|
2014-03-29 18:18:08 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2016-07-21 05:12:27 +08:00
|
|
|
/// Emit a store of \p SrcReg to the address held in \p AddrReg using the
/// STLRB / STLRH / STLRW / STLRX opcode that matches \p VT.
///
/// Both registers are constrained to the register classes the chosen
/// instruction requires for operands 0 and 1, and \p MMO is attached to the
/// new instruction.
///
/// \returns true if the store was emitted, false if \p VT is not one of
/// i8/i16/i32/i64.
bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,
                                       unsigned AddrReg,
                                       MachineMemOperand *MMO) {
  unsigned Opc;
  switch (VT.SimpleTy) {
  case MVT::i8:
    Opc = AArch64::STLRB;
    break;
  case MVT::i16:
    Opc = AArch64::STLRH;
    break;
  case MVT::i32:
    Opc = AArch64::STLRW;
    break;
  case MVT::i64:
    Opc = AArch64::STLRX;
    break;
  default:
    return false;
  }

  const MCInstrDesc &Desc = TII.get(Opc);
  // Operand 0 is the stored value, operand 1 the address.
  const unsigned ValueReg = constrainOperandRegClass(Desc, SrcReg, 0);
  const unsigned PtrReg = constrainOperandRegClass(Desc, AddrReg, 1);

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, Desc)
      .addReg(ValueReg)
      .addReg(PtrReg)
      .addMemOperand(MMO);
  return true;
}
|
|
|
|
|
2014-09-16 07:20:17 +08:00
|
|
|
bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
|
Reapply [FastISel][AArch64] Add support for more addressing modes (r215597).
Note: This was originally reverted to track down a buildbot error. Reapply
without any modifications.
Original commit message:
FastISel didn't take much advantage of the different addressing modes available
to it on AArch64. This commit allows the ComputeAddress method to recognize more
addressing modes that allows shifts and sign-/zero-extensions to be folded into
the memory operation itself.
For Example:
lsl x1, x1, #3 --> ldr x0, [x0, x1, lsl #3]
ldr x0, [x0, x1]
sxtw x1, w1
lsl x1, x1, #3 --> ldr x0, [x0, x1, sxtw #3]
ldr x0, [x0, x1]
llvm-svn: 216013
2014-08-20 03:44:17 +08:00
|
|
|
MachineMemOperand *MMO) {
|
2015-07-02 01:58:53 +08:00
|
|
|
if (!TLI.allowsMisalignedMemoryAccesses(VT))
|
2015-06-15 23:48:44 +08:00
|
|
|
return false;
|
|
|
|
|
Reapply [FastISel][AArch64] Add support for more addressing modes (r215597).
Note: This was originally reverted to track down a buildbot error. Reapply
without any modifications.
Original commit message:
FastISel didn't take much advantage of the different addressing modes available
to it on AArch64. This commit allows the ComputeAddress method to recognize more
addressing modes that allows shifts and sign-/zero-extensions to be folded into
the memory operation itself.
For Example:
lsl x1, x1, #3 --> ldr x0, [x0, x1, lsl #3]
ldr x0, [x0, x1]
sxtw x1, w1
lsl x1, x1, #3 --> ldr x0, [x0, x1, sxtw #3]
ldr x0, [x0, x1]
llvm-svn: 216013
2014-08-20 03:44:17 +08:00
|
|
|
// Simplify this down to something we can handle.
|
2014-09-16 07:20:17 +08:00
|
|
|
if (!simplifyAddress(Addr, VT))
|
Reapply [FastISel][AArch64] Add support for more addressing modes (r215597).
Note: This was originally reverted to track down a buildbot error. Reapply
without any modifications.
Original commit message:
FastISel didn't take much advantage of the different addressing modes available
to it on AArch64. This commit allows the ComputeAddress method to recognize more
addressing modes that allows shifts and sign-/zero-extensions to be folded into
the memory operation itself.
For Example:
lsl x1, x1, #3 --> ldr x0, [x0, x1, lsl #3]
ldr x0, [x0, x1]
sxtw x1, w1
lsl x1, x1, #3 --> ldr x0, [x0, x1, sxtw #3]
ldr x0, [x0, x1]
llvm-svn: 216013
2014-08-20 03:44:17 +08:00
|
|
|
return false;
|
|
|
|
|
2014-09-30 08:49:54 +08:00
|
|
|
unsigned ScaleFactor = getImplicitScaleFactor(VT);
|
|
|
|
if (!ScaleFactor)
|
|
|
|
llvm_unreachable("Unexpected value type.");
|
Reapply [FastISel][AArch64] Add support for more addressing modes (r215597).
Note: This was originally reverted to track down a buildbot error. Reapply
without any modifications.
Original commit message:
FastISel didn't take much advantage of the different addressing modes available
to it on AArch64. This commit allows the ComputeAddress method to recognize more
addressing modes that allows shifts and sign-/zero-extensions to be folded into
the memory operation itself.
For Example:
lsl x1, x1, #3 --> ldr x0, [x0, x1, lsl #3]
ldr x0, [x0, x1]
sxtw x1, w1
lsl x1, x1, #3 --> ldr x0, [x0, x1, sxtw #3]
ldr x0, [x0, x1]
llvm-svn: 216013
2014-08-20 03:44:17 +08:00
|
|
|
|
2014-03-29 18:18:08 +08:00
|
|
|
// Negative offsets require unscaled, 9-bit, signed immediate offsets.
|
|
|
|
// Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
|
Reapply [FastISel][AArch64] Add support for more addressing modes (r215597).
Note: This was originally reverted to track down a buildbot error. Reapply
without any modifications.
Original commit message:
FastISel didn't take much advantage of the different addressing modes available
to it on AArch64. This commit allows the ComputeAddress method to recognize more
addressing modes that allows shifts and sign-/zero-extensions to be folded into
the memory operation itself.
For Example:
lsl x1, x1, #3 --> ldr x0, [x0, x1, lsl #3]
ldr x0, [x0, x1]
sxtw x1, w1
lsl x1, x1, #3 --> ldr x0, [x0, x1, sxtw #3]
ldr x0, [x0, x1]
llvm-svn: 216013
2014-08-20 03:44:17 +08:00
|
|
|
bool UseScaled = true;
|
|
|
|
if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
|
|
|
|
UseScaled = false;
|
|
|
|
ScaleFactor = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static const unsigned OpcTable[4][6] = {
|
|
|
|
{ AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi,
|
|
|
|
AArch64::STURSi, AArch64::STURDi },
|
|
|
|
{ AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui,
|
|
|
|
AArch64::STRSui, AArch64::STRDui },
|
|
|
|
{ AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
|
|
|
|
AArch64::STRSroX, AArch64::STRDroX },
|
|
|
|
{ AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
|
|
|
|
AArch64::STRSroW, AArch64::STRDroW }
|
|
|
|
};
|
|
|
|
|
|
|
|
unsigned Opc;
|
2014-03-29 18:18:08 +08:00
|
|
|
bool VTIsi1 = false;
|
Reapply [FastISel][AArch64] Add support for more addressing modes (r215597).
Note: This was originally reverted to track down a buildbot error. Reapply
without any modifications.
Original commit message:
FastISel didn't take much advantage of the different addressing modes available
to it on AArch64. This commit allows the ComputeAddress method to recognize more
addressing modes that allows shifts and sign-/zero-extensions to be folded into
the memory operation itself.
For Example:
lsl x1, x1, #3 --> ldr x0, [x0, x1, lsl #3]
ldr x0, [x0, x1]
sxtw x1, w1
lsl x1, x1, #3 --> ldr x0, [x0, x1, sxtw #3]
ldr x0, [x0, x1]
llvm-svn: 216013
2014-08-20 03:44:17 +08:00
|
|
|
bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
|
|
|
|
Addr.getOffsetReg();
|
|
|
|
unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
|
|
|
|
if (Addr.getExtendType() == AArch64_AM::UXTW ||
|
|
|
|
Addr.getExtendType() == AArch64_AM::SXTW)
|
|
|
|
Idx++;
|
|
|
|
|
2014-03-29 18:18:08 +08:00
|
|
|
switch (VT.SimpleTy) {
|
Reapply [FastISel][AArch64] Add support for more addressing modes (r215597).
Note: This was originally reverted to track down a buildbot error. Reapply
without any modifications.
Original commit message:
FastISel didn't take much advantage of the different addressing modes available
to it on AArch64. This commit allows the ComputeAddress method to recognize more
addressing modes that allows shifts and sign-/zero-extensions to be folded into
the memory operation itself.
For Example:
lsl x1, x1, #3 --> ldr x0, [x0, x1, lsl #3]
ldr x0, [x0, x1]
sxtw x1, w1
lsl x1, x1, #3 --> ldr x0, [x0, x1, sxtw #3]
ldr x0, [x0, x1]
llvm-svn: 216013
2014-08-20 03:44:17 +08:00
|
|
|
default: llvm_unreachable("Unexpected value type.");
|
2017-07-07 21:03:28 +08:00
|
|
|
case MVT::i1: VTIsi1 = true; LLVM_FALLTHROUGH;
|
Reapply [FastISel][AArch64] Add support for more addressing modes (r215597).
Note: This was originally reverted to track down a buildbot error. Reapply
without any modifications.
Original commit message:
FastISel didn't take much advantage of the different addressing modes available
to it on AArch64. This commit allows the ComputeAddress method to recognize more
addressing modes that allows shifts and sign-/zero-extensions to be folded into
the memory operation itself.
For Example:
lsl x1, x1, #3 --> ldr x0, [x0, x1, lsl #3]
ldr x0, [x0, x1]
sxtw x1, w1
lsl x1, x1, #3 --> ldr x0, [x0, x1, sxtw #3]
ldr x0, [x0, x1]
llvm-svn: 216013
2014-08-20 03:44:17 +08:00
|
|
|
case MVT::i8: Opc = OpcTable[Idx][0]; break;
|
|
|
|
case MVT::i16: Opc = OpcTable[Idx][1]; break;
|
|
|
|
case MVT::i32: Opc = OpcTable[Idx][2]; break;
|
|
|
|
case MVT::i64: Opc = OpcTable[Idx][3]; break;
|
|
|
|
case MVT::f32: Opc = OpcTable[Idx][4]; break;
|
|
|
|
case MVT::f64: Opc = OpcTable[Idx][5]; break;
|
2014-03-29 18:18:08 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Storing an i1 requires special handling.
|
2014-08-28 05:04:52 +08:00
|
|
|
if (VTIsi1 && SrcReg != AArch64::WZR) {
|
2014-09-04 09:29:18 +08:00
|
|
|
unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
|
2014-08-22 02:02:25 +08:00
|
|
|
assert(ANDReg && "Unexpected AND instruction emission failure.");
|
2014-03-29 18:18:08 +08:00
|
|
|
SrcReg = ANDReg;
|
|
|
|
}
|
|
|
|
// Create the base instruction, then add the operands.
|
2014-08-22 04:57:57 +08:00
|
|
|
const MCInstrDesc &II = TII.get(Opc);
|
|
|
|
SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
|
|
|
|
MachineInstrBuilder MIB =
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg);
|
2014-09-16 07:20:17 +08:00
|
|
|
addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
|
2014-08-09 01:24:10 +08:00
|
|
|
|
2014-03-29 18:18:08 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2014-09-16 07:20:17 +08:00
|
|
|
bool AArch64FastISel::selectStore(const Instruction *I) {
|
2014-03-29 18:18:08 +08:00
|
|
|
MVT VT;
|
2014-08-28 05:04:52 +08:00
|
|
|
const Value *Op0 = I->getOperand(0);
|
2014-03-29 18:18:08 +08:00
|
|
|
// Verify we have a legal type before going any further. Currently, we handle
|
|
|
|
// simple types that will directly fit in a register (i32/f32/i64/f64) or
|
|
|
|
// those that can be sign or zero-extended to a basic operation (i1/i8/i16).
|
2016-07-21 05:12:27 +08:00
|
|
|
if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
|
2014-03-29 18:18:08 +08:00
|
|
|
return false;
|
|
|
|
|
2016-04-12 05:08:06 +08:00
|
|
|
const Value *PtrV = I->getOperand(1);
|
|
|
|
if (TLI.supportSwiftError()) {
|
|
|
|
// Swifterror values can come from either a function parameter with
|
|
|
|
// swifterror attribute or an alloca with swifterror attribute.
|
|
|
|
if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
|
|
|
|
if (Arg->hasSwiftErrorAttr())
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
|
|
|
|
if (Alloca->isSwiftError())
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-08-28 05:04:52 +08:00
|
|
|
// Get the value to be stored into a register. Use the zero register directly
|
2014-08-28 05:40:50 +08:00
|
|
|
// when possible to avoid an unnecessary copy and a wasted register.
|
2014-08-28 05:04:52 +08:00
|
|
|
unsigned SrcReg = 0;
|
|
|
|
if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
|
|
|
|
if (CI->isZero())
|
|
|
|
SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
|
|
|
|
} else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
|
|
|
|
if (CF->isZero() && !CF->isNegative()) {
|
|
|
|
VT = MVT::getIntegerVT(VT.getSizeInBits());
|
|
|
|
SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!SrcReg)
|
|
|
|
SrcReg = getRegForValue(Op0);
|
|
|
|
|
|
|
|
if (!SrcReg)
|
2014-03-29 18:18:08 +08:00
|
|
|
return false;
|
|
|
|
|
2016-07-21 05:12:27 +08:00
|
|
|
auto *SI = cast<StoreInst>(I);
|
|
|
|
|
|
|
|
// Try to emit a STLR for seq_cst/release.
|
|
|
|
if (SI->isAtomic()) {
|
|
|
|
AtomicOrdering Ord = SI->getOrdering();
|
|
|
|
// The non-atomic instructions are sufficient for relaxed stores.
|
|
|
|
if (isReleaseOrStronger(Ord)) {
|
|
|
|
// The STLR addressing mode only supports a base reg; pass that directly.
|
|
|
|
unsigned AddrReg = getRegForValue(PtrV);
|
|
|
|
return emitStoreRelease(VT, SrcReg, AddrReg,
|
|
|
|
createMachineMemOperandFor(I));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-03-29 18:18:08 +08:00
|
|
|
// See if we can handle this address.
|
|
|
|
Address Addr;
|
2016-07-21 05:12:27 +08:00
|
|
|
if (!computeAddress(PtrV, Addr, Op0->getType()))
|
2014-03-29 18:18:08 +08:00
|
|
|
return false;
|
|
|
|
|
2014-09-16 07:20:17 +08:00
|
|
|
if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
|
2014-03-29 18:18:08 +08:00
|
|
|
return false;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2014-05-24 20:50:23 +08:00
|
|
|
/// Translate an IR compare predicate into the AArch64 condition code used to
/// test it.
///
/// AArch64CC::AL is returned as the "no single condition code" marker: it is
/// produced for FCMP_ONE and FCMP_UEQ (which need more than one compare) and
/// for every predicate that is not listed explicitly.
static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
  // AL is our "false" for now.
  AArch64CC::CondCode Code = AArch64CC::AL;

  switch (Pred) {
  // Predicates without a single matching condition code.
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UEQ:
  default:
    break;

  // Equality / inequality.
  case CmpInst::ICMP_EQ:
  case CmpInst::FCMP_OEQ:
    Code = AArch64CC::EQ;
    break;
  case CmpInst::ICMP_NE:
  case CmpInst::FCMP_UNE:
    Code = AArch64CC::NE;
    break;

  // Predicates that share a code between the integer and FP forms.
  case CmpInst::ICMP_SGT:
  case CmpInst::FCMP_OGT:
    Code = AArch64CC::GT;
    break;
  case CmpInst::ICMP_SGE:
  case CmpInst::FCMP_OGE:
    Code = AArch64CC::GE;
    break;
  case CmpInst::ICMP_SLT:
  case CmpInst::FCMP_ULT:
    Code = AArch64CC::LT;
    break;
  case CmpInst::ICMP_SLE:
  case CmpInst::FCMP_ULE:
    Code = AArch64CC::LE;
    break;
  case CmpInst::ICMP_UGT:
  case CmpInst::FCMP_UGT:
    Code = AArch64CC::HI;
    break;
  case CmpInst::ICMP_ULE:
  case CmpInst::FCMP_OLE:
    Code = AArch64CC::LS;
    break;

  // Integer-only predicates.
  case CmpInst::ICMP_UGE:
    Code = AArch64CC::HS;
    break;
  case CmpInst::ICMP_ULT:
    Code = AArch64CC::LO;
    break;

  // Floating-point-only predicates.
  case CmpInst::FCMP_OLT:
    Code = AArch64CC::MI;
    break;
  case CmpInst::FCMP_UGE:
    Code = AArch64CC::PL;
    break;
  case CmpInst::FCMP_ORD:
    Code = AArch64CC::VC;
    break;
  case CmpInst::FCMP_UNO:
    Code = AArch64CC::VS;
    break;
  }

  return Code;
}
|
|
|
|
|
2018-05-01 23:54:18 +08:00
|
|
|
/// Try to emit a combined compare-and-branch instruction.
|
2014-10-01 03:59:35 +08:00
|
|
|
bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
|
Introduce control flow speculation tracking pass for AArch64
The pass implements tracking of control flow miss-speculation into a "taint"
register. That taint register can then be used to mask off registers with
sensitive data when executing under miss-speculation, a.k.a. "transient
execution".
This pass is aimed at mitigating against SpectreV1-style vulnarabilities.
At the moment, it implements the tracking of miss-speculation of control
flow into a taint register, but doesn't implement a mechanism yet to then
use that taint register to mask off vulnerable data in registers (something
for a follow-on improvement). Possible strategies to mask out vulnerable
data that can be implemented on top of this are:
- speculative load hardening to automatically mask of data loaded
in registers.
- using intrinsics to mask of data in registers as indicated by the
programmer (see https://lwn.net/Articles/759423/).
For AArch64, the following implementation choices are made.
Some of these are different than the implementation choices made in
the similar pass implemented in X86SpeculativeLoadHardening.cpp, as
the instruction set characteristics result in different trade-offs.
- The speculation hardening is done after register allocation. With a
relative abundance of registers, one register is reserved (X16) to be
the taint register. X16 is expected to not clash with other register
reservation mechanisms with very high probability because:
. The AArch64 ABI doesn't guarantee X16 to be retained across any call.
. The only way to request X16 to be used as a programmer is through
inline assembly. In the rare case a function explicitly demands to
use X16/W16, this pass falls back to hardening against speculation
by inserting a DSB SYS/ISB barrier pair which will prevent control
flow speculation.
- It is easy to insert mask operations at this late stage as we have
mask operations available that don't set flags.
- The taint variable contains all-ones when no miss-speculation is detected,
and contains all-zeros when miss-speculation is detected. Therefore, when
masking, an AND instruction (which only changes the register to be masked,
no other side effects) can easily be inserted anywhere that's needed.
- The tracking of miss-speculation is done by using a data-flow conditional
select instruction (CSEL) to evaluate the flags that were also used to
make conditional branch direction decisions. Speculation of the CSEL
instruction can be limited with a CSDB instruction - so the combination of
CSEL + a later CSDB gives the guarantee that the flags as used in the CSEL
aren't speculated. When conditional branch direction gets miss-speculated,
the semantics of the inserted CSEL instruction is such that the taint
register will contain all zero bits.
One key requirement for this to work is that the conditional branch is
followed by an execution of the CSEL instruction, where the CSEL
instruction needs to use the same flags status as the conditional branch.
This means that the conditional branches must not be implemented as one
of the AArch64 conditional branches that do not use the flags as input
(CB(N)Z and TB(N)Z). This is implemented by ensuring in the instruction
selectors to not produce these instructions when speculation hardening
is enabled. This pass will assert if it does encounter such an instruction.
- On function call boundaries, the miss-speculation state is transferred from
the taint register X16 to be encoded in the SP register as value 0.
Future extensions/improvements could be:
- Implement this functionality using full speculation barriers, akin to the
x86-slh-lfence option. This may be more useful for the intrinsics-based
approach than for the SLH approach to masking.
Note that this pass already inserts the full speculation barriers if the
function for some niche reason makes use of X16/W16.
- no indirect branch misprediction gets protected/instrumented; but this
could be done for some indirect branches, such as switch jump tables.
Differential Revision: https://reviews.llvm.org/D54896
llvm-svn: 349456
2018-12-18 16:50:02 +08:00
|
|
|
// Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
|
|
|
|
// will not be produced, as they are conditional branch instructions that do
|
|
|
|
// not set flags.
|
|
|
|
if (FuncInfo.MF->getFunction().hasFnAttribute(
|
|
|
|
Attribute::SpeculativeLoadHardening))
|
|
|
|
return false;
|
|
|
|
|
2014-10-01 03:59:35 +08:00
|
|
|
assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
|
|
|
|
const CmpInst *CI = cast<CmpInst>(BI->getCondition());
|
|
|
|
CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
|
2014-09-18 02:05:34 +08:00
|
|
|
|
2014-10-01 03:59:35 +08:00
|
|
|
const Value *LHS = CI->getOperand(0);
|
|
|
|
const Value *RHS = CI->getOperand(1);
|
|
|
|
|
2014-10-28 03:38:05 +08:00
|
|
|
MVT VT;
|
|
|
|
if (!isTypeSupported(LHS->getType(), VT))
|
|
|
|
return false;
|
2014-09-18 02:05:34 +08:00
|
|
|
|
2014-10-28 03:38:05 +08:00
|
|
|
unsigned BW = VT.getSizeInBits();
|
|
|
|
if (BW > 64)
|
2014-09-18 02:05:34 +08:00
|
|
|
return false;
|
|
|
|
|
2014-10-01 03:59:35 +08:00
|
|
|
MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
|
|
|
|
MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
|
2014-09-18 02:05:34 +08:00
|
|
|
|
2014-10-01 03:59:35 +08:00
|
|
|
// Try to take advantage of fallthrough opportunities.
|
|
|
|
if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
|
|
|
|
std::swap(TBB, FBB);
|
|
|
|
Predicate = CmpInst::getInversePredicate(Predicate);
|
|
|
|
}
|
2014-09-18 02:05:34 +08:00
|
|
|
|
2014-10-01 03:59:35 +08:00
|
|
|
int TestBit = -1;
|
|
|
|
bool IsCmpNE;
|
2014-11-25 12:16:15 +08:00
|
|
|
switch (Predicate) {
|
|
|
|
default:
|
|
|
|
return false;
|
|
|
|
case CmpInst::ICMP_EQ:
|
|
|
|
case CmpInst::ICMP_NE:
|
|
|
|
if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
|
|
|
|
std::swap(LHS, RHS);
|
2014-10-01 03:59:35 +08:00
|
|
|
|
2014-11-25 12:16:15 +08:00
|
|
|
if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
|
2014-10-01 03:59:35 +08:00
|
|
|
return false;
|
|
|
|
|
|
|
|
if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
|
2014-10-28 03:16:48 +08:00
|
|
|
if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
|
2014-10-01 03:59:35 +08:00
|
|
|
const Value *AndLHS = AI->getOperand(0);
|
|
|
|
const Value *AndRHS = AI->getOperand(1);
|
|
|
|
|
|
|
|
if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
|
|
|
|
if (C->getValue().isPowerOf2())
|
|
|
|
std::swap(AndLHS, AndRHS);
|
|
|
|
|
|
|
|
if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
|
|
|
|
if (C->getValue().isPowerOf2()) {
|
|
|
|
TestBit = C->getValue().logBase2();
|
|
|
|
LHS = AndLHS;
|
|
|
|
}
|
|
|
|
}
|
2014-10-28 03:46:23 +08:00
|
|
|
|
|
|
|
if (VT == MVT::i1)
|
|
|
|
TestBit = 0;
|
|
|
|
|
2014-10-01 03:59:35 +08:00
|
|
|
IsCmpNE = Predicate == CmpInst::ICMP_NE;
|
2014-11-25 12:16:15 +08:00
|
|
|
break;
|
|
|
|
case CmpInst::ICMP_SLT:
|
|
|
|
case CmpInst::ICMP_SGE:
|
|
|
|
if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
|
2014-10-01 03:59:35 +08:00
|
|
|
return false;
|
|
|
|
|
|
|
|
TestBit = BW - 1;
|
2014-11-25 12:16:15 +08:00
|
|
|
IsCmpNE = Predicate == CmpInst::ICMP_SLT;
|
|
|
|
break;
|
|
|
|
case CmpInst::ICMP_SGT:
|
|
|
|
case CmpInst::ICMP_SLE:
|
2014-10-01 03:59:35 +08:00
|
|
|
if (!isa<ConstantInt>(RHS))
|
|
|
|
return false;
|
|
|
|
|
2014-11-25 12:16:15 +08:00
|
|
|
if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
|
2014-10-01 03:59:35 +08:00
|
|
|
return false;
|
|
|
|
|
|
|
|
TestBit = BW - 1;
|
2014-11-25 12:16:15 +08:00
|
|
|
IsCmpNE = Predicate == CmpInst::ICMP_SLE;
|
|
|
|
break;
|
|
|
|
} // end switch
|
2014-10-01 03:59:35 +08:00
|
|
|
|
|
|
|
static const unsigned OpcTable[2][2][2] = {
|
|
|
|
{ {AArch64::CBZW, AArch64::CBZX },
|
|
|
|
{AArch64::CBNZW, AArch64::CBNZX} },
|
|
|
|
{ {AArch64::TBZW, AArch64::TBZX },
|
|
|
|
{AArch64::TBNZW, AArch64::TBNZX} }
|
|
|
|
};
|
|
|
|
|
|
|
|
bool IsBitTest = TestBit != -1;
|
|
|
|
bool Is64Bit = BW == 64;
|
|
|
|
if (TestBit < 32 && TestBit >= 0)
|
|
|
|
Is64Bit = false;
|
2014-10-28 03:16:48 +08:00
|
|
|
|
2014-10-01 03:59:35 +08:00
|
|
|
unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
|
|
|
|
const MCInstrDesc &II = TII.get(Opc);
|
|
|
|
|
|
|
|
unsigned SrcReg = getRegForValue(LHS);
|
|
|
|
if (!SrcReg)
|
|
|
|
return false;
|
|
|
|
bool SrcIsKill = hasTrivialKill(LHS);
|
|
|
|
|
2014-10-15 04:36:02 +08:00
|
|
|
if (BW == 64 && !Is64Bit)
|
2014-10-01 03:59:35 +08:00
|
|
|
SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
|
|
|
|
AArch64::sub_32);
|
|
|
|
|
2014-10-28 03:38:05 +08:00
|
|
|
if ((BW < 32) && !IsBitTest)
|
|
|
|
SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*IsZExt=*/true);
|
2014-10-24 17:54:41 +08:00
|
|
|
|
2014-10-01 03:59:35 +08:00
|
|
|
// Emit the combined compare and branch instruction.
|
2014-10-15 04:36:02 +08:00
|
|
|
SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
|
2014-10-01 03:59:35 +08:00
|
|
|
MachineInstrBuilder MIB =
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
|
|
|
|
.addReg(SrcReg, getKillRegState(SrcIsKill));
|
|
|
|
if (IsBitTest)
|
|
|
|
MIB.addImm(TestBit);
|
|
|
|
MIB.addMBB(TBB);
|
|
|
|
|
2015-08-26 09:38:00 +08:00
|
|
|
finishCondBranch(BI->getParent(), TBB, FBB);
|
2014-10-01 03:59:35 +08:00
|
|
|
return true;
|
2014-09-18 02:05:34 +08:00
|
|
|
}
|
|
|
|
|
2014-09-16 07:20:17 +08:00
|
|
|
bool AArch64FastISel::selectBranch(const Instruction *I) {
|
2014-03-29 18:18:08 +08:00
|
|
|
const BranchInst *BI = cast<BranchInst>(I);
|
2014-09-04 01:58:10 +08:00
|
|
|
if (BI->isUnconditional()) {
|
|
|
|
MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
|
2014-09-04 04:56:52 +08:00
|
|
|
fastEmitBranch(MSucc, BI->getDebugLoc());
|
2014-09-04 01:58:10 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2014-03-29 18:18:08 +08:00
|
|
|
MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
|
|
|
|
MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
|
|
|
|
|
|
|
|
if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
|
2014-09-18 01:46:47 +08:00
|
|
|
if (CI->hasOneUse() && isValueAvailable(CI)) {
|
|
|
|
// Try to optimize or fold the cmp.
|
|
|
|
CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
|
|
|
|
switch (Predicate) {
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
case CmpInst::FCMP_FALSE:
|
|
|
|
fastEmitBranch(FBB, DbgLoc);
|
|
|
|
return true;
|
|
|
|
case CmpInst::FCMP_TRUE:
|
|
|
|
fastEmitBranch(TBB, DbgLoc);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2014-10-01 03:59:35 +08:00
|
|
|
// Try to emit a combined compare-and-branch first.
|
|
|
|
if (emitCompareAndBranch(BI))
|
|
|
|
return true;
|
|
|
|
|
2014-09-18 01:46:47 +08:00
|
|
|
// Try to take advantage of fallthrough opportunities.
|
|
|
|
if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
|
|
|
|
std::swap(TBB, FBB);
|
|
|
|
Predicate = CmpInst::getInversePredicate(Predicate);
|
|
|
|
}
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
// Emit the cmp.
|
2014-08-20 06:29:55 +08:00
|
|
|
if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
|
2014-03-29 18:18:08 +08:00
|
|
|
return false;
|
|
|
|
|
2014-09-18 01:46:47 +08:00
|
|
|
// FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
|
|
|
|
// instruction.
|
2015-12-04 01:19:58 +08:00
|
|
|
AArch64CC::CondCode CC = getCompareCC(Predicate);
|
2014-09-18 01:46:47 +08:00
|
|
|
AArch64CC::CondCode ExtraCC = AArch64CC::AL;
|
|
|
|
switch (Predicate) {
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
case CmpInst::FCMP_UEQ:
|
|
|
|
ExtraCC = AArch64CC::EQ;
|
|
|
|
CC = AArch64CC::VS;
|
|
|
|
break;
|
|
|
|
case CmpInst::FCMP_ONE:
|
|
|
|
ExtraCC = AArch64CC::MI;
|
|
|
|
CC = AArch64CC::GT;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
assert((CC != AArch64CC::AL) && "Unexpected condition code.");
|
|
|
|
|
|
|
|
// Emit the extra branch for FCMP_UEQ and FCMP_ONE.
|
|
|
|
if (ExtraCC != AArch64CC::AL) {
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
|
|
|
|
.addImm(ExtraCC)
|
|
|
|
.addMBB(TBB);
|
|
|
|
}
|
|
|
|
|
2014-03-29 18:18:08 +08:00
|
|
|
// Emit the branch.
|
2014-05-24 20:50:23 +08:00
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
|
2014-03-29 18:18:08 +08:00
|
|
|
.addImm(CC)
|
|
|
|
.addMBB(TBB);
|
2014-08-02 02:39:24 +08:00
|
|
|
|
2015-08-26 09:38:00 +08:00
|
|
|
finishCondBranch(BI->getParent(), TBB, FBB);
|
2014-03-29 18:18:08 +08:00
|
|
|
return true;
|
|
|
|
}
|
2014-09-18 01:46:47 +08:00
|
|
|
} else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
|
2014-03-29 18:18:08 +08:00
|
|
|
uint64_t Imm = CI->getZExtValue();
|
|
|
|
MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
|
2014-05-24 20:50:23 +08:00
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
|
2014-03-29 18:18:08 +08:00
|
|
|
.addMBB(Target);
|
2014-08-02 02:39:24 +08:00
|
|
|
|
2015-11-24 16:51:23 +08:00
|
|
|
// Obtain the branch probability and add the target to the successor list.
|
Create a new interface addSuccessorWithoutWeight(MBB*) in MBB to add successors when optimization is disabled.
When optimization is disabled, edge weights that are stored in MBB won't be used so that we don't have to store them. Currently, this is done by adding successors with default weight 0, and if all successors have default weights, the weight list will be empty. But that the weight list is empty doesn't mean disabled optimization (as is stated several times in MachineBasicBlock.cpp): it may also mean all successors just have default weights.
We should discourage using default weights when adding successors, because it is very easy for users to forget update the correct edge weights instead of using default ones (one exception is that the MBB only has one successor). In order to detect such usages, it is better to differentiate using default weights from the case when optimizations is disabled.
In this patch, a new interface addSuccessorWithoutWeight(MBB*) is created for when optimization is disabled. In this case, MBB will try to maintain an empty weight list, but it cannot guarantee this as for many uses of addSuccessor() whether optimization is disabled or not is not checked. But it can guarantee that if optimization is enabled, then the weight list always has the same size of the successor list.
Differential revision: http://reviews.llvm.org/D13963
llvm-svn: 251429
2015-10-28 01:59:36 +08:00
|
|
|
if (FuncInfo.BPI) {
|
2015-11-24 16:51:23 +08:00
|
|
|
auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
|
|
|
|
BI->getParent(), Target->getBasicBlock());
|
|
|
|
FuncInfo.MBB->addSuccessor(Target, BranchProbability);
|
Create a new interface addSuccessorWithoutWeight(MBB*) in MBB to add successors when optimization is disabled.
When optimization is disabled, edge weights that are stored in MBB won't be used so that we don't have to store them. Currently, this is done by adding successors with default weight 0, and if all successors have default weights, the weight list will be empty. But that the weight list is empty doesn't mean disabled optimization (as is stated several times in MachineBasicBlock.cpp): it may also mean all successors just have default weights.
We should discourage using default weights when adding successors, because it is very easy for users to forget update the correct edge weights instead of using default ones (one exception is that the MBB only has one successor). In order to detect such usages, it is better to differentiate using default weights from the case when optimizations is disabled.
In this patch, a new interface addSuccessorWithoutWeight(MBB*) is created for when optimization is disabled. In this case, MBB will try to maintain an empty weight list, but it cannot guarantee this as for many uses of addSuccessor() whether optimization is disabled or not is not checked. But it can guarantee that if optimization is enabled, then the weight list always has the same size of the successor list.
Differential revision: http://reviews.llvm.org/D13963
llvm-svn: 251429
2015-10-28 01:59:36 +08:00
|
|
|
} else
|
2015-11-24 16:51:23 +08:00
|
|
|
FuncInfo.MBB->addSuccessorWithoutProb(Target);
|
2014-03-29 18:18:08 +08:00
|
|
|
return true;
|
2015-12-04 01:19:58 +08:00
|
|
|
} else {
|
|
|
|
AArch64CC::CondCode CC = AArch64CC::NE;
|
|
|
|
if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
|
|
|
|
// Fake request the condition, otherwise the intrinsic might be completely
|
|
|
|
// optimized away.
|
|
|
|
unsigned CondReg = getRegForValue(BI->getCondition());
|
|
|
|
if (!CondReg)
|
|
|
|
return false;
|
2014-07-31 06:04:34 +08:00
|
|
|
|
2015-12-04 01:19:58 +08:00
|
|
|
// Emit the branch.
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
|
|
|
|
.addImm(CC)
|
|
|
|
.addMBB(TBB);
|
2014-08-02 02:39:24 +08:00
|
|
|
|
2015-12-04 01:19:58 +08:00
|
|
|
finishCondBranch(BI->getParent(), TBB, FBB);
|
|
|
|
return true;
|
|
|
|
}
|
2014-03-29 18:18:08 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
unsigned CondReg = getRegForValue(BI->getCondition());
|
|
|
|
if (CondReg == 0)
|
|
|
|
return false;
|
2014-08-20 06:29:55 +08:00
|
|
|
bool CondRegIsKill = hasTrivialKill(BI->getCondition());
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2015-12-04 01:19:58 +08:00
|
|
|
// i1 conditions come as i32 values, test the lowest bit with tb(n)z.
|
|
|
|
unsigned Opcode = AArch64::TBNZW;
|
2014-03-29 18:18:08 +08:00
|
|
|
if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
|
|
|
|
std::swap(TBB, FBB);
|
2015-12-04 01:19:58 +08:00
|
|
|
Opcode = AArch64::TBZW;
|
2014-03-29 18:18:08 +08:00
|
|
|
}
|
|
|
|
|
2015-12-04 01:19:58 +08:00
|
|
|
const MCInstrDesc &II = TII.get(Opcode);
|
|
|
|
unsigned ConstrainedCondReg
|
|
|
|
= constrainOperandRegClass(II, CondReg, II.getNumDefs());
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
|
|
|
|
.addReg(ConstrainedCondReg, getKillRegState(CondRegIsKill))
|
|
|
|
.addImm(0)
|
2014-03-29 18:18:08 +08:00
|
|
|
.addMBB(TBB);
|
2014-08-02 02:39:24 +08:00
|
|
|
|
2015-08-26 09:38:00 +08:00
|
|
|
finishCondBranch(BI->getParent(), TBB, FBB);
|
2014-03-29 18:18:08 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2014-09-16 07:20:17 +08:00
|
|
|
bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
|
2014-03-29 18:18:08 +08:00
|
|
|
const IndirectBrInst *BI = cast<IndirectBrInst>(I);
|
|
|
|
unsigned AddrReg = getRegForValue(BI->getOperand(0));
|
|
|
|
if (AddrReg == 0)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// Emit the indirect branch.
|
2014-08-22 04:57:57 +08:00
|
|
|
const MCInstrDesc &II = TII.get(AArch64::BR);
|
|
|
|
AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg);
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
// Make sure the CFG is up-to-date.
|
2015-08-06 01:43:01 +08:00
|
|
|
for (auto *Succ : BI->successors())
|
|
|
|
FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]);
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2014-09-16 07:20:17 +08:00
|
|
|
bool AArch64FastISel::selectCmp(const Instruction *I) {
|
2014-03-29 18:18:08 +08:00
|
|
|
const CmpInst *CI = cast<CmpInst>(I);
|
|
|
|
|
2015-11-07 07:16:53 +08:00
|
|
|
// Vectors of i1 are weird: bail out.
|
|
|
|
if (CI->getType()->isVectorTy())
|
|
|
|
return false;
|
|
|
|
|
2014-09-16 04:47:16 +08:00
|
|
|
// Try to optimize or fold the cmp.
|
|
|
|
CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
|
|
|
|
unsigned ResultReg = 0;
|
|
|
|
switch (Predicate) {
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
case CmpInst::FCMP_FALSE:
|
|
|
|
ResultReg = createResultReg(&AArch64::GPR32RegClass);
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
|
|
|
TII.get(TargetOpcode::COPY), ResultReg)
|
|
|
|
.addReg(AArch64::WZR, getKillRegState(true));
|
|
|
|
break;
|
|
|
|
case CmpInst::FCMP_TRUE:
|
|
|
|
ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (ResultReg) {
|
|
|
|
updateValueMap(I, ResultReg);
|
|
|
|
return true;
|
|
|
|
}
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
// Emit the cmp.
|
2014-08-20 06:29:55 +08:00
|
|
|
if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
|
2014-03-29 18:18:08 +08:00
|
|
|
return false;
|
|
|
|
|
2014-09-16 04:47:16 +08:00
|
|
|
ResultReg = createResultReg(&AArch64::GPR32RegClass);
|
|
|
|
|
|
|
|
// FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
|
|
|
|
// condition codes are inverted, because they are used by CSINC.
|
|
|
|
static unsigned CondCodeTable[2][2] = {
|
|
|
|
{ AArch64CC::NE, AArch64CC::VC },
|
|
|
|
{ AArch64CC::PL, AArch64CC::LE }
|
|
|
|
};
|
|
|
|
unsigned *CondCodes = nullptr;
|
|
|
|
switch (Predicate) {
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
case CmpInst::FCMP_UEQ:
|
|
|
|
CondCodes = &CondCodeTable[0][0];
|
|
|
|
break;
|
|
|
|
case CmpInst::FCMP_ONE:
|
|
|
|
CondCodes = &CondCodeTable[1][0];
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (CondCodes) {
|
|
|
|
unsigned TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
|
|
|
|
TmpReg1)
|
|
|
|
.addReg(AArch64::WZR, getKillRegState(true))
|
|
|
|
.addReg(AArch64::WZR, getKillRegState(true))
|
|
|
|
.addImm(CondCodes[0]);
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
|
|
|
|
ResultReg)
|
|
|
|
.addReg(TmpReg1, getKillRegState(true))
|
|
|
|
.addReg(AArch64::WZR, getKillRegState(true))
|
|
|
|
.addImm(CondCodes[1]);
|
|
|
|
|
|
|
|
updateValueMap(I, ResultReg);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2014-03-29 18:18:08 +08:00
|
|
|
// Now set a register based on the comparison.
|
2014-09-16 04:47:16 +08:00
|
|
|
AArch64CC::CondCode CC = getCompareCC(Predicate);
|
|
|
|
assert((CC != AArch64CC::AL) && "Unexpected condition code.");
|
2014-05-24 20:50:23 +08:00
|
|
|
AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
|
2014-03-29 18:18:08 +08:00
|
|
|
ResultReg)
|
2014-09-16 04:47:16 +08:00
|
|
|
.addReg(AArch64::WZR, getKillRegState(true))
|
|
|
|
.addReg(AArch64::WZR, getKillRegState(true))
|
2014-03-29 18:18:08 +08:00
|
|
|
.addImm(invertedCC);
|
|
|
|
|
2014-09-04 04:56:52 +08:00
|
|
|
updateValueMap(I, ResultReg);
|
2014-03-29 18:18:08 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2018-05-01 23:54:18 +08:00
|
|
|
/// Optimize selects of i1 if one of the operands has a 'true' or 'false'
|
2014-11-13 08:36:46 +08:00
|
|
|
/// value.
|
|
|
|
bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
|
|
|
|
if (!SI->getType()->isIntegerTy(1))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
const Value *Src1Val, *Src2Val;
|
|
|
|
unsigned Opc = 0;
|
|
|
|
bool NeedExtraOp = false;
|
|
|
|
if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
|
|
|
|
if (CI->isOne()) {
|
|
|
|
Src1Val = SI->getCondition();
|
|
|
|
Src2Val = SI->getFalseValue();
|
|
|
|
Opc = AArch64::ORRWrr;
|
|
|
|
} else {
|
|
|
|
assert(CI->isZero());
|
|
|
|
Src1Val = SI->getFalseValue();
|
|
|
|
Src2Val = SI->getCondition();
|
|
|
|
Opc = AArch64::BICWrr;
|
|
|
|
}
|
|
|
|
} else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
|
|
|
|
if (CI->isOne()) {
|
|
|
|
Src1Val = SI->getCondition();
|
|
|
|
Src2Val = SI->getTrueValue();
|
|
|
|
Opc = AArch64::ORRWrr;
|
|
|
|
NeedExtraOp = true;
|
|
|
|
} else {
|
|
|
|
assert(CI->isZero());
|
|
|
|
Src1Val = SI->getCondition();
|
|
|
|
Src2Val = SI->getTrueValue();
|
|
|
|
Opc = AArch64::ANDWrr;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!Opc)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
unsigned Src1Reg = getRegForValue(Src1Val);
|
|
|
|
if (!Src1Reg)
|
|
|
|
return false;
|
|
|
|
bool Src1IsKill = hasTrivialKill(Src1Val);
|
|
|
|
|
|
|
|
unsigned Src2Reg = getRegForValue(Src2Val);
|
|
|
|
if (!Src2Reg)
|
|
|
|
return false;
|
|
|
|
bool Src2IsKill = hasTrivialKill(Src2Val);
|
|
|
|
|
|
|
|
if (NeedExtraOp) {
|
|
|
|
Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, Src1IsKill, 1);
|
|
|
|
Src1IsKill = true;
|
|
|
|
}
|
2015-05-02 05:34:57 +08:00
|
|
|
unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
|
2014-11-13 08:36:46 +08:00
|
|
|
Src1IsKill, Src2Reg, Src2IsKill);
|
|
|
|
updateValueMap(SI, ResultReg);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2014-09-16 07:20:17 +08:00
|
|
|
bool AArch64FastISel::selectSelect(const Instruction *I) {
|
2014-11-13 08:36:38 +08:00
|
|
|
assert(isa<SelectInst>(I) && "Expected a select instruction.");
|
|
|
|
MVT VT;
|
|
|
|
if (!isTypeSupported(I->getType(), VT))
|
2014-03-29 18:18:08 +08:00
|
|
|
return false;
|
|
|
|
|
2014-11-13 08:36:38 +08:00
|
|
|
unsigned Opc;
|
|
|
|
const TargetRegisterClass *RC;
|
|
|
|
switch (VT.SimpleTy) {
|
|
|
|
default:
|
2014-03-29 18:18:08 +08:00
|
|
|
return false;
|
2014-11-13 08:36:38 +08:00
|
|
|
case MVT::i1:
|
|
|
|
case MVT::i8:
|
|
|
|
case MVT::i16:
|
2014-08-22 04:57:57 +08:00
|
|
|
case MVT::i32:
|
2014-11-13 08:36:38 +08:00
|
|
|
Opc = AArch64::CSELWr;
|
|
|
|
RC = &AArch64::GPR32RegClass;
|
|
|
|
break;
|
2014-08-22 04:57:57 +08:00
|
|
|
case MVT::i64:
|
2014-11-13 08:36:38 +08:00
|
|
|
Opc = AArch64::CSELXr;
|
|
|
|
RC = &AArch64::GPR64RegClass;
|
|
|
|
break;
|
2014-08-22 04:57:57 +08:00
|
|
|
case MVT::f32:
|
2014-11-13 08:36:38 +08:00
|
|
|
Opc = AArch64::FCSELSrrr;
|
|
|
|
RC = &AArch64::FPR32RegClass;
|
|
|
|
break;
|
2014-08-22 04:57:57 +08:00
|
|
|
case MVT::f64:
|
2014-11-13 08:36:38 +08:00
|
|
|
Opc = AArch64::FCSELDrrr;
|
|
|
|
RC = &AArch64::FPR64RegClass;
|
|
|
|
break;
|
2014-07-31 06:04:37 +08:00
|
|
|
}
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2014-11-13 08:36:38 +08:00
|
|
|
const SelectInst *SI = cast<SelectInst>(I);
|
2014-07-31 06:04:37 +08:00
|
|
|
const Value *Cond = SI->getCondition();
|
|
|
|
AArch64CC::CondCode CC = AArch64CC::NE;
|
2014-11-13 08:36:43 +08:00
|
|
|
AArch64CC::CondCode ExtraCC = AArch64CC::AL;
|
2014-05-04 01:27:06 +08:00
|
|
|
|
2014-11-13 08:36:46 +08:00
|
|
|
if (optimizeSelect(SI))
|
|
|
|
return true;
|
|
|
|
|
2014-11-13 08:36:38 +08:00
|
|
|
// Try to pickup the flags, so we don't have to emit another compare.
|
|
|
|
if (foldXALUIntrinsic(CC, I, Cond)) {
|
|
|
|
// Fake request the condition to force emission of the XALU intrinsic.
|
|
|
|
unsigned CondReg = getRegForValue(Cond);
|
|
|
|
if (!CondReg)
|
|
|
|
return false;
|
2014-11-13 08:36:43 +08:00
|
|
|
} else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
|
|
|
|
isValueAvailable(Cond)) {
|
|
|
|
const auto *Cmp = cast<CmpInst>(Cond);
|
|
|
|
// Try to optimize or fold the cmp.
|
|
|
|
CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
|
|
|
|
const Value *FoldSelect = nullptr;
|
|
|
|
switch (Predicate) {
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
case CmpInst::FCMP_FALSE:
|
|
|
|
FoldSelect = SI->getFalseValue();
|
|
|
|
break;
|
|
|
|
case CmpInst::FCMP_TRUE:
|
|
|
|
FoldSelect = SI->getTrueValue();
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (FoldSelect) {
|
|
|
|
unsigned SrcReg = getRegForValue(FoldSelect);
|
|
|
|
if (!SrcReg)
|
|
|
|
return false;
|
|
|
|
unsigned UseReg = lookUpRegForValue(SI);
|
|
|
|
if (UseReg)
|
|
|
|
MRI.clearKillFlags(UseReg);
|
|
|
|
|
|
|
|
updateValueMap(I, SrcReg);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Emit the cmp.
|
|
|
|
if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
|
|
|
|
CC = getCompareCC(Predicate);
|
|
|
|
switch (Predicate) {
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
case CmpInst::FCMP_UEQ:
|
|
|
|
ExtraCC = AArch64CC::EQ;
|
|
|
|
CC = AArch64CC::VS;
|
|
|
|
break;
|
|
|
|
case CmpInst::FCMP_ONE:
|
|
|
|
ExtraCC = AArch64CC::MI;
|
|
|
|
CC = AArch64CC::GT;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
assert((CC != AArch64CC::AL) && "Unexpected condition code.");
|
2014-11-13 08:36:38 +08:00
|
|
|
} else {
|
|
|
|
unsigned CondReg = getRegForValue(Cond);
|
|
|
|
if (!CondReg)
|
|
|
|
return false;
|
|
|
|
bool CondIsKill = hasTrivialKill(Cond);
|
2014-07-31 06:04:37 +08:00
|
|
|
|
2015-05-01 06:27:20 +08:00
|
|
|
const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
|
|
|
|
CondReg = constrainOperandRegClass(II, CondReg, 1);
|
|
|
|
|
2014-11-13 08:36:38 +08:00
|
|
|
// Emit a TST instruction (ANDS wzr, reg, #imm).
|
2015-05-01 06:27:20 +08:00
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
|
2014-11-13 08:36:38 +08:00
|
|
|
AArch64::WZR)
|
|
|
|
.addReg(CondReg, getKillRegState(CondIsKill))
|
|
|
|
.addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
|
2014-07-31 06:04:37 +08:00
|
|
|
}
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2014-11-13 08:36:38 +08:00
|
|
|
unsigned Src1Reg = getRegForValue(SI->getTrueValue());
|
|
|
|
bool Src1IsKill = hasTrivialKill(SI->getTrueValue());
|
2014-07-31 06:04:37 +08:00
|
|
|
|
2014-11-13 08:36:38 +08:00
|
|
|
unsigned Src2Reg = getRegForValue(SI->getFalseValue());
|
|
|
|
bool Src2IsKill = hasTrivialKill(SI->getFalseValue());
|
2014-07-31 06:04:37 +08:00
|
|
|
|
2014-11-13 08:36:38 +08:00
|
|
|
if (!Src1Reg || !Src2Reg)
|
2014-03-29 18:18:08 +08:00
|
|
|
return false;
|
|
|
|
|
2014-11-13 08:36:43 +08:00
|
|
|
if (ExtraCC != AArch64CC::AL) {
|
|
|
|
Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
|
|
|
|
Src2IsKill, ExtraCC);
|
|
|
|
Src2IsKill = true;
|
|
|
|
}
|
2014-11-13 08:36:38 +08:00
|
|
|
unsigned ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
|
|
|
|
Src2IsKill, CC);
|
2014-09-04 04:56:52 +08:00
|
|
|
updateValueMap(I, ResultReg);
|
2014-03-29 18:18:08 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2014-09-16 07:20:17 +08:00
|
|
|
bool AArch64FastISel::selectFPExt(const Instruction *I) {
|
2014-03-29 18:18:08 +08:00
|
|
|
Value *V = I->getOperand(0);
|
|
|
|
if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
unsigned Op = getRegForValue(V);
|
|
|
|
if (Op == 0)
|
|
|
|
return false;
|
|
|
|
|
2014-05-24 20:50:23 +08:00
|
|
|
unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
|
2014-03-29 18:18:08 +08:00
|
|
|
ResultReg).addReg(Op);
|
2014-09-04 04:56:52 +08:00
|
|
|
updateValueMap(I, ResultReg);
|
2014-03-29 18:18:08 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2014-09-16 07:20:17 +08:00
|
|
|
bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
|
2014-03-29 18:18:08 +08:00
|
|
|
Value *V = I->getOperand(0);
|
|
|
|
if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
unsigned Op = getRegForValue(V);
|
|
|
|
if (Op == 0)
|
|
|
|
return false;
|
|
|
|
|
2014-05-24 20:50:23 +08:00
|
|
|
unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
|
2014-03-29 18:18:08 +08:00
|
|
|
ResultReg).addReg(Op);
|
2014-09-04 04:56:52 +08:00
|
|
|
updateValueMap(I, ResultReg);
|
2014-03-29 18:18:08 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
// FPToUI and FPToSI
|
2014-09-16 07:20:17 +08:00
|
|
|
bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
|
2014-03-29 18:18:08 +08:00
|
|
|
MVT DestVT;
|
|
|
|
if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
unsigned SrcReg = getRegForValue(I->getOperand(0));
|
|
|
|
if (SrcReg == 0)
|
|
|
|
return false;
|
|
|
|
|
2015-07-09 10:09:04 +08:00
|
|
|
EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
|
2017-06-10 06:40:50 +08:00
|
|
|
if (SrcVT == MVT::f128 || SrcVT == MVT::f16)
|
2014-04-30 23:29:57 +08:00
|
|
|
return false;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
unsigned Opc;
|
|
|
|
if (SrcVT == MVT::f64) {
|
|
|
|
if (Signed)
|
2014-05-24 20:50:23 +08:00
|
|
|
Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
|
2014-03-29 18:18:08 +08:00
|
|
|
else
|
2014-05-24 20:50:23 +08:00
|
|
|
Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
|
2014-03-29 18:18:08 +08:00
|
|
|
} else {
|
|
|
|
if (Signed)
|
2014-05-24 20:50:23 +08:00
|
|
|
Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
|
2014-03-29 18:18:08 +08:00
|
|
|
else
|
2014-05-24 20:50:23 +08:00
|
|
|
Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
|
2014-03-29 18:18:08 +08:00
|
|
|
}
|
2014-04-15 21:59:53 +08:00
|
|
|
unsigned ResultReg = createResultReg(
|
2014-05-24 20:50:23 +08:00
|
|
|
DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
|
2014-03-29 18:18:08 +08:00
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
|
|
|
|
.addReg(SrcReg);
|
2014-09-04 04:56:52 +08:00
|
|
|
updateValueMap(I, ResultReg);
|
2014-03-29 18:18:08 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2014-09-16 07:20:17 +08:00
|
|
|
bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
|
2014-03-29 18:18:08 +08:00
|
|
|
MVT DestVT;
|
|
|
|
if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
|
|
|
|
return false;
|
2017-06-10 06:40:50 +08:00
|
|
|
// Let regular ISEL handle FP16
|
|
|
|
if (DestVT == MVT::f16)
|
|
|
|
return false;
|
|
|
|
|
2017-01-25 08:29:26 +08:00
|
|
|
assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
|
|
|
|
"Unexpected value type.");
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
unsigned SrcReg = getRegForValue(I->getOperand(0));
|
2014-08-22 04:57:57 +08:00
|
|
|
if (!SrcReg)
|
2014-03-29 18:18:08 +08:00
|
|
|
return false;
|
2014-08-22 04:57:57 +08:00
|
|
|
bool SrcIsKill = hasTrivialKill(I->getOperand(0));
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2015-07-09 10:09:04 +08:00
|
|
|
EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
// Handle sign-extension.
|
|
|
|
if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
|
|
|
|
SrcReg =
|
2014-09-16 07:20:17 +08:00
|
|
|
emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
|
2014-08-22 04:57:57 +08:00
|
|
|
if (!SrcReg)
|
2014-03-29 18:18:08 +08:00
|
|
|
return false;
|
2014-08-22 04:57:57 +08:00
|
|
|
SrcIsKill = true;
|
2014-03-29 18:18:08 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
unsigned Opc;
|
|
|
|
if (SrcVT == MVT::i64) {
|
|
|
|
if (Signed)
|
2014-05-24 20:50:23 +08:00
|
|
|
Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
|
2014-03-29 18:18:08 +08:00
|
|
|
else
|
2014-05-24 20:50:23 +08:00
|
|
|
Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
|
2014-03-29 18:18:08 +08:00
|
|
|
} else {
|
|
|
|
if (Signed)
|
2014-05-24 20:50:23 +08:00
|
|
|
Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
|
2014-03-29 18:18:08 +08:00
|
|
|
else
|
2014-05-24 20:50:23 +08:00
|
|
|
Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
|
2014-03-29 18:18:08 +08:00
|
|
|
}
|
|
|
|
|
2014-09-04 04:56:59 +08:00
|
|
|
unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg,
|
2014-08-22 04:57:57 +08:00
|
|
|
SrcIsKill);
|
2014-09-04 04:56:52 +08:00
|
|
|
updateValueMap(I, ResultReg);
|
2014-03-29 18:18:08 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2014-09-04 04:56:52 +08:00
|
|
|
bool AArch64FastISel::fastLowerArguments() {
|
2014-08-05 13:43:48 +08:00
|
|
|
if (!FuncInfo.CanLowerReturn)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
const Function *F = FuncInfo.Fn;
|
|
|
|
if (F->isVarArg())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
CallingConv::ID CC = F->getCallingConv();
|
2016-08-27 03:28:17 +08:00
|
|
|
if (CC != CallingConv::C && CC != CallingConv::Swift)
|
2014-08-05 13:43:48 +08:00
|
|
|
return false;
|
|
|
|
|
2018-09-23 06:17:50 +08:00
|
|
|
if (Subtarget->hasCustomCallingConv())
|
|
|
|
return false;
|
|
|
|
|
2014-09-16 08:25:30 +08:00
|
|
|
// Only handle simple cases of up to 8 GPR and FPR each.
|
2014-08-05 13:43:48 +08:00
|
|
|
unsigned GPRCnt = 0;
|
|
|
|
unsigned FPRCnt = 0;
|
|
|
|
for (auto const &Arg : F->args()) {
|
2017-04-29 02:37:16 +08:00
|
|
|
if (Arg.hasAttribute(Attribute::ByVal) ||
|
|
|
|
Arg.hasAttribute(Attribute::InReg) ||
|
|
|
|
Arg.hasAttribute(Attribute::StructRet) ||
|
|
|
|
Arg.hasAttribute(Attribute::SwiftSelf) ||
|
|
|
|
Arg.hasAttribute(Attribute::SwiftError) ||
|
|
|
|
Arg.hasAttribute(Attribute::Nest))
|
2014-08-05 13:43:48 +08:00
|
|
|
return false;
|
|
|
|
|
|
|
|
Type *ArgTy = Arg.getType();
|
2014-09-16 08:25:30 +08:00
|
|
|
if (ArgTy->isStructTy() || ArgTy->isArrayTy())
|
2014-08-05 13:43:48 +08:00
|
|
|
return false;
|
|
|
|
|
2015-07-09 10:09:04 +08:00
|
|
|
EVT ArgVT = TLI.getValueType(DL, ArgTy);
|
2014-09-16 08:25:30 +08:00
|
|
|
if (!ArgVT.isSimple())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
MVT VT = ArgVT.getSimpleVT().SimpleTy;
|
|
|
|
if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
if (VT.isVector() &&
|
|
|
|
(!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
if (VT >= MVT::i1 && VT <= MVT::i64)
|
2014-08-05 13:43:48 +08:00
|
|
|
++GPRCnt;
|
2014-09-16 08:25:30 +08:00
|
|
|
else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
|
|
|
|
VT.is128BitVector())
|
2014-08-05 13:43:48 +08:00
|
|
|
++FPRCnt;
|
2014-09-16 08:25:30 +08:00
|
|
|
else
|
|
|
|
return false;
|
2014-08-05 13:43:48 +08:00
|
|
|
|
|
|
|
if (GPRCnt > 8 || FPRCnt > 8)
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2014-09-16 08:25:30 +08:00
|
|
|
static const MCPhysReg Registers[6][8] = {
|
2014-08-05 13:43:48 +08:00
|
|
|
{ AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
|
|
|
|
AArch64::W5, AArch64::W6, AArch64::W7 },
|
|
|
|
{ AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
|
|
|
|
AArch64::X5, AArch64::X6, AArch64::X7 },
|
|
|
|
{ AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
|
|
|
|
AArch64::H5, AArch64::H6, AArch64::H7 },
|
|
|
|
{ AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
|
|
|
|
AArch64::S5, AArch64::S6, AArch64::S7 },
|
|
|
|
{ AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
|
2014-09-16 08:25:30 +08:00
|
|
|
AArch64::D5, AArch64::D6, AArch64::D7 },
|
|
|
|
{ AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
|
|
|
|
AArch64::Q5, AArch64::Q6, AArch64::Q7 }
|
2014-08-05 13:43:48 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
unsigned GPRIdx = 0;
|
|
|
|
unsigned FPRIdx = 0;
|
|
|
|
for (auto const &Arg : F->args()) {
|
2015-07-09 10:09:04 +08:00
|
|
|
MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
|
2014-08-05 13:43:48 +08:00
|
|
|
unsigned SrcReg;
|
2014-09-16 08:25:30 +08:00
|
|
|
const TargetRegisterClass *RC;
|
|
|
|
if (VT >= MVT::i1 && VT <= MVT::i32) {
|
|
|
|
SrcReg = Registers[0][GPRIdx++];
|
|
|
|
RC = &AArch64::GPR32RegClass;
|
|
|
|
VT = MVT::i32;
|
|
|
|
} else if (VT == MVT::i64) {
|
|
|
|
SrcReg = Registers[1][GPRIdx++];
|
|
|
|
RC = &AArch64::GPR64RegClass;
|
|
|
|
} else if (VT == MVT::f16) {
|
|
|
|
SrcReg = Registers[2][FPRIdx++];
|
|
|
|
RC = &AArch64::FPR16RegClass;
|
|
|
|
} else if (VT == MVT::f32) {
|
|
|
|
SrcReg = Registers[3][FPRIdx++];
|
|
|
|
RC = &AArch64::FPR32RegClass;
|
|
|
|
} else if ((VT == MVT::f64) || VT.is64BitVector()) {
|
|
|
|
SrcReg = Registers[4][FPRIdx++];
|
|
|
|
RC = &AArch64::FPR64RegClass;
|
|
|
|
} else if (VT.is128BitVector()) {
|
|
|
|
SrcReg = Registers[5][FPRIdx++];
|
|
|
|
RC = &AArch64::FPR128RegClass;
|
|
|
|
} else
|
|
|
|
llvm_unreachable("Unexpected value type.");
|
2014-08-05 13:43:48 +08:00
|
|
|
|
|
|
|
unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
|
|
|
|
// FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
|
|
|
|
// Without this, EmitLiveInCopies may eliminate the livein if its only
|
|
|
|
// use is a bitcast (which isn't turned into an instruction).
|
|
|
|
unsigned ResultReg = createResultReg(RC);
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
|
|
|
TII.get(TargetOpcode::COPY), ResultReg)
|
2014-08-22 04:57:57 +08:00
|
|
|
.addReg(DstReg, getKillRegState(true));
|
2014-09-04 04:56:52 +08:00
|
|
|
updateValueMap(&Arg, ResultReg);
|
2014-08-05 13:43:48 +08:00
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2014-09-16 07:20:17 +08:00
|
|
|
bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
|
2014-07-23 07:14:58 +08:00
|
|
|
SmallVectorImpl<MVT> &OutVTs,
|
|
|
|
unsigned &NumBytes) {
|
|
|
|
CallingConv::ID CC = CLI.CallConv;
|
2014-03-29 18:18:08 +08:00
|
|
|
SmallVector<CCValAssign, 16> ArgLocs;
|
2014-08-07 02:45:26 +08:00
|
|
|
CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
|
2014-07-23 07:14:58 +08:00
|
|
|
CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
// Get a count of how many bytes are to be pushed on the stack.
|
|
|
|
NumBytes = CCInfo.getNextStackOffset();
|
|
|
|
|
|
|
|
// Issue CALLSEQ_START
|
|
|
|
unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
|
2017-05-09 21:35:13 +08:00
|
|
|
.addImm(NumBytes).addImm(0);
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
// Process the args.
|
2015-08-04 03:04:32 +08:00
|
|
|
for (CCValAssign &VA : ArgLocs) {
|
2014-07-23 07:14:58 +08:00
|
|
|
const Value *ArgVal = CLI.OutVals[VA.getValNo()];
|
|
|
|
MVT ArgVT = OutVTs[VA.getValNo()];
|
|
|
|
|
|
|
|
unsigned ArgReg = getRegForValue(ArgVal);
|
|
|
|
if (!ArgReg)
|
|
|
|
return false;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
// Handle arg promotion: SExt, ZExt, AExt.
|
|
|
|
switch (VA.getLocInfo()) {
|
|
|
|
case CCValAssign::Full:
|
|
|
|
break;
|
|
|
|
case CCValAssign::SExt: {
|
|
|
|
MVT DestVT = VA.getLocVT();
|
|
|
|
MVT SrcVT = ArgVT;
|
2014-09-16 07:20:17 +08:00
|
|
|
ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
|
2014-07-23 07:14:58 +08:00
|
|
|
if (!ArgReg)
|
2014-03-29 18:18:08 +08:00
|
|
|
return false;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case CCValAssign::AExt:
|
|
|
|
// Intentional fall-through.
|
|
|
|
case CCValAssign::ZExt: {
|
|
|
|
MVT DestVT = VA.getLocVT();
|
|
|
|
MVT SrcVT = ArgVT;
|
2014-09-16 07:20:17 +08:00
|
|
|
ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
|
2014-07-23 07:14:58 +08:00
|
|
|
if (!ArgReg)
|
2014-03-29 18:18:08 +08:00
|
|
|
return false;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
default:
|
|
|
|
llvm_unreachable("Unknown arg promotion!");
|
|
|
|
}
|
|
|
|
|
|
|
|
// Now copy/store arg to correct locations.
|
|
|
|
if (VA.isRegLoc() && !VA.needsCustom()) {
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
2014-07-23 07:14:58 +08:00
|
|
|
TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
|
|
|
|
CLI.OutRegs.push_back(VA.getLocReg());
|
2014-03-29 18:18:08 +08:00
|
|
|
} else if (VA.needsCustom()) {
|
|
|
|
// FIXME: Handle custom args.
|
|
|
|
return false;
|
|
|
|
} else {
|
|
|
|
assert(VA.isMemLoc() && "Assuming store on stack.");
|
|
|
|
|
2014-07-31 08:11:11 +08:00
|
|
|
// Don't emit stores for undef values.
|
|
|
|
if (isa<UndefValue>(ArgVal))
|
|
|
|
continue;
|
|
|
|
|
2014-03-29 18:18:08 +08:00
|
|
|
// Need to store on the stack.
|
2014-06-03 21:54:53 +08:00
|
|
|
unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
|
2014-05-08 20:53:50 +08:00
|
|
|
|
|
|
|
unsigned BEAlign = 0;
|
|
|
|
if (ArgSize < 8 && !Subtarget->isLittleEndian())
|
|
|
|
BEAlign = 8 - ArgSize;
|
|
|
|
|
2014-03-29 18:18:08 +08:00
|
|
|
Address Addr;
|
|
|
|
Addr.setKind(Address::RegBase);
|
2014-05-24 20:50:23 +08:00
|
|
|
Addr.setReg(AArch64::SP);
|
2014-05-08 20:53:50 +08:00
|
|
|
Addr.setOffset(VA.getLocMemOffset() + BEAlign);
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2014-08-09 01:24:10 +08:00
|
|
|
unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
|
|
|
|
MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
|
2015-08-12 07:09:45 +08:00
|
|
|
MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
|
|
|
|
MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
|
2014-08-09 01:24:10 +08:00
|
|
|
|
2014-09-16 07:20:17 +08:00
|
|
|
if (!emitStore(ArgVT, ArgReg, Addr, MMO))
|
2014-03-29 18:18:08 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2014-09-16 07:20:17 +08:00
|
|
|
bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
|
2014-07-24 04:03:13 +08:00
|
|
|
unsigned NumBytes) {
|
2014-07-23 07:14:58 +08:00
|
|
|
CallingConv::ID CC = CLI.CallConv;
|
|
|
|
|
2014-03-29 18:18:08 +08:00
|
|
|
// Issue CALLSEQ_END
|
|
|
|
unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
|
2014-07-23 07:14:58 +08:00
|
|
|
.addImm(NumBytes).addImm(0);
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
// Now the return value.
|
|
|
|
if (RetVT != MVT::isVoid) {
|
|
|
|
SmallVector<CCValAssign, 16> RVLocs;
|
2014-08-07 02:45:26 +08:00
|
|
|
CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
|
2014-03-29 18:18:08 +08:00
|
|
|
CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
|
|
|
|
|
|
|
|
// Only handle a single return value.
|
|
|
|
if (RVLocs.size() != 1)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// Copy all of the result registers out of their specified physreg.
|
|
|
|
MVT CopyVT = RVLocs[0].getValVT();
|
2015-04-17 05:19:36 +08:00
|
|
|
|
|
|
|
// TODO: Handle big-endian results
|
|
|
|
if (CopyVT.isVector() && !Subtarget->isLittleEndian())
|
|
|
|
return false;
|
|
|
|
|
2014-03-29 18:18:08 +08:00
|
|
|
unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
2014-07-23 07:14:58 +08:00
|
|
|
TII.get(TargetOpcode::COPY), ResultReg)
|
2014-08-22 04:57:57 +08:00
|
|
|
.addReg(RVLocs[0].getLocReg());
|
2014-07-23 07:14:58 +08:00
|
|
|
CLI.InRegs.push_back(RVLocs[0].getLocReg());
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2014-07-23 07:14:58 +08:00
|
|
|
CLI.ResultReg = ResultReg;
|
|
|
|
CLI.NumResultRegs = 1;
|
2014-03-29 18:18:08 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2014-09-04 04:56:52 +08:00
|
|
|
bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
|
2014-07-23 07:14:58 +08:00
|
|
|
CallingConv::ID CC = CLI.CallConv;
|
2014-08-14 07:23:58 +08:00
|
|
|
bool IsTailCall = CLI.IsTailCall;
|
2014-07-23 07:14:58 +08:00
|
|
|
bool IsVarArg = CLI.IsVarArg;
|
|
|
|
const Value *Callee = CLI.Callee;
|
2015-06-23 20:21:54 +08:00
|
|
|
MCSymbol *Symbol = CLI.Symbol;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2015-06-23 20:21:54 +08:00
|
|
|
if (!Callee && !Symbol)
|
2014-09-16 06:07:49 +08:00
|
|
|
return false;
|
|
|
|
|
2014-08-14 07:23:58 +08:00
|
|
|
// Allow SelectionDAG isel to handle tail calls.
|
|
|
|
if (IsTailCall)
|
|
|
|
return false;
|
|
|
|
|
2014-07-31 12:10:40 +08:00
|
|
|
CodeModel::Model CM = TM.getCodeModel();
|
2017-04-05 03:51:53 +08:00
|
|
|
// Only support the small-addressing and large code models.
|
|
|
|
if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
|
2014-07-31 12:10:40 +08:00
|
|
|
return false;
|
|
|
|
|
|
|
|
// FIXME: Add large code model support for ELF.
|
|
|
|
if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
|
2014-03-29 18:18:08 +08:00
|
|
|
return false;
|
|
|
|
|
|
|
|
// Let SDISel handle vararg functions.
|
2014-07-23 07:14:58 +08:00
|
|
|
if (IsVarArg)
|
2014-03-29 18:18:08 +08:00
|
|
|
return false;
|
|
|
|
|
2014-07-23 07:14:58 +08:00
|
|
|
// FIXME: Only handle *simple* calls for now.
|
2014-03-29 18:18:08 +08:00
|
|
|
MVT RetVT;
|
2014-07-23 07:14:58 +08:00
|
|
|
if (CLI.RetTy->isVoidTy())
|
2014-03-29 18:18:08 +08:00
|
|
|
RetVT = MVT::isVoid;
|
2014-07-23 07:14:58 +08:00
|
|
|
else if (!isTypeLegal(CLI.RetTy, RetVT))
|
2014-03-29 18:18:08 +08:00
|
|
|
return false;
|
|
|
|
|
2014-07-23 07:14:58 +08:00
|
|
|
for (auto Flag : CLI.OutFlags)
|
2016-03-30 01:37:21 +08:00
|
|
|
if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
|
2016-04-12 05:08:06 +08:00
|
|
|
Flag.isSwiftSelf() || Flag.isSwiftError())
|
2014-03-29 18:18:08 +08:00
|
|
|
return false;
|
|
|
|
|
2014-07-23 07:14:58 +08:00
|
|
|
// Set up the argument vectors.
|
|
|
|
SmallVector<MVT, 16> OutVTs;
|
|
|
|
OutVTs.reserve(CLI.OutVals.size());
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2014-07-23 07:14:58 +08:00
|
|
|
for (auto *Val : CLI.OutVals) {
|
|
|
|
MVT VT;
|
|
|
|
if (!isTypeLegal(Val->getType(), VT) &&
|
|
|
|
!(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
|
2014-03-29 18:18:08 +08:00
|
|
|
return false;
|
|
|
|
|
|
|
|
// We don't handle vector parameters yet.
|
2014-07-23 07:14:58 +08:00
|
|
|
if (VT.isVector() || VT.getSizeInBits() > 64)
|
2014-03-29 18:18:08 +08:00
|
|
|
return false;
|
|
|
|
|
2014-07-23 07:14:58 +08:00
|
|
|
OutVTs.push_back(VT);
|
2014-03-29 18:18:08 +08:00
|
|
|
}
|
|
|
|
|
2014-07-31 12:10:40 +08:00
|
|
|
Address Addr;
|
2014-09-16 07:20:17 +08:00
|
|
|
if (Callee && !computeCallAddress(Callee, Addr))
|
2014-07-31 12:10:40 +08:00
|
|
|
return false;
|
|
|
|
|
2014-03-29 18:18:08 +08:00
|
|
|
// Handle the arguments now that we've gotten them.
|
|
|
|
unsigned NumBytes;
|
2014-09-16 07:20:17 +08:00
|
|
|
if (!processCallArgs(CLI, OutVTs, NumBytes))
|
2014-03-29 18:18:08 +08:00
|
|
|
return false;
|
|
|
|
|
[AArch64] Support reserving x1-7 registers.
Summary:
Reserving registers x1-7 is used to support CONFIG_ARM64_LSE_ATOMICS in Linux kernel. This change adds support for reserving registers x1 through x7.
Reviewers: javed.absar, phosek, srhines, nickdesaulniers, efriedma
Reviewed By: nickdesaulniers, efriedma
Subscribers: niravd, jfb, manojgupta, nickdesaulniers, jyknight, efriedma, kristof.beyls, llvm-commits
Differential Revision: https://reviews.llvm.org/D48580
llvm-svn: 341706
2018-09-08 04:58:57 +08:00
|
|
|
const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
|
|
|
|
if (RegInfo->isAnyArgRegReserved(*MF))
|
|
|
|
RegInfo->emitReservedArgRegCallError(*MF);
|
|
|
|
|
2014-03-29 18:18:08 +08:00
|
|
|
// Issue the call.
|
|
|
|
MachineInstrBuilder MIB;
|
2017-04-05 03:51:53 +08:00
|
|
|
if (Subtarget->useSmallAddressing()) {
|
2014-08-30 07:48:06 +08:00
|
|
|
const MCInstrDesc &II = TII.get(Addr.getReg() ? AArch64::BLR : AArch64::BL);
|
|
|
|
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
|
2015-06-23 20:21:54 +08:00
|
|
|
if (Symbol)
|
|
|
|
MIB.addSym(Symbol, 0);
|
2014-07-31 12:10:40 +08:00
|
|
|
else if (Addr.getGlobalValue())
|
|
|
|
MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
|
2014-08-30 07:48:06 +08:00
|
|
|
else if (Addr.getReg()) {
|
|
|
|
unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
|
|
|
|
MIB.addReg(Reg);
|
|
|
|
} else
|
2014-07-31 12:10:40 +08:00
|
|
|
return false;
|
|
|
|
} else {
|
|
|
|
unsigned CallReg = 0;
|
2015-06-23 20:21:54 +08:00
|
|
|
if (Symbol) {
|
2014-07-31 12:10:40 +08:00
|
|
|
unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
|
|
|
|
ADRPReg)
|
2015-06-23 20:21:54 +08:00
|
|
|
.addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
|
2014-07-31 12:10:40 +08:00
|
|
|
|
|
|
|
CallReg = createResultReg(&AArch64::GPR64RegClass);
|
2015-06-23 20:21:54 +08:00
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
|
|
|
TII.get(AArch64::LDRXui), CallReg)
|
|
|
|
.addReg(ADRPReg)
|
|
|
|
.addSym(Symbol,
|
|
|
|
AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
|
2014-09-16 07:20:17 +08:00
|
|
|
} else if (Addr.getGlobalValue())
|
|
|
|
CallReg = materializeGV(Addr.getGlobalValue());
|
|
|
|
else if (Addr.getReg())
|
2014-07-31 12:10:40 +08:00
|
|
|
CallReg = Addr.getReg();
|
|
|
|
|
|
|
|
if (!CallReg)
|
|
|
|
return false;
|
|
|
|
|
2014-08-30 07:48:06 +08:00
|
|
|
const MCInstrDesc &II = TII.get(AArch64::BLR);
|
|
|
|
CallReg = constrainOperandRegClass(II, CallReg, 0);
|
|
|
|
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg);
|
2014-07-31 12:10:40 +08:00
|
|
|
}
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
// Add implicit physical register uses to the call.
|
2014-07-23 07:14:58 +08:00
|
|
|
for (auto Reg : CLI.OutRegs)
|
|
|
|
MIB.addReg(Reg, RegState::Implicit);
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
// Add a register mask with the call-preserved registers.
|
|
|
|
// Proper defs for return values will be added by setPhysRegsDeadExcept().
|
2015-03-12 06:42:13 +08:00
|
|
|
MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2014-07-31 12:10:40 +08:00
|
|
|
CLI.Call = MIB;
|
|
|
|
|
2014-03-29 18:18:08 +08:00
|
|
|
// Finish off the call including any return values.
|
2014-09-16 07:20:17 +08:00
|
|
|
return finishCall(CLI, RetVT, NumBytes);
|
2014-03-29 18:18:08 +08:00
|
|
|
}
|
|
|
|
|
2014-09-16 07:20:17 +08:00
|
|
|
/// Decide whether a memcpy of \p Len bytes is small enough to be expanded
/// inline: with a known (non-zero) \p Alignment at most four
/// alignment-sized units, otherwise fewer than 32 bytes.
bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) {
  return Alignment ? (Len / Alignment <= 4) : (Len < 32);
}
|
|
|
|
|
2014-09-16 07:20:17 +08:00
|
|
|
bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
|
2014-05-24 20:50:23 +08:00
|
|
|
uint64_t Len, unsigned Alignment) {
|
2014-03-29 18:18:08 +08:00
|
|
|
// Make sure we don't bloat code by inlining very large memcpy's.
|
2014-09-16 07:20:17 +08:00
|
|
|
if (!isMemCpySmall(Len, Alignment))
|
2014-03-29 18:18:08 +08:00
|
|
|
return false;
|
|
|
|
|
|
|
|
int64_t UnscaledOffset = 0;
|
|
|
|
Address OrigDest = Dest;
|
|
|
|
Address OrigSrc = Src;
|
|
|
|
|
|
|
|
while (Len) {
|
|
|
|
MVT VT;
|
|
|
|
if (!Alignment || Alignment >= 8) {
|
|
|
|
if (Len >= 8)
|
|
|
|
VT = MVT::i64;
|
|
|
|
else if (Len >= 4)
|
|
|
|
VT = MVT::i32;
|
|
|
|
else if (Len >= 2)
|
|
|
|
VT = MVT::i16;
|
|
|
|
else {
|
|
|
|
VT = MVT::i8;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
// Bound based on alignment.
|
|
|
|
if (Len >= 4 && Alignment == 4)
|
|
|
|
VT = MVT::i32;
|
|
|
|
else if (Len >= 2 && Alignment == 2)
|
|
|
|
VT = MVT::i16;
|
|
|
|
else {
|
|
|
|
VT = MVT::i8;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-10-15 04:36:02 +08:00
|
|
|
unsigned ResultReg = emitLoad(VT, VT, Src);
|
|
|
|
if (!ResultReg)
|
2014-06-10 17:52:40 +08:00
|
|
|
return false;
|
|
|
|
|
2014-10-15 04:36:02 +08:00
|
|
|
if (!emitStore(VT, ResultReg, Dest))
|
2014-06-10 17:52:40 +08:00
|
|
|
return false;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
int64_t Size = VT.getSizeInBits() / 8;
|
|
|
|
Len -= Size;
|
|
|
|
UnscaledOffset += Size;
|
|
|
|
|
|
|
|
// We need to recompute the unscaled offset for each iteration.
|
|
|
|
Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
|
|
|
|
Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
|
|
|
|
}
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2018-05-01 23:54:18 +08:00
|
|
|
/// Check if it is possible to fold the condition from the XALU intrinsic
|
2014-07-31 06:04:34 +08:00
|
|
|
/// into the user. The condition code will only be updated on success.
|
|
|
|
bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
|
|
|
|
const Instruction *I,
|
|
|
|
const Value *Cond) {
|
|
|
|
if (!isa<ExtractValueInst>(Cond))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
const auto *EV = cast<ExtractValueInst>(Cond);
|
|
|
|
if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
|
|
|
|
MVT RetVT;
|
|
|
|
const Function *Callee = II->getCalledFunction();
|
|
|
|
Type *RetTy =
|
|
|
|
cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
|
|
|
|
if (!isTypeLegal(RetTy, RetVT))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
if (RetVT != MVT::i32 && RetVT != MVT::i64)
|
|
|
|
return false;
|
|
|
|
|
2014-09-18 15:26:26 +08:00
|
|
|
const Value *LHS = II->getArgOperand(0);
|
|
|
|
const Value *RHS = II->getArgOperand(1);
|
|
|
|
|
|
|
|
// Canonicalize immediate to the RHS.
|
|
|
|
if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
|
|
|
|
isCommutativeIntrinsic(II))
|
|
|
|
std::swap(LHS, RHS);
|
|
|
|
|
|
|
|
// Simplify multiplies.
|
2015-05-21 01:16:39 +08:00
|
|
|
Intrinsic::ID IID = II->getIntrinsicID();
|
2014-09-18 15:26:26 +08:00
|
|
|
switch (IID) {
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
case Intrinsic::smul_with_overflow:
|
|
|
|
if (const auto *C = dyn_cast<ConstantInt>(RHS))
|
|
|
|
if (C->getValue() == 2)
|
|
|
|
IID = Intrinsic::sadd_with_overflow;
|
|
|
|
break;
|
|
|
|
case Intrinsic::umul_with_overflow:
|
|
|
|
if (const auto *C = dyn_cast<ConstantInt>(RHS))
|
|
|
|
if (C->getValue() == 2)
|
|
|
|
IID = Intrinsic::uadd_with_overflow;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2014-07-31 06:04:34 +08:00
|
|
|
AArch64CC::CondCode TmpCC;
|
2014-09-18 15:26:26 +08:00
|
|
|
switch (IID) {
|
|
|
|
default:
|
|
|
|
return false;
|
|
|
|
case Intrinsic::sadd_with_overflow:
|
|
|
|
case Intrinsic::ssub_with_overflow:
|
|
|
|
TmpCC = AArch64CC::VS;
|
|
|
|
break;
|
|
|
|
case Intrinsic::uadd_with_overflow:
|
|
|
|
TmpCC = AArch64CC::HS;
|
|
|
|
break;
|
|
|
|
case Intrinsic::usub_with_overflow:
|
|
|
|
TmpCC = AArch64CC::LO;
|
|
|
|
break;
|
|
|
|
case Intrinsic::smul_with_overflow:
|
|
|
|
case Intrinsic::umul_with_overflow:
|
|
|
|
TmpCC = AArch64CC::NE;
|
|
|
|
break;
|
2014-07-31 06:04:34 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Check if both instructions are in the same basic block.
|
2014-09-18 01:46:47 +08:00
|
|
|
if (!isValueAvailable(II))
|
2014-07-31 06:04:34 +08:00
|
|
|
return false;
|
|
|
|
|
|
|
|
// Make sure nothing is in the way
|
2015-10-14 04:02:15 +08:00
|
|
|
BasicBlock::const_iterator Start(I);
|
|
|
|
BasicBlock::const_iterator End(II);
|
2014-07-31 06:04:34 +08:00
|
|
|
for (auto Itr = std::prev(Start); Itr != End; --Itr) {
|
|
|
|
// We only expect extractvalue instructions between the intrinsic and the
|
|
|
|
// instruction to be selected.
|
|
|
|
if (!isa<ExtractValueInst>(Itr))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// Check that the extractvalue operand comes from the intrinsic.
|
|
|
|
const auto *EVI = cast<ExtractValueInst>(Itr);
|
|
|
|
if (EVI->getAggregateOperand() != II)
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
CC = TmpCC;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2014-09-04 04:56:52 +08:00
|
|
|
bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
|
2014-03-29 18:18:08 +08:00
|
|
|
// FIXME: Handle more intrinsics.
|
2014-07-23 07:14:58 +08:00
|
|
|
switch (II->getIntrinsicID()) {
|
2014-07-26 01:47:14 +08:00
|
|
|
default: return false;
|
|
|
|
case Intrinsic::frameaddress: {
|
2016-07-29 02:40:00 +08:00
|
|
|
MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
|
|
|
|
MFI.setFrameAddressIsTaken(true);
|
2014-07-26 01:47:14 +08:00
|
|
|
|
2017-04-01 07:12:24 +08:00
|
|
|
const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
|
2014-07-26 01:47:14 +08:00
|
|
|
unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
|
2014-08-22 04:57:57 +08:00
|
|
|
unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
|
|
|
TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
|
2014-07-26 01:47:14 +08:00
|
|
|
// Recursively load frame address
|
|
|
|
// ldr x0, [fp]
|
|
|
|
// ldr x0, [x0]
|
|
|
|
// ldr x0, [x0]
|
|
|
|
// ...
|
|
|
|
unsigned DestReg;
|
|
|
|
unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
|
|
|
|
while (Depth--) {
|
2014-09-04 04:56:59 +08:00
|
|
|
DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
|
2014-08-22 04:57:57 +08:00
|
|
|
SrcReg, /*IsKill=*/true, 0);
|
|
|
|
assert(DestReg && "Unexpected LDR instruction emission failure.");
|
2014-07-26 01:47:14 +08:00
|
|
|
SrcReg = DestReg;
|
|
|
|
}
|
|
|
|
|
2014-09-04 04:56:52 +08:00
|
|
|
updateValueMap(II, SrcReg);
|
2014-07-26 01:47:14 +08:00
|
|
|
return true;
|
|
|
|
}
|
[COFF, ARM64] Implement Intrinsic.sponentry for AArch64
Summary: This patch adds Intrinsic.sponentry. This intrinsic is required to correctly support setjmp for AArch64 Windows platform.
Patch by: Yin Ma (yinma@codeaurora.org)
Reviewers: mgrang, ssijaric, eli.friedman, TomTan, mstorsjo, rnk, compnerd, efriedma
Reviewed By: efriedma
Subscribers: efriedma, javed.absar, kristof.beyls, chrib, llvm-commits
Differential Revision: https://reviews.llvm.org/D53996
llvm-svn: 345909
2018-11-02 07:22:25 +08:00
|
|
|
case Intrinsic::sponentry: {
|
|
|
|
MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
|
|
|
|
|
|
|
|
// SP = FP + Fixed Object + 16
|
|
|
|
int FI = MFI.CreateFixedObject(4, 0, false);
|
|
|
|
unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
|
|
|
TII.get(AArch64::ADDXri), ResultReg)
|
|
|
|
.addFrameIndex(FI)
|
|
|
|
.addImm(0)
|
|
|
|
.addImm(0);
|
|
|
|
|
|
|
|
updateValueMap(II, ResultReg);
|
|
|
|
return true;
|
|
|
|
}
|
2014-03-29 18:18:08 +08:00
|
|
|
case Intrinsic::memcpy:
|
|
|
|
case Intrinsic::memmove: {
|
2014-07-23 07:14:58 +08:00
|
|
|
const auto *MTI = cast<MemTransferInst>(II);
|
2014-03-29 18:18:08 +08:00
|
|
|
// Don't handle volatile.
|
2014-07-23 07:14:58 +08:00
|
|
|
if (MTI->isVolatile())
|
2014-03-29 18:18:08 +08:00
|
|
|
return false;
|
|
|
|
|
2014-08-28 07:09:40 +08:00
|
|
|
// Disable inlining for memmove before calls to ComputeAddress. Otherwise,
|
2014-03-29 18:18:08 +08:00
|
|
|
// we would emit dead code because we don't currently handle memmoves.
|
2014-07-23 07:14:58 +08:00
|
|
|
bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
|
|
|
|
if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
|
2014-03-29 18:18:08 +08:00
|
|
|
// Small memcpy's are common enough that we want to do them without a call
|
|
|
|
// if possible.
|
2014-07-23 07:14:58 +08:00
|
|
|
uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
|
[AArch64FastISel] Replace deprecated calls to MemoryIntrinsic::getAlignment() (NFCI)
Summary:
This change is part of step five in the series of changes to remove alignment argument from
memcpy/memmove/memset in favour of alignment attributes. In particular, this changes
AArch64FastISel to cease using the old getAlignment() API of MemoryIntrinsic in favour of getting
source & dest specific alignments through the new API.
Steps:
Step 1) Remove alignment parameter and create alignment parameter attributes for
memcpy/memmove/memset. ( rL322965, rC322964, rL322963 )
Step 2) Expand the IRBuilder API to allow creation of memcpy/memmove with differing
source and dest alignments. ( rL323597 )
Step 3) Update Clang to use the new IRBuilder API. ( rC323617 )
Step 4) Update Polly to use the new IRBuilder API. ( rL323618 )
Step 5) Update LLVM passes that create memcpy/memmove calls to use the new IRBuilder API,
and those that use use MemIntrinsicInst::[get|set]Alignment() to use [get|set]DestAlignment()
and [get|set]SourceAlignment() instead. ( rL323886, r323891, rL324148, rL324273, rL324278,
rL324384, rL324395, rL324402, rL324626, rL324642, rL324653, rL324654 )
Step 6) Remove the single-alignment IRBuilder API for memcpy/memmove, and the
MemIntrinsicInst::[get|set]Alignment() methods.
Reference
http://lists.llvm.org/pipermail/llvm-dev/2015-August/089384.html
http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20151109/312083.html
llvm-svn: 324773
2018-02-10 05:49:29 +08:00
|
|
|
unsigned Alignment = MinAlign(MTI->getDestAlignment(),
|
|
|
|
MTI->getSourceAlignment());
|
2014-09-16 07:20:17 +08:00
|
|
|
if (isMemCpySmall(Len, Alignment)) {
|
2014-03-29 18:18:08 +08:00
|
|
|
Address Dest, Src;
|
2014-09-16 07:20:17 +08:00
|
|
|
if (!computeAddress(MTI->getRawDest(), Dest) ||
|
|
|
|
!computeAddress(MTI->getRawSource(), Src))
|
2014-03-29 18:18:08 +08:00
|
|
|
return false;
|
2014-09-16 07:20:17 +08:00
|
|
|
if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
|
2014-03-29 18:18:08 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-07-23 07:14:58 +08:00
|
|
|
if (!MTI->getLength()->getType()->isIntegerTy(64))
|
2014-03-29 18:18:08 +08:00
|
|
|
return false;
|
|
|
|
|
2014-07-23 07:14:58 +08:00
|
|
|
if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
|
2014-03-29 18:18:08 +08:00
|
|
|
// Fast instruction selection doesn't support the special
|
|
|
|
// address spaces.
|
|
|
|
return false;
|
|
|
|
|
2014-07-23 07:14:58 +08:00
|
|
|
const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
|
Remove alignment argument from memcpy/memmove/memset in favour of alignment attributes (Step 1)
Summary:
This is a resurrection of work first proposed and discussed in Aug 2015:
http://lists.llvm.org/pipermail/llvm-dev/2015-August/089384.html
and initially landed (but then backed out) in Nov 2015:
http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20151109/312083.html
The @llvm.memcpy/memmove/memset intrinsics currently have an explicit argument
which is required to be a constant integer. It represents the alignment of the
dest (and source), and so must be the minimum of the actual alignment of the
two.
This change is the first in a series that allows source and dest to each
have their own alignments by using the alignment attribute on their arguments.
In this change we:
1) Remove the alignment argument.
2) Add alignment attributes to the source & dest arguments. We, temporarily,
require that the alignments for source & dest be equal.
For example, code which used to read:
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 100, i32 4, i1 false)
will now read
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 100, i1 false)
Downstream users may have to update their lit tests that check for
@llvm.memcpy/memmove/memset call/declaration patterns. The following extended sed script
may help with updating the majority of your tests, but it does not catch all possible
patterns so some manual checking and updating will be required.
s~declare void @llvm\.mem(set|cpy|move)\.p([^(]*)\((.*), i32, i1\)~declare void @llvm.mem\1.p\2(\3, i1)~g
s~call void @llvm\.memset\.p([^(]*)i8\(i8([^*]*)\* (.*), i8 (.*), i8 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i8(i8\2* \3, i8 \4, i8 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i16\(i8([^*]*)\* (.*), i8 (.*), i16 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i16(i8\2* \3, i8 \4, i16 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i32\(i8([^*]*)\* (.*), i8 (.*), i32 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i32(i8\2* \3, i8 \4, i32 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i64\(i8([^*]*)\* (.*), i8 (.*), i64 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i64(i8\2* \3, i8 \4, i64 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i128\(i8([^*]*)\* (.*), i8 (.*), i128 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i128(i8\2* \3, i8 \4, i128 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i8\(i8([^*]*)\* (.*), i8 (.*), i8 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i8(i8\2* align \6 \3, i8 \4, i8 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i16\(i8([^*]*)\* (.*), i8 (.*), i16 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i16(i8\2* align \6 \3, i8 \4, i16 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i32\(i8([^*]*)\* (.*), i8 (.*), i32 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i32(i8\2* align \6 \3, i8 \4, i32 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i64\(i8([^*]*)\* (.*), i8 (.*), i64 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i64(i8\2* align \6 \3, i8 \4, i64 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i128\(i8([^*]*)\* (.*), i8 (.*), i128 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i128(i8\2* align \6 \3, i8 \4, i128 \5, i1 \7)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i8\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i8 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i8(i8\3* \4, i8\5* \6, i8 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i16\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i16 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i16(i8\3* \4, i8\5* \6, i16 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i32\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i32 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i32(i8\3* \4, i8\5* \6, i32 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i64\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i64 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i64(i8\3* \4, i8\5* \6, i64 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i128\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i128 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i128(i8\3* \4, i8\5* \6, i128 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i8\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i8 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i8(i8\3* align \8 \4, i8\5* align \8 \6, i8 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i16\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i16 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i16(i8\3* align \8 \4, i8\5* align \8 \6, i16 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i32\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i32 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i32(i8\3* align \8 \4, i8\5* align \8 \6, i32 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i64\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i64 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i64(i8\3* align \8 \4, i8\5* align \8 \6, i64 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i128\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i128 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i128(i8\3* align \8 \4, i8\5* align \8 \6, i128 \7, i1 \9)~g
The remaining changes in the series will:
Step 2) Expand the IRBuilder API to allow creation of memcpy/memmove with differing
source and dest alignments.
Step 3) Update Clang to use the new IRBuilder API.
Step 4) Update Polly to use the new IRBuilder API.
Step 5) Update LLVM passes that create memcpy/memmove calls to use the new IRBuilder API,
and those that use use MemIntrinsicInst::[get|set]Alignment() to use
getDestAlignment() and getSourceAlignment() instead.
Step 6) Remove the single-alignment IRBuilder API for memcpy/memmove, and the
MemIntrinsicInst::[get|set]Alignment() methods.
Reviewers: pete, hfinkel, lhames, reames, bollu
Reviewed By: reames
Subscribers: niosHD, reames, jholewinski, qcolombet, jfb, sanjoy, arsenm, dschuff, dylanmckay, mehdi_amini, sdardis, nemanjai, david2050, nhaehnle, javed.absar, sbc100, jgravelle-google, eraman, aheejin, kbarton, JDevlieghere, asb, rbar, johnrusso, simoncook, jordy.potman.lists, apazos, sabuasal, llvm-commits
Differential Revision: https://reviews.llvm.org/D41675
llvm-svn: 322965
2018-01-20 01:13:12 +08:00
|
|
|
return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 1);
|
2014-03-29 18:18:08 +08:00
|
|
|
}
|
|
|
|
case Intrinsic::memset: {
|
2014-07-23 07:14:58 +08:00
|
|
|
const MemSetInst *MSI = cast<MemSetInst>(II);
|
2014-03-29 18:18:08 +08:00
|
|
|
// Don't handle volatile.
|
2014-07-23 07:14:58 +08:00
|
|
|
if (MSI->isVolatile())
|
2014-03-29 18:18:08 +08:00
|
|
|
return false;
|
|
|
|
|
2014-07-23 07:14:58 +08:00
|
|
|
if (!MSI->getLength()->getType()->isIntegerTy(64))
|
2014-03-29 18:18:08 +08:00
|
|
|
return false;
|
|
|
|
|
2014-07-23 07:14:58 +08:00
|
|
|
if (MSI->getDestAddressSpace() > 255)
|
2014-03-29 18:18:08 +08:00
|
|
|
// Fast instruction selection doesn't support the special
|
|
|
|
// address spaces.
|
|
|
|
return false;
|
|
|
|
|
Remove alignment argument from memcpy/memmove/memset in favour of alignment attributes (Step 1)
Summary:
This is a resurrection of work first proposed and discussed in Aug 2015:
http://lists.llvm.org/pipermail/llvm-dev/2015-August/089384.html
and initially landed (but then backed out) in Nov 2015:
http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20151109/312083.html
The @llvm.memcpy/memmove/memset intrinsics currently have an explicit argument
which is required to be a constant integer. It represents the alignment of the
dest (and source), and so must be the minimum of the actual alignment of the
two.
This change is the first in a series that allows source and dest to each
have their own alignments by using the alignment attribute on their arguments.
In this change we:
1) Remove the alignment argument.
2) Add alignment attributes to the source & dest arguments. We, temporarily,
require that the alignments for source & dest be equal.
For example, code which used to read:
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 100, i32 4, i1 false)
will now read
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 100, i1 false)
Downstream users may have to update their lit tests that check for
@llvm.memcpy/memmove/memset call/declaration patterns. The following extended sed script
may help with updating the majority of your tests, but it does not catch all possible
patterns so some manual checking and updating will be required.
s~declare void @llvm\.mem(set|cpy|move)\.p([^(]*)\((.*), i32, i1\)~declare void @llvm.mem\1.p\2(\3, i1)~g
s~call void @llvm\.memset\.p([^(]*)i8\(i8([^*]*)\* (.*), i8 (.*), i8 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i8(i8\2* \3, i8 \4, i8 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i16\(i8([^*]*)\* (.*), i8 (.*), i16 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i16(i8\2* \3, i8 \4, i16 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i32\(i8([^*]*)\* (.*), i8 (.*), i32 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i32(i8\2* \3, i8 \4, i32 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i64\(i8([^*]*)\* (.*), i8 (.*), i64 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i64(i8\2* \3, i8 \4, i64 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i128\(i8([^*]*)\* (.*), i8 (.*), i128 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i128(i8\2* \3, i8 \4, i128 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i8\(i8([^*]*)\* (.*), i8 (.*), i8 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i8(i8\2* align \6 \3, i8 \4, i8 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i16\(i8([^*]*)\* (.*), i8 (.*), i16 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i16(i8\2* align \6 \3, i8 \4, i16 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i32\(i8([^*]*)\* (.*), i8 (.*), i32 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i32(i8\2* align \6 \3, i8 \4, i32 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i64\(i8([^*]*)\* (.*), i8 (.*), i64 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i64(i8\2* align \6 \3, i8 \4, i64 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i128\(i8([^*]*)\* (.*), i8 (.*), i128 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i128(i8\2* align \6 \3, i8 \4, i128 \5, i1 \7)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i8\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i8 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i8(i8\3* \4, i8\5* \6, i8 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i16\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i16 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i16(i8\3* \4, i8\5* \6, i16 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i32\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i32 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i32(i8\3* \4, i8\5* \6, i32 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i64\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i64 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i64(i8\3* \4, i8\5* \6, i64 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i128\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i128 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i128(i8\3* \4, i8\5* \6, i128 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i8\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i8 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i8(i8\3* align \8 \4, i8\5* align \8 \6, i8 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i16\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i16 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i16(i8\3* align \8 \4, i8\5* align \8 \6, i16 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i32\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i32 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i32(i8\3* align \8 \4, i8\5* align \8 \6, i32 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i64\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i64 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i64(i8\3* align \8 \4, i8\5* align \8 \6, i64 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i128\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i128 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i128(i8\3* align \8 \4, i8\5* align \8 \6, i128 \7, i1 \9)~g
The remaining changes in the series will:
Step 2) Expand the IRBuilder API to allow creation of memcpy/memmove with differing
source and dest alignments.
Step 3) Update Clang to use the new IRBuilder API.
Step 4) Update Polly to use the new IRBuilder API.
Step 5) Update LLVM passes that create memcpy/memmove calls to use the new IRBuilder API,
and those that use use MemIntrinsicInst::[get|set]Alignment() to use
getDestAlignment() and getSourceAlignment() instead.
Step 6) Remove the single-alignment IRBuilder API for memcpy/memmove, and the
MemIntrinsicInst::[get|set]Alignment() methods.
Reviewers: pete, hfinkel, lhames, reames, bollu
Reviewed By: reames
Subscribers: niosHD, reames, jholewinski, qcolombet, jfb, sanjoy, arsenm, dschuff, dylanmckay, mehdi_amini, sdardis, nemanjai, david2050, nhaehnle, javed.absar, sbc100, jgravelle-google, eraman, aheejin, kbarton, JDevlieghere, asb, rbar, johnrusso, simoncook, jordy.potman.lists, apazos, sabuasal, llvm-commits
Differential Revision: https://reviews.llvm.org/D41675
llvm-svn: 322965
2018-01-20 01:13:12 +08:00
|
|
|
return lowerCallTo(II, "memset", II->getNumArgOperands() - 1);
|
2014-03-29 18:18:08 +08:00
|
|
|
}
|
2014-09-16 06:33:06 +08:00
|
|
|
case Intrinsic::sin:
|
|
|
|
case Intrinsic::cos:
|
|
|
|
case Intrinsic::pow: {
|
|
|
|
MVT RetVT;
|
|
|
|
if (!isTypeLegal(II->getType(), RetVT))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
if (RetVT != MVT::f32 && RetVT != MVT::f64)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
static const RTLIB::Libcall LibCallTable[3][2] = {
|
|
|
|
{ RTLIB::SIN_F32, RTLIB::SIN_F64 },
|
|
|
|
{ RTLIB::COS_F32, RTLIB::COS_F64 },
|
|
|
|
{ RTLIB::POW_F32, RTLIB::POW_F64 }
|
|
|
|
};
|
|
|
|
RTLIB::Libcall LC;
|
|
|
|
bool Is64Bit = RetVT == MVT::f64;
|
|
|
|
switch (II->getIntrinsicID()) {
|
|
|
|
default:
|
|
|
|
llvm_unreachable("Unexpected intrinsic.");
|
|
|
|
case Intrinsic::sin:
|
|
|
|
LC = LibCallTable[0][Is64Bit];
|
|
|
|
break;
|
|
|
|
case Intrinsic::cos:
|
|
|
|
LC = LibCallTable[1][Is64Bit];
|
|
|
|
break;
|
|
|
|
case Intrinsic::pow:
|
|
|
|
LC = LibCallTable[2][Is64Bit];
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
ArgListTy Args;
|
|
|
|
Args.reserve(II->getNumArgOperands());
|
|
|
|
|
|
|
|
// Populate the argument list.
|
|
|
|
for (auto &Arg : II->arg_operands()) {
|
|
|
|
ArgListEntry Entry;
|
|
|
|
Entry.Val = Arg;
|
|
|
|
Entry.Ty = Arg->getType();
|
|
|
|
Args.push_back(Entry);
|
|
|
|
}
|
|
|
|
|
|
|
|
CallLoweringInfo CLI;
|
2015-06-23 20:21:54 +08:00
|
|
|
MCContext &Ctx = MF->getContext();
|
|
|
|
CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
|
2014-09-16 06:33:06 +08:00
|
|
|
TLI.getLibcallName(LC), std::move(Args));
|
|
|
|
if (!lowerCallTo(CLI))
|
|
|
|
return false;
|
|
|
|
updateValueMap(II, CLI.ResultReg);
|
|
|
|
return true;
|
|
|
|
}
|
2014-11-12 07:10:44 +08:00
|
|
|
case Intrinsic::fabs: {
|
|
|
|
MVT VT;
|
|
|
|
if (!isTypeLegal(II->getType(), VT))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
unsigned Opc;
|
|
|
|
switch (VT.SimpleTy) {
|
|
|
|
default:
|
|
|
|
return false;
|
|
|
|
case MVT::f32:
|
|
|
|
Opc = AArch64::FABSSr;
|
|
|
|
break;
|
|
|
|
case MVT::f64:
|
|
|
|
Opc = AArch64::FABSDr;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
unsigned SrcReg = getRegForValue(II->getOperand(0));
|
|
|
|
if (!SrcReg)
|
|
|
|
return false;
|
|
|
|
bool SrcRegIsKill = hasTrivialKill(II->getOperand(0));
|
|
|
|
unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
|
|
|
|
.addReg(SrcReg, getKillRegState(SrcRegIsKill));
|
|
|
|
updateValueMap(II, ResultReg);
|
|
|
|
return true;
|
|
|
|
}
|
2017-01-25 08:29:26 +08:00
|
|
|
case Intrinsic::trap:
|
2014-05-24 20:50:23 +08:00
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
|
2014-03-29 18:18:08 +08:00
|
|
|
.addImm(1);
|
|
|
|
return true;
|
2017-01-25 08:29:26 +08:00
|
|
|
|
2014-07-31 14:25:33 +08:00
|
|
|
case Intrinsic::sqrt: {
|
|
|
|
Type *RetTy = II->getCalledFunction()->getReturnType();
|
|
|
|
|
|
|
|
MVT VT;
|
|
|
|
if (!isTypeLegal(RetTy, VT))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
unsigned Op0Reg = getRegForValue(II->getOperand(0));
|
|
|
|
if (!Op0Reg)
|
|
|
|
return false;
|
|
|
|
bool Op0IsKill = hasTrivialKill(II->getOperand(0));
|
|
|
|
|
2014-09-04 04:56:59 +08:00
|
|
|
unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill);
|
2014-07-31 14:25:33 +08:00
|
|
|
if (!ResultReg)
|
|
|
|
return false;
|
|
|
|
|
2014-09-04 04:56:52 +08:00
|
|
|
updateValueMap(II, ResultReg);
|
2014-07-31 14:25:33 +08:00
|
|
|
return true;
|
|
|
|
}
|
2014-07-31 06:04:31 +08:00
|
|
|
case Intrinsic::sadd_with_overflow:
|
|
|
|
case Intrinsic::uadd_with_overflow:
|
|
|
|
case Intrinsic::ssub_with_overflow:
|
|
|
|
case Intrinsic::usub_with_overflow:
|
|
|
|
case Intrinsic::smul_with_overflow:
|
|
|
|
case Intrinsic::umul_with_overflow: {
|
|
|
|
// This implements the basic lowering of the xalu with overflow intrinsics.
|
|
|
|
const Function *Callee = II->getCalledFunction();
|
|
|
|
auto *Ty = cast<StructType>(Callee->getReturnType());
|
|
|
|
Type *RetTy = Ty->getTypeAtIndex(0U);
|
|
|
|
|
|
|
|
MVT VT;
|
|
|
|
if (!isTypeLegal(RetTy, VT))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
if (VT != MVT::i32 && VT != MVT::i64)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
const Value *LHS = II->getArgOperand(0);
|
|
|
|
const Value *RHS = II->getArgOperand(1);
|
|
|
|
// Canonicalize immediate to the RHS.
|
|
|
|
if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
|
|
|
|
isCommutativeIntrinsic(II))
|
|
|
|
std::swap(LHS, RHS);
|
|
|
|
|
2014-09-18 15:04:54 +08:00
|
|
|
// Simplify multiplies.
|
2015-05-21 01:16:39 +08:00
|
|
|
Intrinsic::ID IID = II->getIntrinsicID();
|
2014-09-18 15:04:54 +08:00
|
|
|
switch (IID) {
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
case Intrinsic::smul_with_overflow:
|
|
|
|
if (const auto *C = dyn_cast<ConstantInt>(RHS))
|
|
|
|
if (C->getValue() == 2) {
|
|
|
|
IID = Intrinsic::sadd_with_overflow;
|
|
|
|
RHS = LHS;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case Intrinsic::umul_with_overflow:
|
|
|
|
if (const auto *C = dyn_cast<ConstantInt>(RHS))
|
|
|
|
if (C->getValue() == 2) {
|
|
|
|
IID = Intrinsic::uadd_with_overflow;
|
|
|
|
RHS = LHS;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2014-08-20 06:29:55 +08:00
|
|
|
unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
|
2014-07-31 06:04:31 +08:00
|
|
|
AArch64CC::CondCode CC = AArch64CC::Invalid;
|
2014-09-18 15:04:54 +08:00
|
|
|
switch (IID) {
|
2014-07-31 06:04:31 +08:00
|
|
|
default: llvm_unreachable("Unexpected intrinsic!");
|
|
|
|
case Intrinsic::sadd_with_overflow:
|
2014-09-03 09:38:36 +08:00
|
|
|
ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
|
|
|
|
CC = AArch64CC::VS;
|
|
|
|
break;
|
2014-07-31 06:04:31 +08:00
|
|
|
case Intrinsic::uadd_with_overflow:
|
2014-09-03 09:38:36 +08:00
|
|
|
ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
|
|
|
|
CC = AArch64CC::HS;
|
|
|
|
break;
|
2014-07-31 06:04:31 +08:00
|
|
|
case Intrinsic::ssub_with_overflow:
|
2014-09-03 09:38:36 +08:00
|
|
|
ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
|
|
|
|
CC = AArch64CC::VS;
|
|
|
|
break;
|
2014-07-31 06:04:31 +08:00
|
|
|
case Intrinsic::usub_with_overflow:
|
2014-09-03 09:38:36 +08:00
|
|
|
ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
|
|
|
|
CC = AArch64CC::LO;
|
|
|
|
break;
|
2014-07-31 06:04:31 +08:00
|
|
|
case Intrinsic::smul_with_overflow: {
|
|
|
|
CC = AArch64CC::NE;
|
2014-08-20 06:29:55 +08:00
|
|
|
unsigned LHSReg = getRegForValue(LHS);
|
|
|
|
if (!LHSReg)
|
|
|
|
return false;
|
|
|
|
bool LHSIsKill = hasTrivialKill(LHS);
|
|
|
|
|
|
|
|
unsigned RHSReg = getRegForValue(RHS);
|
2014-08-01 09:25:55 +08:00
|
|
|
if (!RHSReg)
|
|
|
|
return false;
|
2014-08-20 06:29:55 +08:00
|
|
|
bool RHSIsKill = hasTrivialKill(RHS);
|
2014-08-01 09:25:55 +08:00
|
|
|
|
2014-07-31 06:04:31 +08:00
|
|
|
if (VT == MVT::i32) {
|
2014-09-16 07:20:17 +08:00
|
|
|
MulReg = emitSMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
|
2014-08-27 08:58:26 +08:00
|
|
|
unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg,
|
|
|
|
/*IsKill=*/false, 32);
|
2014-09-04 04:56:59 +08:00
|
|
|
MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
|
2014-07-31 06:04:31 +08:00
|
|
|
AArch64::sub_32);
|
2014-09-04 04:56:59 +08:00
|
|
|
ShiftReg = fastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
|
2014-07-31 06:04:31 +08:00
|
|
|
AArch64::sub_32);
|
2014-08-20 06:29:55 +08:00
|
|
|
emitSubs_rs(VT, ShiftReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
|
|
|
|
AArch64_AM::ASR, 31, /*WantResult=*/false);
|
2014-07-31 06:04:31 +08:00
|
|
|
} else {
|
|
|
|
assert(VT == MVT::i64 && "Unexpected value type.");
|
2015-05-02 04:57:11 +08:00
|
|
|
// LHSReg and RHSReg cannot be killed by this Mul, since they are
|
|
|
|
// reused in the next instruction.
|
|
|
|
MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
|
|
|
|
/*IsKill=*/false);
|
2014-09-04 04:56:59 +08:00
|
|
|
unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
|
2014-07-31 06:04:31 +08:00
|
|
|
RHSReg, RHSIsKill);
|
2014-08-20 06:29:55 +08:00
|
|
|
emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
|
|
|
|
AArch64_AM::ASR, 63, /*WantResult=*/false);
|
2014-07-31 06:04:31 +08:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case Intrinsic::umul_with_overflow: {
|
|
|
|
CC = AArch64CC::NE;
|
2014-08-20 06:29:55 +08:00
|
|
|
unsigned LHSReg = getRegForValue(LHS);
|
|
|
|
if (!LHSReg)
|
|
|
|
return false;
|
|
|
|
bool LHSIsKill = hasTrivialKill(LHS);
|
|
|
|
|
|
|
|
unsigned RHSReg = getRegForValue(RHS);
|
2014-08-01 09:25:55 +08:00
|
|
|
if (!RHSReg)
|
|
|
|
return false;
|
2014-08-20 06:29:55 +08:00
|
|
|
bool RHSIsKill = hasTrivialKill(RHS);
|
2014-08-01 09:25:55 +08:00
|
|
|
|
2014-07-31 06:04:31 +08:00
|
|
|
if (VT == MVT::i32) {
|
2014-09-16 07:20:17 +08:00
|
|
|
MulReg = emitUMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
|
2014-08-20 06:29:55 +08:00
|
|
|
emitSubs_rs(MVT::i64, AArch64::XZR, /*IsKill=*/true, MulReg,
|
|
|
|
/*IsKill=*/false, AArch64_AM::LSR, 32,
|
|
|
|
/*WantResult=*/false);
|
2014-09-04 04:56:59 +08:00
|
|
|
MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
|
2014-07-31 06:04:31 +08:00
|
|
|
AArch64::sub_32);
|
|
|
|
} else {
|
|
|
|
assert(VT == MVT::i64 && "Unexpected value type.");
|
2015-05-02 04:57:11 +08:00
|
|
|
// LHSReg and RHSReg cannot be killed by this Mul, since they are
|
|
|
|
// reused in the next instruction.
|
|
|
|
MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
|
|
|
|
/*IsKill=*/false);
|
2014-09-04 04:56:59 +08:00
|
|
|
unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
|
2014-07-31 06:04:31 +08:00
|
|
|
RHSReg, RHSIsKill);
|
2014-08-20 06:29:55 +08:00
|
|
|
emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg,
|
|
|
|
/*IsKill=*/false, /*WantResult=*/false);
|
2014-07-31 06:04:31 +08:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-08-20 06:29:55 +08:00
|
|
|
if (MulReg) {
|
|
|
|
ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
|
2014-07-31 06:04:31 +08:00
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
2014-08-20 06:29:55 +08:00
|
|
|
TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
|
|
|
|
}
|
2014-07-31 06:04:31 +08:00
|
|
|
|
2018-09-21 23:47:41 +08:00
|
|
|
if (!ResultReg1)
|
|
|
|
return false;
|
|
|
|
|
2014-09-04 04:56:59 +08:00
|
|
|
ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
|
2014-08-22 04:57:57 +08:00
|
|
|
AArch64::WZR, /*IsKill=*/true, AArch64::WZR,
|
|
|
|
/*IsKill=*/true, getInvertedCondCode(CC));
|
2014-10-04 11:50:10 +08:00
|
|
|
(void)ResultReg2;
|
2014-08-20 06:29:55 +08:00
|
|
|
assert((ResultReg1 + 1) == ResultReg2 &&
|
|
|
|
"Nonconsecutive result registers.");
|
2014-09-04 04:56:52 +08:00
|
|
|
updateValueMap(II, ResultReg1, 2);
|
2014-07-31 06:04:31 +08:00
|
|
|
return true;
|
|
|
|
}
|
2014-03-29 18:18:08 +08:00
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2014-09-16 07:20:17 +08:00
|
|
|
bool AArch64FastISel::selectRet(const Instruction *I) {
|
2014-03-29 18:18:08 +08:00
|
|
|
const ReturnInst *Ret = cast<ReturnInst>(I);
|
|
|
|
const Function &F = *I->getParent()->getParent();
|
|
|
|
|
|
|
|
if (!FuncInfo.CanLowerReturn)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
if (F.isVarArg())
|
|
|
|
return false;
|
|
|
|
|
2016-04-12 05:08:06 +08:00
|
|
|
if (TLI.supportSwiftError() &&
|
|
|
|
F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
|
|
|
|
return false;
|
|
|
|
|
2015-12-17 05:04:19 +08:00
|
|
|
if (TLI.supportSplitCSR(FuncInfo.MF))
|
|
|
|
return false;
|
|
|
|
|
2014-03-29 18:18:08 +08:00
|
|
|
// Build a list of return value registers.
|
|
|
|
SmallVector<unsigned, 4> RetRegs;
|
|
|
|
|
|
|
|
if (Ret->getNumOperands() > 0) {
|
|
|
|
CallingConv::ID CC = F.getCallingConv();
|
|
|
|
SmallVector<ISD::OutputArg, 4> Outs;
|
2018-07-28 21:25:19 +08:00
|
|
|
GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
// Analyze operands of the call, assigning locations to each operand.
|
|
|
|
SmallVector<CCValAssign, 16> ValLocs;
|
2014-08-07 02:45:26 +08:00
|
|
|
CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
|
2014-05-24 20:50:23 +08:00
|
|
|
CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
|
|
|
|
: RetCC_AArch64_AAPCS;
|
2014-03-29 18:18:08 +08:00
|
|
|
CCInfo.AnalyzeReturn(Outs, RetCC);
|
|
|
|
|
|
|
|
// Only handle a single return value for now.
|
|
|
|
if (ValLocs.size() != 1)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
CCValAssign &VA = ValLocs[0];
|
|
|
|
const Value *RV = Ret->getOperand(0);
|
|
|
|
|
|
|
|
// Don't bother handling odd stuff for now.
|
2014-09-16 07:40:10 +08:00
|
|
|
if ((VA.getLocInfo() != CCValAssign::Full) &&
|
|
|
|
(VA.getLocInfo() != CCValAssign::BCvt))
|
2014-03-29 18:18:08 +08:00
|
|
|
return false;
|
2014-09-16 07:40:10 +08:00
|
|
|
|
2014-03-29 18:18:08 +08:00
|
|
|
// Only handle register returns for now.
|
|
|
|
if (!VA.isRegLoc())
|
|
|
|
return false;
|
2014-09-16 07:40:10 +08:00
|
|
|
|
2014-03-29 18:18:08 +08:00
|
|
|
unsigned Reg = getRegForValue(RV);
|
|
|
|
if (Reg == 0)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
unsigned SrcReg = Reg + VA.getValNo();
|
|
|
|
unsigned DestReg = VA.getLocReg();
|
|
|
|
// Avoid a cross-class copy. This is very unlikely.
|
|
|
|
if (!MRI.getRegClass(SrcReg)->contains(DestReg))
|
|
|
|
return false;
|
|
|
|
|
2015-07-09 10:09:04 +08:00
|
|
|
EVT RVEVT = TLI.getValueType(DL, RV->getType());
|
2014-03-29 18:18:08 +08:00
|
|
|
if (!RVEVT.isSimple())
|
|
|
|
return false;
|
2014-05-07 20:33:55 +08:00
|
|
|
|
|
|
|
// Vectors (of > 1 lane) in big endian need tricky handling.
|
2014-09-16 07:40:10 +08:00
|
|
|
if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1 &&
|
|
|
|
!Subtarget->isLittleEndian())
|
2014-05-07 20:33:55 +08:00
|
|
|
return false;
|
|
|
|
|
2014-03-29 18:18:08 +08:00
|
|
|
MVT RVVT = RVEVT.getSimpleVT();
|
2014-04-30 23:29:57 +08:00
|
|
|
if (RVVT == MVT::f128)
|
|
|
|
return false;
|
2014-09-16 07:40:10 +08:00
|
|
|
|
2014-03-29 18:18:08 +08:00
|
|
|
MVT DestVT = VA.getValVT();
|
|
|
|
// Special handling for extended integers.
|
|
|
|
if (RVVT != DestVT) {
|
|
|
|
if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
|
|
|
|
return false;
|
|
|
|
|
2014-09-16 07:20:17 +08:00
|
|
|
bool IsZExt = Outs[0].Flags.isZExt();
|
|
|
|
SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
|
2014-03-29 18:18:08 +08:00
|
|
|
if (SrcReg == 0)
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Make the copy.
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
|
|
|
TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
|
|
|
|
|
|
|
|
// Add register to return instruction.
|
|
|
|
RetRegs.push_back(VA.getLocReg());
|
|
|
|
}
|
|
|
|
|
|
|
|
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
2014-05-24 20:50:23 +08:00
|
|
|
TII.get(AArch64::RET_ReallyLR));
|
2015-08-04 03:04:32 +08:00
|
|
|
for (unsigned RetReg : RetRegs)
|
|
|
|
MIB.addReg(RetReg, RegState::Implicit);
|
2014-03-29 18:18:08 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2014-09-16 07:20:17 +08:00
|
|
|
bool AArch64FastISel::selectTrunc(const Instruction *I) {
|
2014-03-29 18:18:08 +08:00
|
|
|
Type *DestTy = I->getType();
|
|
|
|
Value *Op = I->getOperand(0);
|
|
|
|
Type *SrcTy = Op->getType();
|
|
|
|
|
2015-07-09 10:09:04 +08:00
|
|
|
EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
|
|
|
|
EVT DestEVT = TLI.getValueType(DL, DestTy, true);
|
2014-03-29 18:18:08 +08:00
|
|
|
if (!SrcEVT.isSimple())
|
|
|
|
return false;
|
|
|
|
if (!DestEVT.isSimple())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
MVT SrcVT = SrcEVT.getSimpleVT();
|
|
|
|
MVT DestVT = DestEVT.getSimpleVT();
|
|
|
|
|
|
|
|
if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
|
|
|
|
SrcVT != MVT::i8)
|
|
|
|
return false;
|
|
|
|
if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
|
|
|
|
DestVT != MVT::i1)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
unsigned SrcReg = getRegForValue(Op);
|
|
|
|
if (!SrcReg)
|
|
|
|
return false;
|
2014-08-22 02:02:25 +08:00
|
|
|
bool SrcIsKill = hasTrivialKill(Op);
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2015-08-07 06:13:48 +08:00
|
|
|
// If we're truncating from i64 to a smaller non-legal type then generate an
|
|
|
|
// AND. Otherwise, we know the high bits are undefined and a truncate only
|
|
|
|
// generate a COPY. We cannot mark the source register also as result
|
|
|
|
// register, because this can incorrectly transfer the kill flag onto the
|
|
|
|
// source register.
|
|
|
|
unsigned ResultReg;
|
2014-03-29 18:18:08 +08:00
|
|
|
if (SrcVT == MVT::i64) {
|
2015-08-07 06:13:48 +08:00
|
|
|
uint64_t Mask = 0;
|
|
|
|
switch (DestVT.SimpleTy) {
|
|
|
|
default:
|
|
|
|
// Trunc i64 to i32 is handled by the target-independent fast-isel.
|
|
|
|
return false;
|
|
|
|
case MVT::i1:
|
|
|
|
Mask = 0x1;
|
|
|
|
break;
|
|
|
|
case MVT::i8:
|
|
|
|
Mask = 0xff;
|
|
|
|
break;
|
|
|
|
case MVT::i16:
|
|
|
|
Mask = 0xffff;
|
|
|
|
break;
|
|
|
|
}
|
2014-03-29 18:18:08 +08:00
|
|
|
// Issue an extract_subreg to get the lower 32-bits.
|
2015-08-07 06:13:48 +08:00
|
|
|
unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
|
|
|
|
AArch64::sub_32);
|
|
|
|
// Create the AND instruction which performs the actual truncation.
|
|
|
|
ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask);
|
|
|
|
assert(ResultReg && "Unexpected AND instruction emission failure.");
|
|
|
|
} else {
|
|
|
|
ResultReg = createResultReg(&AArch64::GPR32RegClass);
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
|
|
|
TII.get(TargetOpcode::COPY), ResultReg)
|
|
|
|
.addReg(SrcReg, getKillRegState(SrcIsKill));
|
2014-03-29 18:18:08 +08:00
|
|
|
}
|
|
|
|
|
2014-09-04 04:56:52 +08:00
|
|
|
updateValueMap(I, ResultReg);
|
2014-03-29 18:18:08 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2014-09-16 07:20:17 +08:00
|
|
|
unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
|
2014-03-29 18:18:08 +08:00
|
|
|
assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
|
|
|
|
DestVT == MVT::i64) &&
|
|
|
|
"Unexpected value type.");
|
|
|
|
// Handle i8 and i16 as i32.
|
|
|
|
if (DestVT == MVT::i8 || DestVT == MVT::i16)
|
|
|
|
DestVT = MVT::i32;
|
|
|
|
|
2014-09-16 07:20:17 +08:00
|
|
|
if (IsZExt) {
|
2014-09-04 09:29:18 +08:00
|
|
|
unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
|
2014-08-22 02:02:25 +08:00
|
|
|
assert(ResultReg && "Unexpected AND instruction emission failure.");
|
2014-03-29 18:18:08 +08:00
|
|
|
if (DestVT == MVT::i64) {
|
|
|
|
// We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
|
|
|
|
// upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
|
2014-05-24 20:50:23 +08:00
|
|
|
unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
|
2014-03-29 18:18:08 +08:00
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
2014-05-24 20:50:23 +08:00
|
|
|
TII.get(AArch64::SUBREG_TO_REG), Reg64)
|
2014-03-29 18:18:08 +08:00
|
|
|
.addImm(0)
|
|
|
|
.addReg(ResultReg)
|
2014-05-24 20:50:23 +08:00
|
|
|
.addImm(AArch64::sub_32);
|
2014-03-29 18:18:08 +08:00
|
|
|
ResultReg = Reg64;
|
|
|
|
}
|
|
|
|
return ResultReg;
|
|
|
|
} else {
|
|
|
|
if (DestVT == MVT::i64) {
|
|
|
|
// FIXME: We're SExt i1 to i64.
|
|
|
|
return 0;
|
|
|
|
}
|
2014-09-04 04:56:59 +08:00
|
|
|
return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
|
2014-08-22 04:57:57 +08:00
|
|
|
/*TODO:IsKill=*/false, 0, 0);
|
2014-03-29 18:18:08 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-09-16 07:20:17 +08:00
|
|
|
/// Emit a register-register multiply as a multiply-add with the zero
/// register as the addend.
///
/// i8, i16 and i32 use the 32-bit form; i64 uses the 64-bit form.
/// \returns the result register, or 0 for any other type.
unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
                                     unsigned Op1, bool Op1IsKill) {
  bool Is64Bit = false;
  switch (RetVT.SimpleTy) {
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    break;
  case MVT::i64:
    Is64Bit = true;
    break;
  default:
    return 0;
  }

  const unsigned Opc = Is64Bit ? AArch64::MADDXrrr : AArch64::MADDWrrr;
  const unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;

  // The third source operand is the zero register, passed with its kill flag
  // set.
  return fastEmitInst_rrr(Opc, RC, Op0, Op0IsKill, Op1, Op1IsKill, ZReg,
                          /*IsKill=*/true);
}
|
|
|
|
|
2014-09-16 07:20:17 +08:00
|
|
|
/// Emit SMADDL with XZR as the addend. Only an i64 result type is supported.
///
/// \returns the result register, or 0 when \p RetVT is not i64.
unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
                                       unsigned Op1, bool Op1IsKill) {
  if (RetVT == MVT::i64)
    return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass, Op0,
                            Op0IsKill, Op1, Op1IsKill, AArch64::XZR,
                            /*IsKill=*/true);
  return 0;
}
|
|
|
|
|
2014-09-16 07:20:17 +08:00
|
|
|
/// Emit UMADDL with XZR as the addend. Only an i64 result type is supported.
///
/// \returns the result register, or 0 when \p RetVT is not i64.
unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
                                       unsigned Op1, bool Op1IsKill) {
  if (RetVT == MVT::i64)
    return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass, Op0,
                            Op0IsKill, Op1, Op1IsKill, AArch64::XZR,
                            /*IsKill=*/true);
  return 0;
}
|
|
|
|
|
2014-08-22 07:06:07 +08:00
|
|
|
/// Emit a logical shift left by a register amount.
///
/// For i8/i16 the shift amount is masked to the type width before the shift
/// and the result is masked again afterwards.
/// \returns the result register, or 0 for an unsupported type.
unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
                                     unsigned Op1Reg, bool Op1IsKill) {
  // A non-zero mask marks the narrow types that need truncation.
  uint64_t Mask = 0;
  switch (RetVT.SimpleTy) {
  case MVT::i8:
    Mask = 0xff;
    break;
  case MVT::i16:
    Mask = 0xffff;
    break;
  case MVT::i32:
  case MVT::i64:
    break;
  default:
    return 0;
  }

  const bool Is64Bit = (RetVT == MVT::i64);
  const bool NeedTrunc = (Mask != 0);
  const unsigned Opc = Is64Bit ? AArch64::LSLVXr : AArch64::LSLVWr;
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;

  if (NeedTrunc) {
    Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
    Op1IsKill = true;
  }

  unsigned ResultReg =
      fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
  if (!NeedTrunc)
    return ResultReg;
  return emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
}
|
|
|
|
|
2014-08-27 08:58:26 +08:00
|
|
|
/// Emit a logical shift left by an immediate, folding a zero-/sign-extension
/// of the operand from \p SrcVT to \p RetVT into a single bitfield move.
///
/// \returns the result register, or 0 when the shift amount is not smaller
/// than the width of \p RetVT (or when the extension for a zero shift fails).
unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
                                     bool Op0IsKill, uint64_t Shift,
                                     bool IsZExt) {
  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
         "Unexpected source/return type pair.");
  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
          SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
         "Unexpected source value type.");
  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
          RetVT == MVT::i64) && "Unexpected return value type.");

  const bool Is64Bit = (RetVT == MVT::i64);
  const unsigned RegSize = Is64Bit ? 64 : 32;
  const unsigned DstBits = RetVT.getSizeInBits();
  const unsigned SrcBits = SrcVT.getSizeInBits();
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;

  // A "zero" shift is just a copy, or a plain extension when the types
  // differ.
  if (Shift == 0) {
    if (RetVT != SrcVT)
      return emitIntExt(SrcVT, Op0, RetVT, IsZExt);

    unsigned ResultReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), ResultReg)
        .addReg(Op0, getKillRegState(Op0IsKill));
    return ResultReg;
  }

  // Don't deal with undefined shifts.
  if (Shift >= DstBits)
    return 0;

  // For immediate shifts we can fold the zero-/sign-extension into the shift.
  // {S|U}BFM Wd, Wn, #r, #s
  // Wd<32+s-r,32-r> = Wn<s:0> when r > s

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = shl i16 %1, 4
  // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
  // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
  // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
  // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = shl i16 %1, 8
  // Wd<32+7-24,32-24> = Wn<7:0>
  // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
  // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
  // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = shl i16 %1, 12
  // Wd<32+3-20,32-20> = Wn<3:0>
  // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
  // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
  // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext

  const unsigned ImmR = RegSize - Shift;
  // Limit the width to the length of the source type.
  const unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);

  // Unsigned bitfield move for zero-extension, signed otherwise; X-form for
  // a 64-bit result.
  unsigned Opc;
  if (IsZExt)
    Opc = Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri;
  else
    Opc = Is64Bit ? AArch64::SBFMXri : AArch64::SBFMWri;

  // A source of at most 32 bits feeding a 64-bit result is first placed into
  // a 64-bit register with SUBREG_TO_REG.
  if (Is64Bit && SrcVT.SimpleTy <= MVT::i32) {
    const unsigned WideReg = MRI.createVirtualRegister(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(AArch64::SUBREG_TO_REG), WideReg)
        .addImm(0)
        .addReg(Op0, getKillRegState(Op0IsKill))
        .addImm(AArch64::sub_32);
    Op0 = WideReg;
    Op0IsKill = true;
  }
  return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
}
|
|
|
|
|
2014-08-22 07:06:07 +08:00
|
|
|
/// Emit a logical shift right by a register amount.
///
/// For i8/i16 both the value and the shift amount are masked to the type
/// width before the shift and the result is masked again afterwards.
/// \returns the result register, or 0 for an unsupported type.
unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
                                     unsigned Op1Reg, bool Op1IsKill) {
  // A non-zero mask marks the narrow types that need truncation.
  uint64_t Mask = 0;
  switch (RetVT.SimpleTy) {
  case MVT::i8:
    Mask = 0xff;
    break;
  case MVT::i16:
    Mask = 0xffff;
    break;
  case MVT::i32:
  case MVT::i64:
    break;
  default:
    return 0;
  }

  const bool Is64Bit = (RetVT == MVT::i64);
  const bool NeedTrunc = (Mask != 0);
  const unsigned Opc = Is64Bit ? AArch64::LSRVXr : AArch64::LSRVWr;
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;

  if (NeedTrunc) {
    Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Op0IsKill, Mask);
    Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
    Op0IsKill = true;
    Op1IsKill = true;
  }

  unsigned ResultReg =
      fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
  if (!NeedTrunc)
    return ResultReg;
  return emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
}
|
|
|
|
|
2014-08-27 08:58:26 +08:00
|
|
|
/// Emit a logical shift right by an immediate, folding a zero-extension of
/// the operand from \p SrcVT to \p RetVT into a single bitfield move. A
/// sign-extension cannot be folded and is emitted separately first.
///
/// \returns the result register, or 0 when the shift amount is not smaller
/// than the width of \p RetVT or a required extension could not be emitted.
unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
                                     bool Op0IsKill, uint64_t Shift,
                                     bool IsZExt) {
  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
         "Unexpected source/return type pair.");
  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
          SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
         "Unexpected source value type.");
  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
          RetVT == MVT::i64) && "Unexpected return value type.");

  const bool Is64Bit = (RetVT == MVT::i64);
  const unsigned RegSize = Is64Bit ? 64 : 32;
  const unsigned DstBits = RetVT.getSizeInBits();
  unsigned SrcBits = SrcVT.getSizeInBits();
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;

  // A "zero" shift is just a copy, or a plain extension when the types
  // differ.
  if (Shift == 0) {
    if (RetVT != SrcVT)
      return emitIntExt(SrcVT, Op0, RetVT, IsZExt);

    unsigned ResultReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), ResultReg)
        .addReg(Op0, getKillRegState(Op0IsKill));
    return ResultReg;
  }

  // Don't deal with undefined shifts.
  if (Shift >= DstBits)
    return 0;

  // For immediate shifts we can fold the zero-/sign-extension into the shift.
  // {S|U}BFM Wd, Wn, #r, #s
  // Wd<s-r:0> = Wn<s:r> when r <= s

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = lshr i16 %1, 4
  // Wd<7-4:0> = Wn<7:4>
  // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
  // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = lshr i16 %1, 8
  // Wd<7-7,0> = Wn<7:7>
  // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = lshr i16 %1, 12
  // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
  // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext

  // Shifting a zero-extended value by at least its source width yields zero.
  if (IsZExt && Shift >= SrcBits)
    return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);

  // It is not possible to fold a sign-extend into the LShr instruction. In this
  // case emit a sign-extend, then continue as if the operand already had the
  // result type.
  if (!IsZExt) {
    Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
    if (!Op0)
      return 0;
    Op0IsKill = true;
    SrcVT = RetVT;
    SrcBits = SrcVT.getSizeInBits();
    IsZExt = true;
  }

  const unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
  const unsigned ImmS = SrcBits - 1;

  // Unsigned bitfield move for zero-extension, signed otherwise; X-form for
  // a 64-bit result.
  unsigned Opc;
  if (IsZExt)
    Opc = Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri;
  else
    Opc = Is64Bit ? AArch64::SBFMXri : AArch64::SBFMWri;

  // A source of at most 32 bits feeding a 64-bit result is first placed into
  // a 64-bit register with SUBREG_TO_REG.
  if (Is64Bit && SrcVT.SimpleTy <= MVT::i32) {
    const unsigned WideReg = MRI.createVirtualRegister(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(AArch64::SUBREG_TO_REG), WideReg)
        .addImm(0)
        .addReg(Op0, getKillRegState(Op0IsKill))
        .addImm(AArch64::sub_32);
    Op0 = WideReg;
    Op0IsKill = true;
  }
  return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
}
|
|
|
|
|
2014-08-22 07:06:07 +08:00
|
|
|
/// Emit an arithmetic shift right by a register amount.
///
/// For i8/i16 the value is sign-extended to i32 and the shift amount is
/// masked to the type width before the shift; the result is masked
/// afterwards.
/// \returns the result register, or 0 for an unsupported type.
unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
                                     unsigned Op1Reg, bool Op1IsKill) {
  // A non-zero mask marks the narrow types that need truncation.
  uint64_t Mask = 0;
  switch (RetVT.SimpleTy) {
  case MVT::i8:
    Mask = 0xff;
    break;
  case MVT::i16:
    Mask = 0xffff;
    break;
  case MVT::i32:
  case MVT::i64:
    break;
  default:
    return 0;
  }

  const bool Is64Bit = (RetVT == MVT::i64);
  const bool NeedTrunc = (Mask != 0);
  const unsigned Opc = Is64Bit ? AArch64::ASRVXr : AArch64::ASRVWr;
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;

  if (NeedTrunc) {
    Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*IsZExt=*/false);
    Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
    Op0IsKill = true;
    Op1IsKill = true;
  }

  unsigned ResultReg =
      fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
  if (!NeedTrunc)
    return ResultReg;
  return emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
}
|
|
|
|
|
2014-08-27 08:58:26 +08:00
|
|
|
/// Emit an arithmetic shift right by an immediate, folding a zero-/sign-
/// extension of the operand from \p SrcVT to \p RetVT into a single bitfield
/// move.
///
/// \returns the result register, or 0 when the shift amount is not smaller
/// than the width of \p RetVT (or when the extension for a zero shift fails).
unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
                                     bool Op0IsKill, uint64_t Shift,
                                     bool IsZExt) {
  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
         "Unexpected source/return type pair.");
  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
          SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
         "Unexpected source value type.");
  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
          RetVT == MVT::i64) && "Unexpected return value type.");

  const bool Is64Bit = (RetVT == MVT::i64);
  const unsigned RegSize = Is64Bit ? 64 : 32;
  const unsigned DstBits = RetVT.getSizeInBits();
  const unsigned SrcBits = SrcVT.getSizeInBits();
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;

  // A "zero" shift is just a copy, or a plain extension when the types
  // differ.
  if (Shift == 0) {
    if (RetVT != SrcVT)
      return emitIntExt(SrcVT, Op0, RetVT, IsZExt);

    unsigned ResultReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), ResultReg)
        .addReg(Op0, getKillRegState(Op0IsKill));
    return ResultReg;
  }

  // Don't deal with undefined shifts.
  if (Shift >= DstBits)
    return 0;

  // For immediate shifts we can fold the zero-/sign-extension into the shift.
  // {S|U}BFM Wd, Wn, #r, #s
  // Wd<s-r:0> = Wn<s:r> when r <= s

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = ashr i16 %1, 4
  // Wd<7-4:0> = Wn<7:4>
  // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
  // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = ashr i16 %1, 8
  // Wd<7-7,0> = Wn<7:7>
  // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = ashr i16 %1, 12
  // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
  // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext

  // Shifting a zero-extended value by at least its source width yields zero.
  if (IsZExt && Shift >= SrcBits)
    return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);

  const unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
  const unsigned ImmS = SrcBits - 1;

  // Unsigned bitfield move for zero-extension, signed otherwise; X-form for
  // a 64-bit result.
  unsigned Opc;
  if (IsZExt)
    Opc = Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri;
  else
    Opc = Is64Bit ? AArch64::SBFMXri : AArch64::SBFMWri;

  // A source of at most 32 bits feeding a 64-bit result is first placed into
  // a 64-bit register with SUBREG_TO_REG.
  if (Is64Bit && SrcVT.SimpleTy <= MVT::i32) {
    const unsigned WideReg = MRI.createVirtualRegister(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(AArch64::SUBREG_TO_REG), WideReg)
        .addImm(0)
        .addReg(Op0, getKillRegState(Op0IsKill))
        .addImm(AArch64::sub_32);
    Op0 = WideReg;
    Op0IsKill = true;
  }
  return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
}
|
|
|
|
|
2014-09-16 07:20:17 +08:00
|
|
|
/// Emit a zero- or sign-extension of \p SrcReg from \p SrcVT to \p DestVT.
///
/// i1 sources are delegated to emiti1Ext; i8/i16/i32 sources use a bitfield
/// move whose last immediate is the index of the source's top bit.
/// \returns the result register, or 0 for an unsupported type combination.
unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
                                     bool IsZExt) {
  assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");

  // FastISel does not have plumbing to deal with extensions where the SrcVT or
  // DestVT are odd things, so test to make sure that they are both types we can
  // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
  // bail out to SelectionDAG.
  const bool DestOk = DestVT == MVT::i8 || DestVT == MVT::i16 ||
                      DestVT == MVT::i32 || DestVT == MVT::i64;
  const bool SrcOk = SrcVT == MVT::i1 || SrcVT == MVT::i8 ||
                     SrcVT == MVT::i16 || SrcVT == MVT::i32;
  if (!DestOk || !SrcOk)
    return 0;

  // i8 and i16 destinations are handled as i32, so only the 64-bit case is
  // distinguished from here on.
  const bool DestIs64 = (DestVT == MVT::i64);
  bool UseXForm = DestIs64;
  unsigned Imm = 0;

  switch (SrcVT.SimpleTy) {
  default:
    return 0;
  case MVT::i1:
    return emiti1Ext(SrcReg, DestVT, IsZExt);
  case MVT::i8:
    Imm = 7;
    break;
  case MVT::i16:
    Imm = 15;
    break;
  case MVT::i32:
    assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
    // An i32 source always selects the 64-bit opcode.
    UseXForm = true;
    Imm = 31;
    break;
  }

  unsigned Opc;
  if (UseXForm)
    Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
  else
    Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;

  // For a 64-bit destination, first place the source into a 64-bit register
  // with SUBREG_TO_REG.
  if (DestIs64) {
    const unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(AArch64::SUBREG_TO_REG), Src64)
        .addImm(0)
        .addReg(SrcReg)
        .addImm(AArch64::sub_32);
    SrcReg = Src64;
  }

  const TargetRegisterClass *RC =
      DestIs64 ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  return fastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm);
}
|
|
|
|
|
2014-10-15 04:36:02 +08:00
|
|
|
static bool isZExtLoad(const MachineInstr *LI) {
|
|
|
|
switch (LI->getOpcode()) {
|
|
|
|
default:
|
|
|
|
return false;
|
|
|
|
case AArch64::LDURBBi:
|
|
|
|
case AArch64::LDURHHi:
|
|
|
|
case AArch64::LDURWi:
|
|
|
|
case AArch64::LDRBBui:
|
|
|
|
case AArch64::LDRHHui:
|
|
|
|
case AArch64::LDRWui:
|
|
|
|
case AArch64::LDRBBroX:
|
|
|
|
case AArch64::LDRHHroX:
|
|
|
|
case AArch64::LDRWroX:
|
|
|
|
case AArch64::LDRBBroW:
|
|
|
|
case AArch64::LDRHHroW:
|
|
|
|
case AArch64::LDRWroW:
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static bool isSExtLoad(const MachineInstr *LI) {
|
|
|
|
switch (LI->getOpcode()) {
|
|
|
|
default:
|
|
|
|
return false;
|
|
|
|
case AArch64::LDURSBWi:
|
|
|
|
case AArch64::LDURSHWi:
|
|
|
|
case AArch64::LDURSBXi:
|
|
|
|
case AArch64::LDURSHXi:
|
|
|
|
case AArch64::LDURSWi:
|
|
|
|
case AArch64::LDRSBWui:
|
|
|
|
case AArch64::LDRSHWui:
|
|
|
|
case AArch64::LDRSBXui:
|
|
|
|
case AArch64::LDRSHXui:
|
|
|
|
case AArch64::LDRSWui:
|
|
|
|
case AArch64::LDRSBWroX:
|
|
|
|
case AArch64::LDRSHWroX:
|
|
|
|
case AArch64::LDRSBXroX:
|
|
|
|
case AArch64::LDRSHXroX:
|
|
|
|
case AArch64::LDRSWroX:
|
|
|
|
case AArch64::LDRSBWroW:
|
|
|
|
case AArch64::LDRSHWroW:
|
|
|
|
case AArch64::LDRSBXroW:
|
|
|
|
case AArch64::LDRSHXroW:
|
|
|
|
case AArch64::LDRSWroW:
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
|
|
|
|
MVT SrcVT) {
|
|
|
|
const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
|
|
|
|
if (!LI || !LI->hasOneUse())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// Check if the load instruction has already been selected.
|
|
|
|
unsigned Reg = lookUpRegForValue(LI);
|
|
|
|
if (!Reg)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
|
|
|
|
if (!MI)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// Check if the correct load instruction has been emitted - SelectionDAG might
|
|
|
|
// have emitted a zero-extending load, but we need a sign-extending load.
|
|
|
|
bool IsZExt = isa<ZExtInst>(I);
|
|
|
|
const auto *LoadMI = MI;
|
|
|
|
if (LoadMI->getOpcode() == TargetOpcode::COPY &&
|
|
|
|
LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
|
|
|
|
unsigned LoadReg = MI->getOperand(1).getReg();
|
|
|
|
LoadMI = MRI.getUniqueVRegDef(LoadReg);
|
|
|
|
assert(LoadMI && "Expected valid instruction");
|
|
|
|
}
|
|
|
|
if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// Nothing to be done.
|
|
|
|
if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
|
|
|
|
updateValueMap(I, Reg);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (IsZExt) {
|
|
|
|
unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
|
|
|
TII.get(AArch64::SUBREG_TO_REG), Reg64)
|
|
|
|
.addImm(0)
|
|
|
|
.addReg(Reg, getKillRegState(true))
|
|
|
|
.addImm(AArch64::sub_32);
|
|
|
|
Reg = Reg64;
|
|
|
|
} else {
|
|
|
|
assert((MI->getOpcode() == TargetOpcode::COPY &&
|
|
|
|
MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
|
|
|
|
"Expected copy instruction");
|
|
|
|
Reg = MI->getOperand(1).getReg();
|
2018-12-18 01:25:53 +08:00
|
|
|
MachineBasicBlock::iterator I(MI);
|
|
|
|
removeDeadCode(I, std::next(I));
|
2014-10-15 04:36:02 +08:00
|
|
|
}
|
|
|
|
updateValueMap(I, Reg);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2014-09-16 07:20:17 +08:00
|
|
|
bool AArch64FastISel::selectIntExt(const Instruction *I) {
|
2014-09-30 08:49:58 +08:00
|
|
|
assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
|
|
|
|
"Unexpected integer extend instruction.");
|
|
|
|
MVT RetVT;
|
|
|
|
MVT SrcVT;
|
|
|
|
if (!isTypeSupported(I->getType(), RetVT))
|
2014-03-29 18:18:08 +08:00
|
|
|
return false;
|
|
|
|
|
2014-09-30 08:49:58 +08:00
|
|
|
if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
|
2014-03-29 18:18:08 +08:00
|
|
|
return false;
|
|
|
|
|
2014-10-15 04:36:02 +08:00
|
|
|
// Try to optimize already sign-/zero-extended values from load instructions.
|
|
|
|
if (optimizeIntExtLoad(I, RetVT, SrcVT))
|
|
|
|
return true;
|
|
|
|
|
2014-09-30 08:49:58 +08:00
|
|
|
unsigned SrcReg = getRegForValue(I->getOperand(0));
|
|
|
|
if (!SrcReg)
|
|
|
|
return false;
|
2014-10-07 11:39:59 +08:00
|
|
|
bool SrcIsKill = hasTrivialKill(I->getOperand(0));
|
2014-09-30 08:49:58 +08:00
|
|
|
|
2014-10-15 04:36:02 +08:00
|
|
|
// Try to optimize already sign-/zero-extended values from function arguments.
|
2014-10-07 11:39:59 +08:00
|
|
|
bool IsZExt = isa<ZExtInst>(I);
|
|
|
|
if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
|
|
|
|
if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
|
|
|
|
if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
|
|
|
|
unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
|
|
|
TII.get(AArch64::SUBREG_TO_REG), ResultReg)
|
|
|
|
.addImm(0)
|
|
|
|
.addReg(SrcReg, getKillRegState(SrcIsKill))
|
|
|
|
.addImm(AArch64::sub_32);
|
|
|
|
SrcReg = ResultReg;
|
|
|
|
}
|
2014-11-11 05:05:31 +08:00
|
|
|
// Conservatively clear all kill flags from all uses, because we are
|
|
|
|
// replacing a sign-/zero-extend instruction at IR level with a nop at MI
|
|
|
|
// level. The result of the instruction at IR level might have been
|
|
|
|
// trivially dead, which is now not longer true.
|
|
|
|
unsigned UseReg = lookUpRegForValue(I);
|
|
|
|
if (UseReg)
|
|
|
|
MRI.clearKillFlags(UseReg);
|
|
|
|
|
2014-10-07 11:39:59 +08:00
|
|
|
updateValueMap(I, SrcReg);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
2014-08-05 13:43:44 +08:00
|
|
|
|
2014-10-07 11:39:59 +08:00
|
|
|
unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
|
2014-08-05 13:43:44 +08:00
|
|
|
if (!ResultReg)
|
2014-03-29 18:18:08 +08:00
|
|
|
return false;
|
2014-08-05 13:43:44 +08:00
|
|
|
|
2014-09-04 04:56:52 +08:00
|
|
|
updateValueMap(I, ResultReg);
|
2014-03-29 18:18:08 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2014-09-16 07:20:17 +08:00
|
|
|
bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
|
2015-07-09 10:09:04 +08:00
|
|
|
EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
|
2014-03-29 18:18:08 +08:00
|
|
|
if (!DestEVT.isSimple())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
MVT DestVT = DestEVT.getSimpleVT();
|
|
|
|
if (DestVT != MVT::i64 && DestVT != MVT::i32)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
unsigned DivOpc;
|
2014-09-16 07:20:17 +08:00
|
|
|
bool Is64bit = (DestVT == MVT::i64);
|
2014-03-29 18:18:08 +08:00
|
|
|
switch (ISDOpcode) {
|
|
|
|
default:
|
|
|
|
return false;
|
|
|
|
case ISD::SREM:
|
2014-09-16 07:20:17 +08:00
|
|
|
DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
|
2014-03-29 18:18:08 +08:00
|
|
|
break;
|
|
|
|
case ISD::UREM:
|
2014-09-16 07:20:17 +08:00
|
|
|
DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
|
2014-03-29 18:18:08 +08:00
|
|
|
break;
|
|
|
|
}
|
2014-09-16 07:20:17 +08:00
|
|
|
unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
|
2014-03-29 18:18:08 +08:00
|
|
|
unsigned Src0Reg = getRegForValue(I->getOperand(0));
|
|
|
|
if (!Src0Reg)
|
|
|
|
return false;
|
2014-08-22 04:57:57 +08:00
|
|
|
bool Src0IsKill = hasTrivialKill(I->getOperand(0));
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
unsigned Src1Reg = getRegForValue(I->getOperand(1));
|
|
|
|
if (!Src1Reg)
|
|
|
|
return false;
|
2014-08-22 04:57:57 +08:00
|
|
|
bool Src1IsKill = hasTrivialKill(I->getOperand(1));
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2014-08-22 04:57:57 +08:00
|
|
|
const TargetRegisterClass *RC =
|
|
|
|
(DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
|
2014-09-04 04:56:59 +08:00
|
|
|
unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, /*IsKill=*/false,
|
2014-08-22 04:57:57 +08:00
|
|
|
Src1Reg, /*IsKill=*/false);
|
|
|
|
assert(QuotReg && "Unexpected DIV instruction emission failure.");
|
2014-04-17 01:09:20 +08:00
|
|
|
// The remainder is computed as numerator - (quotient * denominator) using the
|
2014-03-29 18:18:08 +08:00
|
|
|
// MSUB instruction.
|
2014-09-04 04:56:59 +08:00
|
|
|
unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, /*IsKill=*/true,
|
2014-08-22 04:57:57 +08:00
|
|
|
Src1Reg, Src1IsKill, Src0Reg,
|
|
|
|
Src0IsKill);
|
2014-09-04 04:56:52 +08:00
|
|
|
updateValueMap(I, ResultReg);
|
2014-03-29 18:18:08 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2014-09-16 07:20:17 +08:00
|
|
|
bool AArch64FastISel::selectMul(const Instruction *I) {
|
2014-09-18 04:35:41 +08:00
|
|
|
MVT VT;
|
|
|
|
if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
|
2014-03-29 18:18:08 +08:00
|
|
|
return false;
|
|
|
|
|
2014-09-18 04:35:41 +08:00
|
|
|
if (VT.isVector())
|
|
|
|
return selectBinaryOp(I, ISD::MUL);
|
|
|
|
|
|
|
|
const Value *Src0 = I->getOperand(0);
|
|
|
|
const Value *Src1 = I->getOperand(1);
|
|
|
|
if (const auto *C = dyn_cast<ConstantInt>(Src0))
|
|
|
|
if (C->getValue().isPowerOf2())
|
|
|
|
std::swap(Src0, Src1);
|
|
|
|
|
|
|
|
// Try to simplify to a shift instruction.
|
|
|
|
if (const auto *C = dyn_cast<ConstantInt>(Src1))
|
|
|
|
if (C->getValue().isPowerOf2()) {
|
|
|
|
uint64_t ShiftVal = C->getValue().logBase2();
|
|
|
|
MVT SrcVT = VT;
|
|
|
|
bool IsZExt = true;
|
|
|
|
if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
|
2014-09-30 08:49:58 +08:00
|
|
|
if (!isIntExtFree(ZExt)) {
|
|
|
|
MVT VT;
|
|
|
|
if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
|
|
|
|
SrcVT = VT;
|
|
|
|
IsZExt = true;
|
|
|
|
Src0 = ZExt->getOperand(0);
|
|
|
|
}
|
2014-09-18 04:35:41 +08:00
|
|
|
}
|
|
|
|
} else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
|
2014-09-30 08:49:58 +08:00
|
|
|
if (!isIntExtFree(SExt)) {
|
|
|
|
MVT VT;
|
|
|
|
if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
|
|
|
|
SrcVT = VT;
|
|
|
|
IsZExt = false;
|
|
|
|
Src0 = SExt->getOperand(0);
|
|
|
|
}
|
2014-09-18 04:35:41 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
unsigned Src0Reg = getRegForValue(Src0);
|
|
|
|
if (!Src0Reg)
|
|
|
|
return false;
|
|
|
|
bool Src0IsKill = hasTrivialKill(Src0);
|
|
|
|
|
|
|
|
unsigned ResultReg =
|
|
|
|
emitLSL_ri(VT, SrcVT, Src0Reg, Src0IsKill, ShiftVal, IsZExt);
|
|
|
|
|
|
|
|
if (ResultReg) {
|
|
|
|
updateValueMap(I, ResultReg);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
unsigned Src0Reg = getRegForValue(I->getOperand(0));
|
|
|
|
if (!Src0Reg)
|
|
|
|
return false;
|
2014-07-31 06:04:25 +08:00
|
|
|
bool Src0IsKill = hasTrivialKill(I->getOperand(0));
|
2014-03-29 18:18:08 +08:00
|
|
|
|
|
|
|
unsigned Src1Reg = getRegForValue(I->getOperand(1));
|
|
|
|
if (!Src1Reg)
|
|
|
|
return false;
|
2014-07-31 06:04:25 +08:00
|
|
|
bool Src1IsKill = hasTrivialKill(I->getOperand(1));
|
|
|
|
|
2014-09-18 04:35:41 +08:00
|
|
|
unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill);
|
2014-07-31 06:04:25 +08:00
|
|
|
|
|
|
|
if (!ResultReg)
|
|
|
|
return false;
|
2014-03-29 18:18:08 +08:00
|
|
|
|
2014-09-04 04:56:52 +08:00
|
|
|
updateValueMap(I, ResultReg);
|
2014-03-29 18:18:08 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2014-09-16 07:20:17 +08:00
|
|
|
bool AArch64FastISel::selectShift(const Instruction *I) {
|
2014-08-27 08:58:26 +08:00
|
|
|
MVT RetVT;
|
2014-09-16 05:27:56 +08:00
|
|
|
if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
|
2014-07-31 06:04:22 +08:00
|
|
|
return false;
|
|
|
|
|
2014-09-16 05:27:56 +08:00
|
|
|
if (RetVT.isVector())
|
|
|
|
return selectOperator(I, I->getOpcode());
|
|
|
|
|
2014-08-22 07:06:07 +08:00
|
|
|
if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
|
|
|
|
unsigned ResultReg = 0;
|
|
|
|
uint64_t ShiftVal = C->getZExtValue();
|
2014-08-27 08:58:26 +08:00
|
|
|
MVT SrcVT = RetVT;
|
2015-03-25 00:24:01 +08:00
|
|
|
bool IsZExt = I->getOpcode() != Instruction::AShr;
|
2014-08-29 08:19:21 +08:00
|
|
|
const Value *Op0 = I->getOperand(0);
|
2014-08-27 08:58:26 +08:00
|
|
|
if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
|
2014-09-30 08:49:58 +08:00
|
|
|
if (!isIntExtFree(ZExt)) {
|
|
|
|
MVT TmpVT;
|
|
|
|
if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
|
|
|
|
SrcVT = TmpVT;
|
|
|
|
IsZExt = true;
|
|
|
|
Op0 = ZExt->getOperand(0);
|
|
|
|
}
|
2014-08-27 08:58:26 +08:00
|
|
|
}
|
|
|
|
} else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
|
2014-09-30 08:49:58 +08:00
|
|
|
if (!isIntExtFree(SExt)) {
|
|
|
|
MVT TmpVT;
|
|
|
|
if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
|
|
|
|
SrcVT = TmpVT;
|
|
|
|
IsZExt = false;
|
|
|
|
Op0 = SExt->getOperand(0);
|
|
|
|
}
|
2014-08-27 08:58:26 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
unsigned Op0Reg = getRegForValue(Op0);
|
|
|
|
if (!Op0Reg)
|
|
|
|
return false;
|
|
|
|
bool Op0IsKill = hasTrivialKill(Op0);
|
|
|
|
|
2014-08-22 07:06:07 +08:00
|
|
|
switch (I->getOpcode()) {
|
|
|
|
default: llvm_unreachable("Unexpected instruction.");
|
|
|
|
case Instruction::Shl:
|
2014-08-27 08:58:26 +08:00
|
|
|
ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
|
2014-08-22 07:06:07 +08:00
|
|
|
break;
|
|
|
|
case Instruction::AShr:
|
2014-08-27 08:58:26 +08:00
|
|
|
ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
|
2014-08-22 07:06:07 +08:00
|
|
|
break;
|
|
|
|
case Instruction::LShr:
|
2014-08-27 08:58:26 +08:00
|
|
|
ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
|
2014-08-22 07:06:07 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (!ResultReg)
|
|
|
|
return false;
|
2014-07-31 06:04:22 +08:00
|
|
|
|
2014-09-04 04:56:52 +08:00
|
|
|
updateValueMap(I, ResultReg);
|
2014-08-22 07:06:07 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2014-08-27 08:58:26 +08:00
|
|
|
unsigned Op0Reg = getRegForValue(I->getOperand(0));
|
|
|
|
if (!Op0Reg)
|
|
|
|
return false;
|
|
|
|
bool Op0IsKill = hasTrivialKill(I->getOperand(0));
|
|
|
|
|
2014-08-22 07:06:07 +08:00
|
|
|
unsigned Op1Reg = getRegForValue(I->getOperand(1));
|
|
|
|
if (!Op1Reg)
|
|
|
|
return false;
|
|
|
|
bool Op1IsKill = hasTrivialKill(I->getOperand(1));
|
|
|
|
|
|
|
|
unsigned ResultReg = 0;
|
|
|
|
switch (I->getOpcode()) {
|
|
|
|
default: llvm_unreachable("Unexpected instruction.");
|
|
|
|
case Instruction::Shl:
|
|
|
|
ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
|
|
|
|
break;
|
|
|
|
case Instruction::AShr:
|
|
|
|
ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
|
|
|
|
break;
|
|
|
|
case Instruction::LShr:
|
|
|
|
ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
|
|
|
|
break;
|
2014-07-31 06:04:22 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
if (!ResultReg)
|
|
|
|
return false;
|
|
|
|
|
2014-09-04 04:56:52 +08:00
|
|
|
updateValueMap(I, ResultReg);
|
2014-07-31 06:04:22 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2014-09-16 07:20:17 +08:00
|
|
|
bool AArch64FastISel::selectBitCast(const Instruction *I) {
|
2014-07-31 14:25:37 +08:00
|
|
|
MVT RetVT, SrcVT;
|
|
|
|
|
|
|
|
if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
|
|
|
|
return false;
|
|
|
|
if (!isTypeLegal(I->getType(), RetVT))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
unsigned Opc;
|
|
|
|
if (RetVT == MVT::f32 && SrcVT == MVT::i32)
|
|
|
|
Opc = AArch64::FMOVWSr;
|
|
|
|
else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
|
|
|
|
Opc = AArch64::FMOVXDr;
|
|
|
|
else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
|
|
|
|
Opc = AArch64::FMOVSWr;
|
|
|
|
else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
|
|
|
|
Opc = AArch64::FMOVDXr;
|
|
|
|
else
|
|
|
|
return false;
|
|
|
|
|
2014-08-22 04:57:57 +08:00
|
|
|
const TargetRegisterClass *RC = nullptr;
|
|
|
|
switch (RetVT.SimpleTy) {
|
|
|
|
default: llvm_unreachable("Unexpected value type.");
|
|
|
|
case MVT::i32: RC = &AArch64::GPR32RegClass; break;
|
|
|
|
case MVT::i64: RC = &AArch64::GPR64RegClass; break;
|
|
|
|
case MVT::f32: RC = &AArch64::FPR32RegClass; break;
|
|
|
|
case MVT::f64: RC = &AArch64::FPR64RegClass; break;
|
|
|
|
}
|
2014-07-31 14:25:37 +08:00
|
|
|
unsigned Op0Reg = getRegForValue(I->getOperand(0));
|
|
|
|
if (!Op0Reg)
|
|
|
|
return false;
|
|
|
|
bool Op0IsKill = hasTrivialKill(I->getOperand(0));
|
2014-09-04 04:56:59 +08:00
|
|
|
unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg, Op0IsKill);
|
2014-07-31 14:25:37 +08:00
|
|
|
|
|
|
|
if (!ResultReg)
|
|
|
|
return false;
|
|
|
|
|
2014-09-04 04:56:52 +08:00
|
|
|
updateValueMap(I, ResultReg);
|
2014-07-31 14:25:37 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2014-09-16 06:07:49 +08:00
|
|
|
bool AArch64FastISel::selectFRem(const Instruction *I) {
|
|
|
|
MVT RetVT;
|
|
|
|
if (!isTypeLegal(I->getType(), RetVT))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
RTLIB::Libcall LC;
|
|
|
|
switch (RetVT.SimpleTy) {
|
|
|
|
default:
|
|
|
|
return false;
|
|
|
|
case MVT::f32:
|
|
|
|
LC = RTLIB::REM_F32;
|
|
|
|
break;
|
|
|
|
case MVT::f64:
|
|
|
|
LC = RTLIB::REM_F64;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
ArgListTy Args;
|
|
|
|
Args.reserve(I->getNumOperands());
|
|
|
|
|
|
|
|
// Populate the argument list.
|
|
|
|
for (auto &Arg : I->operands()) {
|
|
|
|
ArgListEntry Entry;
|
|
|
|
Entry.Val = Arg;
|
|
|
|
Entry.Ty = Arg->getType();
|
|
|
|
Args.push_back(Entry);
|
|
|
|
}
|
|
|
|
|
|
|
|
CallLoweringInfo CLI;
|
2015-06-23 20:21:54 +08:00
|
|
|
MCContext &Ctx = MF->getContext();
|
|
|
|
CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
|
2014-09-16 06:07:49 +08:00
|
|
|
TLI.getLibcallName(LC), std::move(Args));
|
|
|
|
if (!lowerCallTo(CLI))
|
|
|
|
return false;
|
|
|
|
updateValueMap(I, CLI.ResultReg);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2014-09-18 05:55:55 +08:00
|
|
|
bool AArch64FastISel::selectSDiv(const Instruction *I) {
|
|
|
|
MVT VT;
|
|
|
|
if (!isTypeLegal(I->getType(), VT))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
if (!isa<ConstantInt>(I->getOperand(1)))
|
|
|
|
return selectBinaryOp(I, ISD::SDIV);
|
|
|
|
|
|
|
|
const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
|
|
|
|
if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
|
|
|
|
!(C.isPowerOf2() || (-C).isPowerOf2()))
|
|
|
|
return selectBinaryOp(I, ISD::SDIV);
|
|
|
|
|
|
|
|
unsigned Lg2 = C.countTrailingZeros();
|
|
|
|
unsigned Src0Reg = getRegForValue(I->getOperand(0));
|
|
|
|
if (!Src0Reg)
|
|
|
|
return false;
|
|
|
|
bool Src0IsKill = hasTrivialKill(I->getOperand(0));
|
|
|
|
|
|
|
|
if (cast<BinaryOperator>(I)->isExact()) {
|
|
|
|
unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Src0IsKill, Lg2);
|
|
|
|
if (!ResultReg)
|
|
|
|
return false;
|
|
|
|
updateValueMap(I, ResultReg);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2014-10-17 00:41:15 +08:00
|
|
|
int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
|
|
|
|
unsigned AddReg = emitAdd_ri_(VT, Src0Reg, /*IsKill=*/false, Pow2MinusOne);
|
2014-09-18 05:55:55 +08:00
|
|
|
if (!AddReg)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// (Src0 < 0) ? Pow2 - 1 : 0;
|
|
|
|
if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
unsigned SelectOpc;
|
|
|
|
const TargetRegisterClass *RC;
|
|
|
|
if (VT == MVT::i64) {
|
|
|
|
SelectOpc = AArch64::CSELXr;
|
|
|
|
RC = &AArch64::GPR64RegClass;
|
|
|
|
} else {
|
|
|
|
SelectOpc = AArch64::CSELWr;
|
|
|
|
RC = &AArch64::GPR32RegClass;
|
|
|
|
}
|
|
|
|
unsigned SelectReg =
|
|
|
|
fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg,
|
|
|
|
Src0IsKill, AArch64CC::LT);
|
|
|
|
if (!SelectReg)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
|
|
|
|
// negate the result.
|
|
|
|
unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
|
|
|
|
unsigned ResultReg;
|
|
|
|
if (C.isNegative())
|
|
|
|
ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true,
|
|
|
|
SelectReg, /*IsKill=*/true, AArch64_AM::ASR, Lg2);
|
|
|
|
else
|
|
|
|
ResultReg = emitASR_ri(VT, VT, SelectReg, /*IsKill=*/true, Lg2);
|
|
|
|
|
|
|
|
if (!ResultReg)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
updateValueMap(I, ResultReg);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2014-11-14 04:50:44 +08:00
|
|
|
/// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
|
|
|
|
/// have to duplicate it for AArch64, because otherwise we would fail during the
|
|
|
|
/// sign-extend emission.
|
|
|
|
std::pair<unsigned, bool> AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
|
|
|
|
unsigned IdxN = getRegForValue(Idx);
|
|
|
|
if (IdxN == 0)
|
|
|
|
// Unhandled operand. Halt "fast" selection and bail.
|
|
|
|
return std::pair<unsigned, bool>(0, false);
|
|
|
|
|
|
|
|
bool IdxNIsKill = hasTrivialKill(Idx);
|
|
|
|
|
|
|
|
// If the index is smaller or larger than intptr_t, truncate or extend it.
|
2015-07-09 10:09:04 +08:00
|
|
|
MVT PtrVT = TLI.getPointerTy(DL);
|
2014-11-14 04:50:44 +08:00
|
|
|
EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
|
|
|
|
if (IdxVT.bitsLT(PtrVT)) {
|
|
|
|
IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*IsZExt=*/false);
|
|
|
|
IdxNIsKill = true;
|
|
|
|
} else if (IdxVT.bitsGT(PtrVT))
|
|
|
|
llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
|
|
|
|
return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
|
|
|
|
}
|
|
|
|
|
2014-10-16 02:58:07 +08:00
|
|
|
/// This is mostly a copy of the existing FastISel GEP code, but we have to
|
|
|
|
/// duplicate it for AArch64, because otherwise we would bail out even for
|
|
|
|
/// simple cases. This is because the standard fastEmit functions don't cover
|
|
|
|
/// MUL at all and ADD is lowered very inefficientily.
|
|
|
|
bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
|
|
|
|
unsigned N = getRegForValue(I->getOperand(0));
|
|
|
|
if (!N)
|
|
|
|
return false;
|
|
|
|
bool NIsKill = hasTrivialKill(I->getOperand(0));
|
|
|
|
|
|
|
|
// Keep a running tab of the total offset to coalesce multiple N = N + Offset
|
|
|
|
// into a single N = N + TotalOffset.
|
|
|
|
uint64_t TotalOffs = 0;
|
2015-07-09 10:09:04 +08:00
|
|
|
MVT VT = TLI.getPointerTy(DL);
|
2016-01-20 08:26:52 +08:00
|
|
|
for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
|
|
|
|
GTI != E; ++GTI) {
|
|
|
|
const Value *Idx = GTI.getOperand();
|
2016-12-02 10:24:42 +08:00
|
|
|
if (auto *StTy = GTI.getStructTypeOrNull()) {
|
2014-10-16 02:58:07 +08:00
|
|
|
unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
|
|
|
|
// N = N + Offset
|
|
|
|
if (Field)
|
|
|
|
TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
|
|
|
|
} else {
|
2016-01-20 08:26:52 +08:00
|
|
|
Type *Ty = GTI.getIndexedType();
|
|
|
|
|
2014-10-16 02:58:07 +08:00
|
|
|
// If this is a constant subscript, handle it quickly.
|
|
|
|
if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
|
|
|
|
if (CI->isZero())
|
|
|
|
continue;
|
|
|
|
// N = N + Offset
|
|
|
|
TotalOffs +=
|
|
|
|
DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (TotalOffs) {
|
|
|
|
N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
|
|
|
|
if (!N)
|
|
|
|
return false;
|
|
|
|
NIsKill = true;
|
|
|
|
TotalOffs = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
// N = N + Idx * ElementSize;
|
|
|
|
uint64_t ElementSize = DL.getTypeAllocSize(Ty);
|
|
|
|
std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
|
|
|
|
unsigned IdxN = Pair.first;
|
|
|
|
bool IdxNIsKill = Pair.second;
|
|
|
|
if (!IdxN)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
if (ElementSize != 1) {
|
|
|
|
unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
|
|
|
|
if (!C)
|
|
|
|
return false;
|
|
|
|
IdxN = emitMul_rr(VT, IdxN, IdxNIsKill, C, true);
|
|
|
|
if (!IdxN)
|
|
|
|
return false;
|
|
|
|
IdxNIsKill = true;
|
|
|
|
}
|
|
|
|
N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
|
|
|
|
if (!N)
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (TotalOffs) {
|
|
|
|
N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
|
|
|
|
if (!N)
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
updateValueMap(I, N);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2016-07-21 05:12:32 +08:00
|
|
|
bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
|
|
|
|
assert(TM.getOptLevel() == CodeGenOpt::None &&
|
|
|
|
"cmpxchg survived AtomicExpand at optlevel > -O0");
|
|
|
|
|
|
|
|
auto *RetPairTy = cast<StructType>(I->getType());
|
|
|
|
Type *RetTy = RetPairTy->getTypeAtIndex(0U);
|
|
|
|
assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
|
|
|
|
"cmpxchg has a non-i1 status result");
|
|
|
|
|
|
|
|
MVT VT;
|
|
|
|
if (!isTypeLegal(RetTy, VT))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
const TargetRegisterClass *ResRC;
|
2016-08-03 04:22:36 +08:00
|
|
|
unsigned Opc, CmpOpc;
|
2016-07-21 05:12:32 +08:00
|
|
|
// This only supports i32/i64, because i8/i16 aren't legal, and the generic
|
|
|
|
// extractvalue selection doesn't support that.
|
|
|
|
if (VT == MVT::i32) {
|
|
|
|
Opc = AArch64::CMP_SWAP_32;
|
2016-08-03 04:22:36 +08:00
|
|
|
CmpOpc = AArch64::SUBSWrs;
|
2016-07-21 05:12:32 +08:00
|
|
|
ResRC = &AArch64::GPR32RegClass;
|
|
|
|
} else if (VT == MVT::i64) {
|
|
|
|
Opc = AArch64::CMP_SWAP_64;
|
2016-08-03 04:22:36 +08:00
|
|
|
CmpOpc = AArch64::SUBSXrs;
|
2016-07-21 05:12:32 +08:00
|
|
|
ResRC = &AArch64::GPR64RegClass;
|
|
|
|
} else {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
const MCInstrDesc &II = TII.get(Opc);
|
|
|
|
|
|
|
|
const unsigned AddrReg = constrainOperandRegClass(
|
|
|
|
II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
|
|
|
|
const unsigned DesiredReg = constrainOperandRegClass(
|
|
|
|
II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
|
|
|
|
const unsigned NewReg = constrainOperandRegClass(
|
|
|
|
II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);
|
|
|
|
|
|
|
|
const unsigned ResultReg1 = createResultReg(ResRC);
|
|
|
|
const unsigned ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
|
2016-08-03 04:22:36 +08:00
|
|
|
const unsigned ScratchReg = createResultReg(&AArch64::GPR32RegClass);
|
2016-07-21 05:12:32 +08:00
|
|
|
|
|
|
|
// FIXME: MachineMemOperand doesn't support cmpxchg yet.
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
|
2016-08-03 04:22:36 +08:00
|
|
|
.addDef(ResultReg1)
|
|
|
|
.addDef(ScratchReg)
|
|
|
|
.addUse(AddrReg)
|
|
|
|
.addUse(DesiredReg)
|
|
|
|
.addUse(NewReg);
|
|
|
|
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
|
|
|
|
.addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
|
|
|
|
.addUse(ResultReg1)
|
|
|
|
.addUse(DesiredReg)
|
|
|
|
.addImm(0);
|
|
|
|
|
|
|
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr))
|
|
|
|
.addDef(ResultReg2)
|
|
|
|
.addUse(AArch64::WZR)
|
|
|
|
.addUse(AArch64::WZR)
|
|
|
|
.addImm(AArch64CC::NE);
|
2016-07-21 05:12:32 +08:00
|
|
|
|
|
|
|
assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
|
|
|
|
updateValueMap(I, ResultReg1, 2);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2014-09-04 04:56:52 +08:00
|
|
|
bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
|
2014-03-29 18:18:08 +08:00
|
|
|
switch (I->getOpcode()) {
|
|
|
|
default:
|
2014-09-04 09:29:21 +08:00
|
|
|
break;
|
2014-09-03 05:32:54 +08:00
|
|
|
case Instruction::Add:
|
2014-09-03 09:38:36 +08:00
|
|
|
case Instruction::Sub:
|
2017-04-01 09:26:17 +08:00
|
|
|
return selectAddSub(I);
|
2014-09-03 05:32:54 +08:00
|
|
|
case Instruction::Mul:
|
2017-04-01 09:26:17 +08:00
|
|
|
return selectMul(I);
|
2014-09-18 05:55:55 +08:00
|
|
|
case Instruction::SDiv:
|
2017-04-01 09:26:17 +08:00
|
|
|
return selectSDiv(I);
|
2014-09-03 05:32:54 +08:00
|
|
|
case Instruction::SRem:
|
2014-09-04 02:46:45 +08:00
|
|
|
if (!selectBinaryOp(I, ISD::SREM))
|
2017-04-01 09:26:17 +08:00
|
|
|
return selectRem(I, ISD::SREM);
|
|
|
|
return true;
|
2014-09-03 05:32:54 +08:00
|
|
|
case Instruction::URem:
|
2014-09-04 02:46:45 +08:00
|
|
|
if (!selectBinaryOp(I, ISD::UREM))
|
2017-04-01 09:26:17 +08:00
|
|
|
return selectRem(I, ISD::UREM);
|
|
|
|
return true;
|
2014-09-03 05:32:54 +08:00
|
|
|
case Instruction::Shl:
|
|
|
|
case Instruction::LShr:
|
|
|
|
case Instruction::AShr:
|
2017-04-01 09:26:17 +08:00
|
|
|
return selectShift(I);
|
2014-09-03 05:32:54 +08:00
|
|
|
case Instruction::And:
|
|
|
|
case Instruction::Or:
|
|
|
|
case Instruction::Xor:
|
2017-04-01 09:26:17 +08:00
|
|
|
return selectLogicalOp(I);
|
2014-09-04 01:58:10 +08:00
|
|
|
case Instruction::Br:
|
2017-04-01 09:26:17 +08:00
|
|
|
return selectBranch(I);
|
2014-03-29 18:18:08 +08:00
|
|
|
case Instruction::IndirectBr:
|
2017-04-01 09:26:17 +08:00
|
|
|
return selectIndirectBr(I);
|
2014-09-03 05:32:54 +08:00
|
|
|
case Instruction::BitCast:
|
2014-09-04 02:46:45 +08:00
|
|
|
if (!FastISel::selectBitCast(I))
|
2017-04-01 09:26:17 +08:00
|
|
|
return selectBitCast(I);
|
|
|
|
return true;
|
2014-09-03 05:32:54 +08:00
|
|
|
case Instruction::FPToSI:
|
2014-09-04 02:46:45 +08:00
|
|
|
if (!selectCast(I, ISD::FP_TO_SINT))
|
2017-04-01 09:26:17 +08:00
|
|
|
return selectFPToInt(I, /*Signed=*/true);
|
|
|
|
return true;
|
2014-09-03 05:32:54 +08:00
|
|
|
case Instruction::FPToUI:
|
2017-04-01 09:26:17 +08:00
|
|
|
return selectFPToInt(I, /*Signed=*/false);
|
2014-09-03 05:32:54 +08:00
|
|
|
case Instruction::ZExt:
|
|
|
|
case Instruction::SExt:
|
2017-04-01 09:26:17 +08:00
|
|
|
return selectIntExt(I);
|
2014-09-03 05:32:54 +08:00
|
|
|
case Instruction::Trunc:
|
2014-09-04 02:46:45 +08:00
|
|
|
if (!selectCast(I, ISD::TRUNCATE))
|
2017-04-01 09:26:17 +08:00
|
|
|
return selectTrunc(I);
|
|
|
|
return true;
|
2014-03-29 18:18:08 +08:00
|
|
|
case Instruction::FPExt:
|
2017-04-01 09:26:17 +08:00
|
|
|
return selectFPExt(I);
|
2014-03-29 18:18:08 +08:00
|
|
|
case Instruction::FPTrunc:
|
2017-04-01 09:26:17 +08:00
|
|
|
return selectFPTrunc(I);
|
2014-03-29 18:18:08 +08:00
|
|
|
case Instruction::SIToFP:
|
2014-09-04 02:46:45 +08:00
|
|
|
if (!selectCast(I, ISD::SINT_TO_FP))
|
2017-04-01 09:26:17 +08:00
|
|
|
return selectIntToFP(I, /*Signed=*/true);
|
|
|
|
return true;
|
2014-03-29 18:18:08 +08:00
|
|
|
case Instruction::UIToFP:
|
2017-04-01 09:26:17 +08:00
|
|
|
return selectIntToFP(I, /*Signed=*/false);
|
2014-09-03 05:32:54 +08:00
|
|
|
case Instruction::Load:
|
2017-04-01 09:26:17 +08:00
|
|
|
return selectLoad(I);
|
2014-09-03 05:32:54 +08:00
|
|
|
case Instruction::Store:
|
2017-04-01 09:26:17 +08:00
|
|
|
return selectStore(I);
|
2014-09-03 05:32:54 +08:00
|
|
|
case Instruction::FCmp:
|
|
|
|
case Instruction::ICmp:
|
2017-04-01 09:26:17 +08:00
|
|
|
return selectCmp(I);
|
2014-09-03 05:32:54 +08:00
|
|
|
case Instruction::Select:
|
2017-04-01 09:26:17 +08:00
|
|
|
return selectSelect(I);
|
2014-03-29 18:18:08 +08:00
|
|
|
case Instruction::Ret:
|
2017-04-01 09:26:17 +08:00
|
|
|
return selectRet(I);
|
2014-09-16 06:07:49 +08:00
|
|
|
case Instruction::FRem:
|
2017-04-01 09:26:17 +08:00
|
|
|
return selectFRem(I);
|
2014-10-16 02:58:07 +08:00
|
|
|
case Instruction::GetElementPtr:
|
2017-04-01 09:26:17 +08:00
|
|
|
return selectGetElementPtr(I);
|
2016-07-21 05:12:32 +08:00
|
|
|
case Instruction::AtomicCmpXchg:
|
2017-04-01 09:26:17 +08:00
|
|
|
return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
|
2014-03-29 18:18:08 +08:00
|
|
|
}
|
2014-09-03 05:32:54 +08:00
|
|
|
|
2014-03-29 18:18:08 +08:00
|
|
|
// Silence warnings.
|
2014-05-24 20:50:23 +08:00
|
|
|
(void)&CC_AArch64_DarwinPCS_VarArg;
|
2017-07-14 01:03:12 +08:00
|
|
|
(void)&CC_AArch64_Win64_VarArg;
|
2017-12-20 06:05:25 +08:00
|
|
|
|
|
|
|
// fall-back to target-independent instruction selection.
|
|
|
|
return selectOperator(I, I->getOpcode());
|
2014-03-29 18:18:08 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
namespace llvm {
|
2017-01-25 08:29:26 +08:00
|
|
|
|
|
|
|
FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
|
2014-09-16 07:20:17 +08:00
|
|
|
const TargetLibraryInfo *LibInfo) {
|
|
|
|
return new AArch64FastISel(FuncInfo, LibInfo);
|
2014-03-29 18:18:08 +08:00
|
|
|
}
|
2017-01-25 08:29:26 +08:00
|
|
|
|
|
|
|
} // end namespace llvm
|