forked from OSchip/llvm-project

[AArch64][GlobalISel] Make <8 x s8> shifts legal.

commit 8071c2f5c6 (parent 4140f0744f)
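The hunks below show only the instruction-selector side plus formatting churn; the legalizer rule change the title describes is not visible in this view. As a rough sketch, making <8 x s8> shifts legal in AArch64LegalizerInfo.cpp would look something like the following (the exact rule chain and surrounding entries are an assumption, not taken from this diff):

```cpp
// Hypothetical sketch, not part of this diff: in the AArch64LegalizerInfo
// constructor, add {v8s8, v8s8} to the shift rules. LLT::vector(8, 8)
// spells <8 x s8> in this era's LLT API.
const LLT s32 = LLT::scalar(32);
const LLT s64 = LLT::scalar(64);
const LLT v8s8 = LLT::vector(8, 8);
const LLT v16s8 = LLT::vector(16, 8);
const LLT v4s16 = LLT::vector(4, 16);
const LLT v8s16 = LLT::vector(8, 16);
const LLT v2s32 = LLT::vector(2, 32);
const LLT v4s32 = LLT::vector(4, 32);
const LLT v2s64 = LLT::vector(2, 64);
getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
    .legalFor({{s32, s32}, {s64, s64},
               {v8s8, v8s8}, // The new entry this commit adds.
               {v16s8, v16s8}, {v4s16, v4s16}, {v8s16, v8s16},
               {v2s32, v2s32}, {v4s32, v4s32}, {v2s64, v2s64}})
    .clampScalar(1, s32, s64)
    .clampScalar(0, s32, s64)
    .minScalarSameAs(1, 0);
```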
@@ -22,8 +22,8 @@
 #include "llvm/ADT/Optional.h"
 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
 #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
-#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
@@ -34,8 +34,8 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/TargetOpcodes.h"
 #include "llvm/IR/Constants.h"
-#include "llvm/IR/Type.h"
 #include "llvm/IR/IntrinsicsAArch64.h"
+#include "llvm/IR/Type.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
@@ -101,8 +101,7 @@ private:
   bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
                            MachineRegisterInfo &MRI) const;

-  bool tryOptAndIntoCompareBranch(MachineInstr *LHS,
-                                  int64_t CmpConstant,
+  bool tryOptAndIntoCompareBranch(MachineInstr *LHS, int64_t CmpConstant,
                                   const CmpInst::Predicate &Pred,
                                   MachineBasicBlock *DstMBB,
                                   MachineIRBuilder &MIB) const;
@@ -313,10 +312,11 @@ private:
   /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
   /// or not a shift + extend should be folded into an addressing mode. Returns
   /// None when this is not profitable or possible.
-  ComplexRendererFns
-  selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
-                    MachineOperand &Offset, unsigned SizeInBytes,
-                    bool WantsExt) const;
+  ComplexRendererFns selectExtendedSHL(MachineOperand &Root,
+                                       MachineOperand &Base,
+                                       MachineOperand &Offset,
+                                       unsigned SizeInBytes,
+                                       bool WantsExt) const;
   ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
   ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
                                        unsigned SizeInBytes) const;
@@ -360,7 +360,7 @@ private:
   /// subregister copy if necessary. Return either ExtReg, or the result of the
   /// new copy.
   Register narrowExtendRegIfNeeded(Register ExtReg,
-                                    MachineIRBuilder &MIB) const;
+                                   MachineIRBuilder &MIB) const;
   Register widenGPRBankRegIfNeeded(Register Reg, unsigned Size,
                                    MachineIRBuilder &MIB) const;
   ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
@@ -1423,7 +1423,8 @@ static Optional<int64_t> getVectorShiftImm(Register Reg,
   // Check all operands are identical immediates.
   int64_t ImmVal = 0;
   for (unsigned Idx = 1; Idx < OpMI->getNumOperands(); ++Idx) {
-    auto VRegAndVal = getConstantVRegValWithLookThrough(OpMI->getOperand(Idx).getReg(), MRI);
+    auto VRegAndVal =
+        getConstantVRegValWithLookThrough(OpMI->getOperand(Idx).getReg(), MRI);
     if (!VRegAndVal)
       return None;

@@ -1438,7 +1439,8 @@ static Optional<int64_t> getVectorShiftImm(Register Reg,

 /// Matches and returns the shift immediate value for a SHL instruction given
 /// a shift operand.
-static Optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI) {
+static Optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg,
+                                         MachineRegisterInfo &MRI) {
   Optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
   if (!ShiftImm)
     return None;
@@ -1498,6 +1500,8 @@ bool AArch64InstructionSelector::selectVectorSHL(
     Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
   } else if (Ty == LLT::vector(16, 8)) {
     Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
+  } else if (Ty == LLT::vector(8, 8)) {
+    Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
   } else {
     LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
     return false;
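For context on the hunk above: ImmVal holds the splat shift amount when one exists, so the selector picks the immediate-form SHL encodings for constant splats and the register-form USHL otherwise. The emission that follows this hunk in selectVectorSHL is roughly the sketch below (paraphrased; the variable names are the function's, but the exact tail is not shown in this diff):

```cpp
// Paraphrased sketch of selectVectorSHL's tail (not shown in this diff):
// immediate shifts append the splat amount as an immediate operand;
// register shifts pass the shift-vector register directly.
MachineIRBuilder MIB(I);
MachineInstrBuilder Shl;
if (ImmVal)
  Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg}).addImm(*ImmVal);
else
  Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Src2Reg});
constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
I.eraseFromParent();
return true;
```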
@@ -1557,6 +1561,9 @@ bool AArch64InstructionSelector::selectVectorAshrLshr(
   } else if (Ty == LLT::vector(16, 8)) {
     Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
     NegOpc = AArch64::NEGv8i16;
+  } else if (Ty == LLT::vector(8, 8)) {
+    Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
+    NegOpc = AArch64::NEGv8i8;
   } else {
     LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
     return false;
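The NegOpc field exists because NEON has no right-shift-by-register instruction: SSHL and USHL shift a lane left for a positive shift amount and right for a negative one. So for G_ASHR/G_LSHR by a vector of amounts, the selector first negates the amounts, as in this assumed sketch of the function's tail (outside the hunk above):

```cpp
// Assumed sketch of selectVectorAshrLshr's tail: negate the per-lane
// shift amounts, then shift left by the negated amounts, which is a
// right shift. RC is the destination register class (FPR64 for the new
// <8 x s8> case, FPR128 for the 128-bit types).
MachineIRBuilder MIB(I);
auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
auto Shift = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
constrainSelectedInstRegOperands(*Shift, TII, TRI, RBI);
I.eraseFromParent();
return true;
```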
@@ -1603,8 +1610,9 @@ bool AArch64InstructionSelector::selectVaStartDarwin(
   return true;
 }

-void AArch64InstructionSelector::materializeLargeCMVal(
-    MachineInstr &I, const Value *V, unsigned OpFlags) const {
+void AArch64InstructionSelector::materializeLargeCMVal(MachineInstr &I,
+                                                       const Value *V,
+                                                       unsigned OpFlags) const {
   MachineBasicBlock &MBB = *I.getParent();
   MachineFunction &MF = *MBB.getParent();
   MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -1635,8 +1643,8 @@ void AArch64InstructionSelector::materializeLargeCMVal(
     constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
     return DstReg;
   };
-  Register DstReg = BuildMovK(MovZ.getReg(0),
-                              AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
+  Register DstReg =
+      BuildMovK(MovZ.getReg(0), AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
   DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
   BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
   return;
@@ -1707,8 +1715,8 @@ bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
 /// because the selector works bottom up, uses before defs. By the time we
 /// end up trying to select a G_PTR_ADD, we should have already attempted to
 /// fold this into addressing modes and were therefore unsuccessful.
-bool AArch64InstructionSelector::convertPtrAddToAdd(
-    MachineInstr &I, MachineRegisterInfo &MRI) {
+bool AArch64InstructionSelector::convertPtrAddToAdd(MachineInstr &I,
+                                                    MachineRegisterInfo &MRI) {
   assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
   Register DstReg = I.getOperand(0).getReg();
   Register AddOp1Reg = I.getOperand(1).getReg();
@@ -1886,7 +1894,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
   if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
     // Certain non-generic instructions also need some special handling.

-     if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
+    if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
       return constrainSelectedInstRegOperands(I, TII, TRI, RBI);

     if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
@@ -1894,10 +1902,10 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
       const LLT DefTy = MRI.getType(DefReg);

       const RegClassOrRegBank &RegClassOrBank =
-        MRI.getRegClassOrRegBank(DefReg);
+          MRI.getRegClassOrRegBank(DefReg);

-      const TargetRegisterClass *DefRC
-          = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
+      const TargetRegisterClass *DefRC =
+          RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
       if (!DefRC) {
         if (!DefTy.isValid()) {
           LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
@@ -1922,7 +1930,6 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
       return true;
     }

-
   if (I.getNumOperands() != I.getNumExplicitOperands()) {
     LLVM_DEBUG(
         dbgs() << "Generic instruction has unexpected implicit operands\n");
@@ -1998,10 +2005,9 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
                    .addUse(CondReg)
                    .addImm(1);
     constrainSelectedInstRegOperands(*CMP.getInstr(), TII, TRI, RBI);
-    auto Bcc =
-        BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::Bcc))
-            .addImm(AArch64CC::EQ)
-            .addMBB(DestMBB);
+    auto Bcc = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::Bcc))
+                   .addImm(AArch64CC::EQ)
+                   .addMBB(DestMBB);

     I.eraseFromParent();
     return constrainSelectedInstRegOperands(*Bcc.getInstr(), TII, TRI, RBI);
@@ -2375,8 +2381,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
     if (NewOpc == I.getOpcode())
       return nullptr;
     // Check if we can fold anything into the addressing mode.
-    auto AddrModeFns =
-        selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
+    auto AddrModeFns = selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
     if (!AddrModeFns) {
       // Can't fold anything. Use the original instruction.
       I.setDesc(TII.get(NewOpc));
@@ -2536,10 +2541,11 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
     // to the G_UADDO. CSINC increments the result when the predicate is false,
     // so to get the increment when it's true, we need to use the inverse. In
     // this case, we want to increment when carry is set.
-    auto CsetMI = MIRBuilder
-                      .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
-                                  {Register(AArch64::WZR), Register(AArch64::WZR)})
-                      .addImm(getInvertedCondCode(AArch64CC::HS));
+    auto CsetMI =
+        MIRBuilder
+            .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
+                        {Register(AArch64::WZR), Register(AArch64::WZR)})
+            .addImm(getInvertedCondCode(AArch64CC::HS));
     constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
     I.eraseFromParent();
     return true;
@@ -2775,14 +2781,14 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
     }

     ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
-                           {DefReg}, {SrcReg})
-                .addImm(0)
-                .addImm(SrcSize - 1);
+                          {DefReg}, {SrcReg})
+               .addImm(0)
+               .addImm(SrcSize - 1);
   } else if (DstSize <= 32) {
     ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
-                           {DefReg}, {SrcReg})
-                .addImm(0)
-                .addImm(SrcSize - 1);
+                          {DefReg}, {SrcReg})
+               .addImm(0)
+               .addImm(SrcSize - 1);
   } else {
     return false;
   }
@@ -2999,7 +3005,7 @@ bool AArch64InstructionSelector::selectJumpTable(
   // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
   MachineIRBuilder MIB(I);
   auto MovMI =
-    MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
+      MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
           .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
           .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
   I.eraseFromParent();
@@ -3226,62 +3232,54 @@ bool AArch64InstructionSelector::selectVectorICmp(
   // tablegen selector.

   static const unsigned OpcTable[4][4][9] = {
-      {
-          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
-           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
-           0 /* invalid */},
-          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
-           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
-           0 /* invalid */},
-          {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
-           AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
-           AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
-          {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
-           AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
-           AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
-      },
-      {
-          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
-           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
-           0 /* invalid */},
-          {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
-           AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
-           AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
-          {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
-           AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
-           AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
-          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
-           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
-           0 /* invalid */}
-      },
-      {
-          {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
-           AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
-           AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
-          {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
-           AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
-           AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
-          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
-           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
-           0 /* invalid */},
-          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
-           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
-           0 /* invalid */}
-      },
-      {
-          {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
-           AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
-           AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
-          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
-           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
-           0 /* invalid */},
-          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
-           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
-           0 /* invalid */},
-          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
-           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
-           0 /* invalid */}
-      },
+      {{0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+        0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+        0 /* invalid */},
+       {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+        0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+        0 /* invalid */},
+       {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
+        AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
+        AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
+       {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
+        AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
+        AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}},
+      {{0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+        0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+        0 /* invalid */},
+       {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
+        AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
+        AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
+       {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
+        AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
+        AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
+       {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+        0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+        0 /* invalid */}},
+      {{AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
+        AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
+        AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
+       {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
+        AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
+        AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
+       {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+        0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+        0 /* invalid */},
+       {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+        0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+        0 /* invalid */}},
+      {{AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
+        AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
+        AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
+       {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+        0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+        0 /* invalid */},
+       {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+        0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+        0 /* invalid */},
+       {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+        0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+        0 /* invalid */}},
   };
   unsigned EltIdx = Log2_32(SrcEltSize / 8);
   unsigned NumEltsIdx = Log2_32(NumElts / 2);
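As a reading aid for the reindented table (index names here are descriptive, not from the source): it is addressed as OpcTable[EltIdx][NumEltsIdx][predicate index], where EltIdx is log2 of the element size in bytes and NumEltsIdx is log2 of half the element count. A <4 x s32> compare, for instance, uses EltIdx = Log2_32(32 / 8) = 2 and NumEltsIdx = Log2_32(4 / 2) = 1, landing on the CMEQv4i32 row of the third group.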
@@ -3400,11 +3398,11 @@ bool AArch64InstructionSelector::selectMergeValues(
     Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
     // Need to anyext the second scalar before we can use bfm
     MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
-                                    TII.get(TargetOpcode::SUBREG_TO_REG))
-                               .addDef(SubToRegDef2)
-                               .addImm(0)
-                               .addUse(I.getOperand(2).getReg())
-                               .addImm(AArch64::sub_32);
+                                       TII.get(TargetOpcode::SUBREG_TO_REG))
+                                   .addDef(SubToRegDef2)
+                                   .addImm(0)
+                                   .addUse(I.getOperand(2).getReg())
+                                   .addImm(AArch64::sub_32);
     MachineInstr &BFM =
         *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
             .addDef(I.getOperand(0).getReg())
@@ -3732,10 +3730,11 @@ MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
                               AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
     break;
   case 8:
-    LoadMI = &*MIRBuilder
-                 .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
-                 .addConstantPoolIndex(
-                     CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
+    LoadMI =
+        &*MIRBuilder
+              .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
+              .addConstantPoolIndex(CPIdx, 0,
+                                    AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
     break;
   default:
     LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
@@ -4084,16 +4083,15 @@ MachineInstr *AArch64InstructionSelector::emitFMovForFConstant(
   return &I;
 }

-MachineInstr *
-AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred,
-                                            MachineIRBuilder &MIRBuilder) const {
+MachineInstr *AArch64InstructionSelector::emitCSetForICMP(
+    Register DefReg, unsigned Pred, MachineIRBuilder &MIRBuilder) const {
   // CSINC increments the result when the predicate is false. Invert it.
   const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
       CmpInst::getInversePredicate((CmpInst::Predicate)Pred));
-  auto I =
-      MIRBuilder
-          .buildInstr(AArch64::CSINCWr, {DefReg}, {Register(AArch64::WZR), Register(AArch64::WZR)})
-          .addImm(InvCC);
+  auto I = MIRBuilder
+               .buildInstr(AArch64::CSINCWr, {DefReg},
+                           {Register(AArch64::WZR), Register(AArch64::WZR)})
+               .addImm(InvCC);
   constrainSelectedInstRegOperands(*I, TII, TRI, RBI);
   return &*I;
 }
@@ -4291,8 +4289,7 @@ MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
     if (!ValAndVReg || ValAndVReg->Value != 0)
       return nullptr;

-    return emitTST(LHSDef->getOperand(1),
-                   LHSDef->getOperand(2), MIRBuilder);
+    return emitTST(LHSDef->getOperand(1), LHSDef->getOperand(2), MIRBuilder);
   }

   return nullptr;
@@ -4971,7 +4968,9 @@ bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
       MIRBuilder.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
       MIRBuilder.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
     } else {
-      MIRBuilder.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr}).addImm(1);
+      MIRBuilder
+          .buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr})
+          .addImm(1);
       MIRBuilder.buildInstr(AArch64::XPACLRI);
       MIRBuilder.buildCopy({DstReg}, {Register(AArch64::LR)});
     }
@@ -5129,9 +5128,11 @@ static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type) {
 }

 InstructionSelector::ComplexRendererFns
-AArch64InstructionSelector::selectExtendedSHL(
-    MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
-    unsigned SizeInBytes, bool WantsExt) const {
+AArch64InstructionSelector::selectExtendedSHL(MachineOperand &Root,
+                                              MachineOperand &Base,
+                                              MachineOperand &Offset,
+                                              unsigned SizeInBytes,
+                                              bool WantsExt) const {
   assert(Base.isReg() && "Expected base to be a register operand");
   assert(Offset.isReg() && "Expected offset to be a register operand");

@@ -5270,8 +5271,8 @@ AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
 ///
 /// Where x2 is the base register, and x3 is an offset register.
 ///
-/// When possible (or profitable) to fold a G_PTR_ADD into the address calculation,
-/// this will do so. Otherwise, it will return None.
+/// When possible (or profitable) to fold a G_PTR_ADD into the address
+/// calculation, this will do so. Otherwise, it will return None.
 InstructionSelector::ComplexRendererFns
 AArch64InstructionSelector::selectAddrModeRegisterOffset(
     MachineOperand &Root) const {
@@ -5337,8 +5338,7 @@ AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,

     // Skip immediates that can be selected in the load/store addresing
     // mode.
-    if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
-        ImmOff < (0x1000 << Scale))
+    if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale))
       return None;

     // Helper lambda to decide whether or not it is preferable to emit an add.
@@ -5488,9 +5488,8 @@ AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
 }

 InstructionSelector::ComplexRendererFns
-AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
-                                                 unsigned Size,
-                                                 MachineRegisterInfo &MRI) const {
+AArch64InstructionSelector::tryFoldAddLowIntoImm(
+    MachineInstr &RootDef, unsigned Size, MachineRegisterInfo &MRI) const {
   if (RootDef.getOpcode() != AArch64::G_ADD_LOW)
     return None;
   MachineInstr &Adrp = *MRI.getVRegDef(RootDef.getOperand(1).getReg());
@@ -5498,7 +5497,8 @@ AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
     return None;

   // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG.
-  // TODO: Need to check GV's offset % size if doing offset folding into globals.
+  // TODO: Need to check GV's offset % size if doing offset folding into
+  // globals.
   assert(Adrp.getOperand(1).getOffset() == 0 && "Unexpected offset in global");
   auto GV = Adrp.getOperand(1).getGlobal();
   if (GV->isThreadLocal())
@@ -5755,11 +5755,10 @@ Register AArch64InstructionSelector::widenGPRBankRegIfNeeded(
   assert(SubReg && "Couldn't determine subregister?");

   // Build the SUBREG_TO_REG and return the new, widened register.
-  auto SubRegToReg =
-      MIB.buildInstr(AArch64::SUBREG_TO_REG, {WideRC}, {})
-          .addImm(0)
-          .addUse(Reg)
-          .addImm(SubReg);
+  auto SubRegToReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, {WideRC}, {})
+                         .addImm(0)
+                         .addUse(Reg)
+                         .addImm(SubReg);
   constrainSelectedInstRegOperands(*SubRegToReg, TII, TRI, RBI);
   return SubRegToReg.getReg(0);
 }
@@ -5843,8 +5842,9 @@ void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
   MIB.addImm(CstVal.getValue());
 }

-void AArch64InstructionSelector::renderLogicalImm32(
-    MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
+void AArch64InstructionSelector::renderLogicalImm32(MachineInstrBuilder &MIB,
+                                                    const MachineInstr &I,
+                                                    int OpIdx) const {
   assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
          "Expected G_CONSTANT");
   uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
@@ -5852,8 +5852,9 @@ void AArch64InstructionSelector::renderLogicalImm32(
   MIB.addImm(Enc);
 }

-void AArch64InstructionSelector::renderLogicalImm64(
-    MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
+void AArch64InstructionSelector::renderLogicalImm64(MachineInstrBuilder &MIB,
+                                                    const MachineInstr &I,
+                                                    int OpIdx) const {
   assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
          "Expected G_CONSTANT");
   uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
@@ -5890,7 +5891,6 @@ bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
   }
 }

-
 // Perform fixups on the given PHI instruction's operands to force them all
 // to be the same as the destination regbank.
 static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI,
@@ -5986,4 +5986,4 @@ createAArch64InstructionSelector(const AArch64TargetMachine &TM,
                                  AArch64RegisterBankInfo &RBI) {
   return new AArch64InstructionSelector(TM, Subtarget, RBI);
 }
-}
+} // namespace llvm
@@ -431,3 +431,46 @@ body: |
     $q0 = COPY %3(<4 x s32>)
     $q1 = COPY %4(<4 x s32>)
 ...
+---
+name: test_shl_v8s8
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: test_shl_v8s8
+    ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<8 x s8>) = COPY $d1
+    ; CHECK: [[SHL:%[0-9]+]]:_(<8 x s8>) = G_SHL [[COPY]], [[COPY1]](<8 x s8>)
+    ; CHECK: $d0 = COPY [[SHL]](<8 x s8>)
+    %0:_(<8 x s8>) = COPY $d0
+    %1:_(<8 x s8>) = COPY $d1
+    %2:_(<8 x s8>) = G_SHL %0, %1
+    $d0 = COPY %2
+...
+---
+name: test_ashr_v8s8
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: test_ashr_v8s8
+    ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<8 x s8>) = COPY $d1
+    ; CHECK: [[ASHR:%[0-9]+]]:_(<8 x s8>) = G_ASHR [[COPY]], [[COPY1]](<8 x s8>)
+    ; CHECK: $d0 = COPY [[ASHR]](<8 x s8>)
+    %0:_(<8 x s8>) = COPY $d0
+    %1:_(<8 x s8>) = COPY $d1
+    %2:_(<8 x s8>) = G_ASHR %0, %1
+    $d0 = COPY %2
+...
+
+---
+name: test_lshr_v8s8
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: test_lshr_v8s8
+    ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<8 x s8>) = COPY $d1
+    ; CHECK: [[LSHR:%[0-9]+]]:_(<8 x s8>) = G_LSHR [[COPY]], [[COPY1]](<8 x s8>)
+    ; CHECK: $d0 = COPY [[LSHR]](<8 x s8>)
+    %0:_(<8 x s8>) = COPY $d0
+    %1:_(<8 x s8>) = COPY $d1
+    %2:_(<8 x s8>) = G_LSHR %0, %1
+    $d0 = COPY %2
+...
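The three new MIR cases check that the legalizer now leaves <8 x s8> G_SHL, G_ASHR, and G_LSHR intact rather than widening or scalarizing them; the generic opcodes surviving into the CHECK lines are the point of the test. The test file's name and RUN line are not shown in this view; such files are typically driven by something like `# RUN: llc -mtriple=aarch64 -run-pass=legalizer %s -o - | FileCheck %s` at the top (assumed, not from this diff).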