2012-02-18 20:03:15 +08:00
|
|
|
//===-- ARMExpandPseudoInsts.cpp - Expand pseudo instructions -------------===//
|
2009-11-07 07:52:48 +08:00
|
|
|
//
|
2019-01-19 16:50:56 +08:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2009-11-07 07:52:48 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
2010-09-09 07:39:54 +08:00
|
|
|
// This file contains a pass that expands pseudo instructions into target
|
2009-11-07 07:52:48 +08:00
|
|
|
// instructions to allow proper scheduling, if-conversion, and other late
|
|
|
|
// optimizations. This pass should be run after register allocation but before
|
2010-09-09 07:39:54 +08:00
|
|
|
// the post-regalloc scheduling pass.
|
2009-11-07 07:52:48 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#include "ARM.h"
|
|
|
|
#include "ARMBaseInstrInfo.h"
|
2010-10-20 07:27:08 +08:00
|
|
|
#include "ARMBaseRegisterInfo.h"
|
2013-12-02 18:35:41 +08:00
|
|
|
#include "ARMConstantPoolValue.h"
|
2010-10-20 07:27:08 +08:00
|
|
|
#include "ARMMachineFunctionInfo.h"
|
2017-01-27 07:40:06 +08:00
|
|
|
#include "ARMSubtarget.h"
|
2011-07-21 07:34:39 +08:00
|
|
|
#include "MCTargetDesc/ARMAddressingModes.h"
|
2016-04-19 05:48:55 +08:00
|
|
|
#include "llvm/CodeGen/LivePhysRegs.h"
|
2010-10-20 07:27:08 +08:00
|
|
|
#include "llvm/CodeGen/MachineFrameInfo.h"
|
2009-11-07 07:52:48 +08:00
|
|
|
#include "llvm/CodeGen/MachineFunctionPass.h"
|
2019-05-24 16:25:02 +08:00
|
|
|
#include "llvm/Support/Debug.h"
|
2017-01-27 07:40:06 +08:00
|
|
|
|
2009-11-07 07:52:48 +08:00
|
|
|
using namespace llvm;
|
|
|
|
|
2014-04-22 10:41:26 +08:00
|
|
|
#define DEBUG_TYPE "arm-pseudo"
|
|
|
|
|
2011-08-19 09:42:18 +08:00
|
|
|
// Hidden boolean command-line switch "verify-arm-pseudo-expand"; per its
// description it requests verification of the machine code once the ARM
// pseudos have been expanded.
static cl::opt<bool> VerifyARMPseudo(
    "verify-arm-pseudo-expand", cl::Hidden,
    cl::desc("Verify machine code after expanding ARM pseudos"));
|
|
|
|
|
2017-09-06 06:45:23 +08:00
|
|
|
#define ARM_EXPAND_PSEUDO_NAME "ARM pseudo instruction expansion pass"
|
|
|
|
|
2009-11-07 07:52:48 +08:00
|
|
|
namespace {
|
|
|
|
class ARMExpandPseudo : public MachineFunctionPass {
|
|
|
|
public:
|
|
|
|
static char ID;
|
2010-08-07 02:33:48 +08:00
|
|
|
ARMExpandPseudo() : MachineFunctionPass(ID) {}
|
2009-11-07 07:52:48 +08:00
|
|
|
|
2010-10-20 07:27:08 +08:00
|
|
|
const ARMBaseInstrInfo *TII;
|
2010-05-13 08:17:02 +08:00
|
|
|
const TargetRegisterInfo *TRI;
|
2010-11-13 07:03:38 +08:00
|
|
|
const ARMSubtarget *STI;
|
2011-01-20 16:34:58 +08:00
|
|
|
ARMFunctionInfo *AFI;
|
2009-11-07 07:52:48 +08:00
|
|
|
|
2014-03-10 10:09:33 +08:00
|
|
|
bool runOnMachineFunction(MachineFunction &Fn) override;
|
2009-11-07 07:52:48 +08:00
|
|
|
|
2016-04-05 01:09:25 +08:00
|
|
|
MachineFunctionProperties getRequiredProperties() const override {
|
|
|
|
return MachineFunctionProperties().set(
|
2016-08-25 09:27:13 +08:00
|
|
|
MachineFunctionProperties::Property::NoVRegs);
|
2016-04-05 01:09:25 +08:00
|
|
|
}
|
|
|
|
|
2016-10-01 10:56:57 +08:00
|
|
|
StringRef getPassName() const override {
|
2017-09-06 06:45:23 +08:00
|
|
|
return ARM_EXPAND_PSEUDO_NAME;
|
2009-11-07 07:52:48 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
2010-05-13 07:13:12 +08:00
|
|
|
void TransferImpOps(MachineInstr &OldMI,
|
|
|
|
MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI);
|
2011-01-20 16:34:58 +08:00
|
|
|
bool ExpandMI(MachineBasicBlock &MBB,
|
2016-04-19 05:48:55 +08:00
|
|
|
MachineBasicBlock::iterator MBBI,
|
|
|
|
MachineBasicBlock::iterator &NextMBBI);
|
2009-11-07 07:52:48 +08:00
|
|
|
bool ExpandMBB(MachineBasicBlock &MBB);
|
2010-09-14 07:01:35 +08:00
|
|
|
void ExpandVLD(MachineBasicBlock::iterator &MBBI);
|
|
|
|
void ExpandVST(MachineBasicBlock::iterator &MBBI);
|
|
|
|
void ExpandLaneOp(MachineBasicBlock::iterator &MBBI);
|
2010-09-14 07:55:10 +08:00
|
|
|
void ExpandVTBL(MachineBasicBlock::iterator &MBBI,
|
2011-12-16 06:27:11 +08:00
|
|
|
unsigned Opc, bool IsExt);
|
2011-01-20 16:34:58 +08:00
|
|
|
void ExpandMOV32BitImm(MachineBasicBlock &MBB,
|
|
|
|
MachineBasicBlock::iterator &MBBI);
|
2016-04-19 05:48:55 +08:00
|
|
|
bool ExpandCMP_SWAP(MachineBasicBlock &MBB,
|
|
|
|
MachineBasicBlock::iterator MBBI, unsigned LdrexOp,
|
|
|
|
unsigned StrexOp, unsigned UxtOp,
|
|
|
|
MachineBasicBlock::iterator &NextMBBI);
|
|
|
|
|
|
|
|
bool ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
|
|
|
|
MachineBasicBlock::iterator MBBI,
|
|
|
|
MachineBasicBlock::iterator &NextMBBI);
|
2009-11-07 07:52:48 +08:00
|
|
|
};
|
|
|
|
char ARMExpandPseudo::ID = 0;
|
2015-06-23 17:49:53 +08:00
|
|
|
}
|
2009-11-07 07:52:48 +08:00
|
|
|
|
2017-09-06 06:45:23 +08:00
|
|
|
// Pass registration: ARMExpandPseudo is registered under DEBUG_TYPE with
// ARM_EXPAND_PSEUDO_NAME as its description; both trailing flags are false.
INITIALIZE_PASS(ARMExpandPseudo, DEBUG_TYPE, ARM_EXPAND_PSEUDO_NAME,
                false, false)
|
|
|
|
|
2010-05-13 07:13:12 +08:00
|
|
|
/// TransferImpOps - Transfer implicit operands on the pseudo instruction to
|
|
|
|
/// the instructions created from the expansion.
|
|
|
|
void ARMExpandPseudo::TransferImpOps(MachineInstr &OldMI,
|
|
|
|
MachineInstrBuilder &UseMI,
|
|
|
|
MachineInstrBuilder &DefMI) {
|
2011-06-29 03:10:37 +08:00
|
|
|
const MCInstrDesc &Desc = OldMI.getDesc();
|
2010-05-13 07:13:12 +08:00
|
|
|
for (unsigned i = Desc.getNumOperands(), e = OldMI.getNumOperands();
|
|
|
|
i != e; ++i) {
|
|
|
|
const MachineOperand &MO = OldMI.getOperand(i);
|
|
|
|
assert(MO.isReg() && MO.getReg());
|
|
|
|
if (MO.isUse())
|
2017-01-13 17:58:52 +08:00
|
|
|
UseMI.add(MO);
|
2010-05-13 07:13:12 +08:00
|
|
|
else
|
2017-01-13 17:58:52 +08:00
|
|
|
DefMI.add(MO);
|
2010-05-13 07:13:12 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2010-09-14 07:01:35 +08:00
|
|
|
namespace {
|
|
|
|
// Constants for register spacing in NEON load/store instructions.
|
|
|
|
// For quad-register load-lane and store-lane pseudo instructions, the
|
|
|
|
// spacing is initially assumed to be EvenDblSpc, and that is changed to
|
|
|
|
// OddDblSpc depending on the lane number operand.
|
|
|
|
// Register-spacing kinds for NEON load/store expansion. The underlying type
// is fixed to uint8_t because the load/store table keeps this value in a
// uint8_t field (NEONLdStTableEntry::RegSpacing), so every enumerator is
// guaranteed to fit there. Enumerator order (and therefore value) is part of
// the contract and must not change.
enum NEONRegSpacing : uint8_t {
  SingleSpc,      // NOTE(review): presumably plain single spacing - confirm
                  // against the users of this enum.
  SingleLowSpc,   // Single spacing, low registers, three and four vectors.
  SingleHighQSpc, // Single spacing, high registers, four vectors.
  SingleHighTSpc, // Single spacing, high registers, three vectors.
  EvenDblSpc,     // NOTE(review): presumably double spacing starting at an
                  // even register - confirm.
  OddDblSpc       // NOTE(review): presumably double spacing starting at an
                  // odd register - confirm.
};
|
|
|
|
|
|
|
|
// Entries for NEON load/store information table. The table is sorted by
|
|
|
|
// PseudoOpc for fast binary-search lookups.
|
|
|
|
struct NEONLdStTableEntry {
|
2012-03-11 15:16:55 +08:00
|
|
|
uint16_t PseudoOpc;
|
|
|
|
uint16_t RealOpc;
|
2010-09-14 07:01:35 +08:00
|
|
|
bool IsLoad;
|
2011-11-01 03:11:23 +08:00
|
|
|
bool isUpdating;
|
|
|
|
bool hasWritebackOperand;
|
2012-09-20 14:14:08 +08:00
|
|
|
uint8_t RegSpacing; // One of type NEONRegSpacing
|
|
|
|
uint8_t NumRegs; // D registers loaded or stored
|
|
|
|
uint8_t RegElts; // elements per D register; used for lane ops
|
2011-10-22 02:54:25 +08:00
|
|
|
// FIXME: Temporary flag to denote whether the real instruction takes
|
|
|
|
// a single register (like the encoding) or all of the registers in
|
|
|
|
// the list (like the asm syntax and the isel DAG). When all definitions
|
|
|
|
// are converted to take only the single encoded register, this will
|
|
|
|
// go away.
|
|
|
|
bool copyAllListRegs;
|
2010-09-14 07:01:35 +08:00
|
|
|
|
|
|
|
// Comparison methods for binary search of the table.
|
|
|
|
bool operator<(const NEONLdStTableEntry &TE) const {
|
|
|
|
return PseudoOpc < TE.PseudoOpc;
|
|
|
|
}
|
|
|
|
friend bool operator<(const NEONLdStTableEntry &TE, unsigned PseudoOpc) {
|
|
|
|
return TE.PseudoOpc < PseudoOpc;
|
|
|
|
}
|
2010-10-23 16:10:43 +08:00
|
|
|
friend bool LLVM_ATTRIBUTE_UNUSED operator<(unsigned PseudoOpc,
|
|
|
|
const NEONLdStTableEntry &TE) {
|
2010-09-14 07:01:35 +08:00
|
|
|
return PseudoOpc < TE.PseudoOpc;
|
|
|
|
}
|
|
|
|
};
|
2015-06-23 17:49:53 +08:00
|
|
|
}
|
2010-09-14 07:01:35 +08:00
|
|
|
|
|
|
|
static const NEONLdStTableEntry NEONLdStTable[] = {
|
2011-11-01 03:11:23 +08:00
|
|
|
{ ARM::VLD1LNq16Pseudo, ARM::VLD1LNd16, true, false, false, EvenDblSpc, 1, 4 ,true},
|
|
|
|
{ ARM::VLD1LNq16Pseudo_UPD, ARM::VLD1LNd16_UPD, true, true, true, EvenDblSpc, 1, 4 ,true},
|
|
|
|
{ ARM::VLD1LNq32Pseudo, ARM::VLD1LNd32, true, false, false, EvenDblSpc, 1, 2 ,true},
|
|
|
|
{ ARM::VLD1LNq32Pseudo_UPD, ARM::VLD1LNd32_UPD, true, true, true, EvenDblSpc, 1, 2 ,true},
|
|
|
|
{ ARM::VLD1LNq8Pseudo, ARM::VLD1LNd8, true, false, false, EvenDblSpc, 1, 8 ,true},
|
|
|
|
{ ARM::VLD1LNq8Pseudo_UPD, ARM::VLD1LNd8_UPD, true, true, true, EvenDblSpc, 1, 8 ,true},
|
|
|
|
|
2018-06-03 00:40:03 +08:00
|
|
|
{ ARM::VLD1d16QPseudo, ARM::VLD1d16Q, true, false, false, SingleSpc, 4, 4 ,false},
|
|
|
|
{ ARM::VLD1d16TPseudo, ARM::VLD1d16T, true, false, false, SingleSpc, 3, 4 ,false},
|
|
|
|
{ ARM::VLD1d32QPseudo, ARM::VLD1d32Q, true, false, false, SingleSpc, 4, 2 ,false},
|
|
|
|
{ ARM::VLD1d32TPseudo, ARM::VLD1d32T, true, false, false, SingleSpc, 3, 2 ,false},
|
2011-11-01 03:11:23 +08:00
|
|
|
{ ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, false, SingleSpc, 4, 1 ,false},
|
2014-01-16 17:16:13 +08:00
|
|
|
{ ARM::VLD1d64QPseudoWB_fixed, ARM::VLD1d64Qwb_fixed, true, true, false, SingleSpc, 4, 1 ,false},
|
2018-03-02 21:02:55 +08:00
|
|
|
{ ARM::VLD1d64QPseudoWB_register, ARM::VLD1d64Qwb_register, true, true, true, SingleSpc, 4, 1 ,false},
|
2011-11-01 03:11:23 +08:00
|
|
|
{ ARM::VLD1d64TPseudo, ARM::VLD1d64T, true, false, false, SingleSpc, 3, 1 ,false},
|
2014-01-16 17:16:13 +08:00
|
|
|
{ ARM::VLD1d64TPseudoWB_fixed, ARM::VLD1d64Twb_fixed, true, true, false, SingleSpc, 3, 1 ,false},
|
2018-03-02 21:02:55 +08:00
|
|
|
{ ARM::VLD1d64TPseudoWB_register, ARM::VLD1d64Twb_register, true, true, true, SingleSpc, 3, 1 ,false},
|
2018-06-03 00:40:03 +08:00
|
|
|
{ ARM::VLD1d8QPseudo, ARM::VLD1d8Q, true, false, false, SingleSpc, 4, 8 ,false},
|
|
|
|
{ ARM::VLD1d8TPseudo, ARM::VLD1d8T, true, false, false, SingleSpc, 3, 8 ,false},
|
|
|
|
{ ARM::VLD1q16HighQPseudo, ARM::VLD1d16Q, true, false, false, SingleHighQSpc, 4, 4 ,false},
|
|
|
|
{ ARM::VLD1q16HighTPseudo, ARM::VLD1d16T, true, false, false, SingleHighTSpc, 3, 4 ,false},
|
|
|
|
{ ARM::VLD1q16LowQPseudo_UPD, ARM::VLD1d16Qwb_fixed, true, true, true, SingleLowSpc, 4, 4 ,false},
|
|
|
|
{ ARM::VLD1q16LowTPseudo_UPD, ARM::VLD1d16Twb_fixed, true, true, true, SingleLowSpc, 3, 4 ,false},
|
|
|
|
{ ARM::VLD1q32HighQPseudo, ARM::VLD1d32Q, true, false, false, SingleHighQSpc, 4, 2 ,false},
|
|
|
|
{ ARM::VLD1q32HighTPseudo, ARM::VLD1d32T, true, false, false, SingleHighTSpc, 3, 2 ,false},
|
|
|
|
{ ARM::VLD1q32LowQPseudo_UPD, ARM::VLD1d32Qwb_fixed, true, true, true, SingleLowSpc, 4, 2 ,false},
|
|
|
|
{ ARM::VLD1q32LowTPseudo_UPD, ARM::VLD1d32Twb_fixed, true, true, true, SingleLowSpc, 3, 2 ,false},
|
|
|
|
{ ARM::VLD1q64HighQPseudo, ARM::VLD1d64Q, true, false, false, SingleHighQSpc, 4, 1 ,false},
|
|
|
|
{ ARM::VLD1q64HighTPseudo, ARM::VLD1d64T, true, false, false, SingleHighTSpc, 3, 1 ,false},
|
|
|
|
{ ARM::VLD1q64LowQPseudo_UPD, ARM::VLD1d64Qwb_fixed, true, true, true, SingleLowSpc, 4, 1 ,false},
|
|
|
|
{ ARM::VLD1q64LowTPseudo_UPD, ARM::VLD1d64Twb_fixed, true, true, true, SingleLowSpc, 3, 1 ,false},
|
|
|
|
{ ARM::VLD1q8HighQPseudo, ARM::VLD1d8Q, true, false, false, SingleHighQSpc, 4, 8 ,false},
|
|
|
|
{ ARM::VLD1q8HighTPseudo, ARM::VLD1d8T, true, false, false, SingleHighTSpc, 3, 8 ,false},
|
|
|
|
{ ARM::VLD1q8LowQPseudo_UPD, ARM::VLD1d8Qwb_fixed, true, true, true, SingleLowSpc, 4, 8 ,false},
|
|
|
|
{ ARM::VLD1q8LowTPseudo_UPD, ARM::VLD1d8Twb_fixed, true, true, true, SingleLowSpc, 3, 8 ,false},
|
2011-11-01 03:11:23 +08:00
|
|
|
|
[NEON] Support vldNq intrinsics in AArch32 (LLVM part)
This patch adds support for the q versions of the dup
(load-to-all-lanes) NEON intrinsics, such as vld2q_dup_f16() for
example.
Currently, non-q versions of the dup intrinsics are implemented
in clang by generating IR that first loads the elements of the
structure into the first lane with the lane (to-single-lane)
intrinsics, and then propagating it to other lanes. There are at
least two problems with this approach. First, there are no
double-spaced to-single-lane byte-element instructions. For
example, there is no such instruction as 'vld2.8 { d0[0], d2[0]
}, [r0]'. That means we cannot rely on the to-single-lane
intrinsics and instructions to implement the q versions of the
dup intrinsics. Note that to-all-lanes instructions do support
all sizes of data items, including bytes.
The second problem with the current approach is that we need a
separate vdup instruction to propagate the structure to each
lane. So for vld4q_dup_f16() we would need four vdup instructions
in addition to the initial vld instruction.
This patch introduces dup LLVM intrinsics and reworks handling of
the currently supported (non-q) NEON dup intrinsics to expand
them into those LLVM intrinsics, thus eliminating the need for
using to-single-lane intrinsics and instructions.
Additionally, this patch adds support for u64 and s64 dup NEON
intrinsics. These are marked as AArch64-only in the ARM NEON
Reference, but it seems there are no reasons to not support them
in AArch32 mode. Please correct, if that is wrong.
That's what we generate with this patch applied:
vld2q_dup_f16:
vld2.16 {d0[], d2[]}, [r0]
vld2.16 {d1[], d3[]}, [r0]
vld3q_dup_f16:
vld3.16 {d0[], d2[], d4[]}, [r0]
vld3.16 {d1[], d3[], d5[]}, [r0]
vld4q_dup_f16:
vld4.16 {d0[], d2[], d4[], d6[]}, [r0]
vld4.16 {d1[], d3[], d5[], d7[]}, [r0]
Differential Revision: https://reviews.llvm.org/D48439
llvm-svn: 335733
2018-06-27 21:57:52 +08:00
|
|
|
{ ARM::VLD2DUPq16EvenPseudo, ARM::VLD2DUPd16x2, true, false, false, EvenDblSpc, 2, 4 ,false},
|
|
|
|
{ ARM::VLD2DUPq16OddPseudo, ARM::VLD2DUPd16x2, true, false, false, OddDblSpc, 2, 4 ,false},
|
|
|
|
{ ARM::VLD2DUPq32EvenPseudo, ARM::VLD2DUPd32x2, true, false, false, EvenDblSpc, 2, 2 ,false},
|
|
|
|
{ ARM::VLD2DUPq32OddPseudo, ARM::VLD2DUPd32x2, true, false, false, OddDblSpc, 2, 2 ,false},
|
|
|
|
{ ARM::VLD2DUPq8EvenPseudo, ARM::VLD2DUPd8x2, true, false, false, EvenDblSpc, 2, 8 ,false},
|
|
|
|
{ ARM::VLD2DUPq8OddPseudo, ARM::VLD2DUPd8x2, true, false, false, OddDblSpc, 2, 8 ,false},
|
|
|
|
|
2011-11-01 03:11:23 +08:00
|
|
|
{ ARM::VLD2LNd16Pseudo, ARM::VLD2LNd16, true, false, false, SingleSpc, 2, 4 ,true},
|
|
|
|
{ ARM::VLD2LNd16Pseudo_UPD, ARM::VLD2LNd16_UPD, true, true, true, SingleSpc, 2, 4 ,true},
|
|
|
|
{ ARM::VLD2LNd32Pseudo, ARM::VLD2LNd32, true, false, false, SingleSpc, 2, 2 ,true},
|
|
|
|
{ ARM::VLD2LNd32Pseudo_UPD, ARM::VLD2LNd32_UPD, true, true, true, SingleSpc, 2, 2 ,true},
|
|
|
|
{ ARM::VLD2LNd8Pseudo, ARM::VLD2LNd8, true, false, false, SingleSpc, 2, 8 ,true},
|
|
|
|
{ ARM::VLD2LNd8Pseudo_UPD, ARM::VLD2LNd8_UPD, true, true, true, SingleSpc, 2, 8 ,true},
|
|
|
|
{ ARM::VLD2LNq16Pseudo, ARM::VLD2LNq16, true, false, false, EvenDblSpc, 2, 4 ,true},
|
|
|
|
{ ARM::VLD2LNq16Pseudo_UPD, ARM::VLD2LNq16_UPD, true, true, true, EvenDblSpc, 2, 4 ,true},
|
|
|
|
{ ARM::VLD2LNq32Pseudo, ARM::VLD2LNq32, true, false, false, EvenDblSpc, 2, 2 ,true},
|
|
|
|
{ ARM::VLD2LNq32Pseudo_UPD, ARM::VLD2LNq32_UPD, true, true, true, EvenDblSpc, 2, 2 ,true},
|
|
|
|
|
|
|
|
{ ARM::VLD2q16Pseudo, ARM::VLD2q16, true, false, false, SingleSpc, 4, 4 ,false},
|
2011-12-10 05:28:25 +08:00
|
|
|
{ ARM::VLD2q16PseudoWB_fixed, ARM::VLD2q16wb_fixed, true, true, false, SingleSpc, 4, 4 ,false},
|
|
|
|
{ ARM::VLD2q16PseudoWB_register, ARM::VLD2q16wb_register, true, true, true, SingleSpc, 4, 4 ,false},
|
2011-11-01 03:11:23 +08:00
|
|
|
{ ARM::VLD2q32Pseudo, ARM::VLD2q32, true, false, false, SingleSpc, 4, 2 ,false},
|
2011-12-10 05:28:25 +08:00
|
|
|
{ ARM::VLD2q32PseudoWB_fixed, ARM::VLD2q32wb_fixed, true, true, false, SingleSpc, 4, 2 ,false},
|
|
|
|
{ ARM::VLD2q32PseudoWB_register, ARM::VLD2q32wb_register, true, true, true, SingleSpc, 4, 2 ,false},
|
2011-11-01 03:11:23 +08:00
|
|
|
{ ARM::VLD2q8Pseudo, ARM::VLD2q8, true, false, false, SingleSpc, 4, 8 ,false},
|
2011-12-10 05:28:25 +08:00
|
|
|
{ ARM::VLD2q8PseudoWB_fixed, ARM::VLD2q8wb_fixed, true, true, false, SingleSpc, 4, 8 ,false},
|
|
|
|
{ ARM::VLD2q8PseudoWB_register, ARM::VLD2q8wb_register, true, true, true, SingleSpc, 4, 8 ,false},
|
2011-11-01 03:11:23 +08:00
|
|
|
|
|
|
|
{ ARM::VLD3DUPd16Pseudo, ARM::VLD3DUPd16, true, false, false, SingleSpc, 3, 4,true},
|
|
|
|
{ ARM::VLD3DUPd16Pseudo_UPD, ARM::VLD3DUPd16_UPD, true, true, true, SingleSpc, 3, 4,true},
|
|
|
|
{ ARM::VLD3DUPd32Pseudo, ARM::VLD3DUPd32, true, false, false, SingleSpc, 3, 2,true},
|
|
|
|
{ ARM::VLD3DUPd32Pseudo_UPD, ARM::VLD3DUPd32_UPD, true, true, true, SingleSpc, 3, 2,true},
|
|
|
|
{ ARM::VLD3DUPd8Pseudo, ARM::VLD3DUPd8, true, false, false, SingleSpc, 3, 8,true},
|
|
|
|
{ ARM::VLD3DUPd8Pseudo_UPD, ARM::VLD3DUPd8_UPD, true, true, true, SingleSpc, 3, 8,true},
|
[NEON] Support vldNq intrinsics in AArch32 (LLVM part)
This patch adds support for the q versions of the dup
(load-to-all-lanes) NEON intrinsics, such as vld2q_dup_f16() for
example.
Currently, non-q versions of the dup intrinsics are implemented
in clang by generating IR that first loads the elements of the
structure into the first lane with the lane (to-single-lane)
intrinsics, and then propagating it to other lanes. There are at
least two problems with this approach. First, there are no
double-spaced to-single-lane byte-element instructions. For
example, there is no such instruction as 'vld2.8 { d0[0], d2[0]
}, [r0]'. That means we cannot rely on the to-single-lane
intrinsics and instructions to implement the q versions of the
dup intrinsics. Note that to-all-lanes instructions do support
all sizes of data items, including bytes.
The second problem with the current approach is that we need a
separate vdup instruction to propagate the structure to each
lane. So for vld4q_dup_f16() we would need four vdup instructions
in addition to the initial vld instruction.
This patch introduces dup LLVM intrinsics and reworks handling of
the currently supported (non-q) NEON dup intrinsics to expand
them into those LLVM intrinsics, thus eliminating the need for
using to-single-lane intrinsics and instructions.
Additionally, this patch adds support for u64 and s64 dup NEON
intrinsics. These are marked as AArch64-only in the ARM NEON
Reference, but it seems there are no reasons to not support them
in AArch32 mode. Please correct, if that is wrong.
That's what we generate with this patch applied:
vld2q_dup_f16:
vld2.16 {d0[], d2[]}, [r0]
vld2.16 {d1[], d3[]}, [r0]
vld3q_dup_f16:
vld3.16 {d0[], d2[], d4[]}, [r0]
vld3.16 {d1[], d3[], d5[]}, [r0]
vld4q_dup_f16:
vld4.16 {d0[], d2[], d4[], d6[]}, [r0]
vld4.16 {d1[], d3[], d5[], d7[]}, [r0]
Differential Revision: https://reviews.llvm.org/D48439
llvm-svn: 335733
2018-06-27 21:57:52 +08:00
|
|
|
{ ARM::VLD3DUPq16EvenPseudo, ARM::VLD3DUPq16, true, false, false, EvenDblSpc, 3, 4 ,true},
|
|
|
|
{ ARM::VLD3DUPq16OddPseudo, ARM::VLD3DUPq16, true, false, false, OddDblSpc, 3, 4 ,true},
|
|
|
|
{ ARM::VLD3DUPq32EvenPseudo, ARM::VLD3DUPq32, true, false, false, EvenDblSpc, 3, 2 ,true},
|
|
|
|
{ ARM::VLD3DUPq32OddPseudo, ARM::VLD3DUPq32, true, false, false, OddDblSpc, 3, 2 ,true},
|
|
|
|
{ ARM::VLD3DUPq8EvenPseudo, ARM::VLD3DUPq8, true, false, false, EvenDblSpc, 3, 8 ,true},
|
|
|
|
{ ARM::VLD3DUPq8OddPseudo, ARM::VLD3DUPq8, true, false, false, OddDblSpc, 3, 8 ,true},
|
2011-11-01 03:11:23 +08:00
|
|
|
|
|
|
|
{ ARM::VLD3LNd16Pseudo, ARM::VLD3LNd16, true, false, false, SingleSpc, 3, 4 ,true},
|
|
|
|
{ ARM::VLD3LNd16Pseudo_UPD, ARM::VLD3LNd16_UPD, true, true, true, SingleSpc, 3, 4 ,true},
|
|
|
|
{ ARM::VLD3LNd32Pseudo, ARM::VLD3LNd32, true, false, false, SingleSpc, 3, 2 ,true},
|
|
|
|
{ ARM::VLD3LNd32Pseudo_UPD, ARM::VLD3LNd32_UPD, true, true, true, SingleSpc, 3, 2 ,true},
|
|
|
|
{ ARM::VLD3LNd8Pseudo, ARM::VLD3LNd8, true, false, false, SingleSpc, 3, 8 ,true},
|
|
|
|
{ ARM::VLD3LNd8Pseudo_UPD, ARM::VLD3LNd8_UPD, true, true, true, SingleSpc, 3, 8 ,true},
|
|
|
|
{ ARM::VLD3LNq16Pseudo, ARM::VLD3LNq16, true, false, false, EvenDblSpc, 3, 4 ,true},
|
|
|
|
{ ARM::VLD3LNq16Pseudo_UPD, ARM::VLD3LNq16_UPD, true, true, true, EvenDblSpc, 3, 4 ,true},
|
|
|
|
{ ARM::VLD3LNq32Pseudo, ARM::VLD3LNq32, true, false, false, EvenDblSpc, 3, 2 ,true},
|
|
|
|
{ ARM::VLD3LNq32Pseudo_UPD, ARM::VLD3LNq32_UPD, true, true, true, EvenDblSpc, 3, 2 ,true},
|
|
|
|
|
|
|
|
{ ARM::VLD3d16Pseudo, ARM::VLD3d16, true, false, false, SingleSpc, 3, 4 ,true},
|
|
|
|
{ ARM::VLD3d16Pseudo_UPD, ARM::VLD3d16_UPD, true, true, true, SingleSpc, 3, 4 ,true},
|
|
|
|
{ ARM::VLD3d32Pseudo, ARM::VLD3d32, true, false, false, SingleSpc, 3, 2 ,true},
|
|
|
|
{ ARM::VLD3d32Pseudo_UPD, ARM::VLD3d32_UPD, true, true, true, SingleSpc, 3, 2 ,true},
|
|
|
|
{ ARM::VLD3d8Pseudo, ARM::VLD3d8, true, false, false, SingleSpc, 3, 8 ,true},
|
|
|
|
{ ARM::VLD3d8Pseudo_UPD, ARM::VLD3d8_UPD, true, true, true, SingleSpc, 3, 8 ,true},
|
|
|
|
|
|
|
|
{ ARM::VLD3q16Pseudo_UPD, ARM::VLD3q16_UPD, true, true, true, EvenDblSpc, 3, 4 ,true},
|
|
|
|
{ ARM::VLD3q16oddPseudo, ARM::VLD3q16, true, false, false, OddDblSpc, 3, 4 ,true},
|
|
|
|
{ ARM::VLD3q16oddPseudo_UPD, ARM::VLD3q16_UPD, true, true, true, OddDblSpc, 3, 4 ,true},
|
|
|
|
{ ARM::VLD3q32Pseudo_UPD, ARM::VLD3q32_UPD, true, true, true, EvenDblSpc, 3, 2 ,true},
|
|
|
|
{ ARM::VLD3q32oddPseudo, ARM::VLD3q32, true, false, false, OddDblSpc, 3, 2 ,true},
|
|
|
|
{ ARM::VLD3q32oddPseudo_UPD, ARM::VLD3q32_UPD, true, true, true, OddDblSpc, 3, 2 ,true},
|
|
|
|
{ ARM::VLD3q8Pseudo_UPD, ARM::VLD3q8_UPD, true, true, true, EvenDblSpc, 3, 8 ,true},
|
|
|
|
{ ARM::VLD3q8oddPseudo, ARM::VLD3q8, true, false, false, OddDblSpc, 3, 8 ,true},
|
|
|
|
{ ARM::VLD3q8oddPseudo_UPD, ARM::VLD3q8_UPD, true, true, true, OddDblSpc, 3, 8 ,true},
|
|
|
|
|
|
|
|
{ ARM::VLD4DUPd16Pseudo, ARM::VLD4DUPd16, true, false, false, SingleSpc, 4, 4,true},
|
|
|
|
{ ARM::VLD4DUPd16Pseudo_UPD, ARM::VLD4DUPd16_UPD, true, true, true, SingleSpc, 4, 4,true},
|
|
|
|
{ ARM::VLD4DUPd32Pseudo, ARM::VLD4DUPd32, true, false, false, SingleSpc, 4, 2,true},
|
|
|
|
{ ARM::VLD4DUPd32Pseudo_UPD, ARM::VLD4DUPd32_UPD, true, true, true, SingleSpc, 4, 2,true},
|
|
|
|
{ ARM::VLD4DUPd8Pseudo, ARM::VLD4DUPd8, true, false, false, SingleSpc, 4, 8,true},
|
|
|
|
{ ARM::VLD4DUPd8Pseudo_UPD, ARM::VLD4DUPd8_UPD, true, true, true, SingleSpc, 4, 8,true},
|
[NEON] Support vldNq intrinsics in AArch32 (LLVM part)
This patch adds support for the q versions of the dup
(load-to-all-lanes) NEON intrinsics, such as vld2q_dup_f16() for
example.
Currently, non-q versions of the dup intrinsics are implemented
in clang by generating IR that first loads the elements of the
structure into the first lane with the lane (to-single-lane)
intrinsics, and then propagating it to other lanes. There are at
least two problems with this approach. First, there are no
double-spaced to-single-lane byte-element instructions. For
example, there is no such instruction as 'vld2.8 { d0[0], d2[0]
}, [r0]'. That means we cannot rely on the to-single-lane
intrinsics and instructions to implement the q versions of the
dup intrinsics. Note that to-all-lanes instructions do support
all sizes of data items, including bytes.
The second problem with the current approach is that we need a
separate vdup instruction to propagate the structure to each
lane. So for vld4q_dup_f16() we would need four vdup instructions
in addition to the initial vld instruction.
This patch introduces dup LLVM intrinsics and reworks handling of
the currently supported (non-q) NEON dup intrinsics to expand
them into those LLVM intrinsics, thus eliminating the need for
using to-single-lane intrinsics and instructions.
Additionally, this patch adds support for u64 and s64 dup NEON
intrinsics. These are marked as AArch64-only in the ARM NEON
Reference, but it seems there are no reasons to not support them
in AArch32 mode. Please correct, if that is wrong.
That's what we generate with this patch applied:
vld2q_dup_f16:
vld2.16 {d0[], d2[]}, [r0]
vld2.16 {d1[], d3[]}, [r0]
vld3q_dup_f16:
vld3.16 {d0[], d2[], d4[]}, [r0]
vld3.16 {d1[], d3[], d5[]}, [r0]
vld4q_dup_f16:
vld4.16 {d0[], d2[], d4[], d6[]}, [r0]
vld4.16 {d1[], d3[], d5[], d7[]}, [r0]
Differential Revision: https://reviews.llvm.org/D48439
llvm-svn: 335733
2018-06-27 21:57:52 +08:00
|
|
|
{ ARM::VLD4DUPq16EvenPseudo, ARM::VLD4DUPq16, true, false, false, EvenDblSpc, 4, 4 ,true},
|
|
|
|
{ ARM::VLD4DUPq16OddPseudo, ARM::VLD4DUPq16, true, false, false, OddDblSpc, 4, 4 ,true},
|
|
|
|
{ ARM::VLD4DUPq32EvenPseudo, ARM::VLD4DUPq32, true, false, false, EvenDblSpc, 4, 2 ,true},
|
|
|
|
{ ARM::VLD4DUPq32OddPseudo, ARM::VLD4DUPq32, true, false, false, OddDblSpc, 4, 2 ,true},
|
|
|
|
{ ARM::VLD4DUPq8EvenPseudo, ARM::VLD4DUPq8, true, false, false, EvenDblSpc, 4, 8 ,true},
|
|
|
|
{ ARM::VLD4DUPq8OddPseudo, ARM::VLD4DUPq8, true, false, false, OddDblSpc, 4, 8 ,true},
|
2011-11-01 03:11:23 +08:00
|
|
|
|
|
|
|
{ ARM::VLD4LNd16Pseudo, ARM::VLD4LNd16, true, false, false, SingleSpc, 4, 4 ,true},
|
|
|
|
{ ARM::VLD4LNd16Pseudo_UPD, ARM::VLD4LNd16_UPD, true, true, true, SingleSpc, 4, 4 ,true},
|
|
|
|
{ ARM::VLD4LNd32Pseudo, ARM::VLD4LNd32, true, false, false, SingleSpc, 4, 2 ,true},
|
|
|
|
{ ARM::VLD4LNd32Pseudo_UPD, ARM::VLD4LNd32_UPD, true, true, true, SingleSpc, 4, 2 ,true},
|
|
|
|
{ ARM::VLD4LNd8Pseudo, ARM::VLD4LNd8, true, false, false, SingleSpc, 4, 8 ,true},
|
|
|
|
{ ARM::VLD4LNd8Pseudo_UPD, ARM::VLD4LNd8_UPD, true, true, true, SingleSpc, 4, 8 ,true},
|
|
|
|
{ ARM::VLD4LNq16Pseudo, ARM::VLD4LNq16, true, false, false, EvenDblSpc, 4, 4 ,true},
|
|
|
|
{ ARM::VLD4LNq16Pseudo_UPD, ARM::VLD4LNq16_UPD, true, true, true, EvenDblSpc, 4, 4 ,true},
|
|
|
|
{ ARM::VLD4LNq32Pseudo, ARM::VLD4LNq32, true, false, false, EvenDblSpc, 4, 2 ,true},
|
|
|
|
{ ARM::VLD4LNq32Pseudo_UPD, ARM::VLD4LNq32_UPD, true, true, true, EvenDblSpc, 4, 2 ,true},
|
|
|
|
|
|
|
|
{ ARM::VLD4d16Pseudo, ARM::VLD4d16, true, false, false, SingleSpc, 4, 4 ,true},
|
|
|
|
{ ARM::VLD4d16Pseudo_UPD, ARM::VLD4d16_UPD, true, true, true, SingleSpc, 4, 4 ,true},
|
|
|
|
{ ARM::VLD4d32Pseudo, ARM::VLD4d32, true, false, false, SingleSpc, 4, 2 ,true},
|
|
|
|
{ ARM::VLD4d32Pseudo_UPD, ARM::VLD4d32_UPD, true, true, true, SingleSpc, 4, 2 ,true},
|
|
|
|
{ ARM::VLD4d8Pseudo, ARM::VLD4d8, true, false, false, SingleSpc, 4, 8 ,true},
|
|
|
|
{ ARM::VLD4d8Pseudo_UPD, ARM::VLD4d8_UPD, true, true, true, SingleSpc, 4, 8 ,true},
|
|
|
|
|
|
|
|
{ ARM::VLD4q16Pseudo_UPD, ARM::VLD4q16_UPD, true, true, true, EvenDblSpc, 4, 4 ,true},
|
|
|
|
{ ARM::VLD4q16oddPseudo, ARM::VLD4q16, true, false, false, OddDblSpc, 4, 4 ,true},
|
|
|
|
{ ARM::VLD4q16oddPseudo_UPD, ARM::VLD4q16_UPD, true, true, true, OddDblSpc, 4, 4 ,true},
|
|
|
|
{ ARM::VLD4q32Pseudo_UPD, ARM::VLD4q32_UPD, true, true, true, EvenDblSpc, 4, 2 ,true},
|
|
|
|
{ ARM::VLD4q32oddPseudo, ARM::VLD4q32, true, false, false, OddDblSpc, 4, 2 ,true},
|
|
|
|
{ ARM::VLD4q32oddPseudo_UPD, ARM::VLD4q32_UPD, true, true, true, OddDblSpc, 4, 2 ,true},
|
|
|
|
{ ARM::VLD4q8Pseudo_UPD, ARM::VLD4q8_UPD, true, true, true, EvenDblSpc, 4, 8 ,true},
|
|
|
|
{ ARM::VLD4q8oddPseudo, ARM::VLD4q8, true, false, false, OddDblSpc, 4, 8 ,true},
|
|
|
|
{ ARM::VLD4q8oddPseudo_UPD, ARM::VLD4q8_UPD, true, true, true, OddDblSpc, 4, 8 ,true},
|
|
|
|
|
|
|
|
{ ARM::VST1LNq16Pseudo, ARM::VST1LNd16, false, false, false, EvenDblSpc, 1, 4 ,true},
|
|
|
|
{ ARM::VST1LNq16Pseudo_UPD, ARM::VST1LNd16_UPD, false, true, true, EvenDblSpc, 1, 4 ,true},
|
|
|
|
{ ARM::VST1LNq32Pseudo, ARM::VST1LNd32, false, false, false, EvenDblSpc, 1, 2 ,true},
|
|
|
|
{ ARM::VST1LNq32Pseudo_UPD, ARM::VST1LNd32_UPD, false, true, true, EvenDblSpc, 1, 2 ,true},
|
|
|
|
{ ARM::VST1LNq8Pseudo, ARM::VST1LNd8, false, false, false, EvenDblSpc, 1, 8 ,true},
|
|
|
|
{ ARM::VST1LNq8Pseudo_UPD, ARM::VST1LNd8_UPD, false, true, true, EvenDblSpc, 1, 8 ,true},
|
|
|
|
|
2018-06-10 17:27:27 +08:00
|
|
|
{ ARM::VST1d16QPseudo, ARM::VST1d16Q, false, false, false, SingleSpc, 4, 4 ,false},
|
|
|
|
{ ARM::VST1d16TPseudo, ARM::VST1d16T, false, false, false, SingleSpc, 3, 4 ,false},
|
|
|
|
{ ARM::VST1d32QPseudo, ARM::VST1d32Q, false, false, false, SingleSpc, 4, 2 ,false},
|
|
|
|
{ ARM::VST1d32TPseudo, ARM::VST1d32T, false, false, false, SingleSpc, 3, 2 ,false},
|
2011-11-30 06:58:48 +08:00
|
|
|
{ ARM::VST1d64QPseudo, ARM::VST1d64Q, false, false, false, SingleSpc, 4, 1 ,false},
|
|
|
|
{ ARM::VST1d64QPseudoWB_fixed, ARM::VST1d64Qwb_fixed, false, true, false, SingleSpc, 4, 1 ,false},
|
|
|
|
{ ARM::VST1d64QPseudoWB_register, ARM::VST1d64Qwb_register, false, true, true, SingleSpc, 4, 1 ,false},
|
2011-11-30 06:38:04 +08:00
|
|
|
{ ARM::VST1d64TPseudo, ARM::VST1d64T, false, false, false, SingleSpc, 3, 1 ,false},
|
|
|
|
{ ARM::VST1d64TPseudoWB_fixed, ARM::VST1d64Twb_fixed, false, true, false, SingleSpc, 3, 1 ,false},
|
|
|
|
{ ARM::VST1d64TPseudoWB_register, ARM::VST1d64Twb_register, false, true, true, SingleSpc, 3, 1 ,false},
|
2018-06-10 17:27:27 +08:00
|
|
|
{ ARM::VST1d8QPseudo, ARM::VST1d8Q, false, false, false, SingleSpc, 4, 8 ,false},
|
|
|
|
{ ARM::VST1d8TPseudo, ARM::VST1d8T, false, false, false, SingleSpc, 3, 8 ,false},
|
|
|
|
{ ARM::VST1q16HighQPseudo, ARM::VST1d16Q, false, false, false, SingleHighQSpc, 4, 4 ,false},
|
|
|
|
{ ARM::VST1q16HighTPseudo, ARM::VST1d16T, false, false, false, SingleHighTSpc, 3, 4 ,false},
|
|
|
|
{ ARM::VST1q16LowQPseudo_UPD, ARM::VST1d16Qwb_fixed, false, true, true, SingleLowSpc, 4, 4 ,false},
|
|
|
|
{ ARM::VST1q16LowTPseudo_UPD, ARM::VST1d16Twb_fixed, false, true, true, SingleLowSpc, 3, 4 ,false},
|
|
|
|
{ ARM::VST1q32HighQPseudo, ARM::VST1d32Q, false, false, false, SingleHighQSpc, 4, 2 ,false},
|
|
|
|
{ ARM::VST1q32HighTPseudo, ARM::VST1d32T, false, false, false, SingleHighTSpc, 3, 2 ,false},
|
|
|
|
{ ARM::VST1q32LowQPseudo_UPD, ARM::VST1d32Qwb_fixed, false, true, true, SingleLowSpc, 4, 2 ,false},
|
|
|
|
{ ARM::VST1q32LowTPseudo_UPD, ARM::VST1d32Twb_fixed, false, true, true, SingleLowSpc, 3, 2 ,false},
|
|
|
|
{ ARM::VST1q64HighQPseudo, ARM::VST1d64Q, false, false, false, SingleHighQSpc, 4, 1 ,false},
|
|
|
|
{ ARM::VST1q64HighTPseudo, ARM::VST1d64T, false, false, false, SingleHighTSpc, 3, 1 ,false},
|
|
|
|
{ ARM::VST1q64LowQPseudo_UPD, ARM::VST1d64Qwb_fixed, false, true, true, SingleLowSpc, 4, 1 ,false},
|
|
|
|
{ ARM::VST1q64LowTPseudo_UPD, ARM::VST1d64Twb_fixed, false, true, true, SingleLowSpc, 3, 1 ,false},
|
|
|
|
{ ARM::VST1q8HighQPseudo, ARM::VST1d8Q, false, false, false, SingleHighQSpc, 4, 8 ,false},
|
|
|
|
{ ARM::VST1q8HighTPseudo, ARM::VST1d8T, false, false, false, SingleHighTSpc, 3, 8 ,false},
|
|
|
|
{ ARM::VST1q8LowQPseudo_UPD, ARM::VST1d8Qwb_fixed, false, true, true, SingleLowSpc, 4, 8 ,false},
|
|
|
|
{ ARM::VST1q8LowTPseudo_UPD, ARM::VST1d8Twb_fixed, false, true, true, SingleLowSpc, 3, 8 ,false},
|
2011-11-01 03:11:23 +08:00
|
|
|
|
|
|
|
{ ARM::VST2LNd16Pseudo, ARM::VST2LNd16, false, false, false, SingleSpc, 2, 4 ,true},
|
|
|
|
{ ARM::VST2LNd16Pseudo_UPD, ARM::VST2LNd16_UPD, false, true, true, SingleSpc, 2, 4 ,true},
|
|
|
|
{ ARM::VST2LNd32Pseudo, ARM::VST2LNd32, false, false, false, SingleSpc, 2, 2 ,true},
|
|
|
|
{ ARM::VST2LNd32Pseudo_UPD, ARM::VST2LNd32_UPD, false, true, true, SingleSpc, 2, 2 ,true},
|
|
|
|
{ ARM::VST2LNd8Pseudo, ARM::VST2LNd8, false, false, false, SingleSpc, 2, 8 ,true},
|
|
|
|
{ ARM::VST2LNd8Pseudo_UPD, ARM::VST2LNd8_UPD, false, true, true, SingleSpc, 2, 8 ,true},
|
|
|
|
{ ARM::VST2LNq16Pseudo, ARM::VST2LNq16, false, false, false, EvenDblSpc, 2, 4,true},
|
|
|
|
{ ARM::VST2LNq16Pseudo_UPD, ARM::VST2LNq16_UPD, false, true, true, EvenDblSpc, 2, 4,true},
|
|
|
|
{ ARM::VST2LNq32Pseudo, ARM::VST2LNq32, false, false, false, EvenDblSpc, 2, 2,true},
|
|
|
|
{ ARM::VST2LNq32Pseudo_UPD, ARM::VST2LNq32_UPD, false, true, true, EvenDblSpc, 2, 2,true},
|
|
|
|
|
2011-12-15 03:35:22 +08:00
|
|
|
{ ARM::VST2q16Pseudo, ARM::VST2q16, false, false, false, SingleSpc, 4, 4 ,false},
|
2011-12-15 05:32:11 +08:00
|
|
|
{ ARM::VST2q16PseudoWB_fixed, ARM::VST2q16wb_fixed, false, true, false, SingleSpc, 4, 4 ,false},
|
|
|
|
{ ARM::VST2q16PseudoWB_register, ARM::VST2q16wb_register, false, true, true, SingleSpc, 4, 4 ,false},
|
2011-12-15 03:35:22 +08:00
|
|
|
{ ARM::VST2q32Pseudo, ARM::VST2q32, false, false, false, SingleSpc, 4, 2 ,false},
|
2011-12-15 05:32:11 +08:00
|
|
|
{ ARM::VST2q32PseudoWB_fixed, ARM::VST2q32wb_fixed, false, true, false, SingleSpc, 4, 2 ,false},
|
|
|
|
{ ARM::VST2q32PseudoWB_register, ARM::VST2q32wb_register, false, true, true, SingleSpc, 4, 2 ,false},
|
2011-12-15 03:35:22 +08:00
|
|
|
{ ARM::VST2q8Pseudo, ARM::VST2q8, false, false, false, SingleSpc, 4, 8 ,false},
|
2011-12-15 05:32:11 +08:00
|
|
|
{ ARM::VST2q8PseudoWB_fixed, ARM::VST2q8wb_fixed, false, true, false, SingleSpc, 4, 8 ,false},
|
|
|
|
{ ARM::VST2q8PseudoWB_register, ARM::VST2q8wb_register, false, true, true, SingleSpc, 4, 8 ,false},
|
2011-11-01 03:11:23 +08:00
|
|
|
|
|
|
|
{ ARM::VST3LNd16Pseudo, ARM::VST3LNd16, false, false, false, SingleSpc, 3, 4 ,true},
|
|
|
|
{ ARM::VST3LNd16Pseudo_UPD, ARM::VST3LNd16_UPD, false, true, true, SingleSpc, 3, 4 ,true},
|
|
|
|
{ ARM::VST3LNd32Pseudo, ARM::VST3LNd32, false, false, false, SingleSpc, 3, 2 ,true},
|
|
|
|
{ ARM::VST3LNd32Pseudo_UPD, ARM::VST3LNd32_UPD, false, true, true, SingleSpc, 3, 2 ,true},
|
|
|
|
{ ARM::VST3LNd8Pseudo, ARM::VST3LNd8, false, false, false, SingleSpc, 3, 8 ,true},
|
|
|
|
{ ARM::VST3LNd8Pseudo_UPD, ARM::VST3LNd8_UPD, false, true, true, SingleSpc, 3, 8 ,true},
|
|
|
|
{ ARM::VST3LNq16Pseudo, ARM::VST3LNq16, false, false, false, EvenDblSpc, 3, 4,true},
|
|
|
|
{ ARM::VST3LNq16Pseudo_UPD, ARM::VST3LNq16_UPD, false, true, true, EvenDblSpc, 3, 4,true},
|
|
|
|
{ ARM::VST3LNq32Pseudo, ARM::VST3LNq32, false, false, false, EvenDblSpc, 3, 2,true},
|
|
|
|
{ ARM::VST3LNq32Pseudo_UPD, ARM::VST3LNq32_UPD, false, true, true, EvenDblSpc, 3, 2,true},
|
|
|
|
|
|
|
|
{ ARM::VST3d16Pseudo, ARM::VST3d16, false, false, false, SingleSpc, 3, 4 ,true},
|
|
|
|
{ ARM::VST3d16Pseudo_UPD, ARM::VST3d16_UPD, false, true, true, SingleSpc, 3, 4 ,true},
|
|
|
|
{ ARM::VST3d32Pseudo, ARM::VST3d32, false, false, false, SingleSpc, 3, 2 ,true},
|
|
|
|
{ ARM::VST3d32Pseudo_UPD, ARM::VST3d32_UPD, false, true, true, SingleSpc, 3, 2 ,true},
|
|
|
|
{ ARM::VST3d8Pseudo, ARM::VST3d8, false, false, false, SingleSpc, 3, 8 ,true},
|
|
|
|
{ ARM::VST3d8Pseudo_UPD, ARM::VST3d8_UPD, false, true, true, SingleSpc, 3, 8 ,true},
|
|
|
|
|
|
|
|
{ ARM::VST3q16Pseudo_UPD, ARM::VST3q16_UPD, false, true, true, EvenDblSpc, 3, 4 ,true},
|
|
|
|
{ ARM::VST3q16oddPseudo, ARM::VST3q16, false, false, false, OddDblSpc, 3, 4 ,true},
|
|
|
|
{ ARM::VST3q16oddPseudo_UPD, ARM::VST3q16_UPD, false, true, true, OddDblSpc, 3, 4 ,true},
|
|
|
|
{ ARM::VST3q32Pseudo_UPD, ARM::VST3q32_UPD, false, true, true, EvenDblSpc, 3, 2 ,true},
|
|
|
|
{ ARM::VST3q32oddPseudo, ARM::VST3q32, false, false, false, OddDblSpc, 3, 2 ,true},
|
|
|
|
{ ARM::VST3q32oddPseudo_UPD, ARM::VST3q32_UPD, false, true, true, OddDblSpc, 3, 2 ,true},
|
|
|
|
{ ARM::VST3q8Pseudo_UPD, ARM::VST3q8_UPD, false, true, true, EvenDblSpc, 3, 8 ,true},
|
|
|
|
{ ARM::VST3q8oddPseudo, ARM::VST3q8, false, false, false, OddDblSpc, 3, 8 ,true},
|
|
|
|
{ ARM::VST3q8oddPseudo_UPD, ARM::VST3q8_UPD, false, true, true, OddDblSpc, 3, 8 ,true},
|
|
|
|
|
|
|
|
{ ARM::VST4LNd16Pseudo, ARM::VST4LNd16, false, false, false, SingleSpc, 4, 4 ,true},
|
|
|
|
{ ARM::VST4LNd16Pseudo_UPD, ARM::VST4LNd16_UPD, false, true, true, SingleSpc, 4, 4 ,true},
|
|
|
|
{ ARM::VST4LNd32Pseudo, ARM::VST4LNd32, false, false, false, SingleSpc, 4, 2 ,true},
|
|
|
|
{ ARM::VST4LNd32Pseudo_UPD, ARM::VST4LNd32_UPD, false, true, true, SingleSpc, 4, 2 ,true},
|
|
|
|
{ ARM::VST4LNd8Pseudo, ARM::VST4LNd8, false, false, false, SingleSpc, 4, 8 ,true},
|
|
|
|
{ ARM::VST4LNd8Pseudo_UPD, ARM::VST4LNd8_UPD, false, true, true, SingleSpc, 4, 8 ,true},
|
|
|
|
{ ARM::VST4LNq16Pseudo, ARM::VST4LNq16, false, false, false, EvenDblSpc, 4, 4,true},
|
|
|
|
{ ARM::VST4LNq16Pseudo_UPD, ARM::VST4LNq16_UPD, false, true, true, EvenDblSpc, 4, 4,true},
|
|
|
|
{ ARM::VST4LNq32Pseudo, ARM::VST4LNq32, false, false, false, EvenDblSpc, 4, 2,true},
|
|
|
|
{ ARM::VST4LNq32Pseudo_UPD, ARM::VST4LNq32_UPD, false, true, true, EvenDblSpc, 4, 2,true},
|
|
|
|
|
|
|
|
{ ARM::VST4d16Pseudo, ARM::VST4d16, false, false, false, SingleSpc, 4, 4 ,true},
|
|
|
|
{ ARM::VST4d16Pseudo_UPD, ARM::VST4d16_UPD, false, true, true, SingleSpc, 4, 4 ,true},
|
|
|
|
{ ARM::VST4d32Pseudo, ARM::VST4d32, false, false, false, SingleSpc, 4, 2 ,true},
|
|
|
|
{ ARM::VST4d32Pseudo_UPD, ARM::VST4d32_UPD, false, true, true, SingleSpc, 4, 2 ,true},
|
|
|
|
{ ARM::VST4d8Pseudo, ARM::VST4d8, false, false, false, SingleSpc, 4, 8 ,true},
|
|
|
|
{ ARM::VST4d8Pseudo_UPD, ARM::VST4d8_UPD, false, true, true, SingleSpc, 4, 8 ,true},
|
|
|
|
|
|
|
|
{ ARM::VST4q16Pseudo_UPD, ARM::VST4q16_UPD, false, true, true, EvenDblSpc, 4, 4 ,true},
|
|
|
|
{ ARM::VST4q16oddPseudo, ARM::VST4q16, false, false, false, OddDblSpc, 4, 4 ,true},
|
|
|
|
{ ARM::VST4q16oddPseudo_UPD, ARM::VST4q16_UPD, false, true, true, OddDblSpc, 4, 4 ,true},
|
|
|
|
{ ARM::VST4q32Pseudo_UPD, ARM::VST4q32_UPD, false, true, true, EvenDblSpc, 4, 2 ,true},
|
|
|
|
{ ARM::VST4q32oddPseudo, ARM::VST4q32, false, false, false, OddDblSpc, 4, 2 ,true},
|
|
|
|
{ ARM::VST4q32oddPseudo_UPD, ARM::VST4q32_UPD, false, true, true, OddDblSpc, 4, 2 ,true},
|
|
|
|
{ ARM::VST4q8Pseudo_UPD, ARM::VST4q8_UPD, false, true, true, EvenDblSpc, 4, 8 ,true},
|
|
|
|
{ ARM::VST4q8oddPseudo, ARM::VST4q8, false, false, false, OddDblSpc, 4, 8 ,true},
|
|
|
|
{ ARM::VST4q8oddPseudo_UPD, ARM::VST4q8_UPD, false, true, true, OddDblSpc, 4, 8 ,true}
|
2010-09-14 07:01:35 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
/// LookupNEONLdSt - Search the NEONLdStTable for information about a NEON
|
|
|
|
/// load or store pseudo instruction.
|
|
|
|
static const NEONLdStTableEntry *LookupNEONLdSt(unsigned Opcode) {
|
|
|
|
#ifndef NDEBUG
|
|
|
|
// Make sure the table is sorted.
|
2018-06-28 18:03:45 +08:00
|
|
|
static std::atomic<bool> TableChecked(false);
|
|
|
|
if (!TableChecked.load(std::memory_order_relaxed)) {
|
2015-10-18 00:37:11 +08:00
|
|
|
assert(std::is_sorted(std::begin(NEONLdStTable), std::end(NEONLdStTable)) &&
|
|
|
|
"NEONLdStTable is not sorted!");
|
2018-06-28 18:24:38 +08:00
|
|
|
TableChecked.store(true, std::memory_order_relaxed);
|
2010-09-14 07:01:35 +08:00
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2019-06-21 13:40:31 +08:00
|
|
|
auto I = llvm::lower_bound(NEONLdStTable, Opcode);
|
2015-10-18 00:37:11 +08:00
|
|
|
if (I != std::end(NEONLdStTable) && I->PseudoOpc == Opcode)
|
2010-09-14 07:01:35 +08:00
|
|
|
return I;
|
2014-04-25 13:30:21 +08:00
|
|
|
return nullptr;
|
2010-09-14 07:01:35 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/// GetDSubRegs - Get 4 D subregisters of a Q, QQ, or QQQQ register,
|
|
|
|
/// corresponding to the specified register spacing. Not all of the results
|
|
|
|
/// are necessarily valid, e.g., a Q register only has 2 D subregisters.
|
|
|
|
static void GetDSubRegs(unsigned Reg, NEONRegSpacing RegSpc,
|
|
|
|
const TargetRegisterInfo *TRI, unsigned &D0,
|
|
|
|
unsigned &D1, unsigned &D2, unsigned &D3) {
|
2018-06-03 00:40:03 +08:00
|
|
|
if (RegSpc == SingleSpc || RegSpc == SingleLowSpc) {
|
2010-09-14 07:01:35 +08:00
|
|
|
D0 = TRI->getSubReg(Reg, ARM::dsub_0);
|
|
|
|
D1 = TRI->getSubReg(Reg, ARM::dsub_1);
|
|
|
|
D2 = TRI->getSubReg(Reg, ARM::dsub_2);
|
|
|
|
D3 = TRI->getSubReg(Reg, ARM::dsub_3);
|
2018-06-03 00:40:03 +08:00
|
|
|
} else if (RegSpc == SingleHighQSpc) {
|
|
|
|
D0 = TRI->getSubReg(Reg, ARM::dsub_4);
|
|
|
|
D1 = TRI->getSubReg(Reg, ARM::dsub_5);
|
|
|
|
D2 = TRI->getSubReg(Reg, ARM::dsub_6);
|
|
|
|
D3 = TRI->getSubReg(Reg, ARM::dsub_7);
|
|
|
|
} else if (RegSpc == SingleHighTSpc) {
|
|
|
|
D0 = TRI->getSubReg(Reg, ARM::dsub_3);
|
|
|
|
D1 = TRI->getSubReg(Reg, ARM::dsub_4);
|
|
|
|
D2 = TRI->getSubReg(Reg, ARM::dsub_5);
|
|
|
|
D3 = TRI->getSubReg(Reg, ARM::dsub_6);
|
2010-09-14 07:01:35 +08:00
|
|
|
} else if (RegSpc == EvenDblSpc) {
|
|
|
|
D0 = TRI->getSubReg(Reg, ARM::dsub_0);
|
|
|
|
D1 = TRI->getSubReg(Reg, ARM::dsub_2);
|
|
|
|
D2 = TRI->getSubReg(Reg, ARM::dsub_4);
|
|
|
|
D3 = TRI->getSubReg(Reg, ARM::dsub_6);
|
|
|
|
} else {
|
|
|
|
assert(RegSpc == OddDblSpc && "unknown register spacing");
|
|
|
|
D0 = TRI->getSubReg(Reg, ARM::dsub_1);
|
|
|
|
D1 = TRI->getSubReg(Reg, ARM::dsub_3);
|
|
|
|
D2 = TRI->getSubReg(Reg, ARM::dsub_5);
|
|
|
|
D3 = TRI->getSubReg(Reg, ARM::dsub_7);
|
2010-09-14 07:55:10 +08:00
|
|
|
}
|
2010-09-14 07:01:35 +08:00
|
|
|
}
|
|
|
|
|
2010-09-03 00:17:29 +08:00
|
|
|
/// ExpandVLD - Translate VLD pseudo instructions with Q, QQ or QQQQ register
|
|
|
|
/// operands to real VLD instructions with D register operands.
|
2010-09-14 07:01:35 +08:00
|
|
|
void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) {
|
2010-09-03 00:00:54 +08:00
|
|
|
MachineInstr &MI = *MBBI;
|
|
|
|
MachineBasicBlock &MBB = *MI.getParent();
|
2019-05-24 16:25:02 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump());
|
2010-09-03 00:00:54 +08:00
|
|
|
|
2010-09-14 07:01:35 +08:00
|
|
|
const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
|
|
|
|
assert(TableEntry && TableEntry->IsLoad && "NEONLdStTable lookup failed");
|
2012-09-20 14:14:08 +08:00
|
|
|
NEONRegSpacing RegSpc = (NEONRegSpacing)TableEntry->RegSpacing;
|
2010-09-14 07:01:35 +08:00
|
|
|
unsigned NumRegs = TableEntry->NumRegs;
|
|
|
|
|
|
|
|
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
|
|
|
|
TII->get(TableEntry->RealOpc));
|
2010-09-03 00:00:54 +08:00
|
|
|
unsigned OpIdx = 0;
|
|
|
|
|
|
|
|
bool DstIsDead = MI.getOperand(OpIdx).isDead();
|
Apply llvm-prefer-register-over-unsigned from clang-tidy to LLVM
Summary:
This clang-tidy check is looking for unsigned integer variables whose initializer
starts with an implicit cast from llvm::Register and changes the type of the
variable to llvm::Register (dropping the llvm:: where possible).
Partial reverts in:
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
X86FixupLEAs.cpp - Some functions return unsigned and arguably should be MCRegister
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
HexagonBitSimplify.cpp - Function takes BitTracker::RegisterRef which appears to be unsigned&
MachineVerifier.cpp - Ambiguous operator==() given MCRegister and const Register
PPCFastISel.cpp - No Register::operator-=()
PeepholeOptimizer.cpp - TargetInstrInfo::optimizeLoadInstr() takes an unsigned&
MachineTraceMetrics.cpp - MachineTraceMetrics lacks a suitable constructor
Manual fixups in:
ARMFastISel.cpp - ARMEmitLoad() now takes a Register& instead of unsigned&
HexagonSplitDouble.cpp - Ternary operator was ambiguous between unsigned/Register
HexagonConstExtenders.cpp - Has a local class named Register, used llvm::Register instead of Register.
PPCFastISel.cpp - PPCEmitLoad() now takes a Register& instead of unsigned&
Depends on D65919
Reviewers: arsenm, bogner, craig.topper, RKSimon
Reviewed By: arsenm
Subscribers: RKSimon, craig.topper, lenary, aemerson, wuzish, jholewinski, MatzeB, qcolombet, dschuff, jyknight, dylanmckay, sdardis, nemanjai, jvesely, wdng, nhaehnle, sbc100, jgravelle-google, kristof.beyls, hiraditya, aheejin, kbarton, fedor.sergeev, javed.absar, asb, rbar, johnrusso, simoncook, apazos, sabuasal, niosHD, jrtc27, MaskRay, zzheng, edward-jones, atanasyan, rogfer01, MartinMosbeck, brucehoult, the_o, tpr, PkmX, jocewei, jsji, Petar.Avramovic, asbirlea, Jim, s.egerton, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D65962
llvm-svn: 369041
2019-08-16 03:22:08 +08:00
|
|
|
Register DstReg = MI.getOperand(OpIdx++).getReg();
|
[NEON] Support vldNq intrinsics in AArch32 (LLVM part)
This patch adds support for the q versions of the dup
(load-to-all-lanes) NEON intrinsics, such as vld2q_dup_f16() for
example.
Currently, non-q versions of the dup intrinsics are implemented
in clang by generating IR that first loads the elements of the
structure into the first lane with the lane (to-single-lane)
intrinsics, and then propagating it other lanes. There are at
least two problems with this approach. First, there are no
double-spaced to-single-lane byte-element instructions. For
example, there is no such instruction as 'vld2.8 { d0[0], d2[0]
}, [r0]'. That means we cannot rely on the to-single-lane
intrinsics and instructions to implement the q versions of the
dup intrinsics. Note that to-all-lanes instructions do support
all sizes of data items, including bytes.
The second problem with the current approach is that we need a
separate vdup instruction to propagate the structure to each
lane. So for vld4q_dup_f16() we would need four vdup instructions
in addition to the initial vld instruction.
This patch introduces dup LLVM intrinsics and reworks handling of
the currently supported (non-q) NEON dup intrinsics to expand
them into those LLVM intrinsics, thus eliminating the need for
using to-single-lane intrinsics and instructions.
Additionally, this patch adds support for u64 and s64 dup NEON
intrinsics. These are marked as Arch64-only in the ARM NEON
Reference, but it seems there are no reasons to not support them
in AArch32 mode. Please correct, if that is wrong.
That's what we generate with this patch applied:
vld2q_dup_f16:
vld2.16 {d0[], d2[]}, [r0]
vld2.16 {d1[], d3[]}, [r0]
vld3q_dup_f16:
vld3.16 {d0[], d2[], d4[]}, [r0]
vld3.16 {d1[], d3[], d5[]}, [r0]
vld4q_dup_f16:
vld4.16 {d0[], d2[], d4[], d6[]}, [r0]
vld4.16 {d1[], d3[], d5[], d7[]}, [r0]
Differential Revision: https://reviews.llvm.org/D48439
llvm-svn: 335733
2018-06-27 21:57:52 +08:00
|
|
|
if(TableEntry->RealOpc == ARM::VLD2DUPd8x2 ||
|
|
|
|
TableEntry->RealOpc == ARM::VLD2DUPd16x2 ||
|
|
|
|
TableEntry->RealOpc == ARM::VLD2DUPd32x2) {
|
|
|
|
unsigned SubRegIndex;
|
|
|
|
if (RegSpc == EvenDblSpc) {
|
|
|
|
SubRegIndex = ARM::dsub_0;
|
|
|
|
} else {
|
|
|
|
assert(RegSpc == OddDblSpc && "Unexpected spacing!");
|
|
|
|
SubRegIndex = ARM::dsub_1;
|
|
|
|
}
|
Apply llvm-prefer-register-over-unsigned from clang-tidy to LLVM
Summary:
This clang-tidy check is looking for unsigned integer variables whose initializer
starts with an implicit cast from llvm::Register and changes the type of the
variable to llvm::Register (dropping the llvm:: where possible).
Partial reverts in:
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
X86FixupLEAs.cpp - Some functions return unsigned and arguably should be MCRegister
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
HexagonBitSimplify.cpp - Function takes BitTracker::RegisterRef which appears to be unsigned&
MachineVerifier.cpp - Ambiguous operator==() given MCRegister and const Register
PPCFastISel.cpp - No Register::operator-=()
PeepholeOptimizer.cpp - TargetInstrInfo::optimizeLoadInstr() takes an unsigned&
MachineTraceMetrics.cpp - MachineTraceMetrics lacks a suitable constructor
Manual fixups in:
ARMFastISel.cpp - ARMEmitLoad() now takes a Register& instead of unsigned&
HexagonSplitDouble.cpp - Ternary operator was ambiguous between unsigned/Register
HexagonConstExtenders.cpp - Has a local class named Register, used llvm::Register instead of Register.
PPCFastISel.cpp - PPCEmitLoad() now takes a Register& instead of unsigned&
Depends on D65919
Reviewers: arsenm, bogner, craig.topper, RKSimon
Reviewed By: arsenm
Subscribers: RKSimon, craig.topper, lenary, aemerson, wuzish, jholewinski, MatzeB, qcolombet, dschuff, jyknight, dylanmckay, sdardis, nemanjai, jvesely, wdng, nhaehnle, sbc100, jgravelle-google, kristof.beyls, hiraditya, aheejin, kbarton, fedor.sergeev, javed.absar, asb, rbar, johnrusso, simoncook, apazos, sabuasal, niosHD, jrtc27, MaskRay, zzheng, edward-jones, atanasyan, rogfer01, MartinMosbeck, brucehoult, the_o, tpr, PkmX, jocewei, jsji, Petar.Avramovic, asbirlea, Jim, s.egerton, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D65962
llvm-svn: 369041
2019-08-16 03:22:08 +08:00
|
|
|
Register SubReg = TRI->getSubReg(DstReg, SubRegIndex);
|
[NEON] Support vldNq intrinsics in AArch32 (LLVM part)
This patch adds support for the q versions of the dup
(load-to-all-lanes) NEON intrinsics, such as vld2q_dup_f16() for
example.
Currently, non-q versions of the dup intrinsics are implemented
in clang by generating IR that first loads the elements of the
structure into the first lane with the lane (to-single-lane)
intrinsics, and then propagating it other lanes. There are at
least two problems with this approach. First, there are no
double-spaced to-single-lane byte-element instructions. For
example, there is no such instruction as 'vld2.8 { d0[0], d2[0]
}, [r0]'. That means we cannot rely on the to-single-lane
intrinsics and instructions to implement the q versions of the
dup intrinsics. Note that to-all-lanes instructions do support
all sizes of data items, including bytes.
The second problem with the current approach is that we need a
separate vdup instruction to propagate the structure to each
lane. So for vld4q_dup_f16() we would need four vdup instructions
in addition to the initial vld instruction.
This patch introduces dup LLVM intrinsics and reworks handling of
the currently supported (non-q) NEON dup intrinsics to expand
them into those LLVM intrinsics, thus eliminating the need for
using to-single-lane intrinsics and instructions.
Additionally, this patch adds support for u64 and s64 dup NEON
intrinsics. These are marked as Arch64-only in the ARM NEON
Reference, but it seems there are no reasons to not support them
in AArch32 mode. Please correct, if that is wrong.
That's what we generate with this patch applied:
vld2q_dup_f16:
vld2.16 {d0[], d2[]}, [r0]
vld2.16 {d1[], d3[]}, [r0]
vld3q_dup_f16:
vld3.16 {d0[], d2[], d4[]}, [r0]
vld3.16 {d1[], d3[], d5[]}, [r0]
vld4q_dup_f16:
vld4.16 {d0[], d2[], d4[], d6[]}, [r0]
vld4.16 {d1[], d3[], d5[], d7[]}, [r0]
Differential Revision: https://reviews.llvm.org/D48439
llvm-svn: 335733
2018-06-27 21:57:52 +08:00
|
|
|
unsigned DstRegPair = TRI->getMatchingSuperReg(SubReg, ARM::dsub_0,
|
|
|
|
&ARM::DPairSpcRegClass);
|
|
|
|
MIB.addReg(DstRegPair, RegState::Define | getDeadRegState(DstIsDead));
|
|
|
|
} else {
|
|
|
|
unsigned D0, D1, D2, D3;
|
|
|
|
GetDSubRegs(DstReg, RegSpc, TRI, D0, D1, D2, D3);
|
|
|
|
MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead));
|
|
|
|
if (NumRegs > 1 && TableEntry->copyAllListRegs)
|
|
|
|
MIB.addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
|
|
|
|
if (NumRegs > 2 && TableEntry->copyAllListRegs)
|
|
|
|
MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead));
|
|
|
|
if (NumRegs > 3 && TableEntry->copyAllListRegs)
|
|
|
|
MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead));
|
|
|
|
}
|
2010-09-03 00:00:54 +08:00
|
|
|
|
2011-11-01 03:11:23 +08:00
|
|
|
if (TableEntry->isUpdating)
|
2017-01-13 17:58:52 +08:00
|
|
|
MIB.add(MI.getOperand(OpIdx++));
|
2010-09-09 08:15:32 +08:00
|
|
|
|
2010-09-03 00:00:54 +08:00
|
|
|
// Copy the addrmode6 operands.
|
2017-01-13 17:58:52 +08:00
|
|
|
MIB.add(MI.getOperand(OpIdx++));
|
|
|
|
MIB.add(MI.getOperand(OpIdx++));
|
2018-06-03 00:40:03 +08:00
|
|
|
|
2010-09-09 08:15:32 +08:00
|
|
|
// Copy the am6offset operand.
|
2018-06-03 00:40:03 +08:00
|
|
|
if (TableEntry->hasWritebackOperand) {
|
|
|
|
// TODO: The writing-back pseudo instructions we translate here are all
|
|
|
|
// defined to take am6offset nodes that are capable to represent both fixed
|
|
|
|
// and register forms. Some real instructions, however, do not rely on
|
|
|
|
// am6offset and have separate definitions for such forms. When this is the
|
|
|
|
// case, fixed forms do not take any offset nodes, so here we skip them for
|
2018-06-10 17:27:27 +08:00
|
|
|
// such instructions. Once all real and pseudo writing-back instructions are
|
2018-06-03 00:40:03 +08:00
|
|
|
// rewritten without use of am6offset nodes, this code will go away.
|
|
|
|
const MachineOperand &AM6Offset = MI.getOperand(OpIdx++);
|
|
|
|
if (TableEntry->RealOpc == ARM::VLD1d8Qwb_fixed ||
|
|
|
|
TableEntry->RealOpc == ARM::VLD1d16Qwb_fixed ||
|
|
|
|
TableEntry->RealOpc == ARM::VLD1d32Qwb_fixed ||
|
|
|
|
TableEntry->RealOpc == ARM::VLD1d64Qwb_fixed ||
|
|
|
|
TableEntry->RealOpc == ARM::VLD1d8Twb_fixed ||
|
|
|
|
TableEntry->RealOpc == ARM::VLD1d16Twb_fixed ||
|
|
|
|
TableEntry->RealOpc == ARM::VLD1d32Twb_fixed ||
|
|
|
|
TableEntry->RealOpc == ARM::VLD1d64Twb_fixed) {
|
|
|
|
assert(AM6Offset.getReg() == 0 &&
|
2018-06-10 17:27:27 +08:00
|
|
|
"A fixed writing-back pseudo instruction provides an offset "
|
2018-06-03 00:40:03 +08:00
|
|
|
"register!");
|
|
|
|
} else {
|
|
|
|
MIB.add(AM6Offset);
|
|
|
|
}
|
|
|
|
}
|
2010-09-03 00:00:54 +08:00
|
|
|
|
2010-09-09 08:38:32 +08:00
|
|
|
// For an instruction writing double-spaced subregs, the pseudo instruction
|
2010-09-16 12:25:37 +08:00
|
|
|
// has an extra operand that is a use of the super-register. Record the
|
|
|
|
// operand index and skip over it.
|
|
|
|
unsigned SrcOpIdx = 0;
|
[NEON] Support vldNq intrinsics in AArch32 (LLVM part)
This patch adds support for the q versions of the dup
(load-to-all-lanes) NEON intrinsics, such as vld2q_dup_f16() for
example.
Currently, non-q versions of the dup intrinsics are implemented
in clang by generating IR that first loads the elements of the
structure into the first lane with the lane (to-single-lane)
intrinsics, and then propagating it other lanes. There are at
least two problems with this approach. First, there are no
double-spaced to-single-lane byte-element instructions. For
example, there is no such instruction as 'vld2.8 { d0[0], d2[0]
}, [r0]'. That means we cannot rely on the to-single-lane
intrinsics and instructions to implement the q versions of the
dup intrinsics. Note that to-all-lanes instructions do support
all sizes of data items, including bytes.
The second problem with the current approach is that we need a
separate vdup instruction to propagate the structure to each
lane. So for vld4q_dup_f16() we would need four vdup instructions
in addition to the initial vld instruction.
This patch introduces dup LLVM intrinsics and reworks handling of
the currently supported (non-q) NEON dup intrinsics to expand
them into those LLVM intrinsics, thus eliminating the need for
using to-single-lane intrinsics and instructions.
Additionally, this patch adds support for u64 and s64 dup NEON
intrinsics. These are marked as Arch64-only in the ARM NEON
Reference, but it seems there are no reasons to not support them
in AArch32 mode. Please correct, if that is wrong.
That's what we generate with this patch applied:
vld2q_dup_f16:
vld2.16 {d0[], d2[]}, [r0]
vld2.16 {d1[], d3[]}, [r0]
vld3q_dup_f16:
vld3.16 {d0[], d2[], d4[]}, [r0]
vld3.16 {d1[], d3[], d5[]}, [r0]
vld4q_dup_f16:
vld4.16 {d0[], d2[], d4[], d6[]}, [r0]
vld4.16 {d1[], d3[], d5[], d7[]}, [r0]
Differential Revision: https://reviews.llvm.org/D48439
llvm-svn: 335733
2018-06-27 21:57:52 +08:00
|
|
|
if(TableEntry->RealOpc != ARM::VLD2DUPd8x2 &&
|
|
|
|
TableEntry->RealOpc != ARM::VLD2DUPd16x2 &&
|
|
|
|
TableEntry->RealOpc != ARM::VLD2DUPd32x2) {
|
|
|
|
if (RegSpc == EvenDblSpc || RegSpc == OddDblSpc ||
|
|
|
|
RegSpc == SingleLowSpc || RegSpc == SingleHighQSpc ||
|
|
|
|
RegSpc == SingleHighTSpc)
|
|
|
|
SrcOpIdx = OpIdx++;
|
|
|
|
}
|
2010-09-16 12:25:37 +08:00
|
|
|
|
|
|
|
// Copy the predicate operands.
|
2017-01-13 17:58:52 +08:00
|
|
|
MIB.add(MI.getOperand(OpIdx++));
|
|
|
|
MIB.add(MI.getOperand(OpIdx++));
|
2010-09-16 12:25:37 +08:00
|
|
|
|
|
|
|
// Copy the super-register source operand used for double-spaced subregs over
|
2010-09-09 08:38:32 +08:00
|
|
|
// to the new instruction as an implicit operand.
|
2010-09-16 12:25:37 +08:00
|
|
|
if (SrcOpIdx != 0) {
|
|
|
|
MachineOperand MO = MI.getOperand(SrcOpIdx);
|
2010-09-09 08:38:32 +08:00
|
|
|
MO.setImplicit(true);
|
2017-01-13 17:58:52 +08:00
|
|
|
MIB.add(MO);
|
2010-09-09 08:38:32 +08:00
|
|
|
}
|
2010-09-04 02:16:02 +08:00
|
|
|
// Add an implicit def for the super-register.
|
|
|
|
MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
|
2010-09-09 08:38:32 +08:00
|
|
|
TransferImpOps(MI, MIB, MIB);
|
2011-04-19 08:04:03 +08:00
|
|
|
|
|
|
|
// Transfer memoperands.
|
2018-08-17 05:30:05 +08:00
|
|
|
MIB.cloneMemRefs(MI);
|
2010-09-03 00:00:54 +08:00
|
|
|
MI.eraseFromParent();
|
2019-05-24 16:25:02 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "To: "; MIB.getInstr()->dump(););
|
2010-09-03 00:00:54 +08:00
|
|
|
}
|
|
|
|
|
2010-08-27 02:51:29 +08:00
|
|
|
/// ExpandVST - Translate VST pseudo instructions with Q, QQ or QQQQ register
|
|
|
|
/// operands to real VST instructions with D register operands.
|
2010-09-14 07:01:35 +08:00
|
|
|
void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) {
|
2010-08-26 07:27:42 +08:00
|
|
|
MachineInstr &MI = *MBBI;
|
|
|
|
MachineBasicBlock &MBB = *MI.getParent();
|
2019-05-24 16:25:02 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump());
|
2010-08-26 07:27:42 +08:00
|
|
|
|
2010-09-14 07:01:35 +08:00
|
|
|
const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
|
|
|
|
assert(TableEntry && !TableEntry->IsLoad && "NEONLdStTable lookup failed");
|
2012-09-20 14:14:08 +08:00
|
|
|
NEONRegSpacing RegSpc = (NEONRegSpacing)TableEntry->RegSpacing;
|
2010-09-14 07:01:35 +08:00
|
|
|
unsigned NumRegs = TableEntry->NumRegs;
|
|
|
|
|
|
|
|
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
|
|
|
|
TII->get(TableEntry->RealOpc));
|
2010-08-26 07:27:42 +08:00
|
|
|
unsigned OpIdx = 0;
|
2011-11-01 03:11:23 +08:00
|
|
|
if (TableEntry->isUpdating)
|
2017-01-13 17:58:52 +08:00
|
|
|
MIB.add(MI.getOperand(OpIdx++));
|
2010-09-09 08:15:32 +08:00
|
|
|
|
2010-08-26 07:27:42 +08:00
|
|
|
// Copy the addrmode6 operands.
|
2017-01-13 17:58:52 +08:00
|
|
|
MIB.add(MI.getOperand(OpIdx++));
|
|
|
|
MIB.add(MI.getOperand(OpIdx++));
|
2018-06-10 17:27:27 +08:00
|
|
|
|
|
|
|
if (TableEntry->hasWritebackOperand) {
|
|
|
|
// TODO: The writing-back pseudo instructions we translate here are all
|
|
|
|
// defined to take am6offset nodes that are capable to represent both fixed
|
|
|
|
// and register forms. Some real instructions, however, do not rely on
|
|
|
|
// am6offset and have separate definitions for such forms. When this is the
|
|
|
|
// case, fixed forms do not take any offset nodes, so here we skip them for
|
|
|
|
// such instructions. Once all real and pseudo writing-back instructions are
|
|
|
|
// rewritten without use of am6offset nodes, this code will go away.
|
|
|
|
const MachineOperand &AM6Offset = MI.getOperand(OpIdx++);
|
|
|
|
if (TableEntry->RealOpc == ARM::VST1d8Qwb_fixed ||
|
|
|
|
TableEntry->RealOpc == ARM::VST1d16Qwb_fixed ||
|
|
|
|
TableEntry->RealOpc == ARM::VST1d32Qwb_fixed ||
|
|
|
|
TableEntry->RealOpc == ARM::VST1d64Qwb_fixed ||
|
|
|
|
TableEntry->RealOpc == ARM::VST1d8Twb_fixed ||
|
|
|
|
TableEntry->RealOpc == ARM::VST1d16Twb_fixed ||
|
|
|
|
TableEntry->RealOpc == ARM::VST1d32Twb_fixed ||
|
|
|
|
TableEntry->RealOpc == ARM::VST1d64Twb_fixed) {
|
|
|
|
assert(AM6Offset.getReg() == 0 &&
|
|
|
|
"A fixed writing-back pseudo instruction provides an offset "
|
|
|
|
"register!");
|
|
|
|
} else {
|
|
|
|
MIB.add(AM6Offset);
|
|
|
|
}
|
|
|
|
}
|
2010-08-26 07:27:42 +08:00
|
|
|
|
|
|
|
bool SrcIsKill = MI.getOperand(OpIdx).isKill();
|
2012-06-16 01:46:54 +08:00
|
|
|
bool SrcIsUndef = MI.getOperand(OpIdx).isUndef();
|
Apply llvm-prefer-register-over-unsigned from clang-tidy to LLVM
Summary:
This clang-tidy check is looking for unsigned integer variables whose initializer
starts with an implicit cast from llvm::Register and changes the type of the
variable to llvm::Register (dropping the llvm:: where possible).
Partial reverts in:
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
X86FixupLEAs.cpp - Some functions return unsigned and arguably should be MCRegister
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
HexagonBitSimplify.cpp - Function takes BitTracker::RegisterRef which appears to be unsigned&
MachineVerifier.cpp - Ambiguous operator==() given MCRegister and const Register
PPCFastISel.cpp - No Register::operator-=()
PeepholeOptimizer.cpp - TargetInstrInfo::optimizeLoadInstr() takes an unsigned&
MachineTraceMetrics.cpp - MachineTraceMetrics lacks a suitable constructor
Manual fixups in:
ARMFastISel.cpp - ARMEmitLoad() now takes a Register& instead of unsigned&
HexagonSplitDouble.cpp - Ternary operator was ambiguous between unsigned/Register
HexagonConstExtenders.cpp - Has a local class named Register, used llvm::Register instead of Register.
PPCFastISel.cpp - PPCEmitLoad() now takes a Register& instead of unsigned&
Depends on D65919
Reviewers: arsenm, bogner, craig.topper, RKSimon
Reviewed By: arsenm
Subscribers: RKSimon, craig.topper, lenary, aemerson, wuzish, jholewinski, MatzeB, qcolombet, dschuff, jyknight, dylanmckay, sdardis, nemanjai, jvesely, wdng, nhaehnle, sbc100, jgravelle-google, kristof.beyls, hiraditya, aheejin, kbarton, fedor.sergeev, javed.absar, asb, rbar, johnrusso, simoncook, apazos, sabuasal, niosHD, jrtc27, MaskRay, zzheng, edward-jones, atanasyan, rogfer01, MartinMosbeck, brucehoult, the_o, tpr, PkmX, jocewei, jsji, Petar.Avramovic, asbirlea, Jim, s.egerton, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D65962
llvm-svn: 369041
2019-08-16 03:22:08 +08:00
|
|
|
Register SrcReg = MI.getOperand(OpIdx++).getReg();
|
2010-08-26 07:27:42 +08:00
|
|
|
unsigned D0, D1, D2, D3;
|
2010-09-14 07:01:35 +08:00
|
|
|
GetDSubRegs(SrcReg, RegSpc, TRI, D0, D1, D2, D3);
|
2012-06-16 01:46:54 +08:00
|
|
|
MIB.addReg(D0, getUndefRegState(SrcIsUndef));
|
2011-11-01 05:50:31 +08:00
|
|
|
if (NumRegs > 1 && TableEntry->copyAllListRegs)
|
2012-06-16 01:46:54 +08:00
|
|
|
MIB.addReg(D1, getUndefRegState(SrcIsUndef));
|
2011-11-01 05:50:31 +08:00
|
|
|
if (NumRegs > 2 && TableEntry->copyAllListRegs)
|
2012-06-16 01:46:54 +08:00
|
|
|
MIB.addReg(D2, getUndefRegState(SrcIsUndef));
|
2011-11-01 05:50:31 +08:00
|
|
|
if (NumRegs > 3 && TableEntry->copyAllListRegs)
|
2012-06-16 01:46:54 +08:00
|
|
|
MIB.addReg(D3, getUndefRegState(SrcIsUndef));
|
2010-09-16 12:25:37 +08:00
|
|
|
|
|
|
|
// Copy the predicate operands.
|
2017-01-13 17:58:52 +08:00
|
|
|
MIB.add(MI.getOperand(OpIdx++));
|
|
|
|
MIB.add(MI.getOperand(OpIdx++));
|
2010-09-16 12:25:37 +08:00
|
|
|
|
2012-06-16 01:46:54 +08:00
|
|
|
if (SrcIsKill && !SrcIsUndef) // Add an implicit kill for the super-reg.
|
2011-04-29 13:24:29 +08:00
|
|
|
MIB->addRegisterKilled(SrcReg, TRI, true);
|
PR 18466: Fix ARM Pseudo Expansion
When expanding neon pseudo stores, it may miss the implicit uses of sub
regs, which may cause post RA scheduler reorder instructions that
breakes anti dependency.
For example:
VST1d64QPseudo %R0<kill>, 16, %Q9_Q10, pred:14, pred:%noreg
will be expanded to
VST1d64Q %R0<kill>, 16, %D18, pred:14, pred:%noreg;
An instruction that defines %D20 may be scheduled before the store by
mistake.
This patches adds implicit uses for such case. For the example above, it
emits:
VST1d64Q %R0<kill>, 8, %D18, pred:14, pred:%noreg, %Q9_Q10<imp-use>
llvm-svn: 199282
2014-01-15 09:32:12 +08:00
|
|
|
else if (!SrcIsUndef)
|
|
|
|
MIB.addReg(SrcReg, RegState::Implicit); // Add implicit uses for src reg.
|
2010-09-14 07:55:10 +08:00
|
|
|
TransferImpOps(MI, MIB, MIB);
|
2011-04-19 08:04:03 +08:00
|
|
|
|
|
|
|
// Transfer memoperands.
|
2018-08-17 05:30:05 +08:00
|
|
|
MIB.cloneMemRefs(MI);
|
2010-08-26 07:27:42 +08:00
|
|
|
MI.eraseFromParent();
|
2019-05-24 16:25:02 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "To: "; MIB.getInstr()->dump(););
|
2010-08-26 07:27:42 +08:00
|
|
|
}
|
|
|
|
|
2010-09-14 07:01:35 +08:00
|
|
|
/// ExpandLaneOp - Translate VLD*LN and VST*LN instructions with Q, QQ or QQQQ
|
|
|
|
/// register operands to real instructions with D register operands.
|
|
|
|
void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) {
|
|
|
|
MachineInstr &MI = *MBBI;
|
|
|
|
MachineBasicBlock &MBB = *MI.getParent();
|
2019-05-24 16:25:02 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump());
|
2010-09-14 07:01:35 +08:00
|
|
|
|
|
|
|
const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
|
|
|
|
assert(TableEntry && "NEONLdStTable lookup failed");
|
2012-09-20 14:14:08 +08:00
|
|
|
NEONRegSpacing RegSpc = (NEONRegSpacing)TableEntry->RegSpacing;
|
2010-09-14 07:01:35 +08:00
|
|
|
unsigned NumRegs = TableEntry->NumRegs;
|
|
|
|
unsigned RegElts = TableEntry->RegElts;
|
|
|
|
|
|
|
|
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
|
|
|
|
TII->get(TableEntry->RealOpc));
|
|
|
|
unsigned OpIdx = 0;
|
|
|
|
// The lane operand is always the 3rd from last operand, before the 2
|
|
|
|
// predicate operands.
|
|
|
|
unsigned Lane = MI.getOperand(MI.getDesc().getNumOperands() - 3).getImm();
|
|
|
|
|
|
|
|
// Adjust the lane and spacing as needed for Q registers.
|
|
|
|
assert(RegSpc != OddDblSpc && "unexpected register spacing for VLD/VST-lane");
|
|
|
|
if (RegSpc == EvenDblSpc && Lane >= RegElts) {
|
|
|
|
RegSpc = OddDblSpc;
|
|
|
|
Lane -= RegElts;
|
|
|
|
}
|
|
|
|
assert(Lane < RegElts && "out of range lane for VLD/VST-lane");
|
|
|
|
|
2011-01-24 01:05:06 +08:00
|
|
|
unsigned D0 = 0, D1 = 0, D2 = 0, D3 = 0;
|
2010-09-15 05:12:05 +08:00
|
|
|
unsigned DstReg = 0;
|
|
|
|
bool DstIsDead = false;
|
2010-09-14 07:01:35 +08:00
|
|
|
if (TableEntry->IsLoad) {
|
|
|
|
DstIsDead = MI.getOperand(OpIdx).isDead();
|
|
|
|
DstReg = MI.getOperand(OpIdx++).getReg();
|
|
|
|
GetDSubRegs(DstReg, RegSpc, TRI, D0, D1, D2, D3);
|
2010-11-02 06:04:05 +08:00
|
|
|
MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead));
|
|
|
|
if (NumRegs > 1)
|
|
|
|
MIB.addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
|
2010-09-14 07:01:35 +08:00
|
|
|
if (NumRegs > 2)
|
|
|
|
MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead));
|
|
|
|
if (NumRegs > 3)
|
|
|
|
MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead));
|
|
|
|
}
|
|
|
|
|
2011-11-01 03:11:23 +08:00
|
|
|
if (TableEntry->isUpdating)
|
2017-01-13 17:58:52 +08:00
|
|
|
MIB.add(MI.getOperand(OpIdx++));
|
2010-09-14 07:01:35 +08:00
|
|
|
|
|
|
|
// Copy the addrmode6 operands.
|
2017-01-13 17:58:52 +08:00
|
|
|
MIB.add(MI.getOperand(OpIdx++));
|
|
|
|
MIB.add(MI.getOperand(OpIdx++));
|
2010-09-14 07:01:35 +08:00
|
|
|
// Copy the am6offset operand.
|
2011-11-01 03:11:23 +08:00
|
|
|
if (TableEntry->hasWritebackOperand)
|
2017-01-13 17:58:52 +08:00
|
|
|
MIB.add(MI.getOperand(OpIdx++));
|
2010-09-14 07:01:35 +08:00
|
|
|
|
|
|
|
// Grab the super-register source.
|
|
|
|
MachineOperand MO = MI.getOperand(OpIdx++);
|
|
|
|
if (!TableEntry->IsLoad)
|
|
|
|
GetDSubRegs(MO.getReg(), RegSpc, TRI, D0, D1, D2, D3);
|
|
|
|
|
|
|
|
// Add the subregs as sources of the new instruction.
|
|
|
|
unsigned SrcFlags = (getUndefRegState(MO.isUndef()) |
|
|
|
|
getKillRegState(MO.isKill()));
|
2010-11-02 06:04:05 +08:00
|
|
|
MIB.addReg(D0, SrcFlags);
|
|
|
|
if (NumRegs > 1)
|
|
|
|
MIB.addReg(D1, SrcFlags);
|
2010-09-14 07:01:35 +08:00
|
|
|
if (NumRegs > 2)
|
|
|
|
MIB.addReg(D2, SrcFlags);
|
|
|
|
if (NumRegs > 3)
|
|
|
|
MIB.addReg(D3, SrcFlags);
|
|
|
|
|
|
|
|
// Add the lane number operand.
|
|
|
|
MIB.addImm(Lane);
|
2010-09-16 12:25:37 +08:00
|
|
|
OpIdx += 1;
|
|
|
|
|
|
|
|
// Copy the predicate operands.
|
2017-01-13 17:58:52 +08:00
|
|
|
MIB.add(MI.getOperand(OpIdx++));
|
|
|
|
MIB.add(MI.getOperand(OpIdx++));
|
2010-09-14 07:01:35 +08:00
|
|
|
|
|
|
|
// Copy the super-register source to be an implicit source.
|
|
|
|
MO.setImplicit(true);
|
2017-01-13 17:58:52 +08:00
|
|
|
MIB.add(MO);
|
2010-09-14 07:01:35 +08:00
|
|
|
if (TableEntry->IsLoad)
|
|
|
|
// Add an implicit def for the super-register.
|
|
|
|
MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
|
|
|
|
TransferImpOps(MI, MIB, MIB);
|
2011-12-17 08:07:02 +08:00
|
|
|
// Transfer memoperands.
|
2018-08-17 05:30:05 +08:00
|
|
|
MIB.cloneMemRefs(MI);
|
2010-09-14 07:01:35 +08:00
|
|
|
MI.eraseFromParent();
|
|
|
|
}
|
|
|
|
|
2010-09-14 07:55:10 +08:00
|
|
|
/// ExpandVTBL - Translate VTBL and VTBX pseudo instructions with Q or QQ
|
|
|
|
/// register operands to real instructions with D register operands.
|
|
|
|
void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI,
|
2011-12-16 06:27:11 +08:00
|
|
|
unsigned Opc, bool IsExt) {
|
2010-09-14 07:55:10 +08:00
|
|
|
MachineInstr &MI = *MBBI;
|
|
|
|
MachineBasicBlock &MBB = *MI.getParent();
|
2019-05-24 16:25:02 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump());
|
2010-09-14 07:55:10 +08:00
|
|
|
|
|
|
|
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc));
|
|
|
|
unsigned OpIdx = 0;
|
|
|
|
|
|
|
|
// Transfer the destination register operand.
|
2017-01-13 17:58:52 +08:00
|
|
|
MIB.add(MI.getOperand(OpIdx++));
|
2017-12-13 01:53:59 +08:00
|
|
|
if (IsExt) {
|
|
|
|
MachineOperand VdSrc(MI.getOperand(OpIdx++));
|
|
|
|
MIB.add(VdSrc);
|
|
|
|
}
|
2010-09-14 07:55:10 +08:00
|
|
|
|
|
|
|
bool SrcIsKill = MI.getOperand(OpIdx).isKill();
|
Apply llvm-prefer-register-over-unsigned from clang-tidy to LLVM
Summary:
This clang-tidy check is looking for unsigned integer variables whose initializer
starts with an implicit cast from llvm::Register and changes the type of the
variable to llvm::Register (dropping the llvm:: where possible).
Partial reverts in:
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
X86FixupLEAs.cpp - Some functions return unsigned and arguably should be MCRegister
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
HexagonBitSimplify.cpp - Function takes BitTracker::RegisterRef which appears to be unsigned&
MachineVerifier.cpp - Ambiguous operator==() given MCRegister and const Register
PPCFastISel.cpp - No Register::operator-=()
PeepholeOptimizer.cpp - TargetInstrInfo::optimizeLoadInstr() takes an unsigned&
MachineTraceMetrics.cpp - MachineTraceMetrics lacks a suitable constructor
Manual fixups in:
ARMFastISel.cpp - ARMEmitLoad() now takes a Register& instead of unsigned&
HexagonSplitDouble.cpp - Ternary operator was ambiguous between unsigned/Register
HexagonConstExtenders.cpp - Has a local class named Register, used llvm::Register instead of Register.
PPCFastISel.cpp - PPCEmitLoad() now takes a Register& instead of unsigned&
Depends on D65919
Reviewers: arsenm, bogner, craig.topper, RKSimon
Reviewed By: arsenm
Subscribers: RKSimon, craig.topper, lenary, aemerson, wuzish, jholewinski, MatzeB, qcolombet, dschuff, jyknight, dylanmckay, sdardis, nemanjai, jvesely, wdng, nhaehnle, sbc100, jgravelle-google, kristof.beyls, hiraditya, aheejin, kbarton, fedor.sergeev, javed.absar, asb, rbar, johnrusso, simoncook, apazos, sabuasal, niosHD, jrtc27, MaskRay, zzheng, edward-jones, atanasyan, rogfer01, MartinMosbeck, brucehoult, the_o, tpr, PkmX, jocewei, jsji, Petar.Avramovic, asbirlea, Jim, s.egerton, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D65962
llvm-svn: 369041
2019-08-16 03:22:08 +08:00
|
|
|
Register SrcReg = MI.getOperand(OpIdx++).getReg();
|
2010-09-14 07:55:10 +08:00
|
|
|
unsigned D0, D1, D2, D3;
|
|
|
|
GetDSubRegs(SrcReg, SingleSpc, TRI, D0, D1, D2, D3);
|
2011-12-16 06:27:11 +08:00
|
|
|
MIB.addReg(D0);
|
2010-09-14 07:55:10 +08:00
|
|
|
|
|
|
|
// Copy the other source register operand.
|
2017-12-13 01:53:59 +08:00
|
|
|
MachineOperand VmSrc(MI.getOperand(OpIdx++));
|
|
|
|
MIB.add(VmSrc);
|
2010-09-16 12:25:37 +08:00
|
|
|
|
|
|
|
// Copy the predicate operands.
|
2017-01-13 17:58:52 +08:00
|
|
|
MIB.add(MI.getOperand(OpIdx++));
|
|
|
|
MIB.add(MI.getOperand(OpIdx++));
|
2010-09-14 07:55:10 +08:00
|
|
|
|
PR 18466: Fix ARM Pseudo Expansion
When expanding neon pseudo stores, it may miss the implicit uses of sub
regs, which may cause post RA scheduler reorder instructions that
breakes anti dependency.
For example:
VST1d64QPseudo %R0<kill>, 16, %Q9_Q10, pred:14, pred:%noreg
will be expanded to
VST1d64Q %R0<kill>, 16, %D18, pred:14, pred:%noreg;
An instruction that defines %D20 may be scheduled before the store by
mistake.
This patches adds implicit uses for such case. For the example above, it
emits:
VST1d64Q %R0<kill>, 8, %D18, pred:14, pred:%noreg, %Q9_Q10<imp-use>
llvm-svn: 199282
2014-01-15 09:32:12 +08:00
|
|
|
// Add an implicit kill and use for the super-reg.
|
|
|
|
MIB.addReg(SrcReg, RegState::Implicit | getKillRegState(SrcIsKill));
|
2010-09-14 07:55:10 +08:00
|
|
|
TransferImpOps(MI, MIB, MIB);
|
|
|
|
MI.eraseFromParent();
|
2019-05-24 16:25:02 +08:00
|
|
|
LLVM_DEBUG(dbgs() << "To: "; MIB.getInstr()->dump(););
|
2010-09-14 07:55:10 +08:00
|
|
|
}
|
|
|
|
|
2014-04-30 12:54:58 +08:00
|
|
|
static bool IsAnAddressOperand(const MachineOperand &MO) {
|
|
|
|
// This check is overly conservative. Unless we are certain that the machine
|
|
|
|
// operand is not a symbol reference, we return that it is a symbol reference.
|
|
|
|
// This is important as the load pair may not be split up Windows.
|
|
|
|
switch (MO.getType()) {
|
|
|
|
case MachineOperand::MO_Register:
|
|
|
|
case MachineOperand::MO_Immediate:
|
|
|
|
case MachineOperand::MO_CImmediate:
|
|
|
|
case MachineOperand::MO_FPImmediate:
|
2019-08-13 23:34:38 +08:00
|
|
|
case MachineOperand::MO_ShuffleMask:
|
2014-04-30 12:54:58 +08:00
|
|
|
return false;
|
|
|
|
case MachineOperand::MO_MachineBasicBlock:
|
|
|
|
return true;
|
|
|
|
case MachineOperand::MO_FrameIndex:
|
|
|
|
return false;
|
|
|
|
case MachineOperand::MO_ConstantPoolIndex:
|
|
|
|
case MachineOperand::MO_TargetIndex:
|
|
|
|
case MachineOperand::MO_JumpTableIndex:
|
|
|
|
case MachineOperand::MO_ExternalSymbol:
|
|
|
|
case MachineOperand::MO_GlobalAddress:
|
|
|
|
case MachineOperand::MO_BlockAddress:
|
|
|
|
return true;
|
|
|
|
case MachineOperand::MO_RegisterMask:
|
|
|
|
case MachineOperand::MO_RegisterLiveOut:
|
|
|
|
return false;
|
|
|
|
case MachineOperand::MO_Metadata:
|
|
|
|
case MachineOperand::MO_MCSymbol:
|
|
|
|
return true;
|
|
|
|
case MachineOperand::MO_CFIIndex:
|
|
|
|
return false;
|
2016-07-30 04:32:59 +08:00
|
|
|
case MachineOperand::MO_IntrinsicID:
|
2016-08-18 04:25:25 +08:00
|
|
|
case MachineOperand::MO_Predicate:
|
2016-07-30 04:32:59 +08:00
|
|
|
llvm_unreachable("should not exist post-isel");
|
2014-04-30 12:54:58 +08:00
|
|
|
}
|
2014-04-30 13:12:41 +08:00
|
|
|
llvm_unreachable("unhandled machine operand type");
|
2014-04-30 12:54:58 +08:00
|
|
|
}
|
|
|
|
|
2017-09-06 06:54:06 +08:00
|
|
|
/// Return a copy of \p MO with its implicit flag set; \p MO itself is left
/// untouched.
static MachineOperand makeImplicit(const MachineOperand &MO) {
  MachineOperand ImplicitCopy(MO);
  ImplicitCopy.setImplicit(true);
  return ImplicitCopy;
}
|
|
|
|
|
2011-01-20 16:34:58 +08:00
|
|
|
/// Expand a 32-bit move pseudo (MOVi32imm, MOVCCi32imm, t2MOVi32imm or
/// t2MOVCCi32imm) into two real instructions that build the value in halves.
///
/// For the ARM-mode opcodes on a subtarget without v6T2 the pair is
/// MOVi + ORRri with the immediate split into two shifter-operand parts.
/// Otherwise a MOVi16 / MOVTi16 pair (Thumb2 variants for the t2 opcodes) is
/// emitted, carrying either the immediate halves or the symbol tagged with the
/// LO16 / HI16 target flags. On Windows targets an address-carrying pair is
/// finalized as a bundle. The pseudo is erased afterwards.
void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator &MBBI) {
  MachineInstr &MI = *MBBI;
  const unsigned Opcode = MI.getOpcode();
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);

  const MachineOperand &DstMO = MI.getOperand(0);
  Register DstReg = DstMO.getReg();
  const bool DstIsDead = DstMO.isDead();

  // The conditional forms carry an extra operand ahead of the value operand.
  const bool IsCondMove =
      Opcode == ARM::MOVCCi32imm || Opcode == ARM::t2MOVCCi32imm;
  const MachineOperand &MO = MI.getOperand(IsCondMove ? 2 : 1);
  const bool RequiresBundling =
      STI->isTargetWindows() && IsAnAddressOperand(MO);
  const unsigned DstDefFlags = RegState::Define | getDeadRegState(DstIsDead);

  LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump());

  const bool IsARMModeOpc =
      Opcode == ARM::MOVi32imm || Opcode == ARM::MOVCCi32imm;
  if (IsARMModeOpc && !STI->hasV6T2Ops()) {
    // FIXME Windows CE supports older ARM CPUs
    assert(!STI->isTargetWindows() && "Windows on ARM requires ARMv7+");

    // No 16-bit move instructions here: use a movi followed by an orr.
    MachineInstrBuilder MovLo =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi), DstReg);
    MachineInstrBuilder OrrHi =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::ORRri))
            .addReg(DstReg, DstDefFlags)
            .addReg(DstReg);

    assert (MO.isImm() && "MOVi32imm w/ non-immediate source operand!");
    const unsigned ImmVal = static_cast<unsigned>(MO.getImm());
    const unsigned FirstPart = ARM_AM::getSOImmTwoPartFirst(ImmVal);
    const unsigned SecondPart = ARM_AM::getSOImmTwoPartSecond(ImmVal);

    MovLo.addImm(FirstPart);
    MovLo.cloneMemRefs(MI);
    MovLo.addImm(Pred).addReg(PredReg).add(condCodeOp());

    OrrHi.addImm(SecondPart);
    OrrHi.cloneMemRefs(MI);
    OrrHi.addImm(Pred).addReg(PredReg).add(condCodeOp());

    // The conditional form keeps its operand 1 as an implicit operand of the
    // first instruction.
    if (IsCondMove)
      MovLo.add(makeImplicit(MI.getOperand(1)));
    TransferImpOps(MI, MovLo, OrrHi);
    MI.eraseFromParent();
    return;
  }

  // movw/movt style expansion; pick the encoding family from the pseudo.
  const bool IsThumb2Opc =
      Opcode == ARM::t2MOVi32imm || Opcode == ARM::t2MOVCCi32imm;
  const unsigned LO16Opc = IsThumb2Opc ? ARM::t2MOVi16 : ARM::MOVi16;
  const unsigned HI16Opc = IsThumb2Opc ? ARM::t2MOVTi16 : ARM::MOVTi16;

  MachineInstrBuilder LO16 =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LO16Opc), DstReg);
  MachineInstrBuilder HI16 =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc))
          .addReg(DstReg, DstDefFlags)
          .addReg(DstReg);

  if (MO.isImm()) {
    // Plain immediate: low half goes to the first move, high half to the
    // second.
    const unsigned Imm = MO.getImm();
    LO16.addImm(Imm & 0xffff);
    HI16.addImm((Imm >> 16) & 0xffff);
  } else if (MO.isSymbol()) {
    // External symbol: same symbol on both, tagged with the half selector.
    const char *SymName = MO.getSymbolName();
    const unsigned TF = MO.getTargetFlags();
    LO16.addExternalSymbol(SymName, TF | ARMII::MO_LO16);
    HI16.addExternalSymbol(SymName, TF | ARMII::MO_HI16);
  } else {
    // Every other operand kind is handled as a global address.
    const GlobalValue *GV = MO.getGlobal();
    const unsigned TF = MO.getTargetFlags();
    LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16);
    HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16);
  }

  LO16.cloneMemRefs(MI);
  HI16.cloneMemRefs(MI);
  LO16.addImm(Pred).addReg(PredReg);
  HI16.addImm(Pred).addReg(PredReg);

  // Bundle the two new instructions (everything from LO16 up to the pseudo).
  if (RequiresBundling)
    finalizeBundle(MBB, LO16->getIterator(), MBBI->getIterator());

  // The conditional form keeps its operand 1 as an implicit operand of the
  // first instruction.
  if (IsCondMove)
    LO16.add(makeImplicit(MI.getOperand(1)));
  TransferImpOps(MI, LO16, HI16);
  MI.eraseFromParent();
  LLVM_DEBUG(dbgs() << "To:        "; LO16.getInstr()->dump(););
  LLVM_DEBUG(dbgs() << "And:       "; HI16.getInstr()->dump(););
}
|
|
|
|
|
2016-04-19 05:48:55 +08:00
|
|
|
/// Expand a CMP_SWAP pseudo-inst to an ldrex/strex loop as simply as
|
2017-05-31 09:21:35 +08:00
|
|
|
/// possible. This only gets used at -O0 so we don't care about efficiency of
|
|
|
|
/// the generated code.
|
2016-04-19 05:48:55 +08:00
|
|
|
bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB,
|
|
|
|
MachineBasicBlock::iterator MBBI,
|
|
|
|
unsigned LdrexOp, unsigned StrexOp,
|
|
|
|
unsigned UxtOp,
|
|
|
|
MachineBasicBlock::iterator &NextMBBI) {
|
|
|
|
bool IsThumb = STI->isThumb();
|
|
|
|
MachineInstr &MI = *MBBI;
|
|
|
|
DebugLoc DL = MI.getDebugLoc();
|
2017-05-31 09:21:35 +08:00
|
|
|
const MachineOperand &Dest = MI.getOperand(0);
|
Apply llvm-prefer-register-over-unsigned from clang-tidy to LLVM
Summary:
This clang-tidy check is looking for unsigned integer variables whose initializer
starts with an implicit cast from llvm::Register and changes the type of the
variable to llvm::Register (dropping the llvm:: where possible).
Partial reverts in:
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
X86FixupLEAs.cpp - Some functions return unsigned and arguably should be MCRegister
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
HexagonBitSimplify.cpp - Function takes BitTracker::RegisterRef which appears to be unsigned&
MachineVerifier.cpp - Ambiguous operator==() given MCRegister and const Register
PPCFastISel.cpp - No Register::operator-=()
PeepholeOptimizer.cpp - TargetInstrInfo::optimizeLoadInstr() takes an unsigned&
MachineTraceMetrics.cpp - MachineTraceMetrics lacks a suitable constructor
Manual fixups in:
ARMFastISel.cpp - ARMEmitLoad() now takes a Register& instead of unsigned&
HexagonSplitDouble.cpp - Ternary operator was ambiguous between unsigned/Register
HexagonConstExtenders.cpp - Has a local class named Register, used llvm::Register instead of Register.
PPCFastISel.cpp - PPCEmitLoad() now takes a Register& instead of unsigned&
Depends on D65919
Reviewers: arsenm, bogner, craig.topper, RKSimon
Reviewed By: arsenm
Subscribers: RKSimon, craig.topper, lenary, aemerson, wuzish, jholewinski, MatzeB, qcolombet, dschuff, jyknight, dylanmckay, sdardis, nemanjai, jvesely, wdng, nhaehnle, sbc100, jgravelle-google, kristof.beyls, hiraditya, aheejin, kbarton, fedor.sergeev, javed.absar, asb, rbar, johnrusso, simoncook, apazos, sabuasal, niosHD, jrtc27, MaskRay, zzheng, edward-jones, atanasyan, rogfer01, MartinMosbeck, brucehoult, the_o, tpr, PkmX, jocewei, jsji, Petar.Avramovic, asbirlea, Jim, s.egerton, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D65962
llvm-svn: 369041
2019-08-16 03:22:08 +08:00
|
|
|
Register TempReg = MI.getOperand(1).getReg();
|
2017-05-31 09:21:35 +08:00
|
|
|
// Duplicating undef operands into 2 instructions does not guarantee the same
|
|
|
|
// value on both; However undef should be replaced by xzr anyway.
|
|
|
|
assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
|
Apply llvm-prefer-register-over-unsigned from clang-tidy to LLVM
Summary:
This clang-tidy check is looking for unsigned integer variables whose initializer
starts with an implicit cast from llvm::Register and changes the type of the
variable to llvm::Register (dropping the llvm:: where possible).
Partial reverts in:
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
X86FixupLEAs.cpp - Some functions return unsigned and arguably should be MCRegister
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
HexagonBitSimplify.cpp - Function takes BitTracker::RegisterRef which appears to be unsigned&
MachineVerifier.cpp - Ambiguous operator==() given MCRegister and const Register
PPCFastISel.cpp - No Register::operator-=()
PeepholeOptimizer.cpp - TargetInstrInfo::optimizeLoadInstr() takes an unsigned&
MachineTraceMetrics.cpp - MachineTraceMetrics lacks a suitable constructor
Manual fixups in:
ARMFastISel.cpp - ARMEmitLoad() now takes a Register& instead of unsigned&
HexagonSplitDouble.cpp - Ternary operator was ambiguous between unsigned/Register
HexagonConstExtenders.cpp - Has a local class named Register, used llvm::Register instead of Register.
PPCFastISel.cpp - PPCEmitLoad() now takes a Register& instead of unsigned&
Depends on D65919
Reviewers: arsenm, bogner, craig.topper, RKSimon
Reviewed By: arsenm
Subscribers: RKSimon, craig.topper, lenary, aemerson, wuzish, jholewinski, MatzeB, qcolombet, dschuff, jyknight, dylanmckay, sdardis, nemanjai, jvesely, wdng, nhaehnle, sbc100, jgravelle-google, kristof.beyls, hiraditya, aheejin, kbarton, fedor.sergeev, javed.absar, asb, rbar, johnrusso, simoncook, apazos, sabuasal, niosHD, jrtc27, MaskRay, zzheng, edward-jones, atanasyan, rogfer01, MartinMosbeck, brucehoult, the_o, tpr, PkmX, jocewei, jsji, Petar.Avramovic, asbirlea, Jim, s.egerton, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D65962
llvm-svn: 369041
2019-08-16 03:22:08 +08:00
|
|
|
Register AddrReg = MI.getOperand(2).getReg();
|
|
|
|
Register DesiredReg = MI.getOperand(3).getReg();
|
|
|
|
Register NewReg = MI.getOperand(4).getReg();
|
2016-04-19 05:48:55 +08:00
|
|
|
|
|
|
|
MachineFunction *MF = MBB.getParent();
|
|
|
|
auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
|
|
|
|
auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
|
|
|
|
auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
|
|
|
|
|
|
|
|
MF->insert(++MBB.getIterator(), LoadCmpBB);
|
|
|
|
MF->insert(++LoadCmpBB->getIterator(), StoreBB);
|
|
|
|
MF->insert(++StoreBB->getIterator(), DoneBB);
|
|
|
|
|
|
|
|
if (UxtOp) {
|
|
|
|
MachineInstrBuilder MIB =
|
2017-05-31 09:21:35 +08:00
|
|
|
BuildMI(MBB, MBBI, DL, TII->get(UxtOp), DesiredReg)
|
|
|
|
.addReg(DesiredReg, RegState::Kill);
|
2016-04-19 05:48:55 +08:00
|
|
|
if (!IsThumb)
|
|
|
|
MIB.addImm(0);
|
2017-01-13 17:37:56 +08:00
|
|
|
MIB.add(predOps(ARMCC::AL));
|
2016-04-19 05:48:55 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// .Lloadcmp:
|
|
|
|
// ldrex rDest, [rAddr]
|
|
|
|
// cmp rDest, rDesired
|
|
|
|
// bne .Ldone
|
|
|
|
|
|
|
|
MachineInstrBuilder MIB;
|
|
|
|
MIB = BuildMI(LoadCmpBB, DL, TII->get(LdrexOp), Dest.getReg());
|
2017-05-31 09:21:35 +08:00
|
|
|
MIB.addReg(AddrReg);
|
2016-04-19 05:48:55 +08:00
|
|
|
if (LdrexOp == ARM::t2LDREX)
|
|
|
|
MIB.addImm(0); // a 32-bit Thumb ldrex (only) allows an offset.
|
2017-01-13 17:37:56 +08:00
|
|
|
MIB.add(predOps(ARMCC::AL));
|
2016-04-19 05:48:55 +08:00
|
|
|
|
|
|
|
unsigned CMPrr = IsThumb ? ARM::tCMPhir : ARM::CMPrr;
|
2017-01-13 17:37:56 +08:00
|
|
|
BuildMI(LoadCmpBB, DL, TII->get(CMPrr))
|
|
|
|
.addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
|
2017-05-31 09:21:35 +08:00
|
|
|
.addReg(DesiredReg)
|
2017-01-13 17:37:56 +08:00
|
|
|
.add(predOps(ARMCC::AL));
|
2016-04-19 05:48:55 +08:00
|
|
|
unsigned Bcc = IsThumb ? ARM::tBcc : ARM::Bcc;
|
|
|
|
BuildMI(LoadCmpBB, DL, TII->get(Bcc))
|
|
|
|
.addMBB(DoneBB)
|
|
|
|
.addImm(ARMCC::NE)
|
|
|
|
.addReg(ARM::CPSR, RegState::Kill);
|
|
|
|
LoadCmpBB->addSuccessor(DoneBB);
|
|
|
|
LoadCmpBB->addSuccessor(StoreBB);
|
|
|
|
|
|
|
|
// .Lstore:
|
2017-08-10 06:22:05 +08:00
|
|
|
// strex rTempReg, rNew, [rAddr]
|
|
|
|
// cmp rTempReg, #0
|
2016-04-19 05:48:55 +08:00
|
|
|
// bne .Lloadcmp
|
2017-08-10 06:22:05 +08:00
|
|
|
MIB = BuildMI(StoreBB, DL, TII->get(StrexOp), TempReg)
|
2017-05-31 09:21:35 +08:00
|
|
|
.addReg(NewReg)
|
|
|
|
.addReg(AddrReg);
|
2016-04-19 05:48:55 +08:00
|
|
|
if (StrexOp == ARM::t2STREX)
|
|
|
|
MIB.addImm(0); // a 32-bit Thumb strex (only) allows an offset.
|
2017-01-13 17:37:56 +08:00
|
|
|
MIB.add(predOps(ARMCC::AL));
|
2016-04-19 05:48:55 +08:00
|
|
|
|
|
|
|
unsigned CMPri = IsThumb ? ARM::t2CMPri : ARM::CMPri;
|
2017-01-13 17:37:56 +08:00
|
|
|
BuildMI(StoreBB, DL, TII->get(CMPri))
|
2017-08-10 06:22:05 +08:00
|
|
|
.addReg(TempReg, RegState::Kill)
|
2017-01-13 17:37:56 +08:00
|
|
|
.addImm(0)
|
|
|
|
.add(predOps(ARMCC::AL));
|
2016-04-19 05:48:55 +08:00
|
|
|
BuildMI(StoreBB, DL, TII->get(Bcc))
|
|
|
|
.addMBB(LoadCmpBB)
|
|
|
|
.addImm(ARMCC::NE)
|
|
|
|
.addReg(ARM::CPSR, RegState::Kill);
|
|
|
|
StoreBB->addSuccessor(LoadCmpBB);
|
|
|
|
StoreBB->addSuccessor(DoneBB);
|
|
|
|
|
|
|
|
DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
|
|
|
|
DoneBB->transferSuccessors(&MBB);
|
|
|
|
|
2016-04-28 04:32:54 +08:00
|
|
|
MBB.addSuccessor(LoadCmpBB);
|
|
|
|
|
2016-04-19 05:48:55 +08:00
|
|
|
NextMBBI = MBB.end();
|
|
|
|
MI.eraseFromParent();
|
2017-05-31 09:21:35 +08:00
|
|
|
|
|
|
|
// Recompute livein lists.
|
|
|
|
LivePhysRegs LiveRegs;
|
2017-09-07 04:45:24 +08:00
|
|
|
computeAndAddLiveIns(LiveRegs, *DoneBB);
|
|
|
|
computeAndAddLiveIns(LiveRegs, *StoreBB);
|
|
|
|
computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
|
2017-05-31 09:21:35 +08:00
|
|
|
// Do an extra pass around the loop to get loop carried registers right.
|
|
|
|
StoreBB->clearLiveIns();
|
2017-09-07 04:45:24 +08:00
|
|
|
computeAndAddLiveIns(LiveRegs, *StoreBB);
|
2017-05-31 09:21:35 +08:00
|
|
|
LoadCmpBB->clearLiveIns();
|
2017-09-07 04:45:24 +08:00
|
|
|
computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
|
2017-05-31 09:21:35 +08:00
|
|
|
|
2016-04-19 05:48:55 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// ARM's ldrexd/strexd take a consecutive register pair (represented as a
|
|
|
|
/// single GPRPair register), Thumb's take two separate registers so we need to
|
|
|
|
/// extract the subregs from the pair.
|
|
|
|
static void addExclusiveRegPair(MachineInstrBuilder &MIB, MachineOperand &Reg,
|
|
|
|
unsigned Flags, bool IsThumb,
|
|
|
|
const TargetRegisterInfo *TRI) {
|
|
|
|
if (IsThumb) {
|
Apply llvm-prefer-register-over-unsigned from clang-tidy to LLVM
Summary:
This clang-tidy check is looking for unsigned integer variables whose initializer
starts with an implicit cast from llvm::Register and changes the type of the
variable to llvm::Register (dropping the llvm:: where possible).
Partial reverts in:
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
X86FixupLEAs.cpp - Some functions return unsigned and arguably should be MCRegister
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
HexagonBitSimplify.cpp - Function takes BitTracker::RegisterRef which appears to be unsigned&
MachineVerifier.cpp - Ambiguous operator==() given MCRegister and const Register
PPCFastISel.cpp - No Register::operator-=()
PeepholeOptimizer.cpp - TargetInstrInfo::optimizeLoadInstr() takes an unsigned&
MachineTraceMetrics.cpp - MachineTraceMetrics lacks a suitable constructor
Manual fixups in:
ARMFastISel.cpp - ARMEmitLoad() now takes a Register& instead of unsigned&
HexagonSplitDouble.cpp - Ternary operator was ambiguous between unsigned/Register
HexagonConstExtenders.cpp - Has a local class named Register, used llvm::Register instead of Register.
PPCFastISel.cpp - PPCEmitLoad() now takes a Register& instead of unsigned&
Depends on D65919
Reviewers: arsenm, bogner, craig.topper, RKSimon
Reviewed By: arsenm
Subscribers: RKSimon, craig.topper, lenary, aemerson, wuzish, jholewinski, MatzeB, qcolombet, dschuff, jyknight, dylanmckay, sdardis, nemanjai, jvesely, wdng, nhaehnle, sbc100, jgravelle-google, kristof.beyls, hiraditya, aheejin, kbarton, fedor.sergeev, javed.absar, asb, rbar, johnrusso, simoncook, apazos, sabuasal, niosHD, jrtc27, MaskRay, zzheng, edward-jones, atanasyan, rogfer01, MartinMosbeck, brucehoult, the_o, tpr, PkmX, jocewei, jsji, Petar.Avramovic, asbirlea, Jim, s.egerton, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D65962
llvm-svn: 369041
2019-08-16 03:22:08 +08:00
|
|
|
Register RegLo = TRI->getSubReg(Reg.getReg(), ARM::gsub_0);
|
|
|
|
Register RegHi = TRI->getSubReg(Reg.getReg(), ARM::gsub_1);
|
2018-11-03 02:22:15 +08:00
|
|
|
MIB.addReg(RegLo, Flags);
|
|
|
|
MIB.addReg(RegHi, Flags);
|
2016-04-19 05:48:55 +08:00
|
|
|
} else
|
2018-11-03 02:22:15 +08:00
|
|
|
MIB.addReg(Reg.getReg(), Flags);
|
2016-04-19 05:48:55 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Expand a 64-bit CMP_SWAP to an ldrexd/strexd loop.
|
|
|
|
bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
|
|
|
|
MachineBasicBlock::iterator MBBI,
|
|
|
|
MachineBasicBlock::iterator &NextMBBI) {
|
|
|
|
bool IsThumb = STI->isThumb();
|
|
|
|
MachineInstr &MI = *MBBI;
|
|
|
|
DebugLoc DL = MI.getDebugLoc();
|
|
|
|
MachineOperand &Dest = MI.getOperand(0);
|
Apply llvm-prefer-register-over-unsigned from clang-tidy to LLVM
Summary:
This clang-tidy check is looking for unsigned integer variables whose initializer
starts with an implicit cast from llvm::Register and changes the type of the
variable to llvm::Register (dropping the llvm:: where possible).
Partial reverts in:
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
X86FixupLEAs.cpp - Some functions return unsigned and arguably should be MCRegister
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
HexagonBitSimplify.cpp - Function takes BitTracker::RegisterRef which appears to be unsigned&
MachineVerifier.cpp - Ambiguous operator==() given MCRegister and const Register
PPCFastISel.cpp - No Register::operator-=()
PeepholeOptimizer.cpp - TargetInstrInfo::optimizeLoadInstr() takes an unsigned&
MachineTraceMetrics.cpp - MachineTraceMetrics lacks a suitable constructor
Manual fixups in:
ARMFastISel.cpp - ARMEmitLoad() now takes a Register& instead of unsigned&
HexagonSplitDouble.cpp - Ternary operator was ambiguous between unsigned/Register
HexagonConstExtenders.cpp - Has a local class named Register, used llvm::Register instead of Register.
PPCFastISel.cpp - PPCEmitLoad() now takes a Register& instead of unsigned&
Depends on D65919
Reviewers: arsenm, bogner, craig.topper, RKSimon
Reviewed By: arsenm
Subscribers: RKSimon, craig.topper, lenary, aemerson, wuzish, jholewinski, MatzeB, qcolombet, dschuff, jyknight, dylanmckay, sdardis, nemanjai, jvesely, wdng, nhaehnle, sbc100, jgravelle-google, kristof.beyls, hiraditya, aheejin, kbarton, fedor.sergeev, javed.absar, asb, rbar, johnrusso, simoncook, apazos, sabuasal, niosHD, jrtc27, MaskRay, zzheng, edward-jones, atanasyan, rogfer01, MartinMosbeck, brucehoult, the_o, tpr, PkmX, jocewei, jsji, Petar.Avramovic, asbirlea, Jim, s.egerton, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D65962
llvm-svn: 369041
2019-08-16 03:22:08 +08:00
|
|
|
Register TempReg = MI.getOperand(1).getReg();
|
2017-05-31 09:21:35 +08:00
|
|
|
// Duplicating undef operands into 2 instructions does not guarantee the same
|
|
|
|
// value on both; However undef should be replaced by xzr anyway.
|
|
|
|
assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
|
Apply llvm-prefer-register-over-unsigned from clang-tidy to LLVM
Summary:
This clang-tidy check is looking for unsigned integer variables whose initializer
starts with an implicit cast from llvm::Register and changes the type of the
variable to llvm::Register (dropping the llvm:: where possible).
Partial reverts in:
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
X86FixupLEAs.cpp - Some functions return unsigned and arguably should be MCRegister
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
HexagonBitSimplify.cpp - Function takes BitTracker::RegisterRef which appears to be unsigned&
MachineVerifier.cpp - Ambiguous operator==() given MCRegister and const Register
PPCFastISel.cpp - No Register::operator-=()
PeepholeOptimizer.cpp - TargetInstrInfo::optimizeLoadInstr() takes an unsigned&
MachineTraceMetrics.cpp - MachineTraceMetrics lacks a suitable constructor
Manual fixups in:
ARMFastISel.cpp - ARMEmitLoad() now takes a Register& instead of unsigned&
HexagonSplitDouble.cpp - Ternary operator was ambiguous between unsigned/Register
HexagonConstExtenders.cpp - Has a local class named Register, used llvm::Register instead of Register.
PPCFastISel.cpp - PPCEmitLoad() now takes a Register& instead of unsigned&
Depends on D65919
Reviewers: arsenm, bogner, craig.topper, RKSimon
Reviewed By: arsenm
Subscribers: RKSimon, craig.topper, lenary, aemerson, wuzish, jholewinski, MatzeB, qcolombet, dschuff, jyknight, dylanmckay, sdardis, nemanjai, jvesely, wdng, nhaehnle, sbc100, jgravelle-google, kristof.beyls, hiraditya, aheejin, kbarton, fedor.sergeev, javed.absar, asb, rbar, johnrusso, simoncook, apazos, sabuasal, niosHD, jrtc27, MaskRay, zzheng, edward-jones, atanasyan, rogfer01, MartinMosbeck, brucehoult, the_o, tpr, PkmX, jocewei, jsji, Petar.Avramovic, asbirlea, Jim, s.egerton, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D65962
llvm-svn: 369041
2019-08-16 03:22:08 +08:00
|
|
|
Register AddrReg = MI.getOperand(2).getReg();
|
|
|
|
Register DesiredReg = MI.getOperand(3).getReg();
|
2017-05-31 09:21:35 +08:00
|
|
|
MachineOperand New = MI.getOperand(4);
|
|
|
|
New.setIsKill(false);
|
2016-04-19 05:48:55 +08:00
|
|
|
|
Apply llvm-prefer-register-over-unsigned from clang-tidy to LLVM
Summary:
This clang-tidy check is looking for unsigned integer variables whose initializer
starts with an implicit cast from llvm::Register and changes the type of the
variable to llvm::Register (dropping the llvm:: where possible).
Partial reverts in:
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
X86FixupLEAs.cpp - Some functions return unsigned and arguably should be MCRegister
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
HexagonBitSimplify.cpp - Function takes BitTracker::RegisterRef which appears to be unsigned&
MachineVerifier.cpp - Ambiguous operator==() given MCRegister and const Register
PPCFastISel.cpp - No Register::operator-=()
PeepholeOptimizer.cpp - TargetInstrInfo::optimizeLoadInstr() takes an unsigned&
MachineTraceMetrics.cpp - MachineTraceMetrics lacks a suitable constructor
Manual fixups in:
ARMFastISel.cpp - ARMEmitLoad() now takes a Register& instead of unsigned&
HexagonSplitDouble.cpp - Ternary operator was ambiguous between unsigned/Register
HexagonConstExtenders.cpp - Has a local class named Register, used llvm::Register instead of Register.
PPCFastISel.cpp - PPCEmitLoad() now takes a Register& instead of unsigned&
Depends on D65919
Reviewers: arsenm, bogner, craig.topper, RKSimon
Reviewed By: arsenm
Subscribers: RKSimon, craig.topper, lenary, aemerson, wuzish, jholewinski, MatzeB, qcolombet, dschuff, jyknight, dylanmckay, sdardis, nemanjai, jvesely, wdng, nhaehnle, sbc100, jgravelle-google, kristof.beyls, hiraditya, aheejin, kbarton, fedor.sergeev, javed.absar, asb, rbar, johnrusso, simoncook, apazos, sabuasal, niosHD, jrtc27, MaskRay, zzheng, edward-jones, atanasyan, rogfer01, MartinMosbeck, brucehoult, the_o, tpr, PkmX, jocewei, jsji, Petar.Avramovic, asbirlea, Jim, s.egerton, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D65962
llvm-svn: 369041
2019-08-16 03:22:08 +08:00
|
|
|
Register DestLo = TRI->getSubReg(Dest.getReg(), ARM::gsub_0);
|
|
|
|
Register DestHi = TRI->getSubReg(Dest.getReg(), ARM::gsub_1);
|
|
|
|
Register DesiredLo = TRI->getSubReg(DesiredReg, ARM::gsub_0);
|
|
|
|
Register DesiredHi = TRI->getSubReg(DesiredReg, ARM::gsub_1);
|
2016-04-19 05:48:55 +08:00
|
|
|
|
|
|
|
MachineFunction *MF = MBB.getParent();
|
|
|
|
auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
|
|
|
|
auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
|
|
|
|
auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
|
|
|
|
|
|
|
|
MF->insert(++MBB.getIterator(), LoadCmpBB);
|
|
|
|
MF->insert(++LoadCmpBB->getIterator(), StoreBB);
|
|
|
|
MF->insert(++StoreBB->getIterator(), DoneBB);
|
|
|
|
|
|
|
|
// .Lloadcmp:
|
|
|
|
// ldrexd rDestLo, rDestHi, [rAddr]
|
|
|
|
// cmp rDestLo, rDesiredLo
|
2017-12-07 18:40:31 +08:00
|
|
|
// sbcs dead rTempReg, rDestHi, rDesiredHi
|
2016-04-19 05:48:55 +08:00
|
|
|
// bne .Ldone
|
|
|
|
unsigned LDREXD = IsThumb ? ARM::t2LDREXD : ARM::LDREXD;
|
|
|
|
MachineInstrBuilder MIB;
|
|
|
|
MIB = BuildMI(LoadCmpBB, DL, TII->get(LDREXD));
|
|
|
|
addExclusiveRegPair(MIB, Dest, RegState::Define, IsThumb, TRI);
|
2017-05-31 09:21:35 +08:00
|
|
|
MIB.addReg(AddrReg).add(predOps(ARMCC::AL));
|
2016-04-19 05:48:55 +08:00
|
|
|
|
|
|
|
unsigned CMPrr = IsThumb ? ARM::tCMPhir : ARM::CMPrr;
|
2017-01-13 17:37:56 +08:00
|
|
|
BuildMI(LoadCmpBB, DL, TII->get(CMPrr))
|
|
|
|
.addReg(DestLo, getKillRegState(Dest.isDead()))
|
2017-05-31 09:21:35 +08:00
|
|
|
.addReg(DesiredLo)
|
2017-01-13 17:37:56 +08:00
|
|
|
.add(predOps(ARMCC::AL));
|
2016-04-19 05:48:55 +08:00
|
|
|
|
2016-12-02 06:58:35 +08:00
|
|
|
BuildMI(LoadCmpBB, DL, TII->get(CMPrr))
|
|
|
|
.addReg(DestHi, getKillRegState(Dest.isDead()))
|
2017-05-31 09:21:35 +08:00
|
|
|
.addReg(DesiredHi)
|
2016-12-02 06:58:35 +08:00
|
|
|
.addImm(ARMCC::EQ).addReg(ARM::CPSR, RegState::Kill);
|
2016-04-19 05:48:55 +08:00
|
|
|
|
|
|
|
unsigned Bcc = IsThumb ? ARM::tBcc : ARM::Bcc;
|
|
|
|
BuildMI(LoadCmpBB, DL, TII->get(Bcc))
|
|
|
|
.addMBB(DoneBB)
|
|
|
|
.addImm(ARMCC::NE)
|
|
|
|
.addReg(ARM::CPSR, RegState::Kill);
|
|
|
|
LoadCmpBB->addSuccessor(DoneBB);
|
|
|
|
LoadCmpBB->addSuccessor(StoreBB);
|
|
|
|
|
|
|
|
// .Lstore:
|
2017-08-10 06:22:05 +08:00
|
|
|
// strexd rTempReg, rNewLo, rNewHi, [rAddr]
|
|
|
|
// cmp rTempReg, #0
|
2016-04-19 05:48:55 +08:00
|
|
|
// bne .Lloadcmp
|
|
|
|
unsigned STREXD = IsThumb ? ARM::t2STREXD : ARM::STREXD;
|
2017-08-10 06:22:05 +08:00
|
|
|
MIB = BuildMI(StoreBB, DL, TII->get(STREXD), TempReg);
|
2018-11-03 02:22:15 +08:00
|
|
|
unsigned Flags = getKillRegState(New.isDead());
|
|
|
|
addExclusiveRegPair(MIB, New, Flags, IsThumb, TRI);
|
2017-05-31 09:21:35 +08:00
|
|
|
MIB.addReg(AddrReg).add(predOps(ARMCC::AL));
|
2016-04-19 05:48:55 +08:00
|
|
|
|
|
|
|
unsigned CMPri = IsThumb ? ARM::t2CMPri : ARM::CMPri;
|
2017-01-13 17:37:56 +08:00
|
|
|
BuildMI(StoreBB, DL, TII->get(CMPri))
|
2017-08-10 06:22:05 +08:00
|
|
|
.addReg(TempReg, RegState::Kill)
|
2017-01-13 17:37:56 +08:00
|
|
|
.addImm(0)
|
|
|
|
.add(predOps(ARMCC::AL));
|
2016-04-19 05:48:55 +08:00
|
|
|
BuildMI(StoreBB, DL, TII->get(Bcc))
|
|
|
|
.addMBB(LoadCmpBB)
|
|
|
|
.addImm(ARMCC::NE)
|
|
|
|
.addReg(ARM::CPSR, RegState::Kill);
|
|
|
|
StoreBB->addSuccessor(LoadCmpBB);
|
|
|
|
StoreBB->addSuccessor(DoneBB);
|
|
|
|
|
|
|
|
DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
|
|
|
|
DoneBB->transferSuccessors(&MBB);
|
|
|
|
|
2016-04-28 04:32:54 +08:00
|
|
|
MBB.addSuccessor(LoadCmpBB);
|
|
|
|
|
2016-04-19 05:48:55 +08:00
|
|
|
NextMBBI = MBB.end();
|
|
|
|
MI.eraseFromParent();
|
2017-05-31 09:21:35 +08:00
|
|
|
|
|
|
|
// Recompute livein lists.
|
|
|
|
LivePhysRegs LiveRegs;
|
2017-09-07 04:45:24 +08:00
|
|
|
computeAndAddLiveIns(LiveRegs, *DoneBB);
|
|
|
|
computeAndAddLiveIns(LiveRegs, *StoreBB);
|
|
|
|
computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
|
2017-05-31 09:21:35 +08:00
|
|
|
// Do an extra pass around the loop to get loop carried registers right.
|
|
|
|
StoreBB->clearLiveIns();
|
2017-09-07 04:45:24 +08:00
|
|
|
computeAndAddLiveIns(LiveRegs, *StoreBB);
|
2017-05-31 09:21:35 +08:00
|
|
|
LoadCmpBB->clearLiveIns();
|
2017-09-07 04:45:24 +08:00
|
|
|
computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
|
2017-05-31 09:21:35 +08:00
|
|
|
|
2016-04-19 05:48:55 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2011-01-20 16:34:58 +08:00
|
|
|
bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
|
2016-04-19 05:48:55 +08:00
|
|
|
MachineBasicBlock::iterator MBBI,
|
|
|
|
MachineBasicBlock::iterator &NextMBBI) {
|
2011-01-20 16:34:58 +08:00
|
|
|
MachineInstr &MI = *MBBI;
|
|
|
|
unsigned Opcode = MI.getOpcode();
|
|
|
|
switch (Opcode) {
|
|
|
|
default:
|
|
|
|
return false;
|
2015-07-21 05:42:14 +08:00
|
|
|
|
|
|
|
case ARM::TCRETURNdi:
|
|
|
|
case ARM::TCRETURNri: {
|
|
|
|
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
|
|
|
|
assert(MBBI->isReturn() &&
|
|
|
|
"Can only insert epilog into returning blocks");
|
|
|
|
unsigned RetOpcode = MBBI->getOpcode();
|
|
|
|
DebugLoc dl = MBBI->getDebugLoc();
|
|
|
|
const ARMBaseInstrInfo &TII = *static_cast<const ARMBaseInstrInfo *>(
|
|
|
|
MBB.getParent()->getSubtarget().getInstrInfo());
|
|
|
|
|
|
|
|
// Tail call return: adjust the stack pointer and jump to callee.
|
|
|
|
MBBI = MBB.getLastNonDebugInstr();
|
|
|
|
MachineOperand &JumpTarget = MBBI->getOperand(0);
|
|
|
|
|
|
|
|
// Jump to label or value in register.
|
|
|
|
if (RetOpcode == ARM::TCRETURNdi) {
|
|
|
|
unsigned TCOpcode =
|
|
|
|
STI->isThumb()
|
|
|
|
? (STI->isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND)
|
|
|
|
: ARM::TAILJMPd;
|
|
|
|
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode));
|
|
|
|
if (JumpTarget.isGlobal())
|
|
|
|
MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
|
|
|
|
JumpTarget.getTargetFlags());
|
|
|
|
else {
|
|
|
|
assert(JumpTarget.isSymbol());
|
|
|
|
MIB.addExternalSymbol(JumpTarget.getSymbolName(),
|
|
|
|
JumpTarget.getTargetFlags());
|
|
|
|
}
|
|
|
|
|
|
|
|
// Add the default predicate in Thumb mode.
|
|
|
|
if (STI->isThumb())
|
2017-01-20 16:15:24 +08:00
|
|
|
MIB.add(predOps(ARMCC::AL));
|
2015-07-21 05:42:14 +08:00
|
|
|
} else if (RetOpcode == ARM::TCRETURNri) {
|
2017-08-29 04:20:47 +08:00
|
|
|
unsigned Opcode =
|
|
|
|
STI->isThumb() ? ARM::tTAILJMPr
|
|
|
|
: (STI->hasV4TOps() ? ARM::TAILJMPr : ARM::TAILJMPr4);
|
2015-07-21 05:42:14 +08:00
|
|
|
BuildMI(MBB, MBBI, dl,
|
2017-08-29 04:20:47 +08:00
|
|
|
TII.get(Opcode))
|
2015-07-21 05:42:14 +08:00
|
|
|
.addReg(JumpTarget.getReg(), RegState::Kill);
|
|
|
|
}
|
|
|
|
|
2016-07-09 04:21:17 +08:00
|
|
|
auto NewMI = std::prev(MBBI);
|
2015-07-21 05:42:14 +08:00
|
|
|
for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i)
|
|
|
|
NewMI->addOperand(MBBI->getOperand(i));
|
|
|
|
|
|
|
|
// Delete the pseudo instruction TCRETURN.
|
|
|
|
MBB.erase(MBBI);
|
|
|
|
MBBI = NewMI;
|
|
|
|
return true;
|
|
|
|
}
|
2011-03-12 07:09:50 +08:00
|
|
|
case ARM::VMOVScc:
|
|
|
|
case ARM::VMOVDcc: {
|
|
|
|
unsigned newOpc = Opcode == ARM::VMOVScc ? ARM::VMOVS : ARM::VMOVD;
|
|
|
|
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(newOpc),
|
|
|
|
MI.getOperand(1).getReg())
|
2017-01-13 17:58:52 +08:00
|
|
|
.add(MI.getOperand(2))
|
|
|
|
.addImm(MI.getOperand(3).getImm()) // 'pred'
|
2017-09-06 06:54:06 +08:00
|
|
|
.add(MI.getOperand(4))
|
|
|
|
.add(makeImplicit(MI.getOperand(1)));
|
2011-03-12 07:09:50 +08:00
|
|
|
|
|
|
|
MI.eraseFromParent();
|
|
|
|
return true;
|
|
|
|
}
|
2011-07-02 01:14:11 +08:00
|
|
|
case ARM::t2MOVCCr:
|
2011-03-11 07:56:09 +08:00
|
|
|
case ARM::MOVCCr: {
|
2011-07-02 01:14:11 +08:00
|
|
|
unsigned Opc = AFI->isThumbFunction() ? ARM::t2MOVr : ARM::MOVr;
|
|
|
|
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc),
|
2011-03-11 07:56:09 +08:00
|
|
|
MI.getOperand(1).getReg())
|
2017-01-13 17:58:52 +08:00
|
|
|
.add(MI.getOperand(2))
|
|
|
|
.addImm(MI.getOperand(3).getImm()) // 'pred'
|
|
|
|
.add(MI.getOperand(4))
|
2017-09-06 06:54:06 +08:00
|
|
|
.add(condCodeOp()) // 's' bit
|
|
|
|
.add(makeImplicit(MI.getOperand(1)));
|
2011-03-11 07:56:09 +08:00
|
|
|
|
|
|
|
MI.eraseFromParent();
|
|
|
|
return true;
|
|
|
|
}
|
2011-07-22 07:38:37 +08:00
|
|
|
case ARM::MOVCCsi: {
|
|
|
|
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi),
|
|
|
|
(MI.getOperand(1).getReg()))
|
2017-01-13 17:58:52 +08:00
|
|
|
.add(MI.getOperand(2))
|
|
|
|
.addImm(MI.getOperand(3).getImm())
|
|
|
|
.addImm(MI.getOperand(4).getImm()) // 'pred'
|
|
|
|
.add(MI.getOperand(5))
|
2017-09-06 06:54:06 +08:00
|
|
|
.add(condCodeOp()) // 's' bit
|
|
|
|
.add(makeImplicit(MI.getOperand(1)));
|
2011-07-22 07:38:37 +08:00
|
|
|
|
|
|
|
MI.eraseFromParent();
|
|
|
|
return true;
|
|
|
|
}
|
2011-07-22 02:54:16 +08:00
|
|
|
case ARM::MOVCCsr: {
|
2011-07-22 07:38:37 +08:00
|
|
|
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsr),
|
2011-03-11 07:56:09 +08:00
|
|
|
(MI.getOperand(1).getReg()))
|
2017-01-13 17:58:52 +08:00
|
|
|
.add(MI.getOperand(2))
|
|
|
|
.add(MI.getOperand(3))
|
|
|
|
.addImm(MI.getOperand(4).getImm())
|
|
|
|
.addImm(MI.getOperand(5).getImm()) // 'pred'
|
|
|
|
.add(MI.getOperand(6))
|
2017-09-06 06:54:06 +08:00
|
|
|
.add(condCodeOp()) // 's' bit
|
|
|
|
.add(makeImplicit(MI.getOperand(1)));
|
2011-03-11 09:09:28 +08:00
|
|
|
|
|
|
|
MI.eraseFromParent();
|
|
|
|
return true;
|
|
|
|
}
|
2013-08-22 17:57:11 +08:00
|
|
|
case ARM::t2MOVCCi16:
|
2011-03-11 09:09:28 +08:00
|
|
|
case ARM::MOVCCi16: {
|
2013-08-22 17:57:11 +08:00
|
|
|
unsigned NewOpc = AFI->isThumbFunction() ? ARM::t2MOVi16 : ARM::MOVi16;
|
|
|
|
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc),
|
2011-03-11 09:09:28 +08:00
|
|
|
MI.getOperand(1).getReg())
|
2017-01-13 17:58:52 +08:00
|
|
|
.addImm(MI.getOperand(2).getImm())
|
|
|
|
.addImm(MI.getOperand(3).getImm()) // 'pred'
|
2017-09-06 06:54:06 +08:00
|
|
|
.add(MI.getOperand(4))
|
|
|
|
.add(makeImplicit(MI.getOperand(1)));
|
2011-03-11 09:09:28 +08:00
|
|
|
MI.eraseFromParent();
|
|
|
|
return true;
|
|
|
|
}
|
2011-07-02 01:14:11 +08:00
|
|
|
case ARM::t2MOVCCi:
|
2011-03-11 09:09:28 +08:00
|
|
|
case ARM::MOVCCi: {
|
2011-07-02 01:14:11 +08:00
|
|
|
unsigned Opc = AFI->isThumbFunction() ? ARM::t2MOVi : ARM::MOVi;
|
|
|
|
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc),
|
2011-03-11 09:09:28 +08:00
|
|
|
MI.getOperand(1).getReg())
|
2017-01-13 17:58:52 +08:00
|
|
|
.addImm(MI.getOperand(2).getImm())
|
|
|
|
.addImm(MI.getOperand(3).getImm()) // 'pred'
|
|
|
|
.add(MI.getOperand(4))
|
2017-09-06 06:54:06 +08:00
|
|
|
.add(condCodeOp()) // 's' bit
|
|
|
|
.add(makeImplicit(MI.getOperand(1)));
|
2011-03-12 03:55:55 +08:00
|
|
|
|
|
|
|
MI.eraseFromParent();
|
|
|
|
return true;
|
|
|
|
}
|
2013-08-22 17:57:11 +08:00
|
|
|
case ARM::t2MVNCCi:
|
2011-03-12 03:55:55 +08:00
|
|
|
case ARM::MVNCCi: {
|
2013-08-22 17:57:11 +08:00
|
|
|
unsigned Opc = AFI->isThumbFunction() ? ARM::t2MVNi : ARM::MVNi;
|
|
|
|
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc),
|
2011-03-12 03:55:55 +08:00
|
|
|
MI.getOperand(1).getReg())
|
2017-01-13 17:58:52 +08:00
|
|
|
.addImm(MI.getOperand(2).getImm())
|
|
|
|
.addImm(MI.getOperand(3).getImm()) // 'pred'
|
|
|
|
.add(MI.getOperand(4))
|
2017-09-06 06:54:06 +08:00
|
|
|
.add(condCodeOp()) // 's' bit
|
|
|
|
.add(makeImplicit(MI.getOperand(1)));
|
2011-03-11 07:56:09 +08:00
|
|
|
|
|
|
|
MI.eraseFromParent();
|
|
|
|
return true;
|
|
|
|
}
|
2013-08-22 17:57:11 +08:00
|
|
|
case ARM::t2MOVCClsl:
|
|
|
|
case ARM::t2MOVCClsr:
|
|
|
|
case ARM::t2MOVCCasr:
|
|
|
|
case ARM::t2MOVCCror: {
|
|
|
|
unsigned NewOpc;
|
|
|
|
switch (Opcode) {
|
|
|
|
case ARM::t2MOVCClsl: NewOpc = ARM::t2LSLri; break;
|
|
|
|
case ARM::t2MOVCClsr: NewOpc = ARM::t2LSRri; break;
|
|
|
|
case ARM::t2MOVCCasr: NewOpc = ARM::t2ASRri; break;
|
|
|
|
case ARM::t2MOVCCror: NewOpc = ARM::t2RORri; break;
|
|
|
|
default: llvm_unreachable("unexpeced conditional move");
|
|
|
|
}
|
|
|
|
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc),
|
|
|
|
MI.getOperand(1).getReg())
|
2017-01-13 17:58:52 +08:00
|
|
|
.add(MI.getOperand(2))
|
|
|
|
.addImm(MI.getOperand(3).getImm())
|
|
|
|
.addImm(MI.getOperand(4).getImm()) // 'pred'
|
|
|
|
.add(MI.getOperand(5))
|
2017-09-06 06:54:06 +08:00
|
|
|
.add(condCodeOp()) // 's' bit
|
|
|
|
.add(makeImplicit(MI.getOperand(1)));
|
2013-08-22 17:57:11 +08:00
|
|
|
MI.eraseFromParent();
|
|
|
|
return true;
|
|
|
|
}
|
2012-11-07 07:05:24 +08:00
|
|
|
case ARM::Int_eh_sjlj_dispatchsetup: {
|
2010-10-20 07:27:08 +08:00
|
|
|
MachineFunction &MF = *MI.getParent()->getParent();
|
|
|
|
const ARMBaseInstrInfo *AII =
|
|
|
|
static_cast<const ARMBaseInstrInfo*>(TII);
|
|
|
|
const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
|
|
|
|
// For functions using a base pointer, we rematerialize it (via the frame
|
|
|
|
// pointer) here since eh.sjlj.setjmp and eh.sjlj.longjmp don't do it
|
|
|
|
// for us. Otherwise, expand to nothing.
|
|
|
|
if (RI.hasBasePointer(MF)) {
|
|
|
|
int32_t NumBytes = AFI->getFramePtrSpillOffset();
|
Apply llvm-prefer-register-over-unsigned from clang-tidy to LLVM
Summary:
This clang-tidy check is looking for unsigned integer variables whose initializer
starts with an implicit cast from llvm::Register and changes the type of the
variable to llvm::Register (dropping the llvm:: where possible).
Partial reverts in:
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
X86FixupLEAs.cpp - Some functions return unsigned and arguably should be MCRegister
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
HexagonBitSimplify.cpp - Function takes BitTracker::RegisterRef which appears to be unsigned&
MachineVerifier.cpp - Ambiguous operator==() given MCRegister and const Register
PPCFastISel.cpp - No Register::operator-=()
PeepholeOptimizer.cpp - TargetInstrInfo::optimizeLoadInstr() takes an unsigned&
MachineTraceMetrics.cpp - MachineTraceMetrics lacks a suitable constructor
Manual fixups in:
ARMFastISel.cpp - ARMEmitLoad() now takes a Register& instead of unsigned&
HexagonSplitDouble.cpp - Ternary operator was ambiguous between unsigned/Register
HexagonConstExtenders.cpp - Has a local class named Register, used llvm::Register instead of Register.
PPCFastISel.cpp - PPCEmitLoad() now takes a Register& instead of unsigned&
Depends on D65919
Reviewers: arsenm, bogner, craig.topper, RKSimon
Reviewed By: arsenm
Subscribers: RKSimon, craig.topper, lenary, aemerson, wuzish, jholewinski, MatzeB, qcolombet, dschuff, jyknight, dylanmckay, sdardis, nemanjai, jvesely, wdng, nhaehnle, sbc100, jgravelle-google, kristof.beyls, hiraditya, aheejin, kbarton, fedor.sergeev, javed.absar, asb, rbar, johnrusso, simoncook, apazos, sabuasal, niosHD, jrtc27, MaskRay, zzheng, edward-jones, atanasyan, rogfer01, MartinMosbeck, brucehoult, the_o, tpr, PkmX, jocewei, jsji, Petar.Avramovic, asbirlea, Jim, s.egerton, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D65962
llvm-svn: 369041
2019-08-16 03:22:08 +08:00
|
|
|
Register FramePtr = RI.getFrameRegister(MF);
|
2014-08-05 10:39:49 +08:00
|
|
|
assert(MF.getSubtarget().getFrameLowering()->hasFP(MF) &&
|
|
|
|
"base pointer without frame pointer?");
|
2010-10-20 07:27:08 +08:00
|
|
|
|
|
|
|
if (AFI->isThumb2Function()) {
|
2012-03-27 15:21:54 +08:00
|
|
|
emitT2RegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
|
|
|
|
FramePtr, -NumBytes, ARMCC::AL, 0, *TII);
|
2010-10-20 07:27:08 +08:00
|
|
|
} else if (AFI->isThumbFunction()) {
|
2012-03-27 15:21:54 +08:00
|
|
|
emitThumbRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
|
|
|
|
FramePtr, -NumBytes, *TII, RI);
|
2010-10-20 07:27:08 +08:00
|
|
|
} else {
|
2012-03-27 15:21:54 +08:00
|
|
|
emitARMRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
|
|
|
|
FramePtr, -NumBytes, ARMCC::AL, 0,
|
|
|
|
*TII);
|
2010-10-20 07:27:08 +08:00
|
|
|
}
|
2010-10-20 08:02:50 +08:00
|
|
|
// If there's dynamic realignment, adjust for it.
|
2010-10-20 09:10:01 +08:00
|
|
|
if (RI.needsStackRealignment(MF)) {
|
2016-07-29 02:40:00 +08:00
|
|
|
MachineFrameInfo &MFI = MF.getFrameInfo();
|
|
|
|
unsigned MaxAlign = MFI.getMaxAlignment();
|
2010-10-20 08:02:50 +08:00
|
|
|
assert (!AFI->isThumb1OnlyFunction());
|
|
|
|
// Emit bic r6, r6, #(MaxAlign - 1)
|
2015-01-08 23:09:14 +08:00
|
|
|
assert(MaxAlign <= 256 && "The BIC instruction cannot encode "
|
|
|
|
"immediates larger than 256 with all lower "
|
|
|
|
"bits set.");
|
2010-10-20 08:02:50 +08:00
|
|
|
unsigned bicOpc = AFI->isThumbFunction() ?
|
|
|
|
ARM::t2BICri : ARM::BICri;
|
2017-01-13 18:18:01 +08:00
|
|
|
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(bicOpc), ARM::R6)
|
|
|
|
.addReg(ARM::R6, RegState::Kill)
|
|
|
|
.addImm(MaxAlign - 1)
|
|
|
|
.add(predOps(ARMCC::AL))
|
|
|
|
.add(condCodeOp());
|
2010-10-20 08:02:50 +08:00
|
|
|
}
|
2010-10-20 07:27:08 +08:00
|
|
|
|
|
|
|
}
|
|
|
|
MI.eraseFromParent();
|
2011-01-20 16:34:58 +08:00
|
|
|
return true;
|
2010-10-20 07:27:08 +08:00
|
|
|
}
|
|
|
|
|
2010-10-15 06:57:13 +08:00
|
|
|
case ARM::MOVsrl_flag:
|
|
|
|
case ARM::MOVsra_flag: {
|
2013-09-28 21:42:22 +08:00
|
|
|
// These are just fancy MOVs instructions.
|
2017-01-13 17:37:56 +08:00
|
|
|
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi),
|
|
|
|
MI.getOperand(0).getReg())
|
2017-01-13 17:58:52 +08:00
|
|
|
.add(MI.getOperand(1))
|
2017-01-13 17:37:56 +08:00
|
|
|
.addImm(ARM_AM::getSORegOpc(
|
|
|
|
(Opcode == ARM::MOVsrl_flag ? ARM_AM::lsr : ARM_AM::asr), 1))
|
|
|
|
.add(predOps(ARMCC::AL))
|
|
|
|
.addReg(ARM::CPSR, RegState::Define);
|
2010-10-15 06:57:13 +08:00
|
|
|
MI.eraseFromParent();
|
2011-01-20 16:34:58 +08:00
|
|
|
return true;
|
2010-10-15 06:57:13 +08:00
|
|
|
}
|
|
|
|
case ARM::RRX: {
|
|
|
|
// This encodes as "MOVs Rd, Rm, rrx".
|
|
|
|
MachineInstrBuilder MIB =
|
2017-01-13 17:37:56 +08:00
|
|
|
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi),
|
|
|
|
MI.getOperand(0).getReg())
|
2017-01-13 17:58:52 +08:00
|
|
|
.add(MI.getOperand(1))
|
2017-01-13 17:37:56 +08:00
|
|
|
.addImm(ARM_AM::getSORegOpc(ARM_AM::rrx, 0))
|
|
|
|
.add(predOps(ARMCC::AL))
|
2017-01-20 16:15:24 +08:00
|
|
|
.add(condCodeOp());
|
2010-10-15 06:57:13 +08:00
|
|
|
TransferImpOps(MI, MIB, MIB);
|
|
|
|
MI.eraseFromParent();
|
2011-01-20 16:34:58 +08:00
|
|
|
return true;
|
2010-10-15 06:57:13 +08:00
|
|
|
}
|
2011-07-01 03:38:01 +08:00
|
|
|
case ARM::tTPsoft:
|
2010-12-09 07:14:44 +08:00
|
|
|
case ARM::TPsoft: {
|
2017-01-30 00:46:22 +08:00
|
|
|
const bool Thumb = Opcode == ARM::tTPsoft;
|
|
|
|
|
2014-06-24 23:45:59 +08:00
|
|
|
MachineInstrBuilder MIB;
|
2017-01-30 00:46:22 +08:00
|
|
|
if (STI->genLongCalls()) {
|
|
|
|
MachineFunction *MF = MBB.getParent();
|
|
|
|
MachineConstantPool *MCP = MF->getConstantPool();
|
|
|
|
unsigned PCLabelID = AFI->createPICLabelUId();
|
|
|
|
MachineConstantPoolValue *CPV =
|
2017-12-16 06:22:58 +08:00
|
|
|
ARMConstantPoolSymbol::Create(MF->getFunction().getContext(),
|
2017-01-30 00:46:22 +08:00
|
|
|
"__aeabi_read_tp", PCLabelID, 0);
|
Apply llvm-prefer-register-over-unsigned from clang-tidy to LLVM
Summary:
This clang-tidy check is looking for unsigned integer variables whose initializer
starts with an implicit cast from llvm::Register and changes the type of the
variable to llvm::Register (dropping the llvm:: where possible).
Partial reverts in:
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
X86FixupLEAs.cpp - Some functions return unsigned and arguably should be MCRegister
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
HexagonBitSimplify.cpp - Function takes BitTracker::RegisterRef which appears to be unsigned&
MachineVerifier.cpp - Ambiguous operator==() given MCRegister and const Register
PPCFastISel.cpp - No Register::operator-=()
PeepholeOptimizer.cpp - TargetInstrInfo::optimizeLoadInstr() takes an unsigned&
MachineTraceMetrics.cpp - MachineTraceMetrics lacks a suitable constructor
Manual fixups in:
ARMFastISel.cpp - ARMEmitLoad() now takes a Register& instead of unsigned&
HexagonSplitDouble.cpp - Ternary operator was ambiguous between unsigned/Register
HexagonConstExtenders.cpp - Has a local class named Register, used llvm::Register instead of Register.
PPCFastISel.cpp - PPCEmitLoad() now takes a Register& instead of unsigned&
Depends on D65919
Reviewers: arsenm, bogner, craig.topper, RKSimon
Reviewed By: arsenm
Subscribers: RKSimon, craig.topper, lenary, aemerson, wuzish, jholewinski, MatzeB, qcolombet, dschuff, jyknight, dylanmckay, sdardis, nemanjai, jvesely, wdng, nhaehnle, sbc100, jgravelle-google, kristof.beyls, hiraditya, aheejin, kbarton, fedor.sergeev, javed.absar, asb, rbar, johnrusso, simoncook, apazos, sabuasal, niosHD, jrtc27, MaskRay, zzheng, edward-jones, atanasyan, rogfer01, MartinMosbeck, brucehoult, the_o, tpr, PkmX, jocewei, jsji, Petar.Avramovic, asbirlea, Jim, s.egerton, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D65962
llvm-svn: 369041
2019-08-16 03:22:08 +08:00
|
|
|
Register Reg = MI.getOperand(0).getReg();
|
2017-01-30 00:46:22 +08:00
|
|
|
MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
|
|
|
|
TII->get(Thumb ? ARM::tLDRpci : ARM::LDRi12), Reg)
|
|
|
|
.addConstantPoolIndex(MCP->getConstantPoolIndex(CPV, 4));
|
|
|
|
if (!Thumb)
|
|
|
|
MIB.addImm(0);
|
|
|
|
MIB.add(predOps(ARMCC::AL));
|
|
|
|
|
2014-06-24 23:45:59 +08:00
|
|
|
MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
|
2017-01-30 00:46:22 +08:00
|
|
|
TII->get(Thumb ? ARM::tBLXr : ARM::BLX));
|
|
|
|
if (Thumb)
|
|
|
|
MIB.add(predOps(ARMCC::AL));
|
|
|
|
MIB.addReg(Reg, RegState::Kill);
|
|
|
|
} else {
|
|
|
|
MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
|
|
|
|
TII->get(Thumb ? ARM::tBL : ARM::BL));
|
|
|
|
if (Thumb)
|
|
|
|
MIB.add(predOps(ARMCC::AL));
|
|
|
|
MIB.addExternalSymbol("__aeabi_read_tp", 0);
|
|
|
|
}
|
2010-12-09 07:14:44 +08:00
|
|
|
|
2018-08-17 05:30:05 +08:00
|
|
|
MIB.cloneMemRefs(MI);
|
2010-12-09 07:14:44 +08:00
|
|
|
TransferImpOps(MI, MIB, MIB);
|
|
|
|
MI.eraseFromParent();
|
2011-01-20 16:34:58 +08:00
|
|
|
return true;
|
2010-12-09 08:51:54 +08:00
|
|
|
}
|
2010-09-14 07:55:10 +08:00
|
|
|
case ARM::tLDRpci_pic:
|
2009-11-07 07:52:48 +08:00
|
|
|
case ARM::t2LDRpci_pic: {
|
|
|
|
unsigned NewLdOpc = (Opcode == ARM::tLDRpci_pic)
|
2011-02-09 06:39:40 +08:00
|
|
|
? ARM::tLDRpci : ARM::t2LDRpci;
|
Apply llvm-prefer-register-over-unsigned from clang-tidy to LLVM
Summary:
This clang-tidy check is looking for unsigned integer variables whose initializer
starts with an implicit cast from llvm::Register and changes the type of the
variable to llvm::Register (dropping the llvm:: where possible).
Partial reverts in:
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
X86FixupLEAs.cpp - Some functions return unsigned and arguably should be MCRegister
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
HexagonBitSimplify.cpp - Function takes BitTracker::RegisterRef which appears to be unsigned&
MachineVerifier.cpp - Ambiguous operator==() given MCRegister and const Register
PPCFastISel.cpp - No Register::operator-=()
PeepholeOptimizer.cpp - TargetInstrInfo::optimizeLoadInstr() takes an unsigned&
MachineTraceMetrics.cpp - MachineTraceMetrics lacks a suitable constructor
Manual fixups in:
ARMFastISel.cpp - ARMEmitLoad() now takes a Register& instead of unsigned&
HexagonSplitDouble.cpp - Ternary operator was ambiguous between unsigned/Register
HexagonConstExtenders.cpp - Has a local class named Register, used llvm::Register instead of Register.
PPCFastISel.cpp - PPCEmitLoad() now takes a Register& instead of unsigned&
Depends on D65919
Reviewers: arsenm, bogner, craig.topper, RKSimon
Reviewed By: arsenm
Subscribers: RKSimon, craig.topper, lenary, aemerson, wuzish, jholewinski, MatzeB, qcolombet, dschuff, jyknight, dylanmckay, sdardis, nemanjai, jvesely, wdng, nhaehnle, sbc100, jgravelle-google, kristof.beyls, hiraditya, aheejin, kbarton, fedor.sergeev, javed.absar, asb, rbar, johnrusso, simoncook, apazos, sabuasal, niosHD, jrtc27, MaskRay, zzheng, edward-jones, atanasyan, rogfer01, MartinMosbeck, brucehoult, the_o, tpr, PkmX, jocewei, jsji, Petar.Avramovic, asbirlea, Jim, s.egerton, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D65962
llvm-svn: 369041
2019-08-16 03:22:08 +08:00
|
|
|
Register DstReg = MI.getOperand(0).getReg();
|
2010-05-13 07:13:12 +08:00
|
|
|
bool DstIsDead = MI.getOperand(0).isDead();
|
|
|
|
MachineInstrBuilder MIB1 =
|
2017-01-13 17:37:56 +08:00
|
|
|
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewLdOpc), DstReg)
|
2017-01-13 17:58:52 +08:00
|
|
|
.add(MI.getOperand(1))
|
2017-01-13 17:37:56 +08:00
|
|
|
.add(predOps(ARMCC::AL));
|
2018-08-17 05:30:05 +08:00
|
|
|
MIB1.cloneMemRefs(MI);
|
2017-01-13 17:58:52 +08:00
|
|
|
MachineInstrBuilder MIB2 =
|
|
|
|
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tPICADD))
|
|
|
|
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
|
|
|
|
.addReg(DstReg)
|
|
|
|
.add(MI.getOperand(2));
|
2010-05-13 07:13:12 +08:00
|
|
|
TransferImpOps(MI, MIB1, MIB2);
|
2009-11-07 07:52:48 +08:00
|
|
|
MI.eraseFromParent();
|
2011-01-20 16:34:58 +08:00
|
|
|
return true;
|
2009-11-07 07:52:48 +08:00
|
|
|
}
|
2010-05-13 07:13:12 +08:00
|
|
|
|
2013-12-02 18:35:41 +08:00
|
|
|
case ARM::LDRLIT_ga_abs:
|
|
|
|
case ARM::LDRLIT_ga_pcrel:
|
|
|
|
case ARM::LDRLIT_ga_pcrel_ldr:
|
|
|
|
case ARM::tLDRLIT_ga_abs:
|
|
|
|
case ARM::tLDRLIT_ga_pcrel: {
|
Apply llvm-prefer-register-over-unsigned from clang-tidy to LLVM
Summary:
This clang-tidy check is looking for unsigned integer variables whose initializer
starts with an implicit cast from llvm::Register and changes the type of the
variable to llvm::Register (dropping the llvm:: where possible).
Partial reverts in:
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
X86FixupLEAs.cpp - Some functions return unsigned and arguably should be MCRegister
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
HexagonBitSimplify.cpp - Function takes BitTracker::RegisterRef which appears to be unsigned&
MachineVerifier.cpp - Ambiguous operator==() given MCRegister and const Register
PPCFastISel.cpp - No Register::operator-=()
PeepholeOptimizer.cpp - TargetInstrInfo::optimizeLoadInstr() takes an unsigned&
MachineTraceMetrics.cpp - MachineTraceMetrics lacks a suitable constructor
Manual fixups in:
ARMFastISel.cpp - ARMEmitLoad() now takes a Register& instead of unsigned&
HexagonSplitDouble.cpp - Ternary operator was ambiguous between unsigned/Register
HexagonConstExtenders.cpp - Has a local class named Register, used llvm::Register instead of Register.
PPCFastISel.cpp - PPCEmitLoad() now takes a Register& instead of unsigned&
Depends on D65919
Reviewers: arsenm, bogner, craig.topper, RKSimon
Reviewed By: arsenm
Subscribers: RKSimon, craig.topper, lenary, aemerson, wuzish, jholewinski, MatzeB, qcolombet, dschuff, jyknight, dylanmckay, sdardis, nemanjai, jvesely, wdng, nhaehnle, sbc100, jgravelle-google, kristof.beyls, hiraditya, aheejin, kbarton, fedor.sergeev, javed.absar, asb, rbar, johnrusso, simoncook, apazos, sabuasal, niosHD, jrtc27, MaskRay, zzheng, edward-jones, atanasyan, rogfer01, MartinMosbeck, brucehoult, the_o, tpr, PkmX, jocewei, jsji, Petar.Avramovic, asbirlea, Jim, s.egerton, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D65962
llvm-svn: 369041
2019-08-16 03:22:08 +08:00
|
|
|
Register DstReg = MI.getOperand(0).getReg();
|
2013-12-02 18:35:41 +08:00
|
|
|
bool DstIsDead = MI.getOperand(0).isDead();
|
|
|
|
const MachineOperand &MO1 = MI.getOperand(1);
|
2017-11-14 04:45:38 +08:00
|
|
|
auto Flags = MO1.getTargetFlags();
|
2013-12-02 18:35:41 +08:00
|
|
|
const GlobalValue *GV = MO1.getGlobal();
|
|
|
|
bool IsARM =
|
|
|
|
Opcode != ARM::tLDRLIT_ga_pcrel && Opcode != ARM::tLDRLIT_ga_abs;
|
|
|
|
bool IsPIC =
|
|
|
|
Opcode != ARM::LDRLIT_ga_abs && Opcode != ARM::tLDRLIT_ga_abs;
|
|
|
|
unsigned LDRLITOpc = IsARM ? ARM::LDRi12 : ARM::tLDRpci;
|
|
|
|
unsigned PICAddOpc =
|
|
|
|
IsARM
|
2014-12-11 07:40:50 +08:00
|
|
|
? (Opcode == ARM::LDRLIT_ga_pcrel_ldr ? ARM::PICLDR : ARM::PICADD)
|
2013-12-02 18:35:41 +08:00
|
|
|
: ARM::tPICADD;
|
|
|
|
|
|
|
|
// We need a new const-pool entry to load from.
|
|
|
|
MachineConstantPool *MCP = MBB.getParent()->getConstantPool();
|
|
|
|
unsigned ARMPCLabelIndex = 0;
|
|
|
|
MachineConstantPoolValue *CPV;
|
|
|
|
|
|
|
|
if (IsPIC) {
|
|
|
|
unsigned PCAdj = IsARM ? 8 : 4;
|
2017-11-14 04:45:38 +08:00
|
|
|
auto Modifier = (Flags & ARMII::MO_GOT)
|
|
|
|
? ARMCP::GOT_PREL
|
|
|
|
: ARMCP::no_modifier;
|
2013-12-02 18:35:41 +08:00
|
|
|
ARMPCLabelIndex = AFI->createPICLabelUId();
|
2017-08-29 17:47:55 +08:00
|
|
|
CPV = ARMConstantPoolConstant::Create(
|
|
|
|
GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj, Modifier,
|
|
|
|
/*AddCurrentAddr*/ Modifier == ARMCP::GOT_PREL);
|
2013-12-02 18:35:41 +08:00
|
|
|
} else
|
|
|
|
CPV = ARMConstantPoolConstant::Create(GV, ARMCP::no_modifier);
|
|
|
|
|
|
|
|
MachineInstrBuilder MIB =
|
|
|
|
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LDRLITOpc), DstReg)
|
|
|
|
.addConstantPoolIndex(MCP->getConstantPoolIndex(CPV, 4));
|
|
|
|
if (IsARM)
|
|
|
|
MIB.addImm(0);
|
2017-01-13 17:37:56 +08:00
|
|
|
MIB.add(predOps(ARMCC::AL));
|
2013-12-02 18:35:41 +08:00
|
|
|
|
|
|
|
if (IsPIC) {
|
|
|
|
MachineInstrBuilder MIB =
|
|
|
|
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(PICAddOpc))
|
|
|
|
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
|
|
|
|
.addReg(DstReg)
|
|
|
|
.addImm(ARMPCLabelIndex);
|
|
|
|
|
|
|
|
if (IsARM)
|
2017-01-13 17:37:56 +08:00
|
|
|
MIB.add(predOps(ARMCC::AL));
|
2013-12-02 18:35:41 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
MI.eraseFromParent();
|
|
|
|
return true;
|
|
|
|
}
|
2011-01-22 02:55:51 +08:00
|
|
|
case ARM::MOV_ga_pcrel:
|
|
|
|
case ARM::MOV_ga_pcrel_ldr:
|
|
|
|
case ARM::t2MOV_ga_pcrel: {
|
|
|
|
// Expand into movw + movt. Also "add pc" / ldr [pc] in PIC mode.
|
2011-01-20 16:34:58 +08:00
|
|
|
unsigned LabelId = AFI->createPICLabelUId();
|
Apply llvm-prefer-register-over-unsigned from clang-tidy to LLVM
Summary:
This clang-tidy check is looking for unsigned integer variables whose initializer
starts with an implicit cast from llvm::Register and changes the type of the
variable to llvm::Register (dropping the llvm:: where possible).
Partial reverts in:
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
X86FixupLEAs.cpp - Some functions return unsigned and arguably should be MCRegister
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
HexagonBitSimplify.cpp - Function takes BitTracker::RegisterRef which appears to be unsigned&
MachineVerifier.cpp - Ambiguous operator==() given MCRegister and const Register
PPCFastISel.cpp - No Register::operator-=()
PeepholeOptimizer.cpp - TargetInstrInfo::optimizeLoadInstr() takes an unsigned&
MachineTraceMetrics.cpp - MachineTraceMetrics lacks a suitable constructor
Manual fixups in:
ARMFastISel.cpp - ARMEmitLoad() now takes a Register& instead of unsigned&
HexagonSplitDouble.cpp - Ternary operator was ambiguous between unsigned/Register
HexagonConstExtenders.cpp - Has a local class named Register, used llvm::Register instead of Register.
PPCFastISel.cpp - PPCEmitLoad() now takes a Register& instead of unsigned&
Depends on D65919
Reviewers: arsenm, bogner, craig.topper, RKSimon
Reviewed By: arsenm
Subscribers: RKSimon, craig.topper, lenary, aemerson, wuzish, jholewinski, MatzeB, qcolombet, dschuff, jyknight, dylanmckay, sdardis, nemanjai, jvesely, wdng, nhaehnle, sbc100, jgravelle-google, kristof.beyls, hiraditya, aheejin, kbarton, fedor.sergeev, javed.absar, asb, rbar, johnrusso, simoncook, apazos, sabuasal, niosHD, jrtc27, MaskRay, zzheng, edward-jones, atanasyan, rogfer01, MartinMosbeck, brucehoult, the_o, tpr, PkmX, jocewei, jsji, Petar.Avramovic, asbirlea, Jim, s.egerton, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D65962
llvm-svn: 369041
2019-08-16 03:22:08 +08:00
|
|
|
Register DstReg = MI.getOperand(0).getReg();
|
2010-05-13 07:13:12 +08:00
|
|
|
bool DstIsDead = MI.getOperand(0).isDead();
|
2011-01-20 16:34:58 +08:00
|
|
|
const MachineOperand &MO1 = MI.getOperand(1);
|
|
|
|
const GlobalValue *GV = MO1.getGlobal();
|
|
|
|
unsigned TF = MO1.getTargetFlags();
|
2013-11-26 00:24:52 +08:00
|
|
|
bool isARM = Opcode != ARM::t2MOV_ga_pcrel;
|
2011-01-22 02:55:51 +08:00
|
|
|
unsigned LO16Opc = isARM ? ARM::MOVi16_ga_pcrel : ARM::t2MOVi16_ga_pcrel;
|
2011-07-14 01:25:55 +08:00
|
|
|
unsigned HI16Opc = isARM ? ARM::MOVTi16_ga_pcrel :ARM::t2MOVTi16_ga_pcrel;
|
2013-11-26 00:24:52 +08:00
|
|
|
unsigned LO16TF = TF | ARMII::MO_LO16;
|
|
|
|
unsigned HI16TF = TF | ARMII::MO_HI16;
|
2011-01-20 16:34:58 +08:00
|
|
|
unsigned PICAddOpc = isARM
|
2011-01-22 02:55:51 +08:00
|
|
|
? (Opcode == ARM::MOV_ga_pcrel_ldr ? ARM::PICLDR : ARM::PICADD)
|
2011-01-20 16:34:58 +08:00
|
|
|
: ARM::tPICADD;
|
|
|
|
MachineInstrBuilder MIB1 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
|
|
|
|
TII->get(LO16Opc), DstReg)
|
2011-01-22 02:55:51 +08:00
|
|
|
.addGlobalAddress(GV, MO1.getOffset(), TF | LO16TF)
|
2011-01-20 16:34:58 +08:00
|
|
|
.addImm(LabelId);
|
2013-11-26 00:24:52 +08:00
|
|
|
|
|
|
|
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc), DstReg)
|
2011-01-20 16:34:58 +08:00
|
|
|
.addReg(DstReg)
|
2011-01-22 02:55:51 +08:00
|
|
|
.addGlobalAddress(GV, MO1.getOffset(), TF | HI16TF)
|
2011-01-20 16:34:58 +08:00
|
|
|
.addImm(LabelId);
|
2011-01-22 02:55:51 +08:00
|
|
|
|
|
|
|
MachineInstrBuilder MIB3 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
|
2011-01-20 16:34:58 +08:00
|
|
|
TII->get(PICAddOpc))
|
2010-10-16 02:25:59 +08:00
|
|
|
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
|
2011-01-20 16:34:58 +08:00
|
|
|
.addReg(DstReg).addImm(LabelId);
|
|
|
|
if (isARM) {
|
2017-01-13 17:37:56 +08:00
|
|
|
MIB3.add(predOps(ARMCC::AL));
|
2011-01-22 02:55:51 +08:00
|
|
|
if (Opcode == ARM::MOV_ga_pcrel_ldr)
|
2018-08-17 05:30:05 +08:00
|
|
|
MIB3.cloneMemRefs(MI);
|
2011-01-17 16:03:18 +08:00
|
|
|
}
|
2011-01-22 02:55:51 +08:00
|
|
|
TransferImpOps(MI, MIB1, MIB3);
|
2009-11-07 07:52:48 +08:00
|
|
|
MI.eraseFromParent();
|
2011-01-20 16:34:58 +08:00
|
|
|
return true;
|
2010-05-13 08:17:02 +08:00
|
|
|
}
|
|
|
|
|
2011-01-20 16:34:58 +08:00
|
|
|
case ARM::MOVi32imm:
|
|
|
|
case ARM::MOVCCi32imm:
|
|
|
|
case ARM::t2MOVi32imm:
|
|
|
|
case ARM::t2MOVCCi32imm:
|
|
|
|
ExpandMOV32BitImm(MBB, MBBI);
|
|
|
|
return true;
|
|
|
|
|
2013-10-01 22:33:28 +08:00
|
|
|
case ARM::SUBS_PC_LR: {
|
|
|
|
MachineInstrBuilder MIB =
|
|
|
|
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::SUBri), ARM::PC)
|
|
|
|
.addReg(ARM::LR)
|
2017-01-13 17:58:52 +08:00
|
|
|
.add(MI.getOperand(0))
|
|
|
|
.add(MI.getOperand(1))
|
|
|
|
.add(MI.getOperand(2))
|
2013-10-01 22:33:28 +08:00
|
|
|
.addReg(ARM::CPSR, RegState::Undef);
|
|
|
|
TransferImpOps(MI, MIB, MIB);
|
|
|
|
MI.eraseFromParent();
|
|
|
|
return true;
|
|
|
|
}
|
2011-03-30 00:45:53 +08:00
|
|
|
case ARM::VLDMQIA: {
|
|
|
|
unsigned NewOpc = ARM::VLDMDIA;
|
2010-09-16 08:31:02 +08:00
|
|
|
MachineInstrBuilder MIB =
|
2010-11-16 09:16:36 +08:00
|
|
|
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc));
|
2010-09-16 08:31:02 +08:00
|
|
|
unsigned OpIdx = 0;
|
2010-11-16 09:16:36 +08:00
|
|
|
|
2010-09-16 08:31:02 +08:00
|
|
|
// Grab the Q register destination.
|
|
|
|
bool DstIsDead = MI.getOperand(OpIdx).isDead();
|
Apply llvm-prefer-register-over-unsigned from clang-tidy to LLVM
Summary:
This clang-tidy check is looking for unsigned integer variables whose initializer
starts with an implicit cast from llvm::Register and changes the type of the
variable to llvm::Register (dropping the llvm:: where possible).
Partial reverts in:
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
X86FixupLEAs.cpp - Some functions return unsigned and arguably should be MCRegister
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
HexagonBitSimplify.cpp - Function takes BitTracker::RegisterRef which appears to be unsigned&
MachineVerifier.cpp - Ambiguous operator==() given MCRegister and const Register
PPCFastISel.cpp - No Register::operator-=()
PeepholeOptimizer.cpp - TargetInstrInfo::optimizeLoadInstr() takes an unsigned&
MachineTraceMetrics.cpp - MachineTraceMetrics lacks a suitable constructor
Manual fixups in:
ARMFastISel.cpp - ARMEmitLoad() now takes a Register& instead of unsigned&
HexagonSplitDouble.cpp - Ternary operator was ambiguous between unsigned/Register
HexagonConstExtenders.cpp - Has a local class named Register, used llvm::Register instead of Register.
PPCFastISel.cpp - PPCEmitLoad() now takes a Register& instead of unsigned&
Depends on D65919
Reviewers: arsenm, bogner, craig.topper, RKSimon
Reviewed By: arsenm
Subscribers: RKSimon, craig.topper, lenary, aemerson, wuzish, jholewinski, MatzeB, qcolombet, dschuff, jyknight, dylanmckay, sdardis, nemanjai, jvesely, wdng, nhaehnle, sbc100, jgravelle-google, kristof.beyls, hiraditya, aheejin, kbarton, fedor.sergeev, javed.absar, asb, rbar, johnrusso, simoncook, apazos, sabuasal, niosHD, jrtc27, MaskRay, zzheng, edward-jones, atanasyan, rogfer01, MartinMosbeck, brucehoult, the_o, tpr, PkmX, jocewei, jsji, Petar.Avramovic, asbirlea, Jim, s.egerton, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D65962
llvm-svn: 369041
2019-08-16 03:22:08 +08:00
|
|
|
Register DstReg = MI.getOperand(OpIdx++).getReg();
|
2010-11-16 09:16:36 +08:00
|
|
|
|
|
|
|
// Copy the source register.
|
2017-01-13 17:58:52 +08:00
|
|
|
MIB.add(MI.getOperand(OpIdx++));
|
2010-11-16 09:16:36 +08:00
|
|
|
|
2010-09-16 08:31:02 +08:00
|
|
|
// Copy the predicate operands.
|
2017-01-13 17:58:52 +08:00
|
|
|
MIB.add(MI.getOperand(OpIdx++));
|
|
|
|
MIB.add(MI.getOperand(OpIdx++));
|
2010-11-16 09:16:36 +08:00
|
|
|
|
2010-09-16 08:31:02 +08:00
|
|
|
// Add the destination operands (D subregs).
|
Apply llvm-prefer-register-over-unsigned from clang-tidy to LLVM
Summary:
This clang-tidy check is looking for unsigned integer variables whose initializer
starts with an implicit cast from llvm::Register and changes the type of the
variable to llvm::Register (dropping the llvm:: where possible).
Partial reverts in:
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
X86FixupLEAs.cpp - Some functions return unsigned and arguably should be MCRegister
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
HexagonBitSimplify.cpp - Function takes BitTracker::RegisterRef which appears to be unsigned&
MachineVerifier.cpp - Ambiguous operator==() given MCRegister and const Register
PPCFastISel.cpp - No Register::operator-=()
PeepholeOptimizer.cpp - TargetInstrInfo::optimizeLoadInstr() takes an unsigned&
MachineTraceMetrics.cpp - MachineTraceMetrics lacks a suitable constructor
Manual fixups in:
ARMFastISel.cpp - ARMEmitLoad() now takes a Register& instead of unsigned&
HexagonSplitDouble.cpp - Ternary operator was ambiguous between unsigned/Register
HexagonConstExtenders.cpp - Has a local class named Register, used llvm::Register instead of Register.
PPCFastISel.cpp - PPCEmitLoad() now takes a Register& instead of unsigned&
Depends on D65919
Reviewers: arsenm, bogner, craig.topper, RKSimon
Reviewed By: arsenm
Subscribers: RKSimon, craig.topper, lenary, aemerson, wuzish, jholewinski, MatzeB, qcolombet, dschuff, jyknight, dylanmckay, sdardis, nemanjai, jvesely, wdng, nhaehnle, sbc100, jgravelle-google, kristof.beyls, hiraditya, aheejin, kbarton, fedor.sergeev, javed.absar, asb, rbar, johnrusso, simoncook, apazos, sabuasal, niosHD, jrtc27, MaskRay, zzheng, edward-jones, atanasyan, rogfer01, MartinMosbeck, brucehoult, the_o, tpr, PkmX, jocewei, jsji, Petar.Avramovic, asbirlea, Jim, s.egerton, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D65962
llvm-svn: 369041
2019-08-16 03:22:08 +08:00
|
|
|
Register D0 = TRI->getSubReg(DstReg, ARM::dsub_0);
|
|
|
|
Register D1 = TRI->getSubReg(DstReg, ARM::dsub_1);
|
2010-09-16 08:31:02 +08:00
|
|
|
MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead))
|
|
|
|
.addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
|
2010-11-16 09:16:36 +08:00
|
|
|
|
2010-09-16 08:31:02 +08:00
|
|
|
// Add an implicit def for the super-register.
|
|
|
|
MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
|
|
|
|
TransferImpOps(MI, MIB, MIB);
|
2018-08-17 05:30:05 +08:00
|
|
|
MIB.cloneMemRefs(MI);
|
2010-09-16 08:31:02 +08:00
|
|
|
MI.eraseFromParent();
|
2011-01-20 16:34:58 +08:00
|
|
|
return true;
|
2010-09-16 08:31:02 +08:00
|
|
|
}
|
|
|
|
|
2011-03-30 00:45:53 +08:00
|
|
|
case ARM::VSTMQIA: {
|
|
|
|
unsigned NewOpc = ARM::VSTMDIA;
|
2010-09-16 08:31:02 +08:00
|
|
|
MachineInstrBuilder MIB =
|
2010-11-16 09:16:36 +08:00
|
|
|
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc));
|
2010-09-16 08:31:02 +08:00
|
|
|
unsigned OpIdx = 0;
|
2010-11-16 09:16:36 +08:00
|
|
|
|
2010-09-16 08:31:02 +08:00
|
|
|
// Grab the Q register source.
|
|
|
|
bool SrcIsKill = MI.getOperand(OpIdx).isKill();
|
Apply llvm-prefer-register-over-unsigned from clang-tidy to LLVM
Summary:
This clang-tidy check is looking for unsigned integer variables whose initializer
starts with an implicit cast from llvm::Register and changes the type of the
variable to llvm::Register (dropping the llvm:: where possible).
Partial reverts in:
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
X86FixupLEAs.cpp - Some functions return unsigned and arguably should be MCRegister
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
HexagonBitSimplify.cpp - Function takes BitTracker::RegisterRef which appears to be unsigned&
MachineVerifier.cpp - Ambiguous operator==() given MCRegister and const Register
PPCFastISel.cpp - No Register::operator-=()
PeepholeOptimizer.cpp - TargetInstrInfo::optimizeLoadInstr() takes an unsigned&
MachineTraceMetrics.cpp - MachineTraceMetrics lacks a suitable constructor
Manual fixups in:
ARMFastISel.cpp - ARMEmitLoad() now takes a Register& instead of unsigned&
HexagonSplitDouble.cpp - Ternary operator was ambiguous between unsigned/Register
HexagonConstExtenders.cpp - Has a local class named Register, used llvm::Register instead of Register.
PPCFastISel.cpp - PPCEmitLoad() now takes a Register& instead of unsigned&
Depends on D65919
Reviewers: arsenm, bogner, craig.topper, RKSimon
Reviewed By: arsenm
Subscribers: RKSimon, craig.topper, lenary, aemerson, wuzish, jholewinski, MatzeB, qcolombet, dschuff, jyknight, dylanmckay, sdardis, nemanjai, jvesely, wdng, nhaehnle, sbc100, jgravelle-google, kristof.beyls, hiraditya, aheejin, kbarton, fedor.sergeev, javed.absar, asb, rbar, johnrusso, simoncook, apazos, sabuasal, niosHD, jrtc27, MaskRay, zzheng, edward-jones, atanasyan, rogfer01, MartinMosbeck, brucehoult, the_o, tpr, PkmX, jocewei, jsji, Petar.Avramovic, asbirlea, Jim, s.egerton, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D65962
llvm-svn: 369041
2019-08-16 03:22:08 +08:00
|
|
|
Register SrcReg = MI.getOperand(OpIdx++).getReg();
|
2010-11-16 09:16:36 +08:00
|
|
|
|
|
|
|
// Copy the destination register.
|
2017-12-15 02:06:25 +08:00
|
|
|
MachineOperand Dst(MI.getOperand(OpIdx++));
|
|
|
|
MIB.add(Dst);
|
2010-11-16 09:16:36 +08:00
|
|
|
|
2010-09-16 08:31:02 +08:00
|
|
|
// Copy the predicate operands.
|
2017-01-13 17:58:52 +08:00
|
|
|
MIB.add(MI.getOperand(OpIdx++));
|
|
|
|
MIB.add(MI.getOperand(OpIdx++));
|
2010-11-16 09:16:36 +08:00
|
|
|
|
2010-09-16 08:31:02 +08:00
|
|
|
// Add the source operands (D subregs).
|
Apply llvm-prefer-register-over-unsigned from clang-tidy to LLVM
Summary:
This clang-tidy check is looking for unsigned integer variables whose initializer
starts with an implicit cast from llvm::Register and changes the type of the
variable to llvm::Register (dropping the llvm:: where possible).
Partial reverts in:
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
X86FixupLEAs.cpp - Some functions return unsigned and arguably should be MCRegister
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
HexagonBitSimplify.cpp - Function takes BitTracker::RegisterRef which appears to be unsigned&
MachineVerifier.cpp - Ambiguous operator==() given MCRegister and const Register
PPCFastISel.cpp - No Register::operator-=()
PeepholeOptimizer.cpp - TargetInstrInfo::optimizeLoadInstr() takes an unsigned&
MachineTraceMetrics.cpp - MachineTraceMetrics lacks a suitable constructor
Manual fixups in:
ARMFastISel.cpp - ARMEmitLoad() now takes a Register& instead of unsigned&
HexagonSplitDouble.cpp - Ternary operator was ambiguous between unsigned/Register
HexagonConstExtenders.cpp - Has a local class named Register, used llvm::Register instead of Register.
PPCFastISel.cpp - PPCEmitLoad() now takes a Register& instead of unsigned&
Depends on D65919
Reviewers: arsenm, bogner, craig.topper, RKSimon
Reviewed By: arsenm
Subscribers: RKSimon, craig.topper, lenary, aemerson, wuzish, jholewinski, MatzeB, qcolombet, dschuff, jyknight, dylanmckay, sdardis, nemanjai, jvesely, wdng, nhaehnle, sbc100, jgravelle-google, kristof.beyls, hiraditya, aheejin, kbarton, fedor.sergeev, javed.absar, asb, rbar, johnrusso, simoncook, apazos, sabuasal, niosHD, jrtc27, MaskRay, zzheng, edward-jones, atanasyan, rogfer01, MartinMosbeck, brucehoult, the_o, tpr, PkmX, jocewei, jsji, Petar.Avramovic, asbirlea, Jim, s.egerton, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D65962
llvm-svn: 369041
2019-08-16 03:22:08 +08:00
|
|
|
Register D0 = TRI->getSubReg(SrcReg, ARM::dsub_0);
|
|
|
|
Register D1 = TRI->getSubReg(SrcReg, ARM::dsub_1);
|
2015-02-17 03:34:30 +08:00
|
|
|
MIB.addReg(D0, SrcIsKill ? RegState::Kill : 0)
|
|
|
|
.addReg(D1, SrcIsKill ? RegState::Kill : 0);
|
2010-11-16 09:16:36 +08:00
|
|
|
|
2011-04-29 13:24:29 +08:00
|
|
|
if (SrcIsKill) // Add an implicit kill for the Q register.
|
|
|
|
MIB->addRegisterKilled(SrcReg, TRI, true);
|
2010-11-16 09:16:36 +08:00
|
|
|
|
2010-09-16 08:31:02 +08:00
|
|
|
TransferImpOps(MI, MIB, MIB);
|
2018-08-17 05:30:05 +08:00
|
|
|
MIB.cloneMemRefs(MI);
|
2010-09-16 08:31:02 +08:00
|
|
|
MI.eraseFromParent();
|
2011-01-20 16:34:58 +08:00
|
|
|
return true;
|
2010-09-16 08:31:02 +08:00
|
|
|
}
|
|
|
|
|
2010-09-03 00:00:54 +08:00
|
|
|
case ARM::VLD2q8Pseudo:
|
|
|
|
case ARM::VLD2q16Pseudo:
|
|
|
|
case ARM::VLD2q32Pseudo:
|
2011-12-10 05:28:25 +08:00
|
|
|
case ARM::VLD2q8PseudoWB_fixed:
|
|
|
|
case ARM::VLD2q16PseudoWB_fixed:
|
|
|
|
case ARM::VLD2q32PseudoWB_fixed:
|
|
|
|
case ARM::VLD2q8PseudoWB_register:
|
|
|
|
case ARM::VLD2q16PseudoWB_register:
|
|
|
|
case ARM::VLD2q32PseudoWB_register:
|
2010-09-04 02:16:02 +08:00
|
|
|
case ARM::VLD3d8Pseudo:
|
|
|
|
case ARM::VLD3d16Pseudo:
|
|
|
|
case ARM::VLD3d32Pseudo:
|
2018-06-03 00:40:03 +08:00
|
|
|
case ARM::VLD1d8TPseudo:
|
|
|
|
case ARM::VLD1d16TPseudo:
|
|
|
|
case ARM::VLD1d32TPseudo:
|
2010-09-03 00:00:54 +08:00
|
|
|
case ARM::VLD1d64TPseudo:
|
2014-01-16 17:16:13 +08:00
|
|
|
case ARM::VLD1d64TPseudoWB_fixed:
|
2018-03-02 21:02:55 +08:00
|
|
|
case ARM::VLD1d64TPseudoWB_register:
|
2010-09-04 02:16:02 +08:00
|
|
|
case ARM::VLD3d8Pseudo_UPD:
|
|
|
|
case ARM::VLD3d16Pseudo_UPD:
|
|
|
|
case ARM::VLD3d32Pseudo_UPD:
|
|
|
|
case ARM::VLD3q8Pseudo_UPD:
|
|
|
|
case ARM::VLD3q16Pseudo_UPD:
|
|
|
|
case ARM::VLD3q32Pseudo_UPD:
|
2011-02-08 01:43:15 +08:00
|
|
|
case ARM::VLD3q8oddPseudo:
|
|
|
|
case ARM::VLD3q16oddPseudo:
|
|
|
|
case ARM::VLD3q32oddPseudo:
|
2010-09-04 02:16:02 +08:00
|
|
|
case ARM::VLD3q8oddPseudo_UPD:
|
|
|
|
case ARM::VLD3q16oddPseudo_UPD:
|
|
|
|
case ARM::VLD3q32oddPseudo_UPD:
|
|
|
|
case ARM::VLD4d8Pseudo:
|
|
|
|
case ARM::VLD4d16Pseudo:
|
|
|
|
case ARM::VLD4d32Pseudo:
|
2018-06-03 00:40:03 +08:00
|
|
|
case ARM::VLD1d8QPseudo:
|
|
|
|
case ARM::VLD1d16QPseudo:
|
|
|
|
case ARM::VLD1d32QPseudo:
|
2010-09-03 00:00:54 +08:00
|
|
|
case ARM::VLD1d64QPseudo:
|
2014-01-16 17:16:13 +08:00
|
|
|
case ARM::VLD1d64QPseudoWB_fixed:
|
2018-03-02 21:02:55 +08:00
|
|
|
case ARM::VLD1d64QPseudoWB_register:
|
2018-06-03 00:40:03 +08:00
|
|
|
case ARM::VLD1q8HighQPseudo:
|
|
|
|
case ARM::VLD1q8LowQPseudo_UPD:
|
|
|
|
case ARM::VLD1q8HighTPseudo:
|
|
|
|
case ARM::VLD1q8LowTPseudo_UPD:
|
|
|
|
case ARM::VLD1q16HighQPseudo:
|
|
|
|
case ARM::VLD1q16LowQPseudo_UPD:
|
|
|
|
case ARM::VLD1q16HighTPseudo:
|
|
|
|
case ARM::VLD1q16LowTPseudo_UPD:
|
|
|
|
case ARM::VLD1q32HighQPseudo:
|
|
|
|
case ARM::VLD1q32LowQPseudo_UPD:
|
|
|
|
case ARM::VLD1q32HighTPseudo:
|
|
|
|
case ARM::VLD1q32LowTPseudo_UPD:
|
|
|
|
case ARM::VLD1q64HighQPseudo:
|
|
|
|
case ARM::VLD1q64LowQPseudo_UPD:
|
|
|
|
case ARM::VLD1q64HighTPseudo:
|
|
|
|
case ARM::VLD1q64LowTPseudo_UPD:
|
2010-09-04 02:16:02 +08:00
|
|
|
case ARM::VLD4d8Pseudo_UPD:
|
|
|
|
case ARM::VLD4d16Pseudo_UPD:
|
|
|
|
case ARM::VLD4d32Pseudo_UPD:
|
|
|
|
case ARM::VLD4q8Pseudo_UPD:
|
|
|
|
case ARM::VLD4q16Pseudo_UPD:
|
|
|
|
case ARM::VLD4q32Pseudo_UPD:
|
2011-02-08 01:43:15 +08:00
|
|
|
case ARM::VLD4q8oddPseudo:
|
|
|
|
case ARM::VLD4q16oddPseudo:
|
|
|
|
case ARM::VLD4q32oddPseudo:
|
2010-09-04 02:16:02 +08:00
|
|
|
case ARM::VLD4q8oddPseudo_UPD:
|
|
|
|
case ARM::VLD4q16oddPseudo_UPD:
|
|
|
|
case ARM::VLD4q32oddPseudo_UPD:
|
2010-11-30 03:35:29 +08:00
|
|
|
case ARM::VLD3DUPd8Pseudo:
|
|
|
|
case ARM::VLD3DUPd16Pseudo:
|
|
|
|
case ARM::VLD3DUPd32Pseudo:
|
|
|
|
case ARM::VLD3DUPd8Pseudo_UPD:
|
|
|
|
case ARM::VLD3DUPd16Pseudo_UPD:
|
|
|
|
case ARM::VLD3DUPd32Pseudo_UPD:
|
2010-11-30 08:00:35 +08:00
|
|
|
case ARM::VLD4DUPd8Pseudo:
|
|
|
|
case ARM::VLD4DUPd16Pseudo:
|
|
|
|
case ARM::VLD4DUPd32Pseudo:
|
|
|
|
case ARM::VLD4DUPd8Pseudo_UPD:
|
|
|
|
case ARM::VLD4DUPd16Pseudo_UPD:
|
|
|
|
case ARM::VLD4DUPd32Pseudo_UPD:
|
[NEON] Support vldNq intrinsics in AArch32 (LLVM part)
This patch adds support for the q versions of the dup
(load-to-all-lanes) NEON intrinsics, such as vld2q_dup_f16() for
example.
Currently, non-q versions of the dup intrinsics are implemented
in clang by generating IR that first loads the elements of the
structure into the first lane with the lane (to-single-lane)
intrinsics, and then propagating it to other lanes. There are at
least two problems with this approach. First, there are no
double-spaced to-single-lane byte-element instructions. For
example, there is no such instruction as 'vld2.8 { d0[0], d2[0]
}, [r0]'. That means we cannot rely on the to-single-lane
intrinsics and instructions to implement the q versions of the
dup intrinsics. Note that to-all-lanes instructions do support
all sizes of data items, including bytes.
The second problem with the current approach is that we need a
separate vdup instruction to propagate the structure to each
lane. So for vld4q_dup_f16() we would need four vdup instructions
in addition to the initial vld instruction.
This patch introduces dup LLVM intrinsics and reworks handling of
the currently supported (non-q) NEON dup intrinsics to expand
them into those LLVM intrinsics, thus eliminating the need for
using to-single-lane intrinsics and instructions.
Additionally, this patch adds support for u64 and s64 dup NEON
intrinsics. These are marked as AArch64-only in the ARM NEON
Reference, but it seems there are no reasons to not support them
in AArch32 mode. Please correct, if that is wrong.
That's what we generate with this patch applied:
vld2q_dup_f16:
vld2.16 {d0[], d2[]}, [r0]
vld2.16 {d1[], d3[]}, [r0]
vld3q_dup_f16:
vld3.16 {d0[], d2[], d4[]}, [r0]
vld3.16 {d1[], d3[], d5[]}, [r0]
vld4q_dup_f16:
vld4.16 {d0[], d2[], d4[], d6[]}, [r0]
vld4.16 {d1[], d3[], d5[], d7[]}, [r0]
Differential Revision: https://reviews.llvm.org/D48439
llvm-svn: 335733
2018-06-27 21:57:52 +08:00
|
|
|
case ARM::VLD2DUPq8EvenPseudo:
|
|
|
|
case ARM::VLD2DUPq8OddPseudo:
|
|
|
|
case ARM::VLD2DUPq16EvenPseudo:
|
|
|
|
case ARM::VLD2DUPq16OddPseudo:
|
|
|
|
case ARM::VLD2DUPq32EvenPseudo:
|
|
|
|
case ARM::VLD2DUPq32OddPseudo:
|
|
|
|
case ARM::VLD3DUPq8EvenPseudo:
|
|
|
|
case ARM::VLD3DUPq8OddPseudo:
|
|
|
|
case ARM::VLD3DUPq16EvenPseudo:
|
|
|
|
case ARM::VLD3DUPq16OddPseudo:
|
|
|
|
case ARM::VLD3DUPq32EvenPseudo:
|
|
|
|
case ARM::VLD3DUPq32OddPseudo:
|
|
|
|
case ARM::VLD4DUPq8EvenPseudo:
|
|
|
|
case ARM::VLD4DUPq8OddPseudo:
|
|
|
|
case ARM::VLD4DUPq16EvenPseudo:
|
|
|
|
case ARM::VLD4DUPq16OddPseudo:
|
|
|
|
case ARM::VLD4DUPq32EvenPseudo:
|
|
|
|
case ARM::VLD4DUPq32OddPseudo:
|
2010-09-14 07:01:35 +08:00
|
|
|
ExpandVLD(MBBI);
|
2011-01-20 16:34:58 +08:00
|
|
|
return true;
|
2010-09-03 00:00:54 +08:00
|
|
|
|
2010-08-28 13:12:57 +08:00
|
|
|
case ARM::VST2q8Pseudo:
|
|
|
|
case ARM::VST2q16Pseudo:
|
|
|
|
case ARM::VST2q32Pseudo:
|
2011-12-15 05:32:11 +08:00
|
|
|
case ARM::VST2q8PseudoWB_fixed:
|
|
|
|
case ARM::VST2q16PseudoWB_fixed:
|
|
|
|
case ARM::VST2q32PseudoWB_fixed:
|
|
|
|
case ARM::VST2q8PseudoWB_register:
|
|
|
|
case ARM::VST2q16PseudoWB_register:
|
|
|
|
case ARM::VST2q32PseudoWB_register:
|
2010-08-27 02:51:29 +08:00
|
|
|
case ARM::VST3d8Pseudo:
|
|
|
|
case ARM::VST3d16Pseudo:
|
|
|
|
case ARM::VST3d32Pseudo:
|
2018-06-10 17:27:27 +08:00
|
|
|
case ARM::VST1d8TPseudo:
|
|
|
|
case ARM::VST1d16TPseudo:
|
|
|
|
case ARM::VST1d32TPseudo:
|
2010-08-27 02:51:29 +08:00
|
|
|
case ARM::VST1d64TPseudo:
|
|
|
|
case ARM::VST3d8Pseudo_UPD:
|
|
|
|
case ARM::VST3d16Pseudo_UPD:
|
|
|
|
case ARM::VST3d32Pseudo_UPD:
|
2011-11-30 06:38:04 +08:00
|
|
|
case ARM::VST1d64TPseudoWB_fixed:
|
|
|
|
case ARM::VST1d64TPseudoWB_register:
|
2010-08-27 02:51:29 +08:00
|
|
|
case ARM::VST3q8Pseudo_UPD:
|
|
|
|
case ARM::VST3q16Pseudo_UPD:
|
|
|
|
case ARM::VST3q32Pseudo_UPD:
|
2011-02-08 01:43:15 +08:00
|
|
|
case ARM::VST3q8oddPseudo:
|
|
|
|
case ARM::VST3q16oddPseudo:
|
|
|
|
case ARM::VST3q32oddPseudo:
|
2010-08-27 02:51:29 +08:00
|
|
|
case ARM::VST3q8oddPseudo_UPD:
|
|
|
|
case ARM::VST3q16oddPseudo_UPD:
|
|
|
|
case ARM::VST3q32oddPseudo_UPD:
|
2010-08-26 07:27:42 +08:00
|
|
|
case ARM::VST4d8Pseudo:
|
|
|
|
case ARM::VST4d16Pseudo:
|
|
|
|
case ARM::VST4d32Pseudo:
|
2018-06-10 17:27:27 +08:00
|
|
|
case ARM::VST1d8QPseudo:
|
|
|
|
case ARM::VST1d16QPseudo:
|
|
|
|
case ARM::VST1d32QPseudo:
|
2010-08-26 13:33:30 +08:00
|
|
|
case ARM::VST1d64QPseudo:
|
2010-08-26 07:27:42 +08:00
|
|
|
case ARM::VST4d8Pseudo_UPD:
|
|
|
|
case ARM::VST4d16Pseudo_UPD:
|
|
|
|
case ARM::VST4d32Pseudo_UPD:
|
2011-11-30 06:58:48 +08:00
|
|
|
case ARM::VST1d64QPseudoWB_fixed:
|
|
|
|
case ARM::VST1d64QPseudoWB_register:
|
2018-06-10 17:27:27 +08:00
|
|
|
case ARM::VST1q8HighQPseudo:
|
|
|
|
case ARM::VST1q8LowQPseudo_UPD:
|
|
|
|
case ARM::VST1q8HighTPseudo:
|
|
|
|
case ARM::VST1q8LowTPseudo_UPD:
|
|
|
|
case ARM::VST1q16HighQPseudo:
|
|
|
|
case ARM::VST1q16LowQPseudo_UPD:
|
|
|
|
case ARM::VST1q16HighTPseudo:
|
|
|
|
case ARM::VST1q16LowTPseudo_UPD:
|
|
|
|
case ARM::VST1q32HighQPseudo:
|
|
|
|
case ARM::VST1q32LowQPseudo_UPD:
|
|
|
|
case ARM::VST1q32HighTPseudo:
|
|
|
|
case ARM::VST1q32LowTPseudo_UPD:
|
|
|
|
case ARM::VST1q64HighQPseudo:
|
|
|
|
case ARM::VST1q64LowQPseudo_UPD:
|
|
|
|
case ARM::VST1q64HighTPseudo:
|
|
|
|
case ARM::VST1q64LowTPseudo_UPD:
|
2010-08-26 07:27:42 +08:00
|
|
|
case ARM::VST4q8Pseudo_UPD:
|
|
|
|
case ARM::VST4q16Pseudo_UPD:
|
|
|
|
case ARM::VST4q32Pseudo_UPD:
|
2011-02-08 01:43:15 +08:00
|
|
|
case ARM::VST4q8oddPseudo:
|
|
|
|
case ARM::VST4q16oddPseudo:
|
|
|
|
case ARM::VST4q32oddPseudo:
|
2010-08-26 07:27:42 +08:00
|
|
|
case ARM::VST4q8oddPseudo_UPD:
|
|
|
|
case ARM::VST4q16oddPseudo_UPD:
|
|
|
|
case ARM::VST4q32oddPseudo_UPD:
|
2010-09-14 07:01:35 +08:00
|
|
|
ExpandVST(MBBI);
|
2011-01-20 16:34:58 +08:00
|
|
|
return true;
|
2010-09-14 07:01:35 +08:00
|
|
|
|
2010-11-02 06:04:05 +08:00
|
|
|
case ARM::VLD1LNq8Pseudo:
|
|
|
|
case ARM::VLD1LNq16Pseudo:
|
|
|
|
case ARM::VLD1LNq32Pseudo:
|
|
|
|
case ARM::VLD1LNq8Pseudo_UPD:
|
|
|
|
case ARM::VLD1LNq16Pseudo_UPD:
|
|
|
|
case ARM::VLD1LNq32Pseudo_UPD:
|
2010-09-14 07:01:35 +08:00
|
|
|
case ARM::VLD2LNd8Pseudo:
|
|
|
|
case ARM::VLD2LNd16Pseudo:
|
|
|
|
case ARM::VLD2LNd32Pseudo:
|
|
|
|
case ARM::VLD2LNq16Pseudo:
|
|
|
|
case ARM::VLD2LNq32Pseudo:
|
|
|
|
case ARM::VLD2LNd8Pseudo_UPD:
|
|
|
|
case ARM::VLD2LNd16Pseudo_UPD:
|
|
|
|
case ARM::VLD2LNd32Pseudo_UPD:
|
|
|
|
case ARM::VLD2LNq16Pseudo_UPD:
|
|
|
|
case ARM::VLD2LNq32Pseudo_UPD:
|
|
|
|
case ARM::VLD3LNd8Pseudo:
|
|
|
|
case ARM::VLD3LNd16Pseudo:
|
|
|
|
case ARM::VLD3LNd32Pseudo:
|
|
|
|
case ARM::VLD3LNq16Pseudo:
|
|
|
|
case ARM::VLD3LNq32Pseudo:
|
|
|
|
case ARM::VLD3LNd8Pseudo_UPD:
|
|
|
|
case ARM::VLD3LNd16Pseudo_UPD:
|
|
|
|
case ARM::VLD3LNd32Pseudo_UPD:
|
|
|
|
case ARM::VLD3LNq16Pseudo_UPD:
|
|
|
|
case ARM::VLD3LNq32Pseudo_UPD:
|
|
|
|
case ARM::VLD4LNd8Pseudo:
|
|
|
|
case ARM::VLD4LNd16Pseudo:
|
|
|
|
case ARM::VLD4LNd32Pseudo:
|
|
|
|
case ARM::VLD4LNq16Pseudo:
|
|
|
|
case ARM::VLD4LNq32Pseudo:
|
|
|
|
case ARM::VLD4LNd8Pseudo_UPD:
|
|
|
|
case ARM::VLD4LNd16Pseudo_UPD:
|
|
|
|
case ARM::VLD4LNd32Pseudo_UPD:
|
|
|
|
case ARM::VLD4LNq16Pseudo_UPD:
|
|
|
|
case ARM::VLD4LNq32Pseudo_UPD:
|
2010-11-03 05:18:25 +08:00
|
|
|
case ARM::VST1LNq8Pseudo:
|
|
|
|
case ARM::VST1LNq16Pseudo:
|
|
|
|
case ARM::VST1LNq32Pseudo:
|
|
|
|
case ARM::VST1LNq8Pseudo_UPD:
|
|
|
|
case ARM::VST1LNq16Pseudo_UPD:
|
|
|
|
case ARM::VST1LNq32Pseudo_UPD:
|
2010-09-14 07:01:35 +08:00
|
|
|
case ARM::VST2LNd8Pseudo:
|
|
|
|
case ARM::VST2LNd16Pseudo:
|
|
|
|
case ARM::VST2LNd32Pseudo:
|
|
|
|
case ARM::VST2LNq16Pseudo:
|
|
|
|
case ARM::VST2LNq32Pseudo:
|
|
|
|
case ARM::VST2LNd8Pseudo_UPD:
|
|
|
|
case ARM::VST2LNd16Pseudo_UPD:
|
|
|
|
case ARM::VST2LNd32Pseudo_UPD:
|
|
|
|
case ARM::VST2LNq16Pseudo_UPD:
|
|
|
|
case ARM::VST2LNq32Pseudo_UPD:
|
|
|
|
case ARM::VST3LNd8Pseudo:
|
|
|
|
case ARM::VST3LNd16Pseudo:
|
|
|
|
case ARM::VST3LNd32Pseudo:
|
|
|
|
case ARM::VST3LNq16Pseudo:
|
|
|
|
case ARM::VST3LNq32Pseudo:
|
|
|
|
case ARM::VST3LNd8Pseudo_UPD:
|
|
|
|
case ARM::VST3LNd16Pseudo_UPD:
|
|
|
|
case ARM::VST3LNd32Pseudo_UPD:
|
|
|
|
case ARM::VST3LNq16Pseudo_UPD:
|
|
|
|
case ARM::VST3LNq32Pseudo_UPD:
|
|
|
|
case ARM::VST4LNd8Pseudo:
|
|
|
|
case ARM::VST4LNd16Pseudo:
|
|
|
|
case ARM::VST4LNd32Pseudo:
|
|
|
|
case ARM::VST4LNq16Pseudo:
|
|
|
|
case ARM::VST4LNq32Pseudo:
|
|
|
|
case ARM::VST4LNd8Pseudo_UPD:
|
|
|
|
case ARM::VST4LNd16Pseudo_UPD:
|
|
|
|
case ARM::VST4LNd32Pseudo_UPD:
|
|
|
|
case ARM::VST4LNq16Pseudo_UPD:
|
|
|
|
case ARM::VST4LNq32Pseudo_UPD:
|
|
|
|
ExpandLaneOp(MBBI);
|
2011-01-20 16:34:58 +08:00
|
|
|
return true;
|
|
|
|
|
2011-12-16 06:27:11 +08:00
|
|
|
case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false); return true;
|
|
|
|
case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false); return true;
|
|
|
|
case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true); return true;
|
|
|
|
case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true); return true;
|
2016-04-19 05:48:55 +08:00
|
|
|
|
|
|
|
case ARM::CMP_SWAP_8:
|
|
|
|
if (STI->isThumb())
|
|
|
|
return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREXB, ARM::t2STREXB,
|
|
|
|
ARM::tUXTB, NextMBBI);
|
|
|
|
else
|
|
|
|
return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREXB, ARM::STREXB,
|
|
|
|
ARM::UXTB, NextMBBI);
|
|
|
|
case ARM::CMP_SWAP_16:
|
|
|
|
if (STI->isThumb())
|
|
|
|
return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREXH, ARM::t2STREXH,
|
|
|
|
ARM::tUXTH, NextMBBI);
|
|
|
|
else
|
|
|
|
return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREXH, ARM::STREXH,
|
|
|
|
ARM::UXTH, NextMBBI);
|
|
|
|
case ARM::CMP_SWAP_32:
|
|
|
|
if (STI->isThumb())
|
|
|
|
return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREX, ARM::t2STREX, 0,
|
|
|
|
NextMBBI);
|
|
|
|
else
|
|
|
|
return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREX, ARM::STREX, 0, NextMBBI);
|
|
|
|
|
|
|
|
case ARM::CMP_SWAP_64:
|
|
|
|
return ExpandCMP_SWAP_64(MBB, MBBI, NextMBBI);
|
2019-08-17 07:30:16 +08:00
|
|
|
|
|
|
|
case ARM::tBL_PUSHLR:
|
|
|
|
case ARM::BL_PUSHLR: {
|
|
|
|
const bool Thumb = Opcode == ARM::tBL_PUSHLR;
|
|
|
|
Register Reg = MI.getOperand(0).getReg();
|
|
|
|
assert(Reg == ARM::LR && "expect LR register!");
|
|
|
|
MachineInstrBuilder MIB;
|
|
|
|
if (Thumb) {
|
|
|
|
// push {lr}
|
|
|
|
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tPUSH))
|
|
|
|
.add(predOps(ARMCC::AL))
|
|
|
|
.addReg(Reg);
|
|
|
|
|
|
|
|
// bl __gnu_mcount_nc
|
|
|
|
MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tBL));
|
|
|
|
} else {
|
|
|
|
// stmdb sp!, {lr}
|
|
|
|
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::STMDB_UPD))
|
|
|
|
.addReg(ARM::SP, RegState::Define)
|
|
|
|
.addReg(ARM::SP)
|
|
|
|
.add(predOps(ARMCC::AL))
|
|
|
|
.addReg(Reg);
|
|
|
|
|
|
|
|
// bl __gnu_mcount_nc
|
|
|
|
MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::BL));
|
|
|
|
}
|
|
|
|
MIB.cloneMemRefs(MI);
|
|
|
|
for (unsigned i = 1; i < MI.getNumOperands(); ++i) MIB.add(MI.getOperand(i));
|
|
|
|
MI.eraseFromParent();
|
|
|
|
return true;
|
|
|
|
}
|
2011-01-20 16:34:58 +08:00
|
|
|
}
|
|
|
|
}
|
2010-08-26 07:27:42 +08:00
|
|
|
|
2011-01-20 16:34:58 +08:00
|
|
|
/// Visit every instruction of \p MBB in order and pass each one to ExpandMI.
///
/// \param MBB the basic block whose instructions are walked.
/// \returns true if at least one ExpandMI call returned true.
bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
  bool Changed = false;

  // The end iterator is read once, before the walk starts.
  MachineBasicBlock::iterator Cur = MBB.begin();
  MachineBasicBlock::iterator End = MBB.end();
  for (; Cur != End;) {
    // Capture the successor up front: Cur is never touched again after the
    // call, so the walk does not depend on the instruction at Cur surviving.
    // Next is also handed to ExpandMI and the walk resumes from whatever it
    // holds afterwards (presumably ExpandMI may retarget it -- confirm
    // against its signature).
    MachineBasicBlock::iterator Next = std::next(Cur);
    if (ExpandMI(MBB, Cur, Next))
      Changed = true;
    Cur = Next;
  }

  return Changed;
}
|
|
|
|
|
|
|
|
/// Entry point of the pass for a single machine function.
///
/// Refreshes the cached subtarget, instruction-info, register-info and
/// ARMFunctionInfo members from \p MF, then runs ExpandMBB over every basic
/// block of the function.
///
/// \param MF the machine function to process.
/// \returns true if ExpandMBB returned true for at least one block.
bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
  // Re-derive the cached members from the function being processed.
  const ARMSubtarget &Subtarget =
      static_cast<const ARMSubtarget &>(MF.getSubtarget());
  STI = &Subtarget;
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  AFI = MF.getInfo<ARMFunctionInfo>();

  LLVM_DEBUG(dbgs() << "********** ARM EXPAND PSEUDO INSTRUCTIONS **********\n"
                    << "********** Function: " << MF.getName() << '\n');

  bool Changed = false;
  for (MachineBasicBlock &Block : MF) {
    // Always call ExpandMBB; only the bookkeeping is conditional.
    if (ExpandMBB(Block))
      Changed = true;
  }

  // Optional verification, gated on the VerifyARMPseudo flag.
  if (VerifyARMPseudo) {
    MF.verify(this, "After expanding ARM pseudo instructions.");
  }

  LLVM_DEBUG(dbgs() << "***************************************************\n");

  return Changed;
}
|
|
|
|
|
|
|
|
/// createARMExpandPseudoPass - returns an instance of the pseudo instruction
|
|
|
|
/// expansion pass.
|
|
|
|
FunctionPass *llvm::createARMExpandPseudoPass() {
|
|
|
|
return new ARMExpandPseudo();
|
|
|
|
}
|