ARM backend contribution from Apple.

llvm-svn: 33353
Evan Cheng 2007-01-19 07:51:42 +00:00
parent 28c5b8618a
commit 10043e215b
32 changed files with 8730 additions and 2003 deletions

View File: ARM.h

@@ -20,43 +20,77 @@
#include <cassert>
namespace llvm {
// Enums corresponding to ARM condition codes
namespace ARMCC {
enum CondCodes {
EQ,
NE,
CS,
CC,
MI,
PL,
VS,
VC,
HI,
LS,
GE,
LT,
GT,
LE,
AL
};
class ARMTargetMachine;
class FunctionPass;
// Enums corresponding to ARM condition codes
namespace ARMCC {
enum CondCodes {
EQ,
NE,
HS,
LO,
MI,
PL,
VS,
VC,
HI,
LS,
GE,
LT,
GT,
LE,
AL
};
inline static CondCodes getOppositeCondition(CondCodes CC){
switch (CC) {
default: assert(0 && "Unknown condition code");
case EQ: return NE;
case NE: return EQ;
case HS: return LO;
case LO: return HS;
case MI: return PL;
case PL: return MI;
case VS: return VC;
case VC: return VS;
case HI: return LS;
case LS: return HI;
case GE: return LT;
case LT: return GE;
case GT: return LE;
case LE: return GT;
}
}
}
namespace ARMShift {
enum ShiftTypes {
LSL,
LSR,
ASR,
ROR,
RRX
};
inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) {
switch (CC) {
default: assert(0 && "Unknown condition code");
case ARMCC::EQ: return "eq";
case ARMCC::NE: return "ne";
case ARMCC::HS: return "hs";
case ARMCC::LO: return "lo";
case ARMCC::MI: return "mi";
case ARMCC::PL: return "pl";
case ARMCC::VS: return "vs";
case ARMCC::VC: return "vc";
case ARMCC::HI: return "hi";
case ARMCC::LS: return "ls";
case ARMCC::GE: return "ge";
case ARMCC::LT: return "lt";
case ARMCC::GT: return "gt";
case ARMCC::LE: return "le";
case ARMCC::AL: return "al";
}
}
class FunctionPass;
class TargetMachine;
FunctionPass *createARMISelDag(ARMTargetMachine &TM);
FunctionPass *createARMCodePrinterPass(std::ostream &O, ARMTargetMachine &TM);
FunctionPass *createARMLoadStoreOptimizationPass();
FunctionPass *createARMConstantIslandPass();
FunctionPass *createARMISelDag(TargetMachine &TM);
FunctionPass *createARMCodePrinterPass(std::ostream &OS, TargetMachine &TM);
FunctionPass *createARMFixMulPass();
} // end namespace llvm;
// Defines symbolic names for ARM registers. This defines a mapping from
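As a quick illustration of the condition-code helpers above (a minimal sketch, not part of the commit), this is exactly the flip that ReverseBranchCondition in ARMInstrInfo.cpp performs on a branch predicate:

  ARMCC::CondCodes CC  = ARMCC::GE;
  ARMCC::CondCodes Opp = ARMCC::getOppositeCondition(CC); // ARMCC::LT
  const char *Name     = ARMCondCodeToString(Opp);        // "lt"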

View File: ARM.td

@@ -17,6 +17,73 @@
include "../Target.td"
//===----------------------------------------------------------------------===//
// ARM Subtarget features.
//
def ArchV4T : SubtargetFeature<"v4t", "ARMArchVersion", "V4T",
"ARM v4T">;
def ArchV5T : SubtargetFeature<"v5t", "ARMArchVersion", "V5T",
"ARM v5T">;
def ArchV5TE : SubtargetFeature<"v5te", "ARMArchVersion", "V5TE",
"ARM v5TE, v5TEj, v5TExp">;
def ArchV6 : SubtargetFeature<"v6", "ARMArchVersion", "V6",
"ARM v6">;
def FeatureVFP2 : SubtargetFeature<"vfp2", "HasVFP2", "true",
"Enable VFP2 instructions ">;
//===----------------------------------------------------------------------===//
// ARM Processors supported.
//
class Proc<string Name, list<SubtargetFeature> Features>
: Processor<Name, NoItineraries, Features>;
// V4 Processors.
def : Proc<"generic", []>;
def : Proc<"arm8", []>;
def : Proc<"arm810", []>;
def : Proc<"strongarm", []>;
def : Proc<"strongarm110", []>;
def : Proc<"strongarm1100", []>;
def : Proc<"strongarm1110", []>;
// V4T Processors.
def : Proc<"arm7tdmi", [ArchV4T]>;
def : Proc<"arm7tdmi-s", [ArchV4T]>;
def : Proc<"arm710t", [ArchV4T]>;
def : Proc<"arm720t", [ArchV4T]>;
def : Proc<"arm9", [ArchV4T]>;
def : Proc<"arm9tdmi", [ArchV4T]>;
def : Proc<"arm920", [ArchV4T]>;
def : Proc<"arm920t", [ArchV4T]>;
def : Proc<"arm922t", [ArchV4T]>;
def : Proc<"arm940t", [ArchV4T]>;
def : Proc<"ep9312", [ArchV4T]>;
// V5T Processors.
def : Proc<"arm10tdmi", [ArchV5T]>;
def : Proc<"arm1020t", [ArchV5T]>;
// V5TE Processors.
def : Proc<"arm9e", [ArchV5TE]>;
def : Proc<"arm946e-s", [ArchV5TE]>;
def : Proc<"arm966e-s", [ArchV5TE]>;
def : Proc<"arm968e-s", [ArchV5TE]>;
def : Proc<"arm10e", [ArchV5TE]>;
def : Proc<"arm1020e", [ArchV5TE]>;
def : Proc<"arm1022e", [ArchV5TE]>;
def : Proc<"xscale", [ArchV5TE]>;
def : Proc<"iwmmxt", [ArchV5TE]>;
// V6 Processors.
def : Proc<"arm1136j-s", [ArchV6]>;
def : Proc<"arm1136jf-s", [ArchV6, FeatureVFP2]>;
def : Proc<"arm1176jz-s", [ArchV6]>;
def : Proc<"arm1176jzf-s", [ArchV6, FeatureVFP2]>;
def : Proc<"mpcorenovfp", [ArchV6]>;
def : Proc<"mpcore", [ArchV6, FeatureVFP2]>;
//===----------------------------------------------------------------------===//
// Register File Description
//===----------------------------------------------------------------------===//
@@ -31,8 +98,14 @@ include "ARMInstrInfo.td"
def ARMInstrInfo : InstrInfo {
// Define how we want to layout our target-specific information field.
let TSFlagsFields = [];
let TSFlagsShifts = [];
let TSFlagsFields = ["AddrModeBits",
"SizeFlag",
"IndexModeBits",
"Opcode"];
let TSFlagsShifts = [0,
4,
7,
9];
}
//===----------------------------------------------------------------------===//

View File: ARMAddressingModes.h

@@ -0,0 +1,394 @@
//===- ARMAddressingModes.h - ARM Addressing Modes --------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under the
// University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the ARM addressing mode implementation stuff.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TARGET_ARM_ARMADDRESSINGMODES_H
#define LLVM_TARGET_ARM_ARMADDRESSINGMODES_H
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
namespace llvm {
/// ARM_AM - ARM Addressing Mode Stuff
namespace ARM_AM {
enum ShiftOpc {
no_shift = 0,
asr,
lsl,
lsr,
ror,
rrx
};
enum AddrOpc {
add = '+', sub = '-'
};
static inline const char *getShiftOpcStr(ShiftOpc Op) {
switch (Op) {
default: assert(0 && "Unknown shift opc!");
case ARM_AM::asr: return "asr";
case ARM_AM::lsl: return "lsl";
case ARM_AM::lsr: return "lsr";
case ARM_AM::ror: return "ror";
case ARM_AM::rrx: return "rrx";
}
}
static inline ShiftOpc getShiftOpcForNode(SDOperand N) {
switch (N.getOpcode()) {
default: return ARM_AM::no_shift;
case ISD::SHL: return ARM_AM::lsl;
case ISD::SRL: return ARM_AM::lsr;
case ISD::SRA: return ARM_AM::asr;
case ISD::ROTR: return ARM_AM::ror;
//case ISD::ROTL: // Only if imm -> turn into ROTR.
// Can't handle RRX here, because it would require folding a flag into
// the addressing mode. :( This causes us to miss certain things.
//case ARMISD::RRX: return ARM_AM::rrx;
}
}
enum AMSubMode {
bad_am_submode = 0,
ia,
ib,
da,
db
};
static inline const char *getAMSubModeStr(AMSubMode Mode) {
switch (Mode) {
default: assert(0 && "Unknown addressing sub-mode!");
case ARM_AM::ia: return "ia";
case ARM_AM::ib: return "ib";
case ARM_AM::da: return "da";
case ARM_AM::db: return "db";
}
}
static inline const char *getAMSubModeAltStr(AMSubMode Mode, bool isLD) {
switch (Mode) {
default: assert(0 && "Unknown addressing sub-mode!");
case ARM_AM::ia: return isLD ? "fd" : "ea";
case ARM_AM::ib: return isLD ? "ed" : "fa";
case ARM_AM::da: return isLD ? "fa" : "ed";
case ARM_AM::db: return isLD ? "ea" : "fd";
}
}
/// rotr32 - Rotate a 32-bit unsigned value right by a specified # bits.
///
static inline unsigned rotr32(unsigned Val, unsigned Amt) {
assert(Amt < 32 && "Invalid rotate amount");
return (Val >> Amt) | (Val << ((32-Amt)&31));
}
/// rotl32 - Rotate a 32-bit unsigned value left by a specified # bits.
///
static inline unsigned rotl32(unsigned Val, unsigned Amt) {
assert(Amt < 32 && "Invalid rotate amount");
return (Val << Amt) | (Val >> ((32-Amt)&31));
}
//===--------------------------------------------------------------------===//
// Addressing Mode #1: shift_operand with registers
//===--------------------------------------------------------------------===//
//
// This 'addressing mode' is used for arithmetic instructions. It can
// represent things like:
// reg
// reg [asr|lsl|lsr|ror|rrx] reg
// reg [asr|lsl|lsr|ror|rrx] imm
//
// This is stored as three operands [rega, regb, opc]. The first is the base
// reg, the second is the shift register (or reg0 when the shift is by an
// immediate or absent). The third operand encodes the shift opcode and the
// immediate when no register is present.
//
static inline unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm) {
return ShOp | (Imm << 3);
}
static inline unsigned getSORegOffset(unsigned Op) {
return Op >> 3;
}
static inline ShiftOpc getSORegShOp(unsigned Op) {
return (ShiftOpc)(Op & 7);
}
/// getSOImmValImm - Given an encoded imm field for the reg/imm form, return
/// the 8-bit imm value.
static inline unsigned getSOImmValImm(unsigned Imm) {
return Imm & 0xFF;
}
/// getSOImmValRot - Given an encoded imm field for the reg/imm form, return
/// the rotate amount.
static inline unsigned getSOImmValRot(unsigned Imm) {
return (Imm >> 8) * 2;
}
/// getSOImmValRotate - Try to handle Imm with an immediate shifter operand,
/// computing the rotate amount to use. If this immediate value cannot be
/// handled with a single shifter-op, determine a good rotate amount that will
/// take a maximal chunk of bits out of the immediate.
static inline unsigned getSOImmValRotate(unsigned Imm) {
// 8-bit (or less) immediates are trivially shifter_operands with a rotate
// of zero.
if ((Imm & ~255U) == 0) return 0;
// Use CTZ to compute the rotate amount.
unsigned TZ = CountTrailingZeros_32(Imm);
// Rotate amount must be even. Something like 0x200 must be rotated 8 bits,
// not 9.
unsigned RotAmt = TZ & ~1;
// If we can handle this spread, return it.
if ((rotr32(Imm, RotAmt) & ~255U) == 0)
return (32-RotAmt)&31; // HW rotates right, not left.
// For values like 0xF000000F, we should skip the first run of ones, then
// retry the hunt.
if (Imm & 1) {
unsigned TrailingOnes = CountTrailingZeros_32(~Imm);
if (TrailingOnes != 32) { // Avoid overflow on 0xFFFFFFFF
// Restart the search for a high-order bit after the initial sequence of
// ones.
unsigned TZ2 = CountTrailingZeros_32(Imm & ~((1 << TrailingOnes)-1));
// Rotate amount must be even.
unsigned RotAmt2 = TZ2 & ~1;
// If this fits, use it.
if (RotAmt2 != 32 && (rotr32(Imm, RotAmt2) & ~255U) == 0)
return (32-RotAmt2)&31; // HW rotates right, not left.
}
}
// Otherwise, we have no way to cover this span of bits with a single
// shifter_op immediate. Return a chunk of bits that will be useful to
// handle.
return (32-RotAmt)&31; // HW rotates right, not left.
}
/// getSOImmVal - Given a 32-bit immediate, if it is something that can fit
/// into a shifter_operand immediate operand, return the 12-bit encoding for
/// it. If not, return -1.
static inline int getSOImmVal(unsigned Arg) {
// 8-bit (or less) immediates are trivially shifter_operands with a rotate
// of zero.
if ((Arg & ~255U) == 0) return Arg;
unsigned RotAmt = getSOImmValRotate(Arg);
// If this cannot be handled with a single shifter_op, bail out.
if (rotr32(~255U, RotAmt) & Arg)
return -1;
// Encode this correctly.
return rotl32(Arg, RotAmt) | ((RotAmt>>1) << 8);
}
/// isSOImmTwoPartVal - Return true if the specified value can be obtained by
/// or'ing together two SOImmVal's.
static inline bool isSOImmTwoPartVal(unsigned V) {
// If this can be handled with a single shifter_op, bail out.
V = rotr32(~255U, getSOImmValRotate(V)) & V;
if (V == 0)
return false;
// If this can be handled with two shifter_op's, accept.
V = rotr32(~255U, getSOImmValRotate(V)) & V;
return V == 0;
}
/// getSOImmTwoPartFirst - If V is a value that satisfies isSOImmTwoPartVal,
/// return the first chunk of it.
static inline unsigned getSOImmTwoPartFirst(unsigned V) {
return rotr32(255U, getSOImmValRotate(V)) & V;
}
/// getSOImmTwoPartSecond - If V is a value that satisfies isSOImmTwoPartVal,
/// return the second chunk of it.
static inline unsigned getSOImmTwoPartSecond(unsigned V) {
// Mask out the first chunk.
V = rotr32(~255U, getSOImmValRotate(V)) & V;
// Take what's left.
assert(V == (rotr32(255U, getSOImmValRotate(V)) & V));
return V;
}
/// getThumbImmValShift - Try to handle Imm with an 8-bit immediate followed
/// by a left shift. Returns the shift amount to use.
static inline unsigned getThumbImmValShift(unsigned Imm) {
// 8-bit (or less) immediates are trivially immediate operands with a shift
// of zero.
if ((Imm & ~255U) == 0) return 0;
// Use CTZ to compute the shift amount.
return CountTrailingZeros_32(Imm);
}
/// isThumbImmShiftedVal - Return true if the specified value can be obtained
/// by left shifting an 8-bit immediate.
static inline bool isThumbImmShiftedVal(unsigned V) {
// Mask off the bits an 8-bit immediate shifted into place would cover;
// anything left over means V doesn't qualify.
V = (~255U << getThumbImmValShift(V)) & V;
return V == 0;
}
/// getThumbImmNonShiftedVal - If V is a value that satisfies
/// isThumbImmShiftedVal, return the non-shifted value.
static inline unsigned getThumbImmNonShiftedVal(unsigned V) {
return V >> getThumbImmValShift(V);
}
//===--------------------------------------------------------------------===//
// Addressing Mode #2
//===--------------------------------------------------------------------===//
//
// This is used for most simple load/store instructions.
//
// addrmode2 := reg +/- reg shop imm
// addrmode2 := reg +/- imm12
//
// The first operand is always a Reg. The second operand is a reg if in
// reg/reg form, otherwise it's reg#0. The third field encodes the operation
// in bit 12, the immediate in bits 0-11, and the shift op in bits 13-15.
//
// If this addressing mode is a frame index (before prolog/epilog insertion
// and code rewriting), this operand will have the form: FI#, reg0, <offs>
// with no shift amount for the frame offset.
//
static inline unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO) {
assert(Imm12 < (1 << 12) && "Imm too large!");
bool isSub = Opc == sub;
return Imm12 | ((int)isSub << 12) | (SO << 13);
}
static inline unsigned getAM2Offset(unsigned AM2Opc) {
return AM2Opc & ((1 << 12)-1);
}
static inline AddrOpc getAM2Op(unsigned AM2Opc) {
return ((AM2Opc >> 12) & 1) ? sub : add;
}
static inline ShiftOpc getAM2ShiftOpc(unsigned AM2Opc) {
return (ShiftOpc)(AM2Opc >> 13);
}
//===--------------------------------------------------------------------===//
// Addressing Mode #3
//===--------------------------------------------------------------------===//
//
// This is used for sign-extending loads, and load/store-pair instructions.
//
// addrmode3 := reg +/- reg
// addrmode3 := reg +/- imm8
//
// The first operand is always a Reg. The second operand is a reg if in
// reg/reg form, otherwise it's reg#0. The third field encodes the operation
// in bit 8, the immediate in bits 0-7.
/// getAM3Opc - This function encodes the addrmode3 opc field.
static inline unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset) {
bool isSub = Opc == sub;
return ((int)isSub << 8) | Offset;
}
static inline unsigned char getAM3Offset(unsigned AM3Opc) {
return AM3Opc & 0xFF;
}
static inline AddrOpc getAM3Op(unsigned AM3Opc) {
return ((AM3Opc >> 8) & 1) ? sub : add;
}
//===--------------------------------------------------------------------===//
// Addressing Mode #4
//===--------------------------------------------------------------------===//
//
// This is used for load / store multiple instructions.
//
// addrmode4 := reg, <mode>
//
// The four modes are:
// IA - Increment after
// IB - Increment before
// DA - Decrement after
// DB - Decrement before
//
// If the 4th bit (writeback) is set, then the base register is updated after
// the memory transfer.
static inline AMSubMode getAM4SubMode(unsigned Mode) {
return (AMSubMode)(Mode & 0x7);
}
static inline unsigned getAM4ModeImm(AMSubMode SubMode, bool WB = false) {
return (int)SubMode | ((int)WB << 3);
}
static inline bool getAM4WBFlag(unsigned Mode) {
return (Mode >> 3) & 1;
}
//===--------------------------------------------------------------------===//
// Addressing Mode #5
//===--------------------------------------------------------------------===//
//
// This is used for coprocessor instructions, such as FP load/stores.
//
// addrmode5 := reg +/- imm8*4
//
// The first operand is always a Reg. The third field encodes the operation
// in bit 8, the immediate in bits 0-7.
//
// This can also be used for FP load/store multiple ops. The third field encodes
// writeback mode in bit 8, the number of registers (or 2 times the number of
// registers for DPR ops) in bits 0-7. In addition, bits 9-11 encode one of the
// following two sub-modes:
//
// IA - Increment after
// DB - Decrement before
/// getAM5Opc - This function encodes the addrmode5 opc field.
static inline unsigned getAM5Opc(AddrOpc Opc, unsigned char Offset) {
bool isSub = Opc == sub;
return ((int)isSub << 8) | Offset;
}
static inline unsigned char getAM5Offset(unsigned AM5Opc) {
return AM5Opc & 0xFF;
}
static inline AddrOpc getAM5Op(unsigned AM5Opc) {
return ((AM5Opc >> 8) & 1) ? sub : add;
}
/// getAM5Opc - This function encodes the addrmode5 opc field for FLDM and
/// FSTM instructions.
static inline unsigned getAM5Opc(AMSubMode SubMode, bool WB,
unsigned char Offset) {
assert((SubMode == ia || SubMode == db) &&
"Illegal addressing mode 5 sub-mode!");
return ((int)SubMode << 9) | ((int)WB << 8) | Offset;
}
static inline AMSubMode getAM5SubMode(unsigned AM5Opc) {
return (AMSubMode)((AM5Opc >> 9) & 0x7);
}
static inline bool getAM5WBFlag(unsigned AM5Opc) {
return ((AM5Opc >> 8) & 1);
}
} // end namespace ARM_AM
} // end namespace llvm
#endif
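Some hand-traced values for the shifter-operand and addressing-mode encoders above (an illustrative sketch, not part of the commit; the results follow from the functions as written):

  #include "ARMAddressingModes.h"
  #include <cassert>
  using namespace llvm;

  static void checkEncodings() {
    // 0xFF000000 is 0xFF rotated right by 8: imm8 = 0xFF, rotate field = 4.
    assert(ARM_AM::getSOImmVal(0xFF000000) == 0x4FF);
    // 0x00F000F0 doesn't fit one shifter_op immediate, but fits two.
    assert(ARM_AM::getSOImmVal(0x00F000F0) == -1);
    assert(ARM_AM::isSOImmTwoPartVal(0x00F000F0));
    assert(ARM_AM::getSOImmTwoPartFirst(0x00F000F0)  == 0x000000F0);
    assert(ARM_AM::getSOImmTwoPartSecond(0x00F000F0) == 0x00F00000);
    // Thumb: 0x3FC0 is 0xFF << 6.
    assert(ARM_AM::isThumbImmShiftedVal(0x3FC0));
    assert(ARM_AM::getThumbImmNonShiftedVal(0x3FC0) == 0xFF);
    // Addressing mode 2 round-trip: subtract an immediate offset of 4.
    unsigned AM2 = ARM_AM::getAM2Opc(ARM_AM::sub, 4, ARM_AM::no_shift);
    assert(ARM_AM::getAM2Offset(AM2) == 4 &&
           ARM_AM::getAM2Op(AM2) == ARM_AM::sub);
  }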

File diff suppressed because it is too large

View File: ARMCommon.cpp

@@ -1,84 +0,0 @@
//===-- ARMCommon.cpp - Define support functions for ARM --------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file was developed by the "Instituto Nokia de Tecnologia" and
// is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//
//
//===----------------------------------------------------------------------===//
#include "ARMCommon.h"
static inline unsigned rotateL(unsigned x, unsigned n){
return ((x << n) | (x >> (32 - n)));
}
static inline unsigned rotateR(unsigned x, unsigned n){
return ((x >> n) | (x << (32 - n)));
}
// finds the end position of the largest sequence of zeros in the binary
// representation of 'immediate'.
static int findLargestZeroSequence(unsigned immediate){
int max_zero_pos = 0;
int max_zero_length = 0;
int zero_pos;
int zero_length;
int pos = 0;
int end_pos;
while ((immediate & 0x3) == 0) {
immediate = rotateR(immediate, 2);
pos+=2;
}
end_pos = pos+32;
while (pos<end_pos){
while (((immediate & 0x3) != 0)&&(pos<end_pos)) {
immediate = rotateR(immediate, 2);
pos+=2;
}
zero_pos = pos;
while (((immediate & 0x3) == 0)&&(pos<end_pos)) {
immediate = rotateR(immediate, 2);
pos+=2;
}
zero_length = pos - zero_pos;
if (zero_length > max_zero_length){
max_zero_length = zero_length;
max_zero_pos = zero_pos % 32;
}
}
return (max_zero_pos + max_zero_length) % 32;
}
std::vector<unsigned> splitImmediate(unsigned immediate){
std::vector<unsigned> immediatePieces;
if (immediate == 0){
immediatePieces.push_back(0);
} else {
int start_pos = findLargestZeroSequence(immediate);
unsigned immediate_tmp = rotateR(immediate, start_pos);
int pos = 0;
while (pos < 32){
while(((immediate_tmp&0x3) == 0)&&(pos<32)){
immediate_tmp = rotateR(immediate_tmp,2);
pos+=2;
}
if (pos < 32){
immediatePieces.push_back(rotateL(immediate_tmp&0xFF,
(start_pos + pos) % 32 ));
immediate_tmp = rotateR(immediate_tmp,8);
pos+=8;
}
}
}
return immediatePieces;
}
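For reference, the removed helper decomposed a constant into 8-bit chunks that each fit an ARM rotate-immediate. A hand-traced example (illustrative only):

  std::vector<unsigned> Pieces = splitImmediate(0xF00F);
  // Pieces == {0x0000000F, 0x0000F000}: each piece is an 8-bit value
  // rotated into position, so each can be materialized with a single
  // ARM immediate operand and combined to rebuild 0xF00F.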

View File: ARMCommon.h

@@ -1,22 +0,0 @@
//===-- ARMCommon.h - Define support functions for ARM ----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file was developed by the "Instituto Nokia de Tecnologia" and
// is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//
//
//===----------------------------------------------------------------------===//
#ifndef ARM_COMMON_H
#define ARM_COMMON_H
#include <vector>
std::vector<unsigned> splitImmediate(unsigned immediate);
#endif

View File: ARMConstantIslandPass.cpp

@@ -0,0 +1,490 @@
//===-- ARMConstantIslandPass.cpp - ARM constant islands --------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under the
// University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that splits the constant pool up into 'islands'
// which are scattered throughout the function. This is required due to the
// limited pc-relative displacements that ARM has.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "arm-cp-islands"
#include "ARM.h"
#include "ARMInstrInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/Target/TargetAsmInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include <iostream>
using namespace llvm;
STATISTIC(NumSplit, "Number of uncond branches inserted");
namespace {
/// ARMConstantIslands - Due to limited pc-relative displacements, ARM
/// requires constant pool entries to be scattered among the instructions
/// inside a function. To do this, it completely ignores the normal LLVM
/// constant pool; instead, it places constants wherever it feels like with
/// special instructions.
///
/// The terminology used in this pass includes:
/// Islands - Clumps of constants placed in the function.
/// Water - Potential places where an island could be formed.
/// CPE - A constant pool entry that has been placed somewhere, which
/// tracks a list of users.
class VISIBILITY_HIDDEN ARMConstantIslands : public MachineFunctionPass {
/// NextUID - Assign unique ID's to CPE's.
unsigned NextUID;
/// BBSizes - The size of each MachineBasicBlock in bytes of code, indexed
/// by MBB Number.
std::vector<unsigned> BBSizes;
/// WaterList - A sorted list of basic blocks where islands could be placed
/// (i.e. blocks that don't fall through to the following block, due
/// to a return, unreachable, or unconditional branch).
std::vector<MachineBasicBlock*> WaterList;
/// CPUser - One user of a constant pool, keeping the machine instruction
/// pointer, the constant pool being referenced, and the max displacement
/// allowed from the instruction to the CP.
struct CPUser {
MachineInstr *MI;
MachineInstr *CPEMI;
unsigned MaxDisp;
CPUser(MachineInstr *mi, MachineInstr *cpemi, unsigned maxdisp)
: MI(mi), CPEMI(cpemi), MaxDisp(maxdisp) {}
};
/// CPUsers - Keep track of all of the machine instructions that use various
/// constant pools and their max displacement.
std::vector<CPUser> CPUsers;
const TargetInstrInfo *TII;
const TargetAsmInfo *TAI;
public:
virtual bool runOnMachineFunction(MachineFunction &Fn);
virtual const char *getPassName() const {
return "ARM constant island placement pass";
}
private:
void DoInitialPlacement(MachineFunction &Fn,
std::vector<MachineInstr*> &CPEMIs);
void InitialFunctionScan(MachineFunction &Fn,
const std::vector<MachineInstr*> &CPEMIs);
void SplitBlockBeforeInstr(MachineInstr *MI);
bool HandleConstantPoolUser(MachineFunction &Fn, CPUser &U);
void UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB);
unsigned GetInstSize(MachineInstr *MI) const;
unsigned GetOffsetOf(MachineInstr *MI) const;
};
}
/// createARMConstantIslandPass - returns an instance of the constant island
/// placement pass.
FunctionPass *llvm::createARMConstantIslandPass() {
return new ARMConstantIslands();
}
bool ARMConstantIslands::runOnMachineFunction(MachineFunction &Fn) {
// If there are no constants, there is nothing to do.
MachineConstantPool &MCP = *Fn.getConstantPool();
if (MCP.isEmpty()) return false;
TII = Fn.getTarget().getInstrInfo();
TAI = Fn.getTarget().getTargetAsmInfo();
// Renumber all of the machine basic blocks in the function, guaranteeing that
// the numbers agree with the position of the block in the function.
Fn.RenumberBlocks();
// Perform the initial placement of the constant pool entries. To start with,
// we put them all at the end of the function.
std::vector<MachineInstr*> CPEMIs;
DoInitialPlacement(Fn, CPEMIs);
/// The next UID to take is the first unused one.
NextUID = CPEMIs.size();
// Do the initial scan of the function, building up information about the
// sizes of each block, the location of all the water, and finding all of the
// constant pool users.
InitialFunctionScan(Fn, CPEMIs);
CPEMIs.clear();
// Iteratively place constant pool entries until there is no change.
bool MadeChange;
do {
MadeChange = false;
for (unsigned i = 0, e = CPUsers.size(); i != e; ++i)
MadeChange |= HandleConstantPoolUser(Fn, CPUsers[i]);
} while (MadeChange);
BBSizes.clear();
WaterList.clear();
CPUsers.clear();
return true;
}
/// DoInitialPlacement - Perform the initial placement of the constant pool
/// entries. To start with, we put them all at the end of the function.
void ARMConstantIslands::DoInitialPlacement(MachineFunction &Fn,
std::vector<MachineInstr*> &CPEMIs){
// Create the basic block to hold the CPE's.
MachineBasicBlock *BB = new MachineBasicBlock();
Fn.getBasicBlockList().push_back(BB);
// Add all of the constants from the constant pool to the end block, using an
// identity mapping of CPI's to CPE's.
const std::vector<MachineConstantPoolEntry> &CPs =
Fn.getConstantPool()->getConstants();
const TargetData &TD = *Fn.getTarget().getTargetData();
for (unsigned i = 0, e = CPs.size(); i != e; ++i) {
unsigned Size = TD.getTypeSize(CPs[i].getType());
// Verify that all constant pool entries are a multiple of 4 bytes. If not,
// we would have to pad them out or something so that instructions stay
// aligned.
assert((Size & 3) == 0 && "CP Entry not multiple of 4 bytes!");
MachineInstr *CPEMI =
BuildMI(BB, TII->get(ARM::CONSTPOOL_ENTRY))
.addImm(i).addConstantPoolIndex(i).addImm(Size);
CPEMIs.push_back(CPEMI);
DEBUG(std::cerr << "Moved CPI#" << i << " to end of function as #"
<< i << "\n");
}
}
/// BBHasFallthrough - Return true if the specified basic block can fallthrough
/// into the block immediately after it.
static bool BBHasFallthrough(MachineBasicBlock *MBB) {
// Get the next machine basic block in the function.
MachineFunction::iterator MBBI = MBB;
if (next(MBBI) == MBB->getParent()->end()) // Can't fall off end of function.
return false;
MachineBasicBlock *NextBB = next(MBBI);
for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
E = MBB->succ_end(); I != E; ++I)
if (*I == NextBB)
return true;
return false;
}
/// InitialFunctionScan - Do the initial scan of the function, building up
/// information about the sizes of each block, the location of all the water,
/// and finding all of the constant pool users.
void ARMConstantIslands::InitialFunctionScan(MachineFunction &Fn,
const std::vector<MachineInstr*> &CPEMIs) {
for (MachineFunction::iterator MBBI = Fn.begin(), E = Fn.end();
MBBI != E; ++MBBI) {
MachineBasicBlock &MBB = *MBBI;
// If this block doesn't fall through into the next MBB, then this is
// 'water' where a constant pool island could be placed.
if (!BBHasFallthrough(&MBB))
WaterList.push_back(&MBB);
unsigned MBBSize = 0;
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E; ++I) {
// Add instruction size to MBBSize.
MBBSize += GetInstSize(I);
// Scan the instructions for constant pool operands.
for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op)
if (I->getOperand(op).isConstantPoolIndex()) {
// We found one. The addressing mode tells us the max displacement
// from the PC that this instruction permits.
unsigned MaxOffs = 0;
// Basic size info comes from the TSFlags field.
unsigned TSFlags = I->getInstrDescriptor()->TSFlags;
switch (TSFlags & ARMII::AddrModeMask) {
default:
// Constant pool entries can reach anything.
if (I->getOpcode() == ARM::CONSTPOOL_ENTRY)
continue;
assert(0 && "Unknown addressing mode for CP reference!");
case ARMII::AddrMode1: // AM1: 8 bits << 2
MaxOffs = 1 << (8+2); // Taking the address of a CP entry.
break;
case ARMII::AddrMode2:
MaxOffs = 1 << 12; // +-offset_12
break;
case ARMII::AddrMode3:
MaxOffs = 1 << 8; // +-offset_8
break;
// addrmode4 has no immediate offset.
case ARMII::AddrMode5:
MaxOffs = 1 << (8+2); // +-(offset_8*4)
break;
case ARMII::AddrModeT1:
MaxOffs = 1 << 5;
break;
case ARMII::AddrModeT2:
MaxOffs = 1 << (5+1);
break;
case ARMII::AddrModeT4:
MaxOffs = 1 << (5+2);
break;
}
// Remember that this is a user of a CP entry.
MachineInstr *CPEMI =CPEMIs[I->getOperand(op).getConstantPoolIndex()];
CPUsers.push_back(CPUser(I, CPEMI, MaxOffs));
// Instructions can only use one CP entry, don't bother scanning the
// rest of the operands.
break;
}
}
BBSizes.push_back(MBBSize);
}
}
/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing
static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
unsigned JTI) DISABLE_INLINE;
static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
unsigned JTI) {
return JT[JTI].MBBs.size();
}
/// GetInstSize - Return the size of the specified MachineInstr.
///
unsigned ARMConstantIslands::GetInstSize(MachineInstr *MI) const {
// Basic size info comes from the TSFlags field.
unsigned TSFlags = MI->getInstrDescriptor()->TSFlags;
switch ((TSFlags & ARMII::SizeMask) >> ARMII::SizeShift) {
default:
// If this machine instr is an inline asm, measure it.
if (MI->getOpcode() == ARM::INLINEASM)
return TAI->getInlineAsmLength(MI->getOperand(0).getSymbolName());
assert(0 && "Unknown or unset size field for instr!");
break;
case ARMII::Size8Bytes: return 8; // Arm instruction x 2.
case ARMII::Size4Bytes: return 4; // Arm instruction.
case ARMII::Size2Bytes: return 2; // Thumb instruction.
case ARMII::SizeSpecial: {
switch (MI->getOpcode()) {
case ARM::CONSTPOOL_ENTRY:
// If this machine instr is a constant pool entry, its size is recorded as
// operand #2.
return MI->getOperand(2).getImm();
case ARM::BR_JTr:
case ARM::BR_JTm:
case ARM::BR_JTadd: {
// These are jumptable branches, i.e. a branch followed by an inlined
// jumptable. The size is 4 + 4 * number of entries.
unsigned JTI = MI->getOperand(MI->getNumOperands()-2).getJumpTableIndex();
const MachineFunction *MF = MI->getParent()->getParent();
MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
assert(JTI < JT.size());
return getNumJTEntries(JT, JTI) * 4 + 4;
}
default:
// Otherwise, pseudo-instruction sizes are zero.
return 0;
}
}
}
}
/// GetOffsetOf - Return the current offset of the specified machine instruction
/// from the start of the function. This offset changes as stuff is moved
/// around inside the function.
unsigned ARMConstantIslands::GetOffsetOf(MachineInstr *MI) const {
MachineBasicBlock *MBB = MI->getParent();
// The offset is composed of two things: the sum of the sizes of all MBB's
// before this instruction's block, and the offset from the start of the block
// it is in.
unsigned Offset = 0;
// Sum block sizes before MBB.
for (unsigned BB = 0, e = MBB->getNumber(); BB != e; ++BB)
Offset += BBSizes[BB];
// Sum instructions before MI in MBB.
for (MachineBasicBlock::iterator I = MBB->begin(); ; ++I) {
assert(I != MBB->end() && "Didn't find MI in its own basic block?");
if (&*I == MI) return Offset;
Offset += GetInstSize(I);
}
}
/// CompareMBBNumbers - Little predicate function to sort the WaterList by MBB
/// ID.
static bool CompareMBBNumbers(const MachineBasicBlock *LHS,
const MachineBasicBlock *RHS) {
return LHS->getNumber() < RHS->getNumber();
}
/// UpdateForInsertedWaterBlock - When a block is newly inserted into the
/// machine function, it upsets all of the block numbers. Renumber the blocks
/// and update the arrays that parallel this numbering.
void ARMConstantIslands::UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB) {
// Renumber the MBB's to keep them consecutive.
NewBB->getParent()->RenumberBlocks(NewBB);
// Insert a size into BBSizes to align it properly with the (newly
// renumbered) block numbers.
BBSizes.insert(BBSizes.begin()+NewBB->getNumber(), 0);
// Next, update WaterList. Specifically, we need to add NewBB as having
// available water after it.
std::vector<MachineBasicBlock*>::iterator IP =
std::lower_bound(WaterList.begin(), WaterList.end(), NewBB,
CompareMBBNumbers);
WaterList.insert(IP, NewBB);
}
/// Split the basic block containing MI into two blocks, which are joined by
/// an unconditional branch. Update data structures and renumber blocks to
/// account for this change.
void ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
MachineBasicBlock *OrigBB = MI->getParent();
// Create a new MBB for the code after the OrigBB.
MachineBasicBlock *NewBB = new MachineBasicBlock(OrigBB->getBasicBlock());
MachineFunction::iterator MBBI = OrigBB; ++MBBI;
OrigBB->getParent()->getBasicBlockList().insert(MBBI, NewBB);
// Splice the instructions starting with MI over to NewBB.
NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end());
// Add an unconditional branch from OrigBB to NewBB.
BuildMI(OrigBB, TII->get(ARM::B)).addMBB(NewBB);
NumSplit++;
// Update the CFG. All succs of OrigBB are now succs of NewBB.
while (!OrigBB->succ_empty()) {
MachineBasicBlock *Succ = *OrigBB->succ_begin();
OrigBB->removeSuccessor(Succ);
NewBB->addSuccessor(Succ);
// This pass should be run after register allocation, so there should be no
// PHI nodes to update.
assert((Succ->empty() || Succ->begin()->getOpcode() != TargetInstrInfo::PHI)
&& "PHI nodes should be eliminated by now!");
}
// OrigBB branches to NewBB.
OrigBB->addSuccessor(NewBB);
// Update internal data structures to account for the newly inserted MBB.
UpdateForInsertedWaterBlock(NewBB);
// Figure out how large NewBB is.
unsigned NewBBSize = 0;
for (MachineBasicBlock::iterator I = NewBB->begin(), E = NewBB->end();
I != E; ++I)
NewBBSize += GetInstSize(I);
// Set the size of NewBB in BBSizes.
BBSizes[NewBB->getNumber()] = NewBBSize;
// We removed instructions from OrigBB, so subtract their size (NewBBSize)
// from it, but add 4 back for the unconditional branch we appended.
BBSizes[OrigBB->getNumber()] -= NewBBSize-4;
}
/// HandleConstantPoolUser - Analyze the specified user, checking to see if it
/// is out-of-range. If so, pick up the constant pool value and move it some
/// place in-range.
bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &Fn, CPUser &U){
MachineInstr *UserMI = U.MI;
MachineInstr *CPEMI = U.CPEMI;
unsigned UserOffset = GetOffsetOf(UserMI);
unsigned CPEOffset = GetOffsetOf(CPEMI);
DEBUG(std::cerr << "User of CPE#" << CPEMI->getOperand(0).getImm()
<< " max delta=" << U.MaxDisp
<< " at offset " << int(UserOffset-CPEOffset) << "\t"
<< *UserMI);
// Check to see if the CPE is already in-range.
if (UserOffset < CPEOffset) {
// User before the CPE.
if (CPEOffset-UserOffset <= U.MaxDisp)
return false;
} else {
if (UserOffset-CPEOffset <= U.MaxDisp)
return false;
}
// Solution guaranteed to work: split the user's MBB right before the user and
// insert a clone of the CPE into the newly created water.
// If the user isn't at the start of its MBB, or if there is a fall-through
// into the user's MBB, split the MBB before the User.
MachineBasicBlock *UserMBB = UserMI->getParent();
if (&UserMBB->front() != UserMI ||
UserMBB == &Fn.front() || // entry MBB of function.
BBHasFallthrough(prior(MachineFunction::iterator(UserMBB)))) {
// TODO: Search for the best place to split the code. In practice, using
// loop nesting information to insert these guys outside of loops would be
// sufficient.
SplitBlockBeforeInstr(UserMI);
// UserMI's BB may have changed.
UserMBB = UserMI->getParent();
}
// Okay, we know we can put an island before UserMBB now, do it!
MachineBasicBlock *NewIsland = new MachineBasicBlock();
Fn.getBasicBlockList().insert(UserMBB, NewIsland);
// Update internal data structures to account for the newly inserted MBB.
UpdateForInsertedWaterBlock(NewIsland);
// Now that we have an island to add the CPE to, clone the original CPE and
// add it to the island.
unsigned ID = NextUID++;
unsigned CPI = CPEMI->getOperand(1).getConstantPoolIndex();
unsigned Size = CPEMI->getOperand(2).getImm();
// Build a new CPE for this user.
U.CPEMI = BuildMI(NewIsland, TII->get(ARM::CONSTPOOL_ENTRY))
.addImm(ID).addConstantPoolIndex(CPI).addImm(Size);
// Increase the size of the island block to account for the new entry.
BBSizes[NewIsland->getNumber()] += Size;
// Finally, change the CPI in the instruction operand to be ID.
for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i)
if (UserMI->getOperand(i).isConstantPoolIndex()) {
UserMI->getOperand(i).setConstantPoolIndex(ID);
break;
}
DEBUG(std::cerr << " Moved CPE to #" << ID << " CPI=" << CPI << "\t"
<< *UserMI);
return true;
}
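The in-range test at the top of HandleConstantPoolUser reduces to an absolute-distance check. A minimal equivalent (hypothetical helper, not part of the pass):

  static bool CPEIsInRange(unsigned UserOffset, unsigned CPEOffset,
                           unsigned MaxDisp) {
    // Offsets are from the start of the function; the displacement may go
    // in either direction.
    return UserOffset < CPEOffset ? CPEOffset - UserOffset <= MaxDisp
                                  : UserOffset - CPEOffset <= MaxDisp;
  }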

View File: ARMConstantPoolValue.cpp

@@ -0,0 +1,55 @@
//===- ARMConstantPoolValue.cpp - ARM constantpool value --------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file was developed by Evan Cheng and is distributed under the
// University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the ARM specific constantpool value class.
//
//===----------------------------------------------------------------------===//
#include "ARMConstantPoolValue.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/GlobalValue.h"
using namespace llvm;
ARMConstantPoolValue::ARMConstantPoolValue(GlobalValue *gv, unsigned id,
bool isNonLazy, unsigned char PCAdj)
: MachineConstantPoolValue((const Type*)gv->getType()),
GV(gv), LabelId(id), isNonLazyPtr(isNonLazy), PCAdjust(PCAdj) {}
int ARMConstantPoolValue::getExistingMachineCPValue(MachineConstantPool *CP,
unsigned Alignment) {
unsigned AlignMask = (1 << Alignment)-1;
const std::vector<MachineConstantPoolEntry> &Constants = CP->getConstants();
for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
if (Constants[i].isMachineConstantPoolEntry() &&
(Constants[i].Offset & AlignMask) == 0) {
ARMConstantPoolValue *CPV =
(ARMConstantPoolValue *)Constants[i].Val.MachineCPVal;
if (CPV->GV == GV && CPV->LabelId == LabelId &&
CPV->isNonLazyPtr == isNonLazyPtr)
return i;
}
}
return -1;
}
void
ARMConstantPoolValue::AddSelectionDAGCSEId(FoldingSetNodeID &ID) {
ID.AddPointer(GV);
ID.AddInteger(LabelId);
ID.AddInteger((unsigned)isNonLazyPtr);
ID.AddInteger(PCAdjust);
}
void ARMConstantPoolValue::print(std::ostream &O) const {
O << GV->getName();
if (isNonLazyPtr) O << "$non_lazy_ptr";
if (PCAdjust != 0) O << "-(LPIC" << LabelId << "+"
<< (unsigned)PCAdjust << ")";
}
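As an illustration of print() above: for a global named foo (an assumed name), label id 3, isNonLazyPtr set, and the ARM PCAdjust of 8, the emitted constant pool expression would be:

  foo$non_lazy_ptr-(LPIC3+8)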

View File: ARMConstantPoolValue.h

@@ -0,0 +1,50 @@
//===- ARMConstantPoolValue.h - ARM constantpool value ----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file was developed by Evan Cheng and is distributed under the
// University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the ARM specific constantpool value class.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H
#define LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H
#include "llvm/CodeGen/MachineConstantPool.h"
namespace llvm {
/// ARMConstantPoolValue - ARM specific constantpool value. This is used to
/// represent PC relative displacement between the address of the load
/// instruction and the global value being loaded, i.e. (&GV-(LPIC+8)).
class ARMConstantPoolValue : public MachineConstantPoolValue {
GlobalValue *GV; // GlobalValue being loaded.
unsigned LabelId; // Label id of the load.
bool isNonLazyPtr; // True if loading a Mac OS X non_lazy_ptr stub.
unsigned char PCAdjust; // Extra adjustment if constantpool is pc relative.
// 8 for ARM, 4 for Thumb.
public:
ARMConstantPoolValue(GlobalValue *gv, unsigned id, bool isNonLazy = false,
unsigned char PCAdj = 0);
GlobalValue *getGV() const { return GV; }
unsigned getLabelId() const { return LabelId; }
bool isNonLazyPointer() const { return isNonLazyPtr; }
unsigned char getPCAdjustment() const { return PCAdjust; }
virtual int getExistingMachineCPValue(MachineConstantPool *CP,
unsigned Alignment);
virtual void AddSelectionDAGCSEId(FoldingSetNodeID &ID);
virtual void print(std::ostream &O) const;
};
}
#endif

View File: ARMFrameInfo.h

@@ -17,17 +17,15 @@
#include "ARM.h"
#include "llvm/Target/TargetFrameInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "ARMSubtarget.h"
namespace llvm {
class ARMFrameInfo: public TargetFrameInfo {
class ARMFrameInfo : public TargetFrameInfo {
public:
ARMFrameInfo()
: TargetFrameInfo(TargetFrameInfo::StackGrowsDown, 8, 0) {
ARMFrameInfo(const ARMSubtarget &ST)
: TargetFrameInfo(StackGrowsDown, ST.getStackAlignment(), 0) {
}
};
} // End llvm namespace

File diff suppressed because it is too large

File diff suppressed because it is too large

View File: ARMISelLowering.h

@@ -0,0 +1,134 @@
//===-- ARMISelLowering.h - ARM DAG Lowering Interface ----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file was developed by Evan Cheng and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that ARM uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//
#ifndef ARMISELLOWERING_H
#define ARMISELLOWERING_H
#include "llvm/Target/TargetLowering.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include <vector>
namespace llvm {
class ARMConstantPoolValue;
class ARMSubtarget;
namespace ARMISD {
// ARM Specific DAG Nodes
enum NodeType {
// Start the numbering where the builtin ops and target ops leave off.
FIRST_NUMBER = ISD::BUILTIN_OP_END+ARM::INSTRUCTION_LIST_END,
Wrapper, // Wrapper - A wrapper node for TargetConstantPool,
// TargetExternalSymbol, and TargetGlobalAddress.
WrapperCall, // WrapperCall - Same as wrapper, but mark the wrapped
// node as call operand.
WrapperJT, // WrapperJT - A wrapper node for TargetJumpTable
CALL, // Function call.
CALL_NOLINK, // Function call with branch not branch-and-link.
tCALL, // Thumb function call.
BRCOND, // Conditional branch.
BR_JT, // Jumptable branch.
RET_FLAG, // Return with a flag operand.
PIC_ADD, // Add with a PC operand and a PIC label.
CMP, // ARM compare instructions.
CMPFP, // ARM VFP compare instruction, sets FPSCR.
CMPFPw0, // ARM VFP compare against zero instruction, sets FPSCR.
FMSTAT, // ARM fmstat instruction.
CMOV, // ARM conditional move instructions.
CNEG, // ARM conditional negate instructions.
FTOSI, // FP to sint within a FP register.
FTOUI, // FP to uint within a FP register.
SITOF, // sint to FP within a FP register.
UITOF, // uint to FP within a FP register.
MULHILOU, // Lo,Hi = umul LHS, RHS.
MULHILOS, // Lo,Hi = smul LHS, RHS.
SRL_FLAG, // V,Flag = srl_flag X -> srl X, 1 + save carry out.
SRA_FLAG, // V,Flag = sra_flag X -> sra X, 1 + save carry out.
RRX, // V = RRX X, Flag -> srl X, 1 + shift in carry flag.
FMRRD, // double to two gprs.
FMDRR // Two gprs to double.
};
}
//===----------------------------------------------------------------------===//
// ARMTargetLowering - ARM implementation of the TargetLowering interface
class ARMTargetLowering : public TargetLowering {
int VarArgsFrameIndex; // FrameIndex for start of varargs area.
public:
ARMTargetLowering(TargetMachine &TM);
virtual SDOperand LowerOperation(SDOperand Op, SelectionDAG &DAG);
virtual const char *getTargetNodeName(unsigned Opcode) const;
virtual MachineBasicBlock *InsertAtEndOfBasicBlock(MachineInstr *MI,
MachineBasicBlock *MBB);
/// isLegalAddressImmediate - Return true if the integer value or
/// GlobalValue can be used as the offset of the target addressing mode.
virtual bool isLegalAddressImmediate(int64_t V) const;
virtual bool isLegalAddressImmediate(GlobalValue *GV) const;
/// getPreIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if the node's address
/// can be legally represented as pre-indexed load / store address.
virtual bool getPreIndexedAddressParts(SDNode *N, SDOperand &Base,
SDOperand &Offset,
ISD::MemIndexedMode &AM,
SelectionDAG &DAG);
/// getPostIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if this node can be
/// combined with a load / store to form a post-indexed load / store.
virtual bool getPostIndexedAddressParts(SDNode *N, SDNode *Op,
SDOperand &Base, SDOperand &Offset,
ISD::MemIndexedMode &AM,
SelectionDAG &DAG);
virtual void computeMaskedBitsForTargetNode(const SDOperand Op,
uint64_t Mask,
uint64_t &KnownZero,
uint64_t &KnownOne,
unsigned Depth) const;
ConstraintType getConstraintType(char ConstraintLetter) const;
std::pair<unsigned, const TargetRegisterClass*>
getRegForInlineAsmConstraint(const std::string &Constraint,
MVT::ValueType VT) const;
std::vector<unsigned>
getRegClassForInlineAsmConstraint(const std::string &Constraint,
MVT::ValueType VT) const;
private:
/// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
/// make the right decision when generating code for different targets.
const ARMSubtarget *Subtarget;
/// ARMPCLabelIndex - Keep track of the number of ARM PC labels created.
///
unsigned ARMPCLabelIndex;
SDOperand LowerCALL(SDOperand Op, SelectionDAG &DAG);
SDOperand LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG);
SDOperand LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG);
SDOperand LowerBR_JT(SDOperand Op, SelectionDAG &DAG);
};
}
#endif // ARMISELLOWERING_H

View File: ARMInstrInfo.cpp

@@ -14,46 +14,409 @@
#include "ARMInstrInfo.h"
#include "ARM.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "ARMAddressingModes.h"
#include "ARMGenInstrInfo.inc"
#include "ARMMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;
ARMInstrInfo::ARMInstrInfo()
static cl::opt<bool> EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
cl::desc("Enable ARM 2-addr to 3-addr conv"));
ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI)
: TargetInstrInfo(ARMInsts, sizeof(ARMInsts)/sizeof(ARMInsts[0])),
RI(*this) {
RI(*this, STI) {
}
unsigned ARMInstrInfo::getDWARF_LABELOpcode() const {
return ARM::DWARF_LABEL;
}
const TargetRegisterClass *ARMInstrInfo::getPointerRegClass() const {
return &ARM::IntRegsRegClass;
return &ARM::GPRRegClass;
}
/// Return true if the instruction is a register to register move and
/// leave the source and dest operands in the passed parameters.
///
bool ARMInstrInfo::isMoveInstr(const MachineInstr &MI,
unsigned &SrcReg, unsigned &DstReg) const {
unsigned &SrcReg, unsigned &DstReg) const {
MachineOpCode oc = MI.getOpcode();
switch (oc) {
case ARM::MOV: {
assert(MI.getNumOperands() == 4 &&
MI.getOperand(0).isRegister() &&
default:
return false;
case ARM::FCPYS:
case ARM::FCPYD:
SrcReg = MI.getOperand(1).getReg();
DstReg = MI.getOperand(0).getReg();
return true;
case ARM::MOVrr:
case ARM::tMOVrr:
assert(MI.getNumOperands() == 2 && MI.getOperand(0).isRegister() &&
MI.getOperand(1).isRegister() &&
"Invalid ARM MOV instruction");
const MachineOperand &Arg = MI.getOperand(1);
const MachineOperand &Shift = MI.getOperand(2);
if (Arg.isRegister() && Shift.isImmediate() && Shift.getImmedValue() == 0) {
SrcReg = MI.getOperand(1).getReg();
DstReg = MI.getOperand(0).getReg();
return true;
}
SrcReg = MI.getOperand(1).getReg();
DstReg = MI.getOperand(0).getReg();
return true;
}
}
return false;
}
void ARMInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
const std::vector<MachineOperand> &Cond)const{
// Can only insert uncond branches so far.
assert(Cond.empty() && !FBB && TBB && "Can only handle uncond branches!");
BuildMI(&MBB, get(ARM::b)).addMBB(TBB);
unsigned ARMInstrInfo::isLoadFromStackSlot(MachineInstr *MI, int &FrameIndex) const {
switch (MI->getOpcode()) {
default: break;
case ARM::LDR:
if (MI->getOperand(1).isFrameIndex() &&
MI->getOperand(2).isReg() &&
MI->getOperand(3).isImmediate() &&
MI->getOperand(2).getReg() == 0 &&
MI->getOperand(3).getImmedValue() == 0) {
FrameIndex = MI->getOperand(1).getFrameIndex();
return MI->getOperand(0).getReg();
}
break;
case ARM::FLDD:
case ARM::FLDS:
if (MI->getOperand(1).isFrameIndex() &&
MI->getOperand(2).isImmediate() &&
MI->getOperand(2).getImmedValue() == 0) {
FrameIndex = MI->getOperand(1).getFrameIndex();
return MI->getOperand(0).getReg();
}
break;
case ARM::tLDRspi:
if (MI->getOperand(1).isFrameIndex() &&
MI->getOperand(2).isImmediate() &&
MI->getOperand(2).getImmedValue() == 0) {
FrameIndex = MI->getOperand(1).getFrameIndex();
return MI->getOperand(0).getReg();
}
break;
}
return 0;
}
unsigned ARMInstrInfo::isStoreToStackSlot(MachineInstr *MI, int &FrameIndex) const {
switch (MI->getOpcode()) {
default: break;
case ARM::STR:
if (MI->getOperand(1).isFrameIndex() &&
MI->getOperand(2).isReg() &&
MI->getOperand(3).isImmediate() &&
MI->getOperand(2).getReg() == 0 &&
MI->getOperand(3).getImmedValue() == 0) {
FrameIndex = MI->getOperand(1).getFrameIndex();
return MI->getOperand(0).getReg();
}
break;
case ARM::FSTD:
case ARM::FSTS:
if (MI->getOperand(1).isFrameIndex() &&
MI->getOperand(2).isImmediate() &&
MI->getOperand(2).getImmedValue() == 0) {
FrameIndex = MI->getOperand(1).getFrameIndex();
return MI->getOperand(0).getReg();
}
break;
case ARM::tSTRspi:
if (MI->getOperand(1).isFrameIndex() &&
MI->getOperand(2).isImmediate() &&
MI->getOperand(2).getImmedValue() == 0) {
FrameIndex = MI->getOperand(1).getFrameIndex();
return MI->getOperand(0).getReg();
}
break;
}
return 0;
}
static unsigned getUnindexedOpcode(unsigned Opc) {
switch (Opc) {
default: break;
case ARM::LDR_PRE:
case ARM::LDR_POST:
return ARM::LDR;
case ARM::LDRH_PRE:
case ARM::LDRH_POST:
return ARM::LDRH;
case ARM::LDRB_PRE:
case ARM::LDRB_POST:
return ARM::LDRB;
case ARM::LDRSH_PRE:
case ARM::LDRSH_POST:
return ARM::LDRSH;
case ARM::LDRSB_PRE:
case ARM::LDRSB_POST:
return ARM::LDRSB;
case ARM::STR_PRE:
case ARM::STR_POST:
return ARM::STR;
case ARM::STRH_PRE:
case ARM::STRH_POST:
return ARM::STRH;
case ARM::STRB_PRE:
case ARM::STRB_POST:
return ARM::STRB;
}
return 0;
}
MachineInstr *
ARMInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
MachineBasicBlock::iterator &MBBI,
LiveVariables &LV) const {
if (!EnableARM3Addr)
return NULL;
MachineInstr *MI = MBBI;
unsigned TSFlags = MI->getInstrDescriptor()->TSFlags;
bool isPre = false;
switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
default: return NULL;
case ARMII::IndexModePre:
isPre = true;
break;
case ARMII::IndexModePost:
break;
}
// Try splitting an indexed load / store into an un-indexed one plus an add/sub
// operation.
unsigned MemOpc = getUnindexedOpcode(MI->getOpcode());
if (MemOpc == 0)
return NULL;
MachineInstr *UpdateMI = NULL;
MachineInstr *MemMI = NULL;
unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
unsigned NumOps = MI->getNumOperands();
bool isLoad = (MI->getInstrDescriptor()->Flags & M_LOAD_FLAG) != 0;
const MachineOperand &WB = isLoad ? MI->getOperand(1) : MI->getOperand(0);
const MachineOperand &Base = MI->getOperand(2);
const MachineOperand &Offset = MI->getOperand(NumOps-2);
unsigned WBReg = WB.getReg();
unsigned BaseReg = Base.getReg();
unsigned OffReg = Offset.getReg();
unsigned OffImm = MI->getOperand(NumOps-1).getImm();
switch (AddrMode) {
default:
assert(false && "Unknown indexed op!");
return NULL;
case ARMII::AddrMode2: {
bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
unsigned Amt = ARM_AM::getAM2Offset(OffImm);
if (OffReg == 0) {
int SOImmVal = ARM_AM::getSOImmVal(Amt);
if (SOImmVal == -1)
// Can't encode it in a so_imm operand. This transformation will
// add more than 1 instruction. Abandon!
return NULL;
UpdateMI = BuildMI(get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
.addReg(BaseReg).addImm(SOImmVal);
} else if (Amt != 0) {
ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm);
unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
UpdateMI = BuildMI(get(isSub ? ARM::SUBrs : ARM::ADDrs), WBReg)
.addReg(BaseReg).addReg(OffReg).addReg(0).addImm(SOOpc);
} else
UpdateMI = BuildMI(get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
.addReg(BaseReg).addReg(OffReg);
break;
}
case ARMII::AddrMode3 : {
bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub;
unsigned Amt = ARM_AM::getAM3Offset(OffImm);
if (OffReg == 0)
// Immediate is 8-bits. It's guaranteed to fit in a so_imm operand.
UpdateMI = BuildMI(get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
.addReg(BaseReg).addImm(Amt);
else
UpdateMI = BuildMI(get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
.addReg(BaseReg).addReg(OffReg);
break;
}
}
std::vector<MachineInstr*> NewMIs;
if (isPre) {
if (isLoad)
MemMI = BuildMI(get(MemOpc), MI->getOperand(0).getReg())
.addReg(WBReg).addReg(0).addImm(0);
else
MemMI = BuildMI(get(MemOpc)).addReg(MI->getOperand(1).getReg())
.addReg(WBReg).addReg(0).addImm(0);
NewMIs.push_back(MemMI);
NewMIs.push_back(UpdateMI);
} else {
if (isLoad)
MemMI = BuildMI(get(MemOpc), MI->getOperand(0).getReg())
.addReg(BaseReg).addReg(0).addImm(0);
else
MemMI = BuildMI(get(MemOpc)).addReg(MI->getOperand(1).getReg())
.addReg(BaseReg).addReg(0).addImm(0);
if (WB.isDead())
UpdateMI->getOperand(0).setIsDead();
NewMIs.push_back(UpdateMI);
NewMIs.push_back(MemMI);
}
// Transfer LiveVariables states, kill / dead info.
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
if (MO.isRegister() && MO.getReg() &&
MRegisterInfo::isVirtualRegister(MO.getReg())) {
unsigned Reg = MO.getReg();
LiveVariables::VarInfo &VI = LV.getVarInfo(Reg);
if (MO.isDef()) {
MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI;
if (MO.isDead())
LV.addVirtualRegisterDead(Reg, NewMI);
// Update the defining instruction.
if (VI.DefInst == MI)
VI.DefInst = NewMI;
}
if (MO.isUse() && MO.isKill()) {
for (unsigned j = 0; j < 2; ++j) {
// Look at the two new MI's in reverse order.
MachineInstr *NewMI = NewMIs[j];
MachineOperand *NMO = NewMI->findRegisterUseOperand(Reg);
if (!NMO)
continue;
LV.addVirtualRegisterKilled(Reg, NewMI);
if (VI.removeKill(MI))
VI.Kills.push_back(NewMI);
break;
}
}
}
}
MFI->insert(MBBI, NewMIs[1]);
MFI->insert(MBBI, NewMIs[0]);
return NewMIs[0];
}
// Branch analysis.
bool ARMInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
std::vector<MachineOperand> &Cond) const {
// If the block has no terminators, it just falls into the block after it.
MachineBasicBlock::iterator I = MBB.end();
if (I == MBB.begin() || !isTerminatorInstr((--I)->getOpcode()))
return false;
// Get the last instruction in the block.
MachineInstr *LastInst = I;
// If there is only one terminator instruction, process it.
unsigned LastOpc = LastInst->getOpcode();
if (I == MBB.begin() || !isTerminatorInstr((--I)->getOpcode())) {
if (LastOpc == ARM::B || LastOpc == ARM::tB) {
TBB = LastInst->getOperand(0).getMachineBasicBlock();
return false;
}
if (LastOpc == ARM::Bcc || LastOpc == ARM::tBcc) {
// Block ends with fall-through condbranch.
TBB = LastInst->getOperand(0).getMachineBasicBlock();
Cond.push_back(LastInst->getOperand(1));
return false;
}
return true; // Can't handle indirect branch.
}
// Get the instruction before it if it is a terminator.
MachineInstr *SecondLastInst = I;
// If there are three terminators, we don't know what sort of block this is.
if (SecondLastInst && I != MBB.begin() &&
isTerminatorInstr((--I)->getOpcode()))
return true;
// If the block ends with ARM::B/ARM::tB and an ARM::Bcc/ARM::tBcc, handle it.
unsigned SecondLastOpc = SecondLastInst->getOpcode();
if ((SecondLastOpc == ARM::Bcc && LastOpc == ARM::B) ||
(SecondLastOpc == ARM::tBcc && LastOpc == ARM::tB)) {
TBB = SecondLastInst->getOperand(0).getMachineBasicBlock();
Cond.push_back(SecondLastInst->getOperand(1));
FBB = LastInst->getOperand(0).getMachineBasicBlock();
return false;
}
// Otherwise, can't handle this.
return true;
}
void ARMInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
MachineFunction &MF = *MBB.getParent();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
int BOpc = AFI->isThumbFunction() ? ARM::tB : ARM::B;
int BccOpc = AFI->isThumbFunction() ? ARM::tBcc : ARM::Bcc;
MachineBasicBlock::iterator I = MBB.end();
if (I == MBB.begin()) return;
--I;
if (I->getOpcode() != BOpc && I->getOpcode() != BccOpc)
return;
// Remove the branch.
I->eraseFromParent();
I = MBB.end();
if (I == MBB.begin()) return;
--I;
if (I->getOpcode() != BccOpc)
return;
// Remove the branch.
I->eraseFromParent();
}
void ARMInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
const std::vector<MachineOperand> &Cond) const {
MachineFunction &MF = *MBB.getParent();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
int BOpc = AFI->isThumbFunction() ? ARM::tB : ARM::B;
int BccOpc = AFI->isThumbFunction() ? ARM::tBcc : ARM::Bcc;
// Shouldn't be a fall through.
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
assert((Cond.size() == 1 || Cond.size() == 0) &&
"ARM branch conditions have two components!");
if (FBB == 0) {
if (Cond.empty()) // Unconditional branch?
BuildMI(&MBB, get(BOpc)).addMBB(TBB);
else
BuildMI(&MBB, get(BccOpc)).addMBB(TBB).addImm(Cond[0].getImm());
return;
}
// Two-way conditional branch.
BuildMI(&MBB, get(BccOpc)).addMBB(TBB).addImm(Cond[0].getImm());
BuildMI(&MBB, get(BOpc)).addMBB(FBB);
}
bool ARMInstrInfo::BlockHasNoFallThrough(MachineBasicBlock &MBB) const {
if (MBB.empty()) return false;
switch (MBB.back().getOpcode()) {
case ARM::B:
case ARM::tB: // Uncond branch.
case ARM::BR_JTr: // Jumptable branch.
case ARM::BR_JTm: // Jumptable branch through mem.
case ARM::BR_JTadd: // Jumptable branch add to pc.
return true;
default: return false;
}
}
bool ARMInstrInfo::
ReverseBranchCondition(std::vector<MachineOperand> &Cond) const {
ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
Cond[0].setImm(ARMCC::getOppositeCondition(CC));
return false;
}
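// Editor's illustration (not part of this commit): a typical client of the
// branch analysis API above, e.g. a simplified branch folder. NextMBB (the
// layout successor) and the TII pointer are assumed names.
//
//   MachineBasicBlock *TBB = 0, *FBB = 0;
//   std::vector<MachineOperand> Cond;
//   if (!TII->AnalyzeBranch(MBB, TBB, FBB, Cond)) { // block is analyzable
//     TII->RemoveBranch(MBB);
//     if (!Cond.empty() && !TII->ReverseBranchCondition(Cond))
//       // Invert the condition and branch to the old false destination.
//       TII->InsertBranch(MBB, FBB ? FBB : NextMBB, 0, Cond);
//   }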


@ -1,4 +1,4 @@
//===- ARMInstrInfo.h - ARM Instruction Information --------------*- C++ -*-===//
//===- ARMInstrInfo.h - ARM Instruction Information -------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@ -19,11 +19,56 @@
#include "ARMRegisterInfo.h"
namespace llvm {
class ARMSubtarget;
/// ARMII - This namespace holds all of the target specific flags that
/// instruction info tracks.
///
namespace ARMII {
enum {
//===------------------------------------------------------------------===//
// Instruction Flags.
//===------------------------------------------------------------------===//
// This four-bit field describes the addressing mode used. Zero is unused
// so that we can tell if we forgot to set a value.
AddrModeMask = 0xf,
AddrMode1 = 1,
AddrMode2 = 2,
AddrMode3 = 3,
AddrMode4 = 4,
AddrMode5 = 5,
AddrModeT1 = 6,
AddrModeT2 = 7,
AddrModeT4 = 8,
AddrModeTs = 9, // i8 * 4 for pc and sp relative data
// Size* - Flags to keep track of the size of an instruction.
SizeShift = 4,
SizeMask = 7 << SizeShift,
SizeSpecial = 1, // 0 byte pseudo or special case.
Size8Bytes = 2,
Size4Bytes = 3,
Size2Bytes = 4,
// IndexMode - Unindexed, pre-indexed, or post-indexed. Only valid for load
// and store ops
IndexModeShift = 7,
IndexModeMask = 3 << IndexModeShift,
IndexModePre = 1,
IndexModePost = 2,
// Opcode
OpcodeShift = 9,
OpcodeMask = 0xf << OpcodeShift
};
}
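// Editor's illustration (not in the original header): these fields are packed
// into an instruction's TSFlags word and are recovered with the shift / mask
// pairs above; the TSFlags variable name is assumed.
//
//   unsigned AddrMode  = TSFlags & ARMII::AddrModeMask;
//   unsigned SizeFlag  = (TSFlags & ARMII::SizeMask) >> ARMII::SizeShift;
//   unsigned IndexMode = (TSFlags & ARMII::IndexModeMask)
//                        >> ARMII::IndexModeShift;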
class ARMInstrInfo : public TargetInstrInfo {
const ARMRegisterInfo RI;
public:
ARMInstrInfo();
ARMInstrInfo(const ARMSubtarget &STI);
/// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
/// such, whenever a client has an instance of instruction info, it should
@ -35,15 +80,33 @@ public:
/// This is used for addressing modes.
virtual const TargetRegisterClass *getPointerRegClass() const;
/// getDWARF_LABELOpcode - Return the opcode of the target's DWARF_LABEL
/// instruction if it has one. This is used by codegen passes that update
/// DWARF line number info as they modify the code.
virtual unsigned getDWARF_LABELOpcode() const;
/// Return true if the instruction is a register to register move and
/// leave the source and dest operands in the passed parameters.
///
virtual bool isMoveInstr(const MachineInstr &MI,
unsigned &SrcReg, unsigned &DstReg) const;
virtual unsigned isLoadFromStackSlot(MachineInstr *MI, int &FrameIndex) const;
virtual unsigned isStoreToStackSlot(MachineInstr *MI, int &FrameIndex) const;
virtual MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI,
MachineBasicBlock::iterator &MBBI,
LiveVariables &LV) const;
// Branch analysis.
virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
std::vector<MachineOperand> &Cond) const;
virtual void RemoveBranch(MachineBasicBlock &MBB) const;
virtual void InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
const std::vector<MachineOperand> &Cond) const;
virtual bool BlockHasNoFallThrough(MachineBasicBlock &MBB) const;
virtual bool ReverseBranchCondition(std::vector<MachineOperand> &Cond) const;
};
}

File diff suppressed because it is too large.

View File

@ -0,0 +1,513 @@
//===- ARMInstrThumb.td - Thumb support for ARM ---------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under the
// University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the Thumb instruction set.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// Thumb specific DAG Nodes.
//
def ARMtcall : SDNode<"ARMISD::tCALL", SDT_ARMcall,
[SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
// TI - Thumb instruction.
// ThumbPat - Same as Pat<>, but requires that the compiler be in Thumb mode.
class ThumbPat<dag pattern, dag result> : Pat<pattern, result> {
list<Predicate> Predicates = [IsThumb];
}
class ThumbV5Pat<dag pattern, dag result> : Pat<pattern, result> {
list<Predicate> Predicates = [IsThumb, HasV5T];
}
class ThumbI<dag ops, AddrMode am, SizeFlagVal sz,
string asm, string cstr, list<dag> pattern>
// FIXME: Set all opcodes to 0 for now.
: InstARM<0, am, sz, IndexModeNone, ops, asm, cstr> {
let Pattern = pattern;
list<Predicate> Predicates = [IsThumb];
}
class TI<dag ops, string asm, list<dag> pattern>
: ThumbI<ops, AddrModeNone, Size2Bytes, asm, "", pattern>;
class TI1<dag ops, string asm, list<dag> pattern>
: ThumbI<ops, AddrModeT1, Size2Bytes, asm, "", pattern>;
class TI2<dag ops, string asm, list<dag> pattern>
: ThumbI<ops, AddrModeT2, Size2Bytes, asm, "", pattern>;
class TI4<dag ops, string asm, list<dag> pattern>
: ThumbI<ops, AddrModeT4, Size2Bytes, asm, "", pattern>;
class TIs<dag ops, string asm, list<dag> pattern>
: ThumbI<ops, AddrModeTs, Size2Bytes, asm, "", pattern>;
// Two-address instructions
class TIt<dag ops, string asm, list<dag> pattern>
: ThumbI<ops, AddrModeNone, Size2Bytes, asm, "$lhs = $dst", pattern>;
// BL, BLX(1) are translated by the assembler into two instructions
class TIx2<dag ops, string asm, list<dag> pattern>
: ThumbI<ops, AddrModeNone, Size4Bytes, asm, "", pattern>;
def imm_neg_XFORM : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(-(int)N->getValue(), MVT::i32);
}]>;
def imm_comp_XFORM : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(~((uint32_t)N->getValue()), MVT::i32);
}]>;
/// imm0_7 predicate - True if the 32-bit immediate is in the range [0,7].
def imm0_7 : PatLeaf<(i32 imm), [{
return (uint32_t)N->getValue() < 8;
}]>;
def imm0_7_neg : PatLeaf<(i32 imm), [{
return (uint32_t)-N->getValue() < 8;
}], imm_neg_XFORM>;
def imm0_255 : PatLeaf<(i32 imm), [{
return (uint32_t)N->getValue() < 256;
}]>;
def imm0_255_comp : PatLeaf<(i32 imm), [{
return ~((uint32_t)N->getValue()) < 256;
}]>;
def imm8_255 : PatLeaf<(i32 imm), [{
return (uint32_t)N->getValue() >= 8 && (uint32_t)N->getValue() < 256;
}]>;
def imm8_255_neg : PatLeaf<(i32 imm), [{
unsigned Val = -N->getValue();
return Val >= 8 && Val < 256;
}], imm_neg_XFORM>;
// Break imm's up into two pieces: an immediate + a left shift.
// This uses thumb_immshifted to match and thumb_immshifted_val and
// thumb_immshifted_shamt to get the val/shift pieces.
def thumb_immshifted : PatLeaf<(imm), [{
return ARM_AM::isThumbImmShiftedVal((unsigned)N->getValue());
}]>;
def thumb_immshifted_val : SDNodeXForm<imm, [{
unsigned V = ARM_AM::getThumbImmNonShiftedVal((unsigned)N->getValue());
return CurDAG->getTargetConstant(V, MVT::i32);
}]>;
def thumb_immshifted_shamt : SDNodeXForm<imm, [{
unsigned V = ARM_AM::getThumbImmValShift((unsigned)N->getValue());
return CurDAG->getTargetConstant(V, MVT::i32);
}]>;
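// Editor's illustration: 0x2A00 is not a valid 8-bit Thumb immediate, but it
// is 0x2A (8 bits) shifted left by 8. thumb_immshifted_val yields 0x2A and
// thumb_immshifted_shamt yields 8, so the two-piece pattern at the end of
// this file materializes it as:
//   mov rD, #42      @ 0x2A
//   lsl rD, rD, #8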
// Define Thumb specific addressing modes.
// t_addrmode_rr := reg + reg
//
def t_addrmode_rr : Operand<i32>,
ComplexPattern<i32, 2, "SelectThumbAddrModeRR", []> {
let PrintMethod = "printThumbAddrModeRROperand";
let MIOperandInfo = (ops GPR:$base, GPR:$offsreg);
}
// t_addrmode_ri5_{1|2|4} := reg + imm5 * {1|2|4}
//
def t_addrmode_ri5_1 : Operand<i32>,
ComplexPattern<i32, 2, "SelectThumbAddrModeRI5_1", []> {
let PrintMethod = "printThumbAddrModeRI5_1Operand";
let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
}
def t_addrmode_ri5_2 : Operand<i32>,
ComplexPattern<i32, 2, "SelectThumbAddrModeRI5_2", []> {
let PrintMethod = "printThumbAddrModeRI5_2Operand";
let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
}
def t_addrmode_ri5_4 : Operand<i32>,
ComplexPattern<i32, 2, "SelectThumbAddrModeRI5_4", []> {
let PrintMethod = "printThumbAddrModeRI5_4Operand";
let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
}
// t_addrmode_sp := sp + imm8 * 4
//
def t_addrmode_sp : Operand<i32>,
ComplexPattern<i32, 2, "SelectThumbAddrModeSP", []> {
let PrintMethod = "printThumbAddrModeSPOperand";
let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
}
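// Editor's note: in assembly the modes above correspond to
//   t_addrmode_rr       ldr  r0, [r1, r2]
//   t_addrmode_ri5_4    ldr  r0, [r1, #imm5 * 4]
//   t_addrmode_ri5_2    ldrh r0, [r1, #imm5 * 2]
//   t_addrmode_ri5_1    ldrb r0, [r1, #imm5]
//   t_addrmode_sp       ldr  r0, [sp, #imm8 * 4]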
//===----------------------------------------------------------------------===//
// Miscellaneous Instructions.
//
def tPICADD : TIt<(ops GPR:$dst, GPR:$lhs, pclabel:$cp),
"\n$cp:\n\tadd $dst, pc",
[(set GPR:$dst, (ARMpic_add GPR:$lhs, imm:$cp))]>;
//===----------------------------------------------------------------------===//
// Control Flow Instructions.
//
let isReturn = 1, isTerminator = 1 in
def tBX_RET : TI<(ops), "bx lr", [(ARMretflag)]>;
// FIXME: remove when we have a way of marking an MI with these properties.
let isLoad = 1, isReturn = 1, isTerminator = 1 in
def tPOP_RET : TI<(ops reglist:$dst1, variable_ops),
"pop $dst1", []>;
let isCall = 1, noResults = 1,
Defs = [R0, R1, R2, R3, LR,
D0, D1, D2, D3, D4, D5, D6, D7] in {
def tBL : TIx2<(ops i32imm:$func, variable_ops),
"bl ${func:call}",
[(ARMtcall tglobaladdr:$func)]>;
// ARMv5T and above
def tBLXi : TIx2<(ops i32imm:$func, variable_ops),
"blx ${func:call}",
[(ARMcall tglobaladdr:$func)]>, Requires<[HasV5T]>;
def tBLXr : TI<(ops GPR:$dst, variable_ops),
"blx $dst",
[(ARMtcall GPR:$dst)]>, Requires<[HasV5T]>;
// ARMv4T
def tBX : TIx2<(ops GPR:$dst, variable_ops),
"cpy lr, pc\n\tbx $dst",
[(ARMcall_nolink GPR:$dst)]>;
}
let isBranch = 1, isTerminator = 1, isBarrier = 1 in
def tB : TI<(ops brtarget:$dst), "b $dst", [(br bb:$dst)]>;
let isBranch = 1, isTerminator = 1, noResults = 1, isBarrier = 1 in
def tBcc : TI<(ops brtarget:$dst, CCOp:$cc), "b$cc $dst",
[(ARMbrcond bb:$dst, imm:$cc)]>;
//===----------------------------------------------------------------------===//
// Load Store Instructions.
//
let isLoad = 1 in {
def tLDRri : TI4<(ops GPR:$dst, t_addrmode_ri5_4:$addr),
"ldr $dst, $addr",
[(set GPR:$dst, (load t_addrmode_ri5_4:$addr))]>;
def tLDRrr : TI<(ops GPR:$dst, t_addrmode_rr:$addr),
"ldr $dst, $addr",
[(set GPR:$dst, (load t_addrmode_rr:$addr))]>;
// def tLDRpci
def tLDRspi : TIs<(ops GPR:$dst, t_addrmode_sp:$addr),
"ldr $dst, $addr",
[(set GPR:$dst, (load t_addrmode_sp:$addr))]>;
def tLDRBri : TI1<(ops GPR:$dst, t_addrmode_ri5_1:$addr),
"ldrb $dst, $addr",
[(set GPR:$dst, (zextloadi8 t_addrmode_ri5_1:$addr))]>;
def tLDRBrr : TI1<(ops GPR:$dst, t_addrmode_rr:$addr),
"ldrb $dst, $addr",
[(set GPR:$dst, (zextloadi8 t_addrmode_rr:$addr))]>;
def tLDRHri : TI2<(ops GPR:$dst, t_addrmode_ri5_2:$addr),
"ldrh $dst, $addr",
[(set GPR:$dst, (zextloadi16 t_addrmode_ri5_2:$addr))]>;
def tLDRHrr : TI2<(ops GPR:$dst, t_addrmode_rr:$addr),
"ldrh $dst, $addr",
[(set GPR:$dst, (zextloadi16 t_addrmode_rr:$addr))]>;
def tLDRSBrr : TI1<(ops GPR:$dst, t_addrmode_rr:$addr),
"ldrsb $dst, $addr",
[(set GPR:$dst, (sextloadi8 t_addrmode_rr:$addr))]>;
def tLDRSHrr : TI2<(ops GPR:$dst, t_addrmode_rr:$addr),
"ldrsh $dst, $addr",
[(set GPR:$dst, (sextloadi16 t_addrmode_rr:$addr))]>;
} // isLoad
let isStore = 1 in {
def tSTRri : TI4<(ops GPR:$src, t_addrmode_ri5_4:$addr),
"str $src, $addr",
[(store GPR:$src, t_addrmode_ri5_4:$addr)]>;
def tSTRrr : TI<(ops GPR:$src, t_addrmode_rr:$addr),
"str $src, $addr",
[(store GPR:$src, t_addrmode_rr:$addr)]>;
def tSTRspi : TIs<(ops GPR:$src, t_addrmode_sp:$addr),
"str $src, $addr",
[(store GPR:$src, t_addrmode_sp:$addr)]>;
def tSTRBri : TI1<(ops GPR:$src, t_addrmode_ri5_1:$addr),
"strb $src, $addr",
[(truncstorei8 GPR:$src, t_addrmode_ri5_1:$addr)]>;
def tSTRBrr : TI1<(ops GPR:$src, t_addrmode_rr:$addr),
"strb $src, $addr",
[(truncstorei8 GPR:$src, t_addrmode_rr:$addr)]>;
def tSTRHri : TI2<(ops GPR:$src, t_addrmode_ri5_2:$addr),
"strh $src, $addr",
[(truncstorei16 GPR:$src, t_addrmode_ri5_2:$addr)]>;
def tSTRHrr : TI2<(ops GPR:$src, t_addrmode_rr:$addr),
"strh $src, $addr",
[(truncstorei16 GPR:$src, t_addrmode_rr:$addr)]>;
}
//===----------------------------------------------------------------------===//
// Load / store multiple Instructions.
//
// TODO: A7-44: LDMIA - load multiple
let isLoad = 1 in
def tPOP : TI<(ops reglist:$dst1, variable_ops),
"pop $dst1", []>;
let isStore = 1 in
def tPUSH : TI<(ops reglist:$src1, variable_ops),
"push $src1", []>;
//===----------------------------------------------------------------------===//
// Arithmetic Instructions.
//
def tADDi3 : TI<(ops GPR:$dst, GPR:$lhs, i32imm:$rhs),
"add $dst, $lhs, $rhs",
[(set GPR:$dst, (add GPR:$lhs, imm0_7:$rhs))]>;
def tADDi8 : TIt<(ops GPR:$dst, GPR:$lhs, i32imm:$rhs),
"add $dst, $rhs",
[(set GPR:$dst, (add GPR:$lhs, imm8_255:$rhs))]>;
def tADDrr : TI<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
"add $dst, $lhs, $rhs",
[(set GPR:$dst, (add GPR:$lhs, GPR:$rhs))]>;
def tADDhirr : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
"add $dst, $rhs", []>;
def tADDrPCi : TI<(ops GPR:$dst, i32imm:$rhs),
"add $dst, pc, $rhs * 4", []>;
def tADDrSPi : TI<(ops GPR:$dst, GPR:$sp, i32imm:$rhs),
"add $dst, $sp, $rhs * 4", []>;
def tADDspi : TI<(ops GPR:$sp, i32imm:$rhs),
"add $sp, $rhs * 4", []>;
def tAND : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
"and $dst, $rhs",
[(set GPR:$dst, (and GPR:$lhs, GPR:$rhs))]>;
def tASRri : TI<(ops GPR:$dst, GPR:$lhs, i32imm:$rhs),
"asr $dst, $lhs, $rhs",
[(set GPR:$dst, (sra GPR:$lhs, imm:$rhs))]>;
def tASRrr : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
"asr $dst, $rhs",
[(set GPR:$dst, (sra GPR:$lhs, GPR:$rhs))]>;
def tBIC : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
"bic $dst, $rhs",
[(set GPR:$dst, (and GPR:$lhs, (not GPR:$rhs)))]>;
def tCMN : TI<(ops GPR:$lhs, GPR:$rhs),
"cmn $lhs, $rhs",
[(ARMcmp GPR:$lhs, (ineg GPR:$rhs))]>;
def tCMPi8 : TI<(ops GPR:$lhs, i32imm:$rhs),
"cmp $lhs, $rhs",
[(ARMcmp GPR:$lhs, imm0_255:$rhs)]>;
def tCMPr : TI<(ops GPR:$lhs, GPR:$rhs),
"cmp $lhs, $rhs",
[(ARMcmp GPR:$lhs, GPR:$rhs)]>;
// TODO: A7-37: CMP(3) - cmp hi regs
def tEOR : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
"eor $dst, $rhs",
[(set GPR:$dst, (xor GPR:$lhs, GPR:$rhs))]>;
def tLSLri : TI<(ops GPR:$dst, GPR:$lhs, i32imm:$rhs),
"lsl $dst, $lhs, $rhs",
[(set GPR:$dst, (shl GPR:$lhs, imm:$rhs))]>;
def tLSLrr : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
"lsl $dst, $rhs",
[(set GPR:$dst, (shl GPR:$lhs, GPR:$rhs))]>;
def tLSRri : TI<(ops GPR:$dst, GPR:$lhs, i32imm:$rhs),
"lsr $dst, $lhs, $rhs",
[(set GPR:$dst, (srl GPR:$lhs, imm:$rhs))]>;
def tLSRrr : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
"lsr $dst, $rhs",
[(set GPR:$dst, (srl GPR:$lhs, GPR:$rhs))]>;
def tMOVri8 : TI<(ops GPR:$dst, i32imm:$src),
"mov $dst, $src",
[(set GPR:$dst, imm0_255:$src)]>;
// TODO: A7-73: MOV(2) - mov setting flag.
// Note: MOV(2) of two low regs updates the flags, so we emit this as 'cpy',
// which is MOV(3). This also supports high registers.
def tMOVrr : TI<(ops GPR:$dst, GPR:$src),
"cpy $dst, $src", []>;
def tMUL : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
"mul $dst, $rhs",
[(set GPR:$dst, (mul GPR:$lhs, GPR:$rhs))]>;
def tMVN : TI<(ops GPR:$dst, GPR:$src),
"mvn $dst, $src",
[(set GPR:$dst, (not GPR:$src))]>;
def tNEG : TI<(ops GPR:$dst, GPR:$src),
"neg $dst, $src",
[(set GPR:$dst, (ineg GPR:$src))]>;
def tORR : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
"orr $dst, $rhs",
[(set GPR:$dst, (or GPR:$lhs, GPR:$rhs))]>;
def tREV : TI<(ops GPR:$dst, GPR:$src),
"rev $dst, $src",
[(set GPR:$dst, (bswap GPR:$src))]>,
Requires<[IsThumb, HasV6]>;
def tREV16 : TI<(ops GPR:$dst, GPR:$src),
"rev16 $dst, $src",
[(set GPR:$dst,
(or (and (srl GPR:$src, 8), 0xFF),
(or (and (shl GPR:$src, 8), 0xFF00),
(or (and (srl GPR:$src, 8), 0xFF0000),
(and (shl GPR:$src, 8), 0xFF000000)))))]>,
Requires<[IsThumb, HasV6]>;
def tREVSH : TI<(ops GPR:$dst, GPR:$src),
"revsh $dst, $src",
[(set GPR:$dst,
(sext_inreg
(or (srl (and GPR:$src, 0xFFFF), 8),
(shl GPR:$src, 8)), i16))]>,
Requires<[IsThumb, HasV6]>;
def tROR : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
"ror $dst, $rhs",
[(set GPR:$dst, (rotr GPR:$lhs, GPR:$rhs))]>;
def tSBC : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
"sbc $dst, $rhs",
[(set GPR:$dst, (sube GPR:$lhs, GPR:$rhs))]>;
// TODO: A7-96: STMIA - store multiple.
def tSUBi3 : TI<(ops GPR:$dst, GPR:$lhs, i32imm:$rhs),
"sub $dst, $lhs, $rhs",
[(set GPR:$dst, (add GPR:$lhs, imm0_7_neg:$rhs))]>;
def tSUBi8 : TIt<(ops GPR:$dst, GPR:$lhs, i32imm:$rhs),
"sub $dst, $rhs",
[(set GPR:$dst, (add GPR:$lhs, imm8_255_neg:$rhs))]>;
def tSUBrr : TI<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
"sub $dst, $lhs, $rhs",
[(set GPR:$dst, (sub GPR:$lhs, GPR:$rhs))]>;
def tSUBspi : TI<(ops GPR:$sp, i32imm:$rhs),
"sub $sp, $rhs * 4", []>;
def tSXTB : TI<(ops GPR:$dst, GPR:$src),
"sxtb $dst, $src",
[(set GPR:$dst, (sext_inreg GPR:$src, i8))]>,
Requires<[IsThumb, HasV6]>;
def tSXTH : TI<(ops GPR:$dst, GPR:$src),
"sxth $dst, $src",
[(set GPR:$dst, (sext_inreg GPR:$src, i16))]>,
Requires<[IsThumb, HasV6]>;
// TODO: A7-122: TST - test.
def tUXTB : TI<(ops GPR:$dst, GPR:$src),
"uxtb $dst, $src",
[(set GPR:$dst, (and GPR:$src, 0xFF))]>,
Requires<[IsThumb, HasV6]>;
def tUXTH : TI<(ops GPR:$dst, GPR:$src),
"uxth $dst, $src",
[(set GPR:$dst, (and GPR:$src, 0xFFFF))]>,
Requires<[IsThumb, HasV6]>;
// Conditional move tMOVCCr - Used to implement the Thumb SELECT_CC DAG operation.
// Expanded by the scheduler into a branch sequence.
let usesCustomDAGSchedInserter = 1 in // Expanded by the scheduler.
def tMOVCCr :
PseudoInst<(ops GPR:$dst, GPR:$false, GPR:$true, CCOp:$cc),
"@ tMOVCCr $cc",
[(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc))]>;
// tLEApcrel - Load a pc-relative address into a register without offending the
// assembler.
def tLEApcrel : TI<(ops GPR:$dst, i32imm:$label),
!strconcat(!strconcat(".set PCRELV${:uid}, ($label-(",
"${:private}PCRELL${:uid}+4))\n"),
!strconcat("${:private}PCRELL${:uid}:\n\t",
"add $dst, pc, #PCRELV${:uid}")),
[]>;
def tLEApcrelCall : TI<(ops GPR:$dst, i32imm:$label),
!strconcat(!strconcat(".set PCRELV${:uid}, (${label:call}-(",
"${:private}PCRELL${:uid}+4))\n"),
!strconcat("${:private}PCRELL${:uid}:\n\t",
"add $dst, pc, #PCRELV${:uid}")),
[]>;
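// Editor's illustration: for a label L and uid 0, tLEApcrel expands to
//   .set PCRELV0, (L-(PCRELL0+4))
//   PCRELL0:
//       add rD, pc, #PCRELV0
// Reading pc at PCRELL0 yields PCRELL0+4, so rD ends up holding the address
// of L without an explicit pc-relative relocation.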
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//
// ConstantPool, GlobalAddress
def : ThumbPat<(ARMWrapper tglobaladdr :$dst), (tLEApcrel tglobaladdr :$dst)>;
def : ThumbPat<(ARMWrapper tconstpool :$dst), (tLEApcrel tconstpool :$dst)>;
def : ThumbPat<(ARMWrapperCall tglobaladdr :$dst),
(tLEApcrelCall tglobaladdr :$dst)>;
def : ThumbPat<(ARMWrapperCall texternalsym:$dst),
(tLEApcrelCall texternalsym:$dst)>;
// Direct calls
def : ThumbPat<(ARMtcall texternalsym:$func), (tBL texternalsym:$func)>;
def : ThumbV5Pat<(ARMcall texternalsym:$func), (tBLXi texternalsym:$func)>;
// Indirect calls to ARM routines
def : ThumbV5Pat<(ARMcall GPR:$dst), (tBLXr GPR:$dst)>;
// zextload i1 -> zextload i8
def : ThumbPat<(zextloadi1 t_addrmode_ri5_1:$addr),
(tLDRBri t_addrmode_ri5_1:$addr)>;
def : ThumbPat<(zextloadi1 t_addrmode_rr:$addr),
(tLDRBrr t_addrmode_rr:$addr)>;
// truncstore i1 -> truncstore i8
def : ThumbPat<(truncstorei1 GPR:$src, t_addrmode_ri5_1:$dst),
(tSTRBri GPR:$src, t_addrmode_ri5_1:$dst)>;
def : ThumbPat<(truncstorei1 GPR:$src, t_addrmode_rr:$dst),
(tSTRBrr GPR:$src, t_addrmode_rr:$dst)>;
// Large immediate handling.
// Two piece imms.
def : ThumbPat<(i32 thumb_immshifted:$src),
(tLSLri (tMOVri8 (thumb_immshifted_val imm:$src)),
(thumb_immshifted_shamt imm:$src))>;
def : ThumbPat<(i32 imm0_255_comp:$src),
(tMVN (tMOVri8 (imm_comp_XFORM imm:$src)))>;
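// Editor's illustration: 0xFFFFFF00 is the bitwise complement of 0xFF, so the
// imm0_255_comp pattern above emits
//   mov rT, #0xFF
//   mvn rD, rT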


@ -0,0 +1,359 @@
//===- ARMInstrVFP.td - VFP support for ARM -------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under the
// University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the ARM VFP instruction set.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// ARM VFP Instruction templates.
//
// ARM Float Instruction
class ASI<dag ops, string asm, list<dag> pattern> : AI<ops, asm, pattern> {
// TODO: Mark the instructions with the appropriate subtarget info.
}
class ASI5<dag ops, string asm, list<dag> pattern>
: I<ops, AddrMode5, Size4Bytes, IndexModeNone, asm, "", pattern> {
// TODO: Mark the instructions with the appropriate subtarget info.
}
// ARM Double Instruction
class ADI<dag ops, string asm, list<dag> pattern> : AI<ops, asm, pattern> {
// TODO: Mark the instructions with the appropriate subtarget info.
}
class ADI5<dag ops, string asm, list<dag> pattern>
: I<ops, AddrMode5, Size4Bytes, IndexModeNone, asm, "", pattern> {
// TODO: Mark the instructions with the appropriate subtarget info.
}
def SDT_FTOI :
SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisFP<1>]>;
def SDT_ITOF :
SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>;
def SDT_CMPFP0 :
SDTypeProfile<0, 1, [SDTCisFP<0>]>;
def SDT_FMDRR :
SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>,
SDTCisSameAs<1, 2>]>;
def arm_ftoui : SDNode<"ARMISD::FTOUI", SDT_FTOI>;
def arm_ftosi : SDNode<"ARMISD::FTOSI", SDT_FTOI>;
def arm_sitof : SDNode<"ARMISD::SITOF", SDT_ITOF>;
def arm_uitof : SDNode<"ARMISD::UITOF", SDT_ITOF>;
def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTRet, [SDNPInFlag,SDNPOutFlag]>;
def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutFlag]>;
def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0", SDT_CMPFP0, [SDNPOutFlag]>;
def arm_fmdrr : SDNode<"ARMISD::FMDRR", SDT_FMDRR>;
//===----------------------------------------------------------------------===//
// Load / store Instructions.
//
let isLoad = 1 in {
def FLDD : ADI5<(ops DPR:$dst, addrmode5:$addr),
"fldd $dst, $addr",
[(set DPR:$dst, (load addrmode5:$addr))]>;
def FLDS : ASI5<(ops SPR:$dst, addrmode5:$addr),
"flds $dst, $addr",
[(set SPR:$dst, (load addrmode5:$addr))]>;
} // isLoad
let isStore = 1 in {
def FSTD : ADI5<(ops DPR:$src, addrmode5:$addr),
"fstd $src, $addr",
[(store DPR:$src, addrmode5:$addr)]>;
def FSTS : ASI5<(ops SPR:$src, addrmode5:$addr),
"fsts $src, $addr",
[(store SPR:$src, addrmode5:$addr)]>;
} // isStore
//===----------------------------------------------------------------------===//
// Load / store multiple Instructions.
//
let isLoad = 1 in {
def FLDMD : ADI5<(ops addrmode5:$addr, reglist:$dst1, variable_ops),
"fldm${addr:submode}d ${addr:base}, $dst1",
[]>;
def FLDMS : ASI5<(ops addrmode5:$addr, reglist:$dst1, variable_ops),
"fldm${addr:submode}s ${addr:base}, $dst1",
[]>;
} // isLoad
let isStore = 1 in {
def FSTMD : ADI5<(ops addrmode5:$addr, reglist:$src1, variable_ops),
"fstm${addr:submode}d ${addr:base}, $src1",
[]>;
def FSTMS : ASI5<(ops addrmode5:$addr, reglist:$src1, variable_ops),
"fstm${addr:submode}s ${addr:base}, $src1",
[]>;
} // isStore
// FLDMX, FSTMX - mixing S/D registers for pre-armv6 cores
//===----------------------------------------------------------------------===//
// FP Binary Operations.
//
def FADDD : ADI<(ops DPR:$dst, DPR:$a, DPR:$b),
"faddd $dst, $a, $b",
[(set DPR:$dst, (fadd DPR:$a, DPR:$b))]>;
def FADDS : ASI<(ops SPR:$dst, SPR:$a, SPR:$b),
"fadds $dst, $a, $b",
[(set SPR:$dst, (fadd SPR:$a, SPR:$b))]>;
def FCMPED : ADI<(ops DPR:$a, DPR:$b),
"fcmped $a, $b",
[(arm_cmpfp DPR:$a, DPR:$b)]>;
def FCMPES : ASI<(ops SPR:$a, SPR:$b),
"fcmpes $a, $b",
[(arm_cmpfp SPR:$a, SPR:$b)]>;
def FDIVD : ADI<(ops DPR:$dst, DPR:$a, DPR:$b),
"fdivd $dst, $a, $b",
[(set DPR:$dst, (fdiv DPR:$a, DPR:$b))]>;
def FDIVS : ASI<(ops SPR:$dst, SPR:$a, SPR:$b),
"fdivs $dst, $a, $b",
[(set SPR:$dst, (fdiv SPR:$a, SPR:$b))]>;
def FMULD : ADI<(ops DPR:$dst, DPR:$a, DPR:$b),
"fmuld $dst, $a, $b",
[(set DPR:$dst, (fmul DPR:$a, DPR:$b))]>;
def FMULS : ASI<(ops SPR:$dst, SPR:$a, SPR:$b),
"fmuls $dst, $a, $b",
[(set SPR:$dst, (fmul SPR:$a, SPR:$b))]>;
def FNMULD : ADI<(ops DPR:$dst, DPR:$a, DPR:$b),
"fnmuld $dst, $a, $b",
[(set DPR:$dst, (fneg (fmul DPR:$a, DPR:$b)))]>;
def FNMULS : ASI<(ops SPR:$dst, SPR:$a, SPR:$b),
"fnmuls $dst, $a, $b",
[(set SPR:$dst, (fneg (fmul SPR:$a, SPR:$b)))]>;
def FSUBD : ADI<(ops DPR:$dst, DPR:$a, DPR:$b),
"fsubd $dst, $a, $b",
[(set DPR:$dst, (fsub DPR:$a, DPR:$b))]>;
def FSUBS : ASI<(ops SPR:$dst, SPR:$a, SPR:$b),
"fsubs $dst, $a, $b",
[(set SPR:$dst, (fsub SPR:$a, SPR:$b))]>;
//===----------------------------------------------------------------------===//
// FP Unary Operations.
//
def FABSD : ADI<(ops DPR:$dst, DPR:$a),
"fabsd $dst, $a",
[(set DPR:$dst, (fabs DPR:$a))]>;
def FABSS : ASI<(ops SPR:$dst, SPR:$a),
"fabss $dst, $a",
[(set SPR:$dst, (fabs SPR:$a))]>;
def FCMPEZD : ADI<(ops DPR:$a),
"fcmpezd $a",
[(arm_cmpfp0 DPR:$a)]>;
def FCMPEZS : ASI<(ops SPR:$a),
"fcmpezs $a",
[(arm_cmpfp0 SPR:$a)]>;
def FCVTDS : ADI<(ops DPR:$dst, SPR:$a),
"fcvtds $dst, $a",
[(set DPR:$dst, (fextend SPR:$a))]>;
def FCVTSD : ADI<(ops SPR:$dst, DPR:$a),
"fcvtsd $dst, $a",
[(set SPR:$dst, (fround DPR:$a))]>;
def FCPYD : ADI<(ops DPR:$dst, DPR:$a),
"fcpyd $dst, $a",
[/*(set DPR:$dst, DPR:$a)*/]>;
def FCPYS : ASI<(ops SPR:$dst, SPR:$a),
"fcpys $dst, $a",
[/*(set SPR:$dst, SPR:$a)*/]>;
def FNEGD : ADI<(ops DPR:$dst, DPR:$a),
"fnegd $dst, $a",
[(set DPR:$dst, (fneg DPR:$a))]>;
def FNEGS : ASI<(ops SPR:$dst, SPR:$a),
"fnegs $dst, $a",
[(set SPR:$dst, (fneg SPR:$a))]>;
def FSQRTD : ADI<(ops DPR:$dst, DPR:$a),
"fsqrtd $dst, $a",
[(set DPR:$dst, (fsqrt DPR:$a))]>;
def FSQRTS : ASI<(ops SPR:$dst, SPR:$a),
"fsqrts $dst, $a",
[(set SPR:$dst, (fsqrt SPR:$a))]>;
//===----------------------------------------------------------------------===//
// FP <-> GPR Copies. Int <-> FP Conversions.
//
def IMPLICIT_DEF_SPR : PseudoInst<(ops SPR:$rD),
"@ IMPLICIT_DEF_SPR $rD",
[(set SPR:$rD, (undef))]>;
def IMPLICIT_DEF_DPR : PseudoInst<(ops DPR:$rD),
"@ IMPLICIT_DEF_DPR $rD",
[(set DPR:$rD, (undef))]>;
def FMRS : ASI<(ops GPR:$dst, SPR:$src),
"fmrs $dst, $src",
[(set GPR:$dst, (bitconvert SPR:$src))]>;
def FMSR : ASI<(ops SPR:$dst, GPR:$src),
"fmsr $dst, $src",
[(set SPR:$dst, (bitconvert GPR:$src))]>;
def FMRRD : ADI<(ops GPR:$dst1, GPR:$dst2, DPR:$src),
"fmrrd $dst1, $dst2, $src",
[/* FIXME: Can't write pattern for multiple result instr*/]>;
// FMDHR: GPR -> SPR
// FMDLR: GPR -> SPR
def FMDRR : ADI<(ops DPR:$dst, GPR:$src1, GPR:$src2),
"fmdrr $dst, $src1, $src2",
[(set DPR:$dst, (arm_fmdrr GPR:$src1, GPR:$src2))]>;
// FMRDH: SPR -> GPR
// FMRDL: SPR -> GPR
// FMRRS: SPR -> GPR
// FMRX : SPR system reg -> GPR
// FMSRR: GPR -> SPR
def FMSTAT : ASI<(ops), "fmstat", [(arm_fmstat)]>;
// FMXR: GPR -> VFP system reg
// Int to FP:
def FSITOD : ADI<(ops DPR:$dst, SPR:$a),
"fsitod $dst, $a",
[(set DPR:$dst, (arm_sitof SPR:$a))]>;
def FSITOS : ASI<(ops SPR:$dst, SPR:$a),
"fsitos $dst, $a",
[(set SPR:$dst, (arm_sitof SPR:$a))]>;
def FUITOD : ADI<(ops DPR:$dst, SPR:$a),
"fuitod $dst, $a",
[(set DPR:$dst, (arm_uitof SPR:$a))]>;
def FUITOS : ASI<(ops SPR:$dst, SPR:$a),
"fuitos $dst, $a",
[(set SPR:$dst, (arm_uitof SPR:$a))]>;
// FP to Int:
// Always set Z bit in the instruction, i.e. "round towards zero" variants.
def FTOSIZD : ADI<(ops SPR:$dst, DPR:$a),
"ftosizd $dst, $a",
[(set SPR:$dst, (arm_ftosi DPR:$a))]>;
def FTOSIZS : ASI<(ops SPR:$dst, SPR:$a),
"ftosizs $dst, $a",
[(set SPR:$dst, (arm_ftosi SPR:$a))]>;
def FTOUIZD : ADI<(ops SPR:$dst, DPR:$a),
"ftouizd $dst, $a",
[(set SPR:$dst, (arm_ftoui DPR:$a))]>;
def FTOUIZS : ASI<(ops SPR:$dst, SPR:$a),
"ftouizs $dst, $a",
[(set SPR:$dst, (arm_ftoui SPR:$a))]>;
//===----------------------------------------------------------------------===//
// FP FMA Operations.
//
def FMACD : ADI<(ops DPR:$dst, DPR:$dstin, DPR:$a, DPR:$b),
"fmacd $dst, $a, $b",
[(set DPR:$dst, (fadd (fmul DPR:$a, DPR:$b), DPR:$dstin))]>,
RegConstraint<"$dstin = $dst">;
def FMACS : ASI<(ops SPR:$dst, SPR:$dstin, SPR:$a, SPR:$b),
"fmacs $dst, $a, $b",
[(set SPR:$dst, (fadd (fmul SPR:$a, SPR:$b), SPR:$dstin))]>,
RegConstraint<"$dstin = $dst">;
def FMSCD : ADI<(ops DPR:$dst, DPR:$dstin, DPR:$a, DPR:$b),
"fmscd $dst, $a, $b",
[(set DPR:$dst, (fsub (fmul DPR:$a, DPR:$b), DPR:$dstin))]>,
RegConstraint<"$dstin = $dst">;
def FMSCS : ASI<(ops SPR:$dst, SPR:$dstin, SPR:$a, SPR:$b),
"fmscs $dst, $a, $b",
[(set SPR:$dst, (fsub (fmul SPR:$a, SPR:$b), SPR:$dstin))]>,
RegConstraint<"$dstin = $dst">;
def FNMACD : ADI<(ops DPR:$dst, DPR:$dstin, DPR:$a, DPR:$b),
"fnmacd $dst, $a, $b",
[(set DPR:$dst, (fadd (fneg (fmul DPR:$a, DPR:$b)), DPR:$dstin))]>,
RegConstraint<"$dstin = $dst">;
def FNMACS : ASI<(ops SPR:$dst, SPR:$dstin, SPR:$a, SPR:$b),
"fnmacs $dst, $a, $b",
[(set SPR:$dst, (fadd (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>,
RegConstraint<"$dstin = $dst">;
def FNMSCD : ADI<(ops DPR:$dst, DPR:$dstin, DPR:$a, DPR:$b),
"fnmscd $dst, $a, $b",
[(set DPR:$dst, (fsub (fneg (fmul DPR:$a, DPR:$b)), DPR:$dstin))]>,
RegConstraint<"$dstin = $dst">;
def FNMSCS : ASI<(ops SPR:$dst, SPR:$dstin, SPR:$a, SPR:$b),
"fnmscs $dst, $a, $b",
[(set SPR:$dst, (fsub (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>,
RegConstraint<"$dstin = $dst">;
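// Editor's note: the "$dstin = $dst" constraint ties the accumulator input to
// the result register, so after register allocation
//   fmacd d0, d1, d2
// computes d0 = d0 + d1 * d2, with d0 serving as both $dstin and $dst.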
//===----------------------------------------------------------------------===//
// FP Conditional moves.
//
def FCPYDcc : ADI<(ops DPR:$dst, DPR:$false, DPR:$true, CCOp:$cc),
"fcpyd$cc $dst, $true",
[(set DPR:$dst, (ARMcmov DPR:$false, DPR:$true, imm:$cc))]>,
RegConstraint<"$false = $dst">;
def FCPYScc : ASI<(ops SPR:$dst, SPR:$false, SPR:$true, CCOp:$cc),
"fcpys$cc $dst, $true",
[(set SPR:$dst, (ARMcmov SPR:$false, SPR:$true, imm:$cc))]>,
RegConstraint<"$false = $dst">;
def FNEGDcc : ADI<(ops DPR:$dst, DPR:$false, DPR:$true, CCOp:$cc),
"fnegd$cc $dst, $true",
[(set DPR:$dst, (ARMcneg DPR:$false, DPR:$true, imm:$cc))]>,
RegConstraint<"$false = $dst">;
def FNEGScc : ASI<(ops SPR:$dst, SPR:$false, SPR:$true, CCOp:$cc),
"fnegs$cc $dst, $true",
[(set SPR:$dst, (ARMcneg SPR:$false, SPR:$true, imm:$cc))]>,
RegConstraint<"$false = $dst">;


@ -0,0 +1,628 @@
//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
//
// The LLVM Compiler Infrastructure
//
// This file was developed by Evan Cheng and is distributed under the
// University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "arm-ldst-opt"
#include "ARM.h"
#include "ARMAddressingModes.h"
#include "ARMRegisterInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumFLDMGened, "Number of fldm instructions generated");
STATISTIC(NumFSTMGened, "Number of fstm instructions generated");
namespace {
struct VISIBILITY_HIDDEN ARMLoadStoreOpt : public MachineFunctionPass {
const TargetInstrInfo *TII;
virtual bool runOnMachineFunction(MachineFunction &Fn);
virtual const char *getPassName() const {
return "ARM load / store optimization pass";
}
private:
struct MemOpQueueEntry {
int Offset;
unsigned Position;
MachineBasicBlock::iterator MBBI;
bool Merged;
MemOpQueueEntry(int o, int p, MachineBasicBlock::iterator i)
: Offset(o), Position(p), MBBI(i), Merged(false) {}
};
typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
typedef MemOpQueue::iterator MemOpQueueIter;
SmallVector<MachineBasicBlock::iterator, 4>
MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
int Opcode, unsigned Size, MemOpQueue &MemOps);
bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
};
}
/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
/// optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass() {
return new ARMLoadStoreOpt();
}
static int getLoadStoreMultipleOpcode(int Opcode) {
switch (Opcode) {
case ARM::LDR:
NumLDMGened++;
return ARM::LDM;
case ARM::STR:
NumSTMGened++;
return ARM::STM;
case ARM::FLDS:
NumFLDMGened++;
return ARM::FLDMS;
case ARM::FSTS:
NumFSTMGened++;
return ARM::FSTMS;
case ARM::FLDD:
NumFLDMGened++;
return ARM::FLDMD;
case ARM::FSTD:
NumFSTMGened++;
return ARM::FSTMD;
default: abort();
}
return 0;
}
/// mergeOps - Create and insert an LDM or STM with Base as the base register
/// and the registers in Regs as the register operands to be loaded / stored.
/// It returns true if the transformation is done.
static bool mergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
int Offset, unsigned Base, int Opcode,
SmallVector<unsigned, 8> &Regs,
const TargetInstrInfo *TII) {
// Only a single register to load / store. Don't bother.
unsigned NumRegs = Regs.size();
if (NumRegs <= 1)
return false;
ARM_AM::AMSubMode Mode = ARM_AM::ia;
bool isAM4 = Opcode == ARM::LDR || Opcode == ARM::STR;
if (isAM4 && Offset == 4)
Mode = ARM_AM::ib;
else if (isAM4 && Offset == -4 * (int)NumRegs + 4)
Mode = ARM_AM::da;
else if (isAM4 && Offset == -4 * (int)NumRegs)
Mode = ARM_AM::db;
else if (Offset != 0) {
// If starting offset isn't zero, insert a MI to materialize a new base.
// But only do so if it is cost effective, i.e. merging more than two
// loads / stores.
if (NumRegs <= 2)
return false;
unsigned NewBase;
if (Opcode == ARM::LDR)
// If it is a load, then just use one of the destination registers
// as the new base.
NewBase = Regs[NumRegs-1];
else {
// FIXME: Try scavenging a register to use as a new base.
NewBase = ARM::R12;
}
int BaseOpc = ARM::ADDri;
if (Offset < 0) {
BaseOpc = ARM::SUBri;
Offset = - Offset;
}
int ImmedOffset = ARM_AM::getSOImmVal(Offset);
if (ImmedOffset == -1)
return false; // Probably not worth it then.
BuildMI(MBB, MBBI, TII->get(BaseOpc), NewBase).addReg(Base).addImm(ImmedOffset);
Base = NewBase;
}
bool isDPR = Opcode == ARM::FLDD || Opcode == ARM::FSTD;
bool isDef = Opcode == ARM::LDR || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
Opcode = getLoadStoreMultipleOpcode(Opcode);
MachineInstrBuilder MIB = (isAM4)
? BuildMI(MBB, MBBI, TII->get(Opcode)).addReg(Base)
.addImm(ARM_AM::getAM4ModeImm(Mode))
: BuildMI(MBB, MBBI, TII->get(Opcode)).addReg(Base)
.addImm(ARM_AM::getAM5Opc(Mode, false, isDPR ? NumRegs<<1 : NumRegs));
for (unsigned i = 0; i != NumRegs; ++i)
MIB = MIB.addReg(Regs[i], isDef);
return true;
}
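// Editor's illustration (not part of this commit): given the queued ops
//   ldr r4, [r0]
//   ldr r5, [r0, #4]
//   ldr r6, [r0, #8]
// mergeOps is called with Offset == 0, picks the ia sub-mode, and emits
//   ldmia r0, {r4, r5, r6}
// For a non-zero starting offset it first materializes a new base (reusing
// the last destination register for loads, or r12 otherwise).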
SmallVector<MachineBasicBlock::iterator, 4>
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB,
unsigned SIndex, unsigned Base, int Opcode,
unsigned Size, MemOpQueue &MemOps) {
bool isAM4 = Opcode == ARM::LDR || Opcode == ARM::STR;
SmallVector<MachineBasicBlock::iterator, 4> Merges;
int Offset = MemOps[SIndex].Offset;
int SOffset = Offset;
unsigned Pos = MemOps[SIndex].Position;
MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
SmallVector<unsigned, 8> Regs;
unsigned PReg = MemOps[SIndex].MBBI->getOperand(0).getReg();
unsigned PRegNum = ARMRegisterInfo::getRegisterNumbering(PReg);
Regs.push_back(PReg);
for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
int NewOffset = MemOps[i].Offset;
unsigned Reg = MemOps[i].MBBI->getOperand(0).getReg();
unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(Reg);
// AM4 - register numbers in ascending order.
// AM5 - consecutive register numbers in ascending order.
if (NewOffset == Offset + (int)Size &&
((isAM4 && RegNum > PRegNum) || RegNum == PRegNum+1)) {
Offset += Size;
Regs.push_back(Reg);
PRegNum = RegNum;
} else {
// Can't merge this in. Try merging the earlier ones first.
if (mergeOps(MBB, ++Loc, SOffset, Base, Opcode, Regs, TII)) {
Merges.push_back(prior(Loc));
for (unsigned j = SIndex; j < i; ++j) {
MBB.erase(MemOps[j].MBBI);
MemOps[j].Merged = true;
}
}
SmallVector<MachineBasicBlock::iterator, 4> Merges2 =
MergeLDR_STR(MBB, i, Base, Opcode, Size, MemOps);
Merges.append(Merges2.begin(), Merges2.end());
return Merges;
}
if (MemOps[i].Position > Pos) {
Pos = MemOps[i].Position;
Loc = MemOps[i].MBBI;
}
}
if (mergeOps(MBB, ++Loc, SOffset, Base, Opcode, Regs, TII)) {
Merges.push_back(prior(Loc));
for (unsigned i = SIndex, e = MemOps.size(); i != e; ++i) {
MBB.erase(MemOps[i].MBBI);
MemOps[i].Merged = true;
}
}
return Merges;
}
static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
unsigned Bytes) {
return (MI && MI->getOpcode() == ARM::SUBri &&
MI->getOperand(0).getReg() == Base &&
MI->getOperand(1).getReg() == Base &&
ARM_AM::getAM2Offset(MI->getOperand(2).getImm()) == Bytes);
}
static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
unsigned Bytes) {
return (MI && MI->getOpcode() == ARM::ADDri &&
MI->getOperand(0).getReg() == Base &&
MI->getOperand(1).getReg() == Base &&
ARM_AM::getAM2Offset(MI->getOperand(2).getImm()) == Bytes);
}
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
switch (MI->getOpcode()) {
default: return 0;
case ARM::LDR:
case ARM::STR:
case ARM::FLDS:
case ARM::FSTS:
return 4;
case ARM::FLDD:
case ARM::FSTD:
return 8;
case ARM::LDM:
case ARM::STM:
return (MI->getNumOperands() - 2) * 4;
case ARM::FLDMS:
case ARM::FSTMS:
case ARM::FLDMD:
case ARM::FSTMD:
return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4;
}
}
/// mergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/FLDM{D|S}/FSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
static bool mergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI) {
MachineInstr *MI = MBBI;
unsigned Base = MI->getOperand(0).getReg();
unsigned Bytes = getLSMultipleTransferSize(MI);
int Opcode = MI->getOpcode();
bool isAM4 = Opcode == ARM::LDM || Opcode == ARM::STM;
if (isAM4) {
if (ARM_AM::getAM4WBFlag(MI->getOperand(1).getImm()))
return false;
// Can't use the updating AM4 sub-mode if the base register is also a dest
// register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i) {
if (MI->getOperand(i).getReg() == Base)
return false;
}
ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
if (MBBI != MBB.begin()) {
MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
if (Mode == ARM_AM::ia &&
isMatchingDecrement(PrevMBBI, Base, Bytes)) {
MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::db, true));
MBB.erase(PrevMBBI);
return true;
} else if (Mode == ARM_AM::ib &&
isMatchingDecrement(PrevMBBI, Base, Bytes)) {
MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::da, true));
MBB.erase(PrevMBBI);
return true;
}
}
if (MBBI != MBB.end()) {
MachineBasicBlock::iterator NextMBBI = next(MBBI);
if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
isMatchingIncrement(NextMBBI, Base, Bytes)) {
MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
MBB.erase(NextMBBI);
return true;
} else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
isMatchingDecrement(NextMBBI, Base, Bytes)) {
MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
MBB.erase(NextMBBI);
return true;
}
}
} else {
// FLDM{D|S}, FSTM{D|S} addressing mode 5 ops.
if (ARM_AM::getAM5WBFlag(MI->getOperand(1).getImm()))
return false;
ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm());
unsigned Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm());
if (MBBI != MBB.begin()) {
MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
if (Mode == ARM_AM::ia &&
isMatchingDecrement(PrevMBBI, Base, Bytes)) {
MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::db, true, Offset));
MBB.erase(PrevMBBI);
return true;
}
}
if (MBBI != MBB.end()) {
MachineBasicBlock::iterator NextMBBI = next(MBBI);
if (Mode == ARM_AM::ia &&
isMatchingIncrement(NextMBBI, Base, Bytes)) {
MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::ia, true, Offset));
MBB.erase(NextMBBI);
return true;
}
}
}
return false;
}
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
switch (Opc) {
case ARM::LDR: return ARM::LDR_PRE;
case ARM::STR: return ARM::STR_PRE;
case ARM::FLDS: return ARM::FLDMS;
case ARM::FLDD: return ARM::FLDMD;
case ARM::FSTS: return ARM::FSTMS;
case ARM::FSTD: return ARM::FSTMD;
default: abort();
}
return 0;
}
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
switch (Opc) {
case ARM::LDR: return ARM::LDR_POST;
case ARM::STR: return ARM::STR_POST;
case ARM::FLDS: return ARM::FLDMS;
case ARM::FLDD: return ARM::FLDMD;
case ARM::FSTS: return ARM::FSTMS;
case ARM::FSTD: return ARM::FSTMD;
default: abort();
}
return 0;
}
/// mergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
static bool mergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
const TargetInstrInfo *TII) {
MachineInstr *MI = MBBI;
unsigned Base = MI->getOperand(1).getReg();
unsigned Bytes = getLSMultipleTransferSize(MI);
int Opcode = MI->getOpcode();
bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
if ((isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0) ||
(!isAM2 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0))
return false;
bool isLd = Opcode == ARM::LDR || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
// Can't do the merge if the destination register is the same as the would-be
// writeback register.
if (isLd && MI->getOperand(0).getReg() == Base)
return false;
bool DoMerge = false;
ARM_AM::AddrOpc AddSub = ARM_AM::add;
unsigned NewOpc = 0;
if (MBBI != MBB.begin()) {
MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
if (isMatchingDecrement(PrevMBBI, Base, Bytes)) {
DoMerge = true;
AddSub = ARM_AM::sub;
NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
} else if (isAM2 && isMatchingIncrement(PrevMBBI, Base, Bytes)) {
DoMerge = true;
NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
}
if (DoMerge)
MBB.erase(PrevMBBI);
}
if (!DoMerge && MBBI != MBB.end()) {
MachineBasicBlock::iterator NextMBBI = next(MBBI);
if (isAM2 && isMatchingDecrement(NextMBBI, Base, Bytes)) {
DoMerge = true;
AddSub = ARM_AM::sub;
NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
} else if (isMatchingIncrement(NextMBBI, Base, Bytes)) {
DoMerge = true;
NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
}
if (DoMerge)
MBB.erase(NextMBBI);
}
if (!DoMerge)
return false;
bool isDPR = NewOpc == ARM::FLDMD || NewOpc == ARM::FSTMD;
unsigned Offset = isAM2 ? ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift)
: ARM_AM::getAM5Opc((AddSub == ARM_AM::sub) ? ARM_AM::db : ARM_AM::ia,
true, isDPR ? 2 : 1);
if (isLd) {
if (isAM2)
BuildMI(MBB, MBBI, TII->get(NewOpc), MI->getOperand(0).getReg())
.addReg(Base, true).addReg(Base).addReg(0).addImm(Offset);
else
BuildMI(MBB, MBBI, TII->get(NewOpc)).addReg(Base)
.addImm(Offset).addReg(MI->getOperand(0).getReg(), true);
} else {
if (isAM2)
BuildMI(MBB, MBBI, TII->get(NewOpc), Base).addReg(MI->getOperand(0).getReg())
.addReg(Base).addReg(0).addImm(Offset);
else
BuildMI(MBB, MBBI, TII->get(NewOpc)).addReg(Base)
.addImm(Offset).addReg(MI->getOperand(0).getReg(), false);
}
MBB.erase(MBBI);
return true;
}
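// Editor's illustration of the folds performed above:
//   add r0, r0, #4 ; ldr r1, [r0]   =>   ldr r1, [r0, #4]!   (pre-indexed)
//   ldr r1, [r0] ; add r0, r0, #4   =>   ldr r1, [r0], #4    (post-indexed)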
/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
unsigned NumMerges = 0;
unsigned NumMemOps = 0;
MemOpQueue MemOps;
unsigned CurrBase = 0;
int CurrOpc = -1;
unsigned CurrSize = 0;
unsigned Position = 0;
MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
while (MBBI != E) {
bool Advance = false;
bool TryMerge = false;
bool Clobber = false;
int Opcode = MBBI->getOpcode();
bool isMemOp = false;
bool isAM2 = false;
unsigned Size = 4;
switch (Opcode) {
case ARM::LDR:
case ARM::STR:
isMemOp =
(MBBI->getOperand(1).isRegister() && MBBI->getOperand(2).getReg() == 0);
isAM2 = true;
break;
case ARM::FLDS:
case ARM::FSTS:
isMemOp = MBBI->getOperand(1).isRegister();
break;
case ARM::FLDD:
case ARM::FSTD:
isMemOp = MBBI->getOperand(1).isRegister();
Size = 8;
break;
}
if (isMemOp) {
unsigned Base = MBBI->getOperand(1).getReg();
unsigned OffIdx = MBBI->getNumOperands()-1;
unsigned OffField = MBBI->getOperand(OffIdx).getImm();
int Offset = isAM2
? ARM_AM::getAM2Offset(OffField) : ARM_AM::getAM5Offset(OffField) * 4;
if (isAM2) {
if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
Offset = -Offset;
} else {
if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
Offset = -Offset;
}
// Watch out for:
// r4 := ldr [r5]
// r5 := ldr [r5, #4]
// r6 := ldr [r5, #8]
//
// The second ldr has effectively broken the chain even though it
// looks like the later ldr(s) use the same base register. Try to
// merge the ldr's so far, including this one. But don't try to
// combine the following ldr(s).
Clobber = (Opcode == ARM::LDR && Base == MBBI->getOperand(0).getReg());
if (CurrBase == 0 && !Clobber) {
// Start of a new chain.
CurrBase = Base;
CurrOpc = Opcode;
CurrSize = Size;
MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
NumMemOps++;
Advance = true;
} else {
if (Clobber) {
TryMerge = true;
Advance = true;
}
if (CurrOpc == Opcode && CurrBase == Base) {
// Continue adding to the queue.
if (Offset > MemOps.back().Offset) {
MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
NumMemOps++;
Advance = true;
} else {
for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
I != E; ++I) {
if (Offset < I->Offset) {
MemOps.insert(I, MemOpQueueEntry(Offset, Position, MBBI));
NumMemOps++;
Advance = true;
break;
} else if (Offset == I->Offset) {
// Collision! This can't be merged!
break;
}
}
}
}
}
}
if (Advance) {
++Position;
++MBBI;
} else
TryMerge = true;
if (TryMerge) {
if (NumMemOps > 1) {
SmallVector<MachineBasicBlock::iterator,4> MBBII =
MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,MemOps);
// Try folding preceding/trailing base inc/dec into the generated
// LDM/STM ops.
for (unsigned i = 0, e = MBBII.size(); i < e; ++i)
if (mergeBaseUpdateLSMultiple(MBB, MBBII[i]))
NumMerges++;
NumMerges += MBBII.size();
}
// Try folding preceding/trailing base inc/dec into those load / store
// ops that were not merged to form LDM/STM ops.
for (unsigned i = 0; i != NumMemOps; ++i)
if (!MemOps[i].Merged)
if (mergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII))
NumMerges++;
CurrBase = 0;
CurrOpc = -1;
if (NumMemOps) {
MemOps.clear();
NumMemOps = 0;
}
// If iterator hasn't been advanced and this is not a memory op, skip it.
// It can't start a new chain anyway.
if (!Advance && !isMemOp && MBBI != E) {
++Position;
++MBBI;
}
}
}
return NumMerges > 0;
}
/// MergeReturnIntoLDM - If this is an exit BB, try merging the return op
/// (bx lr) into the preceding stack restore so it directly restores the
/// value of LR into pc.
/// ldmfd sp!, {r7, lr}
/// bx lr
/// =>
/// ldmfd sp!, {r7, pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
if (MBB.empty()) return false;
MachineBasicBlock::iterator MBBI = prior(MBB.end());
if (MBBI->getOpcode() == ARM::BX_RET && MBBI != MBB.begin()) {
MachineInstr *PrevMI = prior(MBBI);
if (PrevMI->getOpcode() == ARM::LDM) {
MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
if (MO.getReg() == ARM::LR) {
PrevMI->setInstrDescriptor(TII->get(ARM::LDM_RET));
MO.setReg(ARM::PC);
MBB.erase(MBBI);
return true;
}
}
}
return false;
}
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
TII = Fn.getTarget().getInstrInfo();
bool Modified = false;
for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
++MFI) {
MachineBasicBlock &MBB = *MFI;
Modified |= LoadStoreMultipleOpti(MBB);
Modified |= MergeReturnIntoLDM(MBB);
}
return Modified;
}


@ -0,0 +1,136 @@
//===- ARMMachineFunctionInfo.h - ARM machine function info -----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file was developed by Evan Cheng and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file declares ARM-specific per-machine-function information.
//
//===----------------------------------------------------------------------===//
#ifndef ARMMACHINEFUNCTIONINFO_H
#define ARMMACHINEFUNCTIONINFO_H
#include "ARMSubtarget.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
/// ARMFunctionInfo - This class is derived from MachineFunctionInfo and
/// contains private ARM target-specific information for each MachineFunction.
class ARMFunctionInfo : public MachineFunctionInfo {
/// isThumb - True if this function is compiled under Thumb mode.
///
bool isThumb;
/// VarArgsRegSaveSize - Size of the register save area for vararg functions.
///
unsigned VarArgsRegSaveSize;
/// FramePtrSpilled - True if FP register is spilled. Set by
/// processFunctionBeforeCalleeSavedScan().
bool FramePtrSpilled;
/// FramePtrSpillOffset - If FramePtrSpilled, this records the frame pointer
/// spill stack offset.
unsigned FramePtrSpillOffset;
/// GPRCS1Offset, GPRCS2Offset, DPRCSOffset - Starting offset of callee saved
/// register spill areas. For Mac OS X:
///
/// GPR callee-saved (1) : r4, r5, r6, r7, lr
/// --------------------------------------------
/// GPR callee-saved (2) : r8, r10, r11
/// --------------------------------------------
/// DPR callee-saved : d8 - d15
unsigned GPRCS1Offset;
unsigned GPRCS2Offset;
unsigned DPRCSOffset;
/// GPRCS1Size, GPRCS2Size, DPRCSSize - Sizes of the callee saved register
/// spill areas.
unsigned GPRCS1Size;
unsigned GPRCS2Size;
unsigned DPRCSSize;
/// GPRCS1Frames, GPRCS2Frames, DPRCSFrames - Keeps track of frame indices
/// which belong to these spill areas.
std::set<int> GPRCS1Frames;
std::set<int> GPRCS2Frames;
std::set<int> DPRCSFrames;
/// JumpTableUId - Unique id for jumptables.
///
unsigned JumpTableUId;
public:
ARMFunctionInfo() :
isThumb(false),
VarArgsRegSaveSize(0), FramePtrSpilled(false), FramePtrSpillOffset(0),
GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), JumpTableUId(0) {}
ARMFunctionInfo(MachineFunction &MF) :
isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()),
VarArgsRegSaveSize(0), FramePtrSpilled(false), FramePtrSpillOffset(0),
GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), JumpTableUId(0) {}
bool isThumbFunction() const { return isThumb; }
unsigned getVarArgsRegSaveSize() const { return VarArgsRegSaveSize; }
void setVarArgsRegSaveSize(unsigned s) { VarArgsRegSaveSize = s; }
bool isFramePtrSpilled() const { return FramePtrSpilled; }
void setFramePtrSpilled(bool s) { FramePtrSpilled = s; }
unsigned getFramePtrSpillOffset() const { return FramePtrSpillOffset; }
void setFramePtrSpillOffset(unsigned o) { FramePtrSpillOffset = o; }
unsigned getGPRCalleeSavedArea1Offset() const { return GPRCS1Offset; }
unsigned getGPRCalleeSavedArea2Offset() const { return GPRCS2Offset; }
unsigned getDPRCalleeSavedAreaOffset() const { return DPRCSOffset; }
void setGPRCalleeSavedArea1Offset(unsigned o) { GPRCS1Offset = o; }
void setGPRCalleeSavedArea2Offset(unsigned o) { GPRCS2Offset = o; }
void setDPRCalleeSavedAreaOffset(unsigned o) { DPRCSOffset = o; }
unsigned getGPRCalleeSavedArea1Size() const { return GPRCS1Size; }
unsigned getGPRCalleeSavedArea2Size() const { return GPRCS2Size; }
unsigned getDPRCalleeSavedAreaSize() const { return DPRCSSize; }
void setGPRCalleeSavedArea1Size(unsigned s) { GPRCS1Size = s; }
void setGPRCalleeSavedArea2Size(unsigned s) { GPRCS2Size = s; }
void setDPRCalleeSavedAreaSize(unsigned s) { DPRCSSize = s; }
bool isGPRCalleeSavedArea1Frame(unsigned fi) const {
return GPRCS1Frames.count(fi);
}
bool isGPRCalleeSavedArea2Frame(unsigned fi) const {
return GPRCS2Frames.count(fi);
}
bool isDPRCalleeSavedAreaFrame(unsigned fi) const {
return DPRCSFrames.count(fi);
}
void addGPRCalleeSavedArea1Frame(unsigned fi) {
GPRCS1Frames.insert(fi);
}
void addGPRCalleeSavedArea2Frame(unsigned fi) {
GPRCS2Frames.insert(fi);
}
void addDPRCalleeSavedAreaFrame(unsigned fi) {
DPRCSFrames.insert(fi);
}
unsigned createJumpTableUId() {
return JumpTableUId++;
}
};
} // End llvm namespace
#endif // ARMMACHINEFUNCTIONINFO_H


@ -1,75 +0,0 @@
//===-- ARMMul.cpp - Fix ARM multiply operand constraints ----------------===//
//
// The LLVM Compiler Infrastructure
//
// This file was developed by the "Instituto Nokia de Tecnologia" and
// is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Modify the ARM multiplication instructions so that Rd{Hi,Lo} and Rm are distinct
//
//===----------------------------------------------------------------------===//
#include "ARM.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Support/Compiler.h"
using namespace llvm;
namespace {
class VISIBILITY_HIDDEN FixMul : public MachineFunctionPass {
virtual bool runOnMachineFunction(MachineFunction &MF);
};
}
FunctionPass *llvm::createARMFixMulPass() { return new FixMul(); }
bool FixMul::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
for (MachineFunction::iterator BB = MF.begin(), E = MF.end();
BB != E; ++BB) {
MachineBasicBlock &MBB = *BB;
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E; ++I) {
MachineInstr *MI = I;
int Op = MI->getOpcode();
if (Op == ARM::MUL ||
Op == ARM::SMULL ||
Op == ARM::UMULL) {
MachineOperand &RdOp = MI->getOperand(0);
MachineOperand &RmOp = MI->getOperand(1);
MachineOperand &RsOp = MI->getOperand(2);
unsigned Rd = RdOp.getReg();
unsigned Rm = RmOp.getReg();
unsigned Rs = RsOp.getReg();
if (Rd == Rm) {
Changed = true;
if (Rd != Rs) {
// Rd and Rm must be distinct, but Rd can be equal to Rs.
// Swap Rs and Rm.
RmOp.setReg(Rs);
RsOp.setReg(Rm);
} else {
unsigned scratch = Op == ARM::MUL ? ARM::R12 : ARM::R0;
BuildMI(MBB, I, MF.getTarget().getInstrInfo()->get(ARM::MOV),
scratch).addReg(Rm).addImm(0).addImm(ARMShift::LSL);
RmOp.setReg(scratch);
}
}
}
}
}
return Changed;
}
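// Editor's illustration of the two rewrites performed above:
//   mul r0, r0, r1   =>   mul r0, r1, r0                 (swap Rm and Rs)
//   mul r0, r0, r0   =>   mov r12, r0
//                         mul r0, r12, r0                (copy Rm to scratch)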

File diff suppressed because it is too large.


@ -17,18 +17,36 @@
#include "llvm/Target/MRegisterInfo.h"
#include "ARMGenRegisterInfo.h.inc"
#include <set>
namespace llvm {
class Type;
class TargetInstrInfo;
class TargetInstrInfo;
class ARMSubtarget;
class Type;
struct ARMRegisterInfo : public ARMGenRegisterInfo {
const TargetInstrInfo &TII;
const ARMSubtarget &STI;
private:
/// FramePtr - ARM physical register used as frame ptr.
unsigned FramePtr;
ARMRegisterInfo(const TargetInstrInfo &tii);
public:
ARMRegisterInfo(const TargetInstrInfo &tii, const ARMSubtarget &STI);
/// getRegisterNumbering - Given the enum value for some register, e.g.
/// ARM::LR, return the number that it corresponds to (e.g. 14).
static unsigned getRegisterNumbering(unsigned RegEnum);
/// Code Generation virtual methods...
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI) const;
bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI) const;
void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned SrcReg, int FrameIndex,
@ -43,9 +61,8 @@ struct ARMRegisterInfo : public ARMGenRegisterInfo {
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *RC) const;
virtual MachineInstr* foldMemoryOperand(MachineInstr* MI,
unsigned OpNum,
int FrameIndex) const;
MachineInstr* foldMemoryOperand(MachineInstr* MI, unsigned OpNum,
int FrameIndex) const;
const unsigned *getCalleeSavedRegs() const;
@ -57,7 +74,7 @@ struct ARMRegisterInfo : public ARMGenRegisterInfo {
void eliminateFrameIndex(MachineBasicBlock::iterator II) const;
void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF) const;
void emitPrologue(MachineFunction &MF) const;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;

View File

@ -1,4 +1,4 @@
//===- ARMRegisterInfo.td - ARM Register defs ----------*- tablegen -*-===//
//===- ARMRegisterInfo.td - ARM Register defs --------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@ -13,129 +13,169 @@
//===----------------------------------------------------------------------===//
// Registers are identified with 4-bit ID numbers.
class ARMReg<string n> : Register<n> {
let Namespace = "ARM";
}

// Ri - 32-bit integer registers
class Ri<bits<4> num, string n> : ARMReg<n> {
field bits<4> Num;
let Num = num;
}

// Rf - 32-bit floating-point registers
class Rf<bits<5> num, string n> : ARMReg<n> {
field bits<5> Num;
let Num = num;
}

// Rd - Slots in the FP register file for 64-bit floating-point values.
class Rd<bits<5> num, string n, list<Register> aliases> : ARMReg<n> {
field bits<5> Num;
let Num = num;
let Aliases = aliases;
}

class ARMReg<bits<4> num, string n, list<Register> aliases = []> : Register<n> {
field bits<4> Num;
let Num = num;
let Namespace = "ARM";
let Aliases = aliases;
}

class ARMFReg<bits<5> num, string n> : Register<n> {
field bits<5> Num;
let Namespace = "ARM";
}
// Integer registers
def R0 : Ri< 0, "R0">, DwarfRegNum<0>;
def R1 : Ri< 1, "R1">, DwarfRegNum<1>;
def R2 : Ri< 2, "R2">, DwarfRegNum<2>;
def R3 : Ri< 3, "R3">, DwarfRegNum<3>;
def R4 : Ri< 4, "R4">, DwarfRegNum<4>;
def R5 : Ri< 5, "R5">, DwarfRegNum<5>;
def R6 : Ri< 6, "R6">, DwarfRegNum<6>;
def R7 : Ri< 7, "R7">, DwarfRegNum<7>;
def R8 : Ri< 8, "R8">, DwarfRegNum<8>;
def R9 : Ri< 9, "R9">, DwarfRegNum<9>;
def R10 : Ri<10, "R10">, DwarfRegNum<10>;
def R11 : Ri<11, "R11">, DwarfRegNum<11>;
def R12 : Ri<12, "R12">, DwarfRegNum<12>;
def R13 : Ri<13, "R13">, DwarfRegNum<13>;
def R14 : Ri<14, "R14">, DwarfRegNum<14>;
def R15 : Ri<15, "R15">, DwarfRegNum<15>;
def R0 : ARMReg< 0, "r0">, DwarfRegNum<0>;
def R1 : ARMReg< 1, "r1">, DwarfRegNum<1>;
def R2 : ARMReg< 2, "r2">, DwarfRegNum<2>;
def R3 : ARMReg< 3, "r3">, DwarfRegNum<3>;
def R4 : ARMReg< 4, "r4">, DwarfRegNum<4>;
def R5 : ARMReg< 5, "r5">, DwarfRegNum<5>;
def R6 : ARMReg< 6, "r6">, DwarfRegNum<6>;
def R7 : ARMReg< 7, "r7">, DwarfRegNum<7>;
def R8 : ARMReg< 8, "r8">, DwarfRegNum<8>;
def R9 : ARMReg< 9, "r9">, DwarfRegNum<9>;
def R10 : ARMReg<10, "r10">, DwarfRegNum<10>;
def R11 : ARMReg<11, "r11">, DwarfRegNum<11>;
def R12 : ARMReg<12, "r12">, DwarfRegNum<12>;
def SP : ARMReg<13, "sp">, DwarfRegNum<13>;
def LR : ARMReg<14, "lr">, DwarfRegNum<14>;
def PC : ARMReg<15, "pc">, DwarfRegNum<15>;
// TODO: update to VFP-v3
// Floating-point registers
def S0 : Rf< 0, "S0">, DwarfRegNum<64>;
def S1 : Rf< 1, "S1">, DwarfRegNum<65>;
def S2 : Rf< 2, "S2">, DwarfRegNum<66>;
def S3 : Rf< 3, "S3">, DwarfRegNum<67>;
def S4 : Rf< 4, "S4">, DwarfRegNum<68>;
def S5 : Rf< 5, "S5">, DwarfRegNum<69>;
def S6 : Rf< 6, "S6">, DwarfRegNum<70>;
def S7 : Rf< 7, "S7">, DwarfRegNum<71>;
def S8 : Rf< 8, "S8">, DwarfRegNum<72>;
def S9 : Rf< 9, "S9">, DwarfRegNum<73>;
def S10 : Rf<10, "S10">, DwarfRegNum<74>;
def S11 : Rf<11, "S11">, DwarfRegNum<75>;
def S12 : Rf<12, "S12">, DwarfRegNum<76>;
def S13 : Rf<13, "S13">, DwarfRegNum<77>;
def S14 : Rf<14, "S14">, DwarfRegNum<78>;
def S15 : Rf<15, "S15">, DwarfRegNum<79>;
def S16 : Rf<16, "S16">, DwarfRegNum<80>;
def S17 : Rf<17, "S17">, DwarfRegNum<81>;
def S18 : Rf<18, "S18">, DwarfRegNum<82>;
def S19 : Rf<19, "S19">, DwarfRegNum<83>;
def S20 : Rf<20, "S20">, DwarfRegNum<84>;
def S21 : Rf<21, "S21">, DwarfRegNum<85>;
def S22 : Rf<22, "S22">, DwarfRegNum<86>;
def S23 : Rf<23, "S23">, DwarfRegNum<87>;
def S24 : Rf<24, "S24">, DwarfRegNum<88>;
def S25 : Rf<25, "S25">, DwarfRegNum<89>;
def S26 : Rf<26, "S26">, DwarfRegNum<90>;
def S27 : Rf<27, "S27">, DwarfRegNum<91>;
def S28 : Rf<28, "S28">, DwarfRegNum<92>;
def S29 : Rf<29, "S29">, DwarfRegNum<93>;
def S30 : Rf<30, "S30">, DwarfRegNum<94>;
def S31 : Rf<31, "S31">, DwarfRegNum<95>;
// Float registers
def S0 : ARMFReg< 0, "s0">; def S1 : ARMFReg< 1, "s1">;
def S2 : ARMFReg< 2, "s2">; def S3 : ARMFReg< 3, "s3">;
def S4 : ARMFReg< 4, "s4">; def S5 : ARMFReg< 5, "s5">;
def S6 : ARMFReg< 6, "s6">; def S7 : ARMFReg< 7, "s7">;
def S8 : ARMFReg< 8, "s8">; def S9 : ARMFReg< 9, "s9">;
def S10 : ARMFReg<10, "s10">; def S11 : ARMFReg<11, "s11">;
def S12 : ARMFReg<12, "s12">; def S13 : ARMFReg<13, "s13">;
def S14 : ARMFReg<14, "s14">; def S15 : ARMFReg<15, "s15">;
def S16 : ARMFReg<16, "s16">; def S17 : ARMFReg<17, "s17">;
def S18 : ARMFReg<18, "s18">; def S19 : ARMFReg<19, "s19">;
def S20 : ARMFReg<20, "s20">; def S21 : ARMFReg<21, "s21">;
def S22 : ARMFReg<22, "s22">; def S23 : ARMFReg<23, "s23">;
def S24 : ARMFReg<24, "s24">; def S25 : ARMFReg<25, "s25">;
def S26 : ARMFReg<26, "s26">; def S27 : ARMFReg<27, "s27">;
def S28 : ARMFReg<28, "s28">; def S29 : ARMFReg<29, "s29">;
def S30 : ARMFReg<30, "s30">; def S31 : ARMFReg<31, "s31">;
// Aliases of the S* registers used to hold 64-bit fp values (doubles)
def D0 : Rd< 0, "D0", [S0, S1]>, DwarfRegNum<64>;
def D1 : Rd< 2, "D1", [S2, S3]>, DwarfRegNum<66>;
def D2 : Rd< 4, "D2", [S4, S5]>, DwarfRegNum<68>;
def D3 : Rd< 6, "D3", [S6, S7]>, DwarfRegNum<70>;
def D4 : Rd< 8, "D4", [S8, S9]>, DwarfRegNum<72>;
def D5 : Rd<10, "D5", [S10, S11]>, DwarfRegNum<74>;
def D6 : Rd<12, "D6", [S12, S13]>, DwarfRegNum<76>;
def D7 : Rd<14, "D7", [S14, S15]>, DwarfRegNum<78>;
def D8 : Rd<16, "D8", [S16, S17]>, DwarfRegNum<80>;
def D9 : Rd<18, "D9", [S18, S19]>, DwarfRegNum<82>;
def D10 : Rd<20, "D10", [S20, S21]>, DwarfRegNum<84>;
def D11 : Rd<22, "D11", [S22, S23]>, DwarfRegNum<86>;
def D12 : Rd<24, "D12", [S24, S25]>, DwarfRegNum<88>;
def D13 : Rd<26, "D13", [S26, S27]>, DwarfRegNum<90>;
def D14 : Rd<28, "D14", [S28, S29]>, DwarfRegNum<92>;
def D15 : Rd<30, "D15", [S30, S31]>, DwarfRegNum<94>;
// Aliases of the S* registers used to hold 64-bit fp values (doubles)
def D0 : ARMReg< 0, "d0", [S0, S1]>;
def D1 : ARMReg< 1, "d1", [S2, S3]>;
def D2 : ARMReg< 2, "d2", [S4, S5]>;
def D3 : ARMReg< 3, "d3", [S6, S7]>;
def D4 : ARMReg< 4, "d4", [S8, S9]>;
def D5 : ARMReg< 5, "d5", [S10, S11]>;
def D6 : ARMReg< 6, "d6", [S12, S13]>;
def D7 : ARMReg< 7, "d7", [S14, S15]>;
def D8 : ARMReg< 8, "d8", [S16, S17]>;
def D9 : ARMReg< 9, "d9", [S18, S19]>;
def D10 : ARMReg<10, "d10", [S20, S21]>;
def D11 : ARMReg<11, "d11", [S22, S23]>;
def D12 : ARMReg<12, "d12", [S24, S25]>;
def D13 : ARMReg<13, "d13", [S26, S27]>;
def D14 : ARMReg<14, "d14", [S28, S29]>;
def D15 : ARMReg<15, "d15", [S30, S31]>;
// Register classes.
//
// FIXME: the register order should be defined in terms of the preferred
// allocation order...
// pc == Program Counter
// lr == Link Register
// sp == Stack Pointer
// r12 == ip (scratch)
// r7 == Frame Pointer (thumb-style backtraces)
// r11 == Frame Pointer (arm-style backtraces)
// r10 == Stack Limit
//
def IntRegs : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
R7, R8, R9, R10, R11, R12,
R13, R14, R15]> {
def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
R7, R8, R9, R10, R12, R11,
LR, SP, PC]> {
let MethodProtos = [{
iterator allocation_order_begin(const MachineFunction &MF) const;
iterator allocation_order_end(const MachineFunction &MF) const;
}];
// FIXME: We are reserving r12 in case the PEI needs to use it to
// generate large stack offset. Make it available once we have register
// scavenging.
let MethodBodies = [{
IntRegsClass::iterator
IntRegsClass::allocation_order_end(const MachineFunction &MF) const {
// r15 == Program Counter
// r14 == Link Register
// r13 == Stack Pointer
// r12 == ip (scratch)
// r11 == Frame Pointer
// r10 == Stack Limit
if (hasFP(MF))
return end() - 5;
else
return end() - 4;
// FP is R11, R9 is available.
static const unsigned ARM_GPR_AO_1[] = {
ARM::R0, ARM::R1, ARM::R2, ARM::R3,
ARM::R4, ARM::R5, ARM::R6, ARM::R7,
ARM::R8, ARM::R9, ARM::R10,
ARM::LR, ARM::R11 };
// FP is R11, R9 is not available.
static const unsigned ARM_GPR_AO_2[] = {
ARM::R0, ARM::R1, ARM::R2, ARM::R3,
ARM::R4, ARM::R5, ARM::R6, ARM::R7,
ARM::R8, ARM::R10,
ARM::LR, ARM::R11 };
// FP is R7, R9 is available.
static const unsigned ARM_GPR_AO_3[] = {
ARM::R0, ARM::R1, ARM::R2, ARM::R3,
ARM::R4, ARM::R5, ARM::R6, ARM::R8,
ARM::R9, ARM::R10, ARM::R11,
ARM::LR, ARM::R7 };
// FP is R7, R9 is not available.
static const unsigned ARM_GPR_AO_4[] = {
ARM::R0, ARM::R1, ARM::R2, ARM::R3,
ARM::R4, ARM::R5, ARM::R6, ARM::R8,
ARM::R10, ARM::R11,
ARM::LR, ARM::R7 };
// FP is R7, only low registers available.
static const unsigned THUMB_GPR_AO[] = {
ARM::R0, ARM::R1, ARM::R2,
ARM::R4, ARM::R5, ARM::R6, ARM::R7 };
GPRClass::iterator
GPRClass::allocation_order_begin(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
if (Subtarget.isThumb())
return THUMB_GPR_AO;
if (Subtarget.useThumbBacktraces()) {
if (Subtarget.isR9Reserved())
return ARM_GPR_AO_4;
else
return ARM_GPR_AO_3;
} else {
if (Subtarget.isR9Reserved())
return ARM_GPR_AO_2;
else
return ARM_GPR_AO_1;
}
}
GPRClass::iterator
GPRClass::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
GPRClass::iterator I;
if (Subtarget.isThumb())
I = THUMB_GPR_AO + (sizeof(THUMB_GPR_AO)/sizeof(unsigned));
else if (Subtarget.useThumbBacktraces()) {
if (Subtarget.isR9Reserved())
I = ARM_GPR_AO_4 + (sizeof(ARM_GPR_AO_4)/sizeof(unsigned));
else
I = ARM_GPR_AO_3 + (sizeof(ARM_GPR_AO_3)/sizeof(unsigned));
} else {
if (Subtarget.isR9Reserved())
I = ARM_GPR_AO_2 + (sizeof(ARM_GPR_AO_2)/sizeof(unsigned));
else
I = ARM_GPR_AO_1 + (sizeof(ARM_GPR_AO_1)/sizeof(unsigned));
}
return hasFP(MF) ? I-1 : I;
}
}];
}
def FPRegs : RegisterClass<"ARM", [f32], 32, [S0, S1, S2, S3, S4, S5, S6, S7, S8,
def SPR : RegisterClass<"ARM", [f32], 32, [S0, S1, S2, S3, S4, S5, S6, S7, S8,
S9, S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, S20, S21, S22,
S23, S24, S25, S26, S27, S28, S29, S30, S31]>;
def DFPRegs : RegisterClass<"ARM", [f64], 64, [D0, D1, D2, D3, D4, D5, D6, D7,
D8, D9, D10, D11, D12, D13, D14, D15]>;
// ARM requires only word alignment for doubles, though double-word
// alignment is more performant.
def DPR : RegisterClass<"ARM", [f64], 64, [D0, D1, D2, D3, D4, D5, D6, D7, D8,
D9, D10, D11, D12, D13, D14, D15]>;

View File

@ -0,0 +1,52 @@
//===-- ARMSubtarget.cpp - ARM Subtarget Information ------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file was developed by Evan Cheng and is distributed under the
// University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the ARM specific subclass of TargetSubtarget.
//
//===----------------------------------------------------------------------===//
#include "ARMSubtarget.h"
#include "ARMGenSubtarget.inc"
#include "llvm/Module.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;
// FIXME: this is temporary.
static cl::opt<bool> Thumb("enable-thumb",
cl::desc("Switch to thumb mode in ARM backend"));
ARMSubtarget::ARMSubtarget(const Module &M, const std::string &FS)
: ARMArchVersion(V4T), HasVFP2(false), IsDarwin(false),
UseThumbBacktraces(false), IsR9Reserved(false), stackAlignment(8) {
// Determine default and user specified characteristics
std::string CPU = "generic";
// Parse features string.
ParseSubtargetFeatures(FS, CPU);
IsThumb = Thumb;
// Set IsDarwin from the module's target triple; if the module has no
// triple, fall back to the host platform.
const std::string& TT = M.getTargetTriple();
if (TT.length() > 5) {
IsDarwin = TT.find("-darwin") != std::string::npos;
} else if (TT.empty()) {
#if defined(__APPLE__)
IsDarwin = true;
#endif
}
if (IsDarwin) {
UseThumbBacktraces = true;
IsR9Reserved = true;
stackAlignment = 4;
}
}

View File

@ -0,0 +1,82 @@
//=====---- ARMSubtarget.h - Define Subtarget for the ARM -----*- C++ -*--====//
//
// The LLVM Compiler Infrastructure
//
// This file was developed by Evan Cheng and is distributed under the
// University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file declares the ARM specific subclass of TargetSubtarget.
//
//===----------------------------------------------------------------------===//
#ifndef ARMSUBTARGET_H
#define ARMSUBTARGET_H
#include "llvm/Target/TargetSubtarget.h"
#include <string>
namespace llvm {
class Module;
class ARMSubtarget : public TargetSubtarget {
protected:
enum ARMArchEnum {
V4T, V5T, V5TE, V6
};
/// ARMArchVersion - ARM architecture version: V4T (base), V5T, V5TE,
/// and V6.
ARMArchEnum ARMArchVersion;
/// HasVFP2 - True if the processor supports Vector Floating Point (VFP) V2
/// instructions.
bool HasVFP2;
/// IsThumb - True if we are in thumb mode, false if in ARM mode.
bool IsThumb;
bool IsDarwin;
/// UseThumbBacktraces - True if we use thumb style backtraces.
bool UseThumbBacktraces;
/// IsR9Reserved - True if R9 is not available as a general purpose register.
bool IsR9Reserved;
/// stackAlignment - The minimum alignment known to hold for the stack frame
/// on entry to the function, which must be maintained by every function.
unsigned stackAlignment;
public:
/// This constructor initializes the data members to match those
/// of the specified module.
///
ARMSubtarget(const Module &M, const std::string &FS);
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
void ParseSubtargetFeatures(const std::string &FS, const std::string &CPU);
bool hasV4TOps() const { return ARMArchVersion >= V4T; }
bool hasV5TOps() const { return ARMArchVersion >= V5T; }
bool hasV5TEOps() const { return ARMArchVersion >= V5TE; }
bool hasV6Ops() const { return ARMArchVersion >= V6; }
bool hasVFP2() const { return HasVFP2; }
bool isDarwin() const { return IsDarwin; }
bool isThumb() const { return IsThumb; }
bool useThumbBacktraces() const { return UseThumbBacktraces; }
bool isR9Reserved() const { return IsR9Reserved; }
/// getStackAlignment - Returns the minimum alignment known to hold for the
/// stack frame on entry to the function, which must be maintained by every
/// function for this subtarget.
unsigned getStackAlignment() const { return stackAlignment; }
};
} // End llvm namespace
#endif // ARMSUBTARGET_H

View File

@ -12,18 +12,50 @@
//===----------------------------------------------------------------------===//
#include "ARMTargetAsmInfo.h"
#include "ARMTargetMachine.h"
using namespace llvm;
ARMTargetAsmInfo::ARMTargetAsmInfo(const ARMTargetMachine &TM) {
Data16bitsDirective = "\t.half\t";
Data32bitsDirective = "\t.word\t";
const ARMSubtarget *Subtarget = &TM.getSubtarget<ARMSubtarget>();
if (Subtarget->isDarwin()) {
HasDotTypeDotSizeDirective = false;
PrivateGlobalPrefix = "L";
GlobalPrefix = "_";
ZeroDirective = "\t.space\t";
SetDirective = "\t.set";
WeakRefDirective = "\t.weak_reference\t";
JumpTableDataSection = ".const";
CStringSection = "\t.cstring";
StaticCtorsSection = ".mod_init_func";
StaticDtorsSection = ".mod_term_func";
InlineAsmStart = "@ InlineAsm Start";
InlineAsmEnd = "@ InlineAsm End";
LCOMMDirective = "\t.lcomm\t";
COMMDirectiveTakesAlignment = false;
NeedsSet = true;
DwarfAbbrevSection = ".section __DWARF,__debug_abbrev,regular,debug";
DwarfInfoSection = ".section __DWARF,__debug_info,regular,debug";
DwarfLineSection = ".section __DWARF,__debug_line,regular,debug";
DwarfFrameSection = ".section __DWARF,__debug_frame,regular,debug";
DwarfPubNamesSection = ".section __DWARF,__debug_pubnames,regular,debug";
DwarfPubTypesSection = ".section __DWARF,__debug_pubtypes,regular,debug";
DwarfStrSection = ".section __DWARF,__debug_str,regular,debug";
DwarfLocSection = ".section __DWARF,__debug_loc,regular,debug";
DwarfARangesSection = ".section __DWARF,__debug_aranges,regular,debug";
DwarfRangesSection = ".section __DWARF,__debug_ranges,regular,debug";
DwarfMacInfoSection = ".section __DWARF,__debug_macinfo,regular,debug";
} else {
Data16bitsDirective = "\t.half\t";
Data32bitsDirective = "\t.word\t";
ZeroDirective = "\t.skip\t";
WeakRefDirective = "\t.weak\t";
StaticCtorsSection = "\t.section .ctors,\"aw\",%progbits";
StaticDtorsSection = "\t.section .dtors,\"aw\",%progbits";
}
AlignmentIsInBytes = false;
Data64bitsDirective = 0;
ZeroDirective = "\t.skip\t";
CommentString = "@";
DataSection = "\t.data";
ConstantPoolSection = "\t.text\n";
AlignmentIsInBytes = false;
WeakRefDirective = "\t.weak\t";
StaticCtorsSection = "\t.section .ctors,\"aw\",%progbits";
StaticDtorsSection = "\t.section .dtors,\"aw\",%progbits";
}

View File

@ -11,30 +11,32 @@
//
//===----------------------------------------------------------------------===//
#include "ARMTargetAsmInfo.h"
#include "ARMTargetMachine.h"
#include "ARMTargetAsmInfo.h"
#include "ARMFrameInfo.h"
#include "ARM.h"
#include "llvm/Module.h"
#include "llvm/PassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetMachineRegistry.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
static cl::opt<bool> DisableLdStOpti("disable-arm-loadstore-opti", cl::Hidden,
cl::desc("Disable load store optimization pass"));
namespace {
// Register the target.
RegisterTarget<ARMTargetMachine> X("arm", " ARM");
}
const TargetAsmInfo *ARMTargetMachine::createTargetAsmInfo() const {
return new ARMTargetAsmInfo(*this);
}
/// TargetMachine ctor - Create an ILP32 architecture model
///
ARMTargetMachine::ARMTargetMachine(const Module &M, const std::string &FS)
: DataLayout("e-p:32:32") {
: Subtarget(M, FS), DataLayout("e-p:32:32-d:32"), InstrInfo(Subtarget),
FrameInfo(Subtarget) {
if (Subtarget.isDarwin())
NoFramePointerElim = true;
}
unsigned ARMTargetMachine::getModuleMatchQuality(const Module &M) {
@ -49,14 +51,23 @@ unsigned ARMTargetMachine::getModuleMatchQuality(const Module &M) {
}
const TargetAsmInfo *ARMTargetMachine::createTargetAsmInfo() const {
return new ARMTargetAsmInfo(*this);
}
// Pass Pipeline Configuration
bool ARMTargetMachine::addInstSelector(FunctionPassManager &PM, bool Fast) {
PM.add(createARMISelDag(*this));
return false;
}
bool ARMTargetMachine::addPostRegAlloc(FunctionPassManager &PM, bool Fast) {
PM.add(createARMFixMulPass());
bool ARMTargetMachine::addPreEmitPass(FunctionPassManager &PM, bool Fast) {
// FIXME: temporarily disabling load / store optimization pass for Thumb mode.
if (!Fast && !DisableLdStOpti && !Subtarget.isThumb())
PM.add(createARMLoadStoreOptimizationPass());
PM.add(createARMConstantIslandPass());
return true;
}

View File

@ -20,19 +20,17 @@
#include "llvm/Target/TargetFrameInfo.h"
#include "ARMInstrInfo.h"
#include "ARMFrameInfo.h"
#include "ARMSubtarget.h"
namespace llvm {
class Module;
class ARMTargetMachine : public LLVMTargetMachine {
const TargetData DataLayout; // Calculates type size & alignment
ARMInstrInfo InstrInfo;
ARMFrameInfo FrameInfo;
protected:
virtual const TargetAsmInfo *createTargetAsmInfo() const;
ARMSubtarget Subtarget;
const TargetData DataLayout; // Calculates type size & alignment
ARMInstrInfo InstrInfo;
ARMFrameInfo FrameInfo;
public:
ARMTargetMachine(const Module &M, const std::string &FS);
@ -42,11 +40,14 @@ public:
return &InstrInfo.getRegisterInfo();
}
virtual const TargetData *getTargetData() const { return &DataLayout; }
virtual const ARMSubtarget *getSubtargetImpl() const { return &Subtarget; }
static unsigned getModuleMatchQuality(const Module &M);
virtual const TargetAsmInfo *createTargetAsmInfo() const;
// Pass Pipeline Configuration
virtual bool addInstSelector(FunctionPassManager &PM, bool Fast);
virtual bool addPostRegAlloc(FunctionPassManager &PM, bool Fast);
virtual bool addPreEmitPass(FunctionPassManager &PM, bool Fast);
virtual bool addAssemblyEmitter(FunctionPassManager &PM, bool Fast,
std::ostream &Out);
};

View File

@ -7,6 +7,7 @@
# License. See LICENSE.TXT for details.
#
##===----------------------------------------------------------------------===##
LEVEL = ../../..
LIBRARYNAME = LLVMARM
TARGET = ARM
@ -15,7 +16,6 @@ TARGET = ARM
BUILT_SOURCES = ARMGenRegisterInfo.h.inc ARMGenRegisterNames.inc \
ARMGenRegisterInfo.inc ARMGenInstrNames.inc \
ARMGenInstrInfo.inc ARMGenAsmWriter.inc \
ARMGenDAGISel.inc
ARMGenDAGISel.inc ARMGenSubtarget.inc
include $(LEVEL)/Makefile.common

View File

@ -0,0 +1,17 @@
//===---------------------------------------------------------------------===//
// Random ideas for the ARM backend (Thumb specific).
//===---------------------------------------------------------------------===//
* Add support for compiling functions in both ARM and Thumb mode, then taking
the smallest.
* Add support for compiling individual basic blocks in thumb mode, when in a
larger ARM function. This can be used for presumed cold code, like paths
to abort (failure path of asserts), EH handling code, etc.
* Thumb doesn't have normal pre/post increment addressing modes, but you can
load/store 32-bit integers with pre/postinc by using load/store multiple
instrs with a single register.
* Make better use of high registers r8, r10, r11, r12 (ip). Some variants of add
and cmp instructions can use high registers. Also, we can use them as
temporaries to spill values into.

View File

@ -2,69 +2,438 @@
// Random ideas for the ARM backend.
//===---------------------------------------------------------------------===//
Consider implementing a select with two conditional moves:

cmp x, y
moveq dst, a
movne dst, b

----------------------------------------------------------

%tmp1 = shl int %b, ubyte %c
%tmp4 = add int %a, %tmp1

compiles to

add r0, r0, r1, lsl r2

but

%tmp1 = shl int %b, ubyte %c
%tmp4 = add int %tmp1, %a

compiles to

mov r1, r1, lsl r2
add r0, r1, r0

---------------------------------------------------------

%tmp1 = shl int %b, ubyte 4
%tmp2 = add int %a, %tmp1

compiles to

mov r2, #4
add r0, r0, r1, lsl r2

should be

add r0, r0, r1, lsl #4

----------------------------------------------------------

add an offset to FLDS/FLDD/FSTD/FSTS addressing mode

----------------------------------------------------------

the function

void %f() {
entry:
call void %g( int 1, int 2, int 3, int 4, int 5 )
ret void
}

declare void %g(int, int, int, int, int)

Only needs 8 bytes of stack space. We currently allocate 16.

//===---------------------------------------------------------------------===//

Reimplement 'select' in terms of 'SEL'.

* We would really like to support UXTAB16, but we need to prove that the
add doesn't need to overflow between the two 16-bit chunks.

* implement predication support
* Implement pre/post increment support. (e.g. PR935)
* Coalesce stack slots!
* Implement smarter constant generation for binops with large immediates.

* Consider materializing FP constants like 0.0f and 1.0f using integer
immediate instructions then copy to FPU. Slower than load into FPU?

//===---------------------------------------------------------------------===//

The constant island pass is extremely naive. If a constant pool entry is
out of range, it *always* splits a block and inserts a copy of the cp
entry inline. It should:

1. Check to see if there is already a copy of this constant nearby. If so,
reuse it.
2. Instead of always splitting blocks to insert the constant, insert it in
nearby 'water'.
3. Constant island references should be ref counted. If a constant reference
is out-of-range, and the last reference to a constant is relocated, the
dead constant should be removed.

This pass has all the framework needed to implement this, but it hasn't
been done.

//===---------------------------------------------------------------------===//

We need to start generating predicated instructions. The .td files have a way
to express this now (see the PPC conditional return instruction), but the
branch folding pass (or a new if-cvt pass) should start producing these, at
least in the trivial case.

Among the obvious wins, doing so can eliminate the need to custom expand
copysign (i.e. we won't need to custom expand it to get the conditional
negate).

//===---------------------------------------------------------------------===//

Implement long long "X-3" with instructions that fold the immediate in. These
were disabled due to badness with the ARM carry flag on subtracts.

//===---------------------------------------------------------------------===//

We currently compile abs:

int foo(int p) { return p < 0 ? -p : p; }

into:

_foo:
rsb r1, r0, #0
cmn r0, #1
movgt r1, r0
mov r0, r1
bx lr

This is very, uh, literal. This could be a 3 operation sequence:

t = (p sra 31);
res = (p xor t) - t

Which would be better. This occurs in png decode.
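As a hedged illustration, a self-contained C++ version of that 3 operation
sequence (the helper name is invented; `>>` on a negative int is strictly
implementation-defined in C++, but it is an arithmetic shift on ARM and on
every mainstream compiler):

#include <cassert>
#include <cstdint>

// t is 0 when p >= 0 and all-ones when p < 0, so (p ^ t) - t yields p
// unchanged in the first case and (~p + 1) == -p in the second.
int32_t absNoBranch(int32_t p) {
  int32_t t = p >> 31;   // arithmetic shift: 0 or -1
  return (p ^ t) - t;
}

int main() {
  assert(absNoBranch(-5) == 5);
  assert(absNoBranch(7) == 7);
  assert(absNoBranch(0) == 0);
  return 0;
}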
//===---------------------------------------------------------------------===//
More load / store optimizations:
1) Look past instructions without side-effects (not load, store, branch, etc.)
when forming the list of loads / stores to optimize.
2) Smarter register allocation?
We are probably missing some opportunities to use ldm / stm. Consider:
ldr r5, [r0]
ldr r4, [r0, #4]
This cannot be merged into a ldm. Perhaps we will need to do the transformation
before register allocation. Then teach the register allocator to allocate a
chunk of consecutive registers.
3) Better representation for block transfer? This is from Olden/power:
fldd d0, [r4]
fstd d0, [r4, #+32]
fldd d0, [r4, #+8]
fstd d0, [r4, #+40]
fldd d0, [r4, #+16]
fstd d0, [r4, #+48]
fldd d0, [r4, #+24]
fstd d0, [r4, #+56]
If we can spare the registers, it would be better to use fldm and fstm here.
Need major register allocator enhancement though.
4) Can we recognize the relative position of constantpool entries? i.e. Treat
ldr r0, LCPI17_3
ldr r1, LCPI17_4
ldr r2, LCPI17_5
as
ldr r0, LCPI17
ldr r1, LCPI17+4
ldr r2, LCPI17+8
Then the ldr's can be combined into a single ldm. See Olden/power.
Note that for ARM v4, gcc uses ldmia to load a pair of 32-bit values that
represent a 64-bit double FP constant:
adr r0, L6
ldmia r0, {r0-r1}
.align 2
L6:
.long -858993459
.long 1074318540
5) Can we make use of ldrd and strd? Instead of generating ldm / stm, use
ldrd/strd instead if there are only two destination registers that form an
odd/even pair. However, we would probably pay a penalty if the address is not
aligned on an 8-byte boundary. This requires more information on load / store
nodes (and MI's?) than we currently carry.
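A hypothetical eligibility check for that ldrd/strd case (names invented for
the sketch; on ARMv5TE the first register must be even-numbered and the
second must be its odd successor):

// ldrd/strd move two registers that must form an even/odd pair (e.g.
// r0/r1), and the access is cheap only when it is 8-byte aligned.
static bool canUseLoadStoreDual(unsigned FirstReg, unsigned SecondReg,
                                unsigned AlignInBytes) {
  return (FirstReg % 2) == 0 &&        // even-numbered first register
         SecondReg == FirstReg + 1 &&  // followed by its odd partner
         AlignInBytes >= 8;            // avoid the misalignment penalty
}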
//===---------------------------------------------------------------------===//
* Consider this silly example:
double bar(double x) {
double r = foo(3.1);
return x+r;
}
_bar:
sub sp, sp, #16
str r4, [sp, #+12]
str r5, [sp, #+8]
str lr, [sp, #+4]
mov r4, r0
mov r5, r1
ldr r0, LCPI2_0
bl _foo
fmsr f0, r0
fcvtsd d0, f0
fmdrr d1, r4, r5
faddd d0, d0, d1
fmrrd r0, r1, d0
ldr lr, [sp, #+4]
ldr r5, [sp, #+8]
ldr r4, [sp, #+12]
add sp, sp, #16
bx lr
Ignore the prologue and epilogue stuff for a second. Note
mov r4, r0
mov r5, r1
the copies to callee-saved registers and the fact that they are only used by
the fmdrr instruction. It would have been better had the fmdrr been scheduled
before the call, placing the result in a callee-saved DPR register. The two
mov ops would not have been necessary.
//===---------------------------------------------------------------------===//
Calling convention related stuff:
* gcc's parameter passing implementation is terrible and we suffer as a result:
e.g.
struct s {
double d1;
int s1;
};
void foo(struct s S) {
printf("%g, %d\n", S.d1, S.s1);
}
'S' is passed via registers r0, r1, r2. But gcc stores them to the stack, and
then reloads them to r1, r2, and r3 before issuing the call (r0 contains the
address of the format string):
stmfd sp!, {r7, lr}
add r7, sp, #0
sub sp, sp, #12
stmia sp, {r0, r1, r2}
ldmia sp, {r1-r2}
ldr r0, L5
ldr r3, [sp, #8]
L2:
add r0, pc, r0
bl L_printf$stub
Instead of a stmia, ldmia, and a ldr, wouldn't it be better to do three moves?
* Returning an aggregate type is even worse:
e.g.
struct s foo(void) {
struct s S = {1.1, 2};
return S;
}
mov ip, r0
ldr r0, L5
sub sp, sp, #12
L2:
add r0, pc, r0
@ lr needed for prologue
ldmia r0, {r0, r1, r2}
stmia sp, {r0, r1, r2}
stmia ip, {r0, r1, r2}
mov r0, ip
add sp, sp, #12
bx lr
r0 (and later ip) is the hidden parameter from the caller: the address to
store the return value in. The first ldmia loads the constants into r0, r1,
r2. The last stmia stores r0, r1, r2 to the address passed in. However, there
is one additional stmia that stores r0, r1, and r2 to some stack location.
That store is dead.
The llvm-gcc generated code looks like this:
csretcc void %foo(%struct.s* %agg.result) {
entry:
%S = alloca %struct.s, align 4 ; <%struct.s*> [#uses=1]
%memtmp = alloca %struct.s ; <%struct.s*> [#uses=1]
cast %struct.s* %S to sbyte* ; <sbyte*>:0 [#uses=2]
call void %llvm.memcpy.i32( sbyte* %0, sbyte* cast ({ double, int }* %C.0.904 to sbyte*), uint 12, uint 4 )
cast %struct.s* %agg.result to sbyte* ; <sbyte*>:1 [#uses=2]
call void %llvm.memcpy.i32( sbyte* %1, sbyte* %0, uint 12, uint 0 )
cast %struct.s* %memtmp to sbyte* ; <sbyte*>:2 [#uses=1]
call void %llvm.memcpy.i32( sbyte* %2, sbyte* %1, uint 12, uint 0 )
ret void
}
llc ends up issuing two memcpy's (the first memcpy becomes 3 loads from the
constantpool). Perhaps we should 1) fix llvm-gcc so the memcpy is translated
into a number of loads and stores, or 2) custom lower memcpy (of small size)
to be ldmia / stmia. I think option 2 is better, but the current register
allocator cannot allocate a chunk of registers at a time.
A feasible temporary solution is to use specific physical registers at the
lowering time for small (<= 4 words?) transfer size.
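A minimal sketch of that size gate, assuming the `<= 4 words` threshold
floated above (helper name invented):

// Lower a small memcpy inline to ldmia/stmia only when it fits in the
// scratch registers we can afford and moves whole, word-aligned words.
static bool shouldLowerMemcpyToLDM(unsigned SizeInBytes,
                                   unsigned AlignInBytes) {
  return SizeInBytes <= 16 &&      // <= 4 words, per the note above
         AlignInBytes >= 4 &&      // word-aligned transfer
         (SizeInBytes % 4) == 0;   // whole words only
}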
----------------------------------------------------------

32 x 32 -> 64 multiplications currently use two instructions. We
should try to declare smull and umull as returning two values.

----------------------------------------------------------

Implement addressing modes 2 (ldrb) and 3 (ldrsb)

----------------------------------------------------------

* ARM CSRet calling convention requires the hidden argument to be returned by
the callee.

//===---------------------------------------------------------------------===//

We can definitely do a better job on BB placements to eliminate some branches.
It's very common to see llvm generated assembly code that looks like this:

LBB3:
...
LBB4:
...
  beq LBB3
  b LBB2

If BB4 is the only predecessor of BB3, then we can emit BB3 after BB4. We can
then eliminate the beq and turn the unconditional branch to LBB2 into a bne.

See McCat/18-imp/ComputeBoundingBoxes for an example.
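A toy model of that layout test, with simplified structures standing in for
the real MachineBasicBlock API:

#include <cstddef>

struct Block {
  Block *CondTarget;     // target of the trailing conditional branch (beq)
  Block *UncondTarget;   // target of the trailing unconditional branch (b)
  std::size_t NumPreds;  // number of predecessor blocks
};

// If Succ's only predecessor ends with a conditional branch to it, Succ
// can be emitted right after: the beq becomes a fallthrough and the
// unconditional branch turns into the inverted condition (bne).
static bool canMakeFallThrough(const Block &Pred, const Block &Succ) {
  return Succ.NumPreds == 1 && Pred.CondTarget == &Succ;
}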
//===---------------------------------------------------------------------===//
We need register scavenging. Currently, the 'ip' register is reserved in case
frame indexes are too big. This means that we generate extra code for stuff
like this:
void foo(unsigned x, unsigned y, unsigned z, unsigned *a, unsigned *b, unsigned *c) {
short Rconst = (short) (16384.0f * 1.40200 + 0.5 );
*a = x * Rconst;
*b = y * Rconst;
*c = z * Rconst;
}
we compile it to:
_foo:
*** stmfd sp!, {r4, r7}
*** add r7, sp, #4
mov r4, #186
orr r4, r4, #89, 24 @ 22784
mul r0, r0, r4
str r0, [r3]
mul r0, r1, r4
ldr r1, [sp, #+8]
str r0, [r1]
mul r0, r2, r4
ldr r1, [sp, #+12]
str r0, [r1]
*** sub sp, r7, #4
*** ldmfd sp!, {r4, r7}
bx lr
GCC produces:
_foo:
ldr ip, L4
mul r0, ip, r0
mul r1, ip, r1
str r0, [r3, #0]
ldr r3, [sp, #0]
mul r2, ip, r2
str r1, [r3, #0]
ldr r3, [sp, #4]
str r2, [r3, #0]
bx lr
L4:
.long 22970
This is apparently all because we couldn't use ip here.
//===---------------------------------------------------------------------===//
Pre-/post- indexed load / stores:
1) We should not make the pre/post-indexed load/store transform if the base
ptr is guaranteed to be live beyond the load/store. This can happen if the
base ptr is live out of the block in which we perform the optimization, e.g.:
mov r1, r2
ldr r3, [r1], #4
...
vs.
ldr r3, [r2]
add r1, r2, #4
...
In most cases, this is just a wasted optimization. However, sometimes it can
negatively impact performance because two-address code is more restrictive
when it comes to scheduling.
Unfortunately, liveout information is currently unavailable during DAG combine
time; a sketch of the intended check appears after this list.
2) Consider splitting an indexed load / store into a pair of add/sub + load/store
to solve #1 (in TwoAddressInstructionPass.cpp).
3) Enhance LSR to generate more opportunities for indexed ops.
4) Once we added support for multiple result patterns, write indexed loads
patterns instead of C++ instruction selection code.
5) Use FLDM / FSTM to emulate indexed FP load / store.
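For item 1), the intended check is roughly the following sketch (toy types;
the missing piece is exactly this liveout set):

#include <set>

// Fold the address update into a pre/post-indexed load/store only when
// the original base value is dead afterwards; otherwise the transform
// just forces an extra copy, like the "mov r1, r2" in the example above.
static bool shouldFormIndexedOp(unsigned BaseReg,
                                const std::set<unsigned> &LiveOutRegs) {
  return LiveOutRegs.count(BaseReg) == 0;
}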
//===---------------------------------------------------------------------===//
We should add i64 support to take advantage of the 64-bit load / stores.
We can add a pseudo i64 register class containing pseudo registers that are
register pairs. All other ops (e.g. add, sub) would be expanded as usual.
We need to add pseudo instructions (i.e. gethi / getlo) to extract i32 registers
from the i64 register. These are single moves which can be eliminated if the
destination register is a sub-register of the source. We should implement proper
subreg support in the register allocator to coalesce these away.
There are other minor issues such as multiple instructions for a spill / restore
/ move.
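As a toy model of that expansion (plain C++ standing in for the pseudo
instructions and register pairs):

#include <cstdint>

struct RegPair { uint32_t Lo, Hi; };  // pseudo i64 register: two i32 halves

// i64 add expands onto the halves: ADDS produces the carry out of the
// low word and ADC folds it into the high word.
static RegPair addPair(RegPair A, RegPair B) {
  RegPair R;
  R.Lo = A.Lo + B.Lo;                        // ADDS
  uint32_t Carry = (R.Lo < A.Lo) ? 1u : 0u;  // carry out of the low add
  R.Hi = A.Hi + B.Hi + Carry;                // ADC
  return R;
}

// The gethi / getlo pseudos are just projections of the pair.
static uint32_t getlo(const RegPair &R) { return R.Lo; }
static uint32_t gethi(const RegPair &R) { return R.Hi; }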
//===---------------------------------------------------------------------===//
Implement support for some more tricky ways to materialize immediates. For
example, to get 0xffff8000, we can use:
mov r9, #&3f8000
sub r9, r9, #&400000
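Both halves of that sequence are ordinary rotated 8-bit immediates, and the
subtraction wraps around to the desired constant; a quick self-contained
check (hypothetical helper, not the backend's actual encoder):

#include <cassert>
#include <cstdint>

// An ARM data-processing immediate is an 8-bit value rotated right by an
// even amount; testing all 16 even rotations covers every encoding.
static bool isARMRotatedImmediate(uint32_t V) {
  for (unsigned Rot = 0; Rot < 32; Rot += 2) {
    // Rotating V left by Rot undoes a rotate-right-by-Rot encoding.
    uint32_t Undone = Rot == 0 ? V : ((V << Rot) | (V >> (32 - Rot)));
    if (Undone < 256)
      return true;
  }
  return false;
}

int main() {
  assert(isARMRotatedImmediate(0x3f8000));  // the mov operand
  assert(isARMRotatedImmediate(0x400000));  // the sub operand
  assert(UINT32_C(0x3f8000) - UINT32_C(0x400000) == UINT32_C(0xffff8000));
  return 0;
}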
//===---------------------------------------------------------------------===//
We sometimes generate multiple add / sub instructions to update sp in prologue
and epilogue if the inc / dec value is too large to fit in a single immediate
operand. In some cases, perhaps it might be better to load the value from a
constantpool instead.
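The multi-instruction case follows from the immediate encoding: the
adjustment gets peeled into rotated-immediate chunks, roughly like this
hypothetical splitter:

#include <cstdint>
#include <vector>

// Split Amount into pieces that each fit an ARM rotated immediate: take
// 8 bits at a time, starting at an even bit position, lowest bits first.
static std::vector<uint32_t> splitStackAdjustment(uint32_t Amount) {
  std::vector<uint32_t> Chunks;
  while (Amount != 0) {
    unsigned Shift = 0;
    while ((Amount & (3u << Shift)) == 0)  // find lowest even-aligned bits
      Shift += 2;
    uint32_t Chunk = Amount & (0xFFu << Shift);  // one encodable chunk
    Chunks.push_back(Chunk);
    Amount &= ~Chunk;
  }
  return Chunks;  // one add/sub per chunk
}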
//===---------------------------------------------------------------------===//
GCC generates significantly better code for this function.
int foo(int StackPtr, unsigned char *Line, unsigned char *Stack, int LineLen) {
int i = 0;
if (StackPtr != 0) {
while (StackPtr != 0 && i < (((LineLen) < (32768))? (LineLen) : (32768)))
Line[i++] = Stack[--StackPtr];
if (LineLen > 32768)
{
while (StackPtr != 0 && i < LineLen)
{
i++;
--StackPtr;
}
}
}
return StackPtr;
}
//===---------------------------------------------------------------------===//
This should compile to the mlas instruction:
int mlas(int x, int y, int z) { return ((x * y + z) < 0) ? 7 : 13; }
//===---------------------------------------------------------------------===//
At some point, we should triage these to see if they still apply to us:
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19598
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=18560
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=27016
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11831
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11826
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11825
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11824
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11823
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11820
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=10982
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=10242
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9831
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9760
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9759
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9703
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9702
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9663
http://www.inf.u-szeged.hu/gcc-arm/
http://citeseer.ist.psu.edu/debus04linktime.html
//===---------------------------------------------------------------------===//