Second try of initial ARM/Thumb disassembler check-in. It consists of a tablegen

backend (ARMDecoderEmitter) which emits the decoder functions for ARM and Thumb,
and the disassembler core which invokes the decoder function and builds up the
MCInst based on the decoded Opcode.

Reviewed by Chris Lattner and Bob Wilson.

llvm-svn: 100233
This commit is contained in:
Johnny Chen 2010-04-02 22:27:38 +00:00
parent 7ad0ad0b9a
commit 7b999ea7b7
16 changed files with 8499 additions and 2 deletions

View File

@ -1612,6 +1612,11 @@ $(ObjDir)/%GenIntrinsics.inc.tmp : %.td $(ObjDir)/.dir
$(Echo) "Building $(<F) intrinsics information with tblgen"
$(Verb) $(TableGen) -gen-tgt-intrinsic -o $(call SYSPATH, $@) $<
$(ObjDir)/ARMGenDecoderTables.inc.tmp : ARM.td $(ObjDir)/.dir
$(Echo) "Building $(<F) decoder tables with tblgen"
$(Verb) $(TableGen) -gen-arm-decoder -o $(call SYSPATH, $@) $<
clean-local::
-$(Verb) $(RM) -f $(INCFiles)

View File

@ -457,6 +457,18 @@ inline int64_t abs64(int64_t x) {
return (x < 0) ? -x : x;
}
/// SignExtend32 - Sign extend B-bit number x to 32-bit int.
/// Usage int32_t r = SignExtend32<5>(x);
///
/// B must be in the range [1, 32]. Bit B-1 of x is treated as the sign bit
/// and is replicated into bits 31..B of the result; bits above B-1 in the
/// input are discarded.
template <unsigned B> inline int32_t SignExtend32(int32_t x) {
  // Do the left shift on an unsigned copy: shifting a set bit into (or past)
  // the sign bit of a signed operand is undefined behavior. The right shift
  // is still done on a signed value so that the sign bit is propagated.
  return int32_t(uint32_t(x) << (32 - B)) >> (32 - B);
}
/// SignExtend64 - Sign extend B-bit number x to 64-bit int.
/// Usage int64_t r = SignExtend64<5>(x);
///
/// B must be in the range [1, 64]. Bit B-1 of x is treated as the sign bit
/// and is replicated into bits 63..B of the result; bits above B-1 in the
/// input are discarded.
///
/// The argument is taken as int64_t (not int32_t) so that the shifts below are
/// performed on a 64-bit operand: shifting a 32-bit value by (64 - B) >= 32 is
/// undefined, and a 32-bit parameter would also truncate inputs wider than 32
/// bits. Callers passing a 32-bit value are unaffected.
template <unsigned B> inline int64_t SignExtend64(int64_t x) {
  // Left shift on an unsigned copy to avoid signed-overflow undefined
  // behavior; right shift on a signed value to propagate the sign bit.
  return int64_t(uint64_t(x) << (64 - B)) >> (64 - B);
}
} // End llvm namespace
#endif

View File

@ -0,0 +1,532 @@
//===- ARMDisassembler.cpp - Disassembler for ARM/Thumb ISA -----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is part of the ARM Disassembler.
// It contains code to implement the public interfaces of ARMDisassembler and
// ThumbDisassembler, both of which are instances of MCDisassembler.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "arm-disassembler"
#include "ARMDisassembler.h"
#include "ARMDisassemblerCore.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Target/TargetRegistry.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
/// ARMGenDecoderTables.inc - ARMGenDecoderTables.inc is tblgen'ed from the
/// ARMDecoderEmitter.cpp TableGen backend. It contains:
///
/// o Mappings from opcode to ARM/Thumb instruction format
///
/// o static uint16_t decodeInstruction(uint32_t insn) - the decoding function
/// for an ARM instruction.
///
/// o static uint16_t decodeThumbInstruction(field_t insn) - the decoding
/// function for a Thumb instruction.
///
#include "../ARMGenDecoderTables.inc"
namespace llvm {
/// showBitVector - Write a diagnostic picture of the 32 individual bits of
/// insn to os: a header row of bit positions (31 down to 0), a rule, one row
/// with the bit values, a closing rule and a blank line.
///
static inline void showBitVector(raw_ostream &os, const uint32_t &insn) {
  // Header: the bit position markers are split over two literals so that the
  // source fits in 80 columns.
  os << " 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11"
     << " 10 9 8 7 6 5 4 3 2 1 0 \n";
  os << "---------------------------------------------------------------"
     << "----------------------------------\n";

  // Bit values, most significant bit first. Every group of four bits is
  // closed with '|'; bits inside a group are separated by ':'.
  os << '|';
  for (unsigned Bit = 32; Bit-- > 0; ) {
    os << (((insn >> Bit) & 0x01) ? " 1" : " 0");
    os << ((Bit % 4 == 0) ? '|' : ':');
  }
  os << '\n';

  // Closing rule (again split over two literals to fit 80 columns), followed
  // by an empty line.
  os << "---------------------------------------------------------------"
     << "----------------------------------\n";
  os << '\n';
}
/// decodeARMInstruction is a wrapper which tries special cases of instruction
/// matching before calling the auto-generated decoder function.
///
/// Each special case below inspects fixed bit fields of insn and, on a match,
/// returns the ARM:: opcode directly. The checks are tried in the order
/// written and the first match wins; anything that does not match falls
/// through to decodeInstruction(). insn is taken by reference but is only read
/// in this function.
static unsigned decodeARMInstruction(uint32_t &insn) {
// Inst{31-28} == 0b1111: no special casing, go straight to the generated
// decoder.
if (slice(insn, 31, 28) == 15)
goto AutoGenedDecoder;
// Special case processing, if any, goes here....
// LLVM combines the offset mode of A8.6.197 & A8.6.198 into STRB.
// The insufficient encoding information of the combined instruction confuses
// the decoder wrt BFC/BFI. Therefore, we try to recover here.
// For BFC, Inst{27-21} = 0b0111110 & Inst{6-0} = 0b0011111.
// For BFI, Inst{27-21} = 0b0111110 & Inst{6-4} = 0b001 & Inst{3-0} != 0b1111.
if (slice(insn, 27, 21) == 0x3e && slice(insn, 6, 4) == 1) {
if (slice(insn, 3, 0) == 15)
return ARM::BFC;
else
return ARM::BFI;
}
// Ditto for ADDSrs, which is a super-instruction for A8.6.7 & A8.6.8.
// As a result, the decoder fails to decode UMULL properly.
if (slice(insn, 27, 21) == 0x04 && slice(insn, 7, 4) == 9) {
return ARM::UMULL;
}
// Ditto for STR_PRE, which is a super-instruction for A8.6.194 & A8.6.195.
// As a result, the decoder fails to decode SBFX properly.
if (slice(insn, 27, 21) == 0x3d && slice(insn, 6, 4) == 5)
return ARM::SBFX;
// And STRB_PRE, which is a super-instruction for A8.6.197 & A8.6.198.
// As a result, the decoder fails to decode UBFX properly.
if (slice(insn, 27, 21) == 0x3f && slice(insn, 6, 4) == 5)
return ARM::UBFX;
// Ditto for STRT, which is a super-instruction for A8.6.210 Encoding A1 & A2.
// As a result, the decoder fails to decode SSAT properly.
// Inst{6} selects between the lsl (0) and asr (1) variants.
if (slice(insn, 27, 21) == 0x35 && slice(insn, 5, 4) == 1)
return slice(insn, 6, 6) == 0 ? ARM::SSATlsl : ARM::SSATasr;
// Ditto for RSCrs, which is a super-instruction for A8.6.146 & A8.6.147.
// As a result, the decoder fails to decode STRHT/LDRHT/LDRSHT/LDRSBT.
// Dispatch on Inst{21-20} first, then on Inst{7-4}.
if (slice(insn, 27, 24) == 0) {
switch (slice(insn, 21, 20)) {
case 2:
switch (slice(insn, 7, 4)) {
case 11:
return ARM::STRHT;
default:
break; // fallthrough
}
break;
case 3:
switch (slice(insn, 7, 4)) {
case 11:
return ARM::LDRHT;
case 13:
return ARM::LDRSBT;
case 15:
return ARM::LDRSHT;
default:
break; // fallthrough
}
break;
default:
break; // fallthrough
}
}
// Ditto for SBCrs, which is a super-instruction for A8.6.152 & A8.6.153.
// As a result, the decoder fails to decode STRH_Post/LDRD_POST/STRD_POST
// properly.
// This block handles Inst{20} == 0. PW packs Inst{24} (high bit) and Inst{21}
// (low bit); the value 1 is not handled here and falls through.
if (slice(insn, 27, 25) == 0 && slice(insn, 20, 20) == 0) {
unsigned PW = slice(insn, 24, 24) << 1 | slice(insn, 21, 21);
switch (slice(insn, 7, 4)) {
case 11:
switch (PW) {
case 2: // Offset
return ARM::STRH;
case 3: // Pre-indexed
return ARM::STRH_PRE;
case 0: // Post-indexed
return ARM::STRH_POST;
default:
break; // fallthrough
}
break;
case 13:
switch (PW) {
case 2: // Offset
return ARM::LDRD;
case 3: // Pre-indexed
return ARM::LDRD_PRE;
case 0: // Post-indexed
return ARM::LDRD_POST;
default:
break; // fallthrough
}
break;
case 15:
switch (PW) {
case 2: // Offset
return ARM::STRD;
case 3: // Pre-indexed
return ARM::STRD_PRE;
case 0: // Post-indexed
return ARM::STRD_POST;
default:
break; // fallthrough
}
break;
default:
break; // fallthrough
}
}
// Ditto for SBCSSrs, which is a super-instruction for A8.6.152 & A8.6.153.
// As a result, the decoder fails to decode LDRH_POST/LDRSB_POST/LDRSH_POST
// properly.
// Same structure as the block above, but for Inst{20} == 1.
if (slice(insn, 27, 25) == 0 && slice(insn, 20, 20) == 1) {
unsigned PW = slice(insn, 24, 24) << 1 | slice(insn, 21, 21);
switch (slice(insn, 7, 4)) {
case 11:
switch (PW) {
case 2: // Offset
return ARM::LDRH;
case 3: // Pre-indexed
return ARM::LDRH_PRE;
case 0: // Post-indexed
return ARM::LDRH_POST;
default:
break; // fallthrough
}
break;
case 13:
switch (PW) {
case 2: // Offset
return ARM::LDRSB;
case 3: // Pre-indexed
return ARM::LDRSB_PRE;
case 0: // Post-indexed
return ARM::LDRSB_POST;
default:
break; // fallthrough
}
break;
case 15:
switch (PW) {
case 2: // Offset
return ARM::LDRSH;
case 3: // Pre-indexed
return ARM::LDRSH_PRE;
case 0: // Post-indexed
return ARM::LDRSH_POST;
default:
break; // fallthrough
}
break;
default:
break; // fallthrough
}
}
AutoGenedDecoder:
// Calling the auto-generated decoder function.
return decodeInstruction(insn);
}
// T2Morph2LoadLiteral - Map a decoded Thumb2 load opcode onto its "pci"
// (load literal) counterpart. Every addressing-mode variant of a given load
// (post-/pre-indexed, i12, i8, s) collapses onto the single pci opcode of the
// same width/signedness; any opcode not listed is returned unchanged.
// See, for example, A6.3.7 Load word: Table A6-18 Load word, and
// A8.6.57 T3, T4 & A8.6.60 T2 and friends for why the opcode is morphed
// before it is returned.
static unsigned T2Morph2LoadLiteral(unsigned Opcode) {
  switch (Opcode) {
  // Load dual.
  case ARM::t2LDRDi8:
    return ARM::t2LDRDpci;

  // Load word.
  case ARM::t2LDR_POST:
  case ARM::t2LDR_PRE:
  case ARM::t2LDRi12:
  case ARM::t2LDRi8:
  case ARM::t2LDRs:
    return ARM::t2LDRpci;

  // Load byte.
  case ARM::t2LDRB_POST:
  case ARM::t2LDRB_PRE:
  case ARM::t2LDRBi12:
  case ARM::t2LDRBi8:
  case ARM::t2LDRBs:
    return ARM::t2LDRBpci;

  // Load halfword.
  case ARM::t2LDRH_POST:
  case ARM::t2LDRH_PRE:
  case ARM::t2LDRHi12:
  case ARM::t2LDRHi8:
  case ARM::t2LDRHs:
    return ARM::t2LDRHpci;

  // Load signed byte.
  case ARM::t2LDRSB_POST:
  case ARM::t2LDRSB_PRE:
  case ARM::t2LDRSBi12:
  case ARM::t2LDRSBi8:
  case ARM::t2LDRSBs:
    return ARM::t2LDRSBpci;

  // Load signed halfword.
  case ARM::t2LDRSH_POST:
  case ARM::t2LDRSH_PRE:
  case ARM::t2LDRSHi12:
  case ARM::t2LDRSHi8:
  case ARM::t2LDRSHs:
    return ARM::t2LDRSHpci;

  default:
    // Not a load we know how to morph: hand the opcode back untouched.
    return Opcode;
  }
}
/// decodeThumbSideEffect - Decode a Thumb instruction, possibly rewriting bits
/// of insn or morphing the returned opcode when IsThumb2 is true.
///
/// For a 16-bit instruction (IsThumb2 == false) this is simply
/// decodeThumbInstruction(insn). For a 32-bit instruction the checks below are
/// tried in order:
///
///  1. Advanced SIMD data-processing: Inst{28-24} is rewritten to the ARM
///     layout and the result is decoded with decodeInstruction().
///  2. Advanced SIMD element/structure load/store: Inst{27-24} is rewritten to
///     0b0100 and the result is decoded with decodeInstruction().
///  3. Load byte/halfword/word with Rn == 0b1111: the Thumb2 opcode is decoded
///     and then passed through T2Morph2LoadLiteral().
///  4. Remaining NEON/VFP encodings: decodeInstruction() on the unmodified
///     insn.
///
/// Otherwise decodeThumbInstruction() is called with the original insn.
/// Note that in cases 1 and 2 the caller's insn is modified in place.
static unsigned decodeThumbSideEffect(bool IsThumb2, uint32_t &insn) {
  // 16-bit Thumb: nothing to twiddle.
  if (!IsThumb2)
    return decodeThumbInstruction(insn);

  // A6.3 32-bit Thumb instruction encoding
  // Table A6-9 32-bit Thumb instruction encoding
  uint16_t op1 = slice(insn, 28, 27);
  uint16_t op2 = slice(insn, 26, 20);

  // The coprocessor instructions of interest are transformed to their ARM
  // equivalents.

  // --------- Transform Begin Marker ---------
  // A7.4 Advanced SIMD data-processing instructions
  if ((op1 == 1 || op1 == 3) && slice(op2, 6, 4) == 7) {
    // U bit of Thumb corresponds to Inst{24} of ARM.
    uint16_t U = slice(op1, 1, 1);

    // Inst{28-24} of ARM = {1,0,0,1,U};
    uint16_t bits28_24 = 9 << 1 | U;
    DEBUG(showBitVector(errs(), insn));
    setSlice(insn, 28, 24, bits28_24);
    return decodeInstruction(insn);
  }

  // A7.7 Advanced SIMD element or structure load/store instructions
  if (op1 == 3 && slice(op2, 6, 4) == 1 && slice(op2, 0, 0) == 0) {
    // Inst{27-24} of Thumb = 0b1001
    // Inst{27-24} of ARM   = 0b0100
    DEBUG(showBitVector(errs(), insn));
    setSlice(insn, 27, 24, 4);
    return decodeInstruction(insn);
  }
  // --------- Transform End Marker ---------

  // See, for example, A6.3.7 Load word: Table A6-18 Load word.
  // See A8.6.57 T3, T4 & A8.6.60 T2 and friends for why we morphed the opcode
  // before returning it to our caller.
  if (op1 == 3 && slice(op2, 6, 5) == 0 && slice(op2, 0, 0) == 1
      && slice(insn, 19, 16) == 15) {
    unsigned ThumbOpcode = decodeThumbInstruction(insn);
    return T2Morph2LoadLiteral(ThumbOpcode);
  }

  // One last check for NEON/VFP instructions.
  if ((op1 == 1 || op1 == 3) && slice(op2, 6, 6) == 1)
    return decodeInstruction(insn);

  // Plain Thumb2 instruction.
  return decodeThumbInstruction(insn);
}
/// Thumb2PreloadOpcodeNoPCI - Return true if Opcode is one of the Thumb2
/// preload opcodes (t2PLD*, t2PLDW*, t2PLI*) in its i12, i8, r or s form,
/// i.e. any of the forms other than pci. Returns false for everything else.
static inline bool Thumb2PreloadOpcodeNoPCI(unsigned Opcode) {
  switch (Opcode) {
  // PLD
  case ARM::t2PLDi12:
  case ARM::t2PLDi8:
  case ARM::t2PLDr:
  case ARM::t2PLDs:
  // PLDW
  case ARM::t2PLDWi12:
  case ARM::t2PLDWi8:
  case ARM::t2PLDWr:
  case ARM::t2PLDWs:
  // PLI
  case ARM::t2PLIi12:
  case ARM::t2PLIi8:
  case ARM::t2PLIr:
  case ARM::t2PLIs:
    return true;

  default:
    return false;
  }
}
/// T2Morph2Preload2PCI - Map a Thumb2 preload opcode in its i12, i8, r or s
/// form onto the pci opcode of the same family (PLD, PLDW or PLI). Returns 0
/// for any opcode that is not one of those preload forms.
static inline unsigned T2Morph2Preload2PCI(unsigned Opcode) {
  switch (Opcode) {
  case ARM::t2PLDi12:
  case ARM::t2PLDi8:
  case ARM::t2PLDr:
  case ARM::t2PLDs:
    return ARM::t2PLDpci;

  case ARM::t2PLDWi12:
  case ARM::t2PLDWi8:
  case ARM::t2PLDWr:
  case ARM::t2PLDWs:
    return ARM::t2PLDWpci;

  case ARM::t2PLIi12:
  case ARM::t2PLIi8:
  case ARM::t2PLIr:
  case ARM::t2PLIs:
    return ARM::t2PLIpci;

  default:
    // Not a preload opcode we can morph.
    return 0;
  }
}
//
// Public interface for the disassembler
//
/// getInstruction - Decode the 4-byte ARM instruction found at Address in
/// Region and populate MI with its opcode and operands.
///
/// \param MI      - The MCInst to fill in.
/// \param Size    - Set to 4 once the instruction bytes have been read.
/// \param Region  - The memory object to read the instruction bytes from.
/// \param Address - The address of the first instruction byte in Region.
/// \param os      - Unused by this implementation.
/// \return true on success; false if the bytes cannot be read, if no builder
///         can be created for the decoded opcode, or if building MI fails.
bool ARMDisassembler::getInstruction(MCInst &MI,
                                     uint64_t &Size,
                                     const MemoryObject &Region,
                                     uint64_t Address,
                                     raw_ostream &os) const {
  // We want to read exactly 4 bytes of data.
  uint8_t bytes[4];
  if (Region.readBytes(Address, 4, bytes, NULL) == -1)
    return false;

  // The machine instruction. Assemble the word explicitly from the
  // little-endian byte stream instead of reading through a uint32_t pointer,
  // so that the result does not depend on the byte order of the host.
  uint32_t insn = (uint32_t(bytes[3]) << 24) |
                  (uint32_t(bytes[2]) << 16) |
                  (uint32_t(bytes[1]) <<  8) |
                  (uint32_t(bytes[0]) <<  0);

  unsigned Opcode = decodeARMInstruction(insn);
  ARMFormat Format = ARMFormats[Opcode];
  Size = 4;

  DEBUG({
      errs() << "Opcode=" << Opcode << " Name=" << ARMUtils::OpcodeName(Opcode)
             << " Format=" << stringForARMFormat(Format) << '(' << (int)Format
             << ")\n";
      showBitVector(errs(), insn);
    });

  ARMBasicMCBuilder *Builder = CreateMCBuilder(Opcode, Format);
  if (!Builder)
    return false;

  // We own Builder (see CreateMCBuilder): release it whether or not the build
  // succeeds, so a failed build does not leak it.
  bool Built = Builder->Build(MI, insn);
  delete Builder;
  return Built;
}
/// getInstruction - Decode the 16-bit or 32-bit Thumb instruction found at
/// Address in Region and populate MI with its opcode and operands.
///
/// \param MI      - The MCInst to fill in.
/// \param Size    - Set to 2 for a 16-bit instruction, 4 for a 32-bit one.
/// \param Region  - The memory object to read the instruction bytes from.
/// \param Address - The address of the first instruction byte in Region.
/// \param os      - Unused by this implementation.
/// \return true on success; false if the bytes cannot be read, if no builder
///         can be created for the decoded opcode, or if building MI fails.
bool ThumbDisassembler::getInstruction(MCInst &MI,
                                       uint64_t &Size,
                                       const MemoryObject &Region,
                                       uint64_t Address,
                                       raw_ostream &os) const {
  // A6.1 Thumb instruction set encoding
  //
  // If bits [15:11] of the halfword being decoded take any of the following
  // values, the halfword is the first halfword of a 32-bit instruction:
  // o 0b11101
  // o 0b11110
  // o 0b11111.
  //
  // Otherwise, the halfword is a 16-bit instruction.

  // Read 2 bytes of data first. Each halfword is assembled explicitly from
  // the little-endian byte stream so the result does not depend on the byte
  // order of the host.
  uint8_t bytes[2];
  if (Region.readBytes(Address, 2, bytes, NULL) == -1)
    return false;

  // The machine instruction.
  uint32_t insn = (uint32_t(bytes[1]) << 8) | uint32_t(bytes[0]);

  unsigned bits15_11 = slice(insn, 15, 11);
  bool IsThumb2 = false;

  // 32-bit instructions if the bits [15:11] of the halfword matches
  // { 0b11101 /* 0x1D */, 0b11110 /* 0x1E */, 0b11111 /* 0x1F */ }.
  if (bits15_11 == 0x1D || bits15_11 == 0x1E || bits15_11 == 0x1F) {
    IsThumb2 = true;
    if (Region.readBytes(Address + 2, 2, bytes, NULL) == -1)
      return false;
    uint32_t insn1 = (uint32_t(bytes[1]) << 8) | uint32_t(bytes[0]);
    // First halfword goes in the upper 16 bits, second in the lower 16 bits.
    insn = (insn << 16 | insn1);
  }

  // The insn could potentially be bit-twiddled in order to be decoded as an ARM
  // NEON/VFP opcode. In such case, the modified insn is later disassembled as
  // an ARM NEON/VFP instruction.
  //
  // This is a short term solution for lack of encoding bits specified for the
  // Thumb2 NEON/VFP instructions. The long term solution could be adding some
  // infrastructure to have each instruction support more than one encodings.
  // Which encoding is used would be based on which subtarget the compiler/
  // disassembler is working with at the time. This would allow the sharing of
  // the NEON patterns between ARM and Thumb2, as well as potential greater
  // sharing between the regular ARM instructions and the 32-bit wide Thumb2
  // instructions as well.
  unsigned Opcode = decodeThumbSideEffect(IsThumb2, insn);

  // A8.6.117/119/120/121.
  // PLD/PLDW/PLI instructions with Rn==15 is transformed to the pci variant.
  if (Thumb2PreloadOpcodeNoPCI(Opcode) && slice(insn, 19, 16) == 15)
    Opcode = T2Morph2Preload2PCI(Opcode);

  ARMFormat Format = ARMFormats[Opcode];
  Size = IsThumb2 ? 4 : 2;

  DEBUG({
      errs() << "Opcode=" << Opcode << " Name=" << ARMUtils::OpcodeName(Opcode)
             << " Format=" << stringForARMFormat(Format) << '(' << (int)Format
             << ")\n";
      showBitVector(errs(), insn);
    });

  ARMBasicMCBuilder *Builder = CreateMCBuilder(Opcode, Format);
  // CreateMCBuilder may return NULL; check before touching the builder.
  if (!Builder)
    return false;

  // Hand the builder our session object so it can consult/advance the IT
  // block state.
  Builder->setSession(const_cast<Session *>(&SO));

  // We own Builder (see CreateMCBuilder): release it whether or not the build
  // succeeds, so a failed build does not leak it.
  bool Built = Builder->Build(MI, insn);
  delete Builder;
  return Built;
}
// A8.6.50
// CountITSize - Derive a count from the 4-bit IT mask: 4 minus the number of
// trailing zero bits in ITMask. The mask is expected to have at least one of
// its low four bits set (asserted). The caller in this file stores the result
// as the IT block instruction counter.
static unsigned short CountITSize(unsigned ITMask) {
  const unsigned TrailingZeros = CountTrailingZeros_32(ITMask);
  assert(TrailingZeros <= 3 && "Encoding error");
  return static_cast<unsigned short>(4 - TrailingZeros);
}
/// InitIT - Start tracking a new IT block. ITState is set to bits7_0, and
/// ITCounter to the count derived from the mask held in bits [3:0] of
/// bits7_0.
void Session::InitIT(unsigned short bits7_0) {
  const unsigned Mask = slice(bits7_0, 3, 0);
  ITCounter = CountITSize(Mask);
  ITState = bits7_0;
}
/// UpdateIT - Advance the IT block by one instruction. Must only be called
/// while ITCounter is non-zero (asserted). When the counter reaches zero
/// ITState is cleared; otherwise ITState[4:0] is shifted left by one bit,
/// leaving ITState[7:5] untouched.
void Session::UpdateIT() {
  assert(ITCounter);

  if (--ITCounter == 0) {
    // That was the last instruction of the block.
    ITState = 0;
    return;
  }

  unsigned short NewITState4_0 = slice(ITState, 4, 0) << 1;
  setSlice(ITState, 4, 0, NewITState4_0);
}
/// createARMDisassembler - Factory callback registered with the
/// TargetRegistry; returns a newly allocated ARMDisassembler. The Target
/// argument is not used.
static MCDisassembler *createARMDisassembler(const Target &T) {
  return new ARMDisassembler();
}
/// createThumbDisassembler - Factory callback registered with the
/// TargetRegistry; returns a newly allocated ThumbDisassembler. The Target
/// argument is not used.
static MCDisassembler *createThumbDisassembler(const Target &T) {
  return new ThumbDisassembler();
}
/// LLVMInitializeARMDisassembler - C entry point that registers the
/// disassembler factory functions for the ARM and Thumb targets with the
/// TargetRegistry.
extern "C" void LLVMInitializeARMDisassembler() {
  // ARM target -> ARMDisassembler.
  TargetRegistry::RegisterMCDisassembler(TheARMTarget, createARMDisassembler);

  // Thumb target -> ThumbDisassembler.
  TargetRegistry::RegisterMCDisassembler(TheThumbTarget,
                                         createThumbDisassembler);
}
} // namespace llvm

View File

@ -0,0 +1,91 @@
//===- ARMDisassembler.h - Disassembler for ARM/Thumb ISA -------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is part of the ARM Disassembler.
// It contains the header for ARMDisassembler and ThumbDisassembler, both are
// subclasses of MCDisassembler.
//
//===----------------------------------------------------------------------===//
#ifndef ARMDISASSEMBLER_H
#define ARMDISASSEMBLER_H
#include "llvm/MC/MCDisassembler.h"
namespace llvm {
class MCInst;
class MemoryObject;
class raw_ostream;
/// ARMDisassembler - ARM disassembler for all ARM platforms. It carries no
/// state of its own.
class ARMDisassembler : public MCDisassembler {
public:
  /// Constructor - Initializes the disassembler.
  ARMDisassembler() : MCDisassembler() {}

  ~ARMDisassembler() {}

  /// getInstruction - See MCDisassembler.
  bool getInstruction(MCInst &instr, uint64_t &size,
                      const MemoryObject &region, uint64_t address,
                      raw_ostream &vStream) const;
};
// Forward declaration.
class ARMBasicMCBuilder;
/// Session - Keep track of the IT Block progression. ARMBasicMCBuilder is a
/// friend so that it can read the counter and state directly.
class Session {
  friend class ARMBasicMCBuilder;

  unsigned ITCounter; // Possible values: 0, 1, 2, 3, 4.
  unsigned ITState;   // A2.5.2 Consists of IT[7:5] and IT[4:0] initially.

public:
  /// A fresh session starts with both ITCounter and ITState set to zero.
  Session() : ITCounter(0), ITState(0) {}
  ~Session() {}

  /// InitIT - Initializes ITCounter/ITState.
  void InitIT(unsigned short bits7_0);

  /// UpdateIT - Updates ITCounter/ITState as IT Block progresses.
  void UpdateIT();
};
/// ThumbDisassembler - Thumb disassembler for all ARM platforms. In addition
/// to decoding, it owns a Session object which tracks IT block progression
/// across successive getInstruction() calls.
class ThumbDisassembler : public MCDisassembler {
public:
  /// Constructor - Initializes the disassembler.
  ///
  ThumbDisassembler() :
    MCDisassembler(), SO() {
  }

  ~ThumbDisassembler() {
  }

  /// getInstruction - See MCDisassembler.
  bool getInstruction(MCInst &instr,
                      uint64_t &size,
                      const MemoryObject &region,
                      uint64_t address,
                      raw_ostream &vStream) const;

private:
  /// The IT block tracking state. getInstruction() is const (as required by
  /// the MCDisassembler interface) yet hands a non-const pointer to this
  /// object to the builder, so it is declared mutable: that keeps any update
  /// made through that pointer well-defined even when the disassembler itself
  /// is accessed through a const object.
  mutable Session SO;
};
} // namespace llvm
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,280 @@
//===- ARMDisassemblerCore.h - ARM disassembler helpers ---------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is part of the ARM Disassembler.
//
// The first part defines the enumeration type of ARM instruction format, which
// specifies the encoding used by the instruction, as well as a helper function
// to convert the enums to printable char strings.
//
// It also contains code to represent the concepts of Builder, Builder Factory,
// as well as the Algorithm to solve the problem of disassembling an ARM instr.
//
//===----------------------------------------------------------------------===//
#ifndef ARMDISASSEMBLERCORE_H
#define ARMDISASSEMBLERCORE_H
#include "llvm/MC/MCInst.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "ARMInstrInfo.h"
#include "ARMDisassembler.h"
namespace llvm {
/// ARMUtils - Holder for static utility functions used by the disassembler.
class ARMUtils {
public:
/// OpcodeName - Return a printable name for Opcode. Defined elsewhere; used
/// by the disassembler's debug output.
static const char *OpcodeName(unsigned Opcode);
};
/////////////////////////////////////////////////////
// //
// Enums and Utilities for ARM Instruction Format //
// //
/////////////////////////////////////////////////////
// ARM_FORMATS is an X-macro list: every ENTRY(name, value) names one ARM
// instruction format and its numeric value. Users define ENTRY to the
// expansion they want, expand ARM_FORMATS, and then #undef ENTRY (see the
// ARMFormat enum and stringForARMFormat below). Values are consecutive,
// starting from 0.
#define ARM_FORMATS \
ENTRY(ARM_FORMAT_PSEUDO, 0) \
ENTRY(ARM_FORMAT_MULFRM, 1) \
ENTRY(ARM_FORMAT_BRFRM, 2) \
ENTRY(ARM_FORMAT_BRMISCFRM, 3) \
ENTRY(ARM_FORMAT_DPFRM, 4) \
ENTRY(ARM_FORMAT_DPSOREGFRM, 5) \
ENTRY(ARM_FORMAT_LDFRM, 6) \
ENTRY(ARM_FORMAT_STFRM, 7) \
ENTRY(ARM_FORMAT_LDMISCFRM, 8) \
ENTRY(ARM_FORMAT_STMISCFRM, 9) \
ENTRY(ARM_FORMAT_LDSTMULFRM, 10) \
ENTRY(ARM_FORMAT_LDSTEXFRM, 11) \
ENTRY(ARM_FORMAT_ARITHMISCFRM, 12) \
ENTRY(ARM_FORMAT_EXTFRM, 13) \
ENTRY(ARM_FORMAT_VFPUNARYFRM, 14) \
ENTRY(ARM_FORMAT_VFPBINARYFRM, 15) \
ENTRY(ARM_FORMAT_VFPCONV1FRM, 16) \
ENTRY(ARM_FORMAT_VFPCONV2FRM, 17) \
ENTRY(ARM_FORMAT_VFPCONV3FRM, 18) \
ENTRY(ARM_FORMAT_VFPCONV4FRM, 19) \
ENTRY(ARM_FORMAT_VFPCONV5FRM, 20) \
ENTRY(ARM_FORMAT_VFPLDSTFRM, 21) \
ENTRY(ARM_FORMAT_VFPLDSTMULFRM, 22) \
ENTRY(ARM_FORMAT_VFPMISCFRM, 23) \
ENTRY(ARM_FORMAT_THUMBFRM, 24) \
ENTRY(ARM_FORMAT_NEONFRM, 25) \
ENTRY(ARM_FORMAT_NEONGETLNFRM, 26) \
ENTRY(ARM_FORMAT_NEONSETLNFRM, 27) \
ENTRY(ARM_FORMAT_NEONDUPFRM, 28) \
ENTRY(ARM_FORMAT_MISCFRM, 29) \
ENTRY(ARM_FORMAT_THUMBMISCFRM, 30) \
ENTRY(ARM_FORMAT_NLdSt, 31) \
ENTRY(ARM_FORMAT_N1RegModImm, 32) \
ENTRY(ARM_FORMAT_N2Reg, 33) \
ENTRY(ARM_FORMAT_NVCVT, 34) \
ENTRY(ARM_FORMAT_NVecDupLn, 35) \
ENTRY(ARM_FORMAT_N2RegVecShL, 36) \
ENTRY(ARM_FORMAT_N2RegVecShR, 37) \
ENTRY(ARM_FORMAT_N3Reg, 38) \
ENTRY(ARM_FORMAT_N3RegVecSh, 39) \
ENTRY(ARM_FORMAT_NVecExtract, 40) \
ENTRY(ARM_FORMAT_NVecMulScalar, 41) \
ENTRY(ARM_FORMAT_NVTBL, 42)
// ARM instruction format specifies the encoding used by the instruction.
// The enumerators are generated from the ARM_FORMATS list: ENTRY(n, v)
// expands to "n = v,". ARM_FORMAT_NA follows the last listed entry and so
// takes the next value after it.
#define ENTRY(n, v) n = v,
typedef enum {
ARM_FORMATS
ARM_FORMAT_NA
} ARMFormat;
#undef ENTRY
// stringForARMFormat - Return the enumerator name of form as a C string
// (e.g. "ARM_FORMAT_DPFRM"). ARM_FORMAT_NA and any value not present in the
// ARM_FORMATS list yield the empty string.
static inline const char *stringForARMFormat(ARMFormat form) {
// Each list entry becomes "case <name>: return "<name>";".
#define ENTRY(n, v) case n: return #n;
  switch (form) {
    ARM_FORMATS
  case ARM_FORMAT_NA:
  default:
    return "";
  }
#undef ENTRY
}
/// Expands on the enum definitions from ARMBaseInstrInfo.h.
/// They are being used by the disassembler implementation.
///
/// NOTE(review): judging by the names, the *Mask values are 4-bit register
/// number masks and the *Shift values are bit positions of the Rd/Rn/Rm fields
/// and of the D/N/M bits inside a NEON instruction word -- confirm against the
/// users in ARMDisassemblerCore.cpp.
namespace ARMII {
enum {
NEONRegMask = 15,
GPRRegMask = 15,
NEON_RegRdShift = 12,
NEON_D_BitShift = 22,
NEON_RegRnShift = 16,
NEON_N_BitShift = 7,
NEON_RegRmShift = 0,
NEON_M_BitShift = 5
};
}
/// Utility function for extracting [From, To] bits from a uint32_t.
///
/// From is the most significant bit of the field and To the least significant
/// one (both inclusive, 0 <= To <= From <= 31). The field is returned
/// right-justified, e.g. slice(insn, 31, 28) yields the top nibble of insn.
static inline unsigned slice(uint32_t Bits, unsigned From, unsigned To) {
  assert(From < 32 && To < 32 && From >= To);
  const unsigned Width = From - To + 1;
  // Build the mask in unsigned arithmetic and special-case the full-width
  // field: "1 << 32" is undefined and "1 << 31" overflows a signed int.
  const uint32_t Mask =
    (Width == 32) ? ~uint32_t(0) : ((uint32_t(1) << Width) - 1);
  return (Bits >> To) & Mask;
}
/// Utility function for setting [From, To] bits to Val for a uint32_t.
///
/// From is the most significant bit of the field and To the least significant
/// one (both inclusive, 0 <= To <= From <= 31). Only the low
/// (From - To + 1) bits of Val are used; bits of Bits outside the field are
/// left unchanged.
static inline void setSlice(uint32_t &Bits, unsigned From, unsigned To,
                            uint32_t Val) {
  assert(From < 32 && To < 32 && From >= To);
  const unsigned Width = From - To + 1;
  // Build the mask in unsigned arithmetic and special-case the full-width
  // field: "1 << 32" is undefined and "1 << 31" overflows a signed int.
  const uint32_t Mask =
    (Width == 32) ? ~uint32_t(0) : ((uint32_t(1) << Width) - 1);
  Bits &= ~(Mask << To);
  Bits |= (Val & Mask) << To;
}
/// Various utilities for checking the target specific flags.

/// isUnaryDP - Return true if the ARMII::UnaryDP flag is set in TSFlags.
/// A unary data processing instruction doesn't have an Rn operand.
static inline bool isUnaryDP(unsigned TSFlags) {
  return (TSFlags & ARMII::UnaryDP) != 0;
}
/// getAddrMode - Extract the addressing mode field from TSFlags by masking
/// with ARMII::AddrModeMask (the value is not shifted).
/// See also ARMBaseInstrInfo.h.
static inline unsigned getAddrMode(unsigned TSFlags) {
  const unsigned AddrModeBits = TSFlags & ARMII::AddrModeMask;
  return AddrModeBits;
}
/// getIndexMode - Extract the index mode field from TSFlags, right-justified:
/// masked with ARMII::IndexModeMask, then shifted down by
/// ARMII::IndexModeShift.
/// {IndexModePre, IndexModePost}
/// Only valid for load and store ops.
/// See also ARMBaseInstrInfo.h.
static inline unsigned getIndexMode(unsigned TSFlags) {
  const unsigned IndexBits = TSFlags & ARMII::IndexModeMask;
  return IndexBits >> ARMII::IndexModeShift;
}
/// isPrePostLdSt - Return true if any bit of the index mode field
/// (ARMII::IndexModeMask) is set in TSFlags.
/// Pre-/post-indexed operations define an extra $base_wb in the OutOperandList.
static inline bool isPrePostLdSt(unsigned TSFlags) {
  const unsigned IndexBits = TSFlags & ARMII::IndexModeMask;
  return IndexBits != 0;
}
// Forward declaration.
class ARMBasicMCBuilder;
// Builder Object is mostly ignored except in some Thumb disassemble functions.
typedef ARMBasicMCBuilder *BO;
/// DisassembleFP - DisassembleFP points to a function that disassembles an insn
/// and builds the MCOperand list upon disassembly. It returns false on failure
/// or true on success. The number of operands added is updated upon success.
typedef bool (*DisassembleFP)(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, BO Builder);
/// ARMAlgorithm - ARMAlgorithm implements the ARM/Thumb disassembly by solving
/// the problem of building the MCOperands of an MCInst. An ARMAlgorithm wraps
/// a single function pointer of type DisassembleFP. Instances cannot be
/// created, copied or destroyed by clients (the constructors and destructor
/// are private); they are obtained through GetInstance().
class ARMAlgorithm {
public:
  /// GetInstance - GetInstance returns an instance of ARMAlgorithm given the
  /// encoding Format. API clients should not free up the returned instance.
  static ARMAlgorithm *GetInstance(ARMFormat Format);

  /// Solve - Run the wrapped disassemble function on insn.
  /// Returns true if this algorithm successfully disassembles the instruction,
  /// and false if it fails or if no disassemble function is attached.
  /// NumOpsAdded is updated to reflect the number of operands added by the
  /// algorithm. NumOpsAdded may be less than NumOps, in which case, there are
  /// operands unaccounted for which need to be dealt with by the API client.
  bool Solve(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps,
             unsigned &NumOpsAdded, BO Builder) const {
    if (!Disassemble)
      return false;

    return Disassemble(MI, Opcode, insn, NumOps, NumOpsAdded, Builder);
  }

private:
  ARMAlgorithm(DisassembleFP fp) : Disassemble(fp) {}
  ARMAlgorithm(ARMAlgorithm &AA) : Disassemble(AA.Disassemble) {}
  virtual ~ARMAlgorithm() {}

  /// The format-specific disassemble function; may be NULL.
  DisassembleFP Disassemble;
};
/// ARMBasicMCBuilder - ARMBasicMCBuilder represents an ARM MCInst builder that
/// knows how to build up the MCOperand list.
///
/// A builder is created for one (Opcode, Format) pair, see CreateMCBuilder(),
/// and is deleted by the API client once the MCInst has been built. An
/// optional Session pointer gives Thumb builders access to the IT block state.
class ARMBasicMCBuilder {
  unsigned Opcode;
  ARMFormat Format;
  unsigned short NumOps;
  const ARMAlgorithm &Algo;
  Session *SP; // May be NULL, in which case there is no IT block tracking.

public:
  ARMBasicMCBuilder(ARMBasicMCBuilder &B)
    : Opcode(B.Opcode), Format(B.Format), NumOps(B.NumOps), Algo(B.Algo),
      SP(B.SP)
  {}

  /// Opcode, Format, NumOperands, and Algo make an ARM Basic MCBuilder.
  /// The session pointer starts out as NULL; use setSession() to attach one.
  ARMBasicMCBuilder(unsigned opc, ARMFormat format, unsigned short num,
                    const ARMAlgorithm &algo)
    : Opcode(opc), Format(format), NumOps(num), Algo(algo), SP(0)
  {}

  /// The class has virtual member functions and instances are deleted through
  /// an ARMBasicMCBuilder pointer, so the destructor must be virtual for that
  /// delete to be well-defined should a derived builder ever be returned.
  virtual ~ARMBasicMCBuilder() {}

  /// setSession - Attach the Session used for IT block tracking. The builder
  /// does not take ownership of sp.
  void setSession(Session *sp) {
    SP = sp;
  }

  /// TryPredicateAndSBitModifier - TryPredicateAndSBitModifier tries to process
  /// the possible Predicate and SBitModifier, to build the remaining MCOperand
  /// constituents.
  bool TryPredicateAndSBitModifier(MCInst& MI, unsigned Opcode,
      uint32_t insn, unsigned short NumOpsRemaning);

  /// InITBlock - InITBlock returns true if we are inside an IT block, i.e. a
  /// session is attached and its ITCounter is non-zero.
  bool InITBlock() {
    if (SP)
      return SP->ITCounter > 0;

    return false;
  }

  /// Build - Build delegates to BuildIt to perform the heavy lifting. After
  /// that, it invokes RunBuildAfterHook where some housekeeping can be done.
  /// The value returned is whatever RunBuildAfterHook returns.
  virtual bool Build(MCInst &MI, uint32_t insn) {
    bool Status = BuildIt(MI, insn);
    return RunBuildAfterHook(Status, MI, insn);
  }

  /// BuildIt - BuildIt performs the build step for this ARM Basic MC Builder.
  /// The general idea is to set the Opcode for the MCInst, followed by adding
  /// the appropriate MCOperands to the MCInst. ARM Basic MC Builder delegates
  /// to the Algo (ARM Disassemble Algorithm) object to perform Format-specific
  /// disassembly, followed by class method TryPredicateAndSBitModifier() to do
  /// PredicateOperand and OptionalDefOperand which follow the Dst/Src Operands.
  virtual bool BuildIt(MCInst &MI, uint32_t insn);

  /// RunBuildAfterHook - RunBuildAfterHook performs operations deemed necessary
  /// after BuildIt is finished.
  virtual bool RunBuildAfterHook(bool Status, MCInst &MI, uint32_t insn);

private:
  /// Get condition of the current IT instruction: bits [7:4] of the session's
  /// ITState. Requires an attached session (asserted).
  unsigned GetITCond() {
    assert(SP);
    return slice(SP->ITState, 7, 4);
  }
};
/// CreateMCBuilder - Return an ARMBasicMCBuilder that can build up the MC
/// infrastructure of an MCInst given the Opcode and Format of the instr.
/// Return NULL if it fails to create/return a proper builder. API clients
/// are responsible for freeing up the allocated memory. Caching can be
/// performed by the API clients to improve performance.
extern ARMBasicMCBuilder *CreateMCBuilder(unsigned Opcode, ARMFormat Format);
} // namespace llvm
#endif

View File

@ -0,0 +1,16 @@
##===- lib/Target/ARM/Disassembler/Makefile ----------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
##===----------------------------------------------------------------------===##
LEVEL = ../../../..
LIBRARYNAME = LLVMARMDisassembler
# Hack: we need to include 'main' arm target directory to grab private headers
CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
include $(LEVEL)/Makefile.common

File diff suppressed because it is too large Load Diff

View File

@ -16,8 +16,9 @@ BUILT_SOURCES = ARMGenRegisterInfo.h.inc ARMGenRegisterNames.inc \
ARMGenRegisterInfo.inc ARMGenInstrNames.inc \
ARMGenInstrInfo.inc ARMGenAsmWriter.inc \
ARMGenDAGISel.inc ARMGenSubtarget.inc \
ARMGenCodeEmitter.inc ARMGenCallingConv.inc
ARMGenCodeEmitter.inc ARMGenCallingConv.inc \
ARMGenDecoderTables.inc
DIRS = AsmPrinter AsmParser TargetInfo
DIRS = AsmPrinter AsmParser Disassembler TargetInfo
include $(LEVEL)/Makefile.common

View File

@ -0,0 +1,62 @@
# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 | FileCheck %s
# CHECK: b #0
0xfe 0xff 0xff 0xea
# CHECK: bfc r8, #0, #16
0x1f 0x80 0xcf 0xe7
# CHECK: bfi r8, r0, #16, #1
0x10 0x88 0xd0 0xe7
# CHECK: cmn r0, #1
0x01 0x00 0x70 0xe3
# CHECK: dmb nshst
0x56 0xf0 0x7f 0xf5
# CHECK: ldr r0, [r2], #15
0x0f 0x00 0x92 0xe4
# CHECK: lsls r0, r2, #31
0x82 0x0f 0xb0 0xe1
# CHECK: mcr2 p0, #0, r2, cr1, cr0, #7
0xf0 0x20 0x01 0xfe
# CHECK: movt r8, #65535
0xff 0x8f 0x4f 0xe3
# CHECK: pkhbt r8, r9, r10, lsl #4
0x1a 0x82 0x89 0xe6
# CHECK: pop {r0, r2, r4, r6, r8, r10}
0x55 0x05 0xbd 0xe8
# CHECK: push {r0, r2, r4, r6, r8, r10}
0x55 0x05 0x2d 0xe9
# CHECK: qsax r8, r9, r10
0x5a 0x8f 0x29 0xe6
# CHECK: rfedb r0!
0x00 0x0a 0x30 0xf9
# CHECK: sbcs r0, pc, #1
0x01 0x00 0xdf 0xe2
# CHECK: sbfx r0, r1, #0, #8
0x51 0x00 0xa7 0xe7
# CHECK: ssat r8, #1, r10, lsl #8
0x1a 0x84 0xa0 0xe6
# CHECK: stmdb r10!, {r4, r5, r6, r7, lr}
0xf0 0x40 0x2a 0xe9
# CHECK: teq r0, #31
0x1f 0x00 0x30 0xe3
# CHECK: ubfx r0, r0, #16, #1
0x50 0x08 0xe0 0xe7

View File

@ -0,0 +1,41 @@
# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 | FileCheck %s
# CHECK: vbif q15, q7, q0
0x50 0xe1 0x7e 0xf3
# CHECK: vcvt.f32.s32 q15, q0, #1
0x50 0xee 0xff 0xf2
# CHECK: vdup.32 q3, d1[0]
0x41 0x6c 0xb4 0xf3
# CHECK: vld4.8 {d0, d1, d2, d3}, [r2], r7
0x07 0x00 0x22 0xf4
# CHECK: vld4.8 {d4, d6, d8, d10}, [r2]
0x0f 0x41 0x22 0xf4
# CHECK: vmov d0, d15
0x1f 0x01 0x2f 0xf2
# CHECK: vmul.f32 d0, d0, d6
0x16 0x0d 0x00 0xf3
# CHECK: vneg.f32 q0, q0
0xc0 0x07 0xb9 0xf3
# CHECK: vqrdmulh.s32 d0, d0, d3[1]
0x63 0x0d 0xa0 0xf2
# CHECK: vrshr.s32 d0, d0, #16
0x10 0x02 0xb0 0xf2
# CHECK: vshll.i16 q3, d1, #16
0x01 0x63 0xb6 0xf3
# CHECK: vsri.32 q15, q0, #1
0x50 0xe4 0xff 0xf3
# CHECK: vtbx.8 d18, {d4, d5, d6}, d7
0x47 0x2a 0xf4 0xf3

View File

@ -0,0 +1,81 @@
# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 | FileCheck %s
# CHECK: add r5, sp, #68
0x11 0xad
# CHECK: adcs r0, r0, #1
0x50 0xf1 0x01 0x00
# CHECK: b #34
0x0f 0xe0
# CHECK: bfi r2, r10, #0, #1
0x6a 0xf3 0x00 0x02
# CHECK: cbnz r7, #20
0x57 0xb9
# CHECK: cmp r3, r4
0xa3 0x42
# CHECK: cmn.w r0, #31
0x10 0xf1 0x1f 0x0f
# CHECK: ldmia r0!, {r1}
0x02 0xc8
# CHECK: ldrd r0, r1, [r7, #64]!
0xf7 0xe9 0x10 0x01
# CHECK: lsls.w r0, pc, #1
0x5f 0xea 0x4f 0x00
# CHECK: mov r11, r7
0xbb 0x46
# CHECK: pkhtb r2, r4, r6, asr #16
0xc4 0xea 0x26 0x42
# CHECK: pop {r2, r4, r6, r8, r10, r12}
0xbd 0xe8 0x54 0x15
# CHECK: push {r2, r4, r6, r8, r10, r12}
0x2d 0xe9 0x54 0x15
# CHECK: rsbs r0, r0, #0
0x40 0x42
# CHECK: strd r0, [r7, #64]
0xc7 0xe9 0x10 0x01
# CHECK: sub sp, #60
0x8f 0xb0
# CHECK: subw r0, pc, #1
0xaf 0xf2 0x01 0x00
# CHECK: uqadd16 r3, r4, r5
0x94 0xfa 0x55 0xf3
# CHECK: usada8 r5, r4, r3, r2
0x74 0xfb 0x03 0x25
# CHECK: uxtab16 r1, r2, r3, ror #8
0x32 0xfa 0x93 0xf1
# IT block begin
# CHECK: ittte eq
0x03 0xbf
# CHECK: moveq r3, #3
0x03 0x23
# CHECK: asreq r1, r0, #5
0x41 0x11
# CHECK: lsleq r1, r0, #28
0x01 0x07
# CHECK: rsbne r1, r2, #0
0x51 0x42
# IT block end

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,50 @@
//===------------ ARMDecoderEmitter.h - Decoder Generator -------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is part of the ARM Disassembler.
// It contains the tablegen backend declaration ARMDecoderEmitter.
//
//===----------------------------------------------------------------------===//
#ifndef ARMDECODEREMITTER_H
#define ARMDECODEREMITTER_H
#include "TableGenBackend.h"
#include "llvm/Support/DataTypes.h"
namespace llvm {
/// ARMDecoderEmitter - TableGen backend declaration for the ARM/Thumb decoder
/// generator. The real work is delegated to a private helper class,
/// ARMDEBackend, which is set up by initBackend() when the emitter is
/// constructed and torn down by shutdownBackend() when it is destroyed.
class ARMDecoderEmitter : public TableGenBackend {
  // Helper class for ARMDecoderEmitter.
  class ARMDEBackend;

  RecordKeeper &Records;
  ARMDEBackend *Backend;

  void initBackend();
  void shutdownBackend();

public:
  ARMDecoderEmitter(RecordKeeper &R) : Records(R) { initBackend(); }
  ~ARMDecoderEmitter() { shutdownBackend(); }

  // run - Output the code emitter
  void run(raw_ostream &o);
};
} // end llvm namespace
#endif

View File

@ -12,6 +12,8 @@
#include "Record.h"
#include "X86DisassemblerTables.h"
#include "X86RecognizableInstr.h"
#include "ARMDecoderEmitter.h"
using namespace llvm;
using namespace llvm::X86Disassembler;
@ -124,6 +126,12 @@ void DisassemblerEmitter::run(raw_ostream &OS) {
return;
}
// Fixed-instruction-length targets use a common disassembler.
if (Target.getName() == "ARM") {
ARMDecoderEmitter(Records).run(OS);
return;
}
throw TGError(Target.getTargetRecord()->getLoc(),
"Unable to generate disassembler for this target");
}

View File

@ -31,6 +31,7 @@
#include "OptParserEmitter.h"
#include "Record.h"
#include "RegisterInfoEmitter.h"
#include "ARMDecoderEmitter.h"
#include "SubtargetEmitter.h"
#include "TGParser.h"
#include "llvm/Support/CommandLine.h"
@ -47,6 +48,7 @@ enum ActionType {
GenEmitter,
GenRegisterEnums, GenRegister, GenRegisterHeader,
GenInstrEnums, GenInstrs, GenAsmWriter, GenAsmMatcher,
GenARMDecoder,
GenDisassembler,
GenCallingConv,
GenClangDiagsDefs,
@ -83,6 +85,8 @@ namespace {
"Generate calling convention descriptions"),
clEnumValN(GenAsmWriter, "gen-asm-writer",
"Generate assembly writer"),
clEnumValN(GenARMDecoder, "gen-arm-decoder",
"Generate decoders for ARM/Thumb"),
clEnumValN(GenDisassembler, "gen-disassembler",
"Generate disassembler"),
clEnumValN(GenAsmMatcher, "gen-asm-matcher",
@ -228,6 +232,9 @@ int main(int argc, char **argv) {
case GenAsmWriter:
AsmWriterEmitter(Records).run(*Out);
break;
case GenARMDecoder:
ARMDecoderEmitter(Records).run(*Out);
break;
case GenAsmMatcher:
AsmMatcherEmitter(Records).run(*Out);
break;