2007-11-18 16:46:26 +08:00
|
|
|
//===- LLLexer.cpp - Lexer for .ll Files ----------------------------------===//
|
|
|
|
//
|
|
|
|
// The LLVM Compiler Infrastructure
|
|
|
|
//
|
2007-12-30 04:36:04 +08:00
|
|
|
// This file is distributed under the University of Illinois Open Source
|
|
|
|
// License. See LICENSE.TXT for details.
|
2007-11-18 16:46:26 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
// Implement the Lexer for .ll files.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#include "LLLexer.h"
|
2009-01-02 15:01:27 +08:00
|
|
|
#include "llvm/DerivedTypes.h"
|
|
|
|
#include "llvm/Instruction.h"
|
2009-07-08 02:44:11 +08:00
|
|
|
#include "llvm/LLVMContext.h"
|
2010-09-28 01:42:11 +08:00
|
|
|
#include "llvm/ADT/Twine.h"
|
|
|
|
#include "llvm/Assembly/Parser.h"
|
2009-07-12 04:10:48 +08:00
|
|
|
#include "llvm/Support/ErrorHandling.h"
|
2007-11-18 16:46:26 +08:00
|
|
|
#include "llvm/Support/MemoryBuffer.h"
|
2007-12-09 03:03:30 +08:00
|
|
|
#include "llvm/Support/MathExtras.h"
|
2009-07-03 06:46:18 +08:00
|
|
|
#include "llvm/Support/SourceMgr.h"
|
2009-01-02 15:01:27 +08:00
|
|
|
#include "llvm/Support/raw_ostream.h"
|
2010-12-20 04:43:38 +08:00
|
|
|
#include <cctype>
|
2009-08-24 18:34:41 +08:00
|
|
|
#include <cstdio>
|
2009-01-02 15:18:46 +08:00
|
|
|
#include <cstdlib>
|
2009-01-03 06:49:28 +08:00
|
|
|
#include <cstring>
|
2007-11-18 16:46:26 +08:00
|
|
|
using namespace llvm;
|
|
|
|
|
2010-09-28 01:42:11 +08:00
|
|
|
/// Error - Record an "error" diagnostic for the location ErrorLoc with the
/// text Msg.  The diagnostic is produced by the source manager and stored in
/// the SMDiagnostic the lexer was constructed with.  Always returns true.
bool LLLexer::Error(LocTy ErrorLoc, const Twine &Msg) const {
  // ErrorInfo refers to the caller-provided diagnostic, so assigning to it
  // hands the message back to whoever created the lexer.
  ErrorInfo = SM.GetMessage(ErrorLoc, Msg, "error");

  return true;
}
|
|
|
|
|
2007-11-18 16:46:26 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Helper functions.
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
// atoull - Convert an ascii string of decimal digits into the unsigned long
|
|
|
|
// long representation... this does not have to do input error checking,
|
|
|
|
// because we know that the input will be matched by a suitable regex...
|
|
|
|
//
|
2009-01-02 15:01:27 +08:00
|
|
|
uint64_t LLLexer::atoull(const char *Buffer, const char *End) {
|
2007-11-18 16:46:26 +08:00
|
|
|
uint64_t Result = 0;
|
|
|
|
for (; Buffer != End; Buffer++) {
|
|
|
|
uint64_t OldRes = Result;
|
|
|
|
Result *= 10;
|
|
|
|
Result += *Buffer-'0';
|
|
|
|
if (Result < OldRes) { // Uh, oh, overflow detected!!!
|
2009-01-02 15:01:27 +08:00
|
|
|
Error("constant bigger than 64 bits detected!");
|
2007-11-18 16:46:26 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return Result;
|
|
|
|
}
|
|
|
|
|
2009-01-02 15:01:27 +08:00
|
|
|
uint64_t LLLexer::HexIntToVal(const char *Buffer, const char *End) {
|
2007-11-18 16:46:26 +08:00
|
|
|
uint64_t Result = 0;
|
|
|
|
for (; Buffer != End; ++Buffer) {
|
|
|
|
uint64_t OldRes = Result;
|
|
|
|
Result *= 16;
|
|
|
|
char C = *Buffer;
|
|
|
|
if (C >= '0' && C <= '9')
|
|
|
|
Result += C-'0';
|
|
|
|
else if (C >= 'A' && C <= 'F')
|
|
|
|
Result += C-'A'+10;
|
|
|
|
else if (C >= 'a' && C <= 'f')
|
|
|
|
Result += C-'a'+10;
|
2007-12-16 17:16:12 +08:00
|
|
|
|
2007-11-18 16:46:26 +08:00
|
|
|
if (Result < OldRes) { // Uh, oh, overflow detected!!!
|
2009-01-02 15:01:27 +08:00
|
|
|
Error("constant bigger than 64 bits detected!");
|
2007-11-18 16:46:26 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return Result;
|
|
|
|
}
|
|
|
|
|
2009-01-02 15:01:27 +08:00
|
|
|
void LLLexer::HexToIntPair(const char *Buffer, const char *End,
|
|
|
|
uint64_t Pair[2]) {
|
2007-11-18 16:46:26 +08:00
|
|
|
Pair[0] = 0;
|
|
|
|
for (int i=0; i<16; i++, Buffer++) {
|
|
|
|
assert(Buffer != End);
|
|
|
|
Pair[0] *= 16;
|
|
|
|
char C = *Buffer;
|
|
|
|
if (C >= '0' && C <= '9')
|
|
|
|
Pair[0] += C-'0';
|
|
|
|
else if (C >= 'A' && C <= 'F')
|
|
|
|
Pair[0] += C-'A'+10;
|
|
|
|
else if (C >= 'a' && C <= 'f')
|
|
|
|
Pair[0] += C-'a'+10;
|
|
|
|
}
|
|
|
|
Pair[1] = 0;
|
|
|
|
for (int i=0; i<16 && Buffer != End; i++, Buffer++) {
|
|
|
|
Pair[1] *= 16;
|
|
|
|
char C = *Buffer;
|
|
|
|
if (C >= '0' && C <= '9')
|
|
|
|
Pair[1] += C-'0';
|
|
|
|
else if (C >= 'A' && C <= 'F')
|
|
|
|
Pair[1] += C-'A'+10;
|
|
|
|
else if (C >= 'a' && C <= 'f')
|
|
|
|
Pair[1] += C-'a'+10;
|
|
|
|
}
|
2007-11-19 02:25:18 +08:00
|
|
|
if (Buffer != End)
|
2009-01-02 15:01:27 +08:00
|
|
|
Error("constant bigger than 128 bits detected!");
|
2007-11-18 16:46:26 +08:00
|
|
|
}
|
|
|
|
|
2009-03-24 05:16:53 +08:00
|
|
|
/// FP80HexToIntPair - translate an 80 bit FP80 number (20 hexits) into
|
|
|
|
/// { low64, high16 } as usual for an APInt.
|
|
|
|
void LLLexer::FP80HexToIntPair(const char *Buffer, const char *End,
|
|
|
|
uint64_t Pair[2]) {
|
|
|
|
Pair[1] = 0;
|
|
|
|
for (int i=0; i<4 && Buffer != End; i++, Buffer++) {
|
|
|
|
assert(Buffer != End);
|
|
|
|
Pair[1] *= 16;
|
|
|
|
char C = *Buffer;
|
|
|
|
if (C >= '0' && C <= '9')
|
|
|
|
Pair[1] += C-'0';
|
|
|
|
else if (C >= 'A' && C <= 'F')
|
|
|
|
Pair[1] += C-'A'+10;
|
|
|
|
else if (C >= 'a' && C <= 'f')
|
|
|
|
Pair[1] += C-'a'+10;
|
|
|
|
}
|
|
|
|
Pair[0] = 0;
|
|
|
|
for (int i=0; i<16; i++, Buffer++) {
|
|
|
|
Pair[0] *= 16;
|
|
|
|
char C = *Buffer;
|
|
|
|
if (C >= '0' && C <= '9')
|
|
|
|
Pair[0] += C-'0';
|
|
|
|
else if (C >= 'A' && C <= 'F')
|
|
|
|
Pair[0] += C-'A'+10;
|
|
|
|
else if (C >= 'a' && C <= 'f')
|
|
|
|
Pair[0] += C-'a'+10;
|
|
|
|
}
|
|
|
|
if (Buffer != End)
|
|
|
|
Error("constant bigger than 128 bits detected!");
|
|
|
|
}
|
|
|
|
|
2007-11-18 16:46:26 +08:00
|
|
|
// UnEscapeLexed - Rewrite Str in place, collapsing the escape sequences used
// in quoted names and strings:
//   * two consecutive backslashes become a single backslash;
//   * a backslash followed by two hex digits becomes the single byte with
//     that hexadecimal value.
// A backslash that starts neither form is copied through unchanged.  The
// string can only shrink, so the decoded text is written over the input.
static void UnEscapeLexed(std::string &Str) {
  typedef std::string::size_type size_type;
  const size_type Size = Str.size();
  size_type Out = 0;

  for (size_type In = 0; In != Size; ) {
    if (Str[In] == '\\') {
      if (In + 1 < Size && Str[In+1] == '\\') {
        Str[Out++] = '\\';              // Two backslashes become one.
        In += 2;
        continue;
      }

      // The casts to unsigned char are required: passing a negative char to
      // the <cctype> functions is undefined behavior.
      if (In + 2 < Size &&
          isxdigit(static_cast<unsigned char>(Str[In+1])) &&
          isxdigit(static_cast<unsigned char>(Str[In+2]))) {
        // Decode the two hexits directly instead of temporarily terminating
        // the buffer for strtol, which wrote one position past the escape.
        unsigned Val = 0;
        for (size_type i = 1; i <= 2; ++i) {
          const char C = Str[In+i];
          unsigned Digit;
          if (C >= '0' && C <= '9')
            Digit = C-'0';
          else if (C >= 'a' && C <= 'f')
            Digit = C-'a'+10;
          else
            Digit = C-'A'+10;
          Val = Val*16 + Digit;
        }
        Str[Out++] = (char)Val;
        In += 3;                        // Skip over handled chars.
        continue;
      }
    }

    // Ordinary character, or a backslash that does not start an escape.
    Str[Out++] = Str[In++];
  }

  Str.resize(Out);
}
|
|
|
|
|
|
|
|
/// isLabelChar - Return true for [-a-zA-Z$._0-9].
///
/// C is converted to unsigned char before it is handed to isalnum: bytes from
/// the input buffer may be negative when char is signed, and passing a
/// negative value (other than EOF) to the <cctype> functions is undefined.
static bool isLabelChar(char C) {
  return isalnum(static_cast<unsigned char>(C)) ||
         C == '-' || C == '$' || C == '.' || C == '_';
}
|
|
|
|
|
|
|
|
|
|
|
|
/// isLabelTail - Return true if this pointer points to a valid end of a label.
|
|
|
|
static const char *isLabelTail(const char *CurPtr) {
|
|
|
|
while (1) {
|
|
|
|
if (CurPtr[0] == ':') return CurPtr+1;
|
|
|
|
if (!isLabelChar(CurPtr[0])) return 0;
|
|
|
|
++CurPtr;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Lexer definition.
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2009-07-08 02:44:11 +08:00
|
|
|
/// Construct a lexer over StartBuf.  Diagnostics are built with the source
/// manager sm and stored into Err; C is the context used when the lexer
/// creates types.  Lexing starts at the beginning of the buffer.
LLLexer::LLLexer(MemoryBuffer *StartBuf, SourceMgr &sm, SMDiagnostic &Err,
                 LLVMContext &C)
  : CurBuf(StartBuf), ErrorInfo(Err), SM(sm), Context(C), APFloatVal(0.0) {
  // Position the cursor on the first character of the buffer.
  CurPtr = StartBuf->getBufferStart();
}
|
|
|
|
|
|
|
|
std::string LLLexer::getFilename() const {
|
|
|
|
return CurBuf->getBufferIdentifier();
|
|
|
|
}
|
|
|
|
|
|
|
|
int LLLexer::getNextChar() {
|
|
|
|
char CurChar = *CurPtr++;
|
|
|
|
switch (CurChar) {
|
|
|
|
default: return (unsigned char)CurChar;
|
|
|
|
case 0:
|
|
|
|
// A nul character in the stream is either the end of the current buffer or
|
|
|
|
// a random nul in the file. Disambiguate that here.
|
|
|
|
if (CurPtr-1 != CurBuf->getBufferEnd())
|
|
|
|
return 0; // Just whitespace.
|
2007-12-16 17:16:12 +08:00
|
|
|
|
2007-11-18 16:46:26 +08:00
|
|
|
// Otherwise, return end of file.
|
2007-12-16 17:16:12 +08:00
|
|
|
--CurPtr; // Another call to lex will return EOF again.
|
2007-11-18 16:46:26 +08:00
|
|
|
return EOF;
|
2007-12-16 17:16:12 +08:00
|
|
|
}
|
2007-11-18 16:46:26 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2009-01-02 15:01:27 +08:00
|
|
|
/// LexToken - Lex and return the next token from the buffer.  TokStart is set
/// to the first character of the returned token.  Whitespace, embedded nul
/// characters and ';' line comments are skipped.
///
/// Skipping is done by looping rather than by calling LexToken recursively, so
/// the stack depth does not grow with the number of consecutive blank or
/// comment lines in the input.
lltok::Kind LLLexer::LexToken() {
  while (1) {
    TokStart = CurPtr;

    int CurChar = getNextChar();
    switch (CurChar) {
    default:
      // Handle letters: [a-zA-Z_]
      if (isalpha(CurChar) || CurChar == '_')
        return LexIdentifier();

      return lltok::Error;
    case EOF: return lltok::Eof;
    case 0:
    case ' ':
    case '\t':
    case '\n':
    case '\r':
      // Ignore whitespace.
      continue;
    case '+': return LexPositive();
    case '@': return LexAt();
    case '%': return LexPercent();
    case '"': return LexQuote();
    case '.':
      // A '.' may start a label ...
      if (const char *Ptr = isLabelTail(CurPtr)) {
        CurPtr = Ptr;
        StrVal.assign(TokStart, CurPtr-1);
        return lltok::LabelStr;
      }
      // ... or be the first character of "...".
      if (CurPtr[0] == '.' && CurPtr[1] == '.') {
        CurPtr += 2;
        return lltok::dotdotdot;
      }
      return lltok::Error;
    case '$':
      // A '$' is only valid as the first character of a label.
      if (const char *Ptr = isLabelTail(CurPtr)) {
        CurPtr = Ptr;
        StrVal.assign(TokStart, CurPtr-1);
        return lltok::LabelStr;
      }
      return lltok::Error;
    case ';':
      // Comment: discard the rest of the line and lex again.
      SkipLineComment();
      continue;
    case '!': return LexExclaim();
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
    case '-':
      return LexDigitOrNegative();
    case '=': return lltok::equal;
    case '[': return lltok::lsquare;
    case ']': return lltok::rsquare;
    case '{': return lltok::lbrace;
    case '}': return lltok::rbrace;
    case '<': return lltok::less;
    case '>': return lltok::greater;
    case '(': return lltok::lparen;
    case ')': return lltok::rparen;
    case ',': return lltok::comma;
    case '*': return lltok::star;
    case '\\': return lltok::backslash;
    }
  }
}
|
|
|
|
|
|
|
|
void LLLexer::SkipLineComment() {
|
|
|
|
while (1) {
|
|
|
|
if (CurPtr[0] == '\n' || CurPtr[0] == '\r' || getNextChar() == EOF)
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// LexAt - Lex all tokens that start with an @ character:
|
2009-01-02 15:01:27 +08:00
|
|
|
/// GlobalVar @\"[^\"]*\"
|
|
|
|
/// GlobalVar @[-a-zA-Z$._][-a-zA-Z$._0-9]*
|
|
|
|
/// GlobalVarID @[0-9]+
|
|
|
|
lltok::Kind LLLexer::LexAt() {
|
2007-11-18 16:46:26 +08:00
|
|
|
// Handle AtStringConstant: @\"[^\"]*\"
|
|
|
|
if (CurPtr[0] == '"') {
|
|
|
|
++CurPtr;
|
2007-12-16 17:16:12 +08:00
|
|
|
|
2007-11-18 16:46:26 +08:00
|
|
|
while (1) {
|
|
|
|
int CurChar = getNextChar();
|
2007-12-16 17:16:12 +08:00
|
|
|
|
|
|
|
if (CurChar == EOF) {
|
2009-01-02 15:01:27 +08:00
|
|
|
Error("end of file in global variable name");
|
|
|
|
return lltok::Error;
|
2007-11-18 16:46:26 +08:00
|
|
|
}
|
|
|
|
if (CurChar == '"') {
|
2009-01-02 15:01:27 +08:00
|
|
|
StrVal.assign(TokStart+2, CurPtr-1);
|
|
|
|
UnEscapeLexed(StrVal);
|
|
|
|
return lltok::GlobalVar;
|
2007-11-18 16:46:26 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2007-12-16 17:16:12 +08:00
|
|
|
|
2007-11-18 16:46:26 +08:00
|
|
|
// Handle GlobalVarName: @[-a-zA-Z$._][-a-zA-Z$._0-9]*
|
2007-12-16 17:16:12 +08:00
|
|
|
if (isalpha(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' ||
|
2007-11-18 16:46:26 +08:00
|
|
|
CurPtr[0] == '.' || CurPtr[0] == '_') {
|
|
|
|
++CurPtr;
|
2007-12-16 17:16:12 +08:00
|
|
|
while (isalnum(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' ||
|
2007-11-18 16:46:26 +08:00
|
|
|
CurPtr[0] == '.' || CurPtr[0] == '_')
|
|
|
|
++CurPtr;
|
|
|
|
|
2009-01-02 15:01:27 +08:00
|
|
|
StrVal.assign(TokStart+1, CurPtr); // Skip @
|
|
|
|
return lltok::GlobalVar;
|
2007-11-18 16:46:26 +08:00
|
|
|
}
|
2007-12-16 17:16:12 +08:00
|
|
|
|
2007-11-18 16:46:26 +08:00
|
|
|
// Handle GlobalVarID: @[0-9]+
|
|
|
|
if (isdigit(CurPtr[0])) {
|
2007-12-16 17:16:12 +08:00
|
|
|
for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr)
|
|
|
|
/*empty*/;
|
|
|
|
|
2007-11-18 16:46:26 +08:00
|
|
|
uint64_t Val = atoull(TokStart+1, CurPtr);
|
|
|
|
if ((unsigned)Val != Val)
|
2009-01-02 15:01:27 +08:00
|
|
|
Error("invalid value number (too large)!");
|
|
|
|
UIntVal = unsigned(Val);
|
|
|
|
return lltok::GlobalID;
|
2007-11-18 16:46:26 +08:00
|
|
|
}
|
2007-12-16 17:16:12 +08:00
|
|
|
|
2009-01-02 15:01:27 +08:00
|
|
|
return lltok::Error;
|
2007-11-18 16:46:26 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// LexPercent - Lex all tokens that start with a % character:
|
2009-01-02 15:01:27 +08:00
|
|
|
/// LocalVar ::= %\"[^\"]*\"
|
|
|
|
/// LocalVar ::= %[-a-zA-Z$._][-a-zA-Z$._0-9]*
|
|
|
|
/// LocalVarID ::= %[0-9]+
|
|
|
|
lltok::Kind LLLexer::LexPercent() {
|
|
|
|
// Handle LocalVarName: %\"[^\"]*\"
|
2007-11-18 16:46:26 +08:00
|
|
|
if (CurPtr[0] == '"') {
|
|
|
|
++CurPtr;
|
2007-12-16 17:16:12 +08:00
|
|
|
|
2007-11-18 16:46:26 +08:00
|
|
|
while (1) {
|
|
|
|
int CurChar = getNextChar();
|
2007-12-16 17:16:12 +08:00
|
|
|
|
|
|
|
if (CurChar == EOF) {
|
2009-01-02 15:01:27 +08:00
|
|
|
Error("end of file in string constant");
|
|
|
|
return lltok::Error;
|
2007-11-18 16:46:26 +08:00
|
|
|
}
|
|
|
|
if (CurChar == '"') {
|
2009-01-02 15:01:27 +08:00
|
|
|
StrVal.assign(TokStart+2, CurPtr-1);
|
|
|
|
UnEscapeLexed(StrVal);
|
|
|
|
return lltok::LocalVar;
|
2007-11-18 16:46:26 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2007-12-16 17:16:12 +08:00
|
|
|
|
2007-11-18 16:46:26 +08:00
|
|
|
// Handle LocalVarName: %[-a-zA-Z$._][-a-zA-Z$._0-9]*
|
2007-12-16 17:16:12 +08:00
|
|
|
if (isalpha(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' ||
|
2007-11-18 16:46:26 +08:00
|
|
|
CurPtr[0] == '.' || CurPtr[0] == '_') {
|
|
|
|
++CurPtr;
|
2007-12-16 17:16:12 +08:00
|
|
|
while (isalnum(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' ||
|
2007-11-18 16:46:26 +08:00
|
|
|
CurPtr[0] == '.' || CurPtr[0] == '_')
|
|
|
|
++CurPtr;
|
2007-12-16 17:16:12 +08:00
|
|
|
|
2009-01-02 15:01:27 +08:00
|
|
|
StrVal.assign(TokStart+1, CurPtr); // Skip %
|
|
|
|
return lltok::LocalVar;
|
2007-11-18 16:46:26 +08:00
|
|
|
}
|
2007-12-16 17:16:12 +08:00
|
|
|
|
2007-11-18 16:46:26 +08:00
|
|
|
// Handle LocalVarID: %[0-9]+
|
|
|
|
if (isdigit(CurPtr[0])) {
|
2007-12-16 17:16:12 +08:00
|
|
|
for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr)
|
|
|
|
/*empty*/;
|
|
|
|
|
2007-11-18 16:46:26 +08:00
|
|
|
uint64_t Val = atoull(TokStart+1, CurPtr);
|
|
|
|
if ((unsigned)Val != Val)
|
2009-01-02 15:01:27 +08:00
|
|
|
Error("invalid value number (too large)!");
|
|
|
|
UIntVal = unsigned(Val);
|
|
|
|
return lltok::LocalVarID;
|
2007-11-18 16:46:26 +08:00
|
|
|
}
|
2007-12-16 17:16:12 +08:00
|
|
|
|
2009-01-02 15:01:27 +08:00
|
|
|
return lltok::Error;
|
2007-11-18 16:46:26 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/// LexQuote - Lex all tokens that start with a " character:
|
|
|
|
/// QuoteLabel "[^"]+":
|
|
|
|
/// StringConstant "[^"]*"
|
2009-01-02 15:01:27 +08:00
|
|
|
lltok::Kind LLLexer::LexQuote() {
|
2007-11-18 16:46:26 +08:00
|
|
|
while (1) {
|
|
|
|
int CurChar = getNextChar();
|
2007-12-16 17:16:12 +08:00
|
|
|
|
|
|
|
if (CurChar == EOF) {
|
2009-01-02 15:01:27 +08:00
|
|
|
Error("end of file in quoted string");
|
|
|
|
return lltok::Error;
|
2007-11-18 16:46:26 +08:00
|
|
|
}
|
2007-12-16 17:16:12 +08:00
|
|
|
|
2007-11-18 16:46:26 +08:00
|
|
|
if (CurChar != '"') continue;
|
|
|
|
|
|
|
|
if (CurPtr[0] != ':') {
|
2009-01-02 15:01:27 +08:00
|
|
|
StrVal.assign(TokStart+1, CurPtr-1);
|
|
|
|
UnEscapeLexed(StrVal);
|
|
|
|
return lltok::StringConstant;
|
2007-11-18 16:46:26 +08:00
|
|
|
}
|
2007-12-16 17:16:12 +08:00
|
|
|
|
2007-11-18 16:46:26 +08:00
|
|
|
++CurPtr;
|
2009-01-02 15:01:27 +08:00
|
|
|
StrVal.assign(TokStart+1, CurPtr-2);
|
|
|
|
UnEscapeLexed(StrVal);
|
|
|
|
return lltok::LabelStr;
|
2007-11-18 16:46:26 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// JustWhitespaceNewLine - Look ahead from Ptr over spaces and tabs.  If the
/// next character after them is '\n' or '\r', advance Ptr to that character
/// and return true.  Otherwise leave Ptr untouched and return false.
static bool JustWhitespaceNewLine(const char *&Ptr) {
  const char *Scan = Ptr;
  for (; *Scan == ' ' || *Scan == '\t'; ++Scan)
    /*empty*/;

  const bool AtLineEnd = (*Scan == '\n' || *Scan == '\r');
  if (AtLineEnd)
    Ptr = Scan;
  return AtLineEnd;
}
|
|
|
|
|
2009-12-30 12:56:59 +08:00
|
|
|
/// LexExclaim:
|
2009-07-29 08:34:02 +08:00
|
|
|
/// !foo
|
2009-12-30 13:02:06 +08:00
|
|
|
/// !
|
2009-12-30 12:56:59 +08:00
|
|
|
lltok::Kind LLLexer::LexExclaim() {
|
2009-12-30 13:02:06 +08:00
|
|
|
// Lex a metadata name as a MetadataVar.
|
2009-07-29 08:34:02 +08:00
|
|
|
if (isalpha(CurPtr[0])) {
|
|
|
|
++CurPtr;
|
|
|
|
while (isalnum(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' ||
|
|
|
|
CurPtr[0] == '.' || CurPtr[0] == '_')
|
|
|
|
++CurPtr;
|
2007-11-18 16:46:26 +08:00
|
|
|
|
2009-07-29 08:34:02 +08:00
|
|
|
StrVal.assign(TokStart+1, CurPtr); // Skip !
|
2009-12-30 13:02:06 +08:00
|
|
|
return lltok::MetadataVar;
|
2009-07-29 08:34:02 +08:00
|
|
|
}
|
2009-12-30 12:56:59 +08:00
|
|
|
return lltok::exclaim;
|
2009-07-29 08:34:02 +08:00
|
|
|
}
|
|
|
|
|
2007-11-18 16:46:26 +08:00
|
|
|
/// LexIdentifier: Handle several related productions:
|
|
|
|
/// Label [-a-zA-Z$._0-9]+:
|
|
|
|
/// IntegerType i[0-9]+
|
|
|
|
/// Keyword sdiv, float, ...
|
|
|
|
/// HexIntConstant [us]0x[0-9A-Fa-f]+
|
2009-01-02 15:01:27 +08:00
|
|
|
lltok::Kind LLLexer::LexIdentifier() {
|
2007-11-18 16:46:26 +08:00
|
|
|
const char *StartChar = CurPtr;
|
|
|
|
const char *IntEnd = CurPtr[-1] == 'i' ? 0 : StartChar;
|
|
|
|
const char *KeywordEnd = 0;
|
2007-12-16 17:16:12 +08:00
|
|
|
|
2007-11-18 16:46:26 +08:00
|
|
|
for (; isLabelChar(*CurPtr); ++CurPtr) {
|
|
|
|
// If we decide this is an integer, remember the end of the sequence.
|
|
|
|
if (!IntEnd && !isdigit(*CurPtr)) IntEnd = CurPtr;
|
|
|
|
if (!KeywordEnd && !isalnum(*CurPtr) && *CurPtr != '_') KeywordEnd = CurPtr;
|
|
|
|
}
|
2007-12-16 17:16:12 +08:00
|
|
|
|
2007-11-18 16:46:26 +08:00
|
|
|
// If we stopped due to a colon, this really is a label.
|
|
|
|
if (*CurPtr == ':') {
|
2009-01-02 15:01:27 +08:00
|
|
|
StrVal.assign(StartChar-1, CurPtr++);
|
|
|
|
return lltok::LabelStr;
|
2007-11-18 16:46:26 +08:00
|
|
|
}
|
2007-12-16 17:16:12 +08:00
|
|
|
|
2007-11-18 16:46:26 +08:00
|
|
|
// Otherwise, this wasn't a label. If this was valid as an integer type,
|
|
|
|
// return it.
|
|
|
|
if (IntEnd == 0) IntEnd = CurPtr;
|
|
|
|
if (IntEnd != StartChar) {
|
|
|
|
CurPtr = IntEnd;
|
|
|
|
uint64_t NumBits = atoull(StartChar, CurPtr);
|
2007-12-16 17:16:12 +08:00
|
|
|
if (NumBits < IntegerType::MIN_INT_BITS ||
|
2007-11-18 16:46:26 +08:00
|
|
|
NumBits > IntegerType::MAX_INT_BITS) {
|
2009-01-02 15:01:27 +08:00
|
|
|
Error("bitwidth for integer type out of range!");
|
|
|
|
return lltok::Error;
|
2007-11-18 16:46:26 +08:00
|
|
|
}
|
2009-08-14 05:58:54 +08:00
|
|
|
TyVal = IntegerType::get(Context, NumBits);
|
2009-01-02 15:01:27 +08:00
|
|
|
return lltok::Type;
|
2007-11-18 16:46:26 +08:00
|
|
|
}
|
2007-12-16 17:16:12 +08:00
|
|
|
|
2007-11-18 16:46:26 +08:00
|
|
|
// Otherwise, this was a letter sequence. See which keyword this is.
|
|
|
|
if (KeywordEnd == 0) KeywordEnd = CurPtr;
|
|
|
|
CurPtr = KeywordEnd;
|
|
|
|
--StartChar;
|
|
|
|
unsigned Len = CurPtr-StartChar;
|
2009-01-02 15:01:27 +08:00
|
|
|
#define KEYWORD(STR) \
|
|
|
|
if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) \
|
|
|
|
return lltok::kw_##STR;
|
|
|
|
|
|
|
|
KEYWORD(begin); KEYWORD(end);
|
|
|
|
KEYWORD(true); KEYWORD(false);
|
|
|
|
KEYWORD(declare); KEYWORD(define);
|
|
|
|
KEYWORD(global); KEYWORD(constant);
|
|
|
|
|
2009-01-16 04:18:42 +08:00
|
|
|
KEYWORD(private);
|
2009-07-20 09:03:30 +08:00
|
|
|
KEYWORD(linker_private);
|
2010-07-02 05:55:59 +08:00
|
|
|
KEYWORD(linker_private_weak);
|
2010-08-21 06:05:50 +08:00
|
|
|
KEYWORD(linker_private_weak_def_auto);
|
2009-01-02 15:01:27 +08:00
|
|
|
KEYWORD(internal);
|
2009-04-13 13:44:34 +08:00
|
|
|
KEYWORD(available_externally);
|
2009-01-02 15:01:27 +08:00
|
|
|
KEYWORD(linkonce);
|
Introduce new linkage types linkonce_odr, weak_odr, common_odr
and extern_weak_odr. These are the same as the non-odr versions,
except that they indicate that the global will only be overridden
by an *equivalent* global. In C, a function with weak linkage can
be overridden by a function which behaves completely differently.
This means that IP passes have to skip weak functions, since any
deductions made from the function definition might be wrong, since
the definition could be replaced by something completely different
at link time. This is not allowed in C++, thanks to the ODR
(One-Definition-Rule): if a function is replaced by another at
link-time, then the new function must be the same as the original
function. If a language knows that a function or other global can
only be overridden by an equivalent global, it can give it the
weak_odr linkage type, and the optimizers will understand that it
is alright to make deductions based on the function body. The
code generators on the other hand map weak and weak_odr linkage
to the same thing.
llvm-svn: 66339
2009-03-07 23:45:40 +08:00
|
|
|
KEYWORD(linkonce_odr);
|
2009-01-02 15:01:27 +08:00
|
|
|
KEYWORD(weak);
|
Introduce new linkage types linkonce_odr, weak_odr, common_odr
and extern_weak_odr. These are the same as the non-odr versions,
except that they indicate that the global will only be overridden
by an *equivalent* global. In C, a function with weak linkage can
be overridden by a function which behaves completely differently.
This means that IP passes have to skip weak functions, since any
deductions made from the function definition might be wrong, since
the definition could be replaced by something completely different
at link time. This is not allowed in C++, thanks to the ODR
(One-Definition-Rule): if a function is replaced by another at
link-time, then the new function must be the same as the original
function. If a language knows that a function or other global can
only be overridden by an equivalent global, it can give it the
weak_odr linkage type, and the optimizers will understand that it
is alright to make deductions based on the function body. The
code generators on the other hand map weak and weak_odr linkage
to the same thing.
llvm-svn: 66339
2009-03-07 23:45:40 +08:00
|
|
|
KEYWORD(weak_odr);
|
2009-01-02 15:01:27 +08:00
|
|
|
KEYWORD(appending);
|
|
|
|
KEYWORD(dllimport);
|
|
|
|
KEYWORD(dllexport);
|
|
|
|
KEYWORD(common);
|
|
|
|
KEYWORD(default);
|
|
|
|
KEYWORD(hidden);
|
|
|
|
KEYWORD(protected);
|
2011-01-09 00:42:36 +08:00
|
|
|
KEYWORD(unnamed_addr);
|
2009-01-02 15:01:27 +08:00
|
|
|
KEYWORD(extern_weak);
|
|
|
|
KEYWORD(external);
|
|
|
|
KEYWORD(thread_local);
|
|
|
|
KEYWORD(zeroinitializer);
|
|
|
|
KEYWORD(undef);
|
|
|
|
KEYWORD(null);
|
|
|
|
KEYWORD(to);
|
|
|
|
KEYWORD(tail);
|
|
|
|
KEYWORD(target);
|
|
|
|
KEYWORD(triple);
|
|
|
|
KEYWORD(deplibs);
|
|
|
|
KEYWORD(datalayout);
|
|
|
|
KEYWORD(volatile);
|
2009-07-23 06:44:56 +08:00
|
|
|
KEYWORD(nuw);
|
|
|
|
KEYWORD(nsw);
|
2009-07-21 05:19:07 +08:00
|
|
|
KEYWORD(exact);
|
2009-07-28 05:53:46 +08:00
|
|
|
KEYWORD(inbounds);
|
2009-01-02 15:01:27 +08:00
|
|
|
KEYWORD(align);
|
|
|
|
KEYWORD(addrspace);
|
|
|
|
KEYWORD(section);
|
|
|
|
KEYWORD(alias);
|
|
|
|
KEYWORD(module);
|
|
|
|
KEYWORD(asm);
|
|
|
|
KEYWORD(sideeffect);
|
2009-10-22 07:28:00 +08:00
|
|
|
KEYWORD(alignstack);
|
2009-01-02 15:01:27 +08:00
|
|
|
KEYWORD(gc);
|
|
|
|
|
|
|
|
KEYWORD(ccc);
|
|
|
|
KEYWORD(fastcc);
|
|
|
|
KEYWORD(coldcc);
|
|
|
|
KEYWORD(x86_stdcallcc);
|
|
|
|
KEYWORD(x86_fastcallcc);
|
2010-05-16 17:08:45 +08:00
|
|
|
KEYWORD(x86_thiscallcc);
|
2009-06-17 02:50:49 +08:00
|
|
|
KEYWORD(arm_apcscc);
|
|
|
|
KEYWORD(arm_aapcscc);
|
|
|
|
KEYWORD(arm_aapcs_vfpcc);
|
2009-12-07 10:27:35 +08:00
|
|
|
KEYWORD(msp430_intrcc);
|
2010-09-25 15:46:17 +08:00
|
|
|
KEYWORD(ptx_kernel);
|
|
|
|
KEYWORD(ptx_device);
|
2009-06-17 02:50:49 +08:00
|
|
|
|
2009-01-02 15:01:27 +08:00
|
|
|
KEYWORD(cc);
|
|
|
|
KEYWORD(c);
|
|
|
|
|
|
|
|
KEYWORD(signext);
|
|
|
|
KEYWORD(zeroext);
|
|
|
|
KEYWORD(inreg);
|
|
|
|
KEYWORD(sret);
|
|
|
|
KEYWORD(nounwind);
|
|
|
|
KEYWORD(noreturn);
|
|
|
|
KEYWORD(noalias);
|
|
|
|
KEYWORD(nocapture);
|
|
|
|
KEYWORD(byval);
|
|
|
|
KEYWORD(nest);
|
|
|
|
KEYWORD(readnone);
|
|
|
|
KEYWORD(readonly);
|
|
|
|
|
2010-02-06 09:16:28 +08:00
|
|
|
KEYWORD(inlinehint);
|
2009-01-02 15:01:27 +08:00
|
|
|
KEYWORD(noinline);
|
|
|
|
KEYWORD(alwaysinline);
|
|
|
|
KEYWORD(optsize);
|
|
|
|
KEYWORD(ssp);
|
|
|
|
KEYWORD(sspreq);
|
2009-06-05 06:05:33 +08:00
|
|
|
KEYWORD(noredzone);
|
2009-06-06 05:57:13 +08:00
|
|
|
KEYWORD(noimplicitfloat);
|
2009-07-18 02:07:26 +08:00
|
|
|
KEYWORD(naked);
|
2010-10-25 23:37:09 +08:00
|
|
|
KEYWORD(hotpatch);
|
2009-01-02 15:01:27 +08:00
|
|
|
|
|
|
|
KEYWORD(type);
|
|
|
|
KEYWORD(opaque);
|
|
|
|
|
|
|
|
KEYWORD(eq); KEYWORD(ne); KEYWORD(slt); KEYWORD(sgt); KEYWORD(sle);
|
|
|
|
KEYWORD(sge); KEYWORD(ult); KEYWORD(ugt); KEYWORD(ule); KEYWORD(uge);
|
|
|
|
KEYWORD(oeq); KEYWORD(one); KEYWORD(olt); KEYWORD(ogt); KEYWORD(ole);
|
|
|
|
KEYWORD(oge); KEYWORD(ord); KEYWORD(uno); KEYWORD(ueq); KEYWORD(une);
|
2009-01-03 06:46:48 +08:00
|
|
|
|
2009-01-02 15:01:27 +08:00
|
|
|
KEYWORD(x);
|
2009-10-28 11:39:23 +08:00
|
|
|
KEYWORD(blockaddress);
|
2007-11-18 16:46:26 +08:00
|
|
|
#undef KEYWORD
|
|
|
|
|
|
|
|
// Keywords for types.
|
2009-01-02 15:01:27 +08:00
|
|
|
#define TYPEKEYWORD(STR, LLVMTY) \
|
2007-11-18 16:46:26 +08:00
|
|
|
if (Len == strlen(STR) && !memcmp(StartChar, STR, strlen(STR))) { \
|
2009-01-02 15:01:27 +08:00
|
|
|
TyVal = LLVMTY; return lltok::Type; }
|
2009-08-14 05:58:54 +08:00
|
|
|
TYPEKEYWORD("void", Type::getVoidTy(Context));
|
|
|
|
TYPEKEYWORD("float", Type::getFloatTy(Context));
|
|
|
|
TYPEKEYWORD("double", Type::getDoubleTy(Context));
|
|
|
|
TYPEKEYWORD("x86_fp80", Type::getX86_FP80Ty(Context));
|
|
|
|
TYPEKEYWORD("fp128", Type::getFP128Ty(Context));
|
|
|
|
TYPEKEYWORD("ppc_fp128", Type::getPPC_FP128Ty(Context));
|
|
|
|
TYPEKEYWORD("label", Type::getLabelTy(Context));
|
|
|
|
TYPEKEYWORD("metadata", Type::getMetadataTy(Context));
|
2010-09-11 04:55:01 +08:00
|
|
|
TYPEKEYWORD("x86_mmx", Type::getX86_MMXTy(Context));
|
2007-11-18 16:46:26 +08:00
|
|
|
#undef TYPEKEYWORD
|
|
|
|
|
|
|
|
// Handle special forms for autoupgrading. Drop these in LLVM 3.0. This is
|
|
|
|
// to avoid conflicting with the sext/zext instructions, below.
|
|
|
|
if (Len == 4 && !memcmp(StartChar, "sext", 4)) {
|
|
|
|
// Scan CurPtr ahead, seeing if there is just whitespace before the newline.
|
|
|
|
if (JustWhitespaceNewLine(CurPtr))
|
2009-01-02 15:01:27 +08:00
|
|
|
return lltok::kw_signext;
|
2007-11-18 16:46:26 +08:00
|
|
|
} else if (Len == 4 && !memcmp(StartChar, "zext", 4)) {
|
|
|
|
// Scan CurPtr ahead, seeing if there is just whitespace before the newline.
|
|
|
|
if (JustWhitespaceNewLine(CurPtr))
|
2009-01-02 15:01:27 +08:00
|
|
|
return lltok::kw_zeroext;
|
2009-10-17 08:00:19 +08:00
|
|
|
} else if (Len == 6 && !memcmp(StartChar, "malloc", 6)) {
|
2009-10-18 13:09:15 +08:00
|
|
|
// FIXME: Remove in LLVM 3.0.
|
|
|
|
// Autoupgrade malloc instruction.
|
2009-10-17 08:00:19 +08:00
|
|
|
return lltok::kw_malloc;
|
2009-10-27 07:43:48 +08:00
|
|
|
} else if (Len == 4 && !memcmp(StartChar, "free", 4)) {
|
|
|
|
// FIXME: Remove in LLVM 3.0.
|
|
|
|
// Autoupgrade malloc instruction.
|
|
|
|
return lltok::kw_free;
|
2007-11-18 16:46:26 +08:00
|
|
|
}
|
2007-12-16 17:16:12 +08:00
|
|
|
|
2007-11-18 16:46:26 +08:00
|
|
|
// Keywords for instructions.
|
2009-01-02 15:01:27 +08:00
|
|
|
#define INSTKEYWORD(STR, Enum) \
|
|
|
|
if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) { \
|
|
|
|
UIntVal = Instruction::Enum; return lltok::kw_##STR; }
|
|
|
|
|
2009-06-05 06:49:04 +08:00
|
|
|
INSTKEYWORD(add, Add); INSTKEYWORD(fadd, FAdd);
|
|
|
|
INSTKEYWORD(sub, Sub); INSTKEYWORD(fsub, FSub);
|
|
|
|
INSTKEYWORD(mul, Mul); INSTKEYWORD(fmul, FMul);
|
2009-01-02 15:01:27 +08:00
|
|
|
INSTKEYWORD(udiv, UDiv); INSTKEYWORD(sdiv, SDiv); INSTKEYWORD(fdiv, FDiv);
|
|
|
|
INSTKEYWORD(urem, URem); INSTKEYWORD(srem, SRem); INSTKEYWORD(frem, FRem);
|
|
|
|
INSTKEYWORD(shl, Shl); INSTKEYWORD(lshr, LShr); INSTKEYWORD(ashr, AShr);
|
|
|
|
INSTKEYWORD(and, And); INSTKEYWORD(or, Or); INSTKEYWORD(xor, Xor);
|
|
|
|
INSTKEYWORD(icmp, ICmp); INSTKEYWORD(fcmp, FCmp);
|
|
|
|
|
|
|
|
INSTKEYWORD(phi, PHI);
|
|
|
|
INSTKEYWORD(call, Call);
|
|
|
|
INSTKEYWORD(trunc, Trunc);
|
|
|
|
INSTKEYWORD(zext, ZExt);
|
|
|
|
INSTKEYWORD(sext, SExt);
|
|
|
|
INSTKEYWORD(fptrunc, FPTrunc);
|
|
|
|
INSTKEYWORD(fpext, FPExt);
|
|
|
|
INSTKEYWORD(uitofp, UIToFP);
|
|
|
|
INSTKEYWORD(sitofp, SIToFP);
|
|
|
|
INSTKEYWORD(fptoui, FPToUI);
|
|
|
|
INSTKEYWORD(fptosi, FPToSI);
|
|
|
|
INSTKEYWORD(inttoptr, IntToPtr);
|
|
|
|
INSTKEYWORD(ptrtoint, PtrToInt);
|
|
|
|
INSTKEYWORD(bitcast, BitCast);
|
|
|
|
INSTKEYWORD(select, Select);
|
|
|
|
INSTKEYWORD(va_arg, VAArg);
|
|
|
|
INSTKEYWORD(ret, Ret);
|
|
|
|
INSTKEYWORD(br, Br);
|
|
|
|
INSTKEYWORD(switch, Switch);
|
2009-10-28 08:19:10 +08:00
|
|
|
INSTKEYWORD(indirectbr, IndirectBr);
|
2009-01-02 15:01:27 +08:00
|
|
|
INSTKEYWORD(invoke, Invoke);
|
|
|
|
INSTKEYWORD(unwind, Unwind);
|
|
|
|
INSTKEYWORD(unreachable, Unreachable);
|
|
|
|
|
|
|
|
INSTKEYWORD(alloca, Alloca);
|
|
|
|
INSTKEYWORD(load, Load);
|
|
|
|
INSTKEYWORD(store, Store);
|
|
|
|
INSTKEYWORD(getelementptr, GetElementPtr);
|
|
|
|
|
|
|
|
INSTKEYWORD(extractelement, ExtractElement);
|
|
|
|
INSTKEYWORD(insertelement, InsertElement);
|
|
|
|
INSTKEYWORD(shufflevector, ShuffleVector);
|
|
|
|
INSTKEYWORD(getresult, ExtractValue);
|
|
|
|
INSTKEYWORD(extractvalue, ExtractValue);
|
|
|
|
INSTKEYWORD(insertvalue, InsertValue);
|
2007-12-16 17:16:12 +08:00
|
|
|
#undef INSTKEYWORD
|
|
|
|
|
2007-11-18 16:46:26 +08:00
|
|
|
// Check for [us]0x[0-9A-Fa-f]+ which are Hexadecimal constant generated by
|
|
|
|
// the CFE to avoid forcing it to deal with 64-bit numbers.
|
|
|
|
if ((TokStart[0] == 'u' || TokStart[0] == 's') &&
|
|
|
|
TokStart[1] == '0' && TokStart[2] == 'x' && isxdigit(TokStart[3])) {
|
|
|
|
int len = CurPtr-TokStart-3;
|
|
|
|
uint32_t bits = len * 4;
|
2009-08-13 10:33:34 +08:00
|
|
|
APInt Tmp(bits, StringRef(TokStart+3, len), 16);
|
2007-11-18 16:46:26 +08:00
|
|
|
uint32_t activeBits = Tmp.getActiveBits();
|
|
|
|
if (activeBits > 0 && activeBits < bits)
|
2010-12-07 16:25:19 +08:00
|
|
|
Tmp = Tmp.trunc(activeBits);
|
2009-01-02 15:01:27 +08:00
|
|
|
APSIntVal = APSInt(Tmp, TokStart[0] == 'u');
|
|
|
|
return lltok::APSInt;
|
2007-11-18 16:46:26 +08:00
|
|
|
}
|
2007-12-16 17:16:12 +08:00
|
|
|
|
2007-11-19 02:43:24 +08:00
|
|
|
// If this is "cc1234", return this as just "cc".
|
2007-11-18 16:46:26 +08:00
|
|
|
if (TokStart[0] == 'c' && TokStart[1] == 'c') {
|
|
|
|
CurPtr = TokStart+2;
|
2009-01-02 15:01:27 +08:00
|
|
|
return lltok::kw_cc;
|
2007-11-18 16:46:26 +08:00
|
|
|
}
|
2007-12-16 17:16:12 +08:00
|
|
|
|
2007-11-19 02:43:24 +08:00
|
|
|
// If this starts with "call", return it as CALL. This is to support old
|
|
|
|
// broken .ll files. FIXME: remove this with LLVM 3.0.
|
|
|
|
if (CurPtr-TokStart > 4 && !memcmp(TokStart, "call", 4)) {
|
|
|
|
CurPtr = TokStart+4;
|
2009-01-02 15:01:27 +08:00
|
|
|
UIntVal = Instruction::Call;
|
|
|
|
return lltok::kw_call;
|
2007-11-19 02:43:24 +08:00
|
|
|
}
|
2007-12-16 17:16:12 +08:00
|
|
|
|
2009-01-02 15:01:27 +08:00
|
|
|
// Finally, if this isn't known, return an error.
|
2007-11-18 16:46:26 +08:00
|
|
|
CurPtr = TokStart+1;
|
2009-01-02 15:01:27 +08:00
|
|
|
return lltok::Error;
|
2007-11-18 16:46:26 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Lex0x: Handle productions that start with 0x, knowing that it matches and
|
|
|
|
/// that this is not a label:
|
|
|
|
/// HexFPConstant 0x[0-9A-Fa-f]+
|
|
|
|
/// HexFP80Constant 0xK[0-9A-Fa-f]+
|
|
|
|
/// HexFP128Constant 0xL[0-9A-Fa-f]+
|
|
|
|
/// HexPPC128Constant 0xM[0-9A-Fa-f]+
|
2009-01-02 15:01:27 +08:00
|
|
|
lltok::Kind LLLexer::Lex0x() {
|
2007-11-18 16:46:26 +08:00
|
|
|
CurPtr = TokStart + 2;
|
2007-12-16 17:16:12 +08:00
|
|
|
|
2007-11-18 16:46:26 +08:00
|
|
|
char Kind;
|
|
|
|
if (CurPtr[0] >= 'K' && CurPtr[0] <= 'M') {
|
|
|
|
Kind = *CurPtr++;
|
|
|
|
} else {
|
|
|
|
Kind = 'J';
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!isxdigit(CurPtr[0])) {
|
2009-01-02 15:01:27 +08:00
|
|
|
// Bad token, return it as an error.
|
2007-11-18 16:46:26 +08:00
|
|
|
CurPtr = TokStart+1;
|
2009-01-02 15:01:27 +08:00
|
|
|
return lltok::Error;
|
2007-11-18 16:46:26 +08:00
|
|
|
}
|
2007-12-16 17:16:12 +08:00
|
|
|
|
2007-11-18 16:46:26 +08:00
|
|
|
while (isxdigit(CurPtr[0]))
|
|
|
|
++CurPtr;
|
2007-12-16 17:16:12 +08:00
|
|
|
|
2007-11-18 16:46:26 +08:00
|
|
|
if (Kind == 'J') {
|
|
|
|
// HexFPConstant - Floating point constant represented in IEEE format as a
|
|
|
|
// hexadecimal number for when exponential notation is not precise enough.
|
|
|
|
// Float and double only.
|
2009-01-02 15:01:27 +08:00
|
|
|
APFloatVal = APFloat(BitsToDouble(HexIntToVal(TokStart+2, CurPtr)));
|
|
|
|
return lltok::APFloat;
|
2007-11-18 16:46:26 +08:00
|
|
|
}
|
2007-12-16 17:16:12 +08:00
|
|
|
|
2007-11-18 16:46:26 +08:00
|
|
|
uint64_t Pair[2];
|
|
|
|
switch (Kind) {
|
2009-07-15 00:55:14 +08:00
|
|
|
default: llvm_unreachable("Unknown kind!");
|
2007-11-18 16:46:26 +08:00
|
|
|
case 'K':
|
|
|
|
// F80HexFPConstant - x87 long double in hexadecimal format (10 bytes)
|
2009-03-24 05:16:53 +08:00
|
|
|
FP80HexToIntPair(TokStart+3, CurPtr, Pair);
|
2009-01-02 15:01:27 +08:00
|
|
|
APFloatVal = APFloat(APInt(80, 2, Pair));
|
|
|
|
return lltok::APFloat;
|
2007-11-18 16:46:26 +08:00
|
|
|
case 'L':
|
|
|
|
// F128HexFPConstant - IEEE 128-bit in hexadecimal format (16 bytes)
|
2009-03-24 05:16:53 +08:00
|
|
|
HexToIntPair(TokStart+3, CurPtr, Pair);
|
2009-01-02 15:01:27 +08:00
|
|
|
APFloatVal = APFloat(APInt(128, 2, Pair), true);
|
|
|
|
return lltok::APFloat;
|
2007-11-18 16:46:26 +08:00
|
|
|
case 'M':
|
|
|
|
// PPC128HexFPConstant - PowerPC 128-bit in hexadecimal format (16 bytes)
|
2009-03-24 05:16:53 +08:00
|
|
|
HexToIntPair(TokStart+3, CurPtr, Pair);
|
2009-01-02 15:01:27 +08:00
|
|
|
APFloatVal = APFloat(APInt(128, 2, Pair));
|
|
|
|
return lltok::APFloat;
|
2007-11-18 16:46:26 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// LexIdentifier: Handle several related productions:
|
|
|
|
/// Label [-a-zA-Z$._0-9]+:
|
|
|
|
/// NInteger -[0-9]+
|
|
|
|
/// FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
|
|
|
|
/// PInteger [0-9]+
|
|
|
|
/// HexFPConstant 0x[0-9A-Fa-f]+
|
|
|
|
/// HexFP80Constant 0xK[0-9A-Fa-f]+
|
|
|
|
/// HexFP128Constant 0xL[0-9A-Fa-f]+
|
|
|
|
/// HexPPC128Constant 0xM[0-9A-Fa-f]+
|
2009-01-02 15:01:27 +08:00
|
|
|
lltok::Kind LLLexer::LexDigitOrNegative() {
|
2007-11-18 16:46:26 +08:00
|
|
|
// If the letter after the negative is a number, this is probably a label.
|
|
|
|
if (!isdigit(TokStart[0]) && !isdigit(CurPtr[0])) {
|
|
|
|
// Okay, this is not a number after the -, it's probably a label.
|
|
|
|
if (const char *End = isLabelTail(CurPtr)) {
|
2009-01-02 15:01:27 +08:00
|
|
|
StrVal.assign(TokStart, End-1);
|
2007-11-18 16:46:26 +08:00
|
|
|
CurPtr = End;
|
2009-01-02 15:01:27 +08:00
|
|
|
return lltok::LabelStr;
|
2007-11-18 16:46:26 +08:00
|
|
|
}
|
2007-12-16 17:16:12 +08:00
|
|
|
|
2009-01-02 15:01:27 +08:00
|
|
|
return lltok::Error;
|
2007-11-18 16:46:26 +08:00
|
|
|
}
|
2007-12-16 17:16:12 +08:00
|
|
|
|
2007-11-18 16:46:26 +08:00
|
|
|
// At this point, it is either a label, int or fp constant.
|
2007-12-16 17:16:12 +08:00
|
|
|
|
2007-11-18 16:46:26 +08:00
|
|
|
// Skip digits, we have at least one.
|
2007-12-16 17:16:12 +08:00
|
|
|
for (; isdigit(CurPtr[0]); ++CurPtr)
|
|
|
|
/*empty*/;
|
|
|
|
|
2007-11-18 16:46:26 +08:00
|
|
|
// Check to see if this really is a label afterall, e.g. "-1:".
|
|
|
|
if (isLabelChar(CurPtr[0]) || CurPtr[0] == ':') {
|
|
|
|
if (const char *End = isLabelTail(CurPtr)) {
|
2009-01-02 15:01:27 +08:00
|
|
|
StrVal.assign(TokStart, End-1);
|
2007-11-18 16:46:26 +08:00
|
|
|
CurPtr = End;
|
2009-01-02 15:01:27 +08:00
|
|
|
return lltok::LabelStr;
|
2007-11-18 16:46:26 +08:00
|
|
|
}
|
|
|
|
}
|
2007-12-16 17:16:12 +08:00
|
|
|
|
2007-11-18 16:46:26 +08:00
|
|
|
// If the next character is a '.', then it is a fp value, otherwise its
|
|
|
|
// integer.
|
|
|
|
if (CurPtr[0] != '.') {
|
|
|
|
if (TokStart[0] == '0' && TokStart[1] == 'x')
|
|
|
|
return Lex0x();
|
|
|
|
unsigned Len = CurPtr-TokStart;
|
|
|
|
uint32_t numBits = ((Len * 64) / 19) + 2;
|
2009-08-13 10:33:34 +08:00
|
|
|
APInt Tmp(numBits, StringRef(TokStart, Len), 10);
|
2007-11-18 16:46:26 +08:00
|
|
|
if (TokStart[0] == '-') {
|
|
|
|
uint32_t minBits = Tmp.getMinSignedBits();
|
|
|
|
if (minBits > 0 && minBits < numBits)
|
2010-12-07 16:25:19 +08:00
|
|
|
Tmp = Tmp.trunc(minBits);
|
2009-01-02 15:01:27 +08:00
|
|
|
APSIntVal = APSInt(Tmp, false);
|
2007-11-18 16:46:26 +08:00
|
|
|
} else {
|
|
|
|
uint32_t activeBits = Tmp.getActiveBits();
|
|
|
|
if (activeBits > 0 && activeBits < numBits)
|
2010-12-07 16:25:19 +08:00
|
|
|
Tmp = Tmp.trunc(activeBits);
|
2009-01-02 15:01:27 +08:00
|
|
|
APSIntVal = APSInt(Tmp, true);
|
2007-11-18 16:46:26 +08:00
|
|
|
}
|
2009-01-02 15:01:27 +08:00
|
|
|
return lltok::APSInt;
|
2007-11-18 16:46:26 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
++CurPtr;
|
2007-12-16 17:16:12 +08:00
|
|
|
|
2007-11-18 16:46:26 +08:00
|
|
|
// Skip over [0-9]*([eE][-+]?[0-9]+)?
|
|
|
|
while (isdigit(CurPtr[0])) ++CurPtr;
|
2007-12-16 17:16:12 +08:00
|
|
|
|
2007-11-18 16:46:26 +08:00
|
|
|
if (CurPtr[0] == 'e' || CurPtr[0] == 'E') {
|
2007-12-16 17:16:12 +08:00
|
|
|
if (isdigit(CurPtr[1]) ||
|
2007-11-18 16:46:26 +08:00
|
|
|
((CurPtr[1] == '-' || CurPtr[1] == '+') && isdigit(CurPtr[2]))) {
|
|
|
|
CurPtr += 2;
|
|
|
|
while (isdigit(CurPtr[0])) ++CurPtr;
|
|
|
|
}
|
|
|
|
}
|
2007-12-16 17:16:12 +08:00
|
|
|
|
2010-12-20 04:42:43 +08:00
|
|
|
APFloatVal = APFloat(std::atof(TokStart));
|
2009-01-02 15:01:27 +08:00
|
|
|
return lltok::APFloat;
|
2007-11-18 16:46:26 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/// FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
|
2009-01-02 15:01:27 +08:00
|
|
|
lltok::Kind LLLexer::LexPositive() {
|
2007-11-18 16:46:26 +08:00
|
|
|
// If the letter after the negative is a number, this is probably not a
|
|
|
|
// label.
|
|
|
|
if (!isdigit(CurPtr[0]))
|
2009-01-02 15:01:27 +08:00
|
|
|
return lltok::Error;
|
2007-12-16 17:16:12 +08:00
|
|
|
|
2007-11-18 16:46:26 +08:00
|
|
|
// Skip digits.
|
2007-12-16 17:16:12 +08:00
|
|
|
for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr)
|
|
|
|
/*empty*/;
|
2007-11-18 16:46:26 +08:00
|
|
|
|
|
|
|
// At this point, we need a '.'.
|
|
|
|
if (CurPtr[0] != '.') {
|
|
|
|
CurPtr = TokStart+1;
|
2009-01-02 15:01:27 +08:00
|
|
|
return lltok::Error;
|
2007-11-18 16:46:26 +08:00
|
|
|
}
|
2007-12-16 17:16:12 +08:00
|
|
|
|
2007-11-18 16:46:26 +08:00
|
|
|
++CurPtr;
|
2007-12-16 17:16:12 +08:00
|
|
|
|
2007-11-18 16:46:26 +08:00
|
|
|
// Skip over [0-9]*([eE][-+]?[0-9]+)?
|
|
|
|
while (isdigit(CurPtr[0])) ++CurPtr;
|
2007-12-16 17:16:12 +08:00
|
|
|
|
2007-11-18 16:46:26 +08:00
|
|
|
if (CurPtr[0] == 'e' || CurPtr[0] == 'E') {
|
2007-12-16 17:16:12 +08:00
|
|
|
if (isdigit(CurPtr[1]) ||
|
2007-11-18 16:46:26 +08:00
|
|
|
((CurPtr[1] == '-' || CurPtr[1] == '+') && isdigit(CurPtr[2]))) {
|
|
|
|
CurPtr += 2;
|
|
|
|
while (isdigit(CurPtr[0])) ++CurPtr;
|
|
|
|
}
|
|
|
|
}
|
2007-12-16 17:16:12 +08:00
|
|
|
|
2010-12-20 04:42:43 +08:00
|
|
|
APFloatVal = APFloat(std::atof(TokStart));
|
2009-01-02 15:01:27 +08:00
|
|
|
return lltok::APFloat;
|
2007-11-18 16:46:26 +08:00
|
|
|
}
|