forked from OSchip/llvm-project
Make readExpr return an Expr object instead of a vector of tokens.
Previously, we handled an expression as a vector of tokens. In other words, an expression was a vector of uncooked raw StringRefs. When we need a value of an expression, we used ExprParser to run the expression. The separation was needed essentially because parse time is too early to evaluate an expression. In order to evaluate an expression, we need to finalize section sizes. Because linker script parsing is done at very early stage of the linking process, we can't evaluate expressions while parsing. The above mechanism worked fairly well, but there were a few drawbacks. One thing is that we sometimes have to parse the same expression more than once in order to find the end of the expression. In some contexts, linker script expressions have no clear end marker. So, we needed to recognize balanced expressions and ternary operators. The other is poor error reporting. Since expressions are parsed basically twice, and some information that is available at the first stage is lost in the second stage, it was hard to print out apprpriate error messages. This patch fixes the issues with a new approach. Now the expression parsing is integrated into ScriptParser. ExprParser class is removed. Expressions are represented as lambdas instead of vectors of tokens. Lambdas captures information they need to run themselves when they are created. In this way, ends of expressions are naturally detected, and errors are handled in the usual way. This patch also reduces the amount of code. Differential Revision: https://reviews.llvm.org/D22728 llvm-svn: 276574
This commit is contained in:
parent
85c25b4d50
commit
708019c41b
|
@ -55,188 +55,6 @@ bool InputSectionDescription::classof(const BaseCommand *C) {
|
|||
return C->Kind == InputSectionKind;
|
||||
}
|
||||
|
||||
// This is an operator-precedence parser to parse and evaluate
|
||||
// a linker script expression. For each linker script arithmetic
|
||||
// expression (e.g. ". = . + 0x1000"), a new instance of ExprParser
|
||||
// is created and ran.
|
||||
namespace {
|
||||
class ExprParser : public ScriptParserBase {
|
||||
public:
|
||||
ExprParser(std::vector<StringRef> &Tokens, uint64_t Dot)
|
||||
: ScriptParserBase(Tokens), Dot(Dot) {}
|
||||
|
||||
uint64_t run();
|
||||
|
||||
private:
|
||||
uint64_t parsePrimary();
|
||||
uint64_t parseTernary(uint64_t Cond);
|
||||
uint64_t apply(StringRef Op, uint64_t L, uint64_t R);
|
||||
uint64_t parseExpr1(uint64_t Lhs, int MinPrec);
|
||||
uint64_t parseExpr();
|
||||
|
||||
uint64_t Dot;
|
||||
};
|
||||
}
|
||||
|
||||
static int precedence(StringRef Op) {
|
||||
return StringSwitch<int>(Op)
|
||||
.Case("*", 4)
|
||||
.Case("/", 4)
|
||||
.Case("+", 3)
|
||||
.Case("-", 3)
|
||||
.Case("<", 2)
|
||||
.Case(">", 2)
|
||||
.Case(">=", 2)
|
||||
.Case("<=", 2)
|
||||
.Case("==", 2)
|
||||
.Case("!=", 2)
|
||||
.Case("&", 1)
|
||||
.Default(-1);
|
||||
}
|
||||
|
||||
static uint64_t evalExpr(std::vector<StringRef> &Tokens, uint64_t Dot) {
|
||||
return ExprParser(Tokens, Dot).run();
|
||||
}
|
||||
|
||||
uint64_t ExprParser::run() {
|
||||
uint64_t V = parseExpr();
|
||||
if (!atEOF() && !Error)
|
||||
setError("stray token: " + peek());
|
||||
return V;
|
||||
}
|
||||
|
||||
uint64_t static getConstantValue(StringRef C) {
|
||||
if (C == "COMMONPAGESIZE" || C == "MAXPAGESIZE")
|
||||
return Target->PageSize;
|
||||
error("unknown constant: " + C);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// This is a part of the operator-precedence parser to evaluate
|
||||
// arithmetic expressions in SECTIONS command. This function evaluates an
|
||||
// integer literal, a parenthesized expression, the ALIGN function,
|
||||
// or the special variable ".".
|
||||
uint64_t ExprParser::parsePrimary() {
|
||||
StringRef Tok = next();
|
||||
if (Tok == ".")
|
||||
return Dot;
|
||||
if (Tok == "(") {
|
||||
uint64_t V = parseExpr();
|
||||
expect(")");
|
||||
return V;
|
||||
}
|
||||
if (Tok == "ALIGN") {
|
||||
expect("(");
|
||||
uint64_t V = parseExpr();
|
||||
expect(")");
|
||||
return alignTo(Dot, V);
|
||||
}
|
||||
if (Tok == "CONSTANT") {
|
||||
expect("(");
|
||||
uint64_t V = getConstantValue(next());
|
||||
expect(")");
|
||||
return V;
|
||||
}
|
||||
// Documentations says there are two ways to compute
|
||||
// the value of DATA_SEGMENT_ALIGN command, depending on whether the second
|
||||
// uses fewer COMMONPAGESIZE sized pages for the data segment(area between the
|
||||
// result of this expression and `DATA_SEGMENT_END') than the first or not.
|
||||
// That is possible optimization, that we do not support, so we compute that
|
||||
// function always as (ALIGN(MAXPAGESIZE) + (. & (MAXPAGESIZE - 1))) now.
|
||||
if (Tok == "DATA_SEGMENT_ALIGN") {
|
||||
expect("(");
|
||||
uint64_t L = parseExpr();
|
||||
expect(",");
|
||||
parseExpr();
|
||||
expect(")");
|
||||
return alignTo(Dot, L) + (Dot & (L - 1));
|
||||
}
|
||||
// Since we do not support the optimization from comment above,
|
||||
// we can just ignore that command.
|
||||
if (Tok == "DATA_SEGMENT_END") {
|
||||
expect("(");
|
||||
expect(".");
|
||||
expect(")");
|
||||
return Dot;
|
||||
}
|
||||
uint64_t V = 0;
|
||||
if (Tok.getAsInteger(0, V))
|
||||
setError("malformed number: " + Tok);
|
||||
return V;
|
||||
}
|
||||
|
||||
uint64_t ExprParser::parseTernary(uint64_t Cond) {
|
||||
next();
|
||||
uint64_t V = parseExpr();
|
||||
expect(":");
|
||||
uint64_t W = parseExpr();
|
||||
return Cond ? V : W;
|
||||
}
|
||||
|
||||
uint64_t ExprParser::apply(StringRef Op, uint64_t L, uint64_t R) {
|
||||
if (Op == "*")
|
||||
return L * R;
|
||||
if (Op == "/") {
|
||||
if (R == 0) {
|
||||
error("division by zero");
|
||||
return 0;
|
||||
}
|
||||
return L / R;
|
||||
}
|
||||
if (Op == "+")
|
||||
return L + R;
|
||||
if (Op == "-")
|
||||
return L - R;
|
||||
if (Op == "<")
|
||||
return L < R;
|
||||
if (Op == ">")
|
||||
return L > R;
|
||||
if (Op == ">=")
|
||||
return L >= R;
|
||||
if (Op == "<=")
|
||||
return L <= R;
|
||||
if (Op == "==")
|
||||
return L == R;
|
||||
if (Op == "!=")
|
||||
return L != R;
|
||||
if (Op == "&")
|
||||
return L & R;
|
||||
llvm_unreachable("invalid operator");
|
||||
}
|
||||
|
||||
// This is a part of the operator-precedence parser.
|
||||
// This function assumes that the remaining token stream starts
|
||||
// with an operator.
|
||||
uint64_t ExprParser::parseExpr1(uint64_t Lhs, int MinPrec) {
|
||||
while (!atEOF()) {
|
||||
// Read an operator and an expression.
|
||||
StringRef Op1 = peek();
|
||||
if (Op1 == "?")
|
||||
return parseTernary(Lhs);
|
||||
if (precedence(Op1) < MinPrec)
|
||||
return Lhs;
|
||||
next();
|
||||
uint64_t Rhs = parsePrimary();
|
||||
|
||||
// Evaluate the remaining part of the expression first if the
|
||||
// next operator has greater precedence than the previous one.
|
||||
// For example, if we have read "+" and "3", and if the next
|
||||
// operator is "*", then we'll evaluate 3 * ... part first.
|
||||
while (!atEOF()) {
|
||||
StringRef Op2 = peek();
|
||||
if (precedence(Op2) <= precedence(Op1))
|
||||
break;
|
||||
Rhs = parseExpr1(Rhs, precedence(Op2));
|
||||
}
|
||||
|
||||
Lhs = apply(Op1, Lhs, Rhs);
|
||||
}
|
||||
return Lhs;
|
||||
}
|
||||
|
||||
// Reads and evaluates an arithmetic expression.
|
||||
uint64_t ExprParser::parseExpr() { return parseExpr1(parsePrimary(), 0); }
|
||||
|
||||
template <class ELFT> static bool isDiscarded(InputSectionBase<ELFT> *S) {
|
||||
return !S || !S->Live;
|
||||
}
|
||||
|
@ -334,7 +152,7 @@ LinkerScript<ELFT>::createSections(OutputSectionFactory<ELFT> &Factory) {
|
|||
|
||||
template <class ELFT>
|
||||
void LinkerScript<ELFT>::dispatchAssignment(SymbolAssignment *Cmd) {
|
||||
uint64_t Val = evalExpr(Cmd->Expr, Dot);
|
||||
uint64_t Val = Cmd->Expression(Dot);
|
||||
if (Cmd->Name == ".") {
|
||||
Dot = Val;
|
||||
} else if (!Cmd->Ignore) {
|
||||
|
@ -597,13 +415,18 @@ private:
|
|||
void readSearchDir();
|
||||
void readSections();
|
||||
|
||||
void readLocationCounterValue();
|
||||
void readAssignment();
|
||||
void readOutputSectionDescription(StringRef OutSec);
|
||||
std::vector<StringRef> readOutputSectionPhdrs();
|
||||
unsigned readPhdrType();
|
||||
void readProvide(bool Hidden);
|
||||
SymbolAssignment *readSymbolAssignment(StringRef Name);
|
||||
std::vector<StringRef> readSectionsCommandExpr();
|
||||
|
||||
Expr readExpr();
|
||||
Expr readExpr1(Expr Lhs, int MinPrec);
|
||||
Expr readPrimary();
|
||||
Expr readTernary(Expr Cond);
|
||||
Expr combine(StringRef Op, Expr Lhs, Expr Rhs);
|
||||
|
||||
const static StringMap<Handler> Cmd;
|
||||
ScriptConfiguration &Opt = *ScriptConfig;
|
||||
|
@ -795,29 +618,45 @@ void ScriptParser::readSections() {
|
|||
while (!Error && !skip("}")) {
|
||||
StringRef Tok = peek();
|
||||
if (Tok == ".") {
|
||||
readLocationCounterValue();
|
||||
readAssignment();
|
||||
continue;
|
||||
}
|
||||
next();
|
||||
if (Tok == "PROVIDE")
|
||||
if (Tok == "PROVIDE") {
|
||||
readProvide(false);
|
||||
else if (Tok == "PROVIDE_HIDDEN")
|
||||
} else if (Tok == "PROVIDE_HIDDEN") {
|
||||
readProvide(true);
|
||||
else if (peek() == "=")
|
||||
} else if (peek() == "=") {
|
||||
readSymbolAssignment(Tok);
|
||||
else
|
||||
expect(";");
|
||||
} else {
|
||||
readOutputSectionDescription(Tok);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ScriptParser::readLocationCounterValue() {
|
||||
static int precedence(StringRef Op) {
|
||||
return StringSwitch<int>(Op)
|
||||
.Case("*", 4)
|
||||
.Case("/", 4)
|
||||
.Case("+", 3)
|
||||
.Case("-", 3)
|
||||
.Case("<", 2)
|
||||
.Case(">", 2)
|
||||
.Case(">=", 2)
|
||||
.Case("<=", 2)
|
||||
.Case("==", 2)
|
||||
.Case("!=", 2)
|
||||
.Case("&", 1)
|
||||
.Default(-1);
|
||||
}
|
||||
|
||||
void ScriptParser::readAssignment() {
|
||||
expect(".");
|
||||
expect("=");
|
||||
std::vector<StringRef> Expr = readSectionsCommandExpr();
|
||||
if (Expr.empty())
|
||||
error("error in location counter expression");
|
||||
else
|
||||
Opt.Commands.push_back(llvm::make_unique<SymbolAssignment>(".", Expr));
|
||||
Expr E = readExpr();
|
||||
expect(";");
|
||||
Opt.Commands.push_back(llvm::make_unique<SymbolAssignment>(".", E));
|
||||
}
|
||||
|
||||
void ScriptParser::readOutputSectionDescription(StringRef OutSec) {
|
||||
|
@ -881,35 +720,142 @@ void ScriptParser::readProvide(bool Hidden) {
|
|||
|
||||
SymbolAssignment *ScriptParser::readSymbolAssignment(StringRef Name) {
|
||||
expect("=");
|
||||
std::vector<StringRef> Expr = readSectionsCommandExpr();
|
||||
if (Expr.empty()) {
|
||||
error("error in symbol assignment expression");
|
||||
} else {
|
||||
Opt.Commands.push_back(llvm::make_unique<SymbolAssignment>(Name, Expr));
|
||||
Expr E = readExpr();
|
||||
Opt.Commands.push_back(llvm::make_unique<SymbolAssignment>(Name, E));
|
||||
return static_cast<SymbolAssignment *>(Opt.Commands.back().get());
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// This function reads balanced expression until semicolon is seen.
|
||||
std::vector<StringRef> ScriptParser::readSectionsCommandExpr() {
|
||||
int Braces = 0;
|
||||
std::vector<StringRef> Expr;
|
||||
while (!Error) {
|
||||
StringRef Tok = peek();
|
||||
// This is an operator-precedence parser to parse a linker
|
||||
// script expression.
|
||||
Expr ScriptParser::readExpr() { return readExpr1(readPrimary(), 0); }
|
||||
|
||||
if (Tok == "(")
|
||||
Braces++;
|
||||
else if (Tok == ")")
|
||||
if (--Braces < 0)
|
||||
// This is a part of the operator-precedence parser. This function
|
||||
// assumes that the remaining token stream starts with an operator.
|
||||
Expr ScriptParser::readExpr1(Expr Lhs, int MinPrec) {
|
||||
while (!atEOF() && !Error) {
|
||||
// Read an operator and an expression.
|
||||
StringRef Op1 = peek();
|
||||
if (Op1 == "?")
|
||||
return readTernary(Lhs);
|
||||
if (precedence(Op1) < MinPrec)
|
||||
break;
|
||||
|
||||
next();
|
||||
if (Tok == ";")
|
||||
Expr Rhs = readPrimary();
|
||||
|
||||
// Evaluate the remaining part of the expression first if the
|
||||
// next operator has greater precedence than the previous one.
|
||||
// For example, if we have read "+" and "3", and if the next
|
||||
// operator is "*", then we'll evaluate 3 * ... part first.
|
||||
while (!atEOF()) {
|
||||
StringRef Op2 = peek();
|
||||
if (precedence(Op2) <= precedence(Op1))
|
||||
break;
|
||||
Expr.push_back(Tok);
|
||||
Rhs = readExpr1(Rhs, precedence(Op2));
|
||||
}
|
||||
return Expr;
|
||||
|
||||
Lhs = combine(Op1, Lhs, Rhs);
|
||||
}
|
||||
return Lhs;
|
||||
}
|
||||
|
||||
uint64_t static getConstant(StringRef S) {
|
||||
if (S == "COMMONPAGESIZE" || S == "MAXPAGESIZE")
|
||||
return Target->PageSize;
|
||||
error("unknown constant: " + S);
|
||||
return 0;
|
||||
}
|
||||
|
||||
Expr ScriptParser::readPrimary() {
|
||||
StringRef Tok = next();
|
||||
|
||||
if (Tok == ".")
|
||||
return [](uint64_t Dot) { return Dot; };
|
||||
|
||||
if (Tok == "(") {
|
||||
Expr E = readExpr();
|
||||
expect(")");
|
||||
return E;
|
||||
}
|
||||
|
||||
// Built-in functions are parsed here.
|
||||
// https://sourceware.org/binutils/docs/ld/Builtin-Functions.html.
|
||||
if (Tok == "ALIGN") {
|
||||
expect("(");
|
||||
Expr E = readExpr();
|
||||
expect(")");
|
||||
return [=](uint64_t Dot) { return alignTo(Dot, E(Dot)); };
|
||||
}
|
||||
if (Tok == "CONSTANT") {
|
||||
expect("(");
|
||||
StringRef Tok = next();
|
||||
expect(")");
|
||||
return [=](uint64_t Dot) { return getConstant(Tok); };
|
||||
}
|
||||
if (Tok == "DATA_SEGMENT_ALIGN") {
|
||||
expect("(");
|
||||
Expr E = readExpr();
|
||||
expect(",");
|
||||
readExpr();
|
||||
expect(")");
|
||||
return [=](uint64_t Dot) -> uint64_t {
|
||||
uint64_t Val = E(Dot);
|
||||
return alignTo(Dot, Val) + (Dot & (Val - 1));
|
||||
};
|
||||
}
|
||||
if (Tok == "DATA_SEGMENT_END") {
|
||||
expect("(");
|
||||
expect(".");
|
||||
expect(")");
|
||||
return [](uint64_t Dot) { return Dot; };
|
||||
}
|
||||
|
||||
// Parse a number literal
|
||||
uint64_t V = 0;
|
||||
if (Tok.getAsInteger(0, V))
|
||||
setError("malformed number: " + Tok);
|
||||
return [=](uint64_t Dot) { return V; };
|
||||
}
|
||||
|
||||
Expr ScriptParser::readTernary(Expr Cond) {
|
||||
next();
|
||||
Expr L = readExpr();
|
||||
expect(":");
|
||||
Expr R = readExpr();
|
||||
return [=](uint64_t Dot) { return Cond(Dot) ? L(Dot) : R(Dot); };
|
||||
}
|
||||
|
||||
Expr ScriptParser::combine(StringRef Op, Expr L, Expr R) {
|
||||
if (Op == "*")
|
||||
return [=](uint64_t Dot) { return L(Dot) * R(Dot); };
|
||||
if (Op == "/") {
|
||||
return [=](uint64_t Dot) -> uint64_t {
|
||||
uint64_t RHS = R(Dot);
|
||||
if (RHS == 0) {
|
||||
error("division by zero");
|
||||
return 0;
|
||||
}
|
||||
return L(Dot) / RHS;
|
||||
};
|
||||
}
|
||||
if (Op == "+")
|
||||
return [=](uint64_t Dot) { return L(Dot) + R(Dot); };
|
||||
if (Op == "-")
|
||||
return [=](uint64_t Dot) { return L(Dot) - R(Dot); };
|
||||
if (Op == "<")
|
||||
return [=](uint64_t Dot) { return L(Dot) < R(Dot); };
|
||||
if (Op == ">")
|
||||
return [=](uint64_t Dot) { return L(Dot) > R(Dot); };
|
||||
if (Op == ">=")
|
||||
return [=](uint64_t Dot) { return L(Dot) >= R(Dot); };
|
||||
if (Op == "<=")
|
||||
return [=](uint64_t Dot) { return L(Dot) <= R(Dot); };
|
||||
if (Op == "==")
|
||||
return [=](uint64_t Dot) { return L(Dot) == R(Dot); };
|
||||
if (Op == "!=")
|
||||
return [=](uint64_t Dot) { return L(Dot) != R(Dot); };
|
||||
if (Op == "&")
|
||||
return [=](uint64_t Dot) { return L(Dot) & R(Dot); };
|
||||
llvm_unreachable("invalid operator");
|
||||
}
|
||||
|
||||
std::vector<StringRef> ScriptParser::readOutputSectionPhdrs() {
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
#include "llvm/ADT/MapVector.h"
|
||||
#include "llvm/Support/Allocator.h"
|
||||
#include "llvm/Support/MemoryBuffer.h"
|
||||
#include <functional>
|
||||
|
||||
namespace lld {
|
||||
namespace elf {
|
||||
|
@ -23,6 +24,8 @@ template <class ELFT> class InputSectionBase;
|
|||
template <class ELFT> class OutputSectionBase;
|
||||
template <class ELFT> class OutputSectionFactory;
|
||||
|
||||
typedef std::function<uint64_t(uint64_t)> Expr;
|
||||
|
||||
// Parses a linker script. Calling this function updates
|
||||
// Config and ScriptConfig.
|
||||
void readLinkerScript(MemoryBufferRef MB);
|
||||
|
@ -46,11 +49,11 @@ struct BaseCommand {
|
|||
};
|
||||
|
||||
struct SymbolAssignment : BaseCommand {
|
||||
SymbolAssignment(StringRef Name, std::vector<StringRef> &Expr)
|
||||
: BaseCommand(AssignmentKind), Name(Name), Expr(std::move(Expr)) {}
|
||||
SymbolAssignment(StringRef Name, Expr E)
|
||||
: BaseCommand(AssignmentKind), Name(Name), Expression(E) {}
|
||||
static bool classof(const BaseCommand *C);
|
||||
StringRef Name;
|
||||
std::vector<StringRef> Expr;
|
||||
Expr Expression;
|
||||
bool Provide = false;
|
||||
// Hidden and Ignore can be true, only if Provide is true
|
||||
bool Hidden = false;
|
||||
|
|
|
@ -311,7 +311,7 @@
|
|||
# RUN: }" > %t.script
|
||||
# RUN: not ld.lld %t --script %t.script -o %t2 2>&1 | \
|
||||
# RUN: FileCheck --check-prefix=BRACKETERR %s
|
||||
# BRACKETERR: unexpected EOF
|
||||
# BRACKETERR: ) expected, but got ;
|
||||
|
||||
## Missing opening bracket.
|
||||
# RUN: echo "SECTIONS { \
|
||||
|
@ -319,7 +319,7 @@
|
|||
# RUN: }" > %t.script
|
||||
# RUN: not ld.lld %t --script %t.script -o %t2 2>&1 | \
|
||||
# RUN: FileCheck --check-prefix=BRACKETERR2 %s
|
||||
# BRACKETERR2: expected, but got *
|
||||
# BRACKETERR2: ; expected, but got )
|
||||
|
||||
## Empty expression.
|
||||
# RUN: echo "SECTIONS { \
|
||||
|
@ -327,7 +327,7 @@
|
|||
# RUN: }" > %t.script
|
||||
# RUN: not ld.lld %t --script %t.script -o %t2 2>&1 | \
|
||||
# RUN: FileCheck --check-prefix=ERREXPR %s
|
||||
# ERREXPR: error in location counter expression
|
||||
# ERREXPR: malformed number: ;
|
||||
|
||||
## Div by zero error.
|
||||
# RUN: echo "SECTIONS { \
|
||||
|
@ -343,7 +343,7 @@
|
|||
# RUN: }" > %t.script
|
||||
# RUN: not ld.lld %t --script %t.script -o %t2 2>&1 | \
|
||||
# RUN: FileCheck --check-prefix=TERNERR %s
|
||||
# TERNERR: unexpected EOF
|
||||
# TERNERR: : expected, but got ;
|
||||
|
||||
.globl _start
|
||||
_start:
|
||||
|
|
Loading…
Reference in New Issue