Implement enough of a lexer and parser for MLIR to parse extfunc's without

arguments.

PiperOrigin-RevId: 201706570
This commit is contained in:
Chris Lattner 2018-06-22 10:39:19 -07:00 committed by jpienaar
parent 5fc587ecf8
commit 9b9f7ff5d4
9 changed files with 603 additions and 19 deletions

View File

@ -0,0 +1,38 @@
//===- Parser.h - MLIR Parser Library Interface -----------------*- C++ -*-===//
//
// Copyright 2019 The MLIR Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
//
// This file contains the interface to the MLIR parser library.
//
//===----------------------------------------------------------------------===//
#ifndef MLIR_PARSER_H
#define MLIR_PARSER_H
namespace llvm {
class SourceMgr;
}
namespace mlir {
class Module;
/// This parses the file specified by the indicated SourceMgr and returns an
/// MLIR module if it was valid. If not, it emits diagnostics and returns null.
///
/// The lexer reads the SourceMgr's main file buffer, so that buffer must have
/// been registered before this is called.  On success the module is released
/// from the parser's internal owner, so the caller is responsible for
/// destroying the returned pointer.
Module *parseSourceFile(llvm::SourceMgr &sourceMgr);
} // end namespace mlir
#endif // MLIR_PARSER_H

137
mlir/lib/Parser/Lexer.cpp Normal file
View File

@ -0,0 +1,137 @@
//===- Lexer.cpp - MLIR Lexer Implementation ------------------------------===//
//
// Copyright 2019 The MLIR Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
//
// This file implements the lexer for the MLIR textual form.
//
//===----------------------------------------------------------------------===//
#include "Lexer.h"
#include "llvm/Support/SourceMgr.h"
using namespace mlir;
using llvm::SMLoc;
using llvm::SourceMgr;
/// Set up a lexer over the main file buffer of `sourceMgr`, with the cursor
/// placed on the first character of that buffer.
Lexer::Lexer(llvm::SourceMgr &sourceMgr) : sourceMgr(sourceMgr) {
  const auto mainFileID = sourceMgr.getMainFileID();
  const auto *mainBuffer = sourceMgr.getMemoryBuffer(mainFileID);
  curBuffer = mainBuffer->getBuffer();
  curPtr = curBuffer.begin();
}
/// emitError - Print `message` as an error diagnostic located at `loc`, then
/// return a Token::error token that begins at `loc` and extends to the
/// current lexer position.
Token Lexer::emitError(const char *loc, const Twine &message) {
  // TODO(clattner): If/when we want to implement a -verify mode, this will need
  // to package up errors into SMDiagnostic and report them.
  const SMLoc errorLoc = SMLoc::getFromPointer(loc);
  sourceMgr.PrintMessage(errorLoc, SourceMgr::DK_Error, message);

  return formToken(Token::error, loc);
}
/// Lex and return the next token, starting at the current position.
///
/// Whitespace, embedded nul bytes, and ';' comments are skipped.  Returns a
/// Token::eof token once the nul terminator at the end of the buffer is
/// reached, and a Token::error token (after printing a diagnostic) when an
/// unexpected character is encountered.
Token Lexer::lexToken() {
  // Skip whitespace with a loop instead of recursing, so a long run of blank
  // characters cannot grow the call stack.
  while (true) {
    const char *tokStart = curPtr;
    switch (*curPtr++) {
    default:
      // Handle bare identifiers.  The <cctype> classifiers are only defined for
      // values representable as unsigned char, so convert before calling; a
      // plain (possibly signed) char >= 0x80 would otherwise be undefined.
      if (isalpha(static_cast<unsigned char>(*tokStart)))
        return lexBareIdentifierOrKeyword(tokStart);

      // Unknown character, emit an error.
      return emitError(tokStart, "unexpected character");

    case 0:
      // This may either be a nul character in the source file or may be the EOF
      // marker that llvm::MemoryBuffer guarantees will be there.
      if (curPtr - 1 == curBuffer.end())
        return formToken(Token::eof, tokStart);

      // An embedded nul is treated like whitespace.
      LLVM_FALLTHROUGH;
    case ' ':
    case '\t':
    case '\n':
    case '\r':
      // Ignore whitespace.
      continue;

    case '(': return formToken(Token::l_paren, tokStart);
    case ')': return formToken(Token::r_paren, tokStart);
    case '<': return formToken(Token::less, tokStart);
    case '>': return formToken(Token::greater, tokStart);

    case ';': return lexComment();
    case '@': return lexAtIdentifier(tokStart);
    }
  }
}
/// Skip the remainder of a comment line (the leading semicolon has already
/// been consumed) and return the token that follows it.
///
/// TODO: add a regex for comments here and to the spec.
///
Token Lexer::lexComment() {
  for (;;) {
    const char ch = *curPtr++;

    // A newline terminates the comment; resume normal lexing after it.
    if (ch == '\n' || ch == '\r')
      return lexToken();

    // A nul sitting exactly at the end of the buffer also terminates the
    // comment.  Back up so lexToken() sees that nul itself.
    if (ch == 0 && curPtr - 1 == curBuffer.end()) {
      --curPtr;
      return lexToken();
    }

    // Anything else (including an embedded nul) is part of the comment.
  }
}
/// Lex a bare identifier or keyword that starts with a letter.
///
///   bare-id ::= letter (letter|digit)*
///
/// `tokStart` points at the first letter, which the caller has already
/// consumed.  Returns a keyword token when the spelling is one of the known
/// keywords, and a Token::bare_identifier token otherwise.
Token Lexer::lexBareIdentifierOrKeyword(const char *tokStart) {
  // The <cctype> classifiers are only defined for values representable as
  // unsigned char; convert first so bytes >= 0x80 are handled safely when
  // plain char is signed.
  const auto isIdentifierChar = [](char c) {
    const auto uc = static_cast<unsigned char>(c);
    return isalpha(uc) || isdigit(uc);
  };

  // Match the rest of the identifier regex: [0-9a-zA-Z]*
  while (isIdentifierChar(*curPtr))
    ++curPtr;

  // Check to see if this identifier is a keyword.
  StringRef spelling(tokStart, curPtr - tokStart);

  Token::TokenKind kind = llvm::StringSwitch<Token::TokenKind>(spelling)
    .Case("cfgfunc", Token::kw_cfgfunc)
    .Case("extfunc", Token::kw_extfunc)
    .Case("mlfunc", Token::kw_mlfunc)
    .Default(Token::bare_identifier);

  return Token(kind, spelling);
}
/// Lex an '@foo' identifier.
///
///   function-id ::= `@` bare-id
///
/// `tokStart` points at the '@', which the caller has already consumed.
/// Returns a Token::at_identifier token spanning the '@' and the name.  If the
/// '@' is not followed by a letter, a diagnostic is printed at the offending
/// character and a Token::error token located there is returned.
Token Lexer::lexAtIdentifier(const char *tokStart) {
  // The <cctype> classifiers are only defined for values representable as
  // unsigned char; convert first so bytes >= 0x80 are handled safely when
  // plain char is signed.
  const auto isLetter = [](char c) {
    return isalpha(static_cast<unsigned char>(c)) != 0;
  };
  const auto isDigit = [](char c) {
    return isdigit(static_cast<unsigned char>(c)) != 0;
  };

  // These always start with a letter.  Test before advancing so that a lone
  // '@' at the very end of the buffer does not push curPtr past the
  // terminating nul.
  if (!isLetter(*curPtr))
    return emitError(curPtr, "expected letter in @ identifier");
  ++curPtr;

  while (isLetter(*curPtr) || isDigit(*curPtr))
    ++curPtr;
  return formToken(Token::at_identifier, tokStart);
}

65
mlir/lib/Parser/Lexer.h Normal file
View File

@ -0,0 +1,65 @@
//===- Lexer.h - MLIR Lexer Interface ---------------------------*- C++ -*-===//
//
// Copyright 2019 The MLIR Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
//
// This file declares the MLIR Lexer class.
//
//===----------------------------------------------------------------------===//
#ifndef MLIR_LIB_PARSER_LEXER_H
#define MLIR_LIB_PARSER_LEXER_H
#include "Token.h"
namespace llvm {
class SourceMgr;
}
namespace mlir {
/// Breaks the current file up into a stream of tokens, handed out one at a
/// time through lexToken().
class Lexer {
public:
  explicit Lexer(llvm::SourceMgr &sourceMgr);

  // A lexer is not copyable.
  Lexer(const Lexer &) = delete;
  void operator=(const Lexer &) = delete;

  /// Return the source manager this lexer reports diagnostics through.
  llvm::SourceMgr &getSourceMgr() { return sourceMgr; }

  /// Lex and return the next token.
  Token lexToken();

private:
  // Helpers.

  /// Build a token of the given kind that covers [tokStart, curPtr).
  Token formToken(Token::TokenKind kind, const char *tokStart) {
    StringRef spelling(tokStart, curPtr - tokStart);
    return Token(kind, spelling);
  }

  Token emitError(const char *loc, const Twine &message);

  // Lexer implementation methods.
  Token lexComment();
  Token lexBareIdentifierOrKeyword(const char *tokStart);
  Token lexAtIdentifier(const char *tokStart);

  // State.
  llvm::SourceMgr &sourceMgr;

  /// The buffer being lexed.
  StringRef curBuffer;

  /// The current position within curBuffer.
  const char *curPtr;
};
} // end namespace mlir
#endif // MLIR_LIB_PARSER_LEXER_H

186
mlir/lib/Parser/Parser.cpp Normal file
View File

@ -0,0 +1,186 @@
//===- Parser.cpp - MLIR Parser Implementation ----------------------------===//
//
// Copyright 2019 The MLIR Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
//
// This file implements the parser for the MLIR textual form.
//
//===----------------------------------------------------------------------===//
#include "mlir/Parser.h"
#include "Lexer.h"
#include "mlir/IR/Module.h"
#include "llvm/Support/SourceMgr.h"
using namespace mlir;
using llvm::SourceMgr;
namespace {
/// Outcome of a parse routine.  The enumerators are chosen so that, in a
/// boolean context, ParseSuccess is false and ParseFailure is true; this lets
/// callers write `if (parseThing()) return ParseFailure;`.
enum ParseResult { ParseSuccess = 0, ParseFailure = 1 };
/// Main parser implementation.  Holds the lexer, a single token of lookahead,
/// and the module that parsed entities are added to.
class Parser {
public:
  /// Prime the lookahead with the first token produced by the lexer and create
  /// the empty module that parsing fills in.  Marked explicit so that a
  /// SourceMgr never converts to a Parser implicitly.
  explicit Parser(llvm::SourceMgr &sourceMgr)
      : lex(sourceMgr), curToken(lex.lexToken()), module(new Module()) {}

  /// Parse the whole input.  Returns the module on success (releasing
  /// ownership to the caller) or null after a diagnostic has been emitted.
  Module *parseModule();

private:
  // State.  Declaration order matters here: curToken is initialized from lex,
  // so lex must be declared (and therefore constructed) first.
  Lexer lex;

  // This is the next token that hasn't been consumed yet.
  Token curToken;

  // This is the result module we are parsing into.
  std::unique_ptr<Module> module;

  // Helper methods.

  /// Emit an error and return failure.
  ParseResult emitError(const Twine &message);

  /// Advance the current lexer onto the next token.
  void consumeToken() {
    assert(curToken.isNot(Token::eof, Token::error) &&
           "shouldn't advance past EOF or errors");
    curToken = lex.lexToken();
  }

  /// Advance the current lexer onto the next token, asserting what the expected
  /// current token is.  This is preferred to the above method because it leads
  /// to more self-documenting code with better checking.
  void consumeToken(Token::TokenKind kind) {
    assert(curToken.is(kind) && "consumed an unexpected token");
    consumeToken();
  }

  // Type parsing.

  // Top level entity parsing.
  ParseResult parseFunctionSignature(StringRef &name);
  ParseResult parseExtFunc();
};
} // end anonymous namespace
//===----------------------------------------------------------------------===//
// Helper methods.
//===----------------------------------------------------------------------===//
/// Print `message` as an error diagnostic at the location of the current
/// token and return ParseFailure, so callers can `return emitError(...)`.
ParseResult Parser::emitError(const Twine &message) {
  // TODO(clattner): If/when we want to implement a -verify mode, this will need
  // to package up errors into SMDiagnostic and report them.
  const auto errorLoc = curToken.getLoc();
  auto &sourceMgr = lex.getSourceMgr();
  sourceMgr.PrintMessage(errorLoc, SourceMgr::DK_Error, message);
  return ParseFailure;
}
//===----------------------------------------------------------------------===//
// Type Parsing
//===----------------------------------------------------------------------===//
// ... TODO
//===----------------------------------------------------------------------===//
// Top-level entity parsing.
//===----------------------------------------------------------------------===//
/// Parse a function signature: the function name followed by its
/// parenthesized parameter list.  On success `name` is set to the function
/// name without its leading '@'.
///
///   argument-list ::= type (`,` type)* | /*empty*/
///   function-signature ::= function-id `(` argument-list `)` (`->` type-list)?
///
ParseResult Parser::parseFunctionSignature(StringRef &name) {
  // The signature starts with the '@name' of the function.
  if (!curToken.is(Token::at_identifier))
    return emitError("expected a function identifier like '@foo'");

  const StringRef spelling = curToken.getSpelling();
  name = spelling.drop_front();
  consumeToken(Token::at_identifier);

  // Opening parenthesis of the argument list.
  if (!curToken.is(Token::l_paren))
    return emitError("expected '(' in function signature");
  consumeToken(Token::l_paren);

  // TODO: This should actually parse the full grammar here.

  // Closing parenthesis of the argument list.
  if (!curToken.is(Token::r_paren))
    return emitError("expected ')' in function signature");
  consumeToken(Token::r_paren);

  return ParseSuccess;
}
/// Parse an external function declaration and append the resulting function
/// to the module being built.
///
///   ext-func ::= `extfunc` function-signature
///
ParseResult Parser::parseExtFunc() {
  consumeToken(Token::kw_extfunc);

  StringRef funcName;
  const ParseResult signatureResult = parseFunctionSignature(funcName);
  if (signatureResult != ParseSuccess)
    return ParseFailure;

  // Okay, the external function definition was parsed correctly.
  auto *function = new Function(funcName);
  module->functionList.push_back(function);
  return ParseSuccess;
}
/// Top-level module parser: consume top-level entities until end of file.
/// Returns the parsed module (released to the caller) on success, or null if
/// any error was reported.
Module *Parser::parseModule() {
  for (;;) {
    switch (curToken.getKind()) {
    // If we got to the end of the file, then we're done.
    case Token::eof:
      return module.release();

    // If we got an error token, then the lexer already emitted an error, just
    // stop.  Someday we could introduce error recovery if there was demand for
    // it.
    case Token::error:
      return nullptr;

    case Token::kw_extfunc:
      if (parseExtFunc() != ParseSuccess)
        return nullptr;
      break;

    // TODO: cfgfunc, mlfunc, affine entity declarations, etc.

    default:
      emitError("expected a top level entity");
      return nullptr;
    }
  }
}
//===----------------------------------------------------------------------===//
/// Parse the file held by the given SourceMgr.  Returns the resulting MLIR
/// module when the input is valid; otherwise diagnostics are emitted and null
/// is returned.
Module *mlir::parseSourceFile(llvm::SourceMgr &sourceMgr) {
  Parser parser(sourceMgr);
  return parser.parseModule();
}

37
mlir/lib/Parser/Token.cpp Normal file
View File

@ -0,0 +1,37 @@
//===- Token.cpp - MLIR Token Implementation ------------------------------===//
//
// Copyright 2019 The MLIR Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
//
// This file implements the Token class for the MLIR textual form.
//
//===----------------------------------------------------------------------===//
#include "Token.h"
using namespace mlir;
using llvm::SMLoc;
using llvm::SMRange;
/// Return the source location of the first byte of this token.
SMLoc Token::getLoc() const {
  const char *firstByte = spelling.data();
  return SMLoc::getFromPointer(firstByte);
}
/// Return the source location one past the last byte of this token.
SMLoc Token::getEndLoc() const {
  const char *pastLastByte = spelling.end();
  return SMLoc::getFromPointer(pastLastByte);
}
/// Return the source range covered by this token, from getLoc() to
/// getEndLoc().
SMRange Token::getLocRange() const {
  const SMLoc rangeStart = getLoc();
  const SMLoc rangeEnd = getEndLoc();
  return SMRange(rangeStart, rangeEnd);
}

98
mlir/lib/Parser/Token.h Normal file
View File

@ -0,0 +1,98 @@
//===- Token.h - MLIR Token Interface ---------------------------*- C++ -*-===//
//
// Copyright 2019 The MLIR Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
#ifndef MLIR_LIB_PARSER_TOKEN_H
#define MLIR_LIB_PARSER_TOKEN_H
#include "mlir/Support/LLVM.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/SMLoc.h"
namespace mlir {
/// A single token of the MLIR syntax: a kind together with the bytes of
/// source text it was formed from.
class Token {
public:
  enum TokenKind {
    // Markers
    eof, error,

    // Identifiers.
    bare_identifier,    // foo
    at_identifier,      // @foo
    // TODO: @@foo, etc.

    // Punctuation.
    l_paren, r_paren,   // ( )
    less, greater,      // < >
    // TODO: More punctuation.

    // Keywords.
    kw_cfgfunc,
    kw_extfunc,
    kw_mlfunc,
    // TODO: More keywords.
  };

  Token(TokenKind kind, StringRef spelling) : kind(kind), spelling(spelling) {}

  /// Return the bytes that make up this token.
  StringRef getSpelling() const { return spelling; }

  // Token classification.

  /// Return the kind of this token.
  TokenKind getKind() const { return kind; }

  /// Return true if this token is of the specified kind.
  bool is(TokenKind k) const { return kind == k; }

  /// Return true if this token is one of the two specified kinds.
  bool isAny(TokenKind k1, TokenKind k2) const {
    return kind == k1 || kind == k2;
  }

  /// Return true if this token is one of the specified kinds.
  template <typename... T>
  bool isAny(TokenKind k1, TokenKind k2, TokenKind k3, T... others) const {
    return is(k1) || isAny(k2, k3, others...);
  }

  /// Return true if this token is not of the specified kind.
  bool isNot(TokenKind k) const { return !is(k); }

  /// Return true if this token isn't one of the specified kinds.
  template <typename... T>
  bool isNot(TokenKind k1, TokenKind k2, T... others) const {
    return !isAny(k1, k2, others...);
  }

  /// Location processing.
  llvm::SMLoc getLoc() const;
  llvm::SMLoc getEndLoc() const;
  llvm::SMRange getLocRange() const;

private:
  /// Discriminator that indicates the sort of token this is.
  TokenKind kind;

  /// A reference to the entire token contents; this is always a pointer into
  /// a memory buffer owned by the source manager.
  StringRef spelling;
};
} // end namespace mlir
#endif // MLIR_LIB_PARSER_TOKEN_H

View File

@ -1,15 +1,7 @@
// TODO(andydavis) Resolve relative path issue w.r.t invoking mlir-opt in RUN
// statements (perhaps through using lit config substitutions).
//
// RUN: %S/../../mlir-opt --help | FileCheck --check-prefix=CHECKHELP %s
// RUN: %S/../../mlir-opt %s -o - | FileCheck %s
//
// CHECKHELP: OVERVIEW: MLIR modular optimizer driver
; TODO(andydavis) Resolve relative path issue w.r.t invoking mlir-opt in RUN
; statements (perhaps through using lit config substitutions).
;
; RUN: %S/../../mlir-opt --help | FileCheck %s
;
; CHECK: OVERVIEW: MLIR modular optimizer driver
// Right now the input is completely ignored.
extfunc @foo()
extfunc @bar()
// CHECK: extfunc @foo()
// CHECK: extfunc @bar()

15
mlir/test/IR/parser.mlir Normal file
View File

@ -0,0 +1,15 @@
; TODO(andydavis) Resolve relative path issue w.r.t invoking mlir-opt in RUN
; statements (perhaps through using lit config substitutions).
;
; RUN: %S/../../mlir-opt %s -o - | FileCheck %s
; CHECK: extfunc @foo()
extfunc @foo()
; CHECK: extfunc @bar()
extfunc @bar()
; CHECK: extfunc @baz()
extfunc @baz()

View File

@ -22,7 +22,9 @@
//===----------------------------------------------------------------------===//
#include "mlir/IR/Module.h"
#include "mlir/Parser.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/FileUtilities.h"
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/ToolOutputFile.h"
@ -56,13 +58,27 @@ int main(int argc, char **argv) {
cl::ParseCommandLineOptions(argc, argv, "MLIR modular optimizer driver\n");
// Instantiate an IR object.
Module m;
m.functionList.push_back(new Function("foo"));
m.functionList.push_back(new Function("bar"));
// Set up the input file.
auto fileOrErr = MemoryBuffer::getFileOrSTDIN(inputFilename);
if (std::error_code error = fileOrErr.getError()) {
llvm::errs() << argv[0] << ": could not open input file '" << inputFilename
<< "': " << error.message() << "\n";
return 1;
}
// Tell sourceMgr about this buffer, which is what the parser will pick up.
SourceMgr sourceMgr;
sourceMgr.AddNewSourceBuffer(std::move(*fileOrErr), SMLoc());
// Parse the input file and emit any errors.
std::unique_ptr<Module> module(parseSourceFile(sourceMgr));
if (!module) return 1;
// Print the output.
auto output = getOutputStream();
m.print(output->os());
module->print(output->os());
output->keep();
// Success.
return 0;
}