From ab60afb234704165a622b57e9ba0476d8d533074 Mon Sep 17 00:00:00 2001 From: James Molloy Date: Tue, 14 Aug 2018 01:16:45 -0700 Subject: [PATCH] [mlir] Allow C-style escapes in Lexer This patch passes the raw string contents, with escape sequences left intact, through to the rest of the stack. Partial escaping is a total pain to deal with, so we need to either implement escaping properly (ideally using a third-party library like absl; I don't think LLVM has one that can handle the full gamut of escape codes) or not escape at all. I chose the latter for this patch. PiperOrigin-RevId: 208608945 --- mlir/lib/Parser/Lexer.cpp | 6 ++++++ mlir/lib/Parser/Token.cpp | 5 ++--- mlir/test/IR/parser.mlir | 8 ++++++++ 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/mlir/lib/Parser/Lexer.cpp b/mlir/lib/Parser/Lexer.cpp index 91fa8ad666af..885608a02737 100644 --- a/mlir/lib/Parser/Lexer.cpp +++ b/mlir/lib/Parser/Lexer.cpp @@ -323,6 +323,12 @@ Token Lexer::lexString(const char *tokStart) { case '\v': case '\f': return emitError(curPtr-1, "expected '\"' in string literal"); + case '\\': + // Handle explicitly \" -> ". + // TODO(someone): define more escaping rules. + if (*curPtr == '"') + ++curPtr; + continue; default: continue; diff --git a/mlir/lib/Parser/Token.cpp b/mlir/lib/Parser/Token.cpp index 9fc2b2f40d18..2bce68153c27 100644 --- a/mlir/lib/Parser/Token.cpp +++ b/mlir/lib/Parser/Token.cpp @@ -81,9 +81,8 @@ Optional Token::getIntTypeBitwidth() const { /// Given a 'string' token, return its value, including removing the quote /// characters and unescaping the contents of the string. std::string Token::getStringValue() const { - // TODO: Handle escaping. - - // Just drop the quotes off for now. + // Start by dropping the quotes. + // TODO: Un-escape the string here instead of passing through the raw content.
return getSpelling().drop_front().drop_back().str(); } diff --git a/mlir/test/IR/parser.mlir b/mlir/test/IR/parser.mlir index 30c6d457e56d..175127a5817b 100644 --- a/mlir/test/IR/parser.mlir +++ b/mlir/test/IR/parser.mlir @@ -315,3 +315,11 @@ bb0: "foo"(){bar: tensor} : () -> () return } + +// CHECK-LABEL: cfgfunc @stringquote +cfgfunc @stringquote() -> () { +bb0: +// CHECK: "foo"() {bar: "a\"quoted\"string"} : () -> () + "foo"(){bar: "a\"quoted\"string"} : () -> () + return +}