[MLIR Parser] Improve QoI for "expected token" errors

A typical problem with missing a token is that the missing token is at the end of a line. The problem with this is that the error message gets reported on the start of the following line (which is where the next / invalid token is) which can be confusing. Handle this by noticing this case and backing up to the end of the previous line. Differential Revision: https://reviews.llvm.org/D125295
2022-05-10 10:22:25 +01:00 · 2022-05-10 10:22:25 +01:00 · ad3b358180
parent 0c1000cbd6
commit ad3b358180
11 changed files with 109 additions and 41 deletions
--- a/mlir/lib/Parser/Parser.cpp
+++ b/mlir/lib/Parser/Parser.cpp
@ -134,7 +134,7 @@ Parser::parseCommaSeparatedListUntil(Token::Kind rightToken,
  // Handle the empty case.
  if (getToken().is(rightToken)) {
    if (!allowEmptyList)
-      return emitError("expected list element");
+      return emitWrongTokenError("expected list element");
    consumeToken(rightToken);
    return success();
  }
@ -147,6 +147,15 @@ Parser::parseCommaSeparatedListUntil(Token::Kind rightToken,
  return success();
 }

+InFlightDiagnostic Parser::emitError(const Twine &message) {
+  auto loc = state.curToken.getLoc();
+  if (state.curToken.isNot(Token::eof))
+    return emitError(loc, message);
+
+  // If the error is to be emitted at EOF, move it back one character.
+  return emitError(SMLoc::getFromPointer(loc.getPointer() - 1), message);
+}
+
 InFlightDiagnostic Parser::emitError(SMLoc loc, const Twine &message) {
  auto diag = mlir::emitError(getEncodedSourceLocation(loc), message);

@ -157,13 +166,55 @@ InFlightDiagnostic Parser::emitError(SMLoc loc, const Twine &message) {
  return diag;
 }

+/// Emit an error about a "wrong token".  If the current token is at the
+/// start of a source line, this will apply heuristics to back up and report
+/// the error at the end of the previous line, which is where the expected
+/// token is supposed to be.
+InFlightDiagnostic Parser::emitWrongTokenError(const Twine &message) {
+  auto loc = state.curToken.getLoc();
+
+  // If the error is to be emitted at EOF, move it back one character.
+  if (state.curToken.is(Token::eof))
+    loc = SMLoc::getFromPointer(loc.getPointer() - 1);
+
+  // Determine if the token is at the start of the current line.
+  const char *bufferStart = state.lex.getBufferBegin();
+  const char *curPtr = loc.getPointer();
+
+  // Back up over entirely blank lines.
+  while (1) {
+    // Back up until we see a \n, but don't look past the buffer start.
+    curPtr = StringRef(bufferStart, curPtr - bufferStart).rtrim(" \t").end();
+
+    // For tokens with no preceding source line, just emit at the original
+    // location.
+    if (curPtr == bufferStart || curPtr[-1] != '\n')
+      return emitError(loc, message);
+
+    // Check to see if the preceding line has a comment on it.  We assume that a
+    // `//` is the start of a comment, which is mostly correct.
+    // TODO: This will do the wrong thing for // in a string literal.
+    --curPtr;
+    auto prevLine = StringRef(bufferStart, curPtr - bufferStart);
+    size_t newLineIndex = prevLine.rfind('\n');
+    if (newLineIndex != StringRef::npos)
+      prevLine = prevLine.drop_front(newLineIndex);
+    size_t commentStart = prevLine.find("//");
+    if (commentStart != StringRef::npos)
+      curPtr = prevLine.begin() + commentStart;
+
+    // Otherwise, we can move backwards at least this line.
+    loc = SMLoc::getFromPointer(curPtr);
+  }
+}
+
 /// Consume the specified token if present and return success.  On failure,
 /// output a diagnostic and return failure.
 ParseResult Parser::parseToken(Token::Kind expectedToken,
                               const Twine &message) {
  if (consumeIf(expectedToken))
    return success();
-  return emitError(message);
+  return emitWrongTokenError(message);
 }

 /// Parse an optional integer value from the stream.
@ -872,23 +923,23 @@ ParseResult OperationParser::parseOperation() {
    // Parse the group of result ids.
    auto parseNextResult = [&]() -> ParseResult {
      // Parse the next result id.
-      if (!getToken().is(Token::percent_identifier))
-        return emitError("expected valid ssa identifier");
-
      Token nameTok = getToken();
-      consumeToken(Token::percent_identifier);
+      if (parseToken(Token::percent_identifier,
+                     "expected valid ssa identifier"))
+        return failure();

      // If the next token is a ':', we parse the expected result count.
      size_t expectedSubResults = 1;
      if (consumeIf(Token::colon)) {
        // Check that the next token is an integer.
        if (!getToken().is(Token::integer))
-          return emitError("expected integer number of results");
+          return emitWrongTokenError("expected integer number of results");

        // Check that number of results is > 0.
        auto val = getToken().getUInt64IntegerValue();
        if (!val.hasValue() || val.getValue() < 1)
-          return emitError("expected named operation to have atleast 1 result");
+          return emitError(
+              "expected named operation to have at least 1 result");
        consumeToken(Token::integer);
        expectedSubResults = *val;
      }
@ -912,7 +963,7 @@ ParseResult OperationParser::parseOperation() {
  else if (nameTok.is(Token::string))
    op = parseGenericOperation();
  else
-    return emitError("expected operation name in quotes");
+    return emitWrongTokenError("expected operation name in quotes");

  // If parsing of the basic operation failed, then this whole thing fails.
  if (!op)
@ -967,7 +1018,7 @@ ParseResult OperationParser::parseOperation() {
 ParseResult OperationParser::parseSuccessor(Block *&dest) {
  // Verify branch is identifier and get the matching block.
  if (!getToken().is(Token::caret_identifier))
-    return emitError("expected block name");
+    return emitWrongTokenError("expected block name");
  dest = getBlockNamed(getTokenSpelling(), getToken().getLoc());
  consumeToken();
  return success();
@ -1296,8 +1347,6 @@ public:
                               Delimiter delimiter = Delimiter::None,
                               bool allowResultNumber = true,
                               int requiredOperandCount = -1) override {
-    auto startLoc = parser.getToken().getLoc();
-
    // The no-delimiter case has some special handling for better diagnostics.
    if (delimiter == Delimiter::None) {
      // parseCommaSeparatedList doesn't handle the missing case for "none",
@ -1309,10 +1358,9 @@ public:
          return success();

        // Otherwise, try to produce a nice error message.
-        if (parser.getToken().is(Token::l_paren) ||
-            parser.getToken().is(Token::l_square))
-          return emitError(startLoc, "unexpected delimiter");
-        return emitError(startLoc, "invalid operand");
+        if (parser.getToken().isAny(Token::l_paren, Token::l_square))
+          return parser.emitError("unexpected delimiter");
+        return parser.emitWrongTokenError("expected operand");
      }
    }

@ -1320,6 +1368,7 @@ public:
      return parseOperand(result.emplace_back(), allowResultNumber);
    };

+    auto startLoc = parser.getToken().getLoc();
    if (parseCommaSeparatedList(delimiter, parseOneOperand, " in operand list"))
      return failure();

--- a/mlir/lib/Parser/Parser.h
+++ b/mlir/lib/Parser/Parser.h
@ -68,17 +68,15 @@ public:
  //===--------------------------------------------------------------------===//

  /// Emit an error and return failure.
-  InFlightDiagnostic emitError(const Twine &message = {}) {
-    // If the error is to be emitted at EOF, move it back one character.
-    if (state.curToken.is(Token::eof)) {
-      return emitError(
-          SMLoc::getFromPointer(state.curToken.getLoc().getPointer() - 1),
-          message);
-    }
-    return emitError(state.curToken.getLoc(), message);
-  }
+  InFlightDiagnostic emitError(const Twine &message = {});
  InFlightDiagnostic emitError(SMLoc loc, const Twine &message = {});

+  /// Emit an error about a "wrong token".  If the current token is at the
+  /// start of a source line, this will apply heuristics to back up and report
+  /// the error at the end of the previous line, which is where the expected
+  /// token is supposed to be.
+  InFlightDiagnostic emitWrongTokenError(const Twine &message = {});
+
  /// Encode the specified source location information into an attribute for
  /// attachment to the IR.
  Location getEncodedSourceLocation(SMLoc loc) {
--- a/mlir/test/Dialect/ControlFlow/invalid.mlir
+++ b/mlir/test/Dialect/ControlFlow/invalid.mlir
@ -38,8 +38,8 @@ func.func @switch_wrong_type_case_value(%flag : i32, %caseOperand : i32) {
 func.func @switch_missing_comma(%flag : i32, %caseOperand : i32) {
  cf.switch %flag : i32, [
    default: ^bb1(%caseOperand : i32),
-    45: ^bb2(%caseOperand : i32)
    // expected-error@+1 {{expected ']'}}
+    45: ^bb2(%caseOperand : i32)
    43: ^bb3(%caseOperand : i32)
  ]

--- a/mlir/test/Dialect/LLVMIR/global.mlir
+++ b/mlir/test/Dialect/LLVMIR/global.mlir
@ -144,7 +144,7 @@ llvm.mlir.global internal @more_than_one_type(0) : i64, i32
 llvm.mlir.global internal @foo(0: i32) : i32

 func.func @bar() {
-  // expected-error @+2{{expected ':'}}
+  // expected-error @+1{{expected ':'}}
  llvm.mlir.addressof @foo
 }

--- a/mlir/test/Dialect/Linalg/invalid.mlir
+++ b/mlir/test/Dialect/Linalg/invalid.mlir
@ -60,7 +60,7 @@ func.func @index_dim_negative(%arg0: memref<f32>) {
 // -----

 func.func @generic_no_region(%arg0: memref<f32>) {
-  // expected-error @+5 {{expected '{' to begin a region}}
+  // expected-error @+4 {{expected '{' to begin a region}}
  linalg.generic {
    indexing_maps =  [ affine_map<() -> (0)> ],
    iterator_types = []
--- a/mlir/test/Dialect/SPIRV/IR/control-flow-ops.mlir
+++ b/mlir/test/Dialect/SPIRV/IR/control-flow-ops.mlir
@ -24,7 +24,7 @@ func.func @branch_argument() -> () {
 // -----

 func.func @missing_accessor() -> () {
-  // expected-error @+2 {{expected block name}}
+  // expected-error @+1 {{expected block name}}
  spv.Branch
 }

--- a/mlir/test/Dialect/SPIRV/IR/logical-ops.mlir
+++ b/mlir/test/Dialect/SPIRV/IR/logical-ops.mlir
@ -90,7 +90,7 @@ func.func @logicalBinary2(%arg0 : vector<4xi1>, %arg1 : vector<4xi1>)

 func.func @logicalBinary(%arg0 : i1, %arg1 : i1)
 {
-  // expected-error @+2 {{expected ':'}}
+  // expected-error @+1 {{expected ':'}}
  %0 = spv.LogicalAnd %arg0, %arg1
  return
 }
@ -139,7 +139,7 @@ func.func @logicalUnary2(%arg0 : vector<4xi1>)

 func.func @logicalUnary(%arg0 : i1)
 {
-  // expected-error @+2 {{expected ':'}}
+  // expected-error @+1 {{expected ':'}}
  %0 = spv.LogicalNot %arg0
  return
 }
@ -230,7 +230,7 @@ func.func @select_op_vec_condn_vec(%arg0: vector<3xi1>) -> () {
 func.func @select_op(%arg0: i1) -> () {
  %0 = spv.Constant 2 : i32
  %1 = spv.Constant 3 : i32
-  // expected-error @+2 {{expected ','}}
+  // expected-error @+1 {{expected ','}}
  %2 = spv.Select %arg0, %0, %1 : i1
  return
 }
--- a/mlir/test/Dialect/SPIRV/IR/memory-ops.mlir
+++ b/mlir/test/Dialect/SPIRV/IR/memory-ops.mlir
@ -251,7 +251,7 @@ func.func @simple_load_missing_operand() -> () {

 func.func @simple_load_missing_rettype() -> () {
  %0 = spv.Variable : !spv.ptr<f32, Function>
-  // expected-error @+2 {{expected ':'}}
+  // expected-error @+1 {{expected ':'}}
  %1 = spv.Load "Function" %0
  return
 }
@ -396,7 +396,7 @@ func.func @simple_store_missing_ptr_type(%arg0 : f32) -> () {

 func.func @simple_store_missing_operand(%arg0 : f32) -> () {
  %0 = spv.Variable : !spv.ptr<f32, Function>
-  // expected-error @+1 {{custom op 'spv.Store' invalid operand}} : f32
+  // expected-error @+1 {{expected operand}}
  spv.Store  "Function" , %arg0 : f32
  return
 }
--- a/mlir/test/Dialect/SPIRV/IR/misc-ops.mlir
+++ b/mlir/test/Dialect/SPIRV/IR/misc-ops.mlir
@ -23,7 +23,7 @@ func.func @undef() -> () {
 // -----

 func.func @undef() -> () {
-  // expected-error @+2{{expected ':'}}
+  // expected-error @+1{{expected ':'}}
  %0 = spv.Undef
  spv.Return
 }
--- a/mlir/test/IR/invalid-locations.mlir
+++ b/mlir/test/IR/invalid-locations.mlir
@ -11,7 +11,7 @@ func.func @location_missing_l_paren() {

 func.func @location_missing_r_paren() {
 ^bb:
-  return loc(unknown // expected-error@+1 {{expected ')' in location}}
+  return loc(unknown // expected-error {{expected ')' in location}}
 }

 // -----
@ -60,7 +60,7 @@ func.func @location_callsite_missing_caller() {

 func.func @location_callsite_missing_r_paren() {
 ^bb:
-  return loc(callsite( unknown at unknown  // expected-error@+1 {{expected ')' in callsite location}}
+  return loc(callsite( unknown at unknown  // expected-error {{expected ')' in callsite location}}
 }

 // -----
--- a/mlir/test/IR/invalid.mlir
+++ b/mlir/test/IR/invalid.mlir
@ -158,7 +158,7 @@ func.func @block_arg_no_type() {

 func.func @block_arg_no_close_paren() {
 ^bb42:
-  cf.br ^bb2( // expected-error@+1 {{expected ':'}}
+  cf.br ^bb2( // expected-error {{expected ':'}}
  return
 }

@ -245,7 +245,7 @@ func.func @malformed_for_to() {

 func.func @incomplete_for() {
  affine.for %i = 1 to 10 step 2
-}        // expected-error {{expected '{' to begin a region}}
+}        // expected-error @-1 {{expected '{' to begin a region}}

 // -----

@ -1099,7 +1099,7 @@ func.func @multi_result_missing_count() {
 // -----

 func.func @multi_result_zero_count() {
-  // expected-error@+1 {{expected named operation to have atleast 1 result}}
+  // expected-error@+1 {{expected named operation to have at least 1 result}}
  %0:0 = "foo" () : () -> (i32, i32)
  return
 }
@ -1651,6 +1651,27 @@ func.func @foo() {} // expected-error {{expected non-empty function body}}

 // -----

-// expected-error@+2 {{expected ']'}}
+// expected-error@+1 {{expected ']'}}
 "f"() { b = [@m:

+// -----
+
+// This makes sure we emit an error at the end of the correct line, the : is
+// expected at the end of foo, not on the return line.
+func.func @error_at_end_of_line() {
+  // expected-error@+1 {{expected ':' followed by operation type}}
+  %0 = "foo"() 
+  return
+}
+
+// -----
+
+// This makes sure we emit an error at the end of the correct line, the : is
+// expected at the end of foo, not on the return line.
+func.func @error_at_end_of_line() {
+  %0 = "foo"() 
+  // expected-error@-1 {{expected ':' followed by operation type}}
+
+  // This is a comment and so is the thing above.
+  return
+}