LiteralSupport: Don't assert() on invalid input

When using clangd, it's possible to trigger assertions in NumericLiteralParser and CharLiteralParser when switching git branches. This commit replaces the initial asserts on invalid input with the existing error-handling mechanism of each respective class, so that invalid input is diagnosed rather than asserted on. This allows clangd to recover gracefully without crashing. See https://github.com/clangd/clangd/issues/888 for more information on the clangd crashes.
2021-11-16 15:46:34 +00:00 · 2021-11-16 15:46:34 +00:00 · 5a6dac66db
parent 1a84d1c81e
commit 5a6dac66db
2 changed files with 19 additions and 8 deletions
--- a/clang/include/clang/Basic/DiagnosticLexKinds.td
+++ b/clang/include/clang/Basic/DiagnosticLexKinds.td
@ -269,7 +269,9 @@ def err_bad_character_encoding : Error<
 def warn_bad_character_encoding : ExtWarn<
  "illegal character encoding in character literal">,
  InGroup<InvalidSourceEncoding>;
-def err_lexing_string : Error<"failure when lexing a string">;
+def err_lexing_string : Error<"failure when lexing a string literal">;
+def err_lexing_char : Error<"failure when lexing a character literal">;
+def err_lexing_numeric : Error<"failure when lexing a numeric literal">;
 def err_placeholder_in_source : Error<"editor placeholder in source file">;

 //===----------------------------------------------------------------------===//
--- a/clang/lib/Lex/LiteralSupport.cpp
+++ b/clang/lib/Lex/LiteralSupport.cpp
@ -693,12 +693,6 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
    : SM(SM), LangOpts(LangOpts), Diags(Diags),
      ThisTokBegin(TokSpelling.begin()), ThisTokEnd(TokSpelling.end()) {

-  // This routine assumes that the range begin/end matches the regex for integer
-  // and FP constants (specifically, the 'pp-number' regex), and assumes that
-  // the byte at "*end" is both valid and not part of the regex.  Because of
-  // this, it doesn't have to check for 'overscan' in various places.
-  assert(!isPreprocessingNumberBody(*ThisTokEnd) && "didn't maximally munch?");
-
  s = DigitsBegin = ThisTokBegin;
  saw_exponent = false;
  saw_period = false;
@ -718,6 +712,16 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
  isAccum = false;
  hadError = false;

+  // The parsing below relies on the range begin/end matching the regex for
+  // integer and FP constants (specifically, the 'pp-number' regex) and on the
+  // byte at "*end" being valid and not part of that regex, so it need not
+  // check for 'overscan'.  If "*end" would continue the number, diagnose it.
+  if (isPreprocessingNumberBody(*ThisTokEnd)) {
+    Diags.Report(TokLoc, diag::err_lexing_numeric);
+    hadError = true;
+    return;
+  }
+
  if (*s == '0') { // parse radix
    ParseNumberStartingWithZero(TokLoc);
    if (hadError)
@ -1432,7 +1436,12 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
    ++begin;

  // Skip over the entry quote.
-  assert(begin[0] == '\'' && "Invalid token lexed");
+  if (begin[0] != '\'') {
+    PP.Diag(Loc, diag::err_lexing_char);
+    HadError = true;
+    return;
+  }
+
  ++begin;

  // Remove an optional ud-suffix.