Rollup merge of #125117 - dev-ardi:improve-parser, r=wesleywiser,fmease

Improve parser Fixes #124935. - Add a few more help diagnostics to incorrect semicolons - Overall improved that function - Added a few comments - Renamed diff_marker fns to vcs_conflict_marker
2024-05-18 18:44:14 +02:00 · 2024-05-18 18:44:14 +02:00 · f9bf759e83
parent 685a80f7a0 f8433a82b4
commit f9bf759e83
9 changed files with 81 additions and 70 deletions
--- a/compiler/rustc_ast/src/ast.rs
+++ b/compiler/rustc_ast/src/ast.rs
@ -3262,6 +3262,7 @@ pub enum ItemKind {
 }

 impl ItemKind {
+    /// "a" or "an"
    pub fn article(&self) -> &'static str {
        use ItemKind::*;
        match self {
--- a/compiler/rustc_parse/src/errors.rs
+++ b/compiler/rustc_parse/src/errors.rs
@ -83,7 +83,7 @@ pub(crate) struct IncorrectSemicolon<'a> {
    #[suggestion(style = "short", code = "", applicability = "machine-applicable")]
    pub span: Span,
    #[help]
-    pub opt_help: Option<()>,
+    pub show_help: bool,
    pub name: &'a str,
 }

--- a/compiler/rustc_parse/src/lexer/tokentrees.rs
+++ b/compiler/rustc_parse/src/lexer/tokentrees.rs
@ -241,7 +241,7 @@ impl<'psess, 'src> TokenTreesReader<'psess, 'src> {
        // we have no way of tracking this in the lexer itself, so we piggyback on the parser
        let mut in_cond = false;
        while parser.token != token::Eof {
-            if let Err(diff_err) = parser.err_diff_marker() {
+            if let Err(diff_err) = parser.err_vcs_conflict_marker() {
                diff_errs.push(diff_err);
            } else if parser.is_keyword_ahead(0, &[kw::If, kw::While]) {
                in_cond = true;
--- a/compiler/rustc_parse/src/parser/diagnostics.rs
+++ b/compiler/rustc_parse/src/parser/diagnostics.rs
@ -1817,34 +1817,31 @@ impl<'a> Parser<'a> {
        Ok(P(T::recovered(Some(P(QSelf { ty, path_span, position: 0 })), path)))
    }

-    pub fn maybe_consume_incorrect_semicolon(&mut self, items: &[P<Item>]) -> bool {
-        if self.token.kind == TokenKind::Semi {
-            self.bump();
+    /// This function gets called in places where a semicolon is NOT expected and if there's a
+    /// semicolon it emits the appropriate error and returns true.
+    pub fn maybe_consume_incorrect_semicolon(&mut self, previous_item: Option<&Item>) -> bool {
+        if self.token.kind != TokenKind::Semi {
+            return false;
+        }

-            let mut err =
-                IncorrectSemicolon { span: self.prev_token.span, opt_help: None, name: "" };
-
-            if !items.is_empty() {
-                let previous_item = &items[items.len() - 1];
-                let previous_item_kind_name = match previous_item.kind {
+        // Check previous item to add it to the diagnostic, for example to say
+        // `enum declarations are not followed by a semicolon`
+        let err = match previous_item {
+            Some(previous_item) => {
+                let name = match previous_item.kind {
                    // Say "braced struct" because tuple-structs and
                    // braceless-empty-struct declarations do take a semicolon.
-                    ItemKind::Struct(..) => Some("braced struct"),
-                    ItemKind::Enum(..) => Some("enum"),
-                    ItemKind::Trait(..) => Some("trait"),
-                    ItemKind::Union(..) => Some("union"),
-                    _ => None,
+                    ItemKind::Struct(..) => "braced struct",
+                    _ => previous_item.kind.descr(),
                };
-                if let Some(name) = previous_item_kind_name {
-                    err.opt_help = Some(());
-                    err.name = name;
-                }
+                IncorrectSemicolon { span: self.token.span, name, show_help: true }
            }
-            self.dcx().emit_err(err);
-            true
-        } else {
-            false
-        }
+            None => IncorrectSemicolon { span: self.token.span, name: "", show_help: false },
+        };
+        self.dcx().emit_err(err);
+
+        self.bump();
+        true
    }

    /// Creates a `Diag` for an unexpected token `t` and tries to recover if it is a
@ -2957,13 +2954,23 @@ impl<'a> Parser<'a> {
        err
    }

-    pub fn is_diff_marker(&mut self, long_kind: &TokenKind, short_kind: &TokenKind) -> bool {
+    /// This checks if this is a conflict marker, depending on the parameters passed.
+    ///
+    /// * `>>>>>`
+    /// * `=====`
+    /// * `<<<<<`
+    ///
+    pub fn is_vcs_conflict_marker(
+        &mut self,
+        long_kind: &TokenKind,
+        short_kind: &TokenKind,
+    ) -> bool {
        (0..3).all(|i| self.look_ahead(i, |tok| tok == long_kind))
            && self.look_ahead(3, |tok| tok == short_kind)
    }

-    fn diff_marker(&mut self, long_kind: &TokenKind, short_kind: &TokenKind) -> Option<Span> {
-        if self.is_diff_marker(long_kind, short_kind) {
+    fn conflict_marker(&mut self, long_kind: &TokenKind, short_kind: &TokenKind) -> Option<Span> {
+        if self.is_vcs_conflict_marker(long_kind, short_kind) {
            let lo = self.token.span;
            for _ in 0..4 {
                self.bump();
@ -2973,15 +2980,16 @@ impl<'a> Parser<'a> {
        None
    }

-    pub fn recover_diff_marker(&mut self) {
-        if let Err(err) = self.err_diff_marker() {
+    pub fn recover_vcs_conflict_marker(&mut self) {
+        if let Err(err) = self.err_vcs_conflict_marker() {
            err.emit();
            FatalError.raise();
        }
    }

-    pub fn err_diff_marker(&mut self) -> PResult<'a, ()> {
-        let Some(start) = self.diff_marker(&TokenKind::BinOp(token::Shl), &TokenKind::Lt) else {
+    pub fn err_vcs_conflict_marker(&mut self) -> PResult<'a, ()> {
+        let Some(start) = self.conflict_marker(&TokenKind::BinOp(token::Shl), &TokenKind::Lt)
+        else {
            return Ok(());
        };
        let mut spans = Vec::with_capacity(3);
@ -2993,13 +3001,15 @@ impl<'a> Parser<'a> {
            if self.token.kind == TokenKind::Eof {
                break;
            }
-            if let Some(span) = self.diff_marker(&TokenKind::OrOr, &TokenKind::BinOp(token::Or)) {
+            if let Some(span) = self.conflict_marker(&TokenKind::OrOr, &TokenKind::BinOp(token::Or))
+            {
                middlediff3 = Some(span);
            }
-            if let Some(span) = self.diff_marker(&TokenKind::EqEq, &TokenKind::Eq) {
+            if let Some(span) = self.conflict_marker(&TokenKind::EqEq, &TokenKind::Eq) {
                middle = Some(span);
            }
-            if let Some(span) = self.diff_marker(&TokenKind::BinOp(token::Shr), &TokenKind::Gt) {
+            if let Some(span) = self.conflict_marker(&TokenKind::BinOp(token::Shr), &TokenKind::Gt)
+            {
                spans.push(span);
                end = Some(span);
                break;
--- a/compiler/rustc_parse/src/parser/expr.rs
+++ b/compiler/rustc_parse/src/parser/expr.rs
@ -3734,7 +3734,7 @@ impl<'a> Parser<'a> {
    /// Parses `ident (COLON expr)?`.
    fn parse_expr_field(&mut self) -> PResult<'a, ExprField> {
        let attrs = self.parse_outer_attributes()?;
-        self.recover_diff_marker();
+        self.recover_vcs_conflict_marker();
        self.collect_tokens_trailing_token(attrs, ForceCollect::No, |this, attrs| {
            let lo = this.token.span;

--- a/compiler/rustc_parse/src/parser/item.rs
+++ b/compiler/rustc_parse/src/parser/item.rs
@ -49,6 +49,7 @@ impl<'a> Parser<'a> {
    }

    /// Parses the contents of a module (inner attributes followed by module items).
+    /// We exit once we hit `term`
    pub fn parse_mod(
        &mut self,
        term: &TokenKind,
@ -59,13 +60,13 @@ impl<'a> Parser<'a> {
        let post_attr_lo = self.token.span;
        let mut items = ThinVec::new();
        while let Some(item) = self.parse_item(ForceCollect::No)? {
+            self.maybe_consume_incorrect_semicolon(Some(&item));
            items.push(item);
-            self.maybe_consume_incorrect_semicolon(&items);
        }

        if !self.eat(term) {
            let token_str = super::token_descr(&self.token);
-            if !self.maybe_consume_incorrect_semicolon(&items) {
+            if !self.maybe_consume_incorrect_semicolon(items.last().map(|x| &**x)) {
                let msg = format!("expected item, found {token_str}");
                let mut err = self.dcx().struct_span_err(self.token.span, msg);
                let span = self.token.span;
@ -101,9 +102,9 @@ impl<'a> Parser<'a> {
        fn_parse_mode: FnParseMode,
        force_collect: ForceCollect,
    ) -> PResult<'a, Option<Item>> {
-        self.recover_diff_marker();
+        self.recover_vcs_conflict_marker();
        let attrs = self.parse_outer_attributes()?;
-        self.recover_diff_marker();
+        self.recover_vcs_conflict_marker();
        self.parse_item_common(attrs, true, false, fn_parse_mode, force_collect)
    }

@ -194,12 +195,12 @@ impl<'a> Parser<'a> {
        fn_parse_mode: FnParseMode,
        case: Case,
    ) -> PResult<'a, Option<ItemInfo>> {
-        let def_final = def == &Defaultness::Final;
+        let check_pub = def == &Defaultness::Final;
        let mut def_ = || mem::replace(def, Defaultness::Final);

        let info = if self.eat_keyword_case(kw::Use, case) {
            self.parse_use_item()?
-        } else if self.check_fn_front_matter(def_final, case) {
+        } else if self.check_fn_front_matter(check_pub, case) {
            // FUNCTION ITEM
            let (ident, sig, generics, body) =
                self.parse_fn(attrs, fn_parse_mode, lo, vis, case)?;
@ -310,7 +311,7 @@ impl<'a> Parser<'a> {
        Ok(Some(info))
    }

-    fn recover_import_as_use(&mut self) -> PResult<'a, Option<(Ident, ItemKind)>> {
+    fn recover_import_as_use(&mut self) -> PResult<'a, Option<ItemInfo>> {
        let span = self.token.span;
        let token_name = super::token_descr(&self.token);
        let snapshot = self.create_snapshot_for_diagnostic();
@ -328,7 +329,7 @@ impl<'a> Parser<'a> {
        }
    }

-    fn parse_use_item(&mut self) -> PResult<'a, (Ident, ItemKind)> {
+    fn parse_use_item(&mut self) -> PResult<'a, ItemInfo> {
        let tree = self.parse_use_tree()?;
        if let Err(mut e) = self.expect_semi() {
            match tree.kind {
@ -738,7 +739,7 @@ impl<'a> Parser<'a> {
            if self.recover_doc_comment_before_brace() {
                continue;
            }
-            self.recover_diff_marker();
+            self.recover_vcs_conflict_marker();
            match parse_item(self) {
                Ok(None) => {
                    let mut is_unnecessary_semicolon = !items.is_empty()
@ -1085,7 +1086,7 @@ impl<'a> Parser<'a> {
    /// ```
    fn parse_use_tree_list(&mut self) -> PResult<'a, ThinVec<(UseTree, ast::NodeId)>> {
        self.parse_delim_comma_seq(Delimiter::Brace, |p| {
-            p.recover_diff_marker();
+            p.recover_vcs_conflict_marker();
            Ok((p.parse_use_tree()?, DUMMY_NODE_ID))
        })
        .map(|(r, _)| r)
@ -1512,9 +1513,9 @@ impl<'a> Parser<'a> {
    }

    fn parse_enum_variant(&mut self, span: Span) -> PResult<'a, Option<Variant>> {
-        self.recover_diff_marker();
+        self.recover_vcs_conflict_marker();
        let variant_attrs = self.parse_outer_attributes()?;
-        self.recover_diff_marker();
+        self.recover_vcs_conflict_marker();
        let help = "enum variants can be `Variant`, `Variant = <integer>`, \
                    `Variant(Type, ..., TypeN)` or `Variant { fields: Types }`";
        self.collect_tokens_trailing_token(
@ -1703,6 +1704,10 @@ impl<'a> Parser<'a> {
        Ok((class_name, ItemKind::Union(vdata, generics)))
    }

+    /// This function parses the fields of record structs:
+    ///
+    ///   - `struct S { ... }`
+    ///   - `enum E { Variant { ... } }`
    pub(crate) fn parse_record_struct_body(
        &mut self,
        adt_ty: &str,
@ -1729,19 +1734,10 @@ impl<'a> Parser<'a> {
            self.eat(&token::CloseDelim(Delimiter::Brace));
        } else {
            let token_str = super::token_descr(&self.token);
-            let msg = format!(
-                "expected {}`{{` after struct name, found {}",
-                if parsed_where { "" } else { "`where`, or " },
-                token_str
-            );
+            let where_str = if parsed_where { "" } else { "`where`, or " };
+            let msg = format!("expected {where_str}`{{` after struct name, found {token_str}");
            let mut err = self.dcx().struct_span_err(self.token.span, msg);
-            err.span_label(
-                self.token.span,
-                format!(
-                    "expected {}`{{` after struct name",
-                    if parsed_where { "" } else { "`where`, or " }
-                ),
-            );
+            err.span_label(self.token.span, format!("expected {where_str}`{{` after struct name",));
            return Err(err);
        }

@ -1755,7 +1751,7 @@ impl<'a> Parser<'a> {
            let attrs = p.parse_outer_attributes()?;
            p.collect_tokens_trailing_token(attrs, ForceCollect::No, |p, attrs| {
                let mut snapshot = None;
-                if p.is_diff_marker(&TokenKind::BinOp(token::Shl), &TokenKind::Lt) {
+                if p.is_vcs_conflict_marker(&TokenKind::BinOp(token::Shl), &TokenKind::Lt) {
                    // Account for `<<<<<<<` diff markers. We can't proactively error here because
                    // that can be a valid type start, so we snapshot and reparse only we've
                    // encountered another parse error.
@ -1766,7 +1762,7 @@ impl<'a> Parser<'a> {
                    Ok(vis) => vis,
                    Err(err) => {
                        if let Some(ref mut snapshot) = snapshot {
-                            snapshot.recover_diff_marker();
+                            snapshot.recover_vcs_conflict_marker();
                        }
                        return Err(err);
                    }
@ -1775,7 +1771,7 @@ impl<'a> Parser<'a> {
                    Ok(ty) => ty,
                    Err(err) => {
                        if let Some(ref mut snapshot) = snapshot {
-                            snapshot.recover_diff_marker();
+                            snapshot.recover_vcs_conflict_marker();
                        }
                        return Err(err);
                    }
@ -1800,9 +1796,9 @@ impl<'a> Parser<'a> {

    /// Parses an element of a struct declaration.
    fn parse_field_def(&mut self, adt_ty: &str) -> PResult<'a, FieldDef> {
-        self.recover_diff_marker();
+        self.recover_vcs_conflict_marker();
        let attrs = self.parse_outer_attributes()?;
-        self.recover_diff_marker();
+        self.recover_vcs_conflict_marker();
        self.collect_tokens_trailing_token(attrs, ForceCollect::No, |this, attrs| {
            let lo = this.token.span;
            let vis = this.parse_visibility(FollowedByType::No)?;
@ -2662,7 +2658,7 @@ impl<'a> Parser<'a> {
        }

        let (mut params, _) = self.parse_paren_comma_seq(|p| {
-            p.recover_diff_marker();
+            p.recover_vcs_conflict_marker();
            let snapshot = p.create_snapshot_for_diagnostic();
            let param = p.parse_param_general(req_name, first_param).or_else(|e| {
                let guar = e.emit();
--- a/compiler/rustc_parse/src/parser/stmt.rs
+++ b/compiler/rustc_parse/src/parser/stmt.rs
@ -567,7 +567,7 @@ impl<'a> Parser<'a> {
            if self.token == token::Eof {
                break;
            }
-            if self.is_diff_marker(&TokenKind::BinOp(token::Shl), &TokenKind::Lt) {
+            if self.is_vcs_conflict_marker(&TokenKind::BinOp(token::Shl), &TokenKind::Lt) {
                // Account for `<<<<<<<` diff markers. We can't proactively error here because
                // that can be a valid path start, so we snapshot and reparse only we've
                // encountered another parse error.
@ -576,7 +576,7 @@ impl<'a> Parser<'a> {
            let stmt = match self.parse_full_stmt(recover) {
                Err(mut err) if recover.yes() => {
                    if let Some(ref mut snapshot) = snapshot {
-                        snapshot.recover_diff_marker();
+                        snapshot.recover_vcs_conflict_marker();
                    }
                    if self.token == token::Colon {
                        // if a previous and next token of the current one is
--- a/src/librustdoc/doctest.rs
+++ b/src/librustdoc/doctest.rs
@ -684,9 +684,9 @@ pub(crate) fn make_test(
                    }
                }

-                // The supplied slice is only used for diagnostics,
+                // The supplied item is only used for diagnostics,
                // which are swallowed here anyway.
-                parser.maybe_consume_incorrect_semicolon(&[]);
+                parser.maybe_consume_incorrect_semicolon(None);
            }

            // Reset errors so that they won't be reported as compiler bugs when dropping the
--- a/tests/ui/suggestions/recover-from-semicolon-trailing-item.stderr
+++ b/tests/ui/suggestions/recover-from-semicolon-trailing-item.stderr
@ -3,6 +3,8 @@ error: expected item, found `;`
   |
 LL | mod M {};
   |         ^ help: remove this semicolon
+   |
+   = help: module declarations are not followed by a semicolon

 error: expected item, found `;`
  --> $DIR/recover-from-semicolon-trailing-item.rs:4:12
@ -17,6 +19,8 @@ error: expected item, found `;`
   |
 LL | fn foo(a: usize) {};
   |                    ^ help: remove this semicolon
+   |
+   = help: function declarations are not followed by a semicolon

 error[E0308]: mismatched types
  --> $DIR/recover-from-semicolon-trailing-item.rs:10:20