Rollup merge of #125117 - dev-ardi:improve-parser, r=wesleywiser,fmease

Improve parser

Fixes #124935.

- Add a few more help diagnostics to incorrect semicolons
- Overall improved that function
- Addded a few comments
- Renamed diff_marker fns to git_diff_marker
This commit is contained in:
Matthias Krüger 2024-05-18 18:44:14 +02:00 committed by GitHub
commit f9bf759e83
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 81 additions and 70 deletions

View File

@ -3262,6 +3262,7 @@ pub enum ItemKind {
}
impl ItemKind {
/// "a" or "an"
pub fn article(&self) -> &'static str {
use ItemKind::*;
match self {

View File

@ -83,7 +83,7 @@ pub(crate) struct IncorrectSemicolon<'a> {
#[suggestion(style = "short", code = "", applicability = "machine-applicable")]
pub span: Span,
#[help]
pub opt_help: Option<()>,
pub show_help: bool,
pub name: &'a str,
}

View File

@ -241,7 +241,7 @@ impl<'psess, 'src> TokenTreesReader<'psess, 'src> {
// we have no way of tracking this in the lexer itself, so we piggyback on the parser
let mut in_cond = false;
while parser.token != token::Eof {
if let Err(diff_err) = parser.err_diff_marker() {
if let Err(diff_err) = parser.err_vcs_conflict_marker() {
diff_errs.push(diff_err);
} else if parser.is_keyword_ahead(0, &[kw::If, kw::While]) {
in_cond = true;

View File

@ -1817,34 +1817,31 @@ impl<'a> Parser<'a> {
Ok(P(T::recovered(Some(P(QSelf { ty, path_span, position: 0 })), path)))
}
pub fn maybe_consume_incorrect_semicolon(&mut self, items: &[P<Item>]) -> bool {
if self.token.kind == TokenKind::Semi {
self.bump();
/// This function gets called in places where a semicolon is NOT expected and if there's a
/// semicolon it emits the appropriate error and returns true.
pub fn maybe_consume_incorrect_semicolon(&mut self, previous_item: Option<&Item>) -> bool {
if self.token.kind != TokenKind::Semi {
return false;
}
let mut err =
IncorrectSemicolon { span: self.prev_token.span, opt_help: None, name: "" };
if !items.is_empty() {
let previous_item = &items[items.len() - 1];
let previous_item_kind_name = match previous_item.kind {
// Check previous item to add it to the diagnostic, for example to say
// `enum declarations are not followed by a semicolon`
let err = match previous_item {
Some(previous_item) => {
let name = match previous_item.kind {
// Say "braced struct" because tuple-structs and
// braceless-empty-struct declarations do take a semicolon.
ItemKind::Struct(..) => Some("braced struct"),
ItemKind::Enum(..) => Some("enum"),
ItemKind::Trait(..) => Some("trait"),
ItemKind::Union(..) => Some("union"),
_ => None,
ItemKind::Struct(..) => "braced struct",
_ => previous_item.kind.descr(),
};
if let Some(name) = previous_item_kind_name {
err.opt_help = Some(());
err.name = name;
}
IncorrectSemicolon { span: self.token.span, name, show_help: true }
}
self.dcx().emit_err(err);
true
} else {
false
}
None => IncorrectSemicolon { span: self.token.span, name: "", show_help: false },
};
self.dcx().emit_err(err);
self.bump();
true
}
/// Creates a `Diag` for an unexpected token `t` and tries to recover if it is a
@ -2957,13 +2954,23 @@ impl<'a> Parser<'a> {
err
}
pub fn is_diff_marker(&mut self, long_kind: &TokenKind, short_kind: &TokenKind) -> bool {
/// This checks if this is a conflict marker, depending of the parameter passed.
///
/// * `>>>>>`
/// * `=====`
/// * `<<<<<`
///
pub fn is_vcs_conflict_marker(
&mut self,
long_kind: &TokenKind,
short_kind: &TokenKind,
) -> bool {
(0..3).all(|i| self.look_ahead(i, |tok| tok == long_kind))
&& self.look_ahead(3, |tok| tok == short_kind)
}
fn diff_marker(&mut self, long_kind: &TokenKind, short_kind: &TokenKind) -> Option<Span> {
if self.is_diff_marker(long_kind, short_kind) {
fn conflict_marker(&mut self, long_kind: &TokenKind, short_kind: &TokenKind) -> Option<Span> {
if self.is_vcs_conflict_marker(long_kind, short_kind) {
let lo = self.token.span;
for _ in 0..4 {
self.bump();
@ -2973,15 +2980,16 @@ impl<'a> Parser<'a> {
None
}
pub fn recover_diff_marker(&mut self) {
if let Err(err) = self.err_diff_marker() {
pub fn recover_vcs_conflict_marker(&mut self) {
if let Err(err) = self.err_vcs_conflict_marker() {
err.emit();
FatalError.raise();
}
}
pub fn err_diff_marker(&mut self) -> PResult<'a, ()> {
let Some(start) = self.diff_marker(&TokenKind::BinOp(token::Shl), &TokenKind::Lt) else {
pub fn err_vcs_conflict_marker(&mut self) -> PResult<'a, ()> {
let Some(start) = self.conflict_marker(&TokenKind::BinOp(token::Shl), &TokenKind::Lt)
else {
return Ok(());
};
let mut spans = Vec::with_capacity(3);
@ -2993,13 +3001,15 @@ impl<'a> Parser<'a> {
if self.token.kind == TokenKind::Eof {
break;
}
if let Some(span) = self.diff_marker(&TokenKind::OrOr, &TokenKind::BinOp(token::Or)) {
if let Some(span) = self.conflict_marker(&TokenKind::OrOr, &TokenKind::BinOp(token::Or))
{
middlediff3 = Some(span);
}
if let Some(span) = self.diff_marker(&TokenKind::EqEq, &TokenKind::Eq) {
if let Some(span) = self.conflict_marker(&TokenKind::EqEq, &TokenKind::Eq) {
middle = Some(span);
}
if let Some(span) = self.diff_marker(&TokenKind::BinOp(token::Shr), &TokenKind::Gt) {
if let Some(span) = self.conflict_marker(&TokenKind::BinOp(token::Shr), &TokenKind::Gt)
{
spans.push(span);
end = Some(span);
break;

View File

@ -3734,7 +3734,7 @@ impl<'a> Parser<'a> {
/// Parses `ident (COLON expr)?`.
fn parse_expr_field(&mut self) -> PResult<'a, ExprField> {
let attrs = self.parse_outer_attributes()?;
self.recover_diff_marker();
self.recover_vcs_conflict_marker();
self.collect_tokens_trailing_token(attrs, ForceCollect::No, |this, attrs| {
let lo = this.token.span;

View File

@ -49,6 +49,7 @@ impl<'a> Parser<'a> {
}
/// Parses the contents of a module (inner attributes followed by module items).
/// We exit once we hit `term`
pub fn parse_mod(
&mut self,
term: &TokenKind,
@ -59,13 +60,13 @@ impl<'a> Parser<'a> {
let post_attr_lo = self.token.span;
let mut items = ThinVec::new();
while let Some(item) = self.parse_item(ForceCollect::No)? {
self.maybe_consume_incorrect_semicolon(Some(&item));
items.push(item);
self.maybe_consume_incorrect_semicolon(&items);
}
if !self.eat(term) {
let token_str = super::token_descr(&self.token);
if !self.maybe_consume_incorrect_semicolon(&items) {
if !self.maybe_consume_incorrect_semicolon(items.last().map(|x| &**x)) {
let msg = format!("expected item, found {token_str}");
let mut err = self.dcx().struct_span_err(self.token.span, msg);
let span = self.token.span;
@ -101,9 +102,9 @@ impl<'a> Parser<'a> {
fn_parse_mode: FnParseMode,
force_collect: ForceCollect,
) -> PResult<'a, Option<Item>> {
self.recover_diff_marker();
self.recover_vcs_conflict_marker();
let attrs = self.parse_outer_attributes()?;
self.recover_diff_marker();
self.recover_vcs_conflict_marker();
self.parse_item_common(attrs, true, false, fn_parse_mode, force_collect)
}
@ -194,12 +195,12 @@ impl<'a> Parser<'a> {
fn_parse_mode: FnParseMode,
case: Case,
) -> PResult<'a, Option<ItemInfo>> {
let def_final = def == &Defaultness::Final;
let check_pub = def == &Defaultness::Final;
let mut def_ = || mem::replace(def, Defaultness::Final);
let info = if self.eat_keyword_case(kw::Use, case) {
self.parse_use_item()?
} else if self.check_fn_front_matter(def_final, case) {
} else if self.check_fn_front_matter(check_pub, case) {
// FUNCTION ITEM
let (ident, sig, generics, body) =
self.parse_fn(attrs, fn_parse_mode, lo, vis, case)?;
@ -310,7 +311,7 @@ impl<'a> Parser<'a> {
Ok(Some(info))
}
fn recover_import_as_use(&mut self) -> PResult<'a, Option<(Ident, ItemKind)>> {
fn recover_import_as_use(&mut self) -> PResult<'a, Option<ItemInfo>> {
let span = self.token.span;
let token_name = super::token_descr(&self.token);
let snapshot = self.create_snapshot_for_diagnostic();
@ -328,7 +329,7 @@ impl<'a> Parser<'a> {
}
}
fn parse_use_item(&mut self) -> PResult<'a, (Ident, ItemKind)> {
fn parse_use_item(&mut self) -> PResult<'a, ItemInfo> {
let tree = self.parse_use_tree()?;
if let Err(mut e) = self.expect_semi() {
match tree.kind {
@ -738,7 +739,7 @@ impl<'a> Parser<'a> {
if self.recover_doc_comment_before_brace() {
continue;
}
self.recover_diff_marker();
self.recover_vcs_conflict_marker();
match parse_item(self) {
Ok(None) => {
let mut is_unnecessary_semicolon = !items.is_empty()
@ -1085,7 +1086,7 @@ impl<'a> Parser<'a> {
/// ```
fn parse_use_tree_list(&mut self) -> PResult<'a, ThinVec<(UseTree, ast::NodeId)>> {
self.parse_delim_comma_seq(Delimiter::Brace, |p| {
p.recover_diff_marker();
p.recover_vcs_conflict_marker();
Ok((p.parse_use_tree()?, DUMMY_NODE_ID))
})
.map(|(r, _)| r)
@ -1512,9 +1513,9 @@ impl<'a> Parser<'a> {
}
fn parse_enum_variant(&mut self, span: Span) -> PResult<'a, Option<Variant>> {
self.recover_diff_marker();
self.recover_vcs_conflict_marker();
let variant_attrs = self.parse_outer_attributes()?;
self.recover_diff_marker();
self.recover_vcs_conflict_marker();
let help = "enum variants can be `Variant`, `Variant = <integer>`, \
`Variant(Type, ..., TypeN)` or `Variant { fields: Types }`";
self.collect_tokens_trailing_token(
@ -1703,6 +1704,10 @@ impl<'a> Parser<'a> {
Ok((class_name, ItemKind::Union(vdata, generics)))
}
/// This function parses the fields of record structs:
///
/// - `struct S { ... }`
/// - `enum E { Variant { ... } }`
pub(crate) fn parse_record_struct_body(
&mut self,
adt_ty: &str,
@ -1729,19 +1734,10 @@ impl<'a> Parser<'a> {
self.eat(&token::CloseDelim(Delimiter::Brace));
} else {
let token_str = super::token_descr(&self.token);
let msg = format!(
"expected {}`{{` after struct name, found {}",
if parsed_where { "" } else { "`where`, or " },
token_str
);
let where_str = if parsed_where { "" } else { "`where`, or " };
let msg = format!("expected {where_str}`{{` after struct name, found {token_str}");
let mut err = self.dcx().struct_span_err(self.token.span, msg);
err.span_label(
self.token.span,
format!(
"expected {}`{{` after struct name",
if parsed_where { "" } else { "`where`, or " }
),
);
err.span_label(self.token.span, format!("expected {where_str}`{{` after struct name",));
return Err(err);
}
@ -1755,7 +1751,7 @@ impl<'a> Parser<'a> {
let attrs = p.parse_outer_attributes()?;
p.collect_tokens_trailing_token(attrs, ForceCollect::No, |p, attrs| {
let mut snapshot = None;
if p.is_diff_marker(&TokenKind::BinOp(token::Shl), &TokenKind::Lt) {
if p.is_vcs_conflict_marker(&TokenKind::BinOp(token::Shl), &TokenKind::Lt) {
// Account for `<<<<<<<` diff markers. We can't proactively error here because
// that can be a valid type start, so we snapshot and reparse only we've
// encountered another parse error.
@ -1766,7 +1762,7 @@ impl<'a> Parser<'a> {
Ok(vis) => vis,
Err(err) => {
if let Some(ref mut snapshot) = snapshot {
snapshot.recover_diff_marker();
snapshot.recover_vcs_conflict_marker();
}
return Err(err);
}
@ -1775,7 +1771,7 @@ impl<'a> Parser<'a> {
Ok(ty) => ty,
Err(err) => {
if let Some(ref mut snapshot) = snapshot {
snapshot.recover_diff_marker();
snapshot.recover_vcs_conflict_marker();
}
return Err(err);
}
@ -1800,9 +1796,9 @@ impl<'a> Parser<'a> {
/// Parses an element of a struct declaration.
fn parse_field_def(&mut self, adt_ty: &str) -> PResult<'a, FieldDef> {
self.recover_diff_marker();
self.recover_vcs_conflict_marker();
let attrs = self.parse_outer_attributes()?;
self.recover_diff_marker();
self.recover_vcs_conflict_marker();
self.collect_tokens_trailing_token(attrs, ForceCollect::No, |this, attrs| {
let lo = this.token.span;
let vis = this.parse_visibility(FollowedByType::No)?;
@ -2662,7 +2658,7 @@ impl<'a> Parser<'a> {
}
let (mut params, _) = self.parse_paren_comma_seq(|p| {
p.recover_diff_marker();
p.recover_vcs_conflict_marker();
let snapshot = p.create_snapshot_for_diagnostic();
let param = p.parse_param_general(req_name, first_param).or_else(|e| {
let guar = e.emit();

View File

@ -567,7 +567,7 @@ impl<'a> Parser<'a> {
if self.token == token::Eof {
break;
}
if self.is_diff_marker(&TokenKind::BinOp(token::Shl), &TokenKind::Lt) {
if self.is_vcs_conflict_marker(&TokenKind::BinOp(token::Shl), &TokenKind::Lt) {
// Account for `<<<<<<<` diff markers. We can't proactively error here because
// that can be a valid path start, so we snapshot and reparse only we've
// encountered another parse error.
@ -576,7 +576,7 @@ impl<'a> Parser<'a> {
let stmt = match self.parse_full_stmt(recover) {
Err(mut err) if recover.yes() => {
if let Some(ref mut snapshot) = snapshot {
snapshot.recover_diff_marker();
snapshot.recover_vcs_conflict_marker();
}
if self.token == token::Colon {
// if a previous and next token of the current one is

View File

@ -684,9 +684,9 @@ pub(crate) fn make_test(
}
}
// The supplied slice is only used for diagnostics,
// The supplied item is only used for diagnostics,
// which are swallowed here anyway.
parser.maybe_consume_incorrect_semicolon(&[]);
parser.maybe_consume_incorrect_semicolon(None);
}
// Reset errors so that they won't be reported as compiler bugs when dropping the

View File

@ -3,6 +3,8 @@ error: expected item, found `;`
|
LL | mod M {};
| ^ help: remove this semicolon
|
= help: module declarations are not followed by a semicolon
error: expected item, found `;`
--> $DIR/recover-from-semicolon-trailing-item.rs:4:12
@ -17,6 +19,8 @@ error: expected item, found `;`
|
LL | fn foo(a: usize) {};
| ^ help: remove this semicolon
|
= help: function declarations are not followed by a semicolon
error[E0308]: mismatched types
--> $DIR/recover-from-semicolon-trailing-item.rs:10:20