From f20a13998178d7abc5cba7421d5758ce48b0c588 Mon Sep 17 00:00:00 2001 From: Tshepang Lekhonkhobe Date: Sun, 3 Jan 2016 11:14:09 +0200 Subject: [PATCH 1/4] run rustfmt on syntax::parse::lexer --- src/libsyntax/parse/lexer/comments.rs | 122 +-- src/libsyntax/parse/lexer/mod.rs | 1171 +++++++++++++++---------- 2 files changed, 772 insertions(+), 521 deletions(-) diff --git a/src/libsyntax/parse/lexer/comments.rs b/src/libsyntax/parse/lexer/comments.rs index d2156d7cb68..5365870d3fe 100644 --- a/src/libsyntax/parse/lexer/comments.rs +++ b/src/libsyntax/parse/lexer/comments.rs @@ -43,10 +43,8 @@ pub struct Comment { } pub fn is_doc_comment(s: &str) -> bool { - (s.starts_with("///") && super::is_doc_comment(s)) || - s.starts_with("//!") || - (s.starts_with("/**") && is_block_doc_comment(s)) || - s.starts_with("/*!") + (s.starts_with("///") && super::is_doc_comment(s)) || s.starts_with("//!") || + (s.starts_with("/**") && is_block_doc_comment(s)) || s.starts_with("/*!") } pub fn doc_comment_style(comment: &str) -> ast::AttrStyle { @@ -64,18 +62,18 @@ pub fn strip_doc_comment_decoration(comment: &str) -> String { let mut i = 0; let mut j = lines.len(); // first line of all-stars should be omitted - if !lines.is_empty() && - lines[0].chars().all(|c| c == '*') { + if !lines.is_empty() && lines[0].chars().all(|c| c == '*') { i += 1; } while i < j && lines[i].trim().is_empty() { i += 1; } // like the first, a last line of all stars should be omitted - if j > i && lines[j - 1] - .chars() - .skip(1) - .all(|c| c == '*') { + if j > i && + lines[j - 1] + .chars() + .skip(1) + .all(|c| c == '*') { j -= 1; } while j > i && lines[j - 1].trim().is_empty() { @@ -85,7 +83,7 @@ pub fn strip_doc_comment_decoration(comment: &str) -> String { } /// remove a "[ \t]*\*" block from each line, if possible - fn horizontal_trim(lines: Vec ) -> Vec { + fn horizontal_trim(lines: Vec) -> Vec { let mut i = usize::MAX; let mut can_trim = true; let mut first = true; @@ -114,9 +112,9 @@ pub fn strip_doc_comment_decoration(comment: &str) -> String { } if can_trim { - lines.iter().map(|line| { - (&line[i + 1..line.len()]).to_string() - }).collect() + lines.iter() + .map(|line| (&line[i + 1..line.len()]).to_string()) + .collect() } else { lines } @@ -132,9 +130,9 @@ pub fn strip_doc_comment_decoration(comment: &str) -> String { if comment.starts_with("/*") { let lines = comment[3..comment.len() - 2] - .lines() - .map(|s| s.to_string()) - .collect:: >(); + .lines() + .map(|s| s.to_string()) + .collect::>(); let lines = vertical_trim(lines); let lines = horizontal_trim(lines); @@ -154,8 +152,7 @@ fn push_blank_line_comment(rdr: &StringReader, comments: &mut Vec) { }); } -fn consume_whitespace_counting_blank_lines(rdr: &mut StringReader, - comments: &mut Vec) { +fn consume_whitespace_counting_blank_lines(rdr: &mut StringReader, comments: &mut Vec) { while is_whitespace(rdr.curr) && !rdr.is_eof() { if rdr.col == CharPos(0) && rdr.curr_is('\n') { push_blank_line_comment(rdr, &mut *comments); @@ -165,19 +162,25 @@ fn consume_whitespace_counting_blank_lines(rdr: &mut StringReader, } -fn read_shebang_comment(rdr: &mut StringReader, code_to_the_left: bool, +fn read_shebang_comment(rdr: &mut StringReader, + code_to_the_left: bool, comments: &mut Vec) { debug!(">>> shebang comment"); let p = rdr.last_pos; debug!("<<< shebang comment"); comments.push(Comment { - style: if code_to_the_left { Trailing } else { Isolated }, - lines: vec!(rdr.read_one_line_comment()), - pos: p + style: if code_to_the_left { + Trailing + } else { + Isolated + }, + lines: vec![rdr.read_one_line_comment()], + pos: p, }); } -fn read_line_comments(rdr: &mut StringReader, code_to_the_left: bool, +fn read_line_comments(rdr: &mut StringReader, + code_to_the_left: bool, comments: &mut Vec) { debug!(">>> line comments"); let p = rdr.last_pos; @@ -195,9 +198,13 @@ fn read_line_comments(rdr: &mut StringReader, code_to_the_left: bool, debug!("<<< line comments"); if !lines.is_empty() { comments.push(Comment { - style: if code_to_the_left { Trailing } else { Isolated }, + style: if code_to_the_left { + Trailing + } else { + Isolated + }, lines: lines, - pos: p + pos: p, }); } } @@ -220,8 +227,7 @@ fn all_whitespace(s: &str, col: CharPos) -> Option { return Some(cursor); } -fn trim_whitespace_prefix_and_push_line(lines: &mut Vec , - s: String, col: CharPos) { +fn trim_whitespace_prefix_and_push_line(lines: &mut Vec, s: String, col: CharPos) { let len = s.len(); let s1 = match all_whitespace(&s[..], col) { Some(col) => { @@ -239,7 +245,7 @@ fn trim_whitespace_prefix_and_push_line(lines: &mut Vec , fn read_block_comment(rdr: &mut StringReader, code_to_the_left: bool, - comments: &mut Vec ) { + comments: &mut Vec) { debug!(">>> block comment"); let p = rdr.last_pos; let mut lines: Vec = Vec::new(); @@ -261,7 +267,7 @@ fn read_block_comment(rdr: &mut StringReader, rdr.bump(); } if is_block_doc_comment(&curr_line[..]) { - return + return; } assert!(!curr_line.contains('\n')); lines.push(curr_line); @@ -273,9 +279,7 @@ fn read_block_comment(rdr: &mut StringReader, panic!(rdr.fatal("unterminated block comment")); } if rdr.curr_is('\n') { - trim_whitespace_prefix_and_push_line(&mut lines, - curr_line, - col); + trim_whitespace_prefix_and_push_line(&mut lines, curr_line, col); curr_line = String::new(); rdr.bump(); } else { @@ -291,30 +295,36 @@ fn read_block_comment(rdr: &mut StringReader, rdr.bump(); curr_line.push('/'); level -= 1; - } else { rdr.bump(); } + } else { + rdr.bump(); + } } } } if !curr_line.is_empty() { - trim_whitespace_prefix_and_push_line(&mut lines, - curr_line, - col); + trim_whitespace_prefix_and_push_line(&mut lines, curr_line, col); } } - let mut style = if code_to_the_left { Trailing } else { Isolated }; + let mut style = if code_to_the_left { + Trailing + } else { + Isolated + }; rdr.consume_non_eol_whitespace(); if !rdr.is_eof() && !rdr.curr_is('\n') && lines.len() == 1 { style = Mixed; } debug!("<<< block comment"); - comments.push(Comment {style: style, lines: lines, pos: p}); + comments.push(Comment { + style: style, + lines: lines, + pos: p, + }); } -fn consume_comment(rdr: &mut StringReader, - code_to_the_left: bool, - comments: &mut Vec ) { +fn consume_comment(rdr: &mut StringReader, code_to_the_left: bool, comments: &mut Vec) { debug!(">>> consume comment"); if rdr.curr_is('/') && rdr.nextch_is('/') { read_line_comments(rdr, code_to_the_left, comments); @@ -322,7 +332,9 @@ fn consume_comment(rdr: &mut StringReader, read_block_comment(rdr, code_to_the_left, comments); } else if rdr.curr_is('#') && rdr.nextch_is('!') { read_shebang_comment(rdr, code_to_the_left, comments); - } else { panic!(); } + } else { + panic!(); + } debug!("<<< consume comment"); } @@ -337,7 +349,7 @@ pub struct Literal { pub fn gather_comments_and_literals(span_diagnostic: &errors::Handler, path: String, srdr: &mut Read) - -> (Vec, Vec) { + -> (Vec, Vec) { let mut src = Vec::new(); srdr.read_to_end(&mut src).unwrap(); let src = String::from_utf8(src).unwrap(); @@ -366,12 +378,15 @@ pub fn gather_comments_and_literals(span_diagnostic: &errors::Handler, let bstart = rdr.last_pos; rdr.next_token(); - //discard, and look ahead; we're working with internal state + // discard, and look ahead; we're working with internal state let TokenAndSpan { tok, sp } = rdr.peek(); if tok.is_lit() { rdr.with_str_from(bstart, |s| { debug!("tok lit: {}", s); - literals.push(Literal {lit: s.to_string(), pos: sp.lo}); + literals.push(Literal { + lit: s.to_string(), + pos: sp.lo, + }); }) } else { debug!("tok: {}", pprust::token_to_string(&tok)); @@ -386,31 +401,36 @@ pub fn gather_comments_and_literals(span_diagnostic: &errors::Handler, mod tests { use super::*; - #[test] fn test_block_doc_comment_1() { + #[test] + fn test_block_doc_comment_1() { let comment = "/**\n * Test \n ** Test\n * Test\n*/"; let stripped = strip_doc_comment_decoration(comment); assert_eq!(stripped, " Test \n* Test\n Test"); } - #[test] fn test_block_doc_comment_2() { + #[test] + fn test_block_doc_comment_2() { let comment = "/**\n * Test\n * Test\n*/"; let stripped = strip_doc_comment_decoration(comment); assert_eq!(stripped, " Test\n Test"); } - #[test] fn test_block_doc_comment_3() { + #[test] + fn test_block_doc_comment_3() { let comment = "/**\n let a: *i32;\n *a = 5;\n*/"; let stripped = strip_doc_comment_decoration(comment); assert_eq!(stripped, " let a: *i32;\n *a = 5;"); } - #[test] fn test_block_doc_comment_4() { + #[test] + fn test_block_doc_comment_4() { let comment = "/*******************\n test\n *********************/"; let stripped = strip_doc_comment_decoration(comment); assert_eq!(stripped, " test"); } - #[test] fn test_line_doc_comment() { + #[test] + fn test_line_doc_comment() { let stripped = strip_doc_comment_decoration("/// test"); assert_eq!(stripped, " test"); let stripped = strip_doc_comment_decoration("///! test"); diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index 3d8f3bcd526..752e20a3d04 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -42,8 +42,8 @@ pub trait Reader { match t.tok { token::Whitespace | token::Comment | token::Shebang(_) => { t = self.next_token(); - }, - _ => break + } + _ => break, } } t @@ -67,17 +67,19 @@ pub struct StringReader<'a> { /// The last character to be read pub curr: Option, pub filemap: Rc, - /* cached: */ + // cached: pub peek_tok: token::Token, pub peek_span: Span, // cache a direct reference to the source text, so that we don't have to // retrieve it via `self.filemap.src.as_ref().unwrap()` all the time. - source_text: Rc + source_text: Rc, } impl<'a> Reader for StringReader<'a> { - fn is_eof(&self) -> bool { self.curr.is_none() } + fn is_eof(&self) -> bool { + self.curr.is_none() + } /// Return the next token. EFFECT: advances the string_reader. fn next_token(&mut self) -> TokenAndSpan { let ret_val = TokenAndSpan { @@ -128,10 +130,12 @@ impl<'a> Reader for TtReader<'a> { impl<'a> StringReader<'a> { /// For comments.rs, which hackily pokes into pos and curr pub fn new_raw<'b>(span_diagnostic: &'b Handler, - filemap: Rc) -> StringReader<'b> { + filemap: Rc) + -> StringReader<'b> { if filemap.src.is_none() { - span_diagnostic.bug(&format!("Cannot lex filemap without source: {}", - filemap.name)[..]); + span_diagnostic.bug(&format!("Cannot lex filemap \ + without source: {}", + filemap.name)[..]); } let source_text = (*filemap.src.as_ref().unwrap()).clone(); @@ -143,17 +147,18 @@ impl<'a> StringReader<'a> { col: CharPos(0), curr: Some('\n'), filemap: filemap, - /* dummy values; not read */ + // dummy values; not read peek_tok: token::Eof, peek_span: codemap::DUMMY_SP, - source_text: source_text + source_text: source_text, }; sr.bump(); sr } pub fn new<'b>(span_diagnostic: &'b Handler, - filemap: Rc) -> StringReader<'b> { + filemap: Rc) + -> StringReader<'b> { let mut sr = StringReader::new_raw(span_diagnostic, filemap); sr.advance_token(); sr @@ -189,7 +194,9 @@ impl<'a> StringReader<'a> { fn fatal_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char) -> FatalError { let mut m = m.to_string(); m.push_str(": "); - for c in c.escape_default() { m.push(c) } + for c in c.escape_default() { + m.push(c) + } self.fatal_span_(from_pos, to_pos, &m[..]) } fn struct_fatal_span_char(&self, @@ -197,10 +204,12 @@ impl<'a> StringReader<'a> { to_pos: BytePos, m: &str, c: char) - -> DiagnosticBuilder<'a> { + -> DiagnosticBuilder<'a> { let mut m = m.to_string(); m.push_str(": "); - for c in c.escape_default() { m.push(c) } + for c in c.escape_default() { + m.push(c) + } self.span_diagnostic.struct_span_fatal(codemap::mk_sp(from_pos, to_pos), &m[..]) } @@ -209,7 +218,9 @@ impl<'a> StringReader<'a> { fn err_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char) { let mut m = m.to_string(); m.push_str(": "); - for c in c.escape_default() { m.push(c) } + for c in c.escape_default() { + m.push(c) + } self.err_span_(from_pos, to_pos, &m[..]); } fn struct_err_span_char(&self, @@ -217,10 +228,12 @@ impl<'a> StringReader<'a> { to_pos: BytePos, m: &str, c: char) - -> DiagnosticBuilder<'a> { + -> DiagnosticBuilder<'a> { let mut m = m.to_string(); m.push_str(": "); - for c in c.escape_default() { m.push(c) } + for c in c.escape_default() { + m.push(c) + } self.span_diagnostic.struct_span_err(codemap::mk_sp(from_pos, to_pos), &m[..]) } @@ -241,7 +254,7 @@ impl<'a> StringReader<'a> { Some(comment) => { self.peek_span = comment.sp; self.peek_tok = comment.tok; - }, + } None => { if self.is_eof() { self.peek_tok = token::Eof; @@ -249,8 +262,7 @@ impl<'a> StringReader<'a> { } else { let start_bytepos = self.last_pos; self.peek_tok = self.next_token_inner(); - self.peek_span = codemap::mk_sp(start_bytepos, - self.last_pos); + self.peek_span = codemap::mk_sp(start_bytepos, self.last_pos); }; } } @@ -263,8 +275,8 @@ impl<'a> StringReader<'a> { /// Calls `f` with a string slice of the source text spanning from `start` /// up to but excluding `self.last_pos`, meaning the slice does not include /// the character `self.curr`. - pub fn with_str_from(&self, start: BytePos, f: F) -> T where - F: FnOnce(&str) -> T, + pub fn with_str_from(&self, start: BytePos, f: F) -> T + where F: FnOnce(&str) -> T { self.with_str_from_to(start, self.last_pos, f) } @@ -285,16 +297,14 @@ impl<'a> StringReader<'a> { /// Calls `f` with a string slice of the source text spanning from `start` /// up to but excluding `end`. - fn with_str_from_to(&self, start: BytePos, end: BytePos, f: F) -> T where - F: FnOnce(&str) -> T, + fn with_str_from_to(&self, start: BytePos, end: BytePos, f: F) -> T + where F: FnOnce(&str) -> T { - f(&self.source_text[self.byte_offset(start).to_usize().. - self.byte_offset(end).to_usize()]) + f(&self.source_text[self.byte_offset(start).to_usize()..self.byte_offset(end).to_usize()]) } /// Converts CRLF to LF in the given string, raising an error on bare CR. - fn translate_crlf<'b>(&self, start: BytePos, - s: &'b str, errmsg: &'b str) -> Cow<'b, str> { + fn translate_crlf<'b>(&self, start: BytePos, s: &'b str, errmsg: &'b str) -> Cow<'b, str> { let mut i = 0; while i < s.len() { let ch = char_at(s, i); @@ -311,15 +321,21 @@ impl<'a> StringReader<'a> { } return s.into(); - fn translate_crlf_(rdr: &StringReader, start: BytePos, - s: &str, errmsg: &str, mut i: usize) -> String { + fn translate_crlf_(rdr: &StringReader, + start: BytePos, + s: &str, + errmsg: &str, + mut i: usize) + -> String { let mut buf = String::with_capacity(s.len()); let mut j = 0; while i < s.len() { let ch = char_at(s, i); let next = i + ch.len_utf8(); if ch == '\r' { - if j < i { buf.push_str(&s[j..i]); } + if j < i { + buf.push_str(&s[j..i]); + } j = next; if next >= s.len() || char_at(s, next) != '\n' { let pos = start + BytePos(i as u32); @@ -329,7 +345,9 @@ impl<'a> StringReader<'a> { } i = next; } - if j < s.len() { buf.push_str(&s[j..]); } + if j < s.len() { + buf.push_str(&s[j..]); + } buf } } @@ -378,7 +396,9 @@ impl<'a> StringReader<'a> { pub fn nextnextch(&self) -> Option { let offset = self.byte_offset(self.pos).to_usize(); let s = &self.source_text[..]; - if offset >= s.len() { return None } + if offset >= s.len() { + return None; + } let next = offset + char_at(s, offset).len_utf8(); if next < s.len() { Some(char_at(s, next)) @@ -394,7 +414,7 @@ impl<'a> StringReader<'a> { /// Eats *, if possible. fn scan_optional_raw_name(&mut self) -> Option { if !ident_start(self.curr) { - return None + return None; } let start = self.last_pos; while ident_continue(self.curr) { @@ -417,10 +437,11 @@ impl<'a> StringReader<'a> { Some(c) => { if c.is_whitespace() { self.span_diagnostic.span_err(codemap::mk_sp(self.last_pos, self.last_pos), - "called consume_any_line_comment, but there was whitespace"); + "called consume_any_line_comment, but there \ + was whitespace"); } - }, - None => { } + } + None => {} } if self.curr_is('/') { @@ -443,13 +464,14 @@ impl<'a> StringReader<'a> { '\r' => { if self.nextch_is('\n') { // CRLF - break + break; } else if doc_comment { - self.err_span_(self.last_pos, self.pos, + self.err_span_(self.last_pos, + self.pos, "bare CR not allowed in doc-comment"); } } - _ => () + _ => (), } self.bump(); } @@ -465,21 +487,22 @@ impl<'a> StringReader<'a> { Some(TokenAndSpan { tok: tok, - sp: codemap::mk_sp(start_bpos, self.last_pos) + sp: codemap::mk_sp(start_bpos, self.last_pos), }) }) } else { Some(TokenAndSpan { tok: token::Comment, - sp: codemap::mk_sp(start_bpos, self.last_pos) + sp: codemap::mk_sp(start_bpos, self.last_pos), }) - } + }; } Some('*') => { - self.bump(); self.bump(); + self.bump(); + self.bump(); self.scan_block_comment() } - _ => None + _ => None, } } else if self.curr_is('#') { if self.nextch_is('!') { @@ -498,10 +521,12 @@ impl<'a> StringReader<'a> { if loc.line == 1 && loc.col == CharPos(0) { // FIXME: Add shebang "token", return it let start = self.last_pos; - while !self.curr_is('\n') && !self.is_eof() { self.bump(); } + while !self.curr_is('\n') && !self.is_eof() { + self.bump(); + } return Some(TokenAndSpan { tok: token::Shebang(self.name_from(start)), - sp: codemap::mk_sp(start, self.last_pos) + sp: codemap::mk_sp(start, self.last_pos), }); } } @@ -521,18 +546,20 @@ impl<'a> StringReader<'a> { let c = self.scan_comment(); debug!("scanning a comment {:?}", c); c - }, + } c if is_whitespace(Some(c)) => { let start_bpos = self.last_pos; - while is_whitespace(self.curr) { self.bump(); } + while is_whitespace(self.curr) { + self.bump(); + } let c = Some(TokenAndSpan { tok: token::Whitespace, - sp: codemap::mk_sp(start_bpos, self.last_pos) + sp: codemap::mk_sp(start_bpos, self.last_pos), }); debug!("scanning whitespace: {:?}", c); c - }, - _ => None + } + _ => None, } } @@ -567,7 +594,7 @@ impl<'a> StringReader<'a> { '\r' => { has_cr = true; } - _ => () + _ => (), } self.bump(); } @@ -576,17 +603,20 @@ impl<'a> StringReader<'a> { // but comments with only "*"s between two "/"s are not let tok = if is_block_doc_comment(string) { let string = if has_cr { - self.translate_crlf(start_bpos, string, + self.translate_crlf(start_bpos, + string, "bare CR not allowed in block doc-comment") - } else { string.into() }; + } else { + string.into() + }; token::DocComment(token::intern(&string[..])) } else { token::Comment }; - Some(TokenAndSpan{ + Some(TokenAndSpan { tok: tok, - sp: codemap::mk_sp(start_bpos, self.last_pos) + sp: codemap::mk_sp(start_bpos, self.last_pos), }) }) } @@ -602,23 +632,27 @@ impl<'a> StringReader<'a> { let mut len = 0; loop { let c = self.curr; - if c == Some('_') { debug!("skipping a _"); self.bump(); continue; } + if c == Some('_') { + debug!("skipping a _"); + self.bump(); + continue; + } match c.and_then(|cc| cc.to_digit(scan_radix)) { Some(_) => { debug!("{:?} in scan_digits", c); // check that the hypothetical digit is actually // in range for the true radix if c.unwrap().to_digit(real_radix).is_none() { - self.err_span_(self.last_pos, self.pos, - &format!("invalid digit for a base {} literal", - real_radix)); + self.err_span_(self.last_pos, + self.pos, + &format!("invalid digit for a base {} literal", real_radix)); } len += 1; self.bump(); } - _ => return len + _ => return len, } - }; + } } /// Lex a LIT_INTEGER or a LIT_FLOAT @@ -631,9 +665,21 @@ impl<'a> StringReader<'a> { if c == '0' { match self.curr.unwrap_or('\0') { - 'b' => { self.bump(); base = 2; num_digits = self.scan_digits(2, 10); } - 'o' => { self.bump(); base = 8; num_digits = self.scan_digits(8, 10); } - 'x' => { self.bump(); base = 16; num_digits = self.scan_digits(16, 16); } + 'b' => { + self.bump(); + base = 2; + num_digits = self.scan_digits(2, 10); + } + 'o' => { + self.bump(); + base = 8; + num_digits = self.scan_digits(8, 10); + } + 'x' => { + self.bump(); + base = 16; + num_digits = self.scan_digits(16, 16); + } '0'...'9' | '_' | '.' => { num_digits = self.scan_digits(10, 10) + 1; } @@ -649,15 +695,19 @@ impl<'a> StringReader<'a> { } if num_digits == 0 { - self.err_span_(start_bpos, self.last_pos, "no valid digits found for number"); + self.err_span_(start_bpos, + self.last_pos, + "no valid digits found for number"); return token::Integer(token::intern("0")); } // might be a float, but don't be greedy if this is actually an // integer literal followed by field/method access or a range pattern // (`0..2` and `12.foo()`) - if self.curr_is('.') && !self.nextch_is('.') && !self.nextch().unwrap_or('\0') - .is_xid_start() { + if self.curr_is('.') && !self.nextch_is('.') && + !self.nextch() + .unwrap_or('\0') + .is_xid_start() { // might have stuff after the ., and if it does, it needs to start // with a number self.bump(); @@ -683,11 +733,7 @@ impl<'a> StringReader<'a> { /// Scan over `n_digits` hex digits, stopping at `delim`, reporting an /// error if too many or too few digits are encountered. - fn scan_hex_digits(&mut self, - n_digits: usize, - delim: char, - below_0x7f_only: bool) - -> bool { + fn scan_hex_digits(&mut self, n_digits: usize, delim: char, below_0x7f_only: bool) -> bool { debug!("scanning {} digits until {:?}", n_digits, delim); let start_bpos = self.last_pos; let mut accum_int = 0; @@ -702,15 +748,19 @@ impl<'a> StringReader<'a> { } if self.curr_is(delim) { let last_bpos = self.last_pos; - self.err_span_(start_bpos, last_bpos, "numeric character escape is too short"); + self.err_span_(start_bpos, + last_bpos, + "numeric character escape is too short"); valid = false; break; } let c = self.curr.unwrap_or('\x00'); accum_int *= 16; accum_int += c.to_digit(16).unwrap_or_else(|| { - self.err_span_char(self.last_pos, self.pos, - "invalid character in numeric character escape", c); + self.err_span_char(self.last_pos, + self.pos, + "invalid character in numeric character escape", + c); valid = false; 0 @@ -721,8 +771,8 @@ impl<'a> StringReader<'a> { if below_0x7f_only && accum_int >= 0x80 { self.err_span_(start_bpos, self.last_pos, - "this form of character escape may only be used \ - with characters in the range [\\x00-\\x7f]"); + "this form of character escape may only be used with characters in \ + the range [\\x00-\\x7f]"); valid = false; } @@ -741,8 +791,12 @@ impl<'a> StringReader<'a> { /// `start` is the position of `first_source_char`, which is already consumed. /// /// Returns true if there was a valid char/byte, false otherwise. - fn scan_char_or_byte(&mut self, start: BytePos, first_source_char: char, - ascii_only: bool, delim: char) -> bool { + fn scan_char_or_byte(&mut self, + start: BytePos, + first_source_char: char, + ascii_only: bool, + delim: char) + -> bool { match first_source_char { '\\' => { // '\X' for some X must be a character constant: @@ -750,7 +804,7 @@ impl<'a> StringReader<'a> { let escaped_pos = self.last_pos; self.bump(); match escaped { - None => {}, // EOF here is an error that will be checked later. + None => {} // EOF here is an error that will be checked later. Some(e) => { return match e { 'n' | 'r' | 't' | '\\' | '\'' | '"' | '0' => true, @@ -760,18 +814,19 @@ impl<'a> StringReader<'a> { self.scan_unicode_escape(delim) && !ascii_only } else { let span = codemap::mk_sp(start, self.last_pos); - self.span_diagnostic.struct_span_err(span, - "incorrect unicode escape sequence") + self.span_diagnostic + .struct_span_err(span, "incorrect unicode escape sequence") .span_help(span, - "format of unicode escape sequences is `\\u{…}`") + "format of unicode escape sequences is \ + `\\u{…}`") .emit(); false }; if ascii_only { - self.err_span_(start, self.last_pos, - "unicode escape sequences cannot be used as a byte or in \ - a byte string" - ); + self.err_span_(start, + self.last_pos, + "unicode escape sequences cannot be used as a \ + byte or in a byte string"); } valid @@ -779,27 +834,32 @@ impl<'a> StringReader<'a> { '\n' if delim == '"' => { self.consume_whitespace(); true - }, + } '\r' if delim == '"' && self.curr_is('\n') => { self.consume_whitespace(); true } c => { let last_pos = self.last_pos; - let mut err = self.struct_err_span_char( - escaped_pos, last_pos, - if ascii_only { "unknown byte escape" } - else { "unknown character escape" }, - c); + let mut err = self.struct_err_span_char(escaped_pos, + last_pos, + if ascii_only { + "unknown byte escape" + } else { + "unknown character \ + escape" + }, + c); if e == '\r' { err.span_help(codemap::mk_sp(escaped_pos, last_pos), - "this is an isolated carriage return; consider checking \ - your editor and version control settings"); + "this is an isolated carriage return; consider \ + checking your editor and version control \ + settings"); } if (e == '{' || e == '}') && !ascii_only { err.span_help(codemap::mk_sp(escaped_pos, last_pos), - "if used in a formatting string, \ - curly braces are escaped with `{{` and `}}`"); + "if used in a formatting string, curly braces \ + are escaped with `{{` and `}}`"); } err.emit(); false @@ -810,11 +870,14 @@ impl<'a> StringReader<'a> { } '\t' | '\n' | '\r' | '\'' if delim == '\'' => { let last_pos = self.last_pos; - self.err_span_char( - start, last_pos, - if ascii_only { "byte constant must be escaped" } - else { "character constant must be escaped" }, - first_source_char); + self.err_span_char(start, + last_pos, + if ascii_only { + "byte constant must be escaped" + } else { + "character constant must be escaped" + }, + first_source_char); return false; } '\r' => { @@ -822,18 +885,22 @@ impl<'a> StringReader<'a> { self.bump(); return true; } else { - self.err_span_(start, self.last_pos, + self.err_span_(start, + self.last_pos, "bare CR not allowed in string, use \\r instead"); return false; } } - _ => if ascii_only && first_source_char > '\x7F' { - let last_pos = self.last_pos; - self.err_span_char( - start, last_pos, - "byte constant must be ASCII. \ - Use a \\xHH escape for a non-ASCII byte", first_source_char); - return false; + _ => { + if ascii_only && first_source_char > '\x7F' { + let last_pos = self.last_pos; + self.err_span_char(start, + last_pos, + "byte constant must be ASCII. Use a \\xHH escape for a \ + non-ASCII byte", + first_source_char); + return false; + } } } true @@ -854,18 +921,22 @@ impl<'a> StringReader<'a> { let c = match self.curr { Some(c) => c, None => { - panic!(self.fatal_span_(start_bpos, self.last_pos, + panic!(self.fatal_span_(start_bpos, + self.last_pos, "unterminated unicode escape (found EOF)")); } }; accum_int *= 16; accum_int += c.to_digit(16).unwrap_or_else(|| { if c == delim { - panic!(self.fatal_span_(self.last_pos, self.pos, + panic!(self.fatal_span_(self.last_pos, + self.pos, "unterminated unicode escape (needed a `}`)")); } else { - self.err_span_char(self.last_pos, self.pos, - "invalid character in unicode escape", c); + self.err_span_char(self.last_pos, + self.pos, + "invalid character in unicode escape", + c); } valid = false; 0 @@ -875,13 +946,16 @@ impl<'a> StringReader<'a> { } if count > 6 { - self.err_span_(start_bpos, self.last_pos, - "overlong unicode escape (can have at most 6 hex digits)"); + self.err_span_(start_bpos, + self.last_pos, + "overlong unicode escape (can have at most 6 hex digits)"); valid = false; } if valid && (char::from_u32(accum_int).is_none() || count == 0) { - self.err_span_(start_bpos, self.last_pos, "invalid unicode character escape"); + self.err_span_(start_bpos, + self.last_pos, + "invalid unicode character escape"); valid = false; } @@ -897,7 +971,9 @@ impl<'a> StringReader<'a> { self.bump(); } if self.scan_digits(10, 10) == 0 { - self.err_span_(self.last_pos, self.pos, "expected at least one digit in exponent") + self.err_span_(self.last_pos, + self.pos, + "expected at least one digit in exponent") } } } @@ -906,11 +982,22 @@ impl<'a> StringReader<'a> { /// error if it isn't. fn check_float_base(&mut self, start_bpos: BytePos, last_bpos: BytePos, base: usize) { match base { - 16 => self.err_span_(start_bpos, last_bpos, "hexadecimal float literal is not \ - supported"), - 8 => self.err_span_(start_bpos, last_bpos, "octal float literal is not supported"), - 2 => self.err_span_(start_bpos, last_bpos, "binary float literal is not supported"), - _ => () + 16 => { + self.err_span_(start_bpos, + last_bpos, + "hexadecimal float literal is not supported") + } + 8 => { + self.err_span_(start_bpos, + last_bpos, + "octal float literal is not supported") + } + 2 => { + self.err_span_(start_bpos, + last_bpos, + "binary float literal is not supported") + } + _ => (), } } @@ -928,14 +1015,18 @@ impl<'a> StringReader<'a> { /// token, and updates the interner fn next_token_inner(&mut self) -> token::Token { let c = self.curr; - if ident_start(c) && match (c.unwrap(), self.nextch(), self.nextnextch()) { + if ident_start(c) && + match (c.unwrap(), self.nextch(), self.nextnextch()) { // Note: r as in r" or r#" is part of a raw string literal, // b as in b' is part of a byte literal. // They are not identifiers, and are handled further down. - ('r', Some('"'), _) | ('r', Some('#'), _) | - ('b', Some('"'), _) | ('b', Some('\''), _) | - ('b', Some('r'), Some('"')) | ('b', Some('r'), Some('#')) => false, - _ => true + ('r', Some('"'), _) | + ('r', Some('#'), _) | + ('b', Some('"'), _) | + ('b', Some('\''), _) | + ('b', Some('r'), Some('"')) | + ('b', Some('r'), Some('#')) => false, + _ => true, } { let start = self.last_pos; while ident_continue(self.curr) { @@ -960,299 +1051,393 @@ impl<'a> StringReader<'a> { let num = self.scan_number(c.unwrap()); let suffix = self.scan_optional_raw_name(); debug!("next_token_inner: scanned number {:?}, {:?}", num, suffix); - return token::Literal(num, suffix) + return token::Literal(num, suffix); } match c.expect("next_token_inner called at EOF") { - // One-byte tokens. - ';' => { self.bump(); return token::Semi; } - ',' => { self.bump(); return token::Comma; } - '.' => { - self.bump(); - return if self.curr_is('.') { - self.bump(); - if self.curr_is('.') { - self.bump(); - token::DotDotDot - } else { - token::DotDot - } - } else { - token::Dot - }; - } - '(' => { self.bump(); return token::OpenDelim(token::Paren); } - ')' => { self.bump(); return token::CloseDelim(token::Paren); } - '{' => { self.bump(); return token::OpenDelim(token::Brace); } - '}' => { self.bump(); return token::CloseDelim(token::Brace); } - '[' => { self.bump(); return token::OpenDelim(token::Bracket); } - ']' => { self.bump(); return token::CloseDelim(token::Bracket); } - '@' => { self.bump(); return token::At; } - '#' => { self.bump(); return token::Pound; } - '~' => { self.bump(); return token::Tilde; } - '?' => { self.bump(); return token::Question; } - ':' => { - self.bump(); - if self.curr_is(':') { + // One-byte tokens. + ';' => { self.bump(); - return token::ModSep; - } else { - return token::Colon; + return token::Semi; } - } - - '$' => { self.bump(); return token::Dollar; } - - // Multi-byte tokens. - '=' => { - self.bump(); - if self.curr_is('=') { + ',' => { self.bump(); - return token::EqEq; - } else if self.curr_is('>') { - self.bump(); - return token::FatArrow; - } else { - return token::Eq; + return token::Comma; } - } - '!' => { - self.bump(); - if self.curr_is('=') { + '.' => { self.bump(); - return token::Ne; - } else { return token::Not; } - } - '<' => { - self.bump(); - match self.curr.unwrap_or('\x00') { - '=' => { self.bump(); return token::Le; } - '<' => { return self.binop(token::Shl); } - '-' => { + return if self.curr_is('.') { + self.bump(); + if self.curr_is('.') { + self.bump(); + token::DotDotDot + } else { + token::DotDot + } + } else { + token::Dot + }; + } + '(' => { + self.bump(); + return token::OpenDelim(token::Paren); + } + ')' => { + self.bump(); + return token::CloseDelim(token::Paren); + } + '{' => { + self.bump(); + return token::OpenDelim(token::Brace); + } + '}' => { + self.bump(); + return token::CloseDelim(token::Brace); + } + '[' => { + self.bump(); + return token::OpenDelim(token::Bracket); + } + ']' => { + self.bump(); + return token::CloseDelim(token::Bracket); + } + '@' => { + self.bump(); + return token::At; + } + '#' => { + self.bump(); + return token::Pound; + } + '~' => { + self.bump(); + return token::Tilde; + } + '?' => { + self.bump(); + return token::Question; + } + ':' => { + self.bump(); + if self.curr_is(':') { + self.bump(); + return token::ModSep; + } else { + return token::Colon; + } + } + + '$' => { + self.bump(); + return token::Dollar; + } + + // Multi-byte tokens. + '=' => { + self.bump(); + if self.curr_is('=') { + self.bump(); + return token::EqEq; + } else if self.curr_is('>') { + self.bump(); + return token::FatArrow; + } else { + return token::Eq; + } + } + '!' => { + self.bump(); + if self.curr_is('=') { + self.bump(); + return token::Ne; + } else { + return token::Not; + } + } + '<' => { self.bump(); match self.curr.unwrap_or('\x00') { - _ => { return token::LArrow; } + '=' => { + self.bump(); + return token::Le; + } + '<' => { + return self.binop(token::Shl); + } + '-' => { + self.bump(); + match self.curr.unwrap_or('\x00') { + _ => { + return token::LArrow; + } + } + } + _ => { + return token::Lt; + } } - } - _ => { return token::Lt; } } - } - '>' => { - self.bump(); - match self.curr.unwrap_or('\x00') { - '=' => { self.bump(); return token::Ge; } - '>' => { return self.binop(token::Shr); } - _ => { return token::Gt; } - } - } - '\'' => { - // Either a character constant 'a' OR a lifetime name 'abc - self.bump(); - let start = self.last_pos; - - // the eof will be picked up by the final `'` check below - let c2 = self.curr.unwrap_or('\x00'); - self.bump(); - - // If the character is an ident start not followed by another single - // quote, then this is a lifetime name: - if ident_start(Some(c2)) && !self.curr_is('\'') { - while ident_continue(self.curr) { - self.bump(); + '>' => { + self.bump(); + match self.curr.unwrap_or('\x00') { + '=' => { + self.bump(); + return token::Ge; + } + '>' => { + return self.binop(token::Shr); + } + _ => { + return token::Gt; + } } + } + '\'' => { + // Either a character constant 'a' OR a lifetime name 'abc + self.bump(); + let start = self.last_pos; - // Include the leading `'` in the real identifier, for macro - // expansion purposes. See #12512 for the gory details of why - // this is necessary. - let ident = self.with_str_from(start, |lifetime_name| { - str_to_ident(&format!("'{}", lifetime_name)) - }); + // the eof will be picked up by the final `'` check below + let c2 = self.curr.unwrap_or('\x00'); + self.bump(); - // Conjure up a "keyword checking ident" to make sure that - // the lifetime name is not a keyword. - let keyword_checking_ident = - self.with_str_from(start, |lifetime_name| { + // If the character is an ident start not followed by another single + // quote, then this is a lifetime name: + if ident_start(Some(c2)) && !self.curr_is('\'') { + while ident_continue(self.curr) { + self.bump(); + } + + // Include the leading `'` in the real identifier, for macro + // expansion purposes. See #12512 for the gory details of why + // this is necessary. + let ident = self.with_str_from(start, |lifetime_name| { + str_to_ident(&format!("'{}", lifetime_name)) + }); + + // Conjure up a "keyword checking ident" to make sure that + // the lifetime name is not a keyword. + let keyword_checking_ident = self.with_str_from(start, |lifetime_name| { str_to_ident(lifetime_name) }); - let keyword_checking_token = - &token::Ident(keyword_checking_ident, token::Plain); - let last_bpos = self.last_pos; - if keyword_checking_token.is_keyword(token::keywords::SelfValue) { - self.err_span_(start, - last_bpos, - "invalid lifetime name: 'self \ - is no longer a special lifetime"); - } else if keyword_checking_token.is_any_keyword() && - !keyword_checking_token.is_keyword(token::keywords::Static) - { - self.err_span_(start, - last_bpos, - "invalid lifetime name"); - } - return token::Lifetime(ident); - } - - // Otherwise it is a character constant: - let valid = self.scan_char_or_byte(start, c2, /* ascii_only = */ false, '\''); - if !self.curr_is('\'') { - let last_bpos = self.last_pos; - panic!(self.fatal_span_verbose( - // Byte offsetting here is okay because the - // character before position `start` is an - // ascii single quote. - start - BytePos(1), last_bpos, - - String::from("character literal may only contain one codepoint"))); - } - let id = if valid { self.name_from(start) } else { token::intern("0") }; - self.bump(); // advance curr past token - let suffix = self.scan_optional_raw_name(); - return token::Literal(token::Char(id), suffix); - } - 'b' => { - self.bump(); - let lit = match self.curr { - Some('\'') => self.scan_byte(), - Some('"') => self.scan_byte_string(), - Some('r') => self.scan_raw_byte_string(), - _ => unreachable!() // Should have been a token::Ident above. - }; - let suffix = self.scan_optional_raw_name(); - return token::Literal(lit, suffix); - } - '"' => { - let start_bpos = self.last_pos; - let mut valid = true; - self.bump(); - while !self.curr_is('"') { - if self.is_eof() { + let keyword_checking_token = &token::Ident(keyword_checking_ident, + token::Plain); let last_bpos = self.last_pos; - panic!(self.fatal_span_(start_bpos, - last_bpos, - "unterminated double quote string")); + if keyword_checking_token.is_keyword(token::keywords::SelfValue) { + self.err_span_(start, + last_bpos, + "invalid lifetime name: 'self is no longer a special \ + lifetime"); + } else if keyword_checking_token.is_any_keyword() && + !keyword_checking_token.is_keyword(token::keywords::Static) { + self.err_span_(start, last_bpos, "invalid lifetime name"); + } + return token::Lifetime(ident); } - let ch_start = self.last_pos; - let ch = self.curr.unwrap(); - self.bump(); - valid &= self.scan_char_or_byte(ch_start, ch, /* ascii_only = */ false, '"'); - } - // adjust for the ASCII " at the start of the literal - let id = if valid { self.name_from(start_bpos + BytePos(1)) } - else { token::intern("??") }; - self.bump(); - let suffix = self.scan_optional_raw_name(); - return token::Literal(token::Str_(id), suffix); - } - 'r' => { - let start_bpos = self.last_pos; - self.bump(); - let mut hash_count = 0; - while self.curr_is('#') { - self.bump(); - hash_count += 1; - } + // Otherwise it is a character constant: + let valid = self.scan_char_or_byte(start, + c2, + // ascii_only = + false, + '\''); + if !self.curr_is('\'') { + let last_bpos = self.last_pos; + panic!(self.fatal_span_verbose(// Byte offsetting here is okay because the + // character before position `start` is an + // ascii single quote. + start - BytePos(1), + last_bpos, - if self.is_eof() { - let last_bpos = self.last_pos; - panic!(self.fatal_span_(start_bpos, last_bpos, "unterminated raw string")); - } else if !self.curr_is('"') { - let last_bpos = self.last_pos; - let curr_char = self.curr.unwrap(); - panic!(self.fatal_span_char(start_bpos, last_bpos, - "found invalid character; \ - only `#` is allowed in raw string delimitation", - curr_char)); + String::from("character literal may only \ + contain one codepoint"))); + } + let id = if valid { + self.name_from(start) + } else { + token::intern("0") + }; + self.bump(); // advance curr past token + let suffix = self.scan_optional_raw_name(); + return token::Literal(token::Char(id), suffix); } - self.bump(); - let content_start_bpos = self.last_pos; - let mut content_end_bpos; - let mut valid = true; - 'outer: loop { + 'b' => { + self.bump(); + let lit = match self.curr { + Some('\'') => self.scan_byte(), + Some('"') => self.scan_byte_string(), + Some('r') => self.scan_raw_byte_string(), + _ => unreachable!(), // Should have been a token::Ident above. + }; + let suffix = self.scan_optional_raw_name(); + return token::Literal(lit, suffix); + } + '"' => { + let start_bpos = self.last_pos; + let mut valid = true; + self.bump(); + while !self.curr_is('"') { + if self.is_eof() { + let last_bpos = self.last_pos; + panic!(self.fatal_span_(start_bpos, + last_bpos, + "unterminated double quote string")); + } + + let ch_start = self.last_pos; + let ch = self.curr.unwrap(); + self.bump(); + valid &= self.scan_char_or_byte(ch_start, + ch, + // ascii_only = + false, + '"'); + } + // adjust for the ASCII " at the start of the literal + let id = if valid { + self.name_from(start_bpos + BytePos(1)) + } else { + token::intern("??") + }; + self.bump(); + let suffix = self.scan_optional_raw_name(); + return token::Literal(token::Str_(id), suffix); + } + 'r' => { + let start_bpos = self.last_pos; + self.bump(); + let mut hash_count = 0; + while self.curr_is('#') { + self.bump(); + hash_count += 1; + } + if self.is_eof() { let last_bpos = self.last_pos; panic!(self.fatal_span_(start_bpos, last_bpos, "unterminated raw string")); + } else if !self.curr_is('"') { + let last_bpos = self.last_pos; + let curr_char = self.curr.unwrap(); + panic!(self.fatal_span_char(start_bpos, + last_bpos, + "found invalid character; only `#` is allowed \ + in raw string delimitation", + curr_char)); } - //if self.curr_is('"') { - //content_end_bpos = self.last_pos; - //for _ in 0..hash_count { - //self.bump(); - //if !self.curr_is('#') { - //continue 'outer; - let c = self.curr.unwrap(); - match c { - '"' => { - content_end_bpos = self.last_pos; - for _ in 0..hash_count { - self.bump(); - if !self.curr_is('#') { - continue 'outer; + self.bump(); + let content_start_bpos = self.last_pos; + let mut content_end_bpos; + let mut valid = true; + 'outer: loop { + if self.is_eof() { + let last_bpos = self.last_pos; + panic!(self.fatal_span_(start_bpos, last_bpos, "unterminated raw string")); + } + // if self.curr_is('"') { + // content_end_bpos = self.last_pos; + // for _ in 0..hash_count { + // self.bump(); + // if !self.curr_is('#') { + // continue 'outer; + let c = self.curr.unwrap(); + match c { + '"' => { + content_end_bpos = self.last_pos; + for _ in 0..hash_count { + self.bump(); + if !self.curr_is('#') { + continue 'outer; + } + } + break; + } + '\r' => { + if !self.nextch_is('\n') { + let last_bpos = self.last_pos; + self.err_span_(start_bpos, + last_bpos, + "bare CR not allowed in raw string, use \\r \ + instead"); + valid = false; } } - break; - }, - '\r' => { - if !self.nextch_is('\n') { - let last_bpos = self.last_pos; - self.err_span_(start_bpos, last_bpos, "bare CR not allowed in raw \ - string, use \\r instead"); - valid = false; - } + _ => (), } - _ => () + self.bump(); } self.bump(); + let id = if valid { + self.name_from_to(content_start_bpos, content_end_bpos) + } else { + token::intern("??") + }; + let suffix = self.scan_optional_raw_name(); + return token::Literal(token::StrRaw(id, hash_count), suffix); } - self.bump(); - let id = if valid { - self.name_from_to(content_start_bpos, content_end_bpos) - } else { - token::intern("??") - }; - let suffix = self.scan_optional_raw_name(); - return token::Literal(token::StrRaw(id, hash_count), suffix); - } - '-' => { - if self.nextch_is('>') { - self.bump(); - self.bump(); - return token::RArrow; - } else { return self.binop(token::Minus); } - } - '&' => { - if self.nextch_is('&') { - self.bump(); - self.bump(); - return token::AndAnd; - } else { return self.binop(token::And); } - } - '|' => { - match self.nextch() { - Some('|') => { self.bump(); self.bump(); return token::OrOr; } - _ => { return self.binop(token::Or); } + '-' => { + if self.nextch_is('>') { + self.bump(); + self.bump(); + return token::RArrow; + } else { + return self.binop(token::Minus); + } + } + '&' => { + if self.nextch_is('&') { + self.bump(); + self.bump(); + return token::AndAnd; + } else { + return self.binop(token::And); + } + } + '|' => { + match self.nextch() { + Some('|') => { + self.bump(); + self.bump(); + return token::OrOr; + } + _ => { + return self.binop(token::Or); + } + } + } + '+' => { + return self.binop(token::Plus); + } + '*' => { + return self.binop(token::Star); + } + '/' => { + return self.binop(token::Slash); + } + '^' => { + return self.binop(token::Caret); + } + '%' => { + return self.binop(token::Percent); + } + c => { + let last_bpos = self.last_pos; + let bpos = self.pos; + let mut err = self.struct_fatal_span_char(last_bpos, + bpos, + "unknown start of token", + c); + unicode_chars::check_for_substitution(&self, c, &mut err); + err.emit(); + panic!(FatalError); } - } - '+' => { return self.binop(token::Plus); } - '*' => { return self.binop(token::Star); } - '/' => { return self.binop(token::Slash); } - '^' => { return self.binop(token::Caret); } - '%' => { return self.binop(token::Percent); } - c => { - let last_bpos = self.last_pos; - let bpos = self.pos; - let mut err = self.struct_fatal_span_char(last_bpos, - bpos, - "unknown start of token", - c); - unicode_chars::check_for_substitution(&self, c, &mut err); - err.emit(); - panic!(FatalError); - } } } fn consume_whitespace(&mut self) { - while is_whitespace(self.curr) && !self.is_eof() { self.bump(); } + while is_whitespace(self.curr) && !self.is_eof() { + self.bump(); + } } fn read_to_eol(&mut self) -> String { @@ -1261,14 +1446,16 @@ impl<'a> StringReader<'a> { val.push(self.curr.unwrap()); self.bump(); } - if self.curr_is('\n') { self.bump(); } - return val + if self.curr_is('\n') { + self.bump(); + } + return val; } fn read_one_line_comment(&mut self) -> String { let val = self.read_to_eol(); - assert!((val.as_bytes()[0] == b'/' && val.as_bytes()[1] == b'/') - || (val.as_bytes()[0] == b'#' && val.as_bytes()[1] == b'!')); + assert!((val.as_bytes()[0] == b'/' && val.as_bytes()[1] == b'/') || + (val.as_bytes()[0] == b'#' && val.as_bytes()[1] == b'!')); return val; } @@ -1279,10 +1466,8 @@ impl<'a> StringReader<'a> { } fn peeking_at_comment(&self) -> bool { - (self.curr_is('/') && self.nextch_is('/')) - || (self.curr_is('/') && self.nextch_is('*')) - // consider shebangs comments, but not inner attributes - || (self.curr_is('#') && self.nextch_is('!') && !self.nextnextch_is('[')) + (self.curr_is('/') && self.nextch_is('/')) || (self.curr_is('/') && self.nextch_is('*')) || + (self.curr_is('#') && self.nextch_is('!') && !self.nextnextch_is('[')) } fn scan_byte(&mut self) -> token::Lit { @@ -1293,18 +1478,26 @@ impl<'a> StringReader<'a> { let c2 = self.curr.unwrap_or('\x00'); self.bump(); - let valid = self.scan_char_or_byte(start, c2, /* ascii_only = */ true, '\''); + let valid = self.scan_char_or_byte(start, + c2, + // ascii_only = + true, + '\''); if !self.curr_is('\'') { // Byte offsetting here is okay because the // character before position `start` are an // ascii single quote and ascii 'b'. let last_pos = self.last_pos; - panic!(self.fatal_span_verbose( - start - BytePos(2), last_pos, - "unterminated byte constant".to_string())); + panic!(self.fatal_span_verbose(start - BytePos(2), + last_pos, + "unterminated byte constant".to_string())); } - let id = if valid { self.name_from(start) } else { token::intern("?") }; + let id = if valid { + self.name_from(start) + } else { + token::intern("?") + }; self.bump(); // advance curr past token return token::Byte(id); } @@ -1327,9 +1520,17 @@ impl<'a> StringReader<'a> { let ch_start = self.last_pos; let ch = self.curr.unwrap(); self.bump(); - valid &= self.scan_char_or_byte(ch_start, ch, /* ascii_only = */ true, '"'); + valid &= self.scan_char_or_byte(ch_start, + ch, + // ascii_only = + true, + '"'); } - let id = if valid { self.name_from(start) } else { token::intern("??") }; + let id = if valid { + self.name_from(start) + } else { + token::intern("??") + }; self.bump(); return token::ByteStr(id); } @@ -1349,10 +1550,11 @@ impl<'a> StringReader<'a> { } else if !self.curr_is('"') { let last_pos = self.last_pos; let ch = self.curr.unwrap(); - panic!(self.fatal_span_char(start_bpos, last_pos, - "found invalid character; \ - only `#` is allowed in raw string delimitation", - ch)); + panic!(self.fatal_span_char(start_bpos, + last_pos, + "found invalid character; only `#` is allowed in raw \ + string delimitation", + ch)); } self.bump(); let content_start_bpos = self.last_pos; @@ -1362,7 +1564,7 @@ impl<'a> StringReader<'a> { None => { let last_pos = self.last_pos; panic!(self.fatal_span_(start_bpos, last_pos, "unterminated raw string")) - }, + } Some('"') => { content_end_bpos = self.last_pos; for _ in 0..hash_count { @@ -1372,70 +1574,71 @@ impl<'a> StringReader<'a> { } } break; - }, - Some(c) => if c > '\x7F' { - let last_pos = self.last_pos; - self.err_span_char( - last_pos, last_pos, "raw byte string must be ASCII", c); + } + Some(c) => { + if c > '\x7F' { + let last_pos = self.last_pos; + self.err_span_char(last_pos, last_pos, "raw byte string must be ASCII", c); + } } } self.bump(); } self.bump(); - return token::ByteStrRaw(self.name_from_to(content_start_bpos, - content_end_bpos), - hash_count); + return token::ByteStrRaw(self.name_from_to(content_start_bpos, content_end_bpos), + hash_count); } } pub fn is_whitespace(c: Option) -> bool { match c.unwrap_or('\x00') { // None can be null for now... it's not whitespace ' ' | '\n' | '\t' | '\r' => true, - _ => false + _ => false, } } fn in_range(c: Option, lo: char, hi: char) -> bool { match c { Some(c) => lo <= c && c <= hi, - _ => false + _ => false, } } -fn is_dec_digit(c: Option) -> bool { return in_range(c, '0', '9'); } +fn is_dec_digit(c: Option) -> bool { + return in_range(c, '0', '9'); +} pub fn is_doc_comment(s: &str) -> bool { - let res = (s.starts_with("///") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'/') - || s.starts_with("//!"); + let res = (s.starts_with("///") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'/') || + s.starts_with("//!"); debug!("is {:?} a doc comment? {}", s, res); res } pub fn is_block_doc_comment(s: &str) -> bool { - let res = ((s.starts_with("/**") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'*') - || s.starts_with("/*!")) - && s.len() >= 5; // Prevent `/**/` from being parsed as a doc comment + let res = ((s.starts_with("/**") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'*') || + s.starts_with("/*!")) && s.len() >= 5; // Prevent `/**/` from being parsed as a doc comment debug!("is {:?} a doc comment? {}", s, res); res } fn ident_start(c: Option) -> bool { - let c = match c { Some(c) => c, None => return false }; + let c = match c { + Some(c) => c, + None => return false, + }; - (c >= 'a' && c <= 'z') - || (c >= 'A' && c <= 'Z') - || c == '_' - || (c > '\x7f' && c.is_xid_start()) + (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || (c > '\x7f' && c.is_xid_start()) } fn ident_continue(c: Option) -> bool { - let c = match c { Some(c) => c, None => return false }; + let c = match c { + Some(c) => c, + None => return false, + }; - (c >= 'a' && c <= 'z') - || (c >= 'A' && c <= 'Z') - || (c >= '0' && c <= '9') - || c == '_' - || (c > '\x7f' && c.is_xid_continue()) + (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_' || + (c > '\x7f' && c.is_xid_continue()) } #[cfg(test)] @@ -1445,7 +1648,7 @@ mod tests { use codemap::{BytePos, CodeMap, Span, NO_EXPANSION}; use errors; use parse::token; - use parse::token::{str_to_ident}; + use parse::token::str_to_ident; use std::io; use std::rc::Rc; @@ -1458,41 +1661,54 @@ mod tests { // open a string reader for the given string fn setup<'a>(cm: &CodeMap, span_handler: &'a errors::Handler, - teststr: String) -> StringReader<'a> { + teststr: String) + -> StringReader<'a> { let fm = cm.new_filemap("zebra.rs".to_string(), teststr); StringReader::new(span_handler, fm) } - #[test] fn t1 () { + #[test] + fn t1() { let cm = Rc::new(CodeMap::new()); let sh = mk_sh(cm.clone()); - let mut string_reader = setup(&cm, &sh, - "/* my source file */ \ - fn main() { println!(\"zebra\"); }\n".to_string()); + let mut string_reader = setup(&cm, + &sh, + "/* my source file */ fn main() { println!(\"zebra\"); }\n" + .to_string()); let id = str_to_ident("fn"); assert_eq!(string_reader.next_token().tok, token::Comment); assert_eq!(string_reader.next_token().tok, token::Whitespace); let tok1 = string_reader.next_token(); - let tok2 = TokenAndSpan{ - tok:token::Ident(id, token::Plain), - sp:Span {lo:BytePos(21),hi:BytePos(23),expn_id: NO_EXPANSION}}; - assert_eq!(tok1,tok2); + let tok2 = TokenAndSpan { + tok: token::Ident(id, token::Plain), + sp: Span { + lo: BytePos(21), + hi: BytePos(23), + expn_id: NO_EXPANSION, + }, + }; + assert_eq!(tok1, tok2); assert_eq!(string_reader.next_token().tok, token::Whitespace); // the 'main' id is already read: assert_eq!(string_reader.last_pos.clone(), BytePos(28)); // read another token: let tok3 = string_reader.next_token(); - let tok4 = TokenAndSpan{ - tok:token::Ident(str_to_ident("main"), token::Plain), - sp:Span {lo:BytePos(24),hi:BytePos(28),expn_id: NO_EXPANSION}}; - assert_eq!(tok3,tok4); + let tok4 = TokenAndSpan { + tok: token::Ident(str_to_ident("main"), token::Plain), + sp: Span { + lo: BytePos(24), + hi: BytePos(28), + expn_id: NO_EXPANSION, + }, + }; + assert_eq!(tok3, tok4); // the lparen is already read: assert_eq!(string_reader.last_pos.clone(), BytePos(29)) } // check that the given reader produces the desired stream // of tokens (stop checking after exhausting the expected vec) - fn check_tokenization (mut string_reader: StringReader, expected: Vec ) { + fn check_tokenization(mut string_reader: StringReader, expected: Vec) { for expected_tok in &expected { assert_eq!(&string_reader.next_token().tok, expected_tok); } @@ -1503,7 +1719,8 @@ mod tests { token::Ident(str_to_ident(id), style) } - #[test] fn doublecolonparsing () { + #[test] + fn doublecolonparsing() { let cm = Rc::new(CodeMap::new()); let sh = mk_sh(cm.clone()); check_tokenization(setup(&cm, &sh, "a b".to_string()), @@ -1512,16 +1729,18 @@ mod tests { mk_ident("b", token::Plain)]); } - #[test] fn dcparsing_2 () { + #[test] + fn dcparsing_2() { let cm = Rc::new(CodeMap::new()); let sh = mk_sh(cm.clone()); check_tokenization(setup(&cm, &sh, "a::b".to_string()), - vec![mk_ident("a",token::ModName), + vec![mk_ident("a", token::ModName), token::ModSep, mk_ident("b", token::Plain)]); } - #[test] fn dcparsing_3 () { + #[test] + fn dcparsing_3() { let cm = Rc::new(CodeMap::new()); let sh = mk_sh(cm.clone()); check_tokenization(setup(&cm, &sh, "a ::b".to_string()), @@ -1531,54 +1750,61 @@ mod tests { mk_ident("b", token::Plain)]); } - #[test] fn dcparsing_4 () { + #[test] + fn dcparsing_4() { let cm = Rc::new(CodeMap::new()); let sh = mk_sh(cm.clone()); check_tokenization(setup(&cm, &sh, "a:: b".to_string()), - vec![mk_ident("a",token::ModName), + vec![mk_ident("a", token::ModName), token::ModSep, token::Whitespace, mk_ident("b", token::Plain)]); } - #[test] fn character_a() { + #[test] + fn character_a() { let cm = Rc::new(CodeMap::new()); let sh = mk_sh(cm.clone()); assert_eq!(setup(&cm, &sh, "'a'".to_string()).next_token().tok, token::Literal(token::Char(token::intern("a")), None)); } - #[test] fn character_space() { + #[test] + fn character_space() { let cm = Rc::new(CodeMap::new()); let sh = mk_sh(cm.clone()); assert_eq!(setup(&cm, &sh, "' '".to_string()).next_token().tok, token::Literal(token::Char(token::intern(" ")), None)); } - #[test] fn character_escaped() { + #[test] + fn character_escaped() { let cm = Rc::new(CodeMap::new()); let sh = mk_sh(cm.clone()); assert_eq!(setup(&cm, &sh, "'\\n'".to_string()).next_token().tok, token::Literal(token::Char(token::intern("\\n")), None)); } - #[test] fn lifetime_name() { + #[test] + fn lifetime_name() { let cm = Rc::new(CodeMap::new()); let sh = mk_sh(cm.clone()); assert_eq!(setup(&cm, &sh, "'abc".to_string()).next_token().tok, token::Lifetime(token::str_to_ident("'abc"))); } - #[test] fn raw_string() { + #[test] + fn raw_string() { let cm = Rc::new(CodeMap::new()); let sh = mk_sh(cm.clone()); - assert_eq!(setup(&cm, &sh, - "r###\"\"#a\\b\x00c\"\"###".to_string()).next_token() - .tok, + assert_eq!(setup(&cm, &sh, "r###\"\"#a\\b\x00c\"\"###".to_string()) + .next_token() + .tok, token::Literal(token::StrRaw(token::intern("\"#a\\b\x00c\""), 3), None)); } - #[test] fn literal_suffixes() { + #[test] + fn literal_suffixes() { let cm = Rc::new(CodeMap::new()); let sh = mk_sh(cm.clone()); macro_rules! test { @@ -1614,24 +1840,28 @@ mod tests { Some(token::intern("suffix")))); } - #[test] fn line_doc_comments() { + #[test] + fn line_doc_comments() { assert!(is_doc_comment("///")); assert!(is_doc_comment("/// blah")); assert!(!is_doc_comment("////")); } - #[test] fn nested_block_comments() { + #[test] + fn nested_block_comments() { let cm = Rc::new(CodeMap::new()); let sh = mk_sh(cm.clone()); let mut lexer = setup(&cm, &sh, "/* /* */ */'a'".to_string()); match lexer.next_token().tok { - token::Comment => { }, - _ => panic!("expected a comment!") + token::Comment => {} + _ => panic!("expected a comment!"), } - assert_eq!(lexer.next_token().tok, token::Literal(token::Char(token::intern("a")), None)); + assert_eq!(lexer.next_token().tok, + token::Literal(token::Char(token::intern("a")), None)); } - #[test] fn crlf_comments() { + #[test] + fn crlf_comments() { let cm = Rc::new(CodeMap::new()); let sh = mk_sh(cm.clone()); let mut lexer = setup(&cm, &sh, "// test\r\n/// test\r\n".to_string()); @@ -1639,6 +1869,7 @@ mod tests { assert_eq!(comment.tok, token::Comment); assert_eq!(comment.sp, ::codemap::mk_sp(BytePos(0), BytePos(7))); assert_eq!(lexer.next_token().tok, token::Whitespace); - assert_eq!(lexer.next_token().tok, token::DocComment(token::intern("/// test"))); + assert_eq!(lexer.next_token().tok, + token::DocComment(token::intern("/// test"))); } } From 4a1062873eedaa5d804e3050cd94b9c32219f811 Mon Sep 17 00:00:00 2001 From: Tshepang Lekhonkhobe Date: Sun, 3 Jan 2016 11:20:06 +0200 Subject: [PATCH 2/4] fix "make tidy" failure --- src/libsyntax/parse/lexer/mod.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index 752e20a3d04..9f6aa4d2a78 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -1616,8 +1616,9 @@ pub fn is_doc_comment(s: &str) -> bool { } pub fn is_block_doc_comment(s: &str) -> bool { + // Prevent `/**/` from being parsed as a doc comment let res = ((s.starts_with("/**") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'*') || - s.starts_with("/*!")) && s.len() >= 5; // Prevent `/**/` from being parsed as a doc comment + s.starts_with("/*!")) && s.len() >= 5; debug!("is {:?} a doc comment? {}", s, res); res } From 249b5c0b4ae2451e7e2987704879e94f8f4e9cfc Mon Sep 17 00:00:00 2001 From: Tshepang Lekhonkhobe Date: Mon, 4 Jan 2016 21:35:06 +0200 Subject: [PATCH 3/4] address review comment --- src/libsyntax/parse/lexer/comments.rs | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/src/libsyntax/parse/lexer/comments.rs b/src/libsyntax/parse/lexer/comments.rs index 5365870d3fe..e336c98f03c 100644 --- a/src/libsyntax/parse/lexer/comments.rs +++ b/src/libsyntax/parse/lexer/comments.rs @@ -169,11 +169,7 @@ fn read_shebang_comment(rdr: &mut StringReader, let p = rdr.last_pos; debug!("<<< shebang comment"); comments.push(Comment { - style: if code_to_the_left { - Trailing - } else { - Isolated - }, + style: if code_to_the_left { Trailing } else { Isolated }, lines: vec![rdr.read_one_line_comment()], pos: p, }); @@ -198,11 +194,7 @@ fn read_line_comments(rdr: &mut StringReader, debug!("<<< line comments"); if !lines.is_empty() { comments.push(Comment { - style: if code_to_the_left { - Trailing - } else { - Isolated - }, + style: if code_to_the_left { Trailing } else { Isolated }, lines: lines, pos: p, }); From aa3b4c668e00815c5885698887bb2412f408aced Mon Sep 17 00:00:00 2001 From: Tshepang Lekhonkhobe Date: Tue, 12 Jan 2016 20:52:22 +0200 Subject: [PATCH 4/4] re-instate comment that was mysteriously disappeared --- src/libsyntax/parse/lexer/mod.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index 9f6aa4d2a78..1402b7888dd 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -1467,6 +1467,7 @@ impl<'a> StringReader<'a> { fn peeking_at_comment(&self) -> bool { (self.curr_is('/') && self.nextch_is('/')) || (self.curr_is('/') && self.nextch_is('*')) || + // consider shebangs comments, but not inner attributes (self.curr_is('#') && self.nextch_is('!') && !self.nextnextch_is('[')) }