From 7eea67605a92864530600bde3776c80ce08e790e Mon Sep 17 00:00:00 2001 From: Joshua Holmer Date: Sun, 14 Feb 2016 22:40:43 -0500 Subject: [PATCH 1/4] Lint single-character strings as P: Pattern args Fixes #650 --- README.md | 3 +- src/lib.rs | 1 + src/methods.rs | 62 +++++++++++++++++++++++- src/misc_early.rs | 2 +- tests/compile-fail/methods.rs | 91 ++++++++++++++++++++++++++++++++++- 5 files changed, 155 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 483182a68..ae5b42e45 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ A collection of lints to catch common mistakes and improve your Rust code. [Jump to usage instructions](#usage) ##Lints -There are 121 lints included in this crate: +There are 122 lints included in this crate: name | default | meaning ---------------------------------------------------------------------------------------------------------------|---------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ @@ -104,6 +104,7 @@ name [shadow_same](https://github.com/Manishearth/rust-clippy/wiki#shadow_same) | allow | rebinding a name to itself, e.g. `let mut x = &mut x` [shadow_unrelated](https://github.com/Manishearth/rust-clippy/wiki#shadow_unrelated) | allow | The name is re-bound without even using the original value [should_implement_trait](https://github.com/Manishearth/rust-clippy/wiki#should_implement_trait) | warn | defining a method that should be implementing a std trait +[single_char_pattern](https://github.com/Manishearth/rust-clippy/wiki#single_char_pattern) | warn | using a single-character str where a char could be used, e.g. `_.split("x")` [single_match](https://github.com/Manishearth/rust-clippy/wiki#single_match) | warn | a match statement with a single nontrivial arm (i.e, where the other arm is `_ => {}`) is used; recommends `if let` instead [single_match_else](https://github.com/Manishearth/rust-clippy/wiki#single_match_else) | allow | a match statement with a two arms where the second arm's pattern is a wildcard; recommends `if let` instead [str_to_string](https://github.com/Manishearth/rust-clippy/wiki#str_to_string) | warn | using `to_string()` on a str, which should be `to_owned()` diff --git a/src/lib.rs b/src/lib.rs index 8d29d6ab6..7233eab29 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -241,6 +241,7 @@ pub fn plugin_registrar(reg: &mut Registry) { methods::OR_FUN_CALL, methods::SEARCH_IS_SOME, methods::SHOULD_IMPLEMENT_TRAIT, + methods::SINGLE_CHAR_PATTERN, methods::STR_TO_STRING, methods::STRING_TO_STRING, methods::WRONG_SELF_CONVENTION, diff --git a/src/methods.rs b/src/methods.rs index ed3f61e4a..c8c3160cc 100644 --- a/src/methods.rs +++ b/src/methods.rs @@ -1,4 +1,6 @@ use rustc::lint::*; +use rustc::middle::const_eval::{ConstVal, eval_const_expr_partial}; +use rustc::middle::const_eval::EvalHint::ExprTypeChecked; use rustc::middle::subst::{Subst, TypeSpace}; use rustc::middle::ty; use rustc_front::hir::*; @@ -295,6 +297,20 @@ declare_lint! { pub NEW_RET_NO_SELF, Warn, "not returning `Self` in a `new` method" } +/// **What it does:** This lint checks for string methods that receive a single-character `str` as an argument, e.g. `_.split("x")` +/// +/// **Why is this bad?** Performing these methods using a `str` may be slower than using a `char` +/// +/// **Known problems:** Does not catch multi-byte unicode characters +/// +/// **Example:** `_.split("x")` could be `_.split('x')` +declare_lint! { + pub SINGLE_CHAR_PATTERN, + Warn, + "using a single-character str where a char could be used, e.g. \ + `_.split(\"x\")`" +} + impl LintPass for MethodsPass { fn get_lints(&self) -> LintArray { lint_array!(EXTEND_FROM_SLICE, @@ -312,7 +328,8 @@ impl LintPass for MethodsPass { CHARS_NEXT_CMP, CLONE_ON_COPY, CLONE_DOUBLE_REF, - NEW_RET_NO_SELF) + NEW_RET_NO_SELF, + SINGLE_CHAR_PATTERN) } } @@ -351,6 +368,11 @@ impl LateLintPass for MethodsPass { lint_clone_on_copy(cx, expr); lint_clone_double_ref(cx, expr, &args[0]); } + for &(method, pos) in &PATTERN_METHODS { + if name.node.as_str() == method && args.len() > pos { + lint_single_char_pattern(cx, expr, &args[pos]); + } + } } ExprBinary(op, ref lhs, ref rhs) if op.node == BiEq || op.node == BiNe => { if !lint_chars_next(cx, expr, lhs, rhs, op.node == BiEq) { @@ -817,6 +839,22 @@ fn lint_chars_next(cx: &LateContext, expr: &Expr, chain: &Expr, other: &Expr, eq false } +/// lint for length-1 `str`s for methods in `PATTERN_METHODS` +fn lint_single_char_pattern(cx: &LateContext, expr: &Expr, arg: &Expr) { + if let Ok(ConstVal::Str(r)) = eval_const_expr_partial(cx.tcx, arg, ExprTypeChecked, None) { + if r.len() == 1 { + let hint = snippet(cx, expr.span, "..").replace(&format!("\"{}\"", r), &format!("'{}'", r)); + span_lint_and_then(cx, + SINGLE_CHAR_PATTERN, + expr.span, + "single-character string constant used as pattern", + |db| { + db.span_suggestion(expr.span, "try using a char instead:", hint); + }); + } + } +} + /// Given a `Result` type, return its error type (`E`). fn get_error_type<'a>(cx: &LateContext, ty: ty::Ty<'a>) -> Option> { if !match_type(cx, ty, &RESULT_PATH) { @@ -887,6 +925,28 @@ const TRAIT_METHODS: [(&'static str, usize, SelfKind, OutType, &'static str); 30 ("sub", 2, SelfKind::Value, OutType::Any, "std::ops::Sub"), ]; +#[cfg_attr(rustfmt, rustfmt_skip)] +const PATTERN_METHODS: [(&'static str, usize); 17] = [ + ("contains", 1), + ("starts_with", 1), + ("ends_with", 1), + ("find", 1), + ("rfind", 1), + ("split", 1), + ("rsplit", 1), + ("split_terminator", 1), + ("rsplit_terminator", 1), + ("splitn", 2), + ("rsplitn", 2), + ("matches", 1), + ("rmatches", 1), + ("match_indices", 1), + ("rmatch_indices", 1), + ("trim_left_matches", 1), + ("trim_right_matches", 1), +]; + + #[derive(Clone, Copy)] enum SelfKind { Value, diff --git a/src/misc_early.rs b/src/misc_early.rs index 59a0102aa..1999d9118 100644 --- a/src/misc_early.rs +++ b/src/misc_early.rs @@ -104,7 +104,7 @@ impl EarlyLintPass for MiscEarly { if let PatIdent(_, sp_ident, None) = arg.pat.node { let arg_name = sp_ident.node.to_string(); - if arg_name.starts_with("_") { + if arg_name.starts_with('_') { if let Some(correspondance) = registered_names.get(&arg_name[1..]) { span_lint(cx, DUPLICATE_UNDERSCORE_ARGUMENT, diff --git a/tests/compile-fail/methods.rs b/tests/compile-fail/methods.rs index afe056e3d..06dd161ba 100644 --- a/tests/compile-fail/methods.rs +++ b/tests/compile-fail/methods.rs @@ -2,7 +2,7 @@ #![plugin(clippy)] #![deny(clippy, clippy_pedantic)] -#![allow(unused, print_stdout)] +#![allow(unused, print_stdout, non_ascii_literal)] use std::collections::BTreeMap; use std::collections::HashMap; @@ -355,3 +355,92 @@ fn clone_on_double_ref() { //~^^^ERROR using `clone` on a `Copy` type println!("{:p} {:p}",*y, z); } + +fn single_char_pattern() { + let x = "foo"; + x.split("x"); + //~^ ERROR single-character string constant used as pattern + //~| HELP try using a char instead: + //~| SUGGESTION x.split('x'); + + x.split("xx"); + + x.split('x'); + + let y = "x"; + x.split(y); + + // Not yet testing for multi-byte characters + // Changing `r.len() == 1` to `r.chars().count() == 1` in `lint_single_char_pattern` + // should have done this but produced an ICE + x.split("ß"); + x.split("ℝ"); + x.split("💣"); + // Can't use this lint for unicode code points which don't fit in a char + x.split("❤️"); + + x.contains("x"); + //~^ ERROR single-character string constant used as pattern + //~| HELP try using a char instead: + //~| SUGGESTION x.contains('x'); + x.starts_with("x"); + //~^ ERROR single-character string constant used as pattern + //~| HELP try using a char instead: + //~| SUGGESTION x.starts_with('x'); + x.ends_with("x"); + //~^ ERROR single-character string constant used as pattern + //~| HELP try using a char instead: + //~| SUGGESTION x.ends_with('x'); + x.find("x"); + //~^ ERROR single-character string constant used as pattern + //~| HELP try using a char instead: + //~| SUGGESTION x.find('x'); + x.rfind("x"); + //~^ ERROR single-character string constant used as pattern + //~| HELP try using a char instead: + //~| SUGGESTION x.rfind('x'); + x.rsplit("x"); + //~^ ERROR single-character string constant used as pattern + //~| HELP try using a char instead: + //~| SUGGESTION x.rsplit('x'); + x.split_terminator("x"); + //~^ ERROR single-character string constant used as pattern + //~| HELP try using a char instead: + //~| SUGGESTION x.split_terminator('x'); + x.rsplit_terminator("x"); + //~^ ERROR single-character string constant used as pattern + //~| HELP try using a char instead: + //~| SUGGESTION x.rsplit_terminator('x'); + x.splitn(0, "x"); + //~^ ERROR single-character string constant used as pattern + //~| HELP try using a char instead: + //~| SUGGESTION x.splitn(0, 'x'); + x.rsplitn(0, "x"); + //~^ ERROR single-character string constant used as pattern + //~| HELP try using a char instead: + //~| SUGGESTION x.rsplitn(0, 'x'); + x.matches("x"); + //~^ ERROR single-character string constant used as pattern + //~| HELP try using a char instead: + //~| SUGGESTION x.matches('x'); + x.rmatches("x"); + //~^ ERROR single-character string constant used as pattern + //~| HELP try using a char instead: + //~| SUGGESTION x.rmatches('x'); + x.match_indices("x"); + //~^ ERROR single-character string constant used as pattern + //~| HELP try using a char instead: + //~| SUGGESTION x.match_indices('x'); + x.rmatch_indices("x"); + //~^ ERROR single-character string constant used as pattern + //~| HELP try using a char instead: + //~| SUGGESTION x.rmatch_indices('x'); + x.trim_left_matches("x"); + //~^ ERROR single-character string constant used as pattern + //~| HELP try using a char instead: + //~| SUGGESTION x.trim_left_matches('x'); + x.trim_right_matches("x"); + //~^ ERROR single-character string constant used as pattern + //~| HELP try using a char instead: + //~| SUGGESTION x.trim_right_matches('x'); +} \ No newline at end of file From 643a223f7123ee832d67732370faffb023ab3ed9 Mon Sep 17 00:00:00 2001 From: Joshua Holmer Date: Mon, 15 Feb 2016 09:10:31 -0500 Subject: [PATCH 2/4] Address nits --- src/methods.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/methods.rs b/src/methods.rs index c8c3160cc..f1aaaf633 100644 --- a/src/methods.rs +++ b/src/methods.rs @@ -297,11 +297,11 @@ declare_lint! { pub NEW_RET_NO_SELF, Warn, "not returning `Self` in a `new` method" } -/// **What it does:** This lint checks for string methods that receive a single-character `str` as an argument, e.g. `_.split("x")` +/// **What it does:** This lint checks for string methods that receive a single-character `str` as an argument, e.g. `_.split("x")`. /// -/// **Why is this bad?** Performing these methods using a `str` may be slower than using a `char` +/// **Why is this bad?** Performing these methods using a `str` may be slower than using a `char`. /// -/// **Known problems:** Does not catch multi-byte unicode characters +/// **Known problems:** Does not catch multi-byte unicode characters. /// /// **Example:** `_.split("x")` could be `_.split('x')` declare_lint! { @@ -846,7 +846,7 @@ fn lint_single_char_pattern(cx: &LateContext, expr: &Expr, arg: &Expr) { let hint = snippet(cx, expr.span, "..").replace(&format!("\"{}\"", r), &format!("'{}'", r)); span_lint_and_then(cx, SINGLE_CHAR_PATTERN, - expr.span, + arg.span, "single-character string constant used as pattern", |db| { db.span_suggestion(expr.span, "try using a char instead:", hint); From c22ded11e57ece41455e18e11eba8f64606c8860 Mon Sep 17 00:00:00 2001 From: Joshua Holmer Date: Mon, 15 Feb 2016 10:32:04 -0500 Subject: [PATCH 3/4] Reword lint documentation char is faster, proven by benchmark. --- src/methods.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/methods.rs b/src/methods.rs index f1aaaf633..c8d5fa08f 100644 --- a/src/methods.rs +++ b/src/methods.rs @@ -299,7 +299,7 @@ declare_lint! { /// **What it does:** This lint checks for string methods that receive a single-character `str` as an argument, e.g. `_.split("x")`. /// -/// **Why is this bad?** Performing these methods using a `str` may be slower than using a `char`. +/// **Why is this bad?** Performing these methods using a `char` is faster than using a `str`. /// /// **Known problems:** Does not catch multi-byte unicode characters. /// From b1e4b496e118b47be06e362286764f077c8d899d Mon Sep 17 00:00:00 2001 From: Joshua Holmer Date: Mon, 15 Feb 2016 13:36:10 -0500 Subject: [PATCH 4/4] Address @ilogiq's nits --- tests/compile-fail/methods.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/compile-fail/methods.rs b/tests/compile-fail/methods.rs index 06dd161ba..c450c9532 100644 --- a/tests/compile-fail/methods.rs +++ b/tests/compile-fail/methods.rs @@ -373,6 +373,9 @@ fn single_char_pattern() { // Not yet testing for multi-byte characters // Changing `r.len() == 1` to `r.chars().count() == 1` in `lint_single_char_pattern` // should have done this but produced an ICE + // + // We may not want to suggest changing these anyway + // See: https://github.com/Manishearth/rust-clippy/issues/650#issuecomment-184328984 x.split("ß"); x.split("ℝ"); x.split("💣"); @@ -443,4 +446,4 @@ fn single_char_pattern() { //~^ ERROR single-character string constant used as pattern //~| HELP try using a char instead: //~| SUGGESTION x.trim_right_matches('x'); -} \ No newline at end of file +}