forked from OSchip/llvm-project
[clang-tidy][modernize-raw-string-literal] Don't replace upper ASCII with raw literals
It is both useless and unsafe to replace UTF-8 text written as escaped bytes with raw UTF-8 characters, e.g. "\xE2\x98\x83" ---> <snowman>, so the check no longer does it. llvm-svn: 331297
This commit is contained in:
parent
e5f3cf824f
commit
af2657bb2c
|
@ -42,28 +42,15 @@ bool isRawStringLiteral(StringRef Text) {
|
|||
}
|
||||
|
||||
bool containsEscapedCharacters(const MatchFinder::MatchResult &Result,
|
||||
const StringLiteral *Literal) {
|
||||
const StringLiteral *Literal,
|
||||
const CharsBitSet &DisallowedChars) {
|
||||
// FIXME: Handle L"", u8"", u"" and U"" literals.
|
||||
if (!Literal->isAscii())
|
||||
return false;
|
||||
|
||||
StringRef Bytes = Literal->getBytes();
|
||||
// Non-printing characters disqualify this literal:
|
||||
// \007 = \a bell
|
||||
// \010 = \b backspace
|
||||
// \011 = \t horizontal tab
|
||||
// \012 = \n new line
|
||||
// \013 = \v vertical tab
|
||||
// \014 = \f form feed
|
||||
// \015 = \r carriage return
|
||||
// \177 = delete
|
||||
if (Bytes.find_first_of(StringRef("\000\001\002\003\004\005\006\a"
|
||||
"\b\t\n\v\f\r\016\017"
|
||||
"\020\021\022\023\024\025\026\027"
|
||||
"\030\031\032\033\034\035\036\037"
|
||||
"\177",
|
||||
33)) != StringRef::npos)
|
||||
return false;
|
||||
for (const unsigned char C : Literal->getBytes())
|
||||
if (DisallowedChars.test(C))
|
||||
return false;
|
||||
|
||||
CharSourceRange CharRange = Lexer::makeFileCharRange(
|
||||
CharSourceRange::getTokenRange(Literal->getSourceRange()),
|
||||
|
@ -102,7 +89,28 @@ RawStringLiteralCheck::RawStringLiteralCheck(StringRef Name,
|
|||
ClangTidyContext *Context)
|
||||
: ClangTidyCheck(Name, Context),
|
||||
DelimiterStem(Options.get("DelimiterStem", "lit")),
|
||||
ReplaceShorterLiterals(Options.get("ReplaceShorterLiterals", false)) {}
|
||||
ReplaceShorterLiterals(Options.get("ReplaceShorterLiterals", false)) {
|
||||
// Non-printing characters are disallowed:
|
||||
// \007 = \a bell
|
||||
// \010 = \b backspace
|
||||
// \011 = \t horizontal tab
|
||||
// \012 = \n new line
|
||||
// \013 = \v vertical tab
|
||||
// \014 = \f form feed
|
||||
// \015 = \r carriage return
|
||||
// \177 = delete
|
||||
for (const unsigned char C : StringRef("\000\001\002\003\004\005\006\a"
|
||||
"\b\t\n\v\f\r\016\017"
|
||||
"\020\021\022\023\024\025\026\027"
|
||||
"\030\031\032\033\034\035\036\037"
|
||||
"\177",
|
||||
33))
|
||||
DisallowedChars.set(C);
|
||||
|
||||
// Non-ASCII are disallowed too.
|
||||
for (unsigned int C = 0x80u; C <= 0xFFu; ++C)
|
||||
DisallowedChars.set(static_cast<unsigned char>(C));
|
||||
}
|
||||
|
||||
void RawStringLiteralCheck::storeOptions(ClangTidyOptions::OptionMap &Options) {
|
||||
ClangTidyCheck::storeOptions(Options);
|
||||
|
@ -124,7 +132,7 @@ void RawStringLiteralCheck::check(const MatchFinder::MatchResult &Result) {
|
|||
if (Literal->getLocStart().isMacroID())
|
||||
return;
|
||||
|
||||
if (containsEscapedCharacters(Result, Literal)) {
|
||||
if (containsEscapedCharacters(Result, Literal, DisallowedChars)) {
|
||||
std::string Replacement = asRawStringLiteral(Literal, DelimiterStem);
|
||||
if (ReplaceShorterLiterals ||
|
||||
Replacement.length() <=
|
||||
|
|
|
@ -11,11 +11,14 @@
|
|||
#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_RAW_STRING_LITERAL_H
|
||||
|
||||
#include "../ClangTidy.h"
|
||||
#include <bitset>
|
||||
|
||||
namespace clang {
|
||||
namespace tidy {
|
||||
namespace modernize {
|
||||
|
||||
using CharsBitSet = std::bitset<1 << CHAR_BIT>;
|
||||
|
||||
/// This check replaces string literals with escaped characters to
|
||||
/// raw string literals.
|
||||
///
|
||||
|
@ -35,6 +38,7 @@ private:
|
|||
const StringLiteral *Literal, StringRef Replacement);
|
||||
|
||||
std::string DelimiterStem;
|
||||
CharsBitSet DisallowedChars;
|
||||
const bool ReplaceShorterLiterals;
|
||||
};
|
||||
|
||||
|
|
|
@ -40,6 +40,8 @@ char const *const Rs("goink\\\036");
|
|||
char const *const Us("goink\\\037");
|
||||
char const *const HexNonPrintable("\\\x03");
|
||||
char const *const Delete("\\\177");
|
||||
char const *const MultibyteSnowman("\xE2\x98\x83");
|
||||
// CHECK-FIXES: {{^}}char const *const MultibyteSnowman("\xE2\x98\x83");{{$}}
|
||||
|
||||
char const *const TrailingSpace("A line \\with space. \n");
|
||||
char const *const TrailingNewLine("A single \\line.\n");
|
||||
|
|
Loading…
Reference in New Issue