forked from OSchip/llvm-project
clang-tidy: Add check modernize-raw-string-literal
llvm-svn: 264539
This commit is contained in:
parent
569af59b14
commit
8930aab886
|
@ -7,6 +7,7 @@ add_clang_library(clangTidyModernizeModule
|
|||
MakeUniqueCheck.cpp
|
||||
ModernizeTidyModule.cpp
|
||||
PassByValueCheck.cpp
|
||||
RawStringLiteralCheck.cpp
|
||||
RedundantVoidArgCheck.cpp
|
||||
ReplaceAutoPtrCheck.cpp
|
||||
ShrinkToFitCheck.cpp
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
#include "LoopConvertCheck.h"
|
||||
#include "MakeUniqueCheck.h"
|
||||
#include "PassByValueCheck.h"
|
||||
#include "RawStringLiteralCheck.h"
|
||||
#include "RedundantVoidArgCheck.h"
|
||||
#include "ReplaceAutoPtrCheck.h"
|
||||
#include "ShrinkToFitCheck.h"
|
||||
|
@ -36,6 +37,8 @@ public:
|
|||
CheckFactories.registerCheck<LoopConvertCheck>("modernize-loop-convert");
|
||||
CheckFactories.registerCheck<MakeUniqueCheck>("modernize-make-unique");
|
||||
CheckFactories.registerCheck<PassByValueCheck>("modernize-pass-by-value");
|
||||
CheckFactories.registerCheck<RawStringLiteralCheck>(
|
||||
"modernize-raw-string-literal");
|
||||
CheckFactories.registerCheck<RedundantVoidArgCheck>(
|
||||
"modernize-redundant-void-arg");
|
||||
CheckFactories.registerCheck<ReplaceAutoPtrCheck>(
|
||||
|
|
|
@ -0,0 +1,140 @@
|
|||
//===--- RawStringLiteralCheck.cpp - clang-tidy----------------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "RawStringLiteralCheck.h"
|
||||
#include "clang/AST/ASTContext.h"
|
||||
#include "clang/ASTMatchers/ASTMatchFinder.h"
|
||||
#include "clang/Lex/Lexer.h"
|
||||
|
||||
using namespace clang::ast_matchers;
|
||||
|
||||
namespace clang {
|
||||
namespace tidy {
|
||||
namespace modernize {
|
||||
|
||||
namespace {
|
||||
|
||||
/// Returns true when \p HayStack contains at least one backslash and every
/// backslash escape in it introduces a character listed in \p Escapes.
///
/// \param HayStack The text to scan for backslash escapes.
/// \param Escapes  The set of characters allowed to follow a backslash.
/// \returns false if there is no backslash at all, if any escape introduces
///          a character outside \p Escapes, or if a backslash is the final
///          character of \p HayStack.
bool containsEscapes(StringRef HayStack, StringRef Escapes) {
  size_t BackSlash = HayStack.find('\\');
  if (BackSlash == StringRef::npos)
    return false;

  while (BackSlash != StringRef::npos) {
    // A backslash at the very end has nothing to escape; reject it rather
    // than indexing one past the end of the text.
    const size_t Escaped = BackSlash + 1;
    if (Escaped >= HayStack.size() ||
        Escapes.find(HayStack[Escaped]) == StringRef::npos)
      return false;
    // Skip the escaped character as well, so the second half of an escaped
    // backslash is not mistaken for the start of another escape.
    BackSlash = HayStack.find('\\', BackSlash + 2);
  }

  return true;
}
|
||||
|
||||
/// Reports whether \p Text spells a raw string literal, i.e. whether the
/// character immediately before the first double quote is an `R`.
///
/// \p Text is expected to contain a double quote; this is asserted.
bool isRawStringLiteral(StringRef Text) {
  const size_t OpeningQuote = Text.find('"');
  assert(OpeningQuote != StringRef::npos);
  // With nothing in front of the quote there can be no R prefix.
  if (OpeningQuote == 0)
    return false;
  return Text[OpeningQuote - 1] == 'R';
}
|
||||
|
||||
/// Decides whether \p Literal is a candidate for being rewritten as a raw
/// string literal.
///
/// \param Result  The match result supplying the source manager and the
///                language options used to recover the literal's spelling.
/// \param Literal The string literal under consideration.
/// \returns true only if the literal is an ordinary (narrow) string literal,
///          holds no non-printing characters, is not already spelled as a
///          raw string literal, and every escape in its spelling is one of
///          the escapes accepted below.
bool containsEscapedCharacters(const MatchFinder::MatchResult &Result,
                               const StringLiteral *Literal) {
  // FIXME: Handle L"", u8"", u"" and U"" literals.
  if (!Literal->isAscii())
    return false;

  StringRef Bytes = Literal->getBytes();
  // Non-printing characters disqualify this literal:
  // \007 = \a bell
  // \010 = \b backspace
  // \011 = \t horizontal tab
  // \012 = \n new line
  // \013 = \v vertical tab
  // \014 = \f form feed
  // \015 = \r carriage return
  // \177 = delete
  // The explicit length keeps the leading NUL in the search set.
  if (Bytes.find_first_of(StringRef("\000\001\002\003\004\005\006\a"
                                    "\b\t\n\v\f\r\016\017"
                                    "\020\021\022\023\024\025\026\027"
                                    "\030\031\032\033\034\035\036\037"
                                    "\177",
                                    33)) != StringRef::npos)
    return false;

  CharSourceRange CharRange = Lexer::makeFileCharRange(
      CharSourceRange::getTokenRange(Literal->getSourceRange()),
      *Result.SourceManager, Result.Context->getLangOpts());
  StringRef Text = Lexer::getSourceText(CharRange, *Result.SourceManager,
                                        Result.Context->getLangOpts());

  // The spelling may be unavailable (empty) or may not be a quoted literal
  // at all when the range cannot be mapped back to plain file text.
  // isRawStringLiteral() requires a double quote to be present, so leave
  // such literals alone instead of inspecting them.
  if (Text.find('"') == StringRef::npos)
    return false;

  if (isRawStringLiteral(Text))
    return false;

  return containsEscapes(Text, R"('\"?x01)");
}
|
||||
|
||||
/// Tells whether \p Bytes contains the sequence that would end a raw string
/// literal using \p Delimiter: a closing parenthesis, the delimiter itself,
/// and a double quote. An empty delimiter yields the two-character
/// terminator `)"`.
bool containsDelimiter(StringRef Bytes, const std::string &Delimiter) {
  std::string Terminator(")");
  Terminator += Delimiter;
  Terminator += '"';
  return Bytes.find(Terminator) != StringRef::npos;
}
|
||||
|
||||
std::string asRawStringLiteral(const StringLiteral *Literal,
|
||||
const std::string &DelimiterStem) {
|
||||
const StringRef Bytes = Literal->getBytes();
|
||||
std::string Delimiter;
|
||||
for (int I = 0; containsDelimiter(Bytes, Delimiter); ++I) {
|
||||
Delimiter = (I == 0) ? DelimiterStem : DelimiterStem + std::to_string(I);
|
||||
}
|
||||
|
||||
if (Delimiter.empty())
|
||||
return (R"(R"()" + Bytes + R"lit()")lit").str();
|
||||
|
||||
return (R"(R")" + Delimiter + "(" + Bytes + ")" + Delimiter + R"(")").str();
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
/// Creates the check and reads its configuration: the `DelimiterStem`
/// option, which falls back to "lit" when it is not configured.
RawStringLiteralCheck::RawStringLiteralCheck(StringRef Name,
                                             ClangTidyContext *Context)
    : ClangTidyCheck(Name, Context),
      DelimiterStem(Options.get("DelimiterStem", "lit")) {}
|
||||
|
||||
/// Records the effective configuration of this check into \p Options.
///
/// The `DelimiterStem` value read by the constructor is written back so
/// that it is part of the stored option map.
void RawStringLiteralCheck::storeOptions(ClangTidyOptions::OptionMap &Options) {
  ClangTidyCheck::storeOptions(Options);
  // The parameter hides the inherited Options member, hence the explicit
  // this-> to reach the option view.
  this->Options.store(Options, "DelimiterStem", DelimiterStem);
}
|
||||
|
||||
/// Registers a matcher for every string literal, bound to the id "lit";
/// the filtering of candidates happens in check().
void RawStringLiteralCheck::registerMatchers(MatchFinder *Finder) {
  const auto AnyStringLiteral = stringLiteral().bind("lit");
  Finder->addMatcher(AnyStringLiteral, this);
}
|
||||
|
||||
/// Handles one matched string literal: skips it unless the translation unit
/// is C++11 or later and the literal does not start inside a macro, then
/// emits the diagnostic and fix-it if its escapes qualify.
void RawStringLiteralCheck::check(const MatchFinder::MatchResult &Result) {
  // Raw string literals require C++11 or later.
  const auto &LangOpts = Result.Context->getLangOpts();
  if (!LangOpts.CPlusPlus11)
    return;

  const auto *Literal = Result.Nodes.getNodeAs<StringLiteral>("lit");
  const bool StartsInMacro = Literal->getLocStart().isMacroID();
  if (StartsInMacro)
    return;

  if (!containsEscapedCharacters(Result, Literal))
    return;

  replaceWithRawStringLiteral(Result, Literal);
}
|
||||
|
||||
void RawStringLiteralCheck::replaceWithRawStringLiteral(
|
||||
const MatchFinder::MatchResult &Result, const StringLiteral *Literal) {
|
||||
CharSourceRange CharRange = Lexer::makeFileCharRange(
|
||||
CharSourceRange::getTokenRange(Literal->getSourceRange()),
|
||||
*Result.SourceManager, Result.Context->getLangOpts());
|
||||
diag(Literal->getLocStart(),
|
||||
"escaped string literal can be written as a raw string literal")
|
||||
<< FixItHint::CreateReplacement(
|
||||
CharRange, asRawStringLiteral(Literal, DelimiterStem));
|
||||
}
|
||||
|
||||
} // namespace modernize
|
||||
} // namespace tidy
|
||||
} // namespace clang
|
|
@ -0,0 +1,45 @@
|
|||
//===--- RawStringLiteralCheck.h - clang-tidy--------------------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_RAW_STRING_LITERAL_H
|
||||
#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_RAW_STRING_LITERAL_H
|
||||
|
||||
#include "../ClangTidy.h"
|
||||
//#include <string>
|
||||
|
||||
namespace clang {
|
||||
namespace tidy {
|
||||
namespace modernize {
|
||||
|
||||
/// This check replaces string literals with escaped characters to
|
||||
/// raw string literals.
|
||||
///
|
||||
/// For the user-facing documentation see:
|
||||
/// http://clang.llvm.org/extra/clang-tidy/checks/modernize-raw-string-literal.html
|
||||
class RawStringLiteralCheck : public ClangTidyCheck {
|
||||
public:
|
||||
RawStringLiteralCheck(StringRef Name, ClangTidyContext *Context);
|
||||
|
||||
void storeOptions(ClangTidyOptions::OptionMap &Options) override;
|
||||
void registerMatchers(ast_matchers::MatchFinder *Finder) override;
|
||||
void check(const ast_matchers::MatchFinder::MatchResult &Result) override;
|
||||
|
||||
private:
|
||||
void replaceWithRawStringLiteral(
|
||||
const ast_matchers::MatchFinder::MatchResult &Result,
|
||||
const StringLiteral *Literal);
|
||||
|
||||
std::string DelimiterStem;
|
||||
};
|
||||
|
||||
} // namespace modernize
|
||||
} // namespace tidy
|
||||
} // namespace clang
|
||||
|
||||
#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_RAW_STRING_LITERAL_H
|
|
@ -63,7 +63,10 @@ Improvements to ``clang-tidy``
|
|||
explain them more clearly, and provide more accurate fix-its for the issues
|
||||
identified. The improvements since the 3.8 release include:
|
||||
|
||||
- ...
|
||||
- New ``modernize-raw-string-literal`` check
|
||||
|
||||
This check selectively replaces string literals containing escaped
|
||||
characters with raw string literals.
|
||||
|
||||
Improvements to ``modularize``
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
|
|
@ -78,6 +78,7 @@ Clang-Tidy Checks
|
|||
modernize-loop-convert
|
||||
modernize-make-unique
|
||||
modernize-pass-by-value
|
||||
modernize-raw-string-literal
|
||||
modernize-redundant-void-arg
|
||||
modernize-replace-auto-ptr
|
||||
modernize-shrink-to-fit
|
||||
|
|
|
@ -0,0 +1,46 @@
|
|||
.. title:: clang-tidy - modernize-raw-string-literal
|
||||
|
||||
modernize-raw-string-literal
|
||||
============================
|
||||
|
||||
This check selectively replaces string literals containing escaped characters
|
||||
with raw string literals.
|
||||
|
||||
Example:
|
||||
|
||||
.. code-block:: c++
|
||||
|
||||
const char *const Quotes{"embedded \"quotes\""};
|
||||
const char *const Paragraph{"Line one.\nLine two.\nLine three.\n"};
|
||||
const char *const SingleLine{"Single line.\n"};
|
||||
const char *const TrailingSpace{"Look here -> \n"};
|
||||
const char *const Tab{"One\tTwo\n"};
|
||||
const char *const Bell{"Hello!\a And welcome!"};
|
||||
const char *const Path{"C:\\Program Files\\Vendor\\Application.exe"};
|
||||
const char *const RegEx{"\\w\\([a-z]\\)"};
|
||||
|
||||
becomes
|
||||
|
||||
.. code-block:: c++
|
||||
|
||||
const char *const Quotes{R"(embedded "quotes")"};
|
||||
const char *const Paragraph{"Line one.\nLine two.\nLine three.\n"};
|
||||
const char *const SingleLine{"Single line.\n"};
|
||||
const char *const TrailingSpace{"Look here -> \n"};
|
||||
const char *const Tab{"One\tTwo\n"};
|
||||
const char *const Bell{"Hello!\a And welcome!"};
|
||||
const char *const Path{R"(C:\Program Files\Vendor\Application.exe)"};
|
||||
const char *const RegEx{R"(\w\([a-z]\))"};
|
||||
|
||||
The presence of any of the following escapes can cause the string to be
|
||||
converted to a raw string literal: ``\\``, ``\'``, ``\"``, ``\?``,
|
||||
and octal or hexadecimal escapes for printable ASCII characters.
|
||||
|
||||
A string literal containing only escaped newlines is a common way of
|
||||
writing lines of text output. Introducing physical newlines with raw
|
||||
string literals in this case is likely to impede readability. These
|
||||
string literals are left unchanged.
|
||||
|
||||
An escaped horizontal tab, form feed, or vertical tab prevents the string
|
||||
literal from being converted. The presence of a horizontal tab, form feed or
|
||||
vertical tab in source code is not visually obvious.
|
|
@ -0,0 +1,9 @@
|
|||
// RUN: %check_clang_tidy %s modernize-raw-string-literal %t -- -config='{CheckOptions: [{key: "modernize-raw-string-literal.DelimiterStem", value: "str"}]}' -- -std=c++11
|
||||
|
||||
char const *const ContainsSentinel{"who\\ops)\""};
|
||||
// CHECK-MESSAGES: :[[@LINE-1]]:36: warning: {{.*}} can be written as a raw string literal
|
||||
// CHECK-FIXES: {{^}}char const *const ContainsSentinel{R"str(who\ops)")str"};{{$}}
|
||||
|
||||
//char const *const ContainsDelim{"whoops)\")lit\""};
|
||||
// CHECK-XMESSAGES: :[[@LINE-1]]:33: warning: {{.*}} can be written as a raw string literal
|
||||
// CHECK-XFIXES: {{^}}char const *const ContainsDelim{R"lit1(whoops)")lit")lit1"};{{$}}
|
|
@ -0,0 +1,123 @@
|
|||
// RUN: %check_clang_tidy %s modernize-raw-string-literal %t
|
||||
|
||||
char const *const BackSlash("goink\\frob");
|
||||
// CHECK-MESSAGES: :[[@LINE-1]]:29: warning: escaped string literal can be written as a raw string literal [modernize-raw-string-literal]
|
||||
// CHECK-FIXES: {{^}}char const *const BackSlash(R"(goink\frob)");{{$}}
|
||||
|
||||
char const *const PlainLiteral("plain literal");
|
||||
|
||||
// Non-printable ASCII characters.
|
||||
char const *const Nul("goink\\\000");
|
||||
char const *const Soh("goink\\\001");
|
||||
char const *const Stx("goink\\\002");
|
||||
char const *const Etx("goink\\\003");
|
||||
char const *const Enq("goink\\\004");
|
||||
char const *const Ack("goink\\\005");
|
||||
char const *const Bell("goink\\\afrob");
|
||||
char const *const BackSpace("goink\\\bfrob");
|
||||
char const *const HorizontalTab("goink\\\tfrob");
|
||||
char const *const NewLine("goink\nfrob");
|
||||
char const *const VerticalTab("goink\\\vfrob");
|
||||
char const *const FormFeed("goink\\\ffrob");
|
||||
char const *const CarraigeReturn("goink\\\rfrob");
|
||||
char const *const So("goink\\\016");
|
||||
char const *const Si("goink\\\017");
|
||||
char const *const Dle("goink\\\020");
|
||||
char const *const Dc1("goink\\\021");
|
||||
char const *const Dc2("goink\\\022");
|
||||
char const *const Dc3("goink\\\023");
|
||||
char const *const Dc4("goink\\\024");
|
||||
char const *const Nak("goink\\\025");
|
||||
char const *const Syn("goink\\\026");
|
||||
char const *const Etb("goink\\\027");
|
||||
char const *const Can("goink\\\030");
|
||||
char const *const Em("goink\\\031");
|
||||
char const *const Sub("goink\\\032");
|
||||
char const *const Esc("goink\\\033");
|
||||
char const *const Fs("goink\\\034");
|
||||
char const *const Gs("goink\\\035");
|
||||
char const *const Rs("goink\\\036");
|
||||
char const *const Us("goink\\\037");
|
||||
char const *const HexNonPrintable("\\\x03");
|
||||
char const *const Delete("\\\177");
|
||||
|
||||
char const *const TrailingSpace("A line \\with space. \n");
|
||||
char const *const TrailingNewLine("A single \\line.\n");
|
||||
char const *const AlreadyRaw(R"(foobie\\bletch)");
|
||||
char const *const UTF8Literal(u8"foobie\\bletch");
|
||||
char const *const UTF8RawLiteral(u8R"(foobie\\bletch)");
|
||||
char16_t const *const UTF16Literal(u"foobie\\bletch");
|
||||
char16_t const *const UTF16RawLiteral(uR"(foobie\\bletch)");
|
||||
char32_t const *const UTF32Literal(U"foobie\\bletch");
|
||||
char32_t const *const UTF32RawLiteral(UR"(foobie\\bletch)");
|
||||
wchar_t const *const WideLiteral(L"foobie\\bletch");
|
||||
wchar_t const *const WideRawLiteral(LR"(foobie\\bletch)");
|
||||
|
||||
char const *const SingleQuote("goink\'frob");
|
||||
// CHECK-MESSAGES: :[[@LINE-1]]:31: warning: {{.*}} can be written as a raw string literal
|
||||
// CHECK-XFIXES: {{^}}char const *const SingleQuote(R"(goink'frob)");{{$}}
|
||||
|
||||
char const *const DoubleQuote("goink\"frob");
|
||||
// CHECK-MESSAGES: :[[@LINE-1]]:31: warning: {{.*}} can be written as a raw string literal
|
||||
// CHECK-FIXES: {{^}}char const *const DoubleQuote(R"(goink"frob)");{{$}}
|
||||
|
||||
char const *const QuestionMark("goink\?frob");
|
||||
// CHECK-MESSAGES: :[[@LINE-1]]:32: warning: {{.*}} can be written as a raw string literal
|
||||
// CHECK-FIXES: {{^}}char const *const QuestionMark(R"(goink?frob)");{{$}}
|
||||
|
||||
char const *const RegEx("goink\\(one|two\\)\\\\\\?.*\\nfrob");
|
||||
// CHECK-MESSAGES: :[[@LINE-1]]:25: warning: {{.*}} can be written as a raw string literal
|
||||
// CHECK-FIXES: {{^}}char const *const RegEx(R"(goink\(one|two\)\\\?.*\nfrob)");{{$}}
|
||||
|
||||
char const *const Path("C:\\Program Files\\Vendor\\Application\\Application.exe");
|
||||
// CHECK-MESSAGES: :[[@LINE-1]]:24: warning: {{.*}} can be written as a raw string literal
|
||||
// CHECK-FIXES: {{^}}char const *const Path(R"(C:\Program Files\Vendor\Application\Application.exe)");{{$}}
|
||||
|
||||
char const *const ContainsSentinel("who\\ops)\"");
|
||||
// CHECK-MESSAGES: :[[@LINE-1]]:36: warning: {{.*}} can be written as a raw string literal
|
||||
// CHECK-FIXES: {{^}}char const *const ContainsSentinel(R"lit(who\ops)")lit");{{$}}
|
||||
|
||||
char const *const ContainsDelim("whoops)\")lit\"");
|
||||
// CHECK-MESSAGES: :[[@LINE-1]]:33: warning: {{.*}} can be written as a raw string literal
|
||||
// CHECK-FIXES: {{^}}char const *const ContainsDelim(R"lit1(whoops)")lit")lit1");{{$}}
|
||||
|
||||
char const *const OctalPrintable("\100\\");
|
||||
// CHECK-MESSAGES: :[[@LINE-1]]:34: warning: {{.*}} can be written as a raw string literal
|
||||
// CHECK-FIXES: {{^}}char const *const OctalPrintable(R"(@\)");{{$}}
|
||||
|
||||
char const *const HexPrintable("\x40\\");
|
||||
// CHECK-MESSAGES: :[[@LINE-1]]:32: warning: {{.*}} can be written as a raw string literal
|
||||
// CHECK-FIXES: {{^}}char const *const HexPrintable(R"(@\)");{{$}}
|
||||
|
||||
#define TRICK(arg_) #arg_
|
||||
char const *const MacroBody = TRICK(foo\\bar);
|
||||
|
||||
#define HAT(rabbit_) #rabbit_ "foo\\bar"
|
||||
char const *const StringizedMacroArgument = HAT(foo\\bar);
|
||||
|
||||
#define SUBST(lit_) lit_
|
||||
char const *const MacroArgument = SUBST("foo\\bar");
|
||||
// FIXME: We should be able to replace this string literal macro argument
|
||||
|
||||
template <typename T>
|
||||
void fn(char const *const Arg) {
|
||||
char const *const Str("foo\\bar");
|
||||
// CHECK-MESSAGES: :[[@LINE-1]]:25: warning: {{.*}} can be written as a raw string literal
|
||||
// CHECK-FIXES: {{^}} char const *const Str(R"(foo\bar)");{{$}}
|
||||
}
|
||||
|
||||
template <>
|
||||
void fn<int>(char const *const Arg) {
|
||||
char const *const Str("foo\\bar");
|
||||
// CHECK-MESSAGES: :[[@LINE-1]]:25: warning: {{.*}} can be written as a raw string literal
|
||||
// CHECK-FIXES: {{^}} char const *const Str(R"(foo\bar)");{{$}}
|
||||
}
|
||||
|
||||
void callFn() {
|
||||
fn<int>("foo\\bar");
|
||||
// CHECK-MESSAGES: :[[@LINE-1]]:11: warning: {{.*}} can be written as a raw string literal
|
||||
// CHECK-FIXES: {{^}} fn<int>(R"(foo\bar)");{{$}}
|
||||
fn<double>("foo\\bar");
|
||||
// CHECK-MESSAGES: :[[@LINE-1]]:14: warning: {{.*}} can be written as a raw string literal
|
||||
// CHECK-FIXES: {{^}} fn<double>(R"(foo\bar)");{{$}}
|
||||
}
|
Loading…
Reference in New Issue