clang-format: do not reflow bullet lists

Summary:
This patch prevents clang-format from reflowing bulleted and numbered lists in comments.

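It handles the list markers supported by doxygen and markdown: bullet
lists starting with '-', '*' or '+', and numbered lists starting with
'-#' or with a number of at most two digits followed by a dot. It also
avoids breaking a line right before a number followed by a dot, since
that would create a list item that could no longer be reflowed.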

Reviewers: krasimir

Reviewed By: krasimir

Subscribers: djasper, klimek, cfe-commits

Differential Revision: https://reviews.llvm.org/D33285

llvm-svn: 303556
This commit is contained in:
Francois Ferrand 2017-05-22 14:47:17 +00:00
parent d65b3e4212
commit a881be87ca
2 changed files with 84 additions and 4 deletions

View File

@ -78,6 +78,14 @@ static BreakableToken::Split getCommentSplit(StringRef Text,
}
StringRef::size_type SpaceOffset = Text.find_last_of(Blanks, MaxSplitBytes);
// Do not split before a number followed by a dot: this would be interpreted
// as a numbered list, which would prevent re-flowing in subsequent passes.
static llvm::Regex kNumberedListRegexp = llvm::Regex("^[1-9][0-9]?\\.");
if (SpaceOffset != StringRef::npos &&
kNumberedListRegexp.match(Text.substr(SpaceOffset).ltrim(Blanks)))
SpaceOffset = Text.find_last_of(Blanks, SpaceOffset);
if (SpaceOffset == StringRef::npos ||
// Don't break at leading whitespace.
Text.find_last_not_of(Blanks, SpaceOffset) == StringRef::npos) {
@ -299,8 +307,9 @@ const FormatToken &BreakableComment::tokenAt(unsigned LineIndex) const {
static bool mayReflowContent(StringRef Content) {
Content = Content.trim(Blanks);
// Lines starting with '@' commonly have special meaning.
static const SmallVector<StringRef, 4> kSpecialMeaningPrefixes = {
"@", "TODO", "FIXME", "XXX"};
// Lines starting with '-', '-#', '+' or '*' are bulleted/numbered lists.
static const SmallVector<StringRef, 8> kSpecialMeaningPrefixes = {
"@", "TODO", "FIXME", "XXX", "-# ", "- ", "+ ", "* " };
bool hasSpecialMeaningPrefix = false;
for (StringRef Prefix : kSpecialMeaningPrefixes) {
if (Content.startswith(Prefix)) {
@ -308,6 +317,14 @@ static bool mayReflowContent(StringRef Content) {
break;
}
}
// Numbered lists may also start with a number followed by a dot and a space.
// To avoid issues if a line starts with a number which is actually the end
// of a previous line, we only consider numbers with up to 2 digits.
static llvm::Regex kNumberedListRegexp = llvm::Regex("^[1-9][0-9]?\\. ");
hasSpecialMeaningPrefix = hasSpecialMeaningPrefix ||
kNumberedListRegexp.match(Content);
// Simple heuristic for what to reflow: content should contain at least two
// characters and either the first or second character must be
// non-punctuation.

View File

@ -1577,7 +1577,7 @@ TEST_F(FormatTestComments, ReflowsComments) {
" *\n"
" * long */",
getLLVMStyleWithColumns(20)));
// Don't reflow lines having content that is a single character.
EXPECT_EQ("// long long long\n"
"// long\n"
@ -1602,7 +1602,7 @@ TEST_F(FormatTestComments, ReflowsComments) {
format("// long long long long\n"
"// @param arg",
getLLVMStyleWithColumns(20)));
// Don't reflow lines starting with 'TODO'.
EXPECT_EQ("// long long long\n"
"// long\n"
@ -1671,6 +1671,69 @@ TEST_F(FormatTestComments, ReflowsComments) {
"// long",
getLLVMStyleWithColumns(20)));
// Don't reflow separate bullets in a list.
EXPECT_EQ("// - long long long\n"
"// long\n"
"// - long",
format("// - long long long long\n"
"// - long",
getLLVMStyleWithColumns(20)));
EXPECT_EQ("// * long long long\n"
"// long\n"
"// * long",
format("// * long long long long\n"
"// * long",
getLLVMStyleWithColumns(20)));
EXPECT_EQ("// + long long long\n"
"// long\n"
"// + long",
format("// + long long long long\n"
"// + long",
getLLVMStyleWithColumns(20)));
EXPECT_EQ("// 1. long long long\n"
"// long\n"
"// 2. long",
format("// 1. long long long long\n"
"// 2. long",
getLLVMStyleWithColumns(20)));
EXPECT_EQ("// -# long long long\n"
"// long\n"
"// -# long",
format("// -# long long long long\n"
"// -# long",
getLLVMStyleWithColumns(20)));
EXPECT_EQ("// - long long long\n"
"// long long long\n"
"// - long",
format("// - long long long long\n"
"// long long\n"
"// - long",
getLLVMStyleWithColumns(20)));
EXPECT_EQ("// - long long long\n"
"// long long long\n"
"// long\n"
"// - long",
format("// - long long long long\n"
"// long long long\n"
"// - long",
getLLVMStyleWithColumns(20)));
// Large numbers (>2 digits) are not list items.
EXPECT_EQ("// long long long\n"
"// long 1024. long.",
format("// long long long long\n"
"// 1024. long.",
getLLVMStyleWithColumns(20)));
// Do not break before a number, to avoid introducing a non-reflowable
// doxygen list item.
EXPECT_EQ("// long long\n"
"// long 10. long.",
format("// long long long 10.\n"
"// long.",
getLLVMStyleWithColumns(20)));
// Don't break or reflow after implicit string literals.
verifyFormat("#include <t> // l l l\n"
" // l",