forked from OSchip/llvm-project
Support for double-width characters.
Summary: Only works for UTF-8-encoded files. Reviewers: djasper Reviewed By: djasper CC: cfe-commits, klimek Differential Revision: http://llvm-reviews.chandlerc.com/D1311 llvm-svn: 187935
This commit is contained in:
parent
e80b42a838
commit
ca3e6311a1
|
@ -20,6 +20,7 @@
|
|||
#include "clang/Format/Format.h"
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/Locale.h"
|
||||
#include <algorithm>
|
||||
|
||||
namespace clang {
|
||||
|
@ -38,6 +39,15 @@ static bool IsBlank(char C) {
|
|||
}
|
||||
}
|
||||
|
||||
// Computes the width of Text, in columns, for use in line-length calculations.
//
// For UTF-8 input the width reported by llvm::sys::locale::columnWidth() is
// used whenever that value is non-negative. For every other encoding, and
// when the locale helper yields a negative value, the result is the number of
// code points in Text as counted by encoding::getCodePointCount().
static unsigned columnWidth(StringRef Text, encoding::Encoding Encoding) {
|
||||
if (Encoding == encoding::Encoding_UTF8) {
|
||||
int ContentWidth = llvm::sys::locale::columnWidth(Text);
|
||||
// NOTE(review): a negative value presumably means the width could not be
// determined (e.g. non-printable or malformed input) — confirm against
// llvm/Support/Locale.h. In that case fall through to the code point count.
if (ContentWidth >= 0)
|
||||
return ContentWidth;
|
||||
}
|
||||
// Fallback: one column per code point.
return encoding::getCodePointCount(Text, Encoding);
|
||||
}
|
||||
|
||||
static BreakableToken::Split getCommentSplit(StringRef Text,
|
||||
unsigned ContentStartColumn,
|
||||
unsigned ColumnLimit,
|
||||
|
@ -49,9 +59,12 @@ static BreakableToken::Split getCommentSplit(StringRef Text,
|
|||
unsigned MaxSplitBytes = 0;
|
||||
|
||||
for (unsigned NumChars = 0;
|
||||
NumChars < MaxSplit && MaxSplitBytes < Text.size(); ++NumChars)
|
||||
MaxSplitBytes +=
|
||||
NumChars < MaxSplit && MaxSplitBytes < Text.size();) {
|
||||
unsigned NumBytes =
|
||||
encoding::getCodePointNumBytes(Text[MaxSplitBytes], Encoding);
|
||||
NumChars += columnWidth(Text.substr(MaxSplitBytes, NumBytes), Encoding);
|
||||
MaxSplitBytes += NumBytes;
|
||||
}
|
||||
|
||||
StringRef::size_type SpaceOffset = Text.find_last_of(Blanks, MaxSplitBytes);
|
||||
if (SpaceOffset == StringRef::npos ||
|
||||
|
@ -84,9 +97,8 @@ static BreakableToken::Split getStringSplit(StringRef Text,
|
|||
return BreakableToken::Split(StringRef::npos, 0);
|
||||
if (ColumnLimit <= ContentStartColumn)
|
||||
return BreakableToken::Split(StringRef::npos, 0);
|
||||
unsigned MaxSplit =
|
||||
std::min<unsigned>(ColumnLimit - ContentStartColumn,
|
||||
encoding::getCodePointCount(Text, Encoding) - 1);
|
||||
unsigned MaxSplit = std::min<unsigned>(ColumnLimit - ContentStartColumn,
|
||||
columnWidth(Text, Encoding) - 1);
|
||||
StringRef::size_type SpaceOffset = 0;
|
||||
StringRef::size_type SlashOffset = 0;
|
||||
StringRef::size_type WordStartOffset = 0;
|
||||
|
@ -98,7 +110,7 @@ static BreakableToken::Split getStringSplit(StringRef Text,
|
|||
Chars += Advance;
|
||||
} else {
|
||||
Advance = encoding::getCodePointNumBytes(Text[0], Encoding);
|
||||
Chars += 1;
|
||||
Chars += columnWidth(Text.substr(0, Advance), Encoding);
|
||||
}
|
||||
|
||||
if (Chars > MaxSplit)
|
||||
|
@ -131,7 +143,7 @@ unsigned BreakableSingleLineToken::getLineCount() const { return 1; }
|
|||
unsigned BreakableSingleLineToken::getLineLengthAfterSplit(
|
||||
unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const {
|
||||
return StartColumn + Prefix.size() + Postfix.size() +
|
||||
encoding::getCodePointCount(Line.substr(Offset, Length), Encoding);
|
||||
columnWidth(Line.substr(Offset, Length), Encoding);
|
||||
}
|
||||
|
||||
BreakableSingleLineToken::BreakableSingleLineToken(
|
||||
|
@ -329,8 +341,7 @@ unsigned BreakableBlockComment::getLineCount() const { return Lines.size(); }
|
|||
unsigned BreakableBlockComment::getLineLengthAfterSplit(
|
||||
unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const {
|
||||
return getContentStartColumn(LineIndex, Offset) +
|
||||
encoding::getCodePointCount(Lines[LineIndex].substr(Offset, Length),
|
||||
Encoding) +
|
||||
columnWidth(Lines[LineIndex].substr(Offset, Length), Encoding) +
|
||||
// The last line gets a "*/" postfix.
|
||||
(LineIndex + 1 == Lines.size() ? 2 : 0);
|
||||
}
|
||||
|
|
|
@ -5704,15 +5704,15 @@ TEST_F(FormatTest, CountsUTF8CharactersProperly) {
|
|||
verifyFormat("\"Однажды в студёную зимнюю пору...\"",
|
||||
getLLVMStyleWithColumns(35));
|
||||
verifyFormat("\"一 二 三 四 五 六 七 八 九 十\"",
|
||||
getLLVMStyleWithColumns(21));
|
||||
getLLVMStyleWithColumns(31));
|
||||
verifyFormat("// Однажды в студёную зимнюю пору...",
|
||||
getLLVMStyleWithColumns(36));
|
||||
verifyFormat("// 一 二 三 四 五 六 七 八 九 十",
|
||||
getLLVMStyleWithColumns(22));
|
||||
getLLVMStyleWithColumns(32));
|
||||
verifyFormat("/* Однажды в студёную зимнюю пору... */",
|
||||
getLLVMStyleWithColumns(39));
|
||||
verifyFormat("/* 一 二 三 四 五 六 七 八 九 十 */",
|
||||
getLLVMStyleWithColumns(25));
|
||||
getLLVMStyleWithColumns(35));
|
||||
}
|
||||
|
||||
TEST_F(FormatTest, SplitsUTF8Strings) {
|
||||
|
@ -5723,11 +5723,12 @@ TEST_F(FormatTest, SplitsUTF8Strings) {
|
|||
"\"пору,\"",
|
||||
format("\"Однажды, в студёную зимнюю пору,\"",
|
||||
getLLVMStyleWithColumns(13)));
|
||||
EXPECT_EQ("\"一 二 三 四 \"\n"
|
||||
"\"五 六 七 八 \"\n"
|
||||
"\"九 十\"",
|
||||
format("\"一 二 三 四 五 六 七 八 九 十\"",
|
||||
getLLVMStyleWithColumns(10)));
|
||||
EXPECT_EQ("\"一 二 三 \"\n"
|
||||
"\"四 五六 \"\n"
|
||||
"\"七 八 九 \"\n"
|
||||
"\"十\"",
|
||||
format("\"一 二 三 四 五六 七 八 九 十\"",
|
||||
getLLVMStyleWithColumns(11)));
|
||||
}
|
||||
|
||||
TEST_F(FormatTest, SplitsUTF8LineComments) {
|
||||
|
@ -5739,9 +5740,9 @@ TEST_F(FormatTest, SplitsUTF8LineComments) {
|
|||
getLLVMStyleWithColumns(13)));
|
||||
EXPECT_EQ("// 一二三\n"
|
||||
"// 四五六七\n"
|
||||
"// 八\n"
|
||||
"// 九 十",
|
||||
format("// 一二三 四五六七 八 九 十", getLLVMStyleWithColumns(6)));
|
||||
"// 八 九\n"
|
||||
"// 十",
|
||||
format("// 一二三 四五六七 八 九 十", getLLVMStyleWithColumns(9)));
|
||||
}
|
||||
|
||||
TEST_F(FormatTest, SplitsUTF8BlockComments) {
|
||||
|
@ -5758,16 +5759,17 @@ TEST_F(FormatTest, SplitsUTF8BlockComments) {
|
|||
getLLVMStyleWithColumns(13)));
|
||||
EXPECT_EQ("/* 一二三\n"
|
||||
" * 四五六七\n"
|
||||
" * 八\n"
|
||||
" * 九 十\n"
|
||||
" */",
|
||||
format("/* 一二三 四五六七 八 九 十 */", getLLVMStyleWithColumns(6)));
|
||||
" * 八 九\n"
|
||||
" * 十 */",
|
||||
format("/* 一二三 四五六七 八 九 十 */", getLLVMStyleWithColumns(9)));
|
||||
EXPECT_EQ("/* 𝓣𝓮𝓼𝓽 𝔣𝔬𝔲𝔯\n"
|
||||
" * 𝕓𝕪𝕥𝕖\n"
|
||||
" * 𝖀𝕿𝕱-𝟠 */",
|
||||
format("/* 𝓣𝓮𝓼𝓽 𝔣𝔬𝔲𝔯 𝕓𝕪𝕥𝕖 𝖀𝕿𝕱-𝟠 */", getLLVMStyleWithColumns(12)));
|
||||
}
|
||||
|
||||
#endif // _MSC_VER
|
||||
|
||||
TEST_F(FormatTest, FormatsWithWebKitStyle) {
|
||||
FormatStyle Style = getWebKitStyle();
|
||||
|
||||
|
@ -5847,7 +5849,5 @@ TEST_F(FormatTest, FormatsWithWebKitStyle) {
|
|||
format("if (aaaaaaaaaaaaaaa || bbbbbbbbbbbbbbb) { i++; }", Style));
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
} // end namespace tooling
|
||||
} // end namespace clang
|
||||
|
|
Loading…
Reference in New Issue