Handle zero-width and double-width characters in string literals and comments.

Summary: Count column width instead of the number of code points. This also includes correct handling of tabs inside string literals and comments (with an exception of multiline string literals/comments, where tabs are present before the first escaped newline). Reviewers: djasper, klimek Reviewed By: klimek CC: cfe-commits, klimek Differential Revision: http://llvm-reviews.chandlerc.com/D1601 llvm-svn: 190052
2013-09-05 14:08:34 +00:00 · 2013-09-05 14:08:34 +00:00 · ebb43caae2
parent 15832288f4
commit ebb43caae2
9 changed files with 189 additions and 76 deletions
--- a/clang/include/clang/Format/Format.h
+++ b/clang/include/clang/Format/Format.h
@ -149,9 +149,12 @@ struct FormatStyle {
  /// Otherwise puts them into the right-most column.
  bool AlignEscapedNewlinesLeft;

-  /// \brief The number of characters to use for indentation.
+  /// \brief The number of columns to use for indentation.
  unsigned IndentWidth;

+  /// \brief The number of columns used for tab stops.
+  unsigned TabWidth;
+
  /// \brief The number of characters to use for indentation of constructor
  /// initializer lists.
  unsigned ConstructorInitializerIndentWidth;
--- a/clang/lib/Format/BreakableToken.cpp
+++ b/clang/lib/Format/BreakableToken.cpp
@ -41,6 +41,7 @@ static bool IsBlank(char C) {
 static BreakableToken::Split getCommentSplit(StringRef Text,
                                             unsigned ContentStartColumn,
                                             unsigned ColumnLimit,
+                                             unsigned TabWidth,
                                             encoding::Encoding Encoding) {
  if (ColumnLimit <= ContentStartColumn + 1)
    return BreakableToken::Split(StringRef::npos, 0);
@ -49,9 +50,14 @@ static BreakableToken::Split getCommentSplit(StringRef Text,
  unsigned MaxSplitBytes = 0;

  for (unsigned NumChars = 0;
-       NumChars < MaxSplit && MaxSplitBytes < Text.size(); ++NumChars)
-    MaxSplitBytes +=
+       NumChars < MaxSplit && MaxSplitBytes < Text.size();) {
+    unsigned BytesInChar =
        encoding::getCodePointNumBytes(Text[MaxSplitBytes], Encoding);
+    NumChars +=
+        encoding::columnWidthWithTabs(Text.substr(MaxSplitBytes, BytesInChar),
+                                      ContentStartColumn, TabWidth, Encoding);
+    MaxSplitBytes += BytesInChar;
+  }

  StringRef::size_type SpaceOffset = Text.find_last_of(Blanks, MaxSplitBytes);
  if (SpaceOffset == StringRef::npos ||
@ -78,6 +84,7 @@ static BreakableToken::Split getCommentSplit(StringRef Text,
 static BreakableToken::Split getStringSplit(StringRef Text,
                                            unsigned ContentStartColumn,
                                            unsigned ColumnLimit,
+                                            unsigned TabWidth,
                                            encoding::Encoding Encoding) {
  // FIXME: Reduce unit test case.
  if (Text.empty())
@ -86,7 +93,9 @@ static BreakableToken::Split getStringSplit(StringRef Text,
    return BreakableToken::Split(StringRef::npos, 0);
  unsigned MaxSplit =
      std::min<unsigned>(ColumnLimit - ContentStartColumn,
-                         encoding::getCodePointCount(Text, Encoding) - 1);
+                         encoding::columnWidthWithTabs(Text, ContentStartColumn,
+                                                       TabWidth, Encoding) -
+                             1);
  StringRef::size_type SpaceOffset = 0;
  StringRef::size_type SlashOffset = 0;
  StringRef::size_type WordStartOffset = 0;
@ -98,7 +107,9 @@ static BreakableToken::Split getStringSplit(StringRef Text,
      Chars += Advance;
    } else {
      Advance = encoding::getCodePointNumBytes(Text[0], Encoding);
-      Chars += 1;
+      Chars += encoding::columnWidthWithTabs(Text.substr(0, Advance),
+                                             ContentStartColumn + Chars,
+                                             TabWidth, Encoding);
    }

    if (Chars > MaxSplit)
@ -131,14 +142,17 @@ unsigned BreakableSingleLineToken::getLineCount() const { return 1; }
 unsigned BreakableSingleLineToken::getLineLengthAfterSplit(
    unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const {
  return StartColumn + Prefix.size() + Postfix.size() +
-         encoding::getCodePointCount(Line.substr(Offset, Length), Encoding);
+         encoding::columnWidthWithTabs(Line.substr(Offset, Length),
+                                       StartColumn + Prefix.size(),
+                                       Style.TabWidth, Encoding);
 }

 BreakableSingleLineToken::BreakableSingleLineToken(
    const FormatToken &Tok, unsigned StartColumn, StringRef Prefix,
-    StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding)
-    : BreakableToken(Tok, InPPDirective, Encoding), StartColumn(StartColumn),
-      Prefix(Prefix), Postfix(Postfix) {
+    StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding,
+    const FormatStyle &Style)
+    : BreakableToken(Tok, InPPDirective, Encoding, Style),
+      StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix) {
  assert(Tok.TokenText.startswith(Prefix) && Tok.TokenText.endswith(Postfix));
  Line = Tok.TokenText.substr(
      Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size());
@ -147,15 +161,16 @@ BreakableSingleLineToken::BreakableSingleLineToken(
 BreakableStringLiteral::BreakableStringLiteral(const FormatToken &Tok,
                                               unsigned StartColumn,
                                               bool InPPDirective,
-                                               encoding::Encoding Encoding)
+                                               encoding::Encoding Encoding,
+                                               const FormatStyle &Style)
    : BreakableSingleLineToken(Tok, StartColumn, "\"", "\"", InPPDirective,
-                               Encoding) {}
+                               Encoding, Style) {}

 BreakableToken::Split
 BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset,
                                 unsigned ColumnLimit) const {
  return getStringSplit(Line.substr(TailOffset), StartColumn + 2, ColumnLimit,
-                        Encoding);
+                        Style.TabWidth, Encoding);
 }

 void BreakableStringLiteral::insertBreak(unsigned LineIndex,
@ -177,10 +192,11 @@ static StringRef getLineCommentPrefix(StringRef Comment) {
 BreakableLineComment::BreakableLineComment(const FormatToken &Token,
                                           unsigned StartColumn,
                                           bool InPPDirective,
-                                           encoding::Encoding Encoding)
+                                           encoding::Encoding Encoding,
+                                           const FormatStyle &Style)
    : BreakableSingleLineToken(Token, StartColumn,
                               getLineCommentPrefix(Token.TokenText), "",
-                               InPPDirective, Encoding) {
+                               InPPDirective, Encoding, Style) {
  OriginalPrefix = Prefix;
  if (Token.TokenText.size() > Prefix.size() &&
      isAlphanumeric(Token.TokenText[Prefix.size()])) {
@ -195,7 +211,7 @@ BreakableToken::Split
 BreakableLineComment::getSplit(unsigned LineIndex, unsigned TailOffset,
                               unsigned ColumnLimit) const {
  return getCommentSplit(Line.substr(TailOffset), StartColumn + Prefix.size(),
-                         ColumnLimit, Encoding);
+                         ColumnLimit, Style.TabWidth, Encoding);
 }

 void BreakableLineComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
@ -216,10 +232,10 @@ BreakableLineComment::replaceWhitespaceBefore(unsigned LineIndex,
 }

 BreakableBlockComment::BreakableBlockComment(
-    const FormatStyle &Style, const FormatToken &Token, unsigned StartColumn,
+    const FormatToken &Token, unsigned StartColumn,
    unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective,
-    encoding::Encoding Encoding)
-    : BreakableToken(Token, InPPDirective, Encoding) {
+    encoding::Encoding Encoding, const FormatStyle &Style)
+    : BreakableToken(Token, InPPDirective, Encoding, Style) {
  StringRef TokenText(Token.TokenText);
  assert(TokenText.startswith("/*") && TokenText.endswith("*/"));
  TokenText.substr(2, TokenText.size() - 4).split(Lines, "\n");
@ -229,7 +245,7 @@ BreakableBlockComment::BreakableBlockComment(
  StartOfLineColumn.resize(Lines.size());
  StartOfLineColumn[0] = StartColumn + 2;
  for (size_t i = 1; i < Lines.size(); ++i)
-    adjustWhitespace(Style, i, IndentDelta);
+    adjustWhitespace(i, IndentDelta);

  Decoration = "* ";
  if (Lines.size() == 1 && !FirstInLine) {
@ -282,8 +298,7 @@ BreakableBlockComment::BreakableBlockComment(
  });
 }

-void BreakableBlockComment::adjustWhitespace(const FormatStyle &Style,
-                                             unsigned LineIndex,
+void BreakableBlockComment::adjustWhitespace(unsigned LineIndex,
                                             int IndentDelta) {
  // When in a preprocessor directive, the trailing backslash in a block comment
  // is not needed, but can serve a purpose of uniformity with necessary escaped
@ -306,6 +321,7 @@ void BreakableBlockComment::adjustWhitespace(const FormatStyle &Style,
  if (StartOfLine == StringRef::npos)
    StartOfLine = Lines[LineIndex].size();

+  StringRef Whitespace = Lines[LineIndex].substr(0, StartOfLine);
  // Adjust Lines to only contain relevant text.
  Lines[LineIndex - 1] = Lines[LineIndex - 1].substr(0, EndOfPreviousLine);
  Lines[LineIndex] = Lines[LineIndex].substr(StartOfLine);
@ -321,16 +337,19 @@ void BreakableBlockComment::adjustWhitespace(const FormatStyle &Style,
  // if leading tabs are intermixed with spaces, that is not a high priority.

  // Adjust the start column uniformly across all lines.
-  StartOfLineColumn[LineIndex] = std::max<int>(0, StartOfLine + IndentDelta);
+  StartOfLineColumn[LineIndex] =
+      std::max<int>(0, Whitespace.size() + IndentDelta);
 }

 unsigned BreakableBlockComment::getLineCount() const { return Lines.size(); }

 unsigned BreakableBlockComment::getLineLengthAfterSplit(
    unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const {
-  return getContentStartColumn(LineIndex, Offset) +
-         encoding::getCodePointCount(Lines[LineIndex].substr(Offset, Length),
-                                     Encoding) +
+  unsigned ContentStartColumn = getContentStartColumn(LineIndex, Offset);
+  return ContentStartColumn +
+         encoding::columnWidthWithTabs(Lines[LineIndex].substr(Offset, Length),
+                                       ContentStartColumn, Style.TabWidth,
+                                       Encoding) +
         // The last line gets a "*/" postfix.
         (LineIndex + 1 == Lines.size() ? 2 : 0);
 }
@ -340,7 +359,7 @@ BreakableBlockComment::getSplit(unsigned LineIndex, unsigned TailOffset,
                                unsigned ColumnLimit) const {
  return getCommentSplit(Lines[LineIndex].substr(TailOffset),
                         getContentStartColumn(LineIndex, TailOffset),
-                         ColumnLimit, Encoding);
+                         ColumnLimit, Style.TabWidth, Encoding);
 }

 void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
--- a/clang/lib/Format/BreakableToken.h
+++ b/clang/lib/Format/BreakableToken.h
@ -67,12 +67,14 @@ public:

 protected:
  BreakableToken(const FormatToken &Tok, bool InPPDirective,
-                 encoding::Encoding Encoding)
-      : Tok(Tok), InPPDirective(InPPDirective), Encoding(Encoding) {}
+                 encoding::Encoding Encoding, const FormatStyle &Style)
+      : Tok(Tok), InPPDirective(InPPDirective), Encoding(Encoding),
+        Style(Style) {}

  const FormatToken &Tok;
  const bool InPPDirective;
  const encoding::Encoding Encoding;
+  const FormatStyle &Style;
 };

 /// \brief Base class for single line tokens that can be broken.
@ -88,7 +90,8 @@ public:
 protected:
  BreakableSingleLineToken(const FormatToken &Tok, unsigned StartColumn,
                           StringRef Prefix, StringRef Postfix,
-                           bool InPPDirective, encoding::Encoding Encoding);
+                           bool InPPDirective, encoding::Encoding Encoding,
+                           const FormatStyle &Style);

  // The column in which the token starts.
  unsigned StartColumn;
@ -107,7 +110,8 @@ public:
  /// \p StartColumn specifies the column in which the token will start
  /// after formatting.
  BreakableStringLiteral(const FormatToken &Tok, unsigned StartColumn,
-                         bool InPPDirective, encoding::Encoding Encoding);
+                         bool InPPDirective, encoding::Encoding Encoding,
+                         const FormatStyle &Style);

  virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
                         unsigned ColumnLimit) const;
@ -122,7 +126,8 @@ public:
  /// \p StartColumn specifies the column in which the comment will start
  /// after formatting.
  BreakableLineComment(const FormatToken &Token, unsigned StartColumn,
-                       bool InPPDirective, encoding::Encoding Encoding);
+                       bool InPPDirective, encoding::Encoding Encoding,
+                       const FormatStyle &Style);

  virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
                         unsigned ColumnLimit) const;
@ -144,10 +149,10 @@ public:
  /// after formatting, while \p OriginalStartColumn specifies in which
  /// column the comment started before formatting.
  /// If the comment starts a line after formatting, set \p FirstInLine to true.
-  BreakableBlockComment(const FormatStyle &Style, const FormatToken &Token,
-                        unsigned StartColumn, unsigned OriginaStartColumn,
-                        bool FirstInLine, bool InPPDirective,
-                        encoding::Encoding Encoding);
+  BreakableBlockComment(const FormatToken &Token, unsigned StartColumn,
+                        unsigned OriginaStartColumn, bool FirstInLine,
+                        bool InPPDirective, encoding::Encoding Encoding,
+                        const FormatStyle &Style);

  virtual unsigned getLineCount() const;
  virtual unsigned getLineLengthAfterSplit(unsigned LineIndex,
@ -172,8 +177,7 @@ private:
  // Sets StartOfLineColumn to the intended column in which the text at
  // Lines[LineIndex] starts (note that the decoration, if present, is not
  // considered part of the text).
-  void adjustWhitespace(const FormatStyle &Style, unsigned LineIndex,
-                        int IndentDelta);
+  void adjustWhitespace(unsigned LineIndex, int IndentDelta);

  // Returns the column at which the text in line LineIndex starts, when broken
  // at TailOffset. Note that the decoration (if present) is not considered part
--- a/clang/lib/Format/ContinuationIndenter.cpp
+++ b/clang/lib/Format/ContinuationIndenter.cpp
@ -623,10 +623,10 @@ ContinuationIndenter::addMultilineStringLiteral(const FormatToken &Current,
    State.Stack[i].BreakBeforeParameter = true;

  unsigned ColumnsUsed =
-      State.Column - Current.CodePointCount + Current.CodePointsInFirstLine;
+      State.Column - Current.CodePointCount + Current.FirstLineColumnWidth;
  // We can only affect layout of the first and the last line, so the penalty
  // for all other lines is constant, and we ignore it.
-  State.Column = Current.CodePointsInLastLine;
+  State.Column = Current.LastLineColumnWidth;

  if (ColumnsUsed > getColumnLimit(State))
    return Style.PenaltyExcessCharacter * (ColumnsUsed - getColumnLimit(State));
@ -659,14 +659,14 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
      return 0;

    Token.reset(new BreakableStringLiteral(
-        Current, StartColumn, State.Line->InPPDirective, Encoding));
+        Current, StartColumn, State.Line->InPPDirective, Encoding, Style));
  } else if (Current.Type == TT_BlockComment && Current.isTrailingComment()) {
    unsigned OriginalStartColumn =
        SourceMgr.getSpellingColumnNumber(Current.getStartOfNonWhitespace()) -
        1;
    Token.reset(new BreakableBlockComment(
-        Style, Current, StartColumn, OriginalStartColumn, !Current.Previous,
-        State.Line->InPPDirective, Encoding));
+        Current, StartColumn, OriginalStartColumn, !Current.Previous,
+        State.Line->InPPDirective, Encoding, Style));
  } else if (Current.Type == TT_LineComment &&
             (Current.Previous == NULL ||
              Current.Previous->Type != TT_ImplicitStringLiteral)) {
@ -678,12 +678,12 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
    // leading whitespace in consecutive lines when changing indentation of
    // the first line similar to what we do with block comments.
    if (Current.isMultiline()) {
-      State.Column = StartColumn + Current.CodePointsInFirstLine;
+      State.Column = StartColumn + Current.FirstLineColumnWidth;
      return 0;
    }

-    Token.reset(new BreakableLineComment(Current, StartColumn,
-                                         State.Line->InPPDirective, Encoding));
+    Token.reset(new BreakableLineComment(
+        Current, StartColumn, State.Line->InPPDirective, Encoding, Style));
  } else {
    return 0;
  }
--- a/clang/lib/Format/Encoding.h
+++ b/clang/lib/Format/Encoding.h
@ -18,6 +18,7 @@

 #include "clang/Basic/LLVM.h"
 #include "llvm/Support/ConvertUTF.h"
+#include "llvm/Support/Unicode.h"

 namespace clang {
 namespace format {
@ -57,6 +58,37 @@ inline unsigned getCodePointCount(StringRef Text, Encoding Encoding) {
  }
 }

+/// \brief Computes how many columns \p Text occupies when displayed on a
+/// generic Unicode-capable terminal, interpreting its bytes according to
+/// \p Encoding.
+///
+/// For UTF-8 input the width is taken from
+/// llvm::sys::unicode::columnWidthUTF8(). In every other case, including a
+/// negative result from that function, the byte length of \p Text is used.
+inline unsigned columnWidth(StringRef Text, Encoding Encoding) {
+  // Non-UTF-8 text is counted as one column per byte.
+  if (Encoding != Encoding_UTF8)
+    return Text.size();
+
+  // A negative measurement means no usable width was obtained; fall back to
+  // the byte count in that case as well.
+  const int MeasuredWidth = llvm::sys::unicode::columnWidthUTF8(Text);
+  if (MeasuredWidth < 0)
+    return Text.size();
+  return MeasuredWidth;
+}
+
+/// \brief Returns the number of columns required to display the \p Text,
+/// starting from the \p StartColumn on a terminal with the \p TabWidth. The
+/// text is assumed to use the specified \p Encoding.
+///
+/// Every tab advances the running column (\p StartColumn plus the width
+/// accumulated so far) to the next multiple of \p TabWidth. If \p TabWidth is
+/// zero, tabs contribute no columns instead of triggering a division by zero.
+inline unsigned columnWidthWithTabs(StringRef Text, unsigned StartColumn,
+                                    unsigned TabWidth, Encoding Encoding) {
+  unsigned TotalWidth = 0;
+  StringRef Tail = Text;
+  for (;;) {
+    StringRef::size_type TabPos = Tail.find('\t');
+    // No tab left: the remainder is measured as plain text and we are done.
+    if (TabPos == StringRef::npos)
+      return TotalWidth + columnWidth(Tail, Encoding);
+    // columnWidth() returns an unsigned value, so there is no sign to check;
+    // accumulate the width of the text preceding the tab directly.
+    TotalWidth += columnWidth(Tail.substr(0, TabPos), Encoding);
+    // TabWidth originates from the style configuration and may be 0; guard the
+    // modulo so that such a style cannot cause undefined behavior.
+    if (TabWidth)
+      TotalWidth += TabWidth - (TotalWidth + StartColumn) % TabWidth;
+    Tail = Tail.substr(TabPos + 1);
+  }
+}
+
 /// \brief Gets the number of bytes in a sequence representing a single
 /// codepoint and starting with FirstChar in the specified Encoding.
 inline unsigned getCodePointNumBytes(char FirstChar, Encoding Encoding) {
--- a/clang/lib/Format/Format.cpp
+++ b/clang/lib/Format/Format.cpp
@ -136,6 +136,7 @@ template <> struct MappingTraits<clang::format::FormatStyle> {
    IO.mapOptional("Cpp11BracedListStyle", Style.Cpp11BracedListStyle);
    IO.mapOptional("Standard", Style.Standard);
    IO.mapOptional("IndentWidth", Style.IndentWidth);
+    IO.mapOptional("TabWidth", Style.TabWidth);
    IO.mapOptional("UseTab", Style.UseTab);
    IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces);
    IO.mapOptional("IndentFunctionDeclarationAfterType",
@ -184,6 +185,7 @@ FormatStyle getLLVMStyle() {
  LLVMStyle.IndentCaseLabels = false;
  LLVMStyle.IndentFunctionDeclarationAfterType = false;
  LLVMStyle.IndentWidth = 2;
+  LLVMStyle.TabWidth = 8;
  LLVMStyle.MaxEmptyLinesToKeep = 1;
  LLVMStyle.NamespaceIndentation = FormatStyle::NI_None;
  LLVMStyle.ObjCSpaceBeforeProtocolList = true;
@ -225,6 +227,7 @@ FormatStyle getGoogleStyle() {
  GoogleStyle.IndentCaseLabels = true;
  GoogleStyle.IndentFunctionDeclarationAfterType = true;
  GoogleStyle.IndentWidth = 2;
+  GoogleStyle.TabWidth = 8;
  GoogleStyle.MaxEmptyLinesToKeep = 1;
  GoogleStyle.NamespaceIndentation = FormatStyle::NI_None;
  GoogleStyle.ObjCSpaceBeforeProtocolList = false;
@ -629,7 +632,7 @@ private:
          ++Column;
          break;
        case '\t':
-          Column += Style.IndentWidth - Column % Style.IndentWidth;
+          Column += Style.TabWidth - Column % Style.TabWidth;
          break;
        default:
          ++Column;
@ -681,10 +684,12 @@ private:
      StringRef Text = FormatTok->TokenText;
      size_t FirstNewlinePos = Text.find('\n');
      if (FirstNewlinePos != StringRef::npos) {
-        FormatTok->CodePointsInFirstLine = encoding::getCodePointCount(
-            Text.substr(0, FirstNewlinePos), Encoding);
-        FormatTok->CodePointsInLastLine = encoding::getCodePointCount(
-            Text.substr(Text.find_last_of('\n') + 1), Encoding);
+        // FIXME: Handle embedded tabs.
+        FormatTok->FirstLineColumnWidth = encoding::columnWidthWithTabs(
+            Text.substr(0, FirstNewlinePos), 0, Style.TabWidth, Encoding);
+        FormatTok->LastLineColumnWidth = encoding::columnWidthWithTabs(
+            Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth,
+            Encoding);
      }
    }
    // FIXME: Add the CodePointCount to Column.
--- a/clang/lib/Format/FormatToken.h
+++ b/clang/lib/Format/FormatToken.h
@ -83,7 +83,7 @@ class AnnotatedLine;
 struct FormatToken {
  FormatToken()
      : NewlinesBefore(0), HasUnescapedNewline(false), LastNewlineOffset(0),
-        CodePointCount(0), CodePointsInFirstLine(0), CodePointsInLastLine(0),
+        CodePointCount(0), FirstLineColumnWidth(0), LastLineColumnWidth(0),
        IsFirst(false), MustBreakBefore(false), IsUnterminatedLiteral(false),
        BlockKind(BK_Unknown), Type(TT_Unknown), SpacesRequiredBefore(0),
        CanBreakBefore(false), ClosesTemplateDeclaration(false),
@ -120,15 +120,15 @@ struct FormatToken {
  /// \brief Contains the width in columns of the first line of a
  /// multi-line string literal or comment. Zero if there's no newline in the
  /// token.
-  unsigned CodePointsInFirstLine;
+  unsigned FirstLineColumnWidth;

  /// \brief Contains the width in columns of the last line of a
  /// multi-line string literal or comment. Can be zero for line comments.
-  unsigned CodePointsInLastLine;
+  unsigned LastLineColumnWidth;

  /// \brief Returns \c true if the token text contains newlines (escaped or
  /// not).
-  bool isMultiline() const { return CodePointsInFirstLine != 0; }
+  bool isMultiline() const { return FirstLineColumnWidth != 0; }

  /// \brief Indicates that this is the first token.
  bool IsFirst;
--- a/clang/lib/Format/WhitespaceManager.cpp
+++ b/clang/lib/Format/WhitespaceManager.cpp
@ -272,8 +272,8 @@ std::string WhitespaceManager::getIndentText(unsigned Spaces) {
  if (!Style.UseTab)
    return std::string(Spaces, ' ');

-  return std::string(Spaces / Style.IndentWidth, '\t') +
-         std::string(Spaces % Style.IndentWidth, ' ');
+  return std::string(Spaces / Style.TabWidth, '\t') +
+         std::string(Spaces % Style.TabWidth, ' ');
 }

 } // namespace format
--- a/clang/unittests/Format/FormatTest.cpp
+++ b/clang/unittests/Format/FormatTest.cpp
@ -5638,9 +5638,41 @@ TEST_F(FormatTest, ConfigurableUseOfTab) {
                   "}",
                   21, 0, Tab));

+  Tab.TabWidth = 4;
+  Tab.IndentWidth = 8;
+  verifyFormat("class TabWidth4Indent8 {\n"
+               "\t\tvoid f() {\n"
+               "\t\t\t\tsomeFunction(parameter1,\n"
+               "\t\t\t\t\t\t\t parameter2);\n"
+               "\t\t}\n"
+               "};",
+               Tab);
+
+  Tab.TabWidth = 4;
+  Tab.IndentWidth = 4;
+  verifyFormat("class TabWidth4Indent4 {\n"
+               "\tvoid f() {\n"
+               "\t\tsomeFunction(parameter1,\n"
+               "\t\t\t\t\t parameter2);\n"
+               "\t}\n"
+               "};",
+               Tab);
+
+  Tab.TabWidth = 8;
+  Tab.IndentWidth = 4;
+  verifyFormat("class TabWidth8Indent4 {\n"
+               "    void f() {\n"
+               "\tsomeFunction(parameter1,\n"
+               "\t\t     parameter2);\n"
+               "    }\n"
+               "};",
+               Tab);
+
  // FIXME: To correctly count mixed whitespace we need to
  // also correctly count mixed whitespace in front of the comment.
-  //
+
+  // Tab.TabWidth = 8;
+  // Tab.IndentWidth = 8;
  // EXPECT_EQ("/*\n"
  //           "\t      a\t\tcomment\n"
  //           "\t      in multiple lines\n"
@ -6074,15 +6106,15 @@ TEST_F(FormatTest, CountsUTF8CharactersProperly) {
  verifyFormat("\"Однажды в студёную зимнюю пору...\"",
               getLLVMStyleWithColumns(35));
  verifyFormat("\"一 二 三 四 五 六 七 八 九 十\"",
-               getLLVMStyleWithColumns(21));
+               getLLVMStyleWithColumns(31));
  verifyFormat("// Однажды в студёную зимнюю пору...",
               getLLVMStyleWithColumns(36));
  verifyFormat("// 一 二 三 四 五 六 七 八 九 十",
-               getLLVMStyleWithColumns(22));
+               getLLVMStyleWithColumns(32));
  verifyFormat("/* Однажды в студёную зимнюю пору... */",
               getLLVMStyleWithColumns(39));
  verifyFormat("/* 一 二 三 四 五 六 七 八 九 十 */",
-               getLLVMStyleWithColumns(25));
+               getLLVMStyleWithColumns(35));
 }

 TEST_F(FormatTest, SplitsUTF8Strings) {
@ -6093,11 +6125,29 @@ TEST_F(FormatTest, SplitsUTF8Strings) {
      "\"пору,\"",
      format("\"Однажды, в студёную зимнюю пору,\"",
             getLLVMStyleWithColumns(13)));
-  EXPECT_EQ("\"一 二 三 四 \"\n"
-            "\"五 六 七 八 \"\n"
-            "\"九 十\"",
-            format("\"一 二 三 四 五 六 七 八 九 十\"",
-                   getLLVMStyleWithColumns(10)));
+  EXPECT_EQ("\"一 二 三 \"\n"
+            "\"四 五六 \"\n"
+            "\"七 八 九 \"\n"
+            "\"十\"",
+            format("\"一 二 三 四 五六 七 八 九 十\"",
+                   getLLVMStyleWithColumns(11)));
+  EXPECT_EQ("\"一\t二 \"\n"
+            "\"\t三 \"\n"
+            "\"四 五\t六 \"\n"
+            "\"\t七 \"\n"
+            "\"八九十\tqq\"",
+            format("\"一\t二 \t三 四 五\t六 \t七 八九十\tqq\"",
+                   getLLVMStyleWithColumns(11)));
+}
+
+
+// Checks formatting of a declaration whose string literal contains
+// double-width characters and an escaped newline: with a 30-column limit the
+// expected result breaks after '=' and leaves the literal's text unchanged.
+TEST_F(FormatTest, HandlesDoubleWidthCharsInMultiLineStrings) {
+  const FormatStyle Style = getLLVMStyleWithColumns(30);
+  const char *Unformatted = "const char *sssss = \"一二三四五六七八\\\n"
+                            " 九 十\";";
+  const char *Expected = "const char *sssss =\n"
+                         "    \"一二三四五六七八\\\n"
+                         " 九 十\";";
+  EXPECT_EQ(Expected, format(Unformatted, Style));
+}

 TEST_F(FormatTest, SplitsUTF8LineComments) {
@ -6109,9 +6159,9 @@ TEST_F(FormatTest, SplitsUTF8LineComments) {
                   getLLVMStyleWithColumns(13)));
  EXPECT_EQ("// 一二三\n"
            "// 四五六七\n"
-            "// 八\n"
-            "// 九 十",
-            format("// 一二三 四五六七 八  九 十", getLLVMStyleWithColumns(6)));
+            "// 八  九\n"
+            "// 十",
+            format("// 一二三 四五六七 八  九 十", getLLVMStyleWithColumns(9)));
 }

 TEST_F(FormatTest, SplitsUTF8BlockComments) {
@ -6126,18 +6176,20 @@ TEST_F(FormatTest, SplitsUTF8BlockComments) {
            format("/* Гляжу, поднимается медленно в гору\n"
                   " * Лошадка, везущая хворосту воз. */",
                   getLLVMStyleWithColumns(13)));
-  EXPECT_EQ("/* 一二三\n"
-            " * 四五六七\n"
-            " * 八\n"
-            " * 九 十\n"
-            " */",
-            format("/* 一二三 四五六七 八  九 十 */", getLLVMStyleWithColumns(6)));
+  EXPECT_EQ(
+      "/* 一二三\n"
+      " * 四五六七\n"
+      " * 八  九\n"
+      " * 十  */",
+      format("/* 一二三 四五六七 八  九 十  */", getLLVMStyleWithColumns(9)));
  EXPECT_EQ("/* 𝓣𝓮𝓼𝓽 𝔣𝔬𝔲𝔯\n"
            " * 𝕓𝕪𝕥𝕖\n"
            " * 𝖀𝕿𝕱-𝟠 */",
            format("/* 𝓣𝓮𝓼𝓽 𝔣𝔬𝔲𝔯 𝕓𝕪𝕥𝕖 𝖀𝕿𝕱-𝟠 */", getLLVMStyleWithColumns(12)));
 }

+#endif // _MSC_VER
+
 TEST_F(FormatTest, ConstructorInitializerIndentWidth) {
  FormatStyle Style = getLLVMStyle();

@ -6185,8 +6237,6 @@ TEST_F(FormatTest, ConstructorInitializerIndentWidth) {
               Style);
 }

-#endif
-
 TEST_F(FormatTest, FormatsWithWebKitStyle) {
  FormatStyle Style = getWebKitStyle();