Move UTF functions into namespace llvm.

Summary:
This lets people link against LLVM and their own version of the UTF
library.

I determined this only affects llvm, clang, lld, and lldb by running

    $ git grep -wl 'UTF[0-9]\+\|\bConvertUTF\bisLegalUTF\|getNumBytesFor' | cut -f 1 -d '/' | sort | uniq
    clang
    lld
    lldb
    llvm

Tested with

    ninja lldb
    ninja check-clang check-llvm check-lld

(ninja check-lldb doesn't complete for me with or without this patch.)

Reviewers: rnk

Subscribers: klimek, beanz, mgorny, llvm-commits

Differential Revision: https://reviews.llvm.org/D24996

llvm-svn: 282822
2016-09-30 00:38:45 +00:00 · 2016-09-30 00:38:45 +00:00 · 9091055efa
parent 951c6b1955
commit 9091055efa
13 changed files with 114 additions and 114 deletions
--- a/clang/lib/Analysis/FormatString.cpp
+++ b/clang/lib/Analysis/FormatString.cpp
@ -266,14 +266,15 @@ bool clang::analyze_format_string::ParseUTF8InvalidSpecifier(
  if (SpecifierBegin + 1 >= FmtStrEnd)
    return false;

-  const UTF8 *SB = reinterpret_cast<const UTF8 *>(SpecifierBegin + 1);
-  const UTF8 *SE = reinterpret_cast<const UTF8 *>(FmtStrEnd);
+  const llvm::UTF8 *SB =
+      reinterpret_cast<const llvm::UTF8 *>(SpecifierBegin + 1);
+  const llvm::UTF8 *SE = reinterpret_cast<const llvm::UTF8 *>(FmtStrEnd);
  const char FirstByte = *SB;

  // If the invalid specifier is a multibyte UTF-8 string, return the
  // total length accordingly so that the conversion specifier can be
  // properly updated to reflect a complete UTF-8 specifier.
-  unsigned NumBytes = getNumBytesForUTF8(FirstByte);
+  unsigned NumBytes = llvm::getNumBytesForUTF8(FirstByte);
  if (NumBytes == 1)
    return false;
  if (SB + NumBytes > SE)
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@ -3136,13 +3136,12 @@ GetConstantCFStringEntry(llvm::StringMap<llvm::GlobalVariable *> &Map,
  // Otherwise, convert the UTF8 literals into a string of shorts.
  IsUTF16 = true;

-  SmallVector<UTF16, 128> ToBuf(NumBytes + 1); // +1 for ending nulls.
-  const UTF8 *FromPtr = (const UTF8 *)String.data();
-  UTF16 *ToPtr = &ToBuf[0];
+  SmallVector<llvm::UTF16, 128> ToBuf(NumBytes + 1); // +1 for ending nulls.
+  const llvm::UTF8 *FromPtr = (const llvm::UTF8 *)String.data();
+  llvm::UTF16 *ToPtr = &ToBuf[0];

-  (void)ConvertUTF8toUTF16(&FromPtr, FromPtr + NumBytes,
-                           &ToPtr, ToPtr + NumBytes,
-                           strictConversion);
+  (void)llvm::ConvertUTF8toUTF16(&FromPtr, FromPtr + NumBytes, &ToPtr,
+                                 ToPtr + NumBytes, llvm::strictConversion);

  // ConvertUTF8toUTF16 returns the length in ToPtr.
  StringLength = ToPtr - &ToBuf[0];
--- a/clang/lib/Format/Encoding.h
+++ b/clang/lib/Format/Encoding.h
@ -33,16 +33,17 @@ enum Encoding {
 /// \brief Detects encoding of the Text. If the Text can be decoded using UTF-8,
 /// it is considered UTF8, otherwise we treat it as some 8-bit encoding.
 inline Encoding detectEncoding(StringRef Text) {
-  const UTF8 *Ptr = reinterpret_cast<const UTF8 *>(Text.begin());
-  const UTF8 *BufEnd = reinterpret_cast<const UTF8 *>(Text.end());
-  if (::isLegalUTF8String(&Ptr, BufEnd))
+  const llvm::UTF8 *Ptr = reinterpret_cast<const llvm::UTF8 *>(Text.begin());
+  const llvm::UTF8 *BufEnd = reinterpret_cast<const llvm::UTF8 *>(Text.end());
+  if (llvm::isLegalUTF8String(&Ptr, BufEnd))
    return Encoding_UTF8;
  return Encoding_Unknown;
 }

 inline unsigned getCodePointCountUTF8(StringRef Text) {
  unsigned CodePoints = 0;
-  for (size_t i = 0, e = Text.size(); i < e; i += getNumBytesForUTF8(Text[i])) {
+  for (size_t i = 0, e = Text.size(); i < e;
+       i += llvm::getNumBytesForUTF8(Text[i])) {
    ++CodePoints;
  }
  return CodePoints;
@ -97,7 +98,7 @@ inline unsigned columnWidthWithTabs(StringRef Text, unsigned StartColumn,
 inline unsigned getCodePointNumBytes(char FirstChar, Encoding Encoding) {
  switch (Encoding) {
  case Encoding_UTF8:
-    return getNumBytesForUTF8(FirstChar);
+    return llvm::getNumBytesForUTF8(FirstChar);
  default:
    return 1;
  }
@ -136,7 +137,7 @@ inline unsigned getEscapeSequenceLength(StringRef Text) {
        ++I;
      return I;
    }
-    return 1 + getNumBytesForUTF8(Text[1]);
+    return 1 + llvm::getNumBytesForUTF8(Text[1]);
  }
 }

--- a/clang/lib/Frontend/TextDiagnostic.cpp
+++ b/clang/lib/Frontend/TextDiagnostic.cpp
@ -119,16 +119,17 @@ printableTextForNextCharacter(StringRef SourceLine, size_t *i,
  begin = reinterpret_cast<unsigned char const *>(&*(SourceLine.begin() + *i));
  end = begin + (SourceLine.size() - *i);
  
-  if (isLegalUTF8Sequence(begin, end)) {
-    UTF32 c;
-    UTF32 *cptr = &c;
+  if (llvm::isLegalUTF8Sequence(begin, end)) {
+    llvm::UTF32 c;
+    llvm::UTF32 *cptr = &c;
    unsigned char const *original_begin = begin;
-    unsigned char const *cp_end = begin+getNumBytesForUTF8(SourceLine[*i]);
+    unsigned char const *cp_end =
+        begin + llvm::getNumBytesForUTF8(SourceLine[*i]);

-    ConversionResult res = ConvertUTF8toUTF32(&begin, cp_end, &cptr, cptr+1,
-                                              strictConversion);
+    llvm::ConversionResult res = llvm::ConvertUTF8toUTF32(
+        &begin, cp_end, &cptr, cptr + 1, llvm::strictConversion);
    (void)res;
-    assert(conversionOK==res);
+    assert(llvm::conversionOK == res);
    assert(0 < begin-original_begin
           && "we must be further along in the string now");
    *i += begin-original_begin;
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@ -1485,13 +1485,13 @@ bool Lexer::tryConsumeIdentifierUCN(const char *&CurPtr, unsigned Size,

 bool Lexer::tryConsumeIdentifierUTF8Char(const char *&CurPtr) {
  const char *UnicodePtr = CurPtr;
-  UTF32 CodePoint;
-  ConversionResult Result =
-      llvm::convertUTF8Sequence((const UTF8 **)&UnicodePtr,
-                                (const UTF8 *)BufferEnd,
+  llvm::UTF32 CodePoint;
+  llvm::ConversionResult Result =
+      llvm::convertUTF8Sequence((const llvm::UTF8 **)&UnicodePtr,
+                                (const llvm::UTF8 *)BufferEnd,
                                &CodePoint,
-                                strictConversion);
-  if (Result != conversionOK ||
+                                llvm::strictConversion);
+  if (Result != llvm::conversionOK ||
      !isAllowedIDChar(static_cast<uint32_t>(CodePoint), LangOpts))
    return false;

@ -3625,17 +3625,17 @@ LexNextToken:
      break;
    }

-    UTF32 CodePoint;
+    llvm::UTF32 CodePoint;

    // We can't just reset CurPtr to BufferPtr because BufferPtr may point to
    // an escaped newline.
    --CurPtr;
-    ConversionResult Status =
-        llvm::convertUTF8Sequence((const UTF8 **)&CurPtr,
-                                  (const UTF8 *)BufferEnd,
+    llvm::ConversionResult Status =
+        llvm::convertUTF8Sequence((const llvm::UTF8 **)&CurPtr,
+                                  (const llvm::UTF8 *)BufferEnd,
                                  &CodePoint,
-                                  strictConversion);
-    if (Status == conversionOK) {
+                                  llvm::strictConversion);
+    if (Status == llvm::conversionOK) {
      if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) {
        if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
          return true; // KeepWhitespaceMode
--- a/clang/lib/Lex/LiteralSupport.cpp
+++ b/clang/lib/Lex/LiteralSupport.cpp
@ -402,7 +402,7 @@ static void EncodeUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf,
  if (CharByteWidth == 4) {
    // FIXME: Make the type of the result buffer correct instead of
    // using reinterpret_cast.
-    UTF32 *ResultPtr = reinterpret_cast<UTF32*>(ResultBuf);
+    llvm::UTF32 *ResultPtr = reinterpret_cast<llvm::UTF32*>(ResultBuf);
    *ResultPtr = UcnVal;
    ResultBuf += 4;
    return;
@ -411,7 +411,7 @@ static void EncodeUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf,
  if (CharByteWidth == 2) {
    // FIXME: Make the type of the result buffer correct instead of
    // using reinterpret_cast.
-    UTF16 *ResultPtr = reinterpret_cast<UTF16*>(ResultBuf);
+    llvm::UTF16 *ResultPtr = reinterpret_cast<llvm::UTF16*>(ResultBuf);

    if (UcnVal <= (UTF32)0xFFFF) {
      *ResultPtr = UcnVal;
@ -1114,11 +1114,11 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,

      char const *tmp_in_start = start;
      uint32_t *tmp_out_start = buffer_begin;
-      ConversionResult res =
-          ConvertUTF8toUTF32(reinterpret_cast<UTF8 const **>(&start),
-                             reinterpret_cast<UTF8 const *>(begin),
-                             &buffer_begin, buffer_end, strictConversion);
-      if (res != conversionOK) {
+      llvm::ConversionResult res =
+          llvm::ConvertUTF8toUTF32(reinterpret_cast<llvm::UTF8 const **>(&start),
+                             reinterpret_cast<llvm::UTF8 const *>(begin),
+                             &buffer_begin, buffer_end, llvm::strictConversion);
+      if (res != llvm::conversionOK) {
        // If we see bad encoding for unprefixed character literals, warn and
        // simply copy the byte values, for compatibility with gcc and
        // older versions of clang.
@ -1510,13 +1510,13 @@ void StringLiteralParser::init(ArrayRef<Token> StringToks){
        if (CharByteWidth == 4) {
          // FIXME: Make the type of the result buffer correct instead of
          // using reinterpret_cast.
-          UTF32 *ResultWidePtr = reinterpret_cast<UTF32*>(ResultPtr);
+          llvm::UTF32 *ResultWidePtr = reinterpret_cast<llvm::UTF32*>(ResultPtr);
          *ResultWidePtr = ResultChar;
          ResultPtr += 4;
        } else if (CharByteWidth == 2) {
          // FIXME: Make the type of the result buffer correct instead of
          // using reinterpret_cast.
-          UTF16 *ResultWidePtr = reinterpret_cast<UTF16*>(ResultPtr);
+          llvm::UTF16 *ResultWidePtr = reinterpret_cast<llvm::UTF16*>(ResultPtr);
          *ResultWidePtr = ResultChar & 0xFFFF;
          ResultPtr += 2;
        } else {
@ -1531,12 +1531,12 @@ void StringLiteralParser::init(ArrayRef<Token> StringToks){
    if (CharByteWidth == 4) {
      // FIXME: Make the type of the result buffer correct instead of
      // using reinterpret_cast.
-      UTF32 *ResultWidePtr = reinterpret_cast<UTF32*>(ResultBuf.data());
+      llvm::UTF32 *ResultWidePtr = reinterpret_cast<llvm::UTF32*>(ResultBuf.data());
      ResultWidePtr[0] = GetNumStringChars() - 1;
    } else if (CharByteWidth == 2) {
      // FIXME: Make the type of the result buffer correct instead of
      // using reinterpret_cast.
-      UTF16 *ResultWidePtr = reinterpret_cast<UTF16*>(ResultBuf.data());
+      llvm::UTF16 *ResultWidePtr = reinterpret_cast<llvm::UTF16*>(ResultBuf.data());
      ResultWidePtr[0] = GetNumStringChars() - 1;
    } else {
      assert(CharByteWidth == 1 && "Unexpected char width");
@ -1570,7 +1570,7 @@ void StringLiteralParser::init(ArrayRef<Token> StringToks){
 static const char *resyncUTF8(const char *Err, const char *End) {
  if (Err == End)
    return End;
-  End = Err + std::min<unsigned>(getNumBytesForUTF8(*Err), End-Err);
+  End = Err + std::min<unsigned>(llvm::getNumBytesForUTF8(*Err), End-Err);
  while (++Err != End && (*Err & 0xC0) == 0x80)
    ;
  return Err;
@ -1582,7 +1582,7 @@ static const char *resyncUTF8(const char *Err, const char *End) {
 bool StringLiteralParser::CopyStringFragment(const Token &Tok,
                                             const char *TokBegin,
                                             StringRef Fragment) {
-  const UTF8 *ErrorPtrTmp;
+  const llvm::UTF8 *ErrorPtrTmp;
  if (ConvertUTF8toWide(CharByteWidth, Fragment, ResultPtr, ErrorPtrTmp))
    return false;

--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@ -3262,15 +3262,15 @@ bool Sema::CheckObjCString(Expr *Arg) {
  if (Literal->containsNonAsciiOrNull()) {
    StringRef String = Literal->getString();
    unsigned NumBytes = String.size();
-    SmallVector<UTF16, 128> ToBuf(NumBytes);
-    const UTF8 *FromPtr = (const UTF8 *)String.data();
-    UTF16 *ToPtr = &ToBuf[0];
-    
-    ConversionResult Result = ConvertUTF8toUTF16(&FromPtr, FromPtr + NumBytes,
-                                                 &ToPtr, ToPtr + NumBytes,
-                                                 strictConversion);
+    SmallVector<llvm::UTF16, 128> ToBuf(NumBytes);
+    const llvm::UTF8 *FromPtr = (const llvm::UTF8 *)String.data();
+    llvm::UTF16 *ToPtr = &ToBuf[0];
+
+    llvm::ConversionResult Result =
+        llvm::ConvertUTF8toUTF16(&FromPtr, FromPtr + NumBytes, &ToPtr,
+                                 ToPtr + NumBytes, llvm::strictConversion);
    // Check for conversion failure.
-    if (Result != conversionOK)
+    if (Result != llvm::conversionOK)
      Diag(Arg->getLocStart(),
           diag::warn_cfstring_truncated) << Arg->getSourceRange();
  }
@ -4777,16 +4777,16 @@ CheckFormatHandler::HandleInvalidConversionSpecifier(unsigned argIndex,
  // hex value.
  std::string CodePointStr;
  if (!llvm::sys::locale::isPrint(*csStart)) {
-    UTF32 CodePoint;
-    const UTF8 **B = reinterpret_cast<const UTF8 **>(&csStart);
-    const UTF8 *E =
-        reinterpret_cast<const UTF8 *>(csStart + csLen);
-    ConversionResult Result =
-        llvm::convertUTF8Sequence(B, E, &CodePoint, strictConversion);
+    llvm::UTF32 CodePoint;
+    const llvm::UTF8 **B = reinterpret_cast<const llvm::UTF8 **>(&csStart);
+    const llvm::UTF8 *E =
+        reinterpret_cast<const llvm::UTF8 *>(csStart + csLen);
+    llvm::ConversionResult Result =
+        llvm::convertUTF8Sequence(B, E, &CodePoint, llvm::strictConversion);

-    if (Result != conversionOK) {
+    if (Result != llvm::conversionOK) {
      unsigned char FirstChar = *csStart;
-      CodePoint = (UTF32)FirstChar;
+      CodePoint = (llvm::UTF32)FirstChar;
    }

    llvm::raw_string_ostream OS(CodePointStr);
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@ -3070,8 +3070,9 @@ static void ConvertUTF8ToWideString(unsigned CharByteWidth, StringRef Source,
                                    SmallString<32> &Target) {
  Target.resize(CharByteWidth * (Source.size() + 1));
  char *ResultPtr = &Target[0];
-  const UTF8 *ErrorPtr;
-  bool success = ConvertUTF8toWide(CharByteWidth, Source, ResultPtr, ErrorPtr);
+  const llvm::UTF8 *ErrorPtr;
+  bool success =
+      llvm::ConvertUTF8toWide(CharByteWidth, Source, ResultPtr, ErrorPtr);
  (void)success;
  assert(success);
  Target.resize(ResultPtr - &Target[0]);
--- a/lldb/source/DataFormatters/StringPrinter.cpp
+++ b/lldb/source/DataFormatters/StringPrinter.cpp
@ -133,7 +133,7 @@ GetPrintableImpl<StringPrinter::StringElementType::UTF8>(uint8_t *buffer,
                                                         uint8_t *&next) {
  StringPrinter::StringPrinterBufferPointer<> retval{nullptr};

-  unsigned utf8_encoded_len = getNumBytesForUTF8(*buffer);
+  unsigned utf8_encoded_len = llvm::getNumBytesForUTF8(*buffer);

  if (1 + buffer_end - buffer < utf8_encoded_len) {
    // I don't have enough bytes - print whatever I have left
@ -266,9 +266,10 @@ StringPrinter::GetDefaultEscapingHelper(GetPrintableElementType elem_type) {
 // use this call if you already have an LLDB-side buffer for the data
 template <typename SourceDataType>
 static bool DumpUTFBufferToStream(
-    ConversionResult (*ConvertFunction)(const SourceDataType **,
-                                        const SourceDataType *, UTF8 **, UTF8 *,
-                                        ConversionFlags),
+    llvm::ConversionResult (*ConvertFunction)(const SourceDataType **,
+                                              const SourceDataType *,
+                                              llvm::UTF8 **, llvm::UTF8 *,
+                                              llvm::ConversionFlags),
    const StringPrinter::ReadBufferAndDumpToStreamOptions &dump_options) {
  Stream &stream(*dump_options.GetStream());
  if (dump_options.GetPrefixToken() != 0)
@ -303,30 +304,29 @@ static bool DumpUTFBufferToStream(
    }

    lldb::DataBufferSP utf8_data_buffer_sp;
-    UTF8 *utf8_data_ptr = nullptr;
-    UTF8 *utf8_data_end_ptr = nullptr;
+    llvm::UTF8 *utf8_data_ptr = nullptr;
+    llvm::UTF8 *utf8_data_end_ptr = nullptr;

    if (ConvertFunction) {
      utf8_data_buffer_sp.reset(new DataBufferHeap(4 * bufferSPSize, 0));
-      utf8_data_ptr = (UTF8 *)utf8_data_buffer_sp->GetBytes();
+      utf8_data_ptr = (llvm::UTF8 *)utf8_data_buffer_sp->GetBytes();
      utf8_data_end_ptr = utf8_data_ptr + utf8_data_buffer_sp->GetByteSize();
      ConvertFunction(&data_ptr, data_end_ptr, &utf8_data_ptr,
-                      utf8_data_end_ptr, lenientConversion);
+                      utf8_data_end_ptr, llvm::lenientConversion);
      if (false == zero_is_terminator)
        utf8_data_end_ptr = utf8_data_ptr;
+      // needed because the ConvertFunction will change the value of the
+      // data_ptr.
      utf8_data_ptr =
-          (UTF8 *)utf8_data_buffer_sp->GetBytes(); // needed because the
-                                                   // ConvertFunction will
-                                                   // change the value of the
-                                                   // data_ptr
+          (llvm::UTF8 *)utf8_data_buffer_sp->GetBytes();
    } else {
      // just copy the pointers - the cast is necessary to make the compiler
      // happy
      // but this should only happen if we are reading UTF8 data
-      utf8_data_ptr =
-          const_cast<UTF8 *>(reinterpret_cast<const UTF8 *>(data_ptr));
-      utf8_data_end_ptr =
-          const_cast<UTF8 *>(reinterpret_cast<const UTF8 *>(data_end_ptr));
+      utf8_data_ptr = const_cast<llvm::UTF8 *>(
+          reinterpret_cast<const llvm::UTF8 *>(data_ptr));
+      utf8_data_end_ptr = const_cast<llvm::UTF8 *>(
+          reinterpret_cast<const llvm::UTF8 *>(data_end_ptr));
    }

    const bool escape_non_printables = dump_options.GetEscapeNonPrintables();
@ -512,9 +512,10 @@ bool StringPrinter::ReadStringAndDumpToStream<
 template <typename SourceDataType>
 static bool ReadUTFBufferAndDumpToStream(
    const StringPrinter::ReadStringAndDumpToStreamOptions &options,
-    ConversionResult (*ConvertFunction)(const SourceDataType **,
-                                        const SourceDataType *, UTF8 **, UTF8 *,
-                                        ConversionFlags)) {
+    llvm::ConversionResult (*ConvertFunction)(const SourceDataType **,
+                                              const SourceDataType *,
+                                              llvm::UTF8 **, llvm::UTF8 *,
+                                              llvm::ConversionFlags)) {
  assert(options.GetStream() && "need a Stream to print the string to");

  if (options.GetLocation() == 0 ||
@ -591,21 +592,23 @@ template <>
 bool StringPrinter::ReadStringAndDumpToStream<
    StringPrinter::StringElementType::UTF8>(
    const ReadStringAndDumpToStreamOptions &options) {
-  return ReadUTFBufferAndDumpToStream<UTF8>(options, nullptr);
+  return ReadUTFBufferAndDumpToStream<llvm::UTF8>(options, nullptr);
 }

 template <>
 bool StringPrinter::ReadStringAndDumpToStream<
    StringPrinter::StringElementType::UTF16>(
    const ReadStringAndDumpToStreamOptions &options) {
-  return ReadUTFBufferAndDumpToStream<UTF16>(options, ConvertUTF16toUTF8);
+  return ReadUTFBufferAndDumpToStream<llvm::UTF16>(options,
+                                                   llvm::ConvertUTF16toUTF8);
 }

 template <>
 bool StringPrinter::ReadStringAndDumpToStream<
    StringPrinter::StringElementType::UTF32>(
    const ReadStringAndDumpToStreamOptions &options) {
-  return ReadUTFBufferAndDumpToStream<UTF32>(options, ConvertUTF32toUTF8);
+  return ReadUTFBufferAndDumpToStream<llvm::UTF32>(options,
+                                                   llvm::ConvertUTF32toUTF8);
 }

 template <>
@ -614,7 +617,7 @@ bool StringPrinter::ReadBufferAndDumpToStream<
    const ReadBufferAndDumpToStreamOptions &options) {
  assert(options.GetStream() && "need a Stream to print the string to");

-  return DumpUTFBufferToStream<UTF8>(nullptr, options);
+  return DumpUTFBufferToStream<llvm::UTF8>(nullptr, options);
 }

 template <>
@ -632,7 +635,7 @@ bool StringPrinter::ReadBufferAndDumpToStream<
    const ReadBufferAndDumpToStreamOptions &options) {
  assert(options.GetStream() && "need a Stream to print the string to");

-  return DumpUTFBufferToStream(ConvertUTF16toUTF8, options);
+  return DumpUTFBufferToStream(llvm::ConvertUTF16toUTF8, options);
 }

 template <>
@ -641,7 +644,7 @@ bool StringPrinter::ReadBufferAndDumpToStream<
    const ReadBufferAndDumpToStreamOptions &options) {
  assert(options.GetStream() && "need a Stream to print the string to");

-  return DumpUTFBufferToStream(ConvertUTF32toUTF8, options);
+  return DumpUTFBufferToStream(llvm::ConvertUTF32toUTF8, options);
 }

 } // namespace formatters
--- a/lldb/source/Plugins/Process/minidump/MinidumpTypes.cpp
+++ b/lldb/source/Plugins/Process/minidump/MinidumpTypes.cpp
@ -49,7 +49,7 @@ lldb_private::minidump::parseMinidumpString(llvm::ArrayRef<uint8_t> &data) {
  if (error.Fail() || *source_length > data.size() || *source_length % 2 != 0)
    return llvm::None;

-  auto source_start = reinterpret_cast<const UTF16 *>(data.data());
+  auto source_start = reinterpret_cast<const llvm::UTF16 *>(data.data());
  // source_length is the length of the string in bytes
  // we need the length of the string in UTF-16 characters/code points (16 bits
  // per char)
@ -57,12 +57,12 @@ lldb_private::minidump::parseMinidumpString(llvm::ArrayRef<uint8_t> &data) {
  const auto source_end = source_start + (*source_length) / 2;
  // resize to worst case length
  result.resize(UNI_MAX_UTF8_BYTES_PER_CODE_POINT * (*source_length) / 2);
-  auto result_start = reinterpret_cast<UTF8 *>(&result[0]);
+  auto result_start = reinterpret_cast<llvm::UTF8 *>(&result[0]);
  const auto result_end = result_start + result.size();
-  ConvertUTF16toUTF8(&source_start, source_end, &result_start, result_end,
-                     strictConversion);
+  llvm::ConvertUTF16toUTF8(&source_start, source_end, &result_start, result_end,
+                           llvm::strictConversion);
  const auto result_size =
-      std::distance(reinterpret_cast<UTF8 *>(&result[0]), result_start);
+      std::distance(reinterpret_cast<llvm::UTF8 *>(&result[0]), result_start);
  result.resize(result_size); // shrink to actual length

  return result;
--- a/llvm/include/llvm/Support/ConvertUTF.h
+++ b/llvm/include/llvm/Support/ConvertUTF.h
@ -90,6 +90,14 @@
 #ifndef LLVM_SUPPORT_CONVERTUTF_H
 #define LLVM_SUPPORT_CONVERTUTF_H

+#include <string>
+#include <cstddef>
+
+// Wrap everything in namespace llvm so that programs can link with llvm and
+// their own version of the unicode libraries.
+
+namespace llvm {
+
 /* ---------------------------------------------------------------------
    The following 4 definitions are compiler-specific.
    The C standard does not guarantee that wchar_t has at least
@ -127,11 +135,6 @@ typedef enum {
  lenientConversion
 } ConversionFlags;

-/* This is for C++ and does no harm in C */
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 ConversionResult ConvertUTF8toUTF16 (
  const UTF8** sourceStart, const UTF8* sourceEnd,
  UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags);
@ -174,16 +177,9 @@ Boolean isLegalUTF8String(const UTF8 **source, const UTF8 *sourceEnd);

 unsigned getNumBytesForUTF8(UTF8 firstByte);

-#ifdef __cplusplus
-}
-
 /*************************************************************************/
 /* Below are LLVM-specific wrappers of the functions above. */

-#include <string>
-#include <cstddef>
-
-namespace llvm {
 template <typename T> class ArrayRef;
 template <typename T> class SmallVectorImpl;
 class StringRef;
@ -293,7 +289,3 @@ bool convertUTF8ToUTF16String(StringRef SrcUTF8,
 } /* end namespace llvm */

 #endif
-
-/* --------------------------------------------------------------------- */
-
-#endif
--- a/llvm/lib/Support/CMakeLists.txt
+++ b/llvm/lib/Support/CMakeLists.txt
@ -40,7 +40,7 @@ add_llvm_library(LLVMSupport
  COM.cpp
  CommandLine.cpp
  Compression.cpp
-  ConvertUTF.c
+  ConvertUTF.cpp
  ConvertUTFWrapper.cpp
  CrashRecoveryContext.cpp
  DataExtractor.cpp
--- a/llvm/lib/Support/ConvertUTF.cpp
+++ b/llvm/lib/Support/ConvertUTF.cpp
@ -53,6 +53,8 @@
 #endif
 #include <assert.h>

+namespace llvm {
+
 static const int halfShift  = 10; /* used for shifting by 10 bits */

 static const UTF32 halfBase = 0x0010000UL;
@ -62,8 +64,6 @@ static const UTF32 halfMask = 0x3FFUL;
 #define UNI_SUR_HIGH_END    (UTF32)0xDBFF
 #define UNI_SUR_LOW_START   (UTF32)0xDC00
 #define UNI_SUR_LOW_END     (UTF32)0xDFFF
-#define false      0
-#define true        1

 /* --------------------------------------------------------------------- */

@ -706,3 +706,5 @@ ConversionResult ConvertUTF8toUTF32(const UTF8 **sourceStart,
    similarly unrolled loops.

   --------------------------------------------------------------------- */
+
+} // namespace llvm