Share ConvertUTF8toWide() between Lex and CodeGen.

llvm-svn: 159634
This commit is contained in:
Nico Weber 2012-07-03 02:24:52 +00:00
parent b8124d1af1
commit 4b18c3ff40
5 changed files with 77 additions and 73 deletions

View File

@ -159,6 +159,25 @@ Boolean isLegalUTF8String(const UTF8 *source, const UTF8 *sourceEnd);
#ifdef __cplusplus
}
/*************************************************************************/
/* Below are LLVM-specific wrappers of the functions above. */
#include "llvm/ADT/StringRef.h"
namespace clang {
/**
 * Convert a UTF-8 encoded StringRef to UTF-8, UTF-16, or UTF-32, depending on
 * WideCharWidth, which is the size of one target code unit in bytes and must
 * be 1, 2, or 4. The converted data is written to ResultPtr, which needs to
 * point to at least WideCharWidth * (Source.size() + 1) bytes. On success,
 * ResultPtr will point one past the end of the converted string.
 * \return true on success.
 */
bool ConvertUTF8toWide(unsigned WideCharWidth, llvm::StringRef Source,
char *&ResultPtr);
}
#endif
#endif

View File

@ -3,6 +3,7 @@ set(LLVM_LINK_COMPONENTS mc)
add_clang_library(clangBasic
Builtins.cpp
ConvertUTF.c
ConvertUTFWrapper.cpp
Diagnostic.cpp
DiagnosticIDs.cpp
FileManager.cpp

View File

@ -0,0 +1,54 @@
//===-- ConvertUTFWrapper.cpp - Wrap ConvertUTF.h with clang data types -----===
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "clang/Basic/ConvertUTF.h"
#include "clang/Basic/LLVM.h"
namespace clang {

/// Convert the UTF-8 text in \p Source into code units of \p WideCharWidth
/// bytes each and write them to the buffer at \p ResultPtr.
///
/// \param WideCharWidth Size in bytes of one target code unit; must be 1
///        (UTF-8), 2 (UTF-16) or 4 (UTF-32).
/// \param Source The UTF-8 input.
/// \param ResultPtr Destination buffer, which must hold at least
///        WideCharWidth * (Source.size() + 1) bytes. For widths 2 and 4 it is
///        advanced past the converted data only when the conversion succeeds;
///        for width 1 the bytes are always copied and the pointer is always
///        advanced, and only the return value reports invalid input.
/// \return true if \p Source was well-formed and was converted completely.
bool ConvertUTF8toWide(unsigned WideCharWidth, llvm::StringRef Source,
                       char *&ResultPtr) {
  assert(WideCharWidth == 1 || WideCharWidth == 2 || WideCharWidth == 4);
  ConversionResult result = conversionOK;
  // Copy the character span over.
  if (WideCharWidth == 1) {
    // No widening needed: validate the input, then copy it verbatim.
    if (!isLegalUTF8String(reinterpret_cast<const UTF8*>(Source.begin()),
                           reinterpret_cast<const UTF8*>(Source.end())))
      result = sourceIllegal;
    memcpy(ResultPtr, Source.data(), Source.size());
    ResultPtr += Source.size();
  } else if (WideCharWidth == 2) {
    const UTF8 *sourceStart = reinterpret_cast<const UTF8*>(Source.data());
    // FIXME: Make the type of the result buffer correct instead of
    // using reinterpret_cast.
    UTF16 *targetStart = reinterpret_cast<UTF16*>(ResultPtr);
    ConversionFlags flags = strictConversion;
    // The target end is expressed in UTF16 units, not bytes. A UTF-8 sequence
    // never expands to more UTF-16 code units than it has bytes, so
    // Source.size() units is sufficient and stays inside the caller's buffer.
    result = ConvertUTF8toUTF16(
        &sourceStart, sourceStart + Source.size(),
        &targetStart, targetStart + Source.size(), flags);
    if (result == conversionOK)
      ResultPtr = reinterpret_cast<char*>(targetStart);
  } else if (WideCharWidth == 4) {
    const UTF8 *sourceStart = reinterpret_cast<const UTF8*>(Source.data());
    // FIXME: Make the type of the result buffer correct instead of
    // using reinterpret_cast.
    UTF32 *targetStart = reinterpret_cast<UTF32*>(ResultPtr);
    ConversionFlags flags = strictConversion;
    // As above: the bound is in UTF32 units, and each code point consumes at
    // least one source byte, so Source.size() units is sufficient.
    result = ConvertUTF8toUTF32(
        &sourceStart, sourceStart + Source.size(),
        &targetStart, targetStart + Source.size(), flags);
    if (result == conversionOK)
      ResultPtr = reinterpret_cast<char*>(targetStart);
  }
  assert((result != targetExhausted)
         && "ConvertUTF8toUTFXX exhausted target buffer");
  return result == conversionOK;
}

} // namespace clang

View File

@ -1732,47 +1732,12 @@ GetAddrOfConstantWideString(StringRef Str,
return GV;
}
// FIXME: Mostly copied from StringLiteralParser::CopyStringFragment
static void ConvertUTF8ToWideString(unsigned CharByteWidth, StringRef Source,
SmallString<32>& Target) {
Target.resize(CharByteWidth * (Source.size() + 1));
char* ResultPtr = &Target[0];
assert(CharByteWidth==1 || CharByteWidth==2 || CharByteWidth==4);
ConversionResult result = conversionOK;
// Copy the character span over.
if (CharByteWidth == 1) {
if (!isLegalUTF8String(reinterpret_cast<const UTF8*>(&*Source.begin()),
reinterpret_cast<const UTF8*>(&*Source.end())))
result = sourceIllegal;
memcpy(ResultPtr, Source.data(), Source.size());
ResultPtr += Source.size();
} else if (CharByteWidth == 2) {
UTF8 const *sourceStart = (UTF8 const *)Source.data();
// FIXME: Make the type of the result buffer correct instead of
// using reinterpret_cast.
UTF16 *targetStart = reinterpret_cast<UTF16*>(ResultPtr);
ConversionFlags flags = strictConversion;
result = ConvertUTF8toUTF16(
&sourceStart,sourceStart + Source.size(),
&targetStart,targetStart + 2*Source.size(),flags);
if (result==conversionOK)
ResultPtr = reinterpret_cast<char*>(targetStart);
} else if (CharByteWidth == 4) {
UTF8 const *sourceStart = (UTF8 const *)Source.data();
// FIXME: Make the type of the result buffer correct instead of
// using reinterpret_cast.
UTF32 *targetStart = reinterpret_cast<UTF32*>(ResultPtr);
ConversionFlags flags = strictConversion;
result = ConvertUTF8toUTF32(
&sourceStart,sourceStart + Source.size(),
&targetStart,targetStart + 4*Source.size(),flags);
if (result==conversionOK)
ResultPtr = reinterpret_cast<char*>(targetStart);
}
assert((result != targetExhausted)
&& "ConvertUTF8toUTFXX exhausted target buffer");
assert(result == conversionOK);
bool success = ConvertUTF8toWide(CharByteWidth, Source, ResultPtr);
assert(success);
Target.resize(ResultPtr - &Target[0]);
}

View File

@ -1330,45 +1330,10 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
}
}
/// CopyStringFragment - Copy Fragment into ResultPtr, converting the UTF-8
/// input to code units of CharByteWidth bytes. Returns true if the conversion
/// fails and false on success.
bool StringLiteralParser::CopyStringFragment(StringRef Fragment) {
assert(CharByteWidth==1 || CharByteWidth==2 || CharByteWidth==4);
ConversionResult result = conversionOK;
// Copy the character span over.
if (CharByteWidth == 1) {
if (!isLegalUTF8String(reinterpret_cast<const UTF8*>(Fragment.begin()),
reinterpret_cast<const UTF8*>(Fragment.end())))
result = sourceIllegal;
memcpy(ResultPtr, Fragment.data(), Fragment.size());
ResultPtr += Fragment.size();
} else if (CharByteWidth == 2) {
UTF8 const *sourceStart = (UTF8 const *)Fragment.data();
// FIXME: Make the type of the result buffer correct instead of
// using reinterpret_cast.
UTF16 *targetStart = reinterpret_cast<UTF16*>(ResultPtr);
ConversionFlags flags = strictConversion;
result = ConvertUTF8toUTF16(
&sourceStart,sourceStart + Fragment.size(),
&targetStart,targetStart + 2*Fragment.size(),flags);
if (result==conversionOK)
ResultPtr = reinterpret_cast<char*>(targetStart);
} else if (CharByteWidth == 4) {
UTF8 const *sourceStart = (UTF8 const *)Fragment.data();
// FIXME: Make the type of the result buffer correct instead of
// using reinterpret_cast.
UTF32 *targetStart = reinterpret_cast<UTF32*>(ResultPtr);
ConversionFlags flags = strictConversion;
result = ConvertUTF8toUTF32(
&sourceStart,sourceStart + Fragment.size(),
&targetStart,targetStart + 4*Fragment.size(),flags);
if (result==conversionOK)
ResultPtr = reinterpret_cast<char*>(targetStart);
}
assert((result != targetExhausted)
&& "ConvertUTF8toUTFXX exhausted target buffer");
return result != conversionOK;
return !ConvertUTF8toWide(CharByteWidth, Fragment, ResultPtr);
}
bool StringLiteralParser::DiagnoseBadString(const Token &Tok) {