llvm-project/lldb/source/DataFormatters/StringPrinter.cpp

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

640 lines
21 KiB
C++
Raw Normal View History

//===-- StringPrinter.cpp -------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "lldb/DataFormatters/StringPrinter.h"
#include "lldb/Core/Debugger.h"
#include "lldb/Core/ValueObject.h"
#include "lldb/Target/Language.h"
#include "lldb/Target/Process.h"
#include "lldb/Target/Target.h"
#include "lldb/Utility/Status.h"
#include "llvm/Support/ConvertUTF.h"
#include <ctype.h>
#include <locale>
#include <memory>
using namespace lldb;
using namespace lldb_private;
using namespace lldb_private::formatters;
// Primary template, declared for every StringElementType but specialized in
// this file only for the element types we care about (ASCII and UTF8 below).
// Leaving the other types undefined is deliberate: using one of them is meant
// to fail at build time instead of silently doing the wrong thing.
//
// buffer points at the first byte of the element to examine and buffer_end at
// the end of the readable range. On success the specializations store the
// position of the first unconsumed byte in next.
template <lldb_private::formatters::StringPrinter::StringElementType type>
static StringPrinter::StringPrinterBufferPointer
GetPrintableImpl(uint8_t *buffer, uint8_t *buffer_end, uint8_t *&next);
// Unicode flavour of isprint(): returns true when the code point can be
// written out verbatim and false when it belongs to one of the families of
// control/format characters that callers escape instead.
static bool isprint(char32_t codepoint) {
  // C0 controls plus DEL.
  const bool is_c0_control = codepoint <= 0x1F || codepoint == 0x7F;
  // C1 controls.
  const bool is_c1_control = codepoint >= 0x80 && codepoint <= 0x9F;
  // Line separator / paragraph separator.
  const bool is_separator = codepoint == 0x2028 || codepoint == 0x2029;
  // Bidirectional text control: the two directional marks and the
  // embedding/override range.
  const bool is_bidi_control = codepoint == 0x200E || codepoint == 0x200F ||
                               (codepoint >= 0x202A && codepoint <= 0x202E);
  // Interlinear annotation characters and, more generally, the specials.
  const bool is_special = codepoint >= 0xFFF9 && codepoint <= 0xFFFF;

  return !(is_c0_control || is_c1_control || is_separator ||
           is_bidi_control || is_special);
}
// ASCII specialization. Looks at the single byte at buffer and returns what
// should be printed for it: a two-character C escape for the well-known
// control characters, the quote and the backslash; the byte itself when
// isprint() accepts it; otherwise a freshly allocated "\xNN" escape that the
// returned object releases through its deleter. next is always set to
// buffer + 1. buffer_end is not consulted.
template <>
StringPrinter::StringPrinterBufferPointer
GetPrintableImpl<StringPrinter::StringElementType::ASCII>(uint8_t *buffer,
                                                          uint8_t *buffer_end,
                                                          uint8_t *&next) {
  const uint8_t byte = *buffer;

  // Exactly one input byte is consumed, however it ends up being rendered.
  next = buffer + 1;

  // Bytes that have a dedicated two-character escape sequence.
  const char *escape = nullptr;
  switch (byte) {
  case 0:
    escape = "\\0";
    break;
  case '\a':
    escape = "\\a";
    break;
  case '\b':
    escape = "\\b";
    break;
  case '\f':
    escape = "\\f";
    break;
  case '\n':
    escape = "\\n";
    break;
  case '\r':
    escape = "\\r";
    break;
  case '\t':
    escape = "\\t";
    break;
  case '\v':
    escape = "\\v";
    break;
  case '\"':
    escape = "\\\"";
    break;
  case '\\':
    escape = "\\\\";
    break;
  default:
    break;
  }
  if (escape)
    return {escape, 2};

  // Printable bytes are handed back as-is, pointing into the input buffer.
  if (isprint(byte))
    return {buffer, 1};

  // Everything else becomes a hex escape: 4 visible characters plus the
  // terminating NUL written by sprintf.
  uint8_t *hex_escape = new uint8_t[5];
  sprintf(reinterpret_cast<char *>(hex_escape), "\\x%02x", byte);
  return {hex_escape, 4, [](const uint8_t *c) { delete[] c; }};
}
// Decodes a two-byte UTF-8 sequence (lead byte c0 followed by continuation
// byte c1) into its code point. The bytes are not validated.
static char32_t ConvertUTF8ToCodePoint(unsigned char c0, unsigned char c1) {
  const int lead_payload = c0 - 0xC0; // drop the 110xxxxx marker bits
  const int tail_payload = c1 - 0x80; // drop the 10xxxxxx marker bits
  return lead_payload * 64 + tail_payload;
}
// Decodes a three-byte UTF-8 sequence (lead byte c0 followed by continuation
// bytes c1 and c2) into its code point. The bytes are not validated.
static char32_t ConvertUTF8ToCodePoint(unsigned char c0, unsigned char c1,
                                       unsigned char c2) {
  const int lead_payload = c0 - 0xE0; // drop the 1110xxxx marker bits
  const int mid_payload = c1 - 0x80;  // drop the 10xxxxxx marker bits
  const int tail_payload = c2 - 0x80;
  return lead_payload * 4096 + mid_payload * 64 + tail_payload;
}
// Decodes a four-byte UTF-8 sequence (lead byte c0 followed by continuation
// bytes c1, c2 and c3) into its code point. The bytes are not validated.
//
// Each continuation byte contributes six payload bits, so the weights are
// 2^18 for the lead byte, 2^12 for c1, 2^6 for c2 and 1 for c3.
static char32_t ConvertUTF8ToCodePoint(unsigned char c0, unsigned char c1,
                                       unsigned char c2, unsigned char c3) {
  return (c0 - 240) * 262144 + (c1 - 128) * 4096 + (c2 - 128) * 64 +
         (c3 - 128);
}
template <>
StringPrinter::StringPrinterBufferPointer
GetPrintableImpl<StringPrinter::StringElementType::UTF8>(uint8_t *buffer,
uint8_t *buffer_end,
uint8_t *&next) {
StringPrinter::StringPrinterBufferPointer retval{nullptr};
[lldb/StringPrinter] Avoid reading garbage in uninitialized strings This patch fixes a few related out-of-bounds read bugs in the string data formatters. These issues have to do with mishandling of un- initialized strings. These manifest as ASan exceptions when debugging a clang binary. The first issue was that the std::string formatter treated strings in "short mode" with length greater than the size of the inline buffer as valid. The second issue was that the StringPrinter facility did not check that a full utf8 codepoint sequence can be read from the buffer (i.e. there are some missing range checks). I took the opportunity here to delete some untested code that was meant to deal with invalid input and replace it with fail-on-invalid logic ([1][2][3]). This means we'll give up on formatting an invalid string instead of guessing our way through it. The third issue is that StringPrinter did not check that a utf8 sequence could actually be fully read from the string payload. This one is especially tricky as we may overflow the buffer pointer while reading the sequence. I also noticed that the std::string formatter would spew the raw version of the underlying ValueObject when garbage is detected. I've changed this to just print "Summary Unavailable" instead, as we do elsewhere. I've added regression tests for these issues to test/functionalities/data-formatter/data-formatter-stl/libcxx/string. 
[1] http://lab.llvm.org:8080/coverage/coverage-reports/coverage/Users/buildslave/jenkins/workspace/coverage/llvm-project/lldb/source/DataFormatters/StringPrinter.cpp.html#L136 [2] http://lab.llvm.org:8080/coverage/coverage-reports/coverage/Users/buildslave/jenkins/workspace/coverage/llvm-project/lldb/source/DataFormatters/StringPrinter.cpp.html#L163 [3] http://lab.llvm.org:8080/coverage/coverage-reports/coverage/Users/buildslave/jenkins/workspace/coverage/llvm-project/lldb/source/DataFormatters/StringPrinter.cpp.html#L357 rdar://59080026 Differential Revision: https://reviews.llvm.org/D73860
2020-02-01 15:18:17 +08:00
const unsigned utf8_encoded_len = llvm::getNumBytesForUTF8(*buffer);
[lldb/StringPrinter] Avoid reading garbage in uninitialized strings This patch fixes a few related out-of-bounds read bugs in the string data formatters. These issues have to do with mishandling of un- initialized strings. These manifest as ASan exceptions when debugging a clang binary. The first issue was that the std::string formatter treated strings in "short mode" with length greater than the size of the inline buffer as valid. The second issue was that the StringPrinter facility did not check that a full utf8 codepoint sequence can be read from the buffer (i.e. there are some missing range checks). I took the opportunity here to delete some untested code that was meant to deal with invalid input and replace it with fail-on-invalid logic ([1][2][3]). This means we'll give up on formatting an invalid string instead of guessing our way through it. The third issue is that StringPrinter did not check that a utf8 sequence could actually be fully read from the string payload. This one is especially tricky as we may overflow the buffer pointer while reading the sequence. I also noticed that the std::string formatter would spew the raw version of the underlying ValueObject when garbage is detected. I've changed this to just print "Summary Unavailable" instead, as we do elsewhere. I've added regression tests for these issues to test/functionalities/data-formatter/data-formatter-stl/libcxx/string. 
[1] http://lab.llvm.org:8080/coverage/coverage-reports/coverage/Users/buildslave/jenkins/workspace/coverage/llvm-project/lldb/source/DataFormatters/StringPrinter.cpp.html#L136 [2] http://lab.llvm.org:8080/coverage/coverage-reports/coverage/Users/buildslave/jenkins/workspace/coverage/llvm-project/lldb/source/DataFormatters/StringPrinter.cpp.html#L163 [3] http://lab.llvm.org:8080/coverage/coverage-reports/coverage/Users/buildslave/jenkins/workspace/coverage/llvm-project/lldb/source/DataFormatters/StringPrinter.cpp.html#L357 rdar://59080026 Differential Revision: https://reviews.llvm.org/D73860
2020-02-01 15:18:17 +08:00
// If the utf8 encoded length is invalid, or if there aren't enough bytes to
// print, this is some kind of corrupted string.
if (utf8_encoded_len == 0 || utf8_encoded_len > 4)
return retval;
if ((buffer_end - buffer) < utf8_encoded_len)
// There's no room in the buffer for the utf8 sequence.
return retval;
char32_t codepoint = 0;
switch (utf8_encoded_len) {
case 1:
// this is just an ASCII byte - ask ASCII
return GetPrintableImpl<StringPrinter::StringElementType::ASCII>(
buffer, buffer_end, next);
case 2:
codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer,
(unsigned char)*(buffer + 1));
break;
case 3:
codepoint = ConvertUTF8ToCodePoint((unsigned char)*buffer,
(unsigned char)*(buffer + 1),
(unsigned char)*(buffer + 2));
break;
case 4:
codepoint = ConvertUTF8ToCodePoint(
(unsigned char)*buffer, (unsigned char)*(buffer + 1),
(unsigned char)*(buffer + 2), (unsigned char)*(buffer + 3));
break;
}
if (codepoint) {
switch (codepoint) {
case 0:
retval = {"\\0", 2};
break;
case '\a':
retval = {"\\a", 2};
break;
case '\b':
retval = {"\\b", 2};
break;
case '\f':
retval = {"\\f", 2};
break;
case '\n':
retval = {"\\n", 2};
break;
case '\r':
retval = {"\\r", 2};
break;
case '\t':
retval = {"\\t", 2};
break;
case '\v':
retval = {"\\v", 2};
break;
case '\"':
retval = {"\\\"", 2};
break;
case '\\':
retval = {"\\\\", 2};
break;
default:
if (isprint(codepoint))
retval = {buffer, utf8_encoded_len};
else {
uint8_t *data = new uint8_t[11];
sprintf((char *)data, "\\U%08x", (unsigned)codepoint);
retval = {data, 10, [](const uint8_t *c) { delete[] c; }};
break;
}
}
next = buffer + utf8_encoded_len;
return retval;
}
[lldb/StringPrinter] Avoid reading garbage in uninitialized strings This patch fixes a few related out-of-bounds read bugs in the string data formatters. These issues have to do with mishandling of un- initialized strings. These manifest as ASan exceptions when debugging a clang binary. The first issue was that the std::string formatter treated strings in "short mode" with length greater than the size of the inline buffer as valid. The second issue was that the StringPrinter facility did not check that a full utf8 codepoint sequence can be read from the buffer (i.e. there are some missing range checks). I took the opportunity here to delete some untested code that was meant to deal with invalid input and replace it with fail-on-invalid logic ([1][2][3]). This means we'll give up on formatting an invalid string instead of guessing our way through it. The third issue is that StringPrinter did not check that a utf8 sequence could actually be fully read from the string payload. This one is especially tricky as we may overflow the buffer pointer while reading the sequence. I also noticed that the std::string formatter would spew the raw version of the underlying ValueObject when garbage is detected. I've changed this to just print "Summary Unavailable" instead, as we do elsewhere. I've added regression tests for these issues to test/functionalities/data-formatter/data-formatter-stl/libcxx/string. 
[1] http://lab.llvm.org:8080/coverage/coverage-reports/coverage/Users/buildslave/jenkins/workspace/coverage/llvm-project/lldb/source/DataFormatters/StringPrinter.cpp.html#L136 [2] http://lab.llvm.org:8080/coverage/coverage-reports/coverage/Users/buildslave/jenkins/workspace/coverage/llvm-project/lldb/source/DataFormatters/StringPrinter.cpp.html#L163 [3] http://lab.llvm.org:8080/coverage/coverage-reports/coverage/Users/buildslave/jenkins/workspace/coverage/llvm-project/lldb/source/DataFormatters/StringPrinter.cpp.html#L357 rdar://59080026 Differential Revision: https://reviews.llvm.org/D73860
2020-02-01 15:18:17 +08:00
// We couldn't figure out how to print this string.
return retval;
}
// Given a sequence of bytes, this function returns: a sequence of bytes to
// actually print out + a length the following unscanned position of the buffer
// is in next
static StringPrinter::StringPrinterBufferPointer
GetPrintable(StringPrinter::StringElementType type, uint8_t *buffer,
uint8_t *buffer_end, uint8_t *&next) {
[lldb/StringPrinter] Avoid reading garbage in uninitialized strings This patch fixes a few related out-of-bounds read bugs in the string data formatters. These issues have to do with mishandling of un- initialized strings. These manifest as ASan exceptions when debugging a clang binary. The first issue was that the std::string formatter treated strings in "short mode" with length greater than the size of the inline buffer as valid. The second issue was that the StringPrinter facility did not check that a full utf8 codepoint sequence can be read from the buffer (i.e. there are some missing range checks). I took the opportunity here to delete some untested code that was meant to deal with invalid input and replace it with fail-on-invalid logic ([1][2][3]). This means we'll give up on formatting an invalid string instead of guessing our way through it. The third issue is that StringPrinter did not check that a utf8 sequence could actually be fully read from the string payload. This one is especially tricky as we may overflow the buffer pointer while reading the sequence. I also noticed that the std::string formatter would spew the raw version of the underlying ValueObject when garbage is detected. I've changed this to just print "Summary Unavailable" instead, as we do elsewhere. I've added regression tests for these issues to test/functionalities/data-formatter/data-formatter-stl/libcxx/string. 
[1] http://lab.llvm.org:8080/coverage/coverage-reports/coverage/Users/buildslave/jenkins/workspace/coverage/llvm-project/lldb/source/DataFormatters/StringPrinter.cpp.html#L136 [2] http://lab.llvm.org:8080/coverage/coverage-reports/coverage/Users/buildslave/jenkins/workspace/coverage/llvm-project/lldb/source/DataFormatters/StringPrinter.cpp.html#L163 [3] http://lab.llvm.org:8080/coverage/coverage-reports/coverage/Users/buildslave/jenkins/workspace/coverage/llvm-project/lldb/source/DataFormatters/StringPrinter.cpp.html#L357 rdar://59080026 Differential Revision: https://reviews.llvm.org/D73860
2020-02-01 15:18:17 +08:00
if (!buffer || buffer >= buffer_end)
return {nullptr};
switch (type) {
case StringPrinter::StringElementType::ASCII:
return GetPrintableImpl<StringPrinter::StringElementType::ASCII>(
buffer, buffer_end, next);
case StringPrinter::StringElementType::UTF8:
return GetPrintableImpl<StringPrinter::StringElementType::UTF8>(
buffer, buffer_end, next);
default:
return {nullptr};
}
}
// Returns the default escaping callback for elem_type: a thin wrapper that
// forwards to GetPrintable() with the matching StringElementType.
StringPrinter::EscapingHelper
StringPrinter::GetDefaultEscapingHelper(GetPrintableElementType elem_type) {
  if (elem_type == GetPrintableElementType::UTF8) {
    return [](uint8_t *begin, uint8_t *end,
              uint8_t *&next) -> StringPrinter::StringPrinterBufferPointer {
      return GetPrintable(StringPrinter::StringElementType::UTF8, begin, end,
                          next);
    };
  }

  if (elem_type == GetPrintableElementType::ASCII) {
    return [](uint8_t *begin, uint8_t *end,
              uint8_t *&next) -> StringPrinter::StringPrinterBufferPointer {
      return GetPrintable(StringPrinter::StringElementType::ASCII, begin, end,
                          next);
    };
  }

  llvm_unreachable("bad element type");
}
// use this call if you already have an LLDB-side buffer for the data
template <typename SourceDataType>
static bool DumpUTFBufferToStream(
llvm::ConversionResult (*ConvertFunction)(const SourceDataType **,
const SourceDataType *,
llvm::UTF8 **, llvm::UTF8 *,
llvm::ConversionFlags),
const StringPrinter::ReadBufferAndDumpToStreamOptions &dump_options) {
Stream &stream(*dump_options.GetStream());
if (dump_options.GetPrefixToken() != nullptr)
stream.Printf("%s", dump_options.GetPrefixToken());
if (dump_options.GetQuote() != 0)
stream.Printf("%c", dump_options.GetQuote());
auto data(dump_options.GetData());
auto source_size(dump_options.GetSourceSize());
if (data.GetByteSize() && data.GetDataStart() && data.GetDataEnd()) {
const int bufferSPSize = data.GetByteSize();
if (dump_options.GetSourceSize() == 0) {
const int origin_encoding = 8 * sizeof(SourceDataType);
source_size = bufferSPSize / (origin_encoding / 4);
}
const SourceDataType *data_ptr =
(const SourceDataType *)data.GetDataStart();
const SourceDataType *data_end_ptr = data_ptr + source_size;
const bool zero_is_terminator = dump_options.GetBinaryZeroIsTerminator();
if (zero_is_terminator) {
while (data_ptr < data_end_ptr) {
if (!*data_ptr) {
data_end_ptr = data_ptr;
break;
}
data_ptr++;
}
data_ptr = (const SourceDataType *)data.GetDataStart();
}
lldb::DataBufferSP utf8_data_buffer_sp;
llvm::UTF8 *utf8_data_ptr = nullptr;
llvm::UTF8 *utf8_data_end_ptr = nullptr;
if (ConvertFunction) {
utf8_data_buffer_sp =
std::make_shared<DataBufferHeap>(4 * bufferSPSize, 0);
utf8_data_ptr = (llvm::UTF8 *)utf8_data_buffer_sp->GetBytes();
utf8_data_end_ptr = utf8_data_ptr + utf8_data_buffer_sp->GetByteSize();
ConvertFunction(&data_ptr, data_end_ptr, &utf8_data_ptr,
utf8_data_end_ptr, llvm::lenientConversion);
if (!zero_is_terminator)
utf8_data_end_ptr = utf8_data_ptr;
// needed because the ConvertFunction will change the value of the
// data_ptr.
utf8_data_ptr =
(llvm::UTF8 *)utf8_data_buffer_sp->GetBytes();
} else {
// just copy the pointers - the cast is necessary to make the compiler
// happy but this should only happen if we are reading UTF8 data
utf8_data_ptr = const_cast<llvm::UTF8 *>(
reinterpret_cast<const llvm::UTF8 *>(data_ptr));
utf8_data_end_ptr = const_cast<llvm::UTF8 *>(
reinterpret_cast<const llvm::UTF8 *>(data_end_ptr));
}
const bool escape_non_printables = dump_options.GetEscapeNonPrintables();
lldb_private::formatters::StringPrinter::EscapingHelper escaping_callback;
if (escape_non_printables) {
if (Language *language = Language::FindPlugin(dump_options.GetLanguage()))
escaping_callback = language->GetStringPrinterEscapingHelper(
lldb_private::formatters::StringPrinter::GetPrintableElementType::
UTF8);
else
escaping_callback =
lldb_private::formatters::StringPrinter::GetDefaultEscapingHelper(
lldb_private::formatters::StringPrinter::
GetPrintableElementType::UTF8);
}
// since we tend to accept partial data (and even partially malformed data)
// we might end up with no NULL terminator before the end_ptr hence we need
// to take a slower route and ensure we stay within boundaries
for (; utf8_data_ptr < utf8_data_end_ptr;) {
if (zero_is_terminator && !*utf8_data_ptr)
break;
if (escape_non_printables) {
uint8_t *next_data = nullptr;
auto printable =
escaping_callback(utf8_data_ptr, utf8_data_end_ptr, next_data);
auto printable_bytes = printable.GetBytes();
auto printable_size = printable.GetSize();
[lldb/StringPrinter] Avoid reading garbage in uninitialized strings This patch fixes a few related out-of-bounds read bugs in the string data formatters. These issues have to do with mishandling of un- initialized strings. These manifest as ASan exceptions when debugging a clang binary. The first issue was that the std::string formatter treated strings in "short mode" with length greater than the size of the inline buffer as valid. The second issue was that the StringPrinter facility did not check that a full utf8 codepoint sequence can be read from the buffer (i.e. there are some missing range checks). I took the opportunity here to delete some untested code that was meant to deal with invalid input and replace it with fail-on-invalid logic ([1][2][3]). This means we'll give up on formatting an invalid string instead of guessing our way through it. The third issue is that StringPrinter did not check that a utf8 sequence could actually be fully read from the string payload. This one is especially tricky as we may overflow the buffer pointer while reading the sequence. I also noticed that the std::string formatter would spew the raw version of the underlying ValueObject when garbage is detected. I've changed this to just print "Summary Unavailable" instead, as we do elsewhere. I've added regression tests for these issues to test/functionalities/data-formatter/data-formatter-stl/libcxx/string. 
[1] http://lab.llvm.org:8080/coverage/coverage-reports/coverage/Users/buildslave/jenkins/workspace/coverage/llvm-project/lldb/source/DataFormatters/StringPrinter.cpp.html#L136 [2] http://lab.llvm.org:8080/coverage/coverage-reports/coverage/Users/buildslave/jenkins/workspace/coverage/llvm-project/lldb/source/DataFormatters/StringPrinter.cpp.html#L163 [3] http://lab.llvm.org:8080/coverage/coverage-reports/coverage/Users/buildslave/jenkins/workspace/coverage/llvm-project/lldb/source/DataFormatters/StringPrinter.cpp.html#L357 rdar://59080026 Differential Revision: https://reviews.llvm.org/D73860
2020-02-01 15:18:17 +08:00
// We failed to figure out how to print this string.
if (!printable_bytes || !next_data)
return false;
for (unsigned c = 0; c < printable_size; c++)
stream.Printf("%c", *(printable_bytes + c));
utf8_data_ptr = (uint8_t *)next_data;
} else {
stream.Printf("%c", *utf8_data_ptr);
utf8_data_ptr++;
}
}
}
if (dump_options.GetQuote() != 0)
stream.Printf("%c", dump_options.GetQuote());
if (dump_options.GetSuffixToken() != nullptr)
stream.Printf("%s", dump_options.GetSuffixToken());
if (dump_options.GetIsTruncated())
stream.Printf("...");
return true;
}
// Builds string-reading options for valobj: starts from the defaults and then
// adopts the escape-non-printables setting of the debugger reached through
// valobj's target.
lldb_private::formatters::StringPrinter::ReadStringAndDumpToStreamOptions::
    ReadStringAndDumpToStreamOptions(ValueObject &valobj)
    : ReadStringAndDumpToStreamOptions() {
  const auto target_sp = valobj.GetTargetSP();
  const auto escape = target_sp->GetDebugger().GetEscapeNonPrintables();
  SetEscapeNonPrintables(escape);
}
// Builds buffer-dumping options for valobj: starts from the defaults and then
// adopts the escape-non-printables setting of the debugger reached through
// valobj's target.
lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStreamOptions::
    ReadBufferAndDumpToStreamOptions(ValueObject &valobj)
    : ReadBufferAndDumpToStreamOptions() {
  const auto target_sp = valobj.GetTargetSP();
  const auto escape = target_sp->GetDebugger().GetEscapeNonPrintables();
  SetEscapeNonPrintables(escape);
}
// Converts string-reading options into buffer-dumping options: starts from the
// defaults and copies over the stream, prefix/suffix tokens, quote character,
// escaping flag, zero-terminator flag and language. Nothing else is copied
// here; in this file the caller sets the data, source size and truncation flag
// on the result afterwards (see ReadUTFBufferAndDumpToStream).
lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStreamOptions::
ReadBufferAndDumpToStreamOptions(
const ReadStringAndDumpToStreamOptions &options)
: ReadBufferAndDumpToStreamOptions() {
SetStream(options.GetStream());
SetPrefixToken(options.GetPrefixToken());
SetSuffixToken(options.GetSuffixToken());
SetQuote(options.GetQuote());
SetEscapeNonPrintables(options.GetEscapeNonPrintables());
SetBinaryZeroIsTerminator(options.GetBinaryZeroIsTerminator());
SetLanguage(options.GetLanguage());
}
namespace lldb_private {
namespace formatters {
template <>
bool StringPrinter::ReadStringAndDumpToStream<
StringPrinter::StringElementType::ASCII>(
const ReadStringAndDumpToStreamOptions &options) {
assert(options.GetStream() && "need a Stream to print the string to");
Status my_error;
ProcessSP process_sp(options.GetProcessSP());
if (process_sp.get() == nullptr || options.GetLocation() == 0)
return false;
size_t size;
const auto max_size = process_sp->GetTarget().GetMaximumSizeOfStringSummary();
bool is_truncated = false;
if (options.GetSourceSize() == 0)
size = max_size;
else if (!options.GetIgnoreMaxLength()) {
size = options.GetSourceSize();
if (size > max_size) {
size = max_size;
is_truncated = true;
}
} else
size = options.GetSourceSize();
lldb::DataBufferSP buffer_sp(new DataBufferHeap(size, 0));
process_sp->ReadCStringFromMemory(
options.GetLocation(), (char *)buffer_sp->GetBytes(), size, my_error);
if (my_error.Fail())
return false;
const char *prefix_token = options.GetPrefixToken();
char quote = options.GetQuote();
if (prefix_token != nullptr)
options.GetStream()->Printf("%s%c", prefix_token, quote);
else if (quote != 0)
options.GetStream()->Printf("%c", quote);
uint8_t *data_end = buffer_sp->GetBytes() + buffer_sp->GetByteSize();
const bool escape_non_printables = options.GetEscapeNonPrintables();
lldb_private::formatters::StringPrinter::EscapingHelper escaping_callback;
if (escape_non_printables) {
if (Language *language = Language::FindPlugin(options.GetLanguage()))
escaping_callback = language->GetStringPrinterEscapingHelper(
lldb_private::formatters::StringPrinter::GetPrintableElementType::
ASCII);
else
escaping_callback =
lldb_private::formatters::StringPrinter::GetDefaultEscapingHelper(
lldb_private::formatters::StringPrinter::GetPrintableElementType::
ASCII);
}
// since we tend to accept partial data (and even partially malformed data)
// we might end up with no NULL terminator before the end_ptr hence we need
// to take a slower route and ensure we stay within boundaries
for (uint8_t *data = buffer_sp->GetBytes(); *data && (data < data_end);) {
if (escape_non_printables) {
uint8_t *next_data = nullptr;
auto printable = escaping_callback(data, data_end, next_data);
auto printable_bytes = printable.GetBytes();
auto printable_size = printable.GetSize();
[lldb/StringPrinter] Avoid reading garbage in uninitialized strings This patch fixes a few related out-of-bounds read bugs in the string data formatters. These issues have to do with mishandling of un- initialized strings. These manifest as ASan exceptions when debugging a clang binary. The first issue was that the std::string formatter treated strings in "short mode" with length greater than the size of the inline buffer as valid. The second issue was that the StringPrinter facility did not check that a full utf8 codepoint sequence can be read from the buffer (i.e. there are some missing range checks). I took the opportunity here to delete some untested code that was meant to deal with invalid input and replace it with fail-on-invalid logic ([1][2][3]). This means we'll give up on formatting an invalid string instead of guessing our way through it. The third issue is that StringPrinter did not check that a utf8 sequence could actually be fully read from the string payload. This one is especially tricky as we may overflow the buffer pointer while reading the sequence. I also noticed that the std::string formatter would spew the raw version of the underlying ValueObject when garbage is detected. I've changed this to just print "Summary Unavailable" instead, as we do elsewhere. I've added regression tests for these issues to test/functionalities/data-formatter/data-formatter-stl/libcxx/string. 
[1] http://lab.llvm.org:8080/coverage/coverage-reports/coverage/Users/buildslave/jenkins/workspace/coverage/llvm-project/lldb/source/DataFormatters/StringPrinter.cpp.html#L136 [2] http://lab.llvm.org:8080/coverage/coverage-reports/coverage/Users/buildslave/jenkins/workspace/coverage/llvm-project/lldb/source/DataFormatters/StringPrinter.cpp.html#L163 [3] http://lab.llvm.org:8080/coverage/coverage-reports/coverage/Users/buildslave/jenkins/workspace/coverage/llvm-project/lldb/source/DataFormatters/StringPrinter.cpp.html#L357 rdar://59080026 Differential Revision: https://reviews.llvm.org/D73860
2020-02-01 15:18:17 +08:00
// We failed to figure out how to print this string.
if (!printable_bytes || !next_data)
return false;
for (unsigned c = 0; c < printable_size; c++)
options.GetStream()->Printf("%c", *(printable_bytes + c));
data = (uint8_t *)next_data;
} else {
options.GetStream()->Printf("%c", *data);
data++;
}
}
const char *suffix_token = options.GetSuffixToken();
if (suffix_token != nullptr)
options.GetStream()->Printf("%c%s", quote, suffix_token);
else if (quote != 0)
options.GetStream()->Printf("%c", quote);
if (is_truncated)
options.GetStream()->Printf("...");
return true;
}
// Reads a string of SourceDataType elements from the process at
// options.GetLocation() and hands it to DumpUTFBufferToStream() for printing.
//
// ConvertFunction turns the source elements into UTF-8; it may be nullptr
// only when the elements are a single byte wide.
//
// Returns false for an unusable location (0 or LLDB_INVALID_ADDRESS), a
// missing process or stream, an unsupported element width, a missing
// converter, or a buffer without storage. A failed memory read prints
// "unable to read data" to the stream and returns true. Otherwise the result
// of DumpUTFBufferToStream() is returned.
template <typename SourceDataType>
static bool ReadUTFBufferAndDumpToStream(
    const StringPrinter::ReadStringAndDumpToStreamOptions &options,
    llvm::ConversionResult (*ConvertFunction)(const SourceDataType **,
                                              const SourceDataType *,
                                              llvm::UTF8 **, llvm::UTF8 *,
                                              llvm::ConversionFlags)) {
  assert(options.GetStream() && "need a Stream to print the string to");

  const auto location = options.GetLocation();
  if (location == 0 || location == LLDB_INVALID_ADDRESS)
    return false;

  lldb::ProcessSP process_sp(options.GetProcessSP());
  if (!process_sp)
    return false;

  // Only 1-, 2- and 4-byte elements (UTF-8/16/32) are understood.
  const int type_width = sizeof(SourceDataType);
  const bool is_single_byte = type_width == 1;
  if (!is_single_byte && type_width != 2 && type_width != 4)
    return false;

  // if not UTF8, I need a conversion function to return proper UTF8
  if (!is_single_byte && !ConvertFunction)
    return false;

  if (!options.GetStream())
    return false;

  uint32_t element_count = options.GetSourceSize();
  bool read_until_zero = options.GetNeedsZeroTermination();
  bool is_truncated = false;
  const auto max_size = process_sp->GetTarget().GetMaximumSizeOfStringSummary();

  if (element_count == 0) {
    // No explicit length: read up to the maximum and rely on the terminator.
    element_count = max_size;
    read_until_zero = true;
  } else if (!options.GetIgnoreMaxLength() && element_count > max_size) {
    element_count = max_size;
    is_truncated = true;
  }

  const int byte_count = element_count * type_width;
  lldb::DataBufferSP buffer_sp(new DataBufferHeap(byte_count, 0));
  if (!buffer_sp->GetBytes())
    return false;

  Status error;
  char *raw_bytes = reinterpret_cast<char *>(buffer_sp->GetBytes());
  if (read_until_zero) {
    process_sp->ReadStringFromMemory(location, raw_bytes, byte_count, error,
                                     type_width);
  } else {
    process_sp->ReadMemoryFromInferior(location, raw_bytes, byte_count, error);
  }

  if (error.Fail()) {
    options.GetStream()->Printf("unable to read data");
    return true;
  }

  DataExtractor data(buffer_sp, process_sp->GetByteOrder(),
                     process_sp->GetAddressByteSize());

  StringPrinter::ReadBufferAndDumpToStreamOptions dump_options(options);
  dump_options.SetData(data);
  dump_options.SetSourceSize(element_count);
  dump_options.SetIsTruncated(is_truncated);

  return DumpUTFBufferToStream(ConvertFunction, dump_options);
}
// UTF8 specialization: reads the string from process memory and dumps it via
// ReadUTFBufferAndDumpToStream. No conversion function is passed (nullptr)
// because the source elements are already UTF-8.
template <>
bool StringPrinter::ReadStringAndDumpToStream<
StringPrinter::StringElementType::UTF8>(
const ReadStringAndDumpToStreamOptions &options) {
return ReadUTFBufferAndDumpToStream<llvm::UTF8>(options, nullptr);
}
// UTF16 specialization: reads the string from process memory and dumps it via
// ReadUTFBufferAndDumpToStream, using llvm::ConvertUTF16toUTF8 to turn the
// source elements into UTF-8.
template <>
bool StringPrinter::ReadStringAndDumpToStream<
StringPrinter::StringElementType::UTF16>(
const ReadStringAndDumpToStreamOptions &options) {
return ReadUTFBufferAndDumpToStream<llvm::UTF16>(options,
llvm::ConvertUTF16toUTF8);
}
// UTF32 specialization: reads the string from process memory and dumps it via
// ReadUTFBufferAndDumpToStream, using llvm::ConvertUTF32toUTF8 to turn the
// source elements into UTF-8.
template <>
bool StringPrinter::ReadStringAndDumpToStream<
StringPrinter::StringElementType::UTF32>(
const ReadStringAndDumpToStreamOptions &options) {
return ReadUTFBufferAndDumpToStream<llvm::UTF32>(options,
llvm::ConvertUTF32toUTF8);
}
// UTF8 specialization: dumps a buffer that is already on the LLDB side via
// DumpUTFBufferToStream. No conversion function is passed (nullptr), so the
// bytes in the options' data are used as UTF-8 directly.
template <>
bool StringPrinter::ReadBufferAndDumpToStream<
StringPrinter::StringElementType::UTF8>(
const ReadBufferAndDumpToStreamOptions &options) {
assert(options.GetStream() && "need a Stream to print the string to");
return DumpUTFBufferToStream<llvm::UTF8>(nullptr, options);
}
// ASCII specialization: forwards to the UTF8 specialization, so ASCII buffers
// go through exactly the same dumping path as UTF-8 ones.
template <>
bool StringPrinter::ReadBufferAndDumpToStream<
StringPrinter::StringElementType::ASCII>(
const ReadBufferAndDumpToStreamOptions &options) {
// treat ASCII the same as UTF8
// FIXME: can we optimize ASCII some more?
return ReadBufferAndDumpToStream<StringElementType::UTF8>(options);
}
// UTF16 specialization: dumps a buffer that is already on the LLDB side via
// DumpUTFBufferToStream, using llvm::ConvertUTF16toUTF8 to produce UTF-8.
template <>
bool StringPrinter::ReadBufferAndDumpToStream<
StringPrinter::StringElementType::UTF16>(
const ReadBufferAndDumpToStreamOptions &options) {
assert(options.GetStream() && "need a Stream to print the string to");
return DumpUTFBufferToStream(llvm::ConvertUTF16toUTF8, options);
}
// UTF32 specialization: dumps a buffer that is already on the LLDB side via
// DumpUTFBufferToStream, using llvm::ConvertUTF32toUTF8 to produce UTF-8.
template <>
bool StringPrinter::ReadBufferAndDumpToStream<
StringPrinter::StringElementType::UTF32>(
const ReadBufferAndDumpToStreamOptions &options) {
assert(options.GetStream() && "need a Stream to print the string to");
return DumpUTFBufferToStream(llvm::ConvertUTF32toUTF8, options);
}
} // namespace formatters
} // namespace lldb_private