forked from OSchip/llvm-project
llvm-undname: Improve string literal demangling with embedded \0 chars
- Don't assert when a string looks like a u32 string to the heuristic but doesn't have a length that's 0 mod 4. Instead, classify those as u16 with embedded \0 chars. Found by oss-fuzz.
- Print embedded nul bytes as \0 instead of \x00.

llvm-svn: 358835
This commit is contained in:
parent
f2654b638d
commit
8eeaf5178d
|
@ -1088,6 +1088,9 @@ static void outputHex(OutputStream &OS, unsigned C) {
|
|||
|
||||
static void outputEscapedChar(OutputStream &OS, unsigned C) {
|
||||
switch (C) {
|
||||
case '\0': // nul
|
||||
OS << "\\0";
|
||||
return;
|
||||
case '\'': // single quote
|
||||
OS << "\\\'";
|
||||
return;
|
||||
|
@ -1165,7 +1168,7 @@ static unsigned guessCharByteSize(const uint8_t *StringBytes, unsigned NumChars,
|
|||
// 2-byte, or 4-byte null terminator.
|
||||
if (NumBytes < 32) {
|
||||
unsigned TrailingNulls = countTrailingNullBytes(StringBytes, NumChars);
|
||||
if (TrailingNulls >= 4)
|
||||
if (TrailingNulls >= 4 && NumBytes % 4 == 0)
|
||||
return 4;
|
||||
if (TrailingNulls >= 2)
|
||||
return 2;
|
||||
|
@ -1179,7 +1182,7 @@ static unsigned guessCharByteSize(const uint8_t *StringBytes, unsigned NumChars,
|
|||
// perfect and is biased towards languages that have ascii alphabets, but this
|
||||
// was always going to be best effort since the encoding is lossy.
|
||||
unsigned Nulls = countEmbeddedNulls(StringBytes, NumChars);
|
||||
if (Nulls >= 2 * NumChars / 3)
|
||||
if (Nulls >= 2 * NumChars / 3 && NumBytes % 4 == 0)
|
||||
return 4;
|
||||
if (Nulls >= NumChars / 3)
|
||||
return 2;
|
||||
|
|
|
@ -771,3 +771,13 @@
|
|||
|
||||
??_C@_0CG@HJGBPLNO@l?$AAo?$AAo?$AAk?$AAA?$AAh?$AAe?$AAa?$AAd?$AAH?$AAa?$AAr?$AAd?$AAB?$AAr?$AAe?$AA@
|
||||
; CHECK: u"lookAheadHardBre"...
|
||||
|
||||
|
||||
; These are u16 strings that the heuristic would classify as u32 -- except
|
||||
; that their byte length % 4 is 2, so they can't be u32.
|
||||
|
||||
??_C@_05LABPAAN@b?$AA?$AA?$AA?$AA?$AA@
|
||||
; CHECK: u"b\0"
|
||||
|
||||
??_C@_0CC@MBPKDIAM@a?$AA?$AA?$AAb?$AA?$AA?$AAc?$AA?$AA?$AAd?$AA?$AA?$AAe?$AA?$AA?$AAf?$AA?$AA?$AAg?$AA?$AA?$AAh?$AA?$AA?$AA@
|
||||
; CHECK: u"a\0b\0c\0d\0e\0f\0g\0h\0"...
|
||||
|
|
Loading…
Reference in New Issue