From 8eeaf5178dfda82b51766ea106febd7f563bc08f Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Sat, 20 Apr 2019 23:59:06 +0000 Subject: [PATCH] llvm-undname: Improve string literal demangling with embedded \0 chars - Don't assert when a string looks like a u32 string to the heuristic but doesn't have a length that's 0 mod 4. Instead, classify those as u16 with embedded \0 chars. Found by oss-fuzz. - Print embedded nul bytes as \0 instead of \x00. llvm-svn: 358835 --- llvm/lib/Demangle/MicrosoftDemangle.cpp | 7 +++++-- llvm/test/Demangle/ms-string-literals.test | 10 ++++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Demangle/MicrosoftDemangle.cpp b/llvm/lib/Demangle/MicrosoftDemangle.cpp index ebe2ef5de09d..6431e4ab130b 100644 --- a/llvm/lib/Demangle/MicrosoftDemangle.cpp +++ b/llvm/lib/Demangle/MicrosoftDemangle.cpp @@ -1088,6 +1088,9 @@ static void outputHex(OutputStream &OS, unsigned C) { static void outputEscapedChar(OutputStream &OS, unsigned C) { switch (C) { + case '\0': // nul + OS << "\\0"; + return; case '\'': // single quote OS << "\\\'"; return; @@ -1165,7 +1168,7 @@ static unsigned guessCharByteSize(const uint8_t *StringBytes, unsigned NumChars, // 2-byte, or 4-byte null terminator. if (NumBytes < 32) { unsigned TrailingNulls = countTrailingNullBytes(StringBytes, NumChars); - if (TrailingNulls >= 4) + if (TrailingNulls >= 4 && NumBytes % 4 == 0) return 4; if (TrailingNulls >= 2) return 2; @@ -1179,7 +1182,7 @@ static unsigned guessCharByteSize(const uint8_t *StringBytes, unsigned NumChars, // perfect and is biased towards languages that have ascii alphabets, but this // was always going to be best effort since the encoding is lossy. 
unsigned Nulls = countEmbeddedNulls(StringBytes, NumChars); - if (Nulls >= 2 * NumChars / 3) + if (Nulls >= 2 * NumChars / 3 && NumBytes % 4 == 0) return 4; if (Nulls >= NumChars / 3) return 2; diff --git a/llvm/test/Demangle/ms-string-literals.test b/llvm/test/Demangle/ms-string-literals.test index 1e634099124b..2fe3384fe748 100644 --- a/llvm/test/Demangle/ms-string-literals.test +++ b/llvm/test/Demangle/ms-string-literals.test @@ -771,3 +771,13 @@ ??_C@_0CG@HJGBPLNO@l?$AAo?$AAo?$AAk?$AAA?$AAh?$AAe?$AAa?$AAd?$AAH?$AAa?$AAr?$AAd?$AAB?$AAr?$AAe?$AA@ ; CHECK: u"lookAheadHardBre"... + + +; These are u16 strings that the heuristic would classify as u32 -- except +; that their byte length % 4 is 2, so they can't be u32. + +??_C@_05LABPAAN@b?$AA?$AA?$AA?$AA?$AA@ +; CHECK: u"b\0" + +??_C@_0CC@MBPKDIAM@a?$AA?$AA?$AAb?$AA?$AA?$AAc?$AA?$AA?$AAd?$AA?$AA?$AAe?$AA?$AA?$AAf?$AA?$AA?$AAg?$AA?$AA?$AAh?$AA?$AA?$AA@ +; CHECK: u"a\0b\0c\0d\0e\0f\0g\0h\0"...