From 1095419b10ff222c3cc362be27d97ff561a2181b Mon Sep 17 00:00:00 2001
From: River Riddle
Date: Wed, 28 Oct 2020 16:46:25 -0700
Subject: [PATCH] [llvm][StringExtras] Add a fail-able version of `fromHex`

This revision adds a fail-able/checked version of `fromHex` that fails when
the input string contains a non-hex character. This removes the need for users
to perform a separate check that the string contains only hex digits. Such a
check becomes very costly for large hex strings, given that checking whether a
string contains only hex digits is effectively the same work as converting it
in the first place.

Context: In MLIR we use hex strings to represent very large constants in the
textual format of the IR. These changes lead to a large decrease in compile
time when parsing these constants (2 seconds -> 1 second).

Differential Revision: https://reviews.llvm.org/D90265
---
 llvm/include/llvm/ADT/StringExtras.h    | 58 ++++++++++++++++++++-----
 llvm/unittests/ADT/StringExtrasTest.cpp |  4 ++
 2 files changed, 50 insertions(+), 12 deletions(-)

diff --git a/llvm/include/llvm/ADT/StringExtras.h b/llvm/include/llvm/ADT/StringExtras.h
index 990a3054a9d2..6084a564b95d 100644
--- a/llvm/include/llvm/ADT/StringExtras.h
+++ b/llvm/include/llvm/ADT/StringExtras.h
@@ -165,34 +165,68 @@ inline std::string toHex(ArrayRef<uint8_t> Input, bool LowerCase = false) {
   return toHex(toStringRef(Input), LowerCase);
 }
 
-inline uint8_t hexFromNibbles(char MSB, char LSB) {
+/// Store in \p Hex the byte whose high and low nibbles are given by the
+/// hexadecimal digit characters \p MSB and \p LSB, respectively. If \p MSB or
+/// \p LSB is not a hexadecimal digit, this method returns false and leaves
+/// \p Hex untouched. Otherwise, returns true.
+inline bool tryGetHexFromNibbles(char MSB, char LSB, uint8_t &Hex) {
   unsigned U1 = hexDigitValue(MSB);
   unsigned U2 = hexDigitValue(LSB);
-  assert(U1 != -1U && U2 != -1U);
+  if (U1 == -1U || U2 == -1U)
+    return false;
 
-  return static_cast<uint8_t>((U1 << 4) | U2);
+  Hex = static_cast<uint8_t>((U1 << 4) | U2);
+  return true;
 }
 
-/// Convert hexadecimal string \p Input to its binary representation.
-/// The return string is half the size of \p Input.
-inline std::string fromHex(StringRef Input) {
-  if (Input.empty())
-    return std::string();
+/// Return the byte whose high and low nibbles are given by the hexadecimal
+/// digit characters \p MSB and \p LSB. Asserts that both are hex digits.
+inline uint8_t hexFromNibbles(char MSB, char LSB) {
+  uint8_t Hex = 0;
+  bool GotHex = tryGetHexFromNibbles(MSB, LSB, Hex);
+  (void)GotHex;
+  assert(GotHex && "MSB and/or LSB do not correspond to hex digits");
+  return Hex;
+}
+
+/// Convert hexadecimal string \p Input to its binary representation and append
+/// it to \p Output (an odd-length \p Input is treated as if prefixed with '0').
+/// Returns true on success. Returns false if \p Input contains a non-hex
+/// character; \p Output may then hold the bytes decoded before that character.
+inline bool tryGetFromHex(StringRef Input, std::string &Output) {
+  if (Input.empty())
+    return true;
 
-  std::string Output;
   Output.reserve((Input.size() + 1) / 2);
   if (Input.size() % 2 == 1) {
-    Output.push_back(hexFromNibbles('0', Input.front()));
+    uint8_t Hex = 0;
+    if (!tryGetHexFromNibbles('0', Input.front(), Hex))
+      return false;
+
+    Output.push_back(Hex);
     Input = Input.drop_front();
   }
 
   assert(Input.size() % 2 == 0);
   while (!Input.empty()) {
-    uint8_t Hex = hexFromNibbles(Input[0], Input[1]);
+    uint8_t Hex = 0;
+    if (!tryGetHexFromNibbles(Input[0], Input[1], Hex))
+      return false;
+
     Output.push_back(Hex);
     Input = Input.drop_front(2);
   }
-  return Output;
+  return true;
+}
+
+/// Convert hexadecimal string \p Input to its binary representation.
+/// The returned string is half the size of \p Input (rounded up).
+inline std::string fromHex(StringRef Input) {
+  std::string Hex;
+  bool GotHex = tryGetFromHex(Input, Hex);
+  (void)GotHex;
+  assert(GotHex && "Input contains non hex digits");
+  return Hex;
 }
 
 /// Convert the string \p S to an integer of the specified type using
diff --git a/llvm/unittests/ADT/StringExtrasTest.cpp b/llvm/unittests/ADT/StringExtrasTest.cpp
index d17a172dbd9c..b785bb51844b 100644
--- a/llvm/unittests/ADT/StringExtrasTest.cpp
+++ b/llvm/unittests/ADT/StringExtrasTest.cpp
@@ -89,6 +89,10 @@ TEST(StringExtrasTest, ToAndFromHex) {
   EXPECT_EQ(EvenStr, toHex(EvenData));
   EXPECT_EQ(EvenData, fromHex(EvenStr));
   EXPECT_EQ(StringRef(EvenStr).lower(), toHex(EvenData, true));
+
+  std::string InvalidStr = "A5ZX";
+  std::string IgnoredOutput;
+  EXPECT_FALSE(tryGetFromHex(InvalidStr, IgnoredOutput));
 }
 
 TEST(StringExtrasTest, to_float) {