[llvm][StringExtras] Add a fail-able version of `fromHex`

This revision adds a fail-able (checked) version of `fromHex` that fails when the input string contains a non-hex character. This removes the need for users to run a separate check that the string contains only hex digits before converting it. That separate check becomes very costly for large hex strings, because checking whether a string contains only hex digits does effectively the same work as converting it in the first place.

Context: In MLIR we use hex strings to represent very large constants in the textual format of the IR. These changes lead to a large decrease in compile time when parsing these constants (2 seconds -> 1 second).

Differential Revision: https://reviews.llvm.org/D90265
This commit is contained in:
River Riddle 2020-10-28 16:46:25 -07:00
parent 49c84fd5a4
commit 1095419b10
2 changed files with 50 additions and 12 deletions

View File

@ -165,34 +165,68 @@ inline std::string toHex(ArrayRef<uint8_t> Input, bool LowerCase = false) {
return toHex(toStringRef(Input), LowerCase);
}
inline uint8_t hexFromNibbles(char MSB, char LSB) {
/// Decode the pair of hexadecimal digit characters \p MSB (most significant
/// nibble) and \p LSB (least significant nibble) into the byte they encode and
/// store it in \p Hex. If \p MSB or \p LSB is not a hexadecimal digit, this
/// method returns false without writing \p Hex. Otherwise, returns true.
inline bool tryGetHexFromNibbles(char MSB, char LSB, uint8_t &Hex) {
unsigned U1 = hexDigitValue(MSB);
unsigned U2 = hexDigitValue(LSB);
// NOTE(review): this hunk appears to show removed and added diff lines
// together without +/- markers; the assert and the first `return
// static_cast...` below look like the pre-change lines — confirm against the
// actual header.
assert(U1 != -1U && U2 != -1U);
// A value of -1U marks a character that did not map to a hex digit.
if (U1 == -1U || U2 == -1U)
return false;
return static_cast<uint8_t>((U1 << 4) | U2);
// MSB supplies the upper four bits of the byte, LSB the lower four.
Hex = static_cast<uint8_t>((U1 << 4) | U2);
return true;
}
/// Convert hexadecimal string \p Input to its binary representation.
/// The return string is half the size of \p Input.
inline std::string fromHex(StringRef Input) {
if (Input.empty())
return std::string();
/// Decode the pair of hexadecimal digit characters \p MSB and \p LSB into the
/// byte they encode. Both characters are asserted to be hexadecimal digits;
/// use tryGetHexFromNibbles when the input may be invalid.
inline uint8_t hexFromNibbles(char MSB, char LSB) {
  uint8_t Byte = 0;
  const bool IsValid = tryGetHexFromNibbles(MSB, LSB, Byte);
  // Keep builds without assertions free of an unused-variable warning.
  (void)IsValid;
  assert(IsValid && "MSB and/or LSB do not correspond to hex digits");
  return Byte;
}
/// Convert hexadecimal string \p Input to its binary representation and store
/// the result in \p Output. Returns true if every character of \p Input was
/// converted (an empty \p Input trivially succeeds) and false as soon as a
/// non-hexadecimal digit is encountered. An odd-length \p Input is decoded as
/// if it were prefixed with '0', so a successful conversion produces
/// (Input.size() + 1) / 2 bytes.
inline bool tryGetFromHex(StringRef Input, std::string &Output) {
if (Input.empty())
return true;
// NOTE(review): this hunk appears to show removed and added diff lines
// together without +/- markers. The local `std::string Output;`, the direct
// `hexFromNibbles` calls and `return Output;` look like the pre-change lines —
// confirm against the actual header. If so, bytes are appended to the
// caller's \p Output with push_back and it is never cleared here, so callers
// presumably pass an empty string — verify.
std::string Output;
Output.reserve((Input.size() + 1) / 2);
// Odd digit count: the first digit stands alone as the low nibble of the
// first output byte.
if (Input.size() % 2 == 1) {
Output.push_back(hexFromNibbles('0', Input.front()));
uint8_t Hex = 0;
if (!tryGetHexFromNibbles('0', Input.front(), Hex))
return false;
Output.push_back(Hex);
Input = Input.drop_front();
}
assert(Input.size() % 2 == 0);
// Consume the remaining digits two at a time, one output byte per pair.
while (!Input.empty()) {
uint8_t Hex = hexFromNibbles(Input[0], Input[1]);
uint8_t Hex = 0;
if (!tryGetHexFromNibbles(Input[0], Input[1], Hex))
return false;
Output.push_back(Hex);
Input = Input.drop_front(2);
}
return Output;
return true;
}
/// Decode the hexadecimal string \p Input into the binary data it represents.
/// The returned string is half the size of \p Input. \p Input is asserted to
/// contain only hexadecimal digits; use tryGetFromHex when the input may be
/// invalid.
inline std::string fromHex(StringRef Input) {
  std::string Bytes;
  const bool Converted = tryGetFromHex(Input, Bytes);
  // Keep builds without assertions free of an unused-variable warning.
  (void)Converted;
  assert(Converted && "Input contains non hex digits");
  return Bytes;
}
/// Convert the string \p S to an integer of the specified type using

View File

@ -89,6 +89,10 @@ TEST(StringExtrasTest, ToAndFromHex) {
EXPECT_EQ(EvenStr, toHex(EvenData));
EXPECT_EQ(EvenData, fromHex(EvenStr));
EXPECT_EQ(StringRef(EvenStr).lower(), toHex(EvenData, true));
std::string InvalidStr = "A5ZX";
std::string IgnoredOutput;
EXPECT_FALSE(tryGetFromHex(InvalidStr, IgnoredOutput));
}
TEST(StringExtrasTest, to_float) {