[libc] fix strtof/d/ld NaN parsing

Previously strtof/d/ld would only accept a NaN with parentheses if the
contents of the parentheses were a valid number. Now they accept any
combination of letters and numbers inside the parentheses, but only
valid numbers are put in the mantissa.

Reviewed By: sivachandra

Differential Revision: https://reviews.llvm.org/D113790
This commit is contained in:
Michael Jones 2021-11-12 11:53:44 -08:00
parent 3020608b61
commit 47d0c83e1f
2 changed files with 55 additions and 12 deletions

View File

@ -775,16 +775,30 @@ static inline T strtofloatingpoint(const char *__restrict src,
seenDigit = true; seenDigit = true;
src += 3; src += 3;
BitsType NaNMantissa = 0; BitsType NaNMantissa = 0;
// This handles the case of `NaN(n-character-sequence)`, where the
// n-character-sequence is made of 0 or more letters and numbers in any
// order.
if (*src == '(') { if (*src == '(') {
char *tempSrc = 0; const char *leftParen = src;
if (isdigit(*(src + 1)) || *(src + 1) == ')') { ++src;
NaNMantissa = strtointeger<BitsType>(src + 1, &tempSrc, 0); while (isalnum(*src))
if (*tempSrc != ')') { ++src;
NaNMantissa = 0; if (*src == ')') {
} else { ++src;
src = tempSrc + 1; char *tempSrc = 0;
if (isdigit(*(leftParen + 1))) {
// This is to prevent errors when BitsType is larger than 64 bits,
// since strtointeger only supports up to 64 bits. This is actually
// more than is required by the specification, which says for the
// input type "NAN(n-char-sequence)" that "the meaning of
// the n-char sequence is implementation-defined."
NaNMantissa = static_cast<BitsType>(
strtointeger<uint64_t>(leftParen + 1, &tempSrc, 0));
if (*tempSrc != ')')
NaNMantissa = 0;
} }
} } else
src = leftParen;
} }
NaNMantissa |= fputil::FloatProperties<T>::quietNaNMask; NaNMantissa |= fputil::FloatProperties<T>::quietNaNMask;
if (result.getSign()) { if (result.getSign()) {

View File

@ -165,10 +165,39 @@ TEST_F(LlvmLibcStrToFTest, InfTests) {
runTest("-iNfInItY", 9, 0xff800000); runTest("-iNfInItY", 9, 0xff800000);
} }
TEST_F(LlvmLibcStrToFTest, NaNTests) { TEST_F(LlvmLibcStrToFTest, SimpleNaNTests) {
runTest("NaN", 3, 0x7fc00000); runTest("NaN", 3, 0x7fc00000);
runTest("-nAn", 4, 0xffc00000); runTest("-nAn", 4, 0xffc00000);
runTest("NaN()", 5, 0x7fc00000); }
runTest("NaN(1234)", 9, 0x7fc004d2);
runTest("NaN( 1234)", 3, 0x7fc00000); // These NaNs are of the form `NaN(n-character-sequence)` where the
// n-character-sequence is 0 or more letters or numbers. If there is anything
// other than a letter or a number, then the valid number is just `NaN`. If
// the sequence is valid, then its interpretation is implementation-defined;
// in this case it's passed to strtointeger with an automatic base, and the
// result is put into the mantissa only if the number takes up the whole width
// of the parentheses.
// Empty n-character-sequence: the expected length of 5 covers the whole
// "NaN()" string, and the expected bits are 0x7fc00000, the same value the
// plain "NaN" input is expected to produce.
// NOTE(review): runTest's arguments look like (input, expected number of
// characters consumed, expected float bits) -- confirm against the fixture.
TEST_F(LlvmLibcStrToFTest, NaNWithParenthesesEmptyTest) {
runTest("NaN()", 5, 0x7fc00000);
}
// The whole n-character-sequence is a valid number, so its value is expected
// to show up in the low bits of the result on top of 0x7fc00000. The three
// inputs cover decimal, hexadecimal ("0x" prefix) and octal (leading "0")
// spellings.
TEST_F(LlvmLibcStrToFTest, NaNWithParenthesesValidNumberTests) {
runTest("NaN(1234)", 9, 0x7fc004d2); // 1234 decimal == 0x4d2
runTest("NaN(0x1234)", 11, 0x7fc01234); // hexadecimal payload 0x1234
runTest("NaN(01234)", 10, 0x7fc0029c); // 01234 octal == 0x29c
}
// The parenthesized part is not a valid n-character-sequence: it contains a
// character that is neither a letter nor a digit, or the closing parenthesis
// is missing. In every case the expected length is 3, i.e. only the leading
// "NaN", and the expected bits are 0x7fc00000 with no payload.
TEST_F(LlvmLibcStrToFTest, NaNWithParenthesesInvalidSequenceTests) {
runTest("NaN( 1234)", 3, 0x7fc00000); // leading space
runTest("NaN(-1234)", 3, 0x7fc00000); // '-' sign
runTest("NaN(asd&f)", 3, 0x7fc00000); // '&' in the middle
runTest("NaN(123 )", 3, 0x7fc00000); // trailing space
runTest("NaN(123+asdf)", 3, 0x7fc00000); // '+' in the middle
runTest("NaN(123", 3, 0x7fc00000); // no closing parenthesis
}
// The n-character-sequence contains only letters and digits, so the expected
// length covers the whole input including the closing parenthesis. The
// contents are not a complete valid number, though, so the expected bits are
// 0x7fc00000 with no payload.
TEST_F(LlvmLibcStrToFTest, NaNWithParenthesesValidSequenceInvalidNumberTests) {
runTest("NaN(1a)", 7, 0x7fc00000); // number stops before the 'a'
runTest("NaN(asdf)", 9, 0x7fc00000); // does not start with a digit
runTest("NaN(1A1)", 8, 0x7fc00000); // number stops before the 'A'
} }