forked from OSchip/llvm-project
An upcoming patch to LLDB will require the ability to decode base64. This patch adds support for decoding base64 and adds tests.
Resubmission of https://reviews.llvm.org/D126254 with where decodeBase64Byte is no longer a lambda but a static function. Some compilers have different errors or warnings with respect to what needs to be captured and what doesn't (see comments in https://reviews.llvm.org/D126254 for details). Differential Revision: https://reviews.llvm.org/D128560
This commit is contained in:
parent
1877d76aa0
commit
ea9ac3519c
|
@ -13,8 +13,10 @@
|
|||
#ifndef LLVM_SUPPORT_BASE64_H
|
||||
#define LLVM_SUPPORT_BASE64_H
|
||||
|
||||
#include "llvm/Support/Error.h"
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace llvm {
|
||||
|
||||
|
@ -52,6 +54,8 @@ template <class InputBytes> std::string encodeBase64(InputBytes const &Bytes) {
|
|||
return Buffer;
|
||||
}
|
||||
|
||||
llvm::Error decodeBase64(llvm::StringRef Input, std::vector<char> &Output);
|
||||
|
||||
} // end namespace llvm
|
||||
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,92 @@
|
|||
//===- Base64.cpp ---------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#define INVALID_BASE64_BYTE 64
|
||||
#include "llvm/Support/Base64.h"
|
||||
|
||||
static char decodeBase64Byte(uint8_t Ch) {
|
||||
constexpr char Inv = INVALID_BASE64_BYTE;
|
||||
static const char DecodeTable[] = {
|
||||
Inv, Inv, Inv, Inv, Inv, Inv, Inv, Inv, // ........
|
||||
Inv, Inv, Inv, Inv, Inv, Inv, Inv, Inv, // ........
|
||||
Inv, Inv, Inv, Inv, Inv, Inv, Inv, Inv, // ........
|
||||
Inv, Inv, Inv, Inv, Inv, Inv, Inv, Inv, // ........
|
||||
Inv, Inv, Inv, Inv, Inv, Inv, Inv, Inv, // ........
|
||||
Inv, Inv, Inv, 62, Inv, Inv, Inv, 63, // ...+.../
|
||||
52, 53, 54, 55, 56, 57, 58, 59, // 01234567
|
||||
60, 61, Inv, Inv, Inv, 0, Inv, Inv, // 89...=..
|
||||
Inv, 0, 1, 2, 3, 4, 5, 6, // .ABCDEFG
|
||||
7, 8, 9, 10, 11, 12, 13, 14, // HIJKLMNO
|
||||
15, 16, 17, 18, 19, 20, 21, 22, // PQRSTUVW
|
||||
23, 24, 25, Inv, Inv, Inv, Inv, Inv, // XYZ.....
|
||||
Inv, 26, 27, 28, 29, 30, 31, 32, // .abcdefg
|
||||
33, 34, 35, 36, 37, 38, 39, 40, // hijklmno
|
||||
41, 42, 43, 44, 45, 46, 47, 48, // pqrstuvw
|
||||
49, 50, 51 // xyz.....
|
||||
};
|
||||
if (Ch >= sizeof(DecodeTable))
|
||||
return Inv;
|
||||
return DecodeTable[Ch];
|
||||
}
|
||||
|
||||
llvm::Error llvm::decodeBase64(llvm::StringRef Input,
|
||||
std::vector<char> &Output) {
|
||||
constexpr char Base64InvalidByte = INVALID_BASE64_BYTE;
|
||||
// Invalid table value with short name to fit in the table init below. The
|
||||
// invalid value is 64 since valid base64 values are 0 - 63.
|
||||
Output.clear();
|
||||
const uint64_t InputLength = Input.size();
|
||||
if (InputLength == 0)
|
||||
return Error::success();
|
||||
// Make sure we have a valid input string length which must be a multiple
|
||||
// of 4.
|
||||
if ((InputLength % 4) != 0)
|
||||
return createStringError(std::errc::illegal_byte_sequence,
|
||||
"Base64 encoded strings must be a multiple of 4 "
|
||||
"bytes in length");
|
||||
const uint64_t FirstValidEqualIdx = InputLength - 2;
|
||||
char Hex64Bytes[4];
|
||||
for (uint64_t Idx = 0; Idx < InputLength; Idx += 4) {
|
||||
for (uint64_t ByteOffset = 0; ByteOffset < 4; ++ByteOffset) {
|
||||
const uint64_t ByteIdx = Idx + ByteOffset;
|
||||
const char Byte = Input[ByteIdx];
|
||||
const char DecodedByte = decodeBase64Byte(Byte);
|
||||
bool Illegal = DecodedByte == Base64InvalidByte;
|
||||
if (!Illegal && Byte == '=') {
|
||||
if (ByteIdx < FirstValidEqualIdx) {
|
||||
// We have an '=' in the middle of the string which is invalid, only
|
||||
// the last two characters can be '=' characters.
|
||||
Illegal = true;
|
||||
} else if (ByteIdx == FirstValidEqualIdx && Input[ByteIdx + 1] != '=') {
|
||||
// We have an equal second to last from the end and the last character
|
||||
// is not also an equal, so the '=' character is invalid
|
||||
Illegal = true;
|
||||
}
|
||||
}
|
||||
if (Illegal)
|
||||
return createStringError(
|
||||
std::errc::illegal_byte_sequence,
|
||||
"Invalid Base64 character %#2.2x at index %" PRIu64, Byte, ByteIdx);
|
||||
Hex64Bytes[ByteOffset] = DecodedByte;
|
||||
}
|
||||
// Now we have 6 bits of 3 bytes in value in each of the Hex64Bytes bytes.
|
||||
// Extract the right bytes into the Output buffer.
|
||||
Output.push_back((Hex64Bytes[0] << 2) + ((Hex64Bytes[1] >> 4) & 0x03));
|
||||
Output.push_back((Hex64Bytes[1] << 4) + ((Hex64Bytes[2] >> 2) & 0x0f));
|
||||
Output.push_back((Hex64Bytes[2] << 6) + (Hex64Bytes[3] & 0x3f));
|
||||
}
|
||||
// If we had valid trailing '=' characters strip the right number of bytes
|
||||
// from the end of the output buffer. We already know that the Input length
|
||||
// it a multiple of 4 and is not zero, so direct character access is safe.
|
||||
if (Input.back() == '=') {
|
||||
Output.pop_back();
|
||||
if (Input[InputLength - 2] == '=')
|
||||
Output.pop_back();
|
||||
}
|
||||
return Error::success();
|
||||
}
|
|
@ -125,6 +125,7 @@ add_llvm_component_library(LLVMSupport
|
|||
ARMWinEH.cpp
|
||||
Allocator.cpp
|
||||
AutoConvert.cpp
|
||||
Base64.cpp
|
||||
BinaryStreamError.cpp
|
||||
BinaryStreamReader.cpp
|
||||
BinaryStreamRef.cpp
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
|
||||
#include "llvm/Support/Base64.h"
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/Testing/Support/Error.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
@ -24,6 +25,28 @@ void TestBase64(StringRef Input, StringRef Final) {
|
|||
EXPECT_EQ(Res, Final);
|
||||
}
|
||||
|
||||
void TestBase64Decode(StringRef Input, StringRef Expected,
|
||||
StringRef ExpectedErrorMessage = {}) {
|
||||
std::vector<char> DecodedBytes;
|
||||
if (ExpectedErrorMessage.empty()) {
|
||||
ASSERT_THAT_ERROR(decodeBase64(Input, DecodedBytes), Succeeded());
|
||||
EXPECT_EQ(llvm::ArrayRef<char>(DecodedBytes),
|
||||
llvm::ArrayRef<char>(Expected.data(), Expected.size()));
|
||||
} else {
|
||||
ASSERT_THAT_ERROR(decodeBase64(Input, DecodedBytes),
|
||||
FailedWithMessage(ExpectedErrorMessage));
|
||||
}
|
||||
}
|
||||
|
||||
char NonPrintableVector[] = {0x00, 0x00, 0x00, 0x46,
|
||||
0x00, 0x08, (char)0xff, (char)0xee};
|
||||
|
||||
char LargeVector[] = {0x54, 0x68, 0x65, 0x20, 0x71, 0x75, 0x69, 0x63, 0x6b,
|
||||
0x20, 0x62, 0x72, 0x6f, 0x77, 0x6e, 0x20, 0x66, 0x6f,
|
||||
0x78, 0x20, 0x6a, 0x75, 0x6d, 0x70, 0x73, 0x20, 0x6f,
|
||||
0x76, 0x65, 0x72, 0x20, 0x31, 0x33, 0x20, 0x6c, 0x61,
|
||||
0x7a, 0x79, 0x20, 0x64, 0x6f, 0x67, 0x73, 0x2e};
|
||||
|
||||
} // namespace
|
||||
|
||||
TEST(Base64Test, Base64) {
|
||||
|
@ -37,16 +60,45 @@ TEST(Base64Test, Base64) {
|
|||
TestBase64("foobar", "Zm9vYmFy");
|
||||
|
||||
// With non-printable values.
|
||||
char NonPrintableVector[] = {0x00, 0x00, 0x00, 0x46,
|
||||
0x00, 0x08, (char)0xff, (char)0xee};
|
||||
TestBase64({NonPrintableVector, sizeof(NonPrintableVector)}, "AAAARgAI/+4=");
|
||||
|
||||
// Large test case
|
||||
char LargeVector[] = {0x54, 0x68, 0x65, 0x20, 0x71, 0x75, 0x69, 0x63, 0x6b,
|
||||
0x20, 0x62, 0x72, 0x6f, 0x77, 0x6e, 0x20, 0x66, 0x6f,
|
||||
0x78, 0x20, 0x6a, 0x75, 0x6d, 0x70, 0x73, 0x20, 0x6f,
|
||||
0x76, 0x65, 0x72, 0x20, 0x31, 0x33, 0x20, 0x6c, 0x61,
|
||||
0x7a, 0x79, 0x20, 0x64, 0x6f, 0x67, 0x73, 0x2e};
|
||||
TestBase64({LargeVector, sizeof(LargeVector)},
|
||||
"VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wcyBvdmVyIDEzIGxhenkgZG9ncy4=");
|
||||
}
|
||||
|
||||
TEST(Base64Test, DecodeBase64) {
|
||||
std::vector<llvm::StringRef> Outputs = {"", "f", "fo", "foo",
|
||||
"foob", "fooba", "foobar"};
|
||||
Outputs.push_back(
|
||||
llvm::StringRef(NonPrintableVector, sizeof(NonPrintableVector)));
|
||||
|
||||
Outputs.push_back(llvm::StringRef(LargeVector, sizeof(LargeVector)));
|
||||
// Make sure we can encode and decode any byte.
|
||||
std::vector<char> AllChars;
|
||||
for (int Ch = INT8_MIN; Ch <= INT8_MAX; ++Ch)
|
||||
AllChars.push_back(Ch);
|
||||
Outputs.push_back(llvm::StringRef(AllChars.data(), AllChars.size()));
|
||||
|
||||
for (const auto &Output : Outputs) {
|
||||
// We trust that encoding is working after running the Base64Test::Base64()
|
||||
// test function above, so we can use it to encode the string and verify we
|
||||
// can decode it correctly.
|
||||
auto Input = encodeBase64(Output);
|
||||
TestBase64Decode(Input, Output);
|
||||
}
|
||||
struct ErrorInfo {
|
||||
llvm::StringRef Input;
|
||||
llvm::StringRef ErrorMessage;
|
||||
};
|
||||
std::vector<ErrorInfo> ErrorInfos = {
|
||||
{"f", "Base64 encoded strings must be a multiple of 4 bytes in length"},
|
||||
{"=abc", "Invalid Base64 character 0x3d at index 0"},
|
||||
{"a=bc", "Invalid Base64 character 0x3d at index 1"},
|
||||
{"ab=c", "Invalid Base64 character 0x3d at index 2"},
|
||||
{"fun!", "Invalid Base64 character 0x21 at index 3"},
|
||||
};
|
||||
|
||||
for (const auto &EI : ErrorInfos)
|
||||
TestBase64Decode(EI.Input, "", EI.ErrorMessage);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue