Extend url support for azure blob backup (#7776)

* Change to old url format azure://<endpoint>/<account>/<container>/

* Remove unnecessary dependency

* Add cmake_minimum_required  for azure backup cmake file

* Add the comments for new supported urls

* Add the host name resolution logic

* format files

* refactor part of the code

* Add documentation about Azure backup testing

* Add more comments, update the doc
This commit is contained in:
Chaoguang Lin 2022-08-09 01:37:24 -07:00 committed by GitHub
parent a907c444a0
commit 15523ff2e6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 122 additions and 17 deletions

View File

@ -288,11 +288,44 @@ Reference<IBackupContainer> IBackupContainer::openContainer(const std::string& u
#ifdef BUILD_AZURE_BACKUP
else if (u.startsWith("azure://"_sr)) {
u.eat("azure://"_sr);
auto accountName = u.eat("@"_sr).toString();
auto endpoint = u.eat("/"_sr).toString();
auto containerName = u.eat("/"_sr).toString();
r = makeReference<BackupContainerAzureBlobStore>(
endpoint, accountName, containerName, encryptionKeyFileName);
auto address = u.eat("/"_sr);
if (address.endsWith(std::string(azure::storage_lite::constants::default_endpoint_suffix))) {
// <account>.<service>.core.windows.net/<resource_path>
auto endPoint = address.toString();
auto accountName = address.eat("."_sr).toString();
auto containerName = u.eat("/"_sr).toString();
r = makeReference<BackupContainerAzureBlobStore>(
endPoint, accountName, containerName, encryptionKeyFileName);
} else {
// resolve the network address if necessary
std::string endpoint(address.toString());
Optional<NetworkAddress> parsedAddress = NetworkAddress::parseOptional(endpoint);
if (!parsedAddress.present()) {
try {
auto hostname = Hostname::parse(endpoint);
auto resolvedAddress = hostname.resolveBlocking();
if (resolvedAddress.present()) {
parsedAddress = resolvedAddress.get();
}
} catch (Error& e) {
TraceEvent(SevError, "InvalidAzureBackupUrl").error(e).detail("Endpoint", endpoint);
throw backup_invalid_url();
}
}
if (!parsedAddress.present()) {
TraceEvent(SevError, "InvalidAzureBackupUrl").detail("Endpoint", endpoint);
throw backup_invalid_url();
}
auto accountName = u.eat("/"_sr).toString();
// Avoid including ":tls" and "(fromHostname)"
// note: the endpoint needs to contain the account name
// so either "<account_name>.blob.core.windows.net" or "<ip>:<port>/<account_name>"
endpoint =
fmt::format("{}/{}", formatIpPort(parsedAddress.get().ip, parsedAddress.get().port), accountName);
auto containerName = u.eat("/"_sr).toString();
r = makeReference<BackupContainerAzureBlobStore>(
endpoint, accountName, containerName, encryptionKeyFileName);
}
}
#endif
else {

View File

@ -1523,11 +1523,44 @@ Reference<BackupContainerFileSystem> BackupContainerFileSystem::openContainerFS(
#ifdef BUILD_AZURE_BACKUP
else if (u.startsWith("azure://"_sr)) {
u.eat("azure://"_sr);
auto accountName = u.eat("@"_sr).toString();
auto endpoint = u.eat("/"_sr).toString();
auto containerName = u.eat("/"_sr).toString();
r = makeReference<BackupContainerAzureBlobStore>(
endpoint, accountName, containerName, encryptionKeyFileName);
auto address = u.eat("/"_sr);
if (address.endsWith(std::string(azure::storage_lite::constants::default_endpoint_suffix))) {
// <account>.<service>.core.windows.net/<resource_path>
auto endPoint = address.toString();
auto accountName = address.eat("."_sr).toString();
auto containerName = u.eat("/"_sr).toString();
r = makeReference<BackupContainerAzureBlobStore>(
endPoint, accountName, containerName, encryptionKeyFileName);
} else {
// resolve the network address if necessary
std::string endpoint(address.toString());
Optional<NetworkAddress> parsedAddress = NetworkAddress::parseOptional(endpoint);
if (!parsedAddress.present()) {
try {
auto hostname = Hostname::parse(endpoint);
auto resolvedAddress = hostname.resolveBlocking();
if (resolvedAddress.present()) {
parsedAddress = resolvedAddress.get();
}
} catch (Error& e) {
TraceEvent(SevError, "InvalidAzureBackupUrl").error(e).detail("Endpoint", endpoint);
throw backup_invalid_url();
}
}
if (!parsedAddress.present()) {
TraceEvent(SevError, "InvalidAzureBackupUrl").detail("Endpoint", endpoint);
throw backup_invalid_url();
}
auto accountName = u.eat("/"_sr).toString();
// Avoid including ":tls" and "(fromHostname)"
// note: the endpoint needs to contain the account name
// so either "<account_name>.blob.core.windows.net" or "<ip>:<port>/<account_name>"
endpoint =
fmt::format("{}/{}", formatIpPort(parsedAddress.get().ip, parsedAddress.get().port), accountName);
auto containerName = u.eat("/"_sr).toString();
r = makeReference<BackupContainerAzureBlobStore>(
endpoint, accountName, containerName, encryptionKeyFileName);
}
}
#endif
else {

View File

@ -90,8 +90,8 @@ add_flow_target(LINK_TEST NAME fdbclientlinktest SRCS LinkTest.cpp)
target_link_libraries(fdbclientlinktest PRIVATE fdbclient rapidxml) # re-link rapidxml due to private link interface
if(BUILD_AZURE_BACKUP)
target_link_libraries(fdbclient PRIVATE curl uuid azure-storage-lite)
target_link_libraries(fdbclient_sampling PRIVATE curl uuid azure-storage-lite)
target_link_libraries(fdbclient PRIVATE curl azure-storage-lite)
target_link_libraries(fdbclient_sampling PRIVATE curl azure-storage-lite)
endif()
if(BUILD_AWS_BACKUP)

View File

@ -29,7 +29,7 @@ namespace {
std::string const notFoundErrorCode = "404";
void printAzureError(std::string const& operationName, azure::storage_lite::storage_error const& err) {
printf("(%s) : Error from Azure SDK : %s (%s) : %s",
printf("(%s) : Error from Azure SDK : %s (%s) : %s\n",
operationName.c_str(),
err.code_name.c_str(),
err.code.c_str(),
@ -109,9 +109,9 @@ public:
class WriteFile final : public IAsyncFile, ReferenceCounted<WriteFile> {
AsyncTaskThread* asyncTaskThread;
std::shared_ptr<AzureClient> client;
std::string containerName;
std::string blobName;
std::shared_ptr<AzureClient> client;
int64_t m_cursor{ 0 };
// Ideally this buffer should not be a string, but
// the Azure SDK only supports/tests uploading to append
@ -318,7 +318,7 @@ BackupContainerAzureBlobStore::BackupContainerAzureBlobStore(const std::string&
std::string accountKey = _accountKey;
auto credential = std::make_shared<azure::storage_lite::shared_key_credential>(accountName, accountKey);
auto storageAccount = std::make_shared<azure::storage_lite::storage_account>(
accountName, credential, true, format("https://%s", endpoint.c_str()));
accountName, credential, true, fmt::format("https://{}", endpoint));
client = std::make_unique<AzureClient>(storageAccount, 1);
}
@ -342,6 +342,7 @@ Future<Void> BackupContainerAzureBlobStore::create() {
Future<Void> encryptionSetupFuture = usesEncryption() ? encryptionSetupComplete() : Void();
return createContainerFuture && encryptionSetupFuture;
}
Future<bool> BackupContainerAzureBlobStore::exists() {
TraceEvent(SevDebug, "BCAzureBlobStoreCheckContainerExists").detail("ContainerName", containerName);
return asyncTaskThread.execAsync([containerName = this->containerName, client = this->client] {

View File

@ -0,0 +1,33 @@
# Set up the Azure Backup Testing Environment
Make sure we built FDB with `-DBUILD_AZURE_BACKUP=ON`
# Test
If you run the _BackupToBlob_ and _RestoreFromBlob_ workloads with a _backupURL_ parameter that starts with `azure://`,
the workloads will back up to and restore from Azure blob storage.
For example, _BackupAzureBlobCorrectness.toml_
## Url format
The code supports the following URL formats:
- `azure://<account_name>.blob.core.windows.net/<container_name>` (The formal url format for the blob service provided by the azure storage account)
- `azure://<ip|hostname>:<port>/<account_name>/<container_name>` (Directly providing the endpoint address for the blob service, usually for local testing)
## Local test environment
We need to use _Azurite_ to simulate an Azure blob service locally.
Please follow the [tutorial](https://docs.microsoft.com/en-us/azure/storage/common/storage-use-azurite?tabs=docker-hub) to start your service locally.
For example,
```
docker run -p 10000:10000 -v `pwd`:<path> -w <path> mcr.microsoft.com/azure-storage/azurite azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --oauth basic --cert ./<...>.pem --key ./<...>.key.pem --debug ./<log_file_path>
```
### Notice
- To use _https_, we need to provide the certificates via `--cert` and `--key`.
See the detailed [tutorial](https://github.com/Azure/Azurite/blob/main/README.md#https-setup) for setting up HTTPS. (We tested with the `mkcert` method)
- To use Azure SDKs, we need to pass `--oauth basic` option
- Please take a look at the [difference](https://github.com/Azure/Azurite/blob/main/README.md#differences-between-azurite-and-azure-storage) between Azurite and Azure Storage

View File

@ -1,3 +1,5 @@
cmake_minimum_required(VERSION 3.13)
project(azurestorage-download)
include(ExternalProject)

View File

@ -25,6 +25,7 @@
#include "fdbclient/AsyncTaskThread.h"
#include "fdbclient/BackupContainerFileSystem.h"
#include "constants.h"
#include "storage_credential.h"
#include "storage_account.h"
#include "blob/blob_client.h"

View File

@ -39,7 +39,9 @@ testTitle = 'Backup'
testName = 'BackupToBlob'
backupAfter = 0.0
backupTag = 'default'
backupURL = 'azure://0.0.0.0:10000/devstoreaccount1/test_container/'
# azure storage account style format: azure://<account_name>.blob.core.windows.net/<container_name>
# general ip-port style format: azure://<IP|Hostname>:<Port>/<account_name>/<container_name>
backupURL = 'azure://0.0.0.0:10000/devstoreaccount1/testcontainer'
[[test.workload]]
testName = 'RandomClogging'
@ -72,7 +74,7 @@ clearAfterTest = 'false'
testName = 'RestoreFromBlob'
restoreAfter = 0.0
backupTag = 'default'
backupURL = 'azure://0.0.0.0:10000/devstoreaccount1/test_container/'
backupURL = 'azure://0.0.0.0:10000/devstoreaccount1/testcontainer'
[[test.workload]]
testName = 'RandomClogging'