2017-05-26 04:48:44 +08:00
/*
* BackupContainer . actor . cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013 - 2018 Apple Inc . and the FoundationDB project authors
2018-02-22 02:25:11 +08:00
*
2017-05-26 04:48:44 +08:00
* Licensed under the Apache License , Version 2.0 ( the " License " ) ;
* you may not use this file except in compliance with the License .
* You may obtain a copy of the License at
2018-02-22 02:25:11 +08:00
*
2017-05-26 04:48:44 +08:00
* http : //www.apache.org/licenses/LICENSE-2.0
2018-02-22 02:25:11 +08:00
*
2017-05-26 04:48:44 +08:00
* Unless required by applicable law or agreed to in writing , software
* distributed under the License is distributed on an " AS IS " BASIS ,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND , either express or implied .
* See the License for the specific language governing permissions and
* limitations under the License .
*/
2018-10-20 01:30:13 +08:00
# include "fdbclient/BackupContainer.h"
2019-03-07 14:34:25 +08:00
# include "fdbclient/BackupAgent.actor.h"
# include "fdbclient/JsonBuilder.h"
2017-05-26 04:48:44 +08:00
# include "flow/Trace.h"
# include "flow/UnitTest.h"
# include "flow/Hash3.h"
# include "fdbrpc/AsyncFileReadAhead.actor.h"
2017-11-15 15:33:17 +08:00
# include "fdbrpc/Platform.h"
2018-10-27 04:49:42 +08:00
# include "fdbclient/AsyncFileBlobStore.actor.h"
2017-05-26 04:48:44 +08:00
# include "fdbclient/Status.h"
2018-01-17 20:09:43 +08:00
# include "fdbclient/SystemData.h"
# include "fdbclient/ReadYourWrites.h"
# include "fdbclient/KeyBackedTypes.h"
# include "fdbclient/RunTransaction.actor.h"
2017-05-26 04:48:44 +08:00
# include <algorithm>
2018-01-17 20:09:43 +08:00
# include <time.h>
2019-02-18 06:55:47 +08:00
# include "flow/actorcompiler.h" // has to be last include
2017-05-26 04:48:44 +08:00
2017-11-16 05:33:09 +08:00
namespace IBackupFile_impl {
	// Append s to file using length-prefixed framing: a big-endian uint32_t byte
	// count followed by the raw bytes.  Readers use the prefix to know how many
	// bytes belong to the string.
	// The file reference is passed by value so the IBackupFile stays alive for
	// the duration of this actor.
	ACTOR Future<Void> appendStringRefWithLen(Reference<IBackupFile> file, Standalone<StringRef> s) {
		state uint32_t lenBuf = bigEndian32((uint32_t)s.size());
		wait(file->append(&lenBuf, sizeof(lenBuf)));
		wait(file->append(s.begin(), s.size()));
		return Void();
	}
}
2018-01-17 20:09:43 +08:00
// Member-facing entry point: forwards to the actor implementation while
// holding a reference to this file so it cannot be destroyed mid-write.
Future<Void> IBackupFile::appendStringRefWithLen(Standalone<StringRef> s) {
	Reference<IBackupFile> self = Reference<IBackupFile>::addRef(this);
	return IBackupFile_impl::appendStringRefWithLen(self, s);
}
2018-12-20 16:23:26 +08:00
std : : string IBackupContainer : : ExpireProgress : : toString ( ) const {
std : : string s = step + " ... " ;
if ( total > 0 ) {
s + = format ( " %d/%d (%.2f%%) " , done , total , double ( done ) / total * 100 ) ;
}
return s ;
}
2018-12-22 14:42:29 +08:00
void BackupFileList : : toStream ( FILE * fout ) const {
for ( const RangeFile & f : ranges ) {
fprintf ( fout , " range %lld %s \n " , f . fileSize , f . fileName . c_str ( ) ) ;
}
for ( const LogFile & f : logs ) {
fprintf ( fout , " log %lld %s \n " , f . fileSize , f . fileName . c_str ( ) ) ;
}
for ( const KeyspaceSnapshotFile & f : snapshots ) {
fprintf ( fout , " snapshotManifest %lld %s \n " , f . totalSize , f . fileName . c_str ( ) ) ;
}
}
2018-01-17 20:09:43 +08:00
// Resolve every version key in *pVersionTimeMap to its epoch time, read via the
// given transaction, with all lookups issued in parallel.  Versions with no
// known time are erased from the map.  The returned future is ready when all
// lookups have completed.
// NOTE(review): the callbacks mutate the same map the loop below iterates over;
// this is safe only if timeKeeperEpochsFromVersion() never completes
// synchronously (otherwise erase() would invalidate the loop iterator) — confirm.
Future<Void> fetchTimes(Reference<ReadYourWritesTransaction> tr, std::map<Version, int64_t>* pVersionTimeMap) {
	std::vector<Future<Void>> futures;

	// Resolve each version in the map,
	for (auto& p : *pVersionTimeMap) {
		// p is captured by copy ([=]), so the callback does not depend on the
		// map entry it was created from still existing.
		futures.push_back(map(timeKeeperEpochsFromVersion(p.first, tr), [=](Optional<int64_t> t) {
			if (t.present())
				pVersionTimeMap->at(p.first) = t.get();
			else
				pVersionTimeMap->erase(p.first);
			return Void();
		}));
	}

	return waitForAll(futures);
}
// Fill versionTimeMap with epoch times for every version this description
// mentions (snapshot boundaries, log range endpoints, restorable range
// endpoints), resolved in a single retryable RYW transaction against cx.
Future<Void> BackupDescription::resolveVersionTimes(Database cx) {
	// Populate map with versions needed
	versionTimeMap.clear();

	for (const KeyspaceSnapshotFile& m : snapshots) {
		// operator[] default-inserts the key; fetchTimes() fills in (or erases) the value.
		versionTimeMap[m.beginVersion];
		versionTimeMap[m.endVersion];
	}
	if (minLogBegin.present())
		versionTimeMap[minLogBegin.get()];
	if (maxLogEnd.present())
		versionTimeMap[maxLogEnd.get()];
	if (contiguousLogEnd.present())
		versionTimeMap[contiguousLogEnd.get()];
	if (minRestorableVersion.present())
		versionTimeMap[minRestorableVersion.get()];
	if (maxRestorableVersion.present())
		versionTimeMap[maxRestorableVersion.get()];

	// The lambda captures `this` (via [=]); the caller must keep this description
	// alive until the returned future is ready.
	return runRYWTransaction(cx, [=](Reference<ReadYourWritesTransaction> tr) { return fetchTimes(tr, &versionTimeMap); });
};
2017-11-15 15:33:17 +08:00
// Render this backup description as human-readable text, one field per line.
// Versions are annotated with a timestamp when versionTimeMap is populated
// (see resolveVersionTimes()); otherwise, when maxLogEnd is known, with an
// estimated age in days relative to maxLogEnd.
std::string BackupDescription::toString() const {
	std::string info;

	info.append(format("URL: %s\n", url.c_str()));
	info.append(format("Restorable: %s\n", maxRestorableVersion.present() ? "true" : "false"));

	// Format a version, adding the best time annotation available.
	auto formatVersion = [&](Version v) {
		std::string s;
		if (!versionTimeMap.empty()) {
			auto i = versionTimeMap.find(v);
			if (i != versionTimeMap.end())
				s = format("%lld (%s)", v, BackupAgentBase::formatTime(i->second).c_str());
			else
				s = format("%lld (unknown)", v);
		}
		else if (maxLogEnd.present()) {
			// Estimate: versions advance at CORE_VERSIONSPERSECOND, so convert the
			// version distance from maxLogEnd into days.
			double days = double(maxLogEnd.get() - v) / (CLIENT_KNOBS->CORE_VERSIONSPERSECOND * 24 * 60 * 60);
			s = format("%lld (maxLogEnd %s%.2f days)", v, days < 0 ? "+" : "-", days);
		}
		else {
			s = format("%lld", v);
		}
		return s;
	};

	for (const KeyspaceSnapshotFile& m : snapshots) {
		info.append(format("Snapshot: startVersion=%s endVersion=%s totalBytes=%lld restorable=%s expiredPct=%.2f\n",
			formatVersion(m.beginVersion).c_str(), formatVersion(m.endVersion).c_str(), m.totalSize, m.restorable.orDefault(false) ? "true" : "false", m.expiredPct(expiredEndVersion)));
	}

	info.append(format("SnapshotBytes: %lld\n", snapshotBytes));

	if (expiredEndVersion.present())
		info.append(format("ExpiredEndVersion: %s\n", formatVersion(expiredEndVersion.get()).c_str()));
	if (unreliableEndVersion.present())
		info.append(format("UnreliableEndVersion: %s\n", formatVersion(unreliableEndVersion.get()).c_str()));
	if (minLogBegin.present())
		info.append(format("MinLogBeginVersion: %s\n", formatVersion(minLogBegin.get()).c_str()));
	if (contiguousLogEnd.present())
		info.append(format("ContiguousLogEndVersion: %s\n", formatVersion(contiguousLogEnd.get()).c_str()));
	if (maxLogEnd.present())
		info.append(format("MaxLogEndVersion: %s\n", formatVersion(maxLogEnd.get()).c_str()));
	if (minRestorableVersion.present())
		info.append(format("MinRestorableVersion: %s\n", formatVersion(minRestorableVersion.get()).c_str()));
	if (maxRestorableVersion.present())
		info.append(format("MaxRestorableVersion: %s\n", formatVersion(maxRestorableVersion.get()).c_str()));

	if (!extendedDetail.empty())
		info.append("ExtendedDetail: ").append(extendedDetail);

	return info;
}
2019-03-07 06:14:06 +08:00
// Render this backup description as a JSON document string.
// Each version field becomes an object containing "Version" plus, when the
// version time map is populated, "Timestamp"/"Epochs", or otherwise a
// "RelativeDays" estimate when maxLogEnd is known.
std::string BackupDescription::toJSON() const {
	JsonBuilderObject doc;

	doc.setKey("URL", url.c_str());
	doc.setKey("Restorable", maxRestorableVersion.present());

	// Build a JSON object for a version with the best time annotation available.
	auto formatVersion = [&](Version v) {
		JsonBuilderObject doc;
		doc.setKey("Version", v);
		if (!versionTimeMap.empty()) {
			auto i = versionTimeMap.find(v);
			if (i != versionTimeMap.end()) {
				doc.setKey("Timestamp", BackupAgentBase::formatTime(i->second));
				doc.setKey("Epochs", i->second);
			}
		}
		else if (maxLogEnd.present()) {
			// NOTE(review): sign convention here (v - maxLogEnd) is the opposite of
			// toString()'s (maxLogEnd - v); presumably intentional for JSON consumers — confirm.
			double days = double(v - maxLogEnd.get()) / (CLIENT_KNOBS->CORE_VERSIONSPERSECOND * 24 * 60 * 60);
			doc.setKey("RelativeDays", days);
		}
		return doc;
	};

	JsonBuilderArray snapshotsArray;
	for (const KeyspaceSnapshotFile& m : snapshots) {
		JsonBuilderObject snapshotDoc;
		snapshotDoc.setKey("Start", formatVersion(m.beginVersion));
		snapshotDoc.setKey("End", formatVersion(m.endVersion));
		snapshotDoc.setKey("Restorable", m.restorable.orDefault(false));
		snapshotDoc.setKey("TotalBytes", m.totalSize);
		snapshotDoc.setKey("PercentageExpired", m.expiredPct(expiredEndVersion));
		snapshotsArray.push_back(snapshotDoc);
	}
	doc.setKey("Snapshots", snapshotsArray);

	doc.setKey("TotalSnapshotBytes", snapshotBytes);

	if (expiredEndVersion.present())
		doc.setKey("ExpiredEnd", formatVersion(expiredEndVersion.get()));
	if (unreliableEndVersion.present())
		doc.setKey("UnreliableEnd", formatVersion(unreliableEndVersion.get()));
	if (minLogBegin.present())
		doc.setKey("MinLogBegin", formatVersion(minLogBegin.get()));
	if (contiguousLogEnd.present())
		doc.setKey("ContiguousLogEnd", formatVersion(contiguousLogEnd.get()));
	if (maxLogEnd.present())
		doc.setKey("MaxLogEnd", formatVersion(maxLogEnd.get()));
	if (minRestorableVersion.present())
		doc.setKey("MinRestorablePoint", formatVersion(minRestorableVersion.get()));
	if (maxRestorableVersion.present())
		doc.setKey("MaxRestorablePoint", formatVersion(maxRestorableVersion.get()));

	if (!extendedDetail.empty())
		doc.setKey("ExtendedDetail", extendedDetail);

	return doc.getJson();
}
2017-11-15 15:33:17 +08:00
/* BackupContainerFileSystem implements a backup container which stores files in a nested folder structure.
* Inheritors must only define methods for writing, reading, deleting, sizing, and listing files.
*
2018-11-23 21:23:56 +08:00
* Snapshot manifests ( a complete set of files constituting a database snapshot for the backup ' s target ranges )
* are stored as JSON files at paths like
* / snapshots / snapshot , minVersion , maxVersion , totalBytes
*
* Key range files for snapshots are stored at paths like
* / kvranges / snapshot , startVersion / N / range , version , uid , blockSize
* where startVersion is the version at which the backup snapshot execution began and N is a number
* that is increased as key range files are generated over time ( at varying rates ) such that there
* are around 5 , 000 key range files in each folder .
2017-11-15 15:33:17 +08:00
*
2018-11-23 21:23:56 +08:00
* Note that startVersion will NOT correspond to the minVersion of a snapshot manifest because
* snapshot manifest min / max versions are based on the actual contained data and the first data
* file written will be after the start version of the snapshot ' s execution .
*
* Log files are at file paths like
* / logs / . . . / log , startVersion , endVersion , blockSize
* where . . . is a multi level path which sorts lexically into version order and results in approximately 1
* unique folder per day containing about 5 , 000 files .
*
* BACKWARD COMPATIBILITY
*
* Prior to FDB version 6.0 .16 , key range files were stored using a different folder scheme . Newer versions
* still support this scheme for all restore and backup management operations but key range files generated
* by backup using version 6.0.16 or later use the scheme described above.
*
* The old format stored key range files at paths like
* / ranges / . . . / range , version , uid , blockSize
* where ... is a multi level path which sorts lexically into version order and results in up to approximately
* 900 unique folders per day . The number of files per folder depends on the configured snapshot rate and
* database size and will vary from 1 to around 5 , 000.
2017-11-15 15:33:17 +08:00
*/
class BackupContainerFileSystem : public IBackupContainer {
public :
virtual void addref ( ) = 0 ;
virtual void delref ( ) = 0 ;
BackupContainerFileSystem ( ) { }
virtual ~ BackupContainerFileSystem ( ) { }
// Create the container
virtual Future < Void > create ( ) = 0 ;
2018-12-21 10:05:23 +08:00
virtual Future < bool > exists ( ) = 0 ;
2017-11-15 15:33:17 +08:00
// Get a list of fileNames and their sizes in the container under the given path
2018-11-23 21:23:56 +08:00
// Although not required, an implementation can avoid traversing unwanted subfolders
// by calling folderPathFilter(absoluteFolderPath) and checking for a false return value.
2017-11-15 15:33:17 +08:00
typedef std : : vector < std : : pair < std : : string , int64_t > > FilesAndSizesT ;
2018-01-17 20:09:43 +08:00
virtual Future < FilesAndSizesT > listFiles ( std : : string path = " " , std : : function < bool ( std : : string const & ) > folderPathFilter = nullptr ) = 0 ;
2017-11-15 15:33:17 +08:00
// Open a file for read by fileName
virtual Future < Reference < IAsyncFile > > readFile ( std : : string fileName ) = 0 ;
// Open a file for write by fileName
virtual Future < Reference < IBackupFile > > writeFile ( std : : string fileName ) = 0 ;
// Delete a file
virtual Future < Void > deleteFile ( std : : string fileName ) = 0 ;
// Delete entire container. During the process, if pNumDeleted is not null it will be
// updated with the count of deleted files so that progress can be seen.
virtual Future < Void > deleteContainer ( int * pNumDeleted ) = 0 ;
2018-01-25 02:29:37 +08:00
// Creates a 2-level path (x/y) where v should go such that x/y/* contains (10^smallestBucket) possible versions
2018-01-03 15:22:35 +08:00
static std : : string versionFolderString ( Version v , int smallestBucket ) {
2018-01-25 02:29:37 +08:00
ASSERT ( smallestBucket < 14 ) ;
2018-01-24 07:02:15 +08:00
// Get a 0-padded fixed size representation of v
std : : string vFixedPrecision = format ( " %019lld " , v ) ;
ASSERT ( vFixedPrecision . size ( ) = = 19 ) ;
2018-01-25 02:29:37 +08:00
// Truncate smallestBucket from the fixed length representation
vFixedPrecision . resize ( vFixedPrecision . size ( ) - smallestBucket ) ;
2018-01-24 07:02:15 +08:00
2018-01-25 02:29:37 +08:00
// Split the remaining digits with a '/' 4 places from the right
vFixedPrecision . insert ( vFixedPrecision . size ( ) - 4 , 1 , ' / ' ) ;
2018-01-24 07:02:15 +08:00
return vFixedPrecision ;
}
// This useful for comparing version folder strings regardless of where their "/" dividers are, as it is possible
// that division points would change in the future.
static std : : string cleanFolderString ( std : : string f ) {
f . erase ( std : : remove ( f . begin ( ) , f . end ( ) , ' / ' ) , f . end ( ) ) ;
return f ;
2018-01-03 15:22:35 +08:00
}
2017-11-15 15:33:17 +08:00
2018-01-24 07:02:15 +08:00
// The innermost folder covers 100 seconds (1e8 versions) During a full speed backup it is possible though very unlikely write about 10,000 snapshot range files during that time.
2018-11-23 21:23:56 +08:00
static std : : string old_rangeVersionFolderString ( Version v ) {
2018-01-24 07:02:15 +08:00
return format ( " ranges/%s/ " , versionFolderString ( v , 8 ) . c_str ( ) ) ;
2018-01-03 15:22:35 +08:00
}
2018-11-23 21:23:56 +08:00
	// Get the root folder for a snapshot's data based on its begin version
	static std::string snapshotFolderString(Version snapshotBeginVersion) {
		// The %018lld width matches the pattern extractSnapshotBeginVersion() parses.
		return format("kvranges/snapshot.%018lld", snapshotBeginVersion);
	}
// Extract the snapshot begin version from a path
static Version extractSnapshotBeginVersion ( std : : string path ) {
Version snapshotBeginVersion ;
if ( sscanf ( path . c_str ( ) , " kvranges/snapshot.%018lld " , & snapshotBeginVersion ) = = 1 ) {
return snapshotBeginVersion ;
}
return invalidVersion ;
}
2018-01-24 07:02:15 +08:00
// The innermost folder covers 100,000 seconds (1e11 versions) which is 5,000 mutation log files at current settings.
2018-01-03 15:22:35 +08:00
static std : : string logVersionFolderString ( Version v ) {
2018-01-24 07:02:15 +08:00
return format ( " logs/%s/ " , versionFolderString ( v , 11 ) . c_str ( ) ) ;
2017-11-15 15:33:17 +08:00
}
Future < Reference < IBackupFile > > writeLogFile ( Version beginVersion , Version endVersion , int blockSize ) {
2018-01-24 07:02:15 +08:00
return writeFile ( logVersionFolderString ( beginVersion ) + format ( " log,%lld,%lld,%s,%d " , beginVersion , endVersion , g_random - > randomUniqueID ( ) . toString ( ) . c_str ( ) , blockSize ) ) ;
2017-11-15 15:33:17 +08:00
}
2018-11-23 21:23:56 +08:00
Future < Reference < IBackupFile > > writeRangeFile ( Version snapshotBeginVersion , int snapshotFileCount , Version fileVersion , int blockSize ) {
std : : string fileName = format ( " range,%lld,%s,%d " , fileVersion , g_random - > randomUniqueID ( ) . toString ( ) . c_str ( ) , blockSize ) ;
// In order to test backward compatibility in simulation, sometimes write to the old path format
2018-11-25 09:24:54 +08:00
if ( g_network - > isSimulated ( ) & & g_random - > coinflip ( ) ) {
2018-11-23 21:23:56 +08:00
return writeFile ( old_rangeVersionFolderString ( fileVersion ) + fileName ) ;
}
return writeFile ( snapshotFolderString ( snapshotBeginVersion ) + format ( " /%d/ " , snapshotFileCount / ( BUGGIFY ? 1 : 5000 ) ) + fileName ) ;
2017-11-15 15:33:17 +08:00
}
2018-01-06 15:06:39 +08:00
static bool pathToRangeFile ( RangeFile & out , std : : string path , int64_t size ) {
2017-11-15 15:33:17 +08:00
std : : string name = basename ( path ) ;
RangeFile f ;
f . fileName = path ;
f . fileSize = size ;
int len ;
2018-01-06 15:06:39 +08:00
if ( sscanf ( name . c_str ( ) , " range,%lld,%*[^,],%u%n " , & f . version , & f . blockSize , & len ) = = 2 & & len = = name . size ( ) ) {
out = f ;
return true ;
}
return false ;
2017-11-15 15:33:17 +08:00
}
2018-01-06 15:06:39 +08:00
static bool pathToLogFile ( LogFile & out , std : : string path , int64_t size ) {
2017-11-15 15:33:17 +08:00
std : : string name = basename ( path ) ;
LogFile f ;
f . fileName = path ;
f . fileSize = size ;
int len ;
2018-01-06 15:06:39 +08:00
if ( sscanf ( name . c_str ( ) , " log,%lld,%lld,%*[^,],%u%n " , & f . beginVersion , & f . endVersion , & f . blockSize , & len ) = = 3 & & len = = name . size ( ) ) {
out = f ;
return true ;
}
return false ;
2017-11-15 15:33:17 +08:00
}
2018-01-06 15:06:39 +08:00
static bool pathToKeyspaceSnapshotFile ( KeyspaceSnapshotFile & out , std : : string path ) {
2017-11-15 15:33:17 +08:00
std : : string name = basename ( path ) ;
KeyspaceSnapshotFile f ;
f . fileName = path ;
int len ;
2018-01-06 15:06:39 +08:00
if ( sscanf ( name . c_str ( ) , " snapshot,%lld,%lld,%lld%n " , & f . beginVersion , & f . endVersion , & f . totalSize , & len ) = = 3 & & len = = name . size ( ) ) {
out = f ;
return true ;
}
return false ;
2017-11-15 15:33:17 +08:00
}
	// TODO: Do this more efficiently, as the range file list for a snapshot could potentially be hundreds of megabytes.
	// Read a snapshot manifest, verify its version range, and return the RangeFiles
	// it lists — after confirming every listed file is present in the container now.
	// Throws restore_corrupted_data() on a malformed manifest and
	// restore_missing_data() if any listed range file is absent.
	ACTOR static Future<std::vector<RangeFile>> readKeyspaceSnapshot_impl(Reference<BackupContainerFileSystem> bc, KeyspaceSnapshotFile snapshot) {
		// Read the range file list for the specified version range, and then index them by fileName.
		// This is so we can verify that each of the files listed in the manifest file are also in the container at this time.
		std::vector<RangeFile> files = wait(bc->listRangeFiles(snapshot.beginVersion, snapshot.endVersion));
		state std::map<std::string, RangeFile> rangeIndex;
		for (auto& f : files)
			rangeIndex[f.fileName] = std::move(f);

		// Read the snapshot file, verify the version range, then find each of the range files by name in the index and return them.
		state Reference<IAsyncFile> f = wait(bc->readFile(snapshot.fileName));
		int64_t size = wait(f->size());
		state Standalone<StringRef> buf = makeString(size);
		wait(success(f->read(mutateString(buf), buf.size(), 0)));
		json_spirit::mValue json;
		json_spirit::read_string(buf.toString(), json);
		JSONDoc doc(json);

		// The manifest's version range must exactly match the file name's.
		Version v;
		if (!doc.tryGet("beginVersion", v) || v != snapshot.beginVersion)
			throw restore_corrupted_data();
		if (!doc.tryGet("endVersion", v) || v != snapshot.endVersion)
			throw restore_corrupted_data();

		json_spirit::mValue& filesArray = doc.create("files");
		if (filesArray.type() != json_spirit::array_type)
			throw restore_corrupted_data();

		std::vector<RangeFile> results;
		int missing = 0;

		for (auto const& fileValue : filesArray.get_array()) {
			if (fileValue.type() != json_spirit::str_type)
				throw restore_corrupted_data();

			// If the file is not in the index then log the error but don't throw yet, keep checking the whole list.
			auto i = rangeIndex.find(fileValue.get_str());
			if (i == rangeIndex.end()) {
				TraceEvent(SevError, "FileRestoreMissingRangeFile")
					.detail("URL", bc->getURL())
					.detail("File", fileValue.get_str());

				++missing;
			}

			// No point in using more memory once data is missing since an error will be thrown instead.
			// (When missing > 0, i may be rangeIndex.end() and must not be dereferenced.)
			if (missing == 0) {
				results.push_back(i->second);
			}
		}

		if (missing > 0) {
			TraceEvent(SevError, "FileRestoreMissingRangeFileSummary")
				.detail("URL", bc->getURL())
				.detail("Count", missing);

			throw restore_missing_data();
		}

		return results;
	}
Future < std : : vector < RangeFile > > readKeyspaceSnapshot ( KeyspaceSnapshotFile snapshot ) {
return readKeyspaceSnapshot_impl ( Reference < BackupContainerFileSystem > : : addRef ( this ) , snapshot ) ;
}
	// Write a snapshot manifest listing fileNames, deriving the snapshot's
	// begin/end versions from the range file names themselves.
	// Throws restore_unknown_file_type() if any name is not a valid range file path.
	ACTOR static Future<Void> writeKeyspaceSnapshotFile_impl(Reference<BackupContainerFileSystem> bc, std::vector<std::string> fileNames, int64_t totalBytes) {
		ASSERT(!fileNames.empty());

		state Version minVer = std::numeric_limits<Version>::max();
		state Version maxVer = 0;
		state RangeFile rf;
		state json_spirit::mArray fileArray;
		state int i;

		// Validate each filename, update version range
		for (i = 0; i < fileNames.size(); ++i) {
			auto const& f = fileNames[i];
			if (pathToRangeFile(rf, f, 0)) {
				fileArray.push_back(f);
				if (rf.version < minVer)
					minVer = rf.version;
				if (rf.version > maxVer)
					maxVer = rf.version;
			}
			else
				throw restore_unknown_file_type();
			// Yield between iterations so a long file list doesn't stall the run loop.
			wait(yield());
		}

		state json_spirit::mValue json;
		state JSONDoc doc(json);

		doc.create("files") = std::move(fileArray);
		doc.create("totalBytes") = totalBytes;
		doc.create("beginVersion") = minVer;
		doc.create("endVersion") = maxVer;

		wait(yield());
		state std::string docString = json_spirit::write_string(json);

		// Manifest path encodes the version range and total bytes; see
		// pathToKeyspaceSnapshotFile() for the matching parser.
		state Reference<IBackupFile> f = wait(bc->writeFile(format("snapshots/snapshot,%lld,%lld,%lld", minVer, maxVer, totalBytes)));
		wait(f->append(docString.data(), docString.size()));
		wait(f->finish());

		return Void();
	}
Future < Void > writeKeyspaceSnapshotFile ( std : : vector < std : : string > fileNames , int64_t totalBytes ) {
return writeKeyspaceSnapshotFile_impl ( Reference < BackupContainerFileSystem > : : addRef ( this ) , fileNames , totalBytes ) ;
} ;
2018-12-18 20:33:37 +08:00
	// List log files, unsorted, which contain data at any version >= beginVersion and <= targetVersion
	Future<std::vector<LogFile>> listLogFiles(Version beginVersion = 0, Version targetVersion = std::numeric_limits<Version>::max()) {
		// The first relevant log file could have a begin version less than beginVersion based on the knobs which determine log file range size,
		// so start at an earlier version adjusted by how many versions a file could contain.
		//
		// Get the cleaned (without slashes) first and last folders that could contain relevant results.
		std::string firstPath = cleanFolderString(logVersionFolderString(
			std::max<Version>(0, beginVersion - CLIENT_KNOBS->BACKUP_MAX_LOG_RANGES * CLIENT_KNOBS->LOG_RANGE_BLOCK_SIZE)
		));
		std::string lastPath = cleanFolderString(logVersionFolderString(targetVersion));

		// Accept a folder if it is a lexical prefix of either boundary folder
		// or sorts strictly between them.
		std::function<bool(std::string const&)> pathFilter = [=](const std::string& folderPath) {
			// Remove slashes in the given folder path so that the '/' positions in the version folder string do not matter
			std::string cleaned = cleanFolderString(folderPath);
			return StringRef(firstPath).startsWith(cleaned) || StringRef(lastPath).startsWith(cleaned)
				|| (cleaned > firstPath && cleaned < lastPath);
		};

		return map(listFiles("logs/", pathFilter), [=](const FilesAndSizesT& files) {
			std::vector<LogFile> results;
			LogFile lf;
			for (auto& f : files) {
				// Keep only files whose version range actually overlaps the request.
				if (pathToLogFile(lf, f.first, f.second) && lf.endVersion > beginVersion && lf.beginVersion <= targetVersion)
					results.push_back(lf);
			}
			return results;
		});
	}
2018-01-17 20:09:43 +08:00
2018-12-18 20:33:37 +08:00
	// List range files, unsorted, which contain data at or between beginVersion and endVersion
	// NOTE: This reads the range file folder schema from FDB 6.0.15 and earlier and is provided for backward compatibility
	Future<std::vector<RangeFile>> old_listRangeFiles(Version beginVersion, Version endVersion) {
		// Get the cleaned (without slashes) first and last folders that could contain relevant results.
		std::string firstPath = cleanFolderString(old_rangeVersionFolderString(beginVersion));
		std::string lastPath = cleanFolderString(old_rangeVersionFolderString(endVersion));

		// Accept a folder if it is a lexical prefix of either boundary folder
		// or sorts strictly between them.
		std::function<bool(std::string const&)> pathFilter = [=](const std::string& folderPath) {
			// Remove slashes in the given folder path so that the '/' positions in the version folder string do not matter
			std::string cleaned = cleanFolderString(folderPath);
			return StringRef(firstPath).startsWith(cleaned) || StringRef(lastPath).startsWith(cleaned)
				|| (cleaned > firstPath && cleaned < lastPath);
		};

		return map(listFiles("ranges/", pathFilter), [=](const FilesAndSizesT& files) {
			std::vector<RangeFile> results;
			RangeFile rf;
			for (auto& f : files) {
				// Keep only files whose version is within the requested range.
				if (pathToRangeFile(rf, f.first, f.second) && rf.version >= beginVersion && rf.version <= endVersion)
					results.push_back(rf);
			}
			return results;
		});
	}
2018-12-18 20:33:37 +08:00
	// List range files, unsorted, which contain data at or between beginVersion and endVersion
	// Note: The contents of each top level snapshot.N folder do not necessarily constitute a valid snapshot
	// and therefore listing files is not how RestoreSets are obtained.
	// Note: Snapshots partially written using FDB versions prior to 6.0.16 will have some range files stored
	// using the old folder scheme read by old_listRangeFiles
	Future<std::vector<RangeFile>> listRangeFiles(Version beginVersion, Version endVersion) {
		// Until the old folder scheme is no longer supported, read files stored using old folder scheme
		Future<std::vector<RangeFile>> oldFiles = old_listRangeFiles(beginVersion, endVersion);

		// Define filter function (for listFiles() implementations that use it) to reject any folder
		// starting after endVersion
		std::function<bool(std::string const&)> pathFilter = [=](std::string const& path) {
			// NOTE(review): paths that don't parse yield invalidVersion and so pass this
			// filter; presumably intentional so unrecognized folders are still listed — confirm.
			return extractSnapshotBeginVersion(path) <= endVersion;
		};

		Future<std::vector<RangeFile>> newFiles = map(listFiles("kvranges/", pathFilter), [=](const FilesAndSizesT& files) {
			std::vector<RangeFile> results;
			RangeFile rf;
			for (auto& f : files) {
				if (pathToRangeFile(rf, f.first, f.second) && rf.version >= beginVersion && rf.version <= endVersion)
					results.push_back(rf);
			}
			return results;
		});

		// When both listings are ready, concatenate the old-scheme results onto the new ones.
		return map(success(oldFiles) && success(newFiles), [=](Void _) {
			std::vector<RangeFile> results = std::move(newFiles.get());
			std::vector<RangeFile> oldResults = std::move(oldFiles.get());
			results.insert(results.end(), std::make_move_iterator(oldResults.begin()), std::make_move_iterator(oldResults.end()));
			return results;
		});
	}
2018-01-17 20:09:43 +08:00
2018-12-22 14:42:29 +08:00
// List snapshots which have been fully written, in sorted beginVersion order, which start before end and finish on or after begin
Future < std : : vector < KeyspaceSnapshotFile > > listKeyspaceSnapshots ( Version begin = 0 , Version end = std : : numeric_limits < Version > : : max ( ) ) {
2017-11-15 15:33:17 +08:00
return map ( listFiles ( " snapshots/ " ) , [ = ] ( const FilesAndSizesT & files ) {
std : : vector < KeyspaceSnapshotFile > results ;
2018-01-06 15:06:39 +08:00
KeyspaceSnapshotFile sf ;
2017-11-15 15:33:17 +08:00
for ( auto & f : files ) {
2018-12-22 14:42:29 +08:00
if ( pathToKeyspaceSnapshotFile ( sf , f . first ) & & sf . beginVersion < end & & sf . endVersion > = begin )
2017-11-15 15:33:17 +08:00
results . push_back ( sf ) ;
}
std : : sort ( results . begin ( ) , results . end ( ) ) ;
return results ;
} ) ;
}
2018-12-22 14:42:29 +08:00
	// Gather the container's complete file listing (range files, logs, and
	// snapshot manifests) for versions within [begin, end], issuing all three
	// listings concurrently.
	ACTOR static Future<BackupFileList> dumpFileList_impl(Reference<BackupContainerFileSystem> bc, Version begin, Version end) {
		state Future<std::vector<RangeFile>> fRanges = bc->listRangeFiles(begin, end);
		state Future<std::vector<KeyspaceSnapshotFile>> fSnapshots = bc->listKeyspaceSnapshots(begin, end);
		state Future<std::vector<LogFile>> fLogs = bc->listLogFiles(begin, end);

		wait(success(fRanges) && success(fSnapshots) && success(fLogs));

		return BackupFileList({ fRanges.get(), fLogs.get(), fSnapshots.get() });
	}
2018-12-22 14:42:29 +08:00
Future < BackupFileList > dumpFileList ( Version begin , Version end ) {
return dumpFileList_impl ( Reference < BackupContainerFileSystem > : : addRef ( this ) , begin , end ) ;
2017-11-19 20:28:22 +08:00
}
2018-12-19 10:55:44 +08:00
static Version resolveRelativeVersion ( Optional < Version > max , Version v , const char * name , Error e ) {
if ( v = = invalidVersion ) {
TraceEvent ( SevError , " BackupExpireInvalidVersion " ) . detail ( name , v ) ;
throw e ;
}
if ( v < 0 ) {
if ( ! max . present ( ) ) {
TraceEvent ( SevError , " BackupExpireCannotResolveRelativeVersion " ) . detail ( name , v ) ;
throw e ;
}
v + = max . get ( ) ;
}
return v ;
2017-11-19 20:28:22 +08:00
}
2018-12-18 20:33:37 +08:00
	// Compute a full BackupDescription for this container by combining the cached version-property
	// metadata with (when necessary) a scan of the files actually present.
	//   deepScan                - if true, ignore the stored log begin/end metadata and recalculate
	//                             both from the file listings.
	//   logStartVersionOverride - invalidVersion for none; a negative value is relative to the
	//                             backup's max log version and is resolved via a recursive
	//                             describe call.
	// Throws backup_does_not_exist if the container is missing.
	ACTOR static Future<BackupDescription> describeBackup_impl(Reference<BackupContainerFileSystem> bc, bool deepScan, Version logStartVersionOverride) {
		state BackupDescription desc;
		desc.url = bc->getURL();

		TraceEvent("BackupContainerDescribe1")
			.detail("URL", bc->getURL())
			.detail("LogStartVersionOverride", logStartVersionOverride);

		bool e = wait(bc->exists());
		if(!e) {
			TraceEvent(SevWarnAlways, "BackupContainerDoesNotExist").detail("URL", bc->getURL());
			throw backup_does_not_exist();
		}

		// If logStartVersion is relative, then first do a recursive call without it to find the max log version
		// from which to resolve the relative version.
		// This could be handled more efficiently without recursion but it's tricky, this will do for now.
		if(logStartVersionOverride != invalidVersion && logStartVersionOverride < 0) {
			BackupDescription tmp = wait(bc->describeBackup(false, invalidVersion));
			logStartVersionOverride = resolveRelativeVersion(tmp.maxLogEnd, logStartVersionOverride, "LogStartVersionOverride", invalid_option_value());
		}

		// Get metadata versions
		state Optional<Version> metaLogBegin;
		state Optional<Version> metaLogEnd;
		state Optional<Version> metaExpiredEnd;
		state Optional<Version> metaUnreliableEnd;

		std::vector<Future<Void>> metaReads;
		metaReads.push_back(store(metaExpiredEnd, bc->expiredEndVersion().get()));
		metaReads.push_back(store(metaUnreliableEnd, bc->unreliableEndVersion().get()));

		// Only read log begin/end versions if not doing a deep scan, otherwise scan files and recalculate them.
		if(!deepScan) {
			metaReads.push_back(store(metaLogBegin, bc->logBeginVersion().get()));
			metaReads.push_back(store(metaLogEnd, bc->logEndVersion().get()));
		}

		wait(waitForAll(metaReads));

		TraceEvent("BackupContainerDescribe2")
			.detail("URL", bc->getURL())
			.detail("LogStartVersionOverride", logStartVersionOverride)
			.detail("ExpiredEndVersion", metaExpiredEnd.orDefault(invalidVersion))
			.detail("UnreliableEndVersion", metaUnreliableEnd.orDefault(invalidVersion))
			.detail("LogBeginVersion", metaLogBegin.orDefault(invalidVersion))
			.detail("LogEndVersion", metaLogEnd.orDefault(invalidVersion));

		// If the logStartVersionOverride is positive (not relative) then ensure that unreliableEndVersion is equal or greater
		if(logStartVersionOverride != invalidVersion && metaUnreliableEnd.orDefault(invalidVersion) < logStartVersionOverride) {
			metaUnreliableEnd = logStartVersionOverride;
		}

		// Don't use metaLogBegin or metaLogEnd if any of the following are true, the safest
		// thing to do is rescan to verify log continuity and get exact begin/end versions
		//   - either are missing
		//   - metaLogEnd <= metaLogBegin       (invalid range)
		//   - metaLogEnd < metaExpiredEnd      (log continuity exists in missing data range)
		//   - metaLogEnd < metaUnreliableEnd   (log continuity exists in incomplete data range)
		if(!metaLogBegin.present() || !metaLogEnd.present()
			|| metaLogEnd.get() <= metaLogBegin.get()
			|| metaLogEnd.get() < metaExpiredEnd.orDefault(invalidVersion)
			|| metaLogEnd.get() < metaUnreliableEnd.orDefault(invalidVersion)
		) {
			TraceEvent(SevWarnAlways, "BackupContainerMetadataInvalid")
				.detail("URL", bc->getURL())
				.detail("ExpiredEndVersion", metaExpiredEnd.orDefault(invalidVersion))
				.detail("UnreliableEndVersion", metaUnreliableEnd.orDefault(invalidVersion))
				.detail("LogBeginVersion", metaLogBegin.orDefault(invalidVersion))
				.detail("LogEndVersion", metaLogEnd.orDefault(invalidVersion));

			metaLogBegin = Optional<Version>();
			metaLogEnd = Optional<Version>();
		}

		// If the unreliable end version is not set or is < expiredEndVersion then increase it to expiredEndVersion.
		// Describe does not update unreliableEnd in the backup metadata for safety reasons as there is no
		// compare-and-set operation to atomically change it and an expire process could be advancing it simultaneously.
		if(!metaUnreliableEnd.present() || metaUnreliableEnd.get() < metaExpiredEnd.orDefault(0))
			metaUnreliableEnd = metaExpiredEnd;

		desc.unreliableEndVersion = metaUnreliableEnd;
		desc.expiredEndVersion = metaExpiredEnd;

		// Start scanning at the end of the unreliable version range, which is the version before which data is likely
		// missing because an expire process has operated on that range.
		state Version scanBegin = desc.unreliableEndVersion.orDefault(0);
		state Version scanEnd = std::numeric_limits<Version>::max();

		// Use the known log range if present
		// Logs are assumed to be contiguous between metaLogBegin and metaLogEnd, so initialize desc accordingly
		if(metaLogBegin.present() && metaLogEnd.present()) {
			// minLogBegin is the greater of the log begin metadata OR the unreliable end version since we can't count
			// on log file presence before that version.
			desc.minLogBegin = std::max(metaLogBegin.get(), desc.unreliableEndVersion.orDefault(0));

			// Set the maximum known end version of a log file, so far, which is also the assumed contiguous log file end version
			desc.maxLogEnd = metaLogEnd.get();
			desc.contiguousLogEnd = desc.maxLogEnd;

			// Advance scanBegin to the contiguous log end version
			scanBegin = desc.contiguousLogEnd.get();
		}

		state std::vector<LogFile> logs;
		wait(store(logs, bc->listLogFiles(scanBegin, scanEnd)) && store(desc.snapshots, bc->listKeyspaceSnapshots()));

		// List logs in version order so log continuity can be analyzed
		std::sort(logs.begin(), logs.end());

		if(!logs.empty()) {
			desc.maxLogEnd = logs.rbegin()->endVersion;

			auto i = logs.begin();
			// If we didn't get log versions above then seed them using the first log file
			if(!desc.contiguousLogEnd.present()) {
				desc.minLogBegin = i->beginVersion;
				desc.contiguousLogEnd = i->endVersion;
				++i;
			}
			auto &end = desc.contiguousLogEnd.get();  // For convenience to make loop cleaner

			// Advance until continuity is broken
			while(i != logs.end()) {
				if(i->beginVersion > end)
					break;
				// If the next link in the log chain is found, update the end
				if(i->beginVersion == end)
					end = i->endVersion;
				++i;
			}
		}

		// Only update stored contiguous log begin and end versions if we did NOT use a log start override.
		// Otherwise, a series of describe operations can result in a version range which is actually missing data.
		if(logStartVersionOverride == invalidVersion) {
			// If the log metadata begin/end versions are missing (or treated as missing due to invalidity) or
			// differ from the newly calculated values for minLogBegin and contiguousLogEnd, respectively,
			// then attempt to update the metadata in the backup container but ignore errors in case the
			// container is not writeable.
			try {
				state Future<Void> updates = Void();

				if(desc.minLogBegin.present() && metaLogBegin != desc.minLogBegin) {
					updates = updates && bc->logBeginVersion().set(desc.minLogBegin.get());
				}

				if(desc.contiguousLogEnd.present() && metaLogEnd != desc.contiguousLogEnd) {
					updates = updates && bc->logEndVersion().set(desc.contiguousLogEnd.get());
				}

				wait(updates);
			} catch(Error &e) {
				// Propagate cancellation; any other failure just means the metadata cache is stale
				if(e.code() == error_code_actor_cancelled)
					throw;
				TraceEvent(SevWarn, "BackupContainerMetadataUpdateFailure")
					.error(e)
					.detail("URL", bc->getURL());
			}
		}

		for(auto &s : desc.snapshots) {
			// Calculate restorability of each snapshot.  Assume true, then try to prove false
			s.restorable = true;
			// If this is not a single-version snapshot then see if the available contiguous logs cover its range
			if(s.beginVersion != s.endVersion) {
				if(!desc.minLogBegin.present() || desc.minLogBegin.get() > s.beginVersion)
					s.restorable = false;
				if(!desc.contiguousLogEnd.present() || desc.contiguousLogEnd.get() <= s.endVersion)
					s.restorable = false;
			}

			desc.snapshotBytes += s.totalSize;

			// If the snapshot is at a single version then it requires no logs.  Update min and max restorable.
			// TODO:  Somehow check / report if the restorable range is not or may not be contiguous.
			if(s.beginVersion == s.endVersion) {
				if(!desc.minRestorableVersion.present() || s.endVersion < desc.minRestorableVersion.get())
					desc.minRestorableVersion = s.endVersion;
				if(!desc.maxRestorableVersion.present() || s.endVersion > desc.maxRestorableVersion.get())
					desc.maxRestorableVersion = s.endVersion;
			}

			// If the snapshot is covered by the contiguous log chain then update min/max restorable.
			if(desc.minLogBegin.present() && s.beginVersion >= desc.minLogBegin.get() && s.endVersion < desc.contiguousLogEnd.get()) {
				if(!desc.minRestorableVersion.present() || s.endVersion < desc.minRestorableVersion.get())
					desc.minRestorableVersion = s.endVersion;

				if(!desc.maxRestorableVersion.present() || (desc.contiguousLogEnd.get() - 1) > desc.maxRestorableVersion.get())
					desc.maxRestorableVersion = desc.contiguousLogEnd.get() - 1;
			}
		}

		return desc;
	}
// Uses the virtual methods to describe the backup contents
2018-12-18 20:33:37 +08:00
Future < BackupDescription > describeBackup ( bool deepScan , Version logStartVersionOverride ) {
return describeBackup_impl ( Reference < BackupContainerFileSystem > : : addRef ( this ) , deepScan , logStartVersionOverride ) ;
2017-11-15 15:33:17 +08:00
}
2018-12-20 16:23:26 +08:00
ACTOR static Future < Void > expireData_impl ( Reference < BackupContainerFileSystem > bc , Version expireEndVersion , bool force , ExpireProgress * progress , Version restorableBeginVersion ) {
if ( progress ! = nullptr ) {
progress - > step = " Describing backup " ;
progress - > total = 0 ;
}
2018-01-17 20:09:43 +08:00
2018-12-19 10:55:44 +08:00
TraceEvent ( " BackupContainerFileSystemExpire1 " )
. detail ( " URL " , bc - > getURL ( ) )
. detail ( " ExpireEndVersion " , expireEndVersion )
. detail ( " RestorableBeginVersion " , restorableBeginVersion ) ;
2018-01-17 20:09:43 +08:00
// Get the backup description.
2018-12-18 20:33:37 +08:00
state BackupDescription desc = wait ( bc - > describeBackup ( false , expireEndVersion ) ) ;
2018-01-17 20:09:43 +08:00
2018-12-19 10:55:44 +08:00
// Resolve relative versions using max log version
2018-12-20 05:14:48 +08:00
expireEndVersion = resolveRelativeVersion ( desc . maxLogEnd , expireEndVersion , " ExpireEndVersion " , invalid_option_value ( ) ) ;
restorableBeginVersion = resolveRelativeVersion ( desc . maxLogEnd , restorableBeginVersion , " RestorableBeginVersion " , invalid_option_value ( ) ) ;
2018-12-19 10:55:44 +08:00
2018-12-20 05:14:48 +08:00
// It would be impossible to have restorability to any version < expireEndVersion after expiring to that version
2018-12-19 10:55:44 +08:00
if ( restorableBeginVersion < expireEndVersion )
throw backup_cannot_expire ( ) ;
2018-12-16 16:18:13 +08:00
// If the expire request is to a version at or before the previous version to which data was already deleted
// then do nothing and just return
2018-12-18 20:33:37 +08:00
if ( expireEndVersion < = desc . expiredEndVersion . orDefault ( invalidVersion ) ) {
2018-12-16 16:18:13 +08:00
return Void ( ) ;
}
2018-01-17 20:09:43 +08:00
// Assume force is needed, then try to prove otherwise.
// Force is required if there is not a restorable snapshot which both
// - begins at or after expireEndVersion
// - ends at or before restorableBeginVersion
2018-03-09 03:27:15 +08:00
state bool forceNeeded = true ;
2018-01-17 20:09:43 +08:00
for ( KeyspaceSnapshotFile & s : desc . snapshots ) {
if ( s . restorable . orDefault ( false ) & & s . beginVersion > = expireEndVersion & & s . endVersion < = restorableBeginVersion ) {
forceNeeded = false ;
break ;
}
}
2018-12-20 02:36:25 +08:00
// If force is needed but not passed then refuse to expire anything.
// Note that it is possible for there to be no actual files in the backup prior to expireEndVersion,
// if they were externally deleted or an expire operation deleted them but was terminated before
// updating expireEndVersion
if ( forceNeeded & & ! force )
throw backup_cannot_expire ( ) ;
2018-01-17 20:09:43 +08:00
2018-12-16 16:18:13 +08:00
// Start scan for files to delete at the last completed expire operation's end or 0.
state Version scanBegin = desc . expiredEndVersion . orDefault ( 0 ) ;
2018-01-17 20:09:43 +08:00
2018-12-19 10:55:44 +08:00
TraceEvent ( " BackupContainerFileSystemExpire2 " )
2018-12-16 16:33:30 +08:00
. detail ( " URL " , bc - > getURL ( ) )
2018-11-28 01:50:39 +08:00
. detail ( " ExpireEndVersion " , expireEndVersion )
2018-12-19 10:55:44 +08:00
. detail ( " RestorableBeginVersion " , restorableBeginVersion )
2018-12-16 16:18:13 +08:00
. detail ( " ScanBeginVersion " , scanBegin ) ;
2018-11-15 18:15:25 +08:00
2018-12-18 20:33:37 +08:00
state std : : vector < LogFile > logs ;
state std : : vector < RangeFile > ranges ;
2018-11-28 01:50:39 +08:00
2018-12-20 16:23:26 +08:00
if ( progress ! = nullptr ) {
progress - > step = " Listing files " ;
}
2018-12-18 20:33:37 +08:00
// Get log files or range files that contain any data at or before expireEndVersion
2018-10-05 13:18:15 +08:00
wait ( store ( logs , bc - > listLogFiles ( scanBegin , expireEndVersion - 1 ) ) & & store ( ranges , bc - > listRangeFiles ( scanBegin , expireEndVersion - 1 ) ) ) ;
2018-01-17 20:09:43 +08:00
// The new logBeginVersion will be taken from the last log file, if there is one
state Optional < Version > newLogBeginVersion ;
if ( ! logs . empty ( ) ) {
2018-12-18 20:33:37 +08:00
// Linear scan the unsorted logs to find the latest one in sorted order
LogFile & last = * std : : max_element ( logs . begin ( ) , logs . end ( ) ) ;
2018-01-17 20:09:43 +08:00
// If the last log ends at expireEndVersion then that will be the next log begin
if ( last . endVersion = = expireEndVersion ) {
newLogBeginVersion = expireEndVersion ;
}
else {
2018-03-10 03:29:23 +08:00
// If the last log overlaps the expiredEnd then use the log's begin version and move the expiredEnd
2018-12-18 20:33:37 +08:00
// back to match it and keep the last log file
2018-01-17 20:09:43 +08:00
if ( last . endVersion > expireEndVersion ) {
newLogBeginVersion = last . beginVersion ;
2018-12-18 20:33:37 +08:00
// Instead of modifying this potentially very large vector, just clear LogFile
last = LogFile ( ) ;
2018-03-10 03:29:23 +08:00
expireEndVersion = newLogBeginVersion . get ( ) ;
2018-01-17 20:09:43 +08:00
}
}
}
2018-03-09 03:27:15 +08:00
// Make a list of files to delete
state std : : vector < std : : string > toDelete ;
2017-11-15 15:33:17 +08:00
2018-03-09 03:27:15 +08:00
// Move filenames out of vector then destroy it to save memory
2018-01-17 20:09:43 +08:00
for ( auto const & f : logs ) {
2018-12-18 20:33:37 +08:00
// We may have cleared the last log file earlier so skip any empty filenames
if ( ! f . fileName . empty ( ) ) {
toDelete . push_back ( std : : move ( f . fileName ) ) ;
}
2018-01-17 20:09:43 +08:00
}
2018-03-09 03:27:15 +08:00
logs . clear ( ) ;
2017-11-15 15:33:17 +08:00
2018-03-09 03:27:15 +08:00
// Move filenames out of vector then destroy it to save memory
2018-01-17 20:09:43 +08:00
for ( auto const & f : ranges ) {
2018-11-28 01:50:39 +08:00
// The file version must be checked here again because it is likely that expireEndVersion is in the middle of a log file, in which case
// after the log and range file listings are done (using the original expireEndVersion) the expireEndVersion will be moved back slightly
// to the begin version of the last log file found (which is also the first log to not be deleted)
if ( f . version < expireEndVersion ) {
toDelete . push_back ( std : : move ( f . fileName ) ) ;
}
2018-01-17 20:09:43 +08:00
}
2018-03-09 03:27:15 +08:00
ranges . clear ( ) ;
2017-11-15 15:33:17 +08:00
2018-01-17 20:09:43 +08:00
for ( auto const & f : desc . snapshots ) {
if ( f . endVersion < expireEndVersion )
2018-03-09 03:27:15 +08:00
toDelete . push_back ( std : : move ( f . fileName ) ) ;
2018-01-17 20:09:43 +08:00
}
2018-03-10 04:03:10 +08:00
desc = BackupDescription ( ) ;
2017-11-15 15:33:17 +08:00
2018-12-16 16:18:13 +08:00
// We are about to start deleting files, at which point all data prior to expireEndVersion is considered
// 'unreliable' as some or all of it will be missing. So before deleting anything, read unreliableEndVersion
// (don't use cached value in desc) and update its value if it is missing or < expireEndVersion
2018-12-20 16:23:26 +08:00
if ( progress ! = nullptr ) {
progress - > step = " Initial metadata update " ;
2018-03-10 03:29:23 +08:00
}
2018-12-16 16:18:13 +08:00
Optional < Version > metaUnreliableEnd = wait ( bc - > unreliableEndVersion ( ) . get ( ) ) ;
if ( metaUnreliableEnd . orDefault ( 0 ) < expireEndVersion ) {
2019-01-10 08:14:46 +08:00
wait ( bc - > unreliableEndVersion ( ) . set ( expireEndVersion ) ) ;
2018-03-10 03:29:23 +08:00
}
2018-12-20 16:23:26 +08:00
if ( progress ! = nullptr ) {
progress - > step = " Deleting files " ;
progress - > total = toDelete . size ( ) ;
progress - > done = 0 ;
2018-03-10 03:29:23 +08:00
}
2018-03-09 03:27:15 +08:00
// Delete files, but limit parallelism because the file list could use a lot of memory and the corresponding
// delete actor states would use even more if they all existed at the same time.
state std : : list < Future < Void > > deleteFutures ;
while ( ! toDelete . empty ( ) | | ! deleteFutures . empty ( ) ) {
// While there are files to delete and budget in the deleteFutures list, start a delete
while ( ! toDelete . empty ( ) & & deleteFutures . size ( ) < CLIENT_KNOBS - > BACKUP_CONCURRENT_DELETES ) {
deleteFutures . push_back ( bc - > deleteFile ( toDelete . back ( ) ) ) ;
toDelete . pop_back ( ) ;
}
// Wait for deletes to finish until there are only targetDeletesInFlight remaining.
// If there are no files left to start then this value is 0, otherwise it is one less
// than the delete concurrency limit.
state int targetFuturesSize = toDelete . empty ( ) ? 0 : ( CLIENT_KNOBS - > BACKUP_CONCURRENT_DELETES - 1 ) ;
while ( deleteFutures . size ( ) > targetFuturesSize ) {
2018-08-11 04:57:10 +08:00
wait ( deleteFutures . front ( ) ) ;
2018-12-20 16:23:26 +08:00
if ( progress ! = nullptr ) {
+ + progress - > done ;
}
2018-03-09 03:27:15 +08:00
deleteFutures . pop_front ( ) ;
}
}
2018-01-17 20:09:43 +08:00
2018-12-20 16:23:26 +08:00
if ( progress ! = nullptr ) {
progress - > step = " Final metadata update " ;
progress - > total = 0 ;
}
2018-12-16 16:18:13 +08:00
// Update the expiredEndVersion metadata to indicate that everything prior to that version has been
2018-12-18 20:33:37 +08:00
// successfully deleted if the current version is lower or missing
2018-12-16 16:18:13 +08:00
Optional < Version > metaExpiredEnd = wait ( bc - > expiredEndVersion ( ) . get ( ) ) ;
if ( metaExpiredEnd . orDefault ( 0 ) < expireEndVersion ) {
2019-01-10 08:14:46 +08:00
wait ( bc - > expiredEndVersion ( ) . set ( expireEndVersion ) ) ;
2018-12-16 16:18:13 +08:00
}
2018-01-17 20:09:43 +08:00
2017-11-15 15:33:17 +08:00
return Void ( ) ;
}
// Delete all data up to (but not including endVersion)
2018-12-20 16:23:26 +08:00
Future < Void > expireData ( Version expireEndVersion , bool force , ExpireProgress * progress , Version restorableBeginVersion ) {
return expireData_impl ( Reference < BackupContainerFileSystem > : : addRef ( this ) , expireEndVersion , force , progress , restorableBeginVersion ) ;
2017-11-15 15:33:17 +08:00
}
2017-11-19 20:28:22 +08:00
	// Find a set of files sufficient to restore the backup to targetVersion: the most recent
	// keyspace snapshot ending at or before targetVersion, plus (if the snapshot is not a
	// single-version snapshot at exactly targetVersion) a contiguous chain of log files covering
	// the snapshot's begin version through at least targetVersion.  Returns an empty Optional if
	// no such restorable set exists.
	ACTOR static Future<Optional<RestorableFileSet>> getRestoreSet_impl(Reference<BackupContainerFileSystem> bc, Version targetVersion) {
		// Find the most recent keyrange snapshot to end at or before targetVersion
		state Optional<KeyspaceSnapshotFile> snapshot;
		std::vector<KeyspaceSnapshotFile> snapshots = wait(bc->listKeyspaceSnapshots());
		// snapshots is sorted, so the last match is the most recent qualifying snapshot
		for(auto const &s : snapshots) {
			if(s.endVersion <= targetVersion)
				snapshot = s;
		}

		if(snapshot.present()) {
			state RestorableFileSet restorable;
			restorable.snapshot = snapshot.get();
			restorable.targetVersion = targetVersion;

			std::vector<RangeFile> ranges = wait(bc->readKeyspaceSnapshot(snapshot.get()));
			restorable.ranges = ranges;

			// No logs needed if there is a complete key space snapshot at the target version.
			if(snapshot.get().beginVersion == snapshot.get().endVersion && snapshot.get().endVersion == targetVersion)
				return Optional<RestorableFileSet>(restorable);

			state std::vector<LogFile> logs = wait(bc->listLogFiles(snapshot.get().beginVersion, targetVersion));

			// List logs in version order so log continuity can be analyzed
			std::sort(logs.begin(), logs.end());

			// If there are logs and the first one starts at or before the snapshot begin version then proceed
			if(!logs.empty() && logs.front().beginVersion <= snapshot.get().beginVersion) {
				auto i = logs.begin();
				Version end = i->endVersion;
				restorable.logs.push_back(*i);

				// Add logs to restorable logs set until continuity is broken OR we reach targetVersion
				while(++i != logs.end()) {
					if(i->beginVersion > end || i->beginVersion > targetVersion)
						break;
					// If the next link in the log chain is found, update the end
					if(i->beginVersion == end) {
						restorable.logs.push_back(*i);
						end = i->endVersion;
					}
				}

				// The log chain must reach targetVersion for the set to be restorable
				if(end >= targetVersion) {
					return Optional<RestorableFileSet>(restorable);
				}
			}
		}

		return Optional<RestorableFileSet>();
	}
Future < Optional < RestorableFileSet > > getRestoreSet ( Version targetVersion ) {
return getRestoreSet_impl ( Reference < BackupContainerFileSystem > : : addRef ( this ) , targetVersion ) ;
}
2018-01-17 20:09:43 +08:00
private :
	// A named Version value persisted as a small text file in the container at properties/<name>.
	// Used to cache important version boundaries so the whole container need not be scanned.
	struct VersionProperty {
		VersionProperty(Reference<BackupContainerFileSystem> bc, std::string name) : bc(bc), path("properties/" + name) {}
		Reference<BackupContainerFileSystem> bc;
		std::string path;

		// Read the stored version; empty Optional if the property file does not exist.
		Future<Optional<Version>> get() {
			return readVersionProperty(bc, path);
		}
		// Overwrite the property file with v.
		Future<Void> set(Version v) {
			return writeVersionProperty(bc, path, v);
		}
		// Remove the property file entirely.
		Future<Void> clear() {
			return bc->deleteFile(path);
		}
	};
public :
	// To avoid the need to scan the underlying filesystem in many cases, some important version boundaries are stored in named files.
	// These versions also indicate what version ranges are known to be deleted or partially deleted.
	//
	// The values below describe version ranges as follows:
	//                   0 - expiredEndVersion      All files in this range have been deleted
	//   expiredEndVersion - unreliableEndVersion   Some files in this range may have been deleted.
	//
	//     logBeginVersion - logEnd                 Log files are contiguous in this range and have NOT been deleted by fdbbackup
	//              logEnd - infinity               Files in this range may or may not exist yet
	//
	// Accessors for the stored version-boundary properties described above.
	VersionProperty logBeginVersion() { return {Reference<BackupContainerFileSystem>::addRef(this), "log_begin_version"}; }
	VersionProperty logEndVersion() { return {Reference<BackupContainerFileSystem>::addRef(this), "log_end_version"}; }
	VersionProperty expiredEndVersion() { return {Reference<BackupContainerFileSystem>::addRef(this), "expired_end_version"}; }
	VersionProperty unreliableEndVersion() { return {Reference<BackupContainerFileSystem>::addRef(this), "unreliable_end_version"}; }
2018-01-17 20:09:43 +08:00
	// Write Version v as decimal text to the property file at path, replacing any existing
	// contents.  Logs a warning and rethrows on failure.
	ACTOR static Future<Void> writeVersionProperty(Reference<BackupContainerFileSystem> bc, std::string path, Version v) {
		try {
			state Reference<IBackupFile> f = wait(bc->writeFile(path));
			// Versions are stored in their decimal text representation
			std::string s = format("%lld", v);
			wait(f->append(s.data(), s.size()));
			wait(f->finish());
			return Void();
		} catch(Error &e) {
			TraceEvent(SevWarn, "BackupContainerWritePropertyFailed")
				.error(e)
				.detail("URL", bc->getURL())
				.detail("Path", path);
			throw;
		}
	}
	// Read and parse the Version stored in the property file at path.
	// Returns an empty Optional if the file does not exist.  Throws backup_invalid_info if the
	// contents are not exactly one decimal integer; logs and rethrows any other error.
	ACTOR static Future<Optional<Version>> readVersionProperty(Reference<BackupContainerFileSystem> bc, std::string path) {
		try {
			state Reference<IAsyncFile> f = wait(bc->readFile(path));
			state int64_t size = wait(f->size());
			state std::string s;
			s.resize(size);
			int rs = wait(f->read((uint8_t *)s.data(), size, 0));
			Version v;
			int len;
			// The entire file must parse as a single decimal integer with no trailing bytes
			if(rs == size && sscanf(s.c_str(), "%lld%n", &v, &len) == 1 && len == size)
				return v;

			TraceEvent(SevWarn, "BackupContainerInvalidProperty")
				.detail("URL", bc->getURL())
				.detail("Path", path);

			throw backup_invalid_info();
		} catch(Error &e) {
			// A missing property file is a normal condition, not an error
			if(e.code() == error_code_file_not_found)
				return Optional<Version>();

			TraceEvent(SevWarn, "BackupContainerReadPropertyFailed")
				.error(e)
				.detail("URL", bc->getURL())
				.detail("Path", path);

			throw;
		}
	}
2017-11-15 15:33:17 +08:00
} ;
class BackupContainerLocalDirectory : public BackupContainerFileSystem , ReferenceCounted < BackupContainerLocalDirectory > {
2017-05-26 04:48:44 +08:00
public :
	// Reference counting is delegated to the ReferenceCounted base of this concrete class.
	void addref() { return ReferenceCounted<BackupContainerLocalDirectory>::addref(); }
	void delref() { return ReferenceCounted<BackupContainerLocalDirectory>::delref(); }

	// Human-readable URL pattern for this container type, shown in usage/help output.
	static std::string getURLFormat() { return "file://</path/to/base/dir/>"; }
2018-01-18 03:35:34 +08:00
BackupContainerLocalDirectory ( std : : string url ) {
2017-05-26 04:48:44 +08:00
std : : string path ;
if ( url . find ( " file:// " ) ! = 0 ) {
TraceEvent ( SevWarn , " BackupContainerLocalDirectory " ) . detail ( " Description " , " Invalid URL for BackupContainerLocalDirectory " ) . detail ( " URL " , url ) ;
}
path = url . substr ( 7 ) ;
// Remove trailing slashes on path
path . erase ( path . find_last_not_of ( " \\ / " ) + 1 ) ;
if ( ! g_network - > isSimulated ( ) & & path ! = abspath ( path ) ) {
TraceEvent ( SevWarn , " BackupContainerLocalDirectory " ) . detail ( " Description " , " Backup path must be absolute (e.g. file:///some/path) " ) . detail ( " URL " , url ) . detail ( " Path " , path ) ;
throw io_error ( ) ;
}
// Finalized path written to will be will be <path>/backup-<uid>
m_path = path ;
}
2017-12-13 09:44:03 +08:00
static Future < std : : vector < std : : string > > listURLs ( std : : string url ) {
std : : string path ;
if ( url . find ( " file:// " ) ! = 0 ) {
TraceEvent ( SevWarn , " BackupContainerLocalDirectory " ) . detail ( " Description " , " Invalid URL for BackupContainerLocalDirectory " ) . detail ( " URL " , url ) ;
}
path = url . substr ( 7 ) ;
// Remove trailing slashes on path
path . erase ( path . find_last_not_of ( " \\ / " ) + 1 ) ;
if ( ! g_network - > isSimulated ( ) & & path ! = abspath ( path ) ) {
TraceEvent ( SevWarn , " BackupContainerLocalDirectory " ) . detail ( " Description " , " Backup path must be absolute (e.g. file:///some/path) " ) . detail ( " URL " , url ) . detail ( " Path " , path ) ;
throw io_error ( ) ;
}
std : : vector < std : : string > dirs = platform : : listDirectories ( path ) ;
std : : vector < std : : string > results ;
for ( auto & r : dirs ) {
if ( r = = " . " | | r = = " .. " )
continue ;
results . push_back ( std : : string ( " file:// " ) + joinPath ( path , r ) ) ;
}
return results ;
}
2017-05-26 04:48:44 +08:00
	// Intentionally a no-op; see comment below.
	Future<Void> create() {
		// Nothing should be done here because create() can be called by any process working with the container URL, such as fdbbackup.
		// Since "local directory" containers are by definition local to the machine they are accessed from,
		// the container's creation (in this case the creation of a directory) must be ensured prior to every file creation,
		// which is done in openFile().
		// Creating the directory here will result in unnecessary directories being created on machines that run fdbbackup but not agents.
		return Void();
	}
2018-12-21 10:05:23 +08:00
	// The container exists if the folder it resides in exists
	Future<bool> exists() {
		return directoryExists(m_path);
	}
2017-11-15 15:33:17 +08:00
// Open a file in the container for reading.
// In simulation, a uniquely named symlink is opened instead of the real file, and the
// returned file is wrapped in a randomly configured read-ahead cache to exercise edge
// cases in the read path.  `path` is container-relative.
Future<Reference<IAsyncFile>> readFile(std::string path) {
	int flags = IAsyncFile::OPEN_NO_AIO | IAsyncFile::OPEN_READONLY | IAsyncFile::OPEN_UNCACHED;
	// Simulation does not properly handle opening the same file from multiple machines using a shared filesystem,
	// so create a symbolic link to make each file opening appear to be unique.  This could also work in production
	// but only if the source directory is writeable which shouldn't be required for a restore.
	std::string fullPath = joinPath(m_path, path);
	#ifndef _WIN32
	if(g_network->isSimulated()) {
		if(!fileExists(fullPath))
			throw file_not_found();
		std::string uniquePath = fullPath + "." + g_random->randomUniqueID().toString() + ".lnk";
		unlink(uniquePath.c_str());
		ASSERT(symlink(basename(path).c_str(), uniquePath.c_str()) == 0);
		// Open the unique symlink rather than the shared target.
		// (Fixed: was a redundant self-assignment chain `fullPath = uniquePath = uniquePath`.)
		fullPath = uniquePath;
	}
	// Opening cached mode forces read/write mode at a lower level, overriding the readonly request.  So cached mode
	// can't be used because backup files are read-only.  Cached mode can only help during restore task retries handled
	// by the same process that failed the first task execution anyway, which is a very rare case.
	#endif

	Future<Reference<IAsyncFile>> f = IAsyncFileSystem::filesystem()->open(fullPath, flags, 0644);

	if(g_network->isSimulated()) {
		int blockSize = 0;
		// Extract block size from the filename, if present (encoded after the last comma)
		size_t lastComma = path.find_last_of(',');
		if(lastComma != path.npos) {
			blockSize = atoi(path.substr(lastComma + 1).c_str());
		}
		if(blockSize <= 0) {
			// No encoded block size; pick a random one
			blockSize = g_random->randomInt(1e4, 1e6);
		}
		if(g_random->random01() < .01) {
			// Rarely shrink the block size to exercise misaligned reads
			blockSize /= g_random->randomInt(1, 3);
		}

		return map(f, [=](Reference<IAsyncFile> fr) {
			int readAhead = g_random->randomInt(0, 3);
			int reads = g_random->randomInt(1, 3);
			int cacheSize = g_random->randomInt(0, 3);
			return Reference<IAsyncFile>(new AsyncFileReadAheadCache(fr, blockSize, readAhead, reads, cacheSize));
		});
	}

	return f;
}
// Writable backup file for the local-directory container.  Writes go to the IAsyncFile
// handed to the constructor (a .temp file); finish() truncates, syncs, and atomically
// renames it to its final path.
class BackupFile : public IBackupFile, ReferenceCounted<BackupFile> {
public:
	BackupFile(std::string fileName, Reference<IAsyncFile> file, std::string finalFullPath) : IBackupFile(fileName), m_file(file), m_finalFullPath(finalFullPath) {}

	// Sequential append: writes at the current m_offset and advances it immediately,
	// so callers may issue overlapping appends.
	Future<Void> append(const void *data, int len) {
		Future<Void> r = m_file->write(data, len, m_offset);
		m_offset += len;
		return r;
	}

	// Finalize the file: trim any over-allocation, flush to disk, release the handle,
	// then rename the temp file into place so the final name only ever refers to a
	// complete file.
	ACTOR static Future<Void> finish_impl(Reference<BackupFile> f) {
		wait(f->m_file->truncate(f->size()));  // Some IAsyncFile implementations extend in whole block sizes.
		wait(f->m_file->sync());
		std::string name = f->m_file->getFilename();
		f->m_file.clear();
		renameFile(name, f->m_finalFullPath);
		return Void();
	}

	Future<Void> finish() {
		// addRef keeps this object alive for the duration of the actor.
		return finish_impl(Reference<BackupFile>::addRef(this));
	}

	void addref() { return ReferenceCounted<BackupFile>::addref(); }
	void delref() { return ReferenceCounted<BackupFile>::delref(); }
private:
	Reference<IAsyncFile> m_file;          // underlying temp file; cleared after finish()
	std::string m_finalFullPath;           // destination path for the atomic rename
};
// Open a container-relative path for writing.  Data is first written to a uniquely
// named ".temp" sibling; BackupFile::finish() later renames it into place.
Future<Reference<IBackupFile>> writeFile(std::string path) {
	int openFlags = IAsyncFile::OPEN_NO_AIO | IAsyncFile::OPEN_CREATE | IAsyncFile::OPEN_ATOMIC_WRITE_AND_CREATE | IAsyncFile::OPEN_READWRITE;

	std::string finalPath = joinPath(m_path, path);
	// Local-directory containers guarantee the directory exists before every file creation.
	platform::createDirectory(parentDirectory(finalPath));

	// Unique temp name avoids collisions between concurrent writers of the same file.
	std::string tempPath = finalPath + "." + g_random->randomUniqueID().toString() + ".temp";
	Future<Reference<IAsyncFile>> opened = IAsyncFileSystem::filesystem()->open(tempPath, openFlags, 0644);

	return map(opened, [=](Reference<IAsyncFile> file) {
		return Reference<IBackupFile>(new BackupFile(path, file, finalPath));
	});
}
2017-11-15 15:33:17 +08:00
// Delete one container-relative file.  The deletion itself is performed synchronously
// via the global ::deleteFile before the (already-ready) Future is returned.
Future<Void> deleteFile(std::string path) {
	::deleteFile(joinPath(m_path, path));
	return Void();
}
2018-01-17 20:09:43 +08:00
// Recursively list files (with sizes) under a container-relative path.
// NOTE: the pathFilter parameter is intentionally unnamed/ignored here; the local
// implementation always walks the whole subtree and filters by suffix only.
Future<FilesAndSizesT> listFiles(std::string path, std::function<bool(std::string const &)>) {
	FilesAndSizesT results;

	std::vector<std::string> files;
	platform::findFilesRecursively(joinPath(m_path, path), files);

	// Remove .lnk files from results, they are a side effect of a backup that was *read* during simulation.  See openFile() above for more info on why they are created.
	if(g_network->isSimulated())
		files.erase(std::remove_if(files.begin(), files.end(), [](std::string const &f) { return StringRef(f).endsWith(LiteralStringRef(".lnk")); }), files.end());

	for(auto &f : files) {
		// Hide .part or .temp files.
		StringRef s(f);
		if(!s.endsWith(LiteralStringRef(".part")) && !s.endsWith(LiteralStringRef(".temp")))
			// Report paths relative to the container root (strip "<m_path>/").
			results.push_back({f.substr(m_path.size() + 1), ::fileSize(f)});
	}

	return results;
}
2017-11-15 15:33:17 +08:00
// Delete the entire container directory tree.  If pNumDeleted is non-null it receives
// the count reported by eraseDirectoryRecursive.
Future<Void> deleteContainer(int *pNumDeleted) {
	// In order to avoid deleting some random directory due to user error, first describe the backup
	// and make sure it has something in it.
	return map(describeBackup(false, invalidVersion), [=](BackupDescription const &desc) {
		// If the backup has no snapshots and no logs then it's probably not a valid backup
		if(desc.snapshots.size() == 0 && !desc.minLogBegin.present())
			throw backup_invalid_url();

		int count = platform::eraseDirectoryRecursive(m_path);
		if(pNumDeleted != nullptr)
			*pNumDeleted = count;

		return Void();
	});
}

private:
	// Absolute root directory of this backup container on the local filesystem.
	std::string m_path;
};
2017-05-26 04:48:44 +08:00
2017-11-15 15:33:17 +08:00
// Backup container implementation backed by a BlobStoreEndpoint (object store).
// All data for all backups lives in a single bucket; each named backup has a data
// subtree under DATAFOLDER and a single (empty) marker object under INDEXFOLDER whose
// existence defines whether the container exists.
class BackupContainerBlobStore : public BackupContainerFileSystem, ReferenceCounted<BackupContainerBlobStore> {
private:
	// Backup files to under a single folder prefix with subfolders for each named backup
	static const std::string DATAFOLDER;

	// Indexfolder contains keys for which user-named backups exist.  Backup names can contain an arbitrary
	// number of slashes so the backup names are kept in a separate folder tree from their actual data.
	static const std::string INDEXFOLDER;

	Reference<BlobStoreEndpoint> m_bstore;  // endpoint used for all object operations
	std::string m_name;                     // user-chosen backup name (may contain '/')

	// All backup data goes into a single bucket
	std::string m_bucket;

	// Full object-key prefix for a container-relative data path.
	std::string dataPath(const std::string path) {
		return DATAFOLDER + "/" + m_name + "/" + path;
	}

	// Get the path of the backups's index entry
	std::string indexEntry() {
		return INDEXFOLDER + "/" + m_name;
	}

public:
	// `params` comes from the URL query string; only "bucket" is recognized, any other
	// key is rejected with backup_invalid_url after logging a warning.
	BackupContainerBlobStore(Reference<BlobStoreEndpoint> bstore, std::string name, const BlobStoreEndpoint::ParametersT &params)
	  : m_bstore(bstore), m_name(name), m_bucket("FDB_BACKUPS_V2") {

		// Currently only one parameter is supported, "bucket"
		for(auto &kv : params) {
			if(kv.first == "bucket") {
				m_bucket = kv.second;
				continue;
			}
			TraceEvent(SevWarn, "BackupContainerBlobStoreInvalidParameter").detail("Name", printable(kv.first)).detail("Value", printable(kv.second));
			throw backup_invalid_url();
		}
	}

	void addref() { return ReferenceCounted<BackupContainerBlobStore>::addref(); }
	void delref() { return ReferenceCounted<BackupContainerBlobStore>::delref(); }

	static std::string getURLFormat() {
		return BlobStoreEndpoint::getURLFormat(true) + " (Note: The 'bucket' parameter is required.)";
	}

	virtual ~BackupContainerBlobStore() {}

	// Reads are wrapped in a read-ahead cache configured from the endpoint's knobs.
	Future<Reference<IAsyncFile>> readFile(std::string path) {
		return Reference<IAsyncFile>(
			new AsyncFileReadAheadCache(
				Reference<IAsyncFile>(new AsyncFileBlobStoreRead(m_bstore, m_bucket, dataPath(path))),
				m_bstore->knobs.read_block_size,
				m_bstore->knobs.read_ahead_blocks,
				m_bstore->knobs.concurrent_reads_per_file,
				m_bstore->knobs.read_cache_blocks_per_file
			)
		);
	}

	// Enumerate all backup container URLs in a bucket by listing the index folder.
	ACTOR static Future<std::vector<std::string>> listURLs(Reference<BlobStoreEndpoint> bstore, std::string bucket) {
		state std::string basePath = INDEXFOLDER + '/';
		BlobStoreEndpoint::ListResult contents = wait(bstore->listBucket(bucket, basePath));
		std::vector<std::string> results;
		for(auto &f : contents.objects) {
			// Strip the index prefix to recover the backup name, then form a full URL.
			results.push_back(bstore->getResourceURL(f.name.substr(basePath.size())));
		}
		return results;
	}

	// Writable file backed by a blob-store multipart upload; unlike the local-directory
	// variant there is no temp-file rename, finish() just syncs and drops the handle.
	class BackupFile : public IBackupFile, ReferenceCounted<BackupFile> {
	public:
		BackupFile(std::string fileName, Reference<IAsyncFile> file) : IBackupFile(fileName), m_file(file) {}

		// Sequential append at the tracked offset.
		Future<Void> append(const void *data, int len) {
			Future<Void> r = m_file->write(data, len, m_offset);
			m_offset += len;
			return r;
		}

		Future<Void> finish() {
			// self keeps this object alive until the sync completes.
			Reference<BackupFile> self = Reference<BackupFile>::addRef(this);
			return map(m_file->sync(), [=](Void _) { self->m_file.clear(); return Void(); });
		}

		void addref() { return ReferenceCounted<BackupFile>::addref(); }
		void delref() { return ReferenceCounted<BackupFile>::delref(); }
	private:
		Reference<IAsyncFile> m_file;
	};

	Future<Reference<IBackupFile>> writeFile(std::string path) {
		return Reference<IBackupFile>(new BackupFile(path, Reference<IAsyncFile>(new AsyncFileBlobStoreWrite(m_bstore, m_bucket, dataPath(path)))));
	}

	Future<Void> deleteFile(std::string path) {
		return m_bstore->deleteObject(m_bucket, dataPath(path));
	}

	// List objects under a container-relative path, translating raw bucket keys to
	// container-relative names for both the filter callback and the results.
	ACTOR static Future<FilesAndSizesT> listFiles_impl(Reference<BackupContainerBlobStore> bc, std::string path, std::function<bool(std::string const &)> pathFilter) {
		// pathFilter expects container based paths, so create a wrapper which converts a raw path
		// to a container path by removing the known backup name prefix.
		state int prefixTrim = bc->dataPath("").size();
		std::function<bool(std::string const &)> rawPathFilter = [=](const std::string &folderPath) {
			ASSERT(folderPath.size() >= prefixTrim);
			return pathFilter(folderPath.substr(prefixTrim));
		};

		state BlobStoreEndpoint::ListResult result = wait(bc->m_bstore->listBucket(bc->m_bucket, bc->dataPath(path), '/', std::numeric_limits<int>::max(), rawPathFilter));

		FilesAndSizesT files;
		for(auto &o : result.objects) {
			ASSERT(o.name.size() >= prefixTrim);
			files.push_back({o.name.substr(prefixTrim), o.size});
		}
		return files;
	}

	Future<FilesAndSizesT> listFiles(std::string path, std::function<bool(std::string const &)> pathFilter) {
		return listFiles_impl(Reference<BackupContainerBlobStore>::addRef(this), path, pathFilter);
	}

	// Create the bucket (if needed) and ensure this backup's index marker object exists.
	ACTOR static Future<Void> create_impl(Reference<BackupContainerBlobStore> bc) {
		wait(bc->m_bstore->createBucket(bc->m_bucket));

		// Check/create the index entry
		bool exists = wait(bc->m_bstore->objectExists(bc->m_bucket, bc->indexEntry()));
		if(!exists) {
			// Marker object is empty; only its existence matters.
			wait(bc->m_bstore->writeEntireFile(bc->m_bucket, bc->indexEntry(), ""));
		}

		return Void();
	}

	Future<Void> create() {
		return create_impl(Reference<BackupContainerBlobStore>::addRef(this));
	}

	// The container exists if the index entry in the blob bucket exists
	Future<bool> exists() {
		return m_bstore->objectExists(m_bucket, indexEntry());
	}

	// Delete all of this backup's data objects, then its index marker.  Throws
	// backup_does_not_exist if the marker is already absent.
	ACTOR static Future<Void> deleteContainer_impl(Reference<BackupContainerBlobStore> bc, int *pNumDeleted) {
		bool e = wait(bc->exists());
		if(!e) {
			TraceEvent(SevWarnAlways, "BackupContainerDoesNotExist").detail("URL", bc->getURL());
			throw backup_does_not_exist();
		}

		// First delete everything under the data prefix in the bucket
		wait(bc->m_bstore->deleteRecursively(bc->m_bucket, bc->dataPath(""), pNumDeleted));

		// Now that all files are deleted, delete the index entry
		wait(bc->m_bstore->deleteObject(bc->m_bucket, bc->indexEntry()));

		return Void();
	}

	Future<Void> deleteContainer(int *pNumDeleted) {
		return deleteContainer_impl(Reference<BackupContainerBlobStore>::addRef(this), pNumDeleted);
	}

	std::string getBucket() const {
		return m_bucket;
	}
};
2017-05-26 04:48:44 +08:00
2018-01-24 03:46:16 +08:00
// Bucket subfolder holding all backup data files (one subtree per named backup).
const std::string BackupContainerBlobStore::DATAFOLDER = "data";
// Bucket subfolder holding one marker object per named backup (see create_impl/exists).
const std::string BackupContainerBlobStore::INDEXFOLDER = "backups";

// Most recent URL parse/open error message, surfaced in trace events for diagnostics.
std::string IBackupContainer::lastOpenError;
2017-05-26 04:48:44 +08:00
std : : vector < std : : string > IBackupContainer : : getURLFormats ( ) {
std : : vector < std : : string > formats ;
formats . push_back ( BackupContainerLocalDirectory : : getURLFormat ( ) ) ;
formats . push_back ( BackupContainerBlobStore : : getURLFormat ( ) ) ;
return formats ;
}
// Get an IBackupContainer based on a container URL string
// Instances are cached per URL for the life of the process; on any failure (other than
// actor cancellation) a warning trace event is logged and the error is rethrown.
Reference<IBackupContainer> IBackupContainer::openContainer(std::string url)
{
	// Process-lifetime cache so repeated opens of the same URL share one container.
	static std::map<std::string, Reference<IBackupContainer>> m_cache;

	Reference<IBackupContainer> &r = m_cache[url];
	if(r)
		return r;

	try {
		StringRef u(url);
		if(u.startsWith(LiteralStringRef("file://")))
			r = Reference<IBackupContainer>(new BackupContainerLocalDirectory(url));
		else if(u.startsWith(LiteralStringRef("blobstore://"))) {
			std::string resource;

			// The URL parameters contain blobstore endpoint tunables as well as possible backup-specific options.
			BlobStoreEndpoint::ParametersT backupParams;
			Reference<BlobStoreEndpoint> bstore = BlobStoreEndpoint::fromString(url, &resource, &lastOpenError, &backupParams);

			if(resource.empty())
				throw backup_invalid_url();
			// Restrict backup names to a safe character set.
			for(auto c : resource)
				if(!isalnum(c) && c != '_' && c != '-' && c != '.' && c != '/')
					throw backup_invalid_url();

			r = Reference<IBackupContainer>(new BackupContainerBlobStore(bstore, resource, backupParams));
		}
		else {
			lastOpenError = "invalid URL prefix";
			throw backup_invalid_url();
		}

		r->URL = url;
		return r;
	} catch(Error &e) {
		if(e.code() == error_code_actor_cancelled)
			throw;

		TraceEvent m(SevWarn, "BackupContainer");
		m.detail("Description", "Invalid container specification. See help.");
		m.detail("URL", url);
		m.error(e);
		if(e.code() == error_code_backup_invalid_url)
			m.detail("LastOpenError", lastOpenError);

		throw;
	}
}
2017-12-13 09:44:03 +08:00
// Get a list of URLS to backup containers based on some a shorter URL.  This function knows about some set of supported
// URL types which support this sort of backup discovery.
// For file:// URLs, each immediate subdirectory is a container; for blobstore:// URLs,
// the bucket's index folder is listed.  The resource/path part of the base URL must be empty.
ACTOR Future<std::vector<std::string>> listContainers_impl(std::string baseURL) {
	try {
		StringRef u(baseURL);
		if(u.startsWith(LiteralStringRef("file://"))) {
			std::vector<std::string> results = wait(BackupContainerLocalDirectory::listURLs(baseURL));
			return results;
		}
		else if(u.startsWith(LiteralStringRef("blobstore://"))) {
			std::string resource;

			BlobStoreEndpoint::ParametersT backupParams;
			Reference<BlobStoreEndpoint> bstore = BlobStoreEndpoint::fromString(baseURL, &resource, &IBackupContainer::lastOpenError, &backupParams);

			if(!resource.empty()) {
				TraceEvent(SevWarn, "BackupContainer").detail("Description", "Invalid backup container base URL, resource aka path should be blank.").detail("URL", baseURL);
				throw backup_invalid_url();
			}

			// Create a dummy container to parse the backup-specific parameters from the URL and get a final bucket name
			BackupContainerBlobStore dummy(bstore, "dummy", backupParams);

			std::vector<std::string> results = wait(BackupContainerBlobStore::listURLs(bstore, dummy.getBucket()));
			return results;
		}
		else {
			IBackupContainer::lastOpenError = "invalid URL prefix";
			throw backup_invalid_url();
		}
	} catch(Error &e) {
		if(e.code() == error_code_actor_cancelled)
			throw;

		TraceEvent m(SevWarn, "BackupContainer");
		m.detail("Description", "Invalid backup container URL prefix. See help.");
		m.detail("URL", baseURL);
		m.error(e);
		if(e.code() == error_code_backup_invalid_url)
			m.detail("LastOpenError", IBackupContainer::lastOpenError);

		throw;
	}
}
2017-12-21 05:48:31 +08:00
// Public entry point; forwards to the actor implementation above.
Future<std::vector<std::string>> IBackupContainer::listContainers(std::string baseURL) {
	return listContainers_impl(baseURL);
}
2018-01-23 16:19:51 +08:00
// Convert a local "YYYY-MM-DD.HH:MM:SS" datetime string to a database Version using the
// TimeKeeper map (time -> version samples stored in the system keyspace).  Finds the
// nearest sample at-or-before the target time (or the first one after it, if none) and
// linearly extrapolates at CORE_VERSIONSPERSECOND.  Throws backup_error on parse or
// lookup failure; retries the transaction on retryable errors.
ACTOR Future<Version> timeKeeperVersionFromDatetime(std::string datetime, Database db) {
	state KeyBackedMap<int64_t, Version> versionMap(timeKeeperPrefixRange.begin);
	state Reference<ReadYourWritesTransaction> tr = Reference<ReadYourWritesTransaction>(new ReadYourWritesTransaction(db));

	int year, month, day, hour, minute, second;
	if (sscanf(datetime.c_str(), "%d-%d-%d.%d:%d:%d", &year, &month, &day, &hour, &minute, &second) != 6) {
		fprintf(stderr, "ERROR: Incorrect date/time format.\n");
		throw backup_error();
	}
	struct tm expDateTime = {0};
	expDateTime.tm_year = year - 1900;
	expDateTime.tm_mon = month - 1;
	expDateTime.tm_mday = day;
	expDateTime.tm_hour = hour;
	expDateTime.tm_min = minute;
	expDateTime.tm_sec = second;
	// tm_isdst = -1 lets mktime determine whether DST applies to the given local time.
	expDateTime.tm_isdst = -1;
	state int64_t time = (int64_t) mktime(&expDateTime);

	loop {
		try {
			tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
			tr->setOption(FDBTransactionOptions::LOCK_AWARE);
			// Reverse range scan: the single greatest key <= time.
			state std::vector<std::pair<int64_t, Version>> results = wait(versionMap.getRange(tr, 0, time, 1, false, true));
			if (results.size() != 1) {
				// No key less than time was found in the database
				// Look for a key >= time.
				wait(store(results, versionMap.getRange(tr, time, std::numeric_limits<int64_t>::max(), 1)));

				if(results.size() != 1) {
					fprintf(stderr, "ERROR: Unable to calculate a version for given date/time.\n");
					throw backup_error();
				}
			}

			// Adjust version found by the delta between time and the time found and min with 0.
			auto &result = results[0];
			return std::max<Version>(0, result.second + (time - result.first) * CLIENT_KNOBS->CORE_VERSIONSPERSECOND);

		} catch (Error &e) {
			wait(tr->onError(e));
		}
	}
}
// Inverse of the above: estimate the Unix epoch time at which Version v was current,
// via a binary search over the TimeKeeper (time -> version) map, then linear
// extrapolation at CORE_VERSIONSPERSECOND.  Returns an empty Optional when no
// TimeKeeper records exist at all.  The caller's transaction is used directly (no
// retry loop here).
ACTOR Future<Optional<int64_t>> timeKeeperEpochsFromVersion(Version v, Reference<ReadYourWritesTransaction> tr) {
	state KeyBackedMap<int64_t, Version> versionMap(timeKeeperPrefixRange.begin);

	// Binary search to find the closest date with a version <= v
	state int64_t min = 0;
	state int64_t max = (int64_t)now();
	state int64_t mid;
	state std::pair<int64_t, Version> found;

	tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
	tr->setOption(FDBTransactionOptions::LOCK_AWARE);

	loop {
		mid = (min + max + 1) / 2;  // ceiling

		// Find the highest time < mid
		state std::vector<std::pair<int64_t, Version>> results = wait(versionMap.getRange(tr, min, mid, 1, false, true));

		if (results.size() != 1) {
			if (mid == min) {
				// There aren't any records having a version < v, so just look for any record having a time < now
				// and base a result on it
				wait(store(results, versionMap.getRange(tr, 0, (int64_t)now(), 1)));

				if (results.size() != 1) {
					// There aren't any timekeeper records to base a result on so return nothing
					return Optional<int64_t>();
				}

				found = results[0];
				break;
			}

			min = mid;
			continue;
		}

		found = results[0];

		if (v < found.second) {
			max = found.first;
		}
		else {
			if (found.first == min) {
				break;
			}
			min = found.first;
		}
	}

	// Extrapolate from the nearest sample at-or-below v.
	return found.first + (v - found.second) / CLIENT_KNOBS->CORE_VERSIONSPERSECOND;
}
2018-11-25 09:24:54 +08:00
// Pop and return the last manually seeded test file size from `sizes`; once the list
// is exhausted, every subsequent call returns the default size of 1000 bytes.
int chooseFileSize(std::vector<int> &sizes) {
	if (sizes.empty())
		return 1000;
	const int chosen = sizes.back();
	sizes.pop_back();
	return chosen;
}
2017-12-22 13:15:26 +08:00
// Test helper: fill backup file `f` with `size` random bytes, finish it, then re-open
// it through the container and verify both the reported size and the exact contents.
ACTOR Future<Void> writeAndVerifyFile(Reference<IBackupContainer> c, Reference<IBackupFile> f, int size) {
	state Standalone<StringRef> content;
	if(size > 0) {
		content = makeString(size);
		for(int i = 0; i < content.size(); ++i)
			mutateString(content)[i] = (uint8_t)g_random->randomInt(0, 256);

		wait(f->append(content.begin(), content.size()));
	}
	wait(f->finish());

	state Reference<IAsyncFile> inputFile = wait(c->readFile(f->getFileName()));
	int64_t fileSize = wait(inputFile->size());
	ASSERT(size == fileSize);
	if(size > 0) {
		state Standalone<StringRef> buf = makeString(size);
		int b = wait(inputFile->read(mutateString(buf), buf.size(), 0));
		ASSERT(b == buf.size());
		ASSERT(buf == content);
	}
	return Void();
}
2018-11-25 09:24:54 +08:00
// Randomly advance version by up to 1 second of versions
Version nextVersion(Version v) {
	return v + g_random->randomInt64(1, CLIENT_KNOBS->CORE_VERSIONSPERSECOND);
}
2017-11-15 15:33:17 +08:00
// End-to-end container test driver: (re)creates the container at `url`, writes a random
// sequence of range/log/snapshot files, validates listings and descriptions, walks a
// series of expirations one snapshot at a time, then deletes the container and confirms
// it reads back empty.
ACTOR Future<Void> testBackupContainer(std::string url) {
	printf("BackupContainerTest URL %s\n", url.c_str());

	state Reference<IBackupContainer> c = IBackupContainer::openContainer(url);

	// Make sure container doesn't exist, then create it.
	try {
		wait(c->deleteContainer());
	} catch(Error &e) {
		// A missing or never-created container is fine here; anything else is a real failure.
		if(e.code() != error_code_backup_invalid_url && e.code() != error_code_backup_does_not_exist)
			throw;
	}

	wait(c->create());

	state std::vector<Future<Void>> writes;
	state std::map<Version, std::vector<std::string>> snapshots;   // snapshot begin version -> its range file names
	state std::map<Version, int64_t> snapshotSizes;                // snapshot begin version -> total bytes
	state int nRangeFiles = 0;
	state std::map<Version, std::string> logs;                     // log begin version -> file name
	state Version v = g_random->randomInt64(0, std::numeric_limits<Version>::max() / 2);

	// List of sizes to use to test edge cases on underlying file implementations
	state std::vector<int> fileSizes = {0, 10000000, 5000005};

	loop {
		state Version logStart = v;
		state int kvfiles = g_random->randomInt(0, 3);

		// Write up to kvfiles range files into the current (possibly new) snapshot.
		while(kvfiles > 0) {
			if(snapshots.empty()) {
				snapshots[v] = {};
				snapshotSizes[v] = 0;
				if(g_random->coinflip()) {
					v = nextVersion(v);
				}
			}
			Reference<IBackupFile> range = wait(c->writeRangeFile(snapshots.rbegin()->first, 0, v, 10));
			++nRangeFiles;
			v = nextVersion(v);
			snapshots.rbegin()->second.push_back(range->getFileName());

			int size = chooseFileSize(fileSizes);
			snapshotSizes.rbegin()->second += size;

			writes.push_back(writeAndVerifyFile(c, range, size));

			// Randomly finalize the current snapshot and start a new empty one.
			if(g_random->random01() < .2) {
				writes.push_back(c->writeKeyspaceSnapshotFile(snapshots.rbegin()->second, snapshotSizes.rbegin()->second));
				snapshots[v] = {};
				snapshotSizes[v] = 0;
				break;
			}

			--kvfiles;
		}

		// Logs must cover a non-empty version range.
		if(logStart == v || g_random->coinflip()) {
			v = nextVersion(v);
		}
		state Reference<IBackupFile> log = wait(c->writeLogFile(logStart, v, 10));
		logs[logStart] = log->getFileName();
		int size = chooseFileSize(fileSizes);
		writes.push_back(writeAndVerifyFile(c, log, size));

		// Randomly stop after a snapshot has finished and all manually seeded file sizes have been used.
		if(fileSizes.empty() && !snapshots.empty() && snapshots.rbegin()->second.empty() && g_random->random01() < .2) {
			snapshots.erase(snapshots.rbegin()->first);
			break;
		}
	}

	wait(waitForAll(writes));

	state BackupFileList listing = wait(c->dumpFileList());
	ASSERT(listing.ranges.size() == nRangeFiles);
	ASSERT(listing.logs.size() == logs.size());
	ASSERT(listing.snapshots.size() == snapshots.size());

	state BackupDescription desc = wait(c->describeBackup());
	printf("\n%s\n", desc.toString().c_str());

	// Do a series of expirations and verify resulting state
	state int i = 0;
	for(; i < listing.snapshots.size(); ++i) {
		{
			// Ensure we can still restore to the latest version
			Optional<RestorableFileSet> rest = wait(c->getRestoreSet(desc.maxRestorableVersion.get()));
			ASSERT(rest.present());
		}

		{
			// Ensure we can restore to the end version of snapshot i
			Optional<RestorableFileSet> rest = wait(c->getRestoreSet(listing.snapshots[i].endVersion));
			ASSERT(rest.present());
		}

		// Test expiring to the end of this snapshot
		state Version expireVersion = listing.snapshots[i].endVersion;

		// Expire everything up to but not including the snapshot end version
		printf("EXPIRE TO %lld\n", expireVersion);
		state Future<Void> f = c->expireData(expireVersion);
		wait(ready(f));

		// If there is an error, it must be backup_cannot_expire and we have to be on the last snapshot
		if(f.isError()) {
			ASSERT(f.getError().code() == error_code_backup_cannot_expire);
			ASSERT(i == listing.snapshots.size() - 1);
			// Retry with force=true, which must succeed.
			wait(c->expireData(expireVersion, true));
		}

		BackupDescription d = wait(c->describeBackup());
		printf("\n%s\n", d.toString().c_str());
	}

	printf("DELETING\n");
	wait(c->deleteContainer());

	// After deletion, describing the backup must fail with backup_does_not_exist.
	state Future<BackupDescription> d = c->describeBackup();
	wait(ready(d));
	ASSERT(d.isError() && d.getError().code() == error_code_backup_does_not_exist);

	BackupFileList empty = wait(c->dumpFileList());
	ASSERT(empty.ranges.size() == 0);
	ASSERT(empty.logs.size() == 0);
	ASSERT(empty.snapshots.size() == 0);

	printf("BackupContainerTest URL=%s PASSED.\n", url.c_str());

	return Void();
}
2018-10-06 13:09:58 +08:00
// Exercise the local-directory container against a timestamped path; the directory
// differs between simulation (simfdb) and a real run (/private/tmp).
TEST_CASE("/backup/containers/localdir") {
	if(g_network->isSimulated())
		wait(testBackupContainer(format("file://simfdb/backups/%llx", timer_int())));
	else
		wait(testBackupContainer(format("file:///private/tmp/fdb_backups/%llx", timer_int())));
	return Void();
};
2018-10-06 13:09:58 +08:00
// Exercise an externally supplied container URL (e.g. a blobstore endpoint).  Only runs
// outside simulation and requires the FDB_TEST_BACKUP_URL environment variable.
TEST_CASE("/backup/containers/url") {
	if (!g_network->isSimulated()) {
		const char *url = getenv("FDB_TEST_BACKUP_URL");
		ASSERT(url != nullptr);
		wait(testBackupContainer(url));
	}
	return Void();
};
2017-12-13 09:44:03 +08:00
2018-10-06 13:09:58 +08:00
// List and print all backup containers under an externally supplied base URL.  Only
// runs outside simulation and requires the FDB_TEST_BACKUP_URL environment variable.
TEST_CASE("/backup/containers_list") {
	if (!g_network->isSimulated()) {
		state const char *url = getenv("FDB_TEST_BACKUP_URL");
		ASSERT(url != nullptr);
		printf("Listing %s\n", url);
		std::vector<std::string> urls = wait(IBackupContainer::listContainers(url));
		for(auto &u : urls) {
			printf("%s\n", u.c_str());
		}
	}
	return Void();
};