2017-05-26 04:48:44 +08:00
/*
* BackupContainer . actor . cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013 - 2018 Apple Inc . and the FoundationDB project authors
2018-02-22 02:25:11 +08:00
*
2017-05-26 04:48:44 +08:00
* Licensed under the Apache License , Version 2.0 ( the " License " ) ;
* you may not use this file except in compliance with the License .
* You may obtain a copy of the License at
2018-02-22 02:25:11 +08:00
*
2017-05-26 04:48:44 +08:00
* http : //www.apache.org/licenses/LICENSE-2.0
2018-02-22 02:25:11 +08:00
*
2017-05-26 04:48:44 +08:00
* Unless required by applicable law or agreed to in writing , software
* distributed under the License is distributed on an " AS IS " BASIS ,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND , either express or implied .
* See the License for the specific language governing permissions and
* limitations under the License .
*/
2018-10-20 01:30:13 +08:00
# include "fdbclient/BackupContainer.h"
2017-05-26 04:48:44 +08:00
# include "flow/Trace.h"
# include "flow/UnitTest.h"
# include "flow/Hash3.h"
# include "fdbrpc/AsyncFileReadAhead.actor.h"
2017-11-15 15:33:17 +08:00
# include "fdbrpc/Platform.h"
2018-10-27 04:49:42 +08:00
# include "fdbclient/AsyncFileBlobStore.actor.h"
2017-05-26 04:48:44 +08:00
# include "fdbclient/Status.h"
2018-01-17 20:09:43 +08:00
# include "fdbclient/SystemData.h"
# include "fdbclient/ReadYourWrites.h"
# include "fdbclient/KeyBackedTypes.h"
# include "fdbclient/RunTransaction.actor.h"
2017-05-26 04:48:44 +08:00
# include <algorithm>
2018-01-17 20:09:43 +08:00
# include <time.h>
2017-05-26 04:48:44 +08:00
2017-11-16 05:33:09 +08:00
namespace IBackupFile_impl {
2018-01-17 20:09:43 +08:00
ACTOR Future < Void > appendStringRefWithLen ( Reference < IBackupFile > file , Standalone < StringRef > s ) {
2017-11-16 05:33:09 +08:00
state uint32_t lenBuf = bigEndian32 ( ( uint32_t ) s . size ( ) ) ;
2018-08-11 04:57:10 +08:00
wait ( file - > append ( & lenBuf , sizeof ( lenBuf ) ) ) ;
wait ( file - > append ( s . begin ( ) , s . size ( ) ) ) ;
2017-11-16 05:33:09 +08:00
return Void ( ) ;
}
}
2018-01-17 20:09:43 +08:00
// Appends `s` to this file preceded by its length by delegating to
// IBackupFile_impl::appendStringRefWithLen. The actor is given a Reference obtained
// with addRef(this).
Future<Void> IBackupFile::appendStringRefWithLen(Standalone<StringRef> s) {
	Reference<IBackupFile> self = Reference<IBackupFile>::addRef(this);
	return IBackupFile_impl::appendStringRefWithLen(self, s);
}
2018-01-17 20:09:43 +08:00
std : : string formatTime ( int64_t t ) {
time_t curTime = ( time_t ) t ;
char buffer [ 128 ] ;
struct tm timeinfo ;
2018-01-18 03:35:34 +08:00
getLocalTime ( & curTime , & timeinfo ) ;
2018-01-17 20:09:43 +08:00
strftime ( buffer , 128 , " %Y-%m-%d %H:%M:%S " , & timeinfo ) ;
return buffer ;
}
// Resolves every version key currently in *pVersionTimeMap through
// timeKeeperEpochsFromVersion() on transaction `tr`.
//
// For each version: if a time is returned it is stored as the mapped value, otherwise
// the entry is erased from the map. The returned future is ready once all lookups
// have completed. *pVersionTimeMap must stay alive until then, since the callbacks
// write through the pointer.
Future<Void> fetchTimes(Reference<ReadYourWritesTransaction> tr, std::map<Version, int64_t> *pVersionTimeMap) {
	// Snapshot the keys before launching any lookup. The callbacks below erase from the
	// map; if one of them ran while a loop over the map itself was still in progress
	// (i.e. its future was already ready), the element under the live iterator would be
	// erased. Iterating a private copy of the keys removes that hazard.
	std::vector<Version> versions;
	versions.reserve(pVersionTimeMap->size());
	for(auto const &entry : *pVersionTimeMap)
		versions.push_back(entry.first);

	std::vector<Future<Void>> futures;
	futures.reserve(versions.size());

	// Resolve each version in the map. Only the version and the map pointer are captured.
	for(Version version : versions) {
		futures.push_back(map(timeKeeperEpochsFromVersion(version, tr), [pVersionTimeMap, version](Optional<int64_t> t) {
			if(t.present())
				pVersionTimeMap->at(version) = t.get();
			else
				pVersionTimeMap->erase(version);
			return Void();
		}));
	}

	return waitForAll(futures);
}
// Rebuilds versionTimeMap so that it has one entry for every version this description
// mentions, then runs fetchTimes() over it inside runRYWTransaction() on `cx`.
// The transaction callback holds `this`, so this object must outlive the returned future.
Future<Void> BackupDescription::resolveVersionTimes(Database cx) {
	versionTimeMap.clear();

	// operator[] is used purely for its insert-if-absent effect: each statement below
	// adds the version as a key with a value-initialized time.
	for(const KeyspaceSnapshotFile &snapshot : snapshots) {
		versionTimeMap[snapshot.beginVersion];
		versionTimeMap[snapshot.endVersion];
	}

	if(minLogBegin.present()) {
		versionTimeMap[minLogBegin.get()];
	}
	if(maxLogEnd.present()) {
		versionTimeMap[maxLogEnd.get()];
	}
	if(contiguousLogEnd.present()) {
		versionTimeMap[contiguousLogEnd.get()];
	}
	if(minRestorableVersion.present()) {
		versionTimeMap[minRestorableVersion.get()];
	}
	if(maxRestorableVersion.present()) {
		versionTimeMap[maxRestorableVersion.get()];
	}

	auto lookupTimes = [this](Reference<ReadYourWritesTransaction> tr) {
		return fetchTimes(tr, &versionTimeMap);
	};
	return runRYWTransaction(cx, lookupTimes);
}
2017-11-15 15:33:17 +08:00
// Builds a multi-line, human-readable summary of this description: the URL, whether
// a maximum restorable version is present, one line per snapshot, the snapshot byte
// total, each optional log / restorable version that is present, and finally the
// extended detail text if it is not empty.
std : : string BackupDescription : : toString ( ) const {
std : : string info ;
2017-05-26 04:48:44 +08:00
2017-11-15 15:33:17 +08:00
info . append ( format ( " URL: %s \n " , url . c_str ( ) ) ) ;
// "Restorable" is reported purely from the presence of maxRestorableVersion
info . append ( format ( " Restorable: %s \n " , maxRestorableVersion . present ( ) ? " true " : " false " ) ) ;
2018-01-17 20:09:43 +08:00
// Formats a version for display. When versionTimeMap is empty only the number is
// printed. Otherwise the number is followed by the formatted time found for it in
// versionTimeMap, or by "(unknown)" when the map has no entry for that version.
auto formatVersion = [ & ] ( Version v ) {
std : : string s ;
if ( ! versionTimeMap . empty ( ) ) {
auto i = versionTimeMap . find ( v ) ;
if ( i ! = versionTimeMap . end ( ) )
s = format ( " %lld (%s) " , v , formatTime ( i - > second ) . c_str ( ) ) ;
else
s = format ( " %lld (unknown) " , v ) ;
}
else {
s = format ( " %lld " , v ) ;
}
return s ;
} ;
// One line per snapshot; a snapshot whose restorable flag is unset is shown as "false"
for ( const KeyspaceSnapshotFile & m : snapshots ) {
info . append ( format ( " Snapshot: startVersion=%s endVersion=%s totalBytes=%lld restorable=%s \n " ,
formatVersion ( m . beginVersion ) . c_str ( ) , formatVersion ( m . endVersion ) . c_str ( ) , m . totalSize , m . restorable . orDefault ( false ) ? " true " : " false " ) ) ;
}
2018-01-03 15:22:35 +08:00
info . append ( format ( " SnapshotBytes: %lld \n " , snapshotBytes ) ) ;
2017-11-15 15:33:17 +08:00
// Each of the following lines is emitted only when the corresponding optional is present
if ( minLogBegin . present ( ) )
2018-01-17 20:09:43 +08:00
info . append ( format ( " MinLogBeginVersion: %s \n " , formatVersion ( minLogBegin . get ( ) ) . c_str ( ) ) ) ;
2017-11-15 15:33:17 +08:00
if ( contiguousLogEnd . present ( ) )
2018-01-17 20:09:43 +08:00
info . append ( format ( " ContiguousLogEndVersion: %s \n " , formatVersion ( contiguousLogEnd . get ( ) ) . c_str ( ) ) ) ;
2017-11-15 15:33:17 +08:00
if ( maxLogEnd . present ( ) )
2018-01-17 20:09:43 +08:00
info . append ( format ( " MaxLogEndVersion: %s \n " , formatVersion ( maxLogEnd . get ( ) ) . c_str ( ) ) ) ;
2017-11-15 15:33:17 +08:00
if ( minRestorableVersion . present ( ) )
2018-01-17 20:09:43 +08:00
info . append ( format ( " MinRestorableVersion: %s \n " , formatVersion ( minRestorableVersion . get ( ) ) . c_str ( ) ) ) ;
2017-11-15 15:33:17 +08:00
if ( maxRestorableVersion . present ( ) )
2018-01-17 20:09:43 +08:00
info . append ( format ( " MaxRestorableVersion: %s \n " , formatVersion ( maxRestorableVersion . get ( ) ) . c_str ( ) ) ) ;
2017-11-15 15:33:17 +08:00
// The extended detail text is appended as-is after its label
if ( ! extendedDetail . empty ( ) )
info . append ( " ExtendedDetail: " ) . append ( extendedDetail ) ;
return info ;
}
/* BackupContainerFileSystem implements a backup container which stores files in a nested folder structure.
* Inheritors must only define methods for writing , reading , deleting , sizing , and listing files .
*
* BackupInfo is stored as a JSON document at
* / info
* Snapshots are stored as JSON at file paths like
* / snapshots / snapshot , startVersion , endVersion , totalBytes
* Log and Range data files at file paths like
* / logs / . . . / log , startVersion , endVersion , blockSize
* / ranges / . . . / range , version , uid , blockSize
*
2018-01-24 07:02:15 +08:00
* Where . . . is a multi level path which sorts lexically into version order and targets 10 , 000 or less
* entries in each folder ( though a full speed snapshot could exceed this count at the innermost folder level )
2017-11-15 15:33:17 +08:00
*/
class BackupContainerFileSystem : public IBackupContainer {
public :
// Reference counting hooks. Both are pure virtual here, so a concrete container must supply them.
virtual void addref ( ) = 0 ;
virtual void delref ( ) = 0 ;
// Construction and destruction do no work at this level; the destructor is virtual.
BackupContainerFileSystem ( ) { }
virtual ~ BackupContainerFileSystem ( ) { }
// Create the container
virtual Future < Void > create ( ) = 0 ;
// Get a list of fileNames and their sizes in the container under the given path
2018-01-17 20:09:43 +08:00
// The implementation can (but does not have to) use the folder path filter to avoid traversing
// specific subpaths.
2017-11-15 15:33:17 +08:00
// Each FilesAndSizesT element pairs a file name (first) with that file's size (second).
typedef std : : vector < std : : pair < std : : string , int64_t > > FilesAndSizesT ;
2018-01-17 20:09:43 +08:00
virtual Future < FilesAndSizesT > listFiles ( std : : string path = " " , std : : function < bool ( std : : string const & ) > folderPathFilter = nullptr ) = 0 ;
2017-11-15 15:33:17 +08:00
// Open a file for read by fileName
virtual Future < Reference < IAsyncFile > > readFile ( std : : string fileName ) = 0 ;
// Open a file for write by fileName
virtual Future < Reference < IBackupFile > > writeFile ( std : : string fileName ) = 0 ;
// Delete a file
virtual Future < Void > deleteFile ( std : : string fileName ) = 0 ;
// Delete entire container. During the process, if pNumDeleted is not null it will be
// updated with the count of deleted files so that progress can be seen.
virtual Future < Void > deleteContainer ( int * pNumDeleted ) = 0 ;
2018-01-25 02:29:37 +08:00
// Creates a 2-level path (x/y) where v should go such that x/y/* contains (10^smallestBucket) possible versions
2018-01-03 15:22:35 +08:00
static std : : string versionFolderString ( Version v , int smallestBucket ) {
2018-01-25 02:29:37 +08:00
ASSERT ( smallestBucket < 14 ) ;
2018-01-24 07:02:15 +08:00
// Get a 0-padded fixed size representation of v
std : : string vFixedPrecision = format ( " %019lld " , v ) ;
ASSERT ( vFixedPrecision . size ( ) = = 19 ) ;
2018-01-25 02:29:37 +08:00
// Truncate smallestBucket from the fixed length representation
vFixedPrecision . resize ( vFixedPrecision . size ( ) - smallestBucket ) ;
2018-01-24 07:02:15 +08:00
2018-01-25 02:29:37 +08:00
// Split the remaining digits with a '/' 4 places from the right
vFixedPrecision . insert ( vFixedPrecision . size ( ) - 4 , 1 , ' / ' ) ;
2018-01-24 07:02:15 +08:00
return vFixedPrecision ;
}
// Returns f with every '/' removed.
// This is useful for comparing version folder strings regardless of where their "/"
// dividers are, as it is possible that division points would change in the future.
static std::string cleanFolderString(std::string f) {
	std::string stripped;
	stripped.reserve(f.size());
	for(char c : f) {
		if(c != '/')
			stripped.push_back(c);
	}
	return stripped;
}
2017-11-15 15:33:17 +08:00
2018-01-24 07:02:15 +08:00
// The innermost folder covers 100 seconds (1e8 versions) During a full speed backup it is possible though very unlikely write about 10,000 snapshot range files during that time.
2018-01-03 15:22:35 +08:00
static std : : string rangeVersionFolderString ( Version v ) {
2018-01-24 07:02:15 +08:00
return format ( " ranges/%s/ " , versionFolderString ( v , 8 ) . c_str ( ) ) ;
2018-01-03 15:22:35 +08:00
}
2018-01-24 07:02:15 +08:00
// The innermost folder covers 100,000 seconds (1e11 versions) which is 5,000 mutation log files at current settings.
2018-01-03 15:22:35 +08:00
static std : : string logVersionFolderString ( Version v ) {
2018-01-24 07:02:15 +08:00
return format ( " logs/%s/ " , versionFolderString ( v , 11 ) . c_str ( ) ) ;
2017-11-15 15:33:17 +08:00
}
// Opens a new log file for writing. The file is placed in the log folder for beginVersion
// and named "log,<beginVersion>,<endVersion>,<random unique id>,<blockSize>".
Future<Reference<IBackupFile>> writeLogFile(Version beginVersion, Version endVersion, int blockSize) {
	const std::string uid = g_random->randomUniqueID().toString();
	const std::string name = format("log,%lld,%lld,%s,%d", beginVersion, endVersion, uid.c_str(), blockSize);
	return writeFile(logVersionFolderString(beginVersion) + name);
}
// Opens a new range file for writing. The file is placed in the range folder for `version`
// and named "range,<version>,<random unique id>,<blockSize>".
Future<Reference<IBackupFile>> writeRangeFile(Version version, int blockSize) {
	const std::string uid = g_random->randomUniqueID().toString();
	const std::string name = format("range,%lld,%s,%d", version, uid.c_str(), blockSize);
	return writeFile(rangeVersionFolderString(version) + name);
}
2018-01-06 15:06:39 +08:00
static bool pathToRangeFile ( RangeFile & out , std : : string path , int64_t size ) {
2017-11-15 15:33:17 +08:00
std : : string name = basename ( path ) ;
RangeFile f ;
f . fileName = path ;
f . fileSize = size ;
int len ;
2018-01-06 15:06:39 +08:00
if ( sscanf ( name . c_str ( ) , " range,%lld,%*[^,],%u%n " , & f . version , & f . blockSize , & len ) = = 2 & & len = = name . size ( ) ) {
out = f ;
return true ;
}
return false ;
2017-11-15 15:33:17 +08:00
}
2018-01-06 15:06:39 +08:00
static bool pathToLogFile ( LogFile & out , std : : string path , int64_t size ) {
2017-11-15 15:33:17 +08:00
std : : string name = basename ( path ) ;
LogFile f ;
f . fileName = path ;
f . fileSize = size ;
int len ;
2018-01-06 15:06:39 +08:00
if ( sscanf ( name . c_str ( ) , " log,%lld,%lld,%*[^,],%u%n " , & f . beginVersion , & f . endVersion , & f . blockSize , & len ) = = 3 & & len = = name . size ( ) ) {
out = f ;
return true ;
}
return false ;
2017-11-15 15:33:17 +08:00
}
2018-01-06 15:06:39 +08:00
static bool pathToKeyspaceSnapshotFile ( KeyspaceSnapshotFile & out , std : : string path ) {
2017-11-15 15:33:17 +08:00
std : : string name = basename ( path ) ;
KeyspaceSnapshotFile f ;
f . fileName = path ;
int len ;
2018-01-06 15:06:39 +08:00
if ( sscanf ( name . c_str ( ) , " snapshot,%lld,%lld,%lld%n " , & f . beginVersion , & f . endVersion , & f . totalSize , & len ) = = 3 & & len = = name . size ( ) ) {
out = f ;
return true ;
}
return false ;
2017-11-15 15:33:17 +08:00
}
// TODO: Do this more efficiently, as the range file list for a snapshot could potentially be hundreds of megabytes.
//
// Returns the RangeFile records for the files named in a keyspace snapshot file.
// The range files in [snapshot.beginVersion, snapshot.endVersion] are listed and indexed
// by name, the snapshot file is read in full and parsed as JSON, and each name in its
// "files" array is looked up in the index.
// Throws restore_corrupted_data if the snapshot file cannot be read in full, if its
// beginVersion / endVersion do not match `snapshot`, if "files" is not an array of
// strings, or if a named file is not in the listing.
ACTOR static Future<std::vector<RangeFile>> readKeyspaceSnapshot_impl(Reference<BackupContainerFileSystem> bc, KeyspaceSnapshotFile snapshot) {
	// Read the range file list for the specified version range, and then index them by fileName.
	std::vector<RangeFile> files = wait(bc->listRangeFiles(snapshot.beginVersion, snapshot.endVersion));
	state std::map<std::string, RangeFile> rangeIndex;
	for(auto &rangeFile : files)
		rangeIndex[rangeFile.fileName] = std::move(rangeFile);

	// Read the snapshot file, verify the version range, then find each of the range files by name in the index and return them.
	state Reference<IAsyncFile> snapshotFile = wait(bc->readFile(snapshot.fileName));
	int64_t size = wait(snapshotFile->size());
	state Standalone<StringRef> buf = makeString(size);
	int bytesRead = wait(snapshotFile->read(mutateString(buf), buf.size(), 0));

	// A short read would leave the tail of buf unwritten; do not hand that to the JSON parser.
	if(bytesRead != buf.size())
		throw restore_corrupted_data();

	json_spirit::mValue json;
	json_spirit::read_string(buf.toString(), json);
	JSONDoc doc(json);

	Version v;
	if(!doc.tryGet("beginVersion", v) || v != snapshot.beginVersion)
		throw restore_corrupted_data();
	if(!doc.tryGet("endVersion", v) || v != snapshot.endVersion)
		throw restore_corrupted_data();

	json_spirit::mValue &filesArray = doc.create("files");
	if(filesArray.type() != json_spirit::array_type)
		throw restore_corrupted_data();

	std::vector<RangeFile> results;
	for(auto const &fileValue : filesArray.get_array()) {
		if(fileValue.type() != json_spirit::str_type)
			throw restore_corrupted_data();

		auto i = rangeIndex.find(fileValue.get_str());
		if(i == rangeIndex.end())
			throw restore_corrupted_data();

		results.push_back(i->second);
	}

	return results;
}
// Returns the range files named by `snapshot` by delegating to readKeyspaceSnapshot_impl,
// which is given a Reference obtained with addRef(this).
Future<std::vector<RangeFile>> readKeyspaceSnapshot(KeyspaceSnapshotFile snapshot) {
	Reference<BackupContainerFileSystem> self = Reference<BackupContainerFileSystem>::addRef(this);
	return readKeyspaceSnapshot_impl(self, snapshot);
}
// Writes a keyspace snapshot file listing `fileNames`.
// Every name must parse as a range file name, otherwise restore_unknown_file_type is thrown.
// The smallest and largest range file versions become the snapshot's begin and end versions.
// The JSON document (files, totalBytes, beginVersion, endVersion) is written to
// "snapshots/snapshot,<beginVersion>,<endVersion>,<totalBytes>" and the file is finished.
ACTOR static Future<Void> writeKeyspaceSnapshotFile_impl(Reference<BackupContainerFileSystem> bc, std::vector<std::string> fileNames, int64_t totalBytes) {
	ASSERT(!fileNames.empty());

	state Version lowest = std::numeric_limits<Version>::max();
	state Version highest = 0;
	state RangeFile parsed;
	state json_spirit::mArray names;
	state int idx;

	// Validate each filename, update version range
	for(idx = 0; idx < fileNames.size(); ++idx) {
		auto const &name = fileNames[idx];
		if(!pathToRangeFile(parsed, name, 0))
			throw restore_unknown_file_type();

		names.push_back(name);
		lowest = std::min<Version>(lowest, parsed.version);
		highest = std::max<Version>(highest, parsed.version);

		// Yield once per file name processed
		wait(yield());
	}

	state json_spirit::mValue json;
	state JSONDoc doc(json);

	doc.create("files") = std::move(names);
	doc.create("totalBytes") = totalBytes;
	doc.create("beginVersion") = lowest;
	doc.create("endVersion") = highest;

	wait(yield());
	state std::string docString = json_spirit::write_string(json);

	state Reference<IBackupFile> f = wait(bc->writeFile(format("snapshots/snapshot,%lld,%lld,%lld", lowest, highest, totalBytes)));
	wait(f->append(docString.data(), docString.size()));
	wait(f->finish());

	return Void();
}
// Writes a snapshot file for `fileNames` by delegating to writeKeyspaceSnapshotFile_impl,
// which is given a Reference obtained with addRef(this).
Future<Void> writeKeyspaceSnapshotFile(std::vector<std::string> fileNames, int64_t totalBytes) {
	Reference<BackupContainerFileSystem> self = Reference<BackupContainerFileSystem>::addRef(this);
	return writeKeyspaceSnapshotFile_impl(self, fileNames, totalBytes);
}
2018-09-22 02:48:28 +08:00
// List log files which contain data at any version >= beginVersion and <= targetVersion
2018-01-17 20:09:43 +08:00
// Lists files in sorted order by begin version. Does not check that results are non overlapping or contiguous.
2018-09-22 02:48:28 +08:00
Future < std : : vector < LogFile > > listLogFiles ( Version beginVersion = 0 , Version targetVersion = std : : numeric_limits < Version > : : max ( ) ) {
2018-01-17 20:09:43 +08:00
// The first relevant log file could have a begin version less than beginVersion based on the knobs which determine log file range size,
// so start at an earlier version adjusted by how many versions a file could contain.
//
2018-01-24 07:02:15 +08:00
// Get the cleaned (without slashes) first and last folders that could contain relevant results.
std : : string firstPath = cleanFolderString ( logVersionFolderString (
std : : max < Version > ( 0 , beginVersion - CLIENT_KNOBS - > BACKUP_MAX_LOG_RANGES * CLIENT_KNOBS - > LOG_RANGE_BLOCK_SIZE )
) ) ;
2018-09-22 02:48:28 +08:00
std : : string lastPath = cleanFolderString ( logVersionFolderString ( targetVersion ) ) ;
2018-01-17 20:09:43 +08:00
std : : function < bool ( std : : string const & ) > pathFilter = [ = ] ( const std : : string & folderPath ) {
2018-01-24 07:02:15 +08:00
// Remove slashes in the given folder path so that the '/' positions in the version folder string do not matter
std : : string cleaned = cleanFolderString ( folderPath ) ;
return StringRef ( firstPath ) . startsWith ( cleaned ) | | StringRef ( lastPath ) . startsWith ( cleaned )
| | ( cleaned > firstPath & & cleaned < lastPath ) ;
2018-01-17 20:09:43 +08:00
} ;
return map ( listFiles ( " logs/ " , pathFilter ) , [ = ] ( const FilesAndSizesT & files ) {
2017-11-15 15:33:17 +08:00
std : : vector < LogFile > results ;
2018-01-06 15:06:39 +08:00
LogFile lf ;
2017-11-15 15:33:17 +08:00
for ( auto & f : files ) {
2018-09-22 02:48:28 +08:00
if ( pathToLogFile ( lf , f . first , f . second ) & & lf . endVersion > beginVersion & & lf . beginVersion < = targetVersion )
2017-11-15 15:33:17 +08:00
results . push_back ( lf ) ;
}
std : : sort ( results . begin ( ) , results . end ( ) ) ;
return results ;
} ) ;
}
2018-01-17 20:09:43 +08:00
// List range files, in sorted version order, which contain data at or between beginVersion and endVersion
Future<std::vector<RangeFile>> listRangeFiles(Version beginVersion = 0, Version endVersion = std::numeric_limits<Version>::max()) {
	// Get the cleaned (without slashes) first and last folders that could contain relevant results.
	std::string firstPath = cleanFolderString(rangeVersionFolderString(beginVersion));
	std::string lastPath = cleanFolderString(rangeVersionFolderString(endVersion));

	std::function<bool(std::string const &)> pathFilter = [=](const std::string &folderPath) -> bool {
		// Remove slashes in the given folder path so that the '/' positions in the version folder string do not matter
		std::string cleaned = cleanFolderString(folderPath);

		// Accept a folder that is a prefix of either boundary folder...
		if(StringRef(firstPath).startsWith(cleaned) || StringRef(lastPath).startsWith(cleaned))
			return true;
		// ...or that sorts strictly between the two boundaries.
		return cleaned > firstPath && cleaned < lastPath;
	};

	return map(listFiles("ranges/", pathFilter), [=](const FilesAndSizesT &files) {
		std::vector<RangeFile> results;
		RangeFile rf;
		for(auto &entry : files) {
			if(!pathToRangeFile(rf, entry.first, entry.second))
				continue;
			if(rf.version >= beginVersion && rf.version <= endVersion)
				results.push_back(rf);
		}
		std::sort(results.begin(), results.end());
		return results;
	});
}
2018-01-17 20:09:43 +08:00
// List snapshots which have been fully written, in sorted beginVersion order.
// Every file listed under "snapshots/" whose name parses as a snapshot file name is
// returned; other files are ignored.
Future<std::vector<KeyspaceSnapshotFile>> listKeyspaceSnapshots() {
	auto toSnapshots = [=](const FilesAndSizesT &files) {
		std::vector<KeyspaceSnapshotFile> results;
		KeyspaceSnapshotFile sf;
		for(auto &entry : files) {
			if(!pathToKeyspaceSnapshotFile(sf, entry.first))
				continue;
			results.push_back(sf);
		}
		std::sort(results.begin(), results.end());
		return results;
	};
	return map(listFiles("snapshots/"), toSnapshots);
}
2018-01-17 20:09:43 +08:00
// Lists every range file, snapshot file and log file in `bc` and returns them together.
// The three listings are started before any of them is waited on.
ACTOR static Future<FullBackupListing> dumpFileList_impl(Reference<BackupContainerFileSystem> bc) {
	state Future<std::vector<RangeFile>> rangeListing = bc->listRangeFiles(0, std::numeric_limits<Version>::max());
	state Future<std::vector<KeyspaceSnapshotFile>> snapshotListing = bc->listKeyspaceSnapshots();
	state Future<std::vector<LogFile>> logListing = bc->listLogFiles(0, std::numeric_limits<Version>::max());

	wait(success(rangeListing) && success(snapshotListing) && success(logListing));

	// Field order of the listing: ranges, logs, snapshots
	return FullBackupListing({rangeListing.get(), logListing.get(), snapshotListing.get()});
}
2018-01-17 20:09:43 +08:00
// Returns the full file listing of this container by delegating to dumpFileList_impl,
// which is given a Reference obtained with addRef(this).
Future<FullBackupListing> dumpFileList() {
	Reference<BackupContainerFileSystem> self = Reference<BackupContainerFileSystem>::addRef(this);
	return dumpFileList_impl(self);
}
2018-01-17 20:09:43 +08:00
// Builds a BackupDescription for container bc.
//
// Steps, in order:
//  1. Unless deepScan is set, read the container's stored logBeginVersion / logEndVersion.
//     If both are present they seed the known log range and the log file listing starts
//     at the end of that range instead of at version 0.
//  2. List the snapshot files and the log files, then walk the log files in order to find
//     how far the logs extend without a gap (contiguousLogEnd).
//  3. Try to store the discovered log versions back into the container; a failure here is
//     only logged (cancellation is rethrown).
//  4. For each snapshot, set its restorable flag, add its size to snapshotBytes, and widen
//     the description's min / max restorable versions.
ACTOR static Future < BackupDescription > describeBackup_impl ( Reference < BackupContainerFileSystem > bc , bool deepScan ) {
2017-11-15 15:33:17 +08:00
state BackupDescription desc ;
desc . url = bc - > getURL ( ) ;
2018-01-17 20:09:43 +08:00
// This is the range of logs we'll have to list to determine log continuity
state Version scanBegin = 0 ;
state Version scanEnd = std : : numeric_limits < Version > : : max ( ) ;
// Get range for which we know there are logs, if available
state Optional < Version > begin ;
state Optional < Version > end ;
// With deepScan the stored versions are not read, so begin and end stay unset
if ( ! deepScan ) {
2018-08-11 04:57:10 +08:00
wait ( store ( bc - > logBeginVersion ( ) . get ( ) , begin ) & & store ( bc - > logEndVersion ( ) . get ( ) , end ) ) ;
2018-01-17 20:09:43 +08:00
}
// Use the known log range if present
if ( begin . present ( ) & & end . present ( ) ) {
2018-01-18 04:12:04 +08:00
// Logs are assumed to be contiguous between begin and max(begin, end), so initialize desc accordingly
// The use of max() is to allow for a stale end version that has been exceeded by begin version
2018-01-17 20:09:43 +08:00
desc . minLogBegin = begin . get ( ) ;
2018-01-18 04:12:04 +08:00
desc . maxLogEnd = std : : max ( begin . get ( ) , end . get ( ) ) ;
desc . contiguousLogEnd = desc . maxLogEnd ;
// Begin file scan at the contiguous log end version
scanBegin = desc . contiguousLogEnd . get ( ) ;
2018-01-17 20:09:43 +08:00
}
std : : vector < KeyspaceSnapshotFile > snapshots = wait ( bc - > listKeyspaceSnapshots ( ) ) ;
2017-11-15 15:33:17 +08:00
desc . snapshots = snapshots ;
2018-01-17 20:09:43 +08:00
std : : vector < LogFile > logs = wait ( bc - > listLogFiles ( scanBegin , scanEnd ) ) ;
2017-11-15 15:33:17 +08:00
if ( ! logs . empty ( ) ) {
// maxLogEnd is taken from the last file in the returned list
desc . maxLogEnd = logs . rbegin ( ) - > endVersion ;
auto i = logs . begin ( ) ;
2018-01-17 20:09:43 +08:00
// If we didn't get log versions above then seed them using the first log file
if ( ! desc . contiguousLogEnd . present ( ) ) {
desc . minLogBegin = i - > beginVersion ;
desc . contiguousLogEnd = i - > endVersion ;
+ + i ;
}
2017-11-15 15:33:17 +08:00
// NOTE: this local reference shadows the outer `end` variable inside this block only;
// assigning to it updates desc.contiguousLogEnd in place.
auto & end = desc . contiguousLogEnd . get ( ) ; // For convenience to make loop cleaner
// Advance until continuity is broken
2018-01-17 20:09:43 +08:00
while ( i ! = logs . end ( ) ) {
2017-11-15 15:33:17 +08:00
// A file that begins after the current end leaves a gap, so stop here
if ( i - > beginVersion > end )
break ;
// If the next link in the log chain is found, update the end
if ( i - > beginVersion = = end )
end = i - > endVersion ;
2017-12-22 13:15:26 +08:00
+ + i ;
}
2017-11-17 08:19:56 +08:00
}
2018-01-17 20:09:43 +08:00
// Try to update the saved log versions if they are not set and we have values for them,
// but ignore errors in the update attempt in case the container is not writeable
// Also update logEndVersion if it has a value but it is less than contiguousLogEnd
try {
state Future < Void > updates = Void ( ) ;
if ( desc . minLogBegin . present ( ) & & ! begin . present ( ) )
updates = updates & & bc - > logBeginVersion ( ) . set ( desc . minLogBegin . get ( ) ) ;
if ( desc . contiguousLogEnd . present ( ) & & ( ! end . present ( ) | | end . get ( ) < desc . contiguousLogEnd . get ( ) ) )
updates = updates & & bc - > logEndVersion ( ) . set ( desc . contiguousLogEnd . get ( ) ) ;
2018-08-11 04:57:10 +08:00
wait ( updates ) ;
2018-01-17 20:09:43 +08:00
} catch ( Error & e ) {
// Cancellation is rethrown; any other error is reduced to a warning trace event
if ( e . code ( ) = = error_code_actor_cancelled )
throw ;
TraceEvent ( SevWarn , " BackupContainerSafeVersionUpdateFailure " ) . detail ( " URL " , bc - > getURL ( ) ) ;
}
for ( auto & s : desc . snapshots ) {
// Calculate restorability of each snapshot. Assume true, then try to prove false
s . restorable = true ;
// If this is not a single-version snapshot then see if the available contiguous logs cover its range
if ( s . beginVersion ! = s . endVersion ) {
if ( ! desc . minLogBegin . present ( ) | | desc . minLogBegin . get ( ) > s . beginVersion )
s . restorable = false ;
2018-09-22 02:48:28 +08:00
// The contiguous log end must be strictly greater than the snapshot's end version
if ( ! desc . contiguousLogEnd . present ( ) | | desc . contiguousLogEnd . get ( ) < = s . endVersion )
2018-01-17 20:09:43 +08:00
s . restorable = false ;
}
2018-01-03 15:22:35 +08:00
desc . snapshotBytes + = s . totalSize ;
2017-12-22 13:15:26 +08:00
2017-11-17 08:19:56 +08:00
// If the snapshot is at a single version then it requires no logs. Update min and max restorable.
// TODO: Somehow check / report if the restorable range is not or may not be contiguous.
if ( s . beginVersion = = s . endVersion ) {
if ( ! desc . minRestorableVersion . present ( ) | | s . endVersion < desc . minRestorableVersion . get ( ) )
desc . minRestorableVersion = s . endVersion ;
if ( ! desc . maxRestorableVersion . present ( ) | | s . endVersion > desc . maxRestorableVersion . get ( ) )
desc . maxRestorableVersion = s . endVersion ;
}
// If the snapshot is covered by the contiguous log chain then update min/max restorable.
2018-09-22 02:48:28 +08:00
// contiguousLogEnd is read here without its own presence check; every assignment to
// minLogBegin in this function is paired with one to contiguousLogEnd.
if ( desc . minLogBegin . present ( ) & & s . beginVersion > = desc . minLogBegin . get ( ) & & s . endVersion < desc . contiguousLogEnd . get ( ) ) {
2017-11-17 08:19:56 +08:00
if ( ! desc . minRestorableVersion . present ( ) | | s . endVersion < desc . minRestorableVersion . get ( ) )
desc . minRestorableVersion = s . endVersion ;
2017-11-15 15:33:17 +08:00
2018-09-22 02:48:28 +08:00
// The highest restorable version is one less than the contiguous log end
if ( ! desc . maxRestorableVersion . present ( ) | | ( desc . contiguousLogEnd . get ( ) - 1 ) > desc . maxRestorableVersion . get ( ) )
desc . maxRestorableVersion = desc . contiguousLogEnd . get ( ) - 1 ;
2017-11-15 15:33:17 +08:00
}
}
return desc ;
}
// Uses the virtual methods to describe the backup contents
2018-01-17 20:09:43 +08:00
Future < BackupDescription > describeBackup ( bool deepScan = false ) {
return describeBackup_impl ( Reference < BackupContainerFileSystem > : : addRef ( this ) , deepScan ) ;
2017-11-15 15:33:17 +08:00
}
2018-01-17 20:09:43 +08:00
ACTOR static Future < Void > expireData_impl ( Reference < BackupContainerFileSystem > bc , Version expireEndVersion , bool force , Version restorableBeginVersion ) {
if ( restorableBeginVersion < expireEndVersion )
throw backup_cannot_expire ( ) ;
state Version scanBegin = 0 ;
// Get the backup description.
state BackupDescription desc = wait ( bc - > describeBackup ( ) ) ;
// Assume force is needed, then try to prove otherwise.
// Force is required if there is not a restorable snapshot which both
// - begins at or after expireEndVersion
// - ends at or before restorableBeginVersion
2018-03-09 03:27:15 +08:00
state bool forceNeeded = true ;
2018-01-17 20:09:43 +08:00
for ( KeyspaceSnapshotFile & s : desc . snapshots ) {
if ( s . restorable . orDefault ( false ) & & s . beginVersion > = expireEndVersion & & s . endVersion < = restorableBeginVersion ) {
forceNeeded = false ;
break ;
}
}
// Get metadata
state Optional < Version > expiredEnd ;
state Optional < Version > logBegin ;
state Optional < Version > logEnd ;
2018-08-11 04:57:10 +08:00
wait ( store ( bc - > expiredEndVersion ( ) . get ( ) , expiredEnd ) & & store ( bc - > logBeginVersion ( ) . get ( ) , logBegin ) & & store ( bc - > logEndVersion ( ) . get ( ) , logEnd ) ) ;
2018-01-17 20:09:43 +08:00
// Update scan range if expiredEnd is present
if ( expiredEnd . present ( ) ) {
if ( expireEndVersion < = expiredEnd . get ( ) ) {
// If the expire request is to the version already expired to then there is no work to do so return true
return Void ( ) ;
}
scanBegin = expiredEnd . get ( ) ;
}
2018-11-28 01:50:39 +08:00
TraceEvent ( " BackupContainerFileSystem " )
. detail ( " ExpireEndVersion " , expireEndVersion )
. detail ( " ScanBeginVersion " , scanBegin )
. detail ( " CachedLogBegin " , logBegin . orDefault ( - 1 ) )
. detail ( " CachedLogEnd " , logEnd . orDefault ( - 1 ) )
. detail ( " CachedExpiredEnd " , expiredEnd . orDefault ( - 1 ) ) ;
2018-01-17 20:09:43 +08:00
// Get log files that contain any data at or before expireEndVersion
2018-09-22 02:48:28 +08:00
state std : : vector < LogFile > logs = wait ( bc - > listLogFiles ( scanBegin , expireEndVersion - 1 ) ) ;
2018-01-17 20:09:43 +08:00
// Get range files up to and including expireEndVersion
2018-09-22 02:48:28 +08:00
state std : : vector < RangeFile > ranges = wait ( bc - > listRangeFiles ( scanBegin , expireEndVersion - 1 ) ) ;
2018-01-17 20:09:43 +08:00
// The new logBeginVersion will be taken from the last log file, if there is one
state Optional < Version > newLogBeginVersion ;
if ( ! logs . empty ( ) ) {
LogFile & last = logs . back ( ) ;
// If the last log ends at expireEndVersion then that will be the next log begin
if ( last . endVersion = = expireEndVersion ) {
newLogBeginVersion = expireEndVersion ;
}
else {
2018-03-10 03:29:23 +08:00
// If the last log overlaps the expiredEnd then use the log's begin version and move the expiredEnd
// back to match it.
2018-01-17 20:09:43 +08:00
if ( last . endVersion > expireEndVersion ) {
newLogBeginVersion = last . beginVersion ;
logs . pop_back ( ) ;
2018-03-10 03:29:23 +08:00
expireEndVersion = newLogBeginVersion . get ( ) ;
2018-01-17 20:09:43 +08:00
}
}
}
2018-03-09 03:27:15 +08:00
// Make a list of files to delete
state std : : vector < std : : string > toDelete ;
2017-11-15 15:33:17 +08:00
2018-03-09 03:27:15 +08:00
// Move filenames out of vector then destroy it to save memory
2018-01-17 20:09:43 +08:00
for ( auto const & f : logs ) {
2018-03-09 03:27:15 +08:00
toDelete . push_back ( std : : move ( f . fileName ) ) ;
2018-01-17 20:09:43 +08:00
}
2018-03-09 03:27:15 +08:00
logs . clear ( ) ;
2017-11-15 15:33:17 +08:00
2018-03-09 03:27:15 +08:00
// Move filenames out of vector then destroy it to save memory
2018-01-17 20:09:43 +08:00
for ( auto const & f : ranges ) {
2018-11-28 01:50:39 +08:00
// The file version must be checked here again because it is likely that expireEndVersion is in the middle of a log file, in which case
// after the log and range file listings are done (using the original expireEndVersion) the expireEndVersion will be moved back slightly
// to the begin version of the last log file found (which is also the first log to not be deleted)
if ( f . version < expireEndVersion ) {
toDelete . push_back ( std : : move ( f . fileName ) ) ;
}
2018-01-17 20:09:43 +08:00
}
2018-03-09 03:27:15 +08:00
ranges . clear ( ) ;
2017-11-15 15:33:17 +08:00
2018-01-17 20:09:43 +08:00
for ( auto const & f : desc . snapshots ) {
if ( f . endVersion < expireEndVersion )
2018-03-09 03:27:15 +08:00
toDelete . push_back ( std : : move ( f . fileName ) ) ;
2018-01-17 20:09:43 +08:00
}
2018-03-10 04:03:10 +08:00
desc = BackupDescription ( ) ;
2017-11-15 15:33:17 +08:00
2018-03-09 03:27:15 +08:00
// If some files to delete were found AND force is needed AND the force option is NOT set, then fail
if ( ! toDelete . empty ( ) & & forceNeeded & & ! force )
throw backup_cannot_expire ( ) ;
2018-03-10 03:29:23 +08:00
// We are about to start deleting files, at which point no data prior to the expire end version can be
// safely assumed to exist. The [logBegin, logEnd) range from the container's metadata describes
// a range of log versions which can be assumed to exist, so if the range of data being deleted overlaps
// that range then the metadata range must be updated.
// If we're expiring the entire log range described by the metadata then clear both metadata values
if ( logEnd . present ( ) & & logEnd . get ( ) < expireEndVersion ) {
if ( logBegin . present ( ) )
2018-08-11 04:57:10 +08:00
wait ( bc - > logBeginVersion ( ) . clear ( ) ) ;
2018-03-10 03:29:23 +08:00
if ( logEnd . present ( ) )
2018-08-11 04:57:10 +08:00
wait ( bc - > logEndVersion ( ) . clear ( ) ) ;
2018-03-10 03:29:23 +08:00
}
else {
// If we are expiring to a point within the metadata range then update the begin if we have a new
// log begin version (which we should!) or clear the metadata range if we do not (which would be
// repairing the metadata from an incorrect state)
if ( logBegin . present ( ) & & logBegin . get ( ) < expireEndVersion ) {
if ( newLogBeginVersion . present ( ) ) {
2018-08-11 04:57:10 +08:00
wait ( bc - > logBeginVersion ( ) . set ( newLogBeginVersion . get ( ) ) ) ;
2018-03-10 03:29:23 +08:00
}
else {
if ( logBegin . present ( ) )
2018-08-11 04:57:10 +08:00
wait ( bc - > logBeginVersion ( ) . clear ( ) ) ;
2018-03-10 03:29:23 +08:00
if ( logEnd . present ( ) )
2018-08-11 04:57:10 +08:00
wait ( bc - > logEndVersion ( ) . clear ( ) ) ;
2018-03-10 03:29:23 +08:00
}
}
}
2018-03-09 03:27:15 +08:00
// Delete files, but limit parallelism because the file list could use a lot of memory and the corresponding
// delete actor states would use even more if they all existed at the same time.
state std : : list < Future < Void > > deleteFutures ;
while ( ! toDelete . empty ( ) | | ! deleteFutures . empty ( ) ) {
// While there are files to delete and budget in the deleteFutures list, start a delete
while ( ! toDelete . empty ( ) & & deleteFutures . size ( ) < CLIENT_KNOBS - > BACKUP_CONCURRENT_DELETES ) {
deleteFutures . push_back ( bc - > deleteFile ( toDelete . back ( ) ) ) ;
toDelete . pop_back ( ) ;
}
// Wait for deletes to finish until there are only targetDeletesInFlight remaining.
// If there are no files left to start then this value is 0, otherwise it is one less
// than the delete concurrency limit.
state int targetFuturesSize = toDelete . empty ( ) ? 0 : ( CLIENT_KNOBS - > BACKUP_CONCURRENT_DELETES - 1 ) ;
while ( deleteFutures . size ( ) > targetFuturesSize ) {
2018-08-11 04:57:10 +08:00
wait ( deleteFutures . front ( ) ) ;
2018-03-09 03:27:15 +08:00
deleteFutures . pop_front ( ) ;
}
}
2018-01-17 20:09:43 +08:00
// Update the expiredEndVersion property.
2018-08-11 04:57:10 +08:00
wait ( bc - > expiredEndVersion ( ) . set ( expireEndVersion ) ) ;
2018-01-17 20:09:43 +08:00
2017-11-15 15:33:17 +08:00
return Void ( ) ;
}
// Delete all data up to (but not including endVersion)
2018-01-17 20:09:43 +08:00
Future < Void > expireData ( Version expireEndVersion , bool force , Version restorableBeginVersion ) {
return expireData_impl ( Reference < BackupContainerFileSystem > : : addRef ( this ) , expireEndVersion , force , restorableBeginVersion ) ;
2017-11-15 15:33:17 +08:00
}
2017-11-19 20:28:22 +08:00
// Builds the set of files needed to restore to targetVersion, or returns an empty Optional when
// the container cannot be restored to that version.
//
// The set consists of the last listed keyspace snapshot whose endVersion is <= targetVersion, the
// range files of that snapshot, and a chain of log files that starts at or before the snapshot's
// begin version and continues, each log beginning where the previous one ended, until it reaches
// targetVersion.
ACTOR static Future<Optional<RestorableFileSet>> getRestoreSet_impl(Reference<BackupContainerFileSystem> bc, Version targetVersion) {
	// Pick the snapshot: every qualifying entry overwrites the previous one, so the last
	// qualifying entry in the listing wins.
	state Optional<KeyspaceSnapshotFile> snapshot;
	std::vector<KeyspaceSnapshotFile> allSnapshots = wait(bc->listKeyspaceSnapshots());
	for(auto const &candidate : allSnapshots) {
		if(candidate.endVersion <= targetVersion)
			snapshot = candidate;
	}

	// Without a usable snapshot there is nothing to restore from.
	if(!snapshot.present())
		return Optional<RestorableFileSet>();

	state RestorableFileSet restorable;
	restorable.snapshot = snapshot.get();
	restorable.targetVersion = targetVersion;

	std::vector<RangeFile> snapshotRanges = wait(bc->readKeyspaceSnapshot(snapshot.get()));
	restorable.ranges = snapshotRanges;

	// No logs needed if there is a complete key space snapshot at the target version.
	if(snapshot.get().beginVersion == snapshot.get().endVersion && snapshot.get().endVersion == targetVersion)
		return Optional<RestorableFileSet>(restorable);

	std::vector<LogFile> logs = wait(bc->listLogFiles(snapshot.get().beginVersion, targetVersion));

	// The log chain must exist and must start at or before the snapshot's begin version.
	if(logs.empty() || logs.front().beginVersion > snapshot.get().beginVersion)
		return Optional<RestorableFileSet>();

	auto logIt = logs.begin();
	Version chainEnd = logIt->endVersion;
	restorable.logs.push_back(*logIt);

	// Extend the chain until continuity is broken OR a log begins past targetVersion.
	for(++logIt; logIt != logs.end(); ++logIt) {
		if(logIt->beginVersion > chainEnd || logIt->beginVersion > targetVersion)
			break;
		// Only a log that begins exactly where the chain currently ends extends it.
		if(logIt->beginVersion == chainEnd) {
			restorable.logs.push_back(*logIt);
			chainEnd = logIt->endVersion;
		}
	}

	// The set is only usable if the chain reaches the requested version.
	if(chainEnd >= targetVersion)
		return Optional<RestorableFileSet>(restorable);

	return Optional<RestorableFileSet>();
}
// Entry point for computing the restorable file set for targetVersion: takes a counted reference
// to this container and hands it, with targetVersion, to getRestoreSet_impl().
Future<Optional<RestorableFileSet>> getRestoreSet(Version targetVersion) {
	Reference<BackupContainerFileSystem> self = Reference<BackupContainerFileSystem>::addRef(this);
	return getRestoreSet_impl(self, targetVersion);
}
2018-01-17 20:09:43 +08:00
private :
// Handle for one Version value kept in the container as a file at "properties/<name>".
// get() and set() go through readVersionProperty() / writeVersionProperty(); clear() deletes the
// backing file.
struct VersionProperty {
	Reference<BackupContainerFileSystem> bc;  // Container holding the property file
	std::string path;                         // Container-relative path of the property file

	VersionProperty(Reference<BackupContainerFileSystem> bc, std::string name)
	  : bc(bc), path("properties/" + name) {
	}

	// Read the stored version.
	Future<Optional<Version>> get() {
		return readVersionProperty(bc, path);
	}

	// Store v as the property's value.
	Future<Void> set(Version v) {
		return writeVersionProperty(bc, path, v);
	}

	// Remove the property by deleting its file.
	Future<Void> clear() {
		return bc->deleteFile(path);
	}
};
public :
// To avoid the need to scan the underlying filesystem in many cases, some important version boundaries are stored in named files.
// These files can be deleted from the filesystem if they appear to be wrong or corrupt, and full scans will be done
// when needed.
//
// The three versions below, when present, describe 4 version ranges which collectively cover the entire version timeline.
// (presentBeginVersion and presentEndVersion in this description are the logBeginVersion and logEndVersion properties.)
// 0 - expiredEndVersion: All files in this range have been deleted.
// expiredEndVersion - presentBeginVersion: Files in this range *may* have been deleted so their presence must not be assumed.
// presentBeginVersion - presentEndVersion: Files in this range have NOT been deleted by any FDB backup operations.
// presentEndVersion - infinity: Files in this range may or may not exist yet. Scan to find what is there.
//
// Property file "log_begin_version" of this container.
VersionProperty logBeginVersion() {
	return VersionProperty(Reference<BackupContainerFileSystem>::addRef(this), "log_begin_version");
}
// Property file "log_end_version" of this container.
VersionProperty logEndVersion() {
	return VersionProperty(Reference<BackupContainerFileSystem>::addRef(this), "log_end_version");
}
// Property file "expired_end_version" of this container.
VersionProperty expiredEndVersion() {
	return VersionProperty(Reference<BackupContainerFileSystem>::addRef(this), "expired_end_version");
}
// Writes version v, formatted as decimal text, as the entire contents of the container file at
// `path`.
//
// bc:   container to write into
// path: container-relative path of the property file
// v:    version to store
//
// Any Error raised while writing is logged as BackupContainerWritePropertyFailed and rethrown.
ACTOR static Future<Void> writeVersionProperty(Reference<BackupContainerFileSystem> bc, std::string path, Version v) {
	try {
		state Reference<IBackupFile> f = wait(bc->writeFile(path));
		// The text handed to append() is kept in actor state so that the buffer stays valid for as
		// long as the append is outstanding: append() receives a raw pointer and returns a Future,
		// and a plain local would be destroyed as soon as this actor suspends in wait().
		// The cast makes the argument match %lld regardless of how Version is defined.
		state std::string s = format("%lld", (long long)v);
		wait(f->append(s.data(), s.size()));
		wait(f->finish());
		return Void();
	} catch(Error &e) {
		TraceEvent(SevWarn, "BackupContainerWritePropertyFailed").error(e).detail("Path", path);
		throw;
	}
}
// Reads the version stored in the container file at `path`.
//
// Returns the parsed version, or an empty Optional if the file does not exist.
// Throws backup_invalid_info if the file could not be read in full or its contents are not exactly
// one decimal integer. Every Error other than file_not_found is logged as
// BackupContainerReadPropertyFailed and rethrown.
ACTOR static Future<Optional<Version>> readVersionProperty(Reference<BackupContainerFileSystem> bc, std::string path) {
	try {
		state Reference<IAsyncFile> f = wait(bc->readFile(path));
		state int64_t size = wait(f->size());
		// The read buffer lives in actor state because the read is awaited.
		state std::string s;
		s.resize(size);
		// &s[0] yields a writable pointer to the string's storage (valid even when size is 0).
		int rs = wait(f->read((uint8_t *)&s[0], size, 0));
		// %lld stores through a long long *, so parse into a long long and convert afterwards
		// instead of passing a Version * to sscanf.
		long long parsed;
		int len;
		// %n reports how many characters were consumed; requiring it to equal the file size
		// rejects any trailing garbage after the number.
		if(rs == size && sscanf(s.c_str(), "%lld%n", &parsed, &len) == 1 && len == size) {
			Version v = parsed;
			return v;
		}

		TraceEvent(SevWarn, "BackupContainerInvalidProperty");
		throw backup_invalid_info();
	} catch(Error &e) {
		// A missing property file simply means the property is not set.
		if(e.code() == error_code_file_not_found)
			return Optional<Version>();
		TraceEvent(SevWarn, "BackupContainerReadPropertyFailed").error(e).detail("Path", path);
		throw;
	}
}
2017-11-15 15:33:17 +08:00
} ;
// Backup container stored as a directory tree on a filesystem reachable from the local machine,
// addressed by a URL of the form file://</path/to/base/dir/>.
class BackupContainerLocalDirectory : public BackupContainerFileSystem, ReferenceCounted<BackupContainerLocalDirectory> {
public:
	void addref() { return ReferenceCounted<BackupContainerLocalDirectory>::addref(); }
	void delref() { return ReferenceCounted<BackupContainerLocalDirectory>::delref(); }

	static std::string getURLFormat() { return "file://</path/to/base/dir/>"; }

	// Creates a container rooted at the directory named by `url`.
	// Throws backup_invalid_url if url does not start with "file://", and io_error if the path is
	// not absolute (the absolute-path check is skipped in simulation).
	BackupContainerLocalDirectory(std::string url) {
		// Finalized path written to will be <path>/backup-<uid>
		m_path = pathFromURL(url);
	}

	// Lists the subdirectories of the directory named by `url`, each returned as a file:// URL.
	// Throws the same errors as the constructor for an unusable url.
	static Future<std::vector<std::string>> listURLs(std::string url) {
		std::string path = pathFromURL(url);
		std::vector<std::string> dirs = platform::listDirectories(path);
		std::vector<std::string> results;

		for(auto &r : dirs) {
			if(r == "." || r == "..")
				continue;
			results.push_back(std::string("file://") + joinPath(path, r));
		}

		return results;
	}

	Future<Void> create() {
		// Nothing should be done here because create() can be called by any process working with the container URL, such as fdbbackup.
		// Since "local directory" containers are by definition local to the machine they are accessed from,
		// the container's creation (in this case the creation of a directory) must be ensured prior to every file creation,
		// which is done in writeFile().
		// Creating the directory here will result in unnecessary directories being created on machines that run fdbbackup but not agents.
		return Void();
	}

	// Opens the container file at `path` (relative to the container root) for reading.
	Future<Reference<IAsyncFile>> readFile(std::string path) {
		// Opening cached mode forces read/write mode at a lower level, overriding the readonly request.  So cached mode
		// can't be used because backup files are read-only.  Cached mode can only help during restore task retries handled
		// by the same process that failed the first task execution anyway, which is a very rare case.
		int flags = IAsyncFile::OPEN_NO_AIO | IAsyncFile::OPEN_READONLY | IAsyncFile::OPEN_UNCACHED;
		std::string fullPath = joinPath(m_path, path);
		#ifndef _WIN32
		// Simulation does not properly handle opening the same file from multiple machines using a shared filesystem,
		// so create a symbolic link to make each file opening appear to be unique.  This could also work in production
		// but only if the source directory is writeable which shouldn't be required for a restore.
		if(g_network->isSimulated()) {
			if(!fileExists(fullPath))
				throw file_not_found();
			std::string uniquePath = fullPath + "." + g_random->randomUniqueID().toString() + ".lnk";
			unlink(uniquePath.c_str());
			// The link target is relative (just the file's base name) because the link is created
			// next to the file it points at.
			ASSERT(symlink(basename(path).c_str(), uniquePath.c_str()) == 0);
			fullPath = uniquePath;
		}
		#endif
		return IAsyncFileSystem::filesystem()->open(fullPath, flags, 0644);
	}

	// A file being written into the container.  Data goes to a temporary file which is renamed to
	// its final path by finish().
	class BackupFile : public IBackupFile, ReferenceCounted<BackupFile> {
	public:
		BackupFile(std::string fileName, Reference<IAsyncFile> file, std::string finalFullPath) : IBackupFile(fileName), m_file(file), m_finalFullPath(finalFullPath) {}

		// Writes len bytes at the current end of the file and advances the write offset.
		// The write is issued with the caller's pointer, so `data` must stay valid until the
		// returned future is ready.
		Future<Void> append(const void *data, int len) {
			Future<Void> r = m_file->write(data, len, m_offset);
			m_offset += len;
			return r;
		}

		// Truncates to the written size, syncs, releases the file handle and renames the
		// temporary file to its final path.
		ACTOR static Future<Void> finish_impl(Reference<BackupFile> f) {
			wait(f->m_file->truncate(f->size()));  // Some IAsyncFile implementations extend in whole block sizes.
			wait(f->m_file->sync());
			std::string name = f->m_file->getFilename();
			f->m_file.clear();
			renameFile(name, f->m_finalFullPath);
			return Void();
		}

		Future<Void> finish() {
			return finish_impl(Reference<BackupFile>::addRef(this));
		}

		void addref() { return ReferenceCounted<BackupFile>::addref(); }
		void delref() { return ReferenceCounted<BackupFile>::delref(); }

	private:
		Reference<IAsyncFile> m_file;
		std::string m_finalFullPath;
	};

	// Starts writing the container file at `path`.  The parent directory is created if needed and
	// the data is written to "<fullPath>.<random id>.temp" until the returned file is finished.
	Future<Reference<IBackupFile>> writeFile(std::string path) {
		int flags = IAsyncFile::OPEN_NO_AIO | IAsyncFile::OPEN_CREATE | IAsyncFile::OPEN_ATOMIC_WRITE_AND_CREATE | IAsyncFile::OPEN_READWRITE;
		std::string fullPath = joinPath(m_path, path);
		platform::createDirectory(parentDirectory(fullPath));
		std::string temp = fullPath + "." + g_random->randomUniqueID().toString() + ".temp";
		Future<Reference<IAsyncFile>> f = IAsyncFileSystem::filesystem()->open(temp, flags, 0644);
		return map(f, [=](Reference<IAsyncFile> f) {
			return Reference<IBackupFile>(new BackupFile(path, f, fullPath));
		});
	}

	// Deletes the container file at `path`.
	Future<Void> deleteFile(std::string path) {
		::deleteFile(joinPath(m_path, path));
		return Void();
	}

	// Lists all files under `path` with their sizes, as container-relative names.  The path
	// filter argument is accepted for interface compatibility but is not used here.
	Future<FilesAndSizesT> listFiles(std::string path, std::function<bool(std::string const &)>) {
		FilesAndSizesT results;

		std::vector<std::string> files;
		platform::findFilesRecursively(joinPath(m_path, path), files);

		// Remove .lnk files from results, they are a side effect of a backup that was *read* during simulation.  See readFile() above for more info on why they are created.
		if(g_network->isSimulated())
			files.erase(std::remove_if(files.begin(), files.end(), [](std::string const &f) { return StringRef(f).endsWith(LiteralStringRef(".lnk")); }), files.end());

		for(auto &f : files) {
			// Hide .part or .temp files.
			StringRef s(f);
			if(!s.endsWith(LiteralStringRef(".part")) && !s.endsWith(LiteralStringRef(".temp")))
				// Strip the container root and the separator that follows it.
				results.push_back({f.substr(m_path.size() + 1), ::fileSize(f)});
		}

		return results;
	}

	// Deletes the whole container directory.  If pNumDeleted is not null it receives the count
	// returned by platform::eraseDirectoryRecursive(); it must stay valid until the returned
	// future is ready.  Throws backup_invalid_url if the directory does not look like a backup.
	Future<Void> deleteContainer(int *pNumDeleted) {
		// The continuation gets its own copy of the path instead of reaching through `this`, so it
		// does not depend on this object still being alive when describeBackup() completes.
		std::string containerPath = m_path;

		// In order to avoid deleting some random directory due to user error, first describe the backup
		// and make sure it has something in it.
		return map(describeBackup(), [containerPath, pNumDeleted](BackupDescription const &desc) {
			// If the backup has no snapshots and no logs then it's probably not a valid backup
			if(desc.snapshots.size() == 0 && !desc.minLogBegin.present())
				throw backup_invalid_url();

			int count = platform::eraseDirectoryRecursive(containerPath);
			if(pNumDeleted != nullptr)
				*pNumDeleted = count;

			return Void();
		});
	}

private:
	// Converts a file:// URL into the directory path it names, with trailing slashes removed.
	// Throws backup_invalid_url if url does not start with "file://", and io_error if the
	// resulting path is not absolute (not checked in simulation).
	static std::string pathFromURL(std::string const &url) {
		if(url.find("file://") != 0) {
			TraceEvent(SevWarn, "BackupContainerLocalDirectory").detail("Description", "Invalid URL for BackupContainerLocalDirectory").detail("URL", url);
			// Do not go on to slice a string that does not have the expected prefix.
			throw backup_invalid_url();
		}

		// Skip the 7 characters of "file://"
		std::string path = url.substr(7);
		// Remove trailing slashes on path
		path.erase(path.find_last_not_of("\\/") + 1);

		if(!g_network->isSimulated() && path != abspath(path)) {
			TraceEvent(SevWarn, "BackupContainerLocalDirectory").detail("Description", "Backup path must be absolute (e.g. file:///some/path)").detail("URL", url).detail("Path", path);
			throw io_error();
		}

		return path;
	}

	std::string m_path;
};
2017-05-26 04:48:44 +08:00
2017-11-15 15:33:17 +08:00
class BackupContainerBlobStore : public BackupContainerFileSystem , ReferenceCounted < BackupContainerBlobStore > {
private :
2018-01-24 03:46:16 +08:00
// Backup files to under a single folder prefix with subfolders for each named backup
static const std : : string DATAFOLDER ;
2018-01-29 16:32:41 +08:00
// Indexfolder contains keys for which user-named backups exist. Backup names can contain an arbitrary
2018-01-24 03:46:16 +08:00
// number of slashes so the backup names are kept in a separate folder tree from their actual data.
static const std : : string INDEXFOLDER ;
2017-11-15 15:33:17 +08:00
Reference < BlobStoreEndpoint > m_bstore ;
std : : string m_name ;
2017-05-26 04:48:44 +08:00
2018-11-13 19:00:59 +08:00
// All backup data goes into a single bucket
std : : string m_bucket ;
2018-01-24 03:46:16 +08:00
std : : string dataPath ( const std : : string path ) {
return DATAFOLDER + " / " + m_name + " / " + path ;
}
// Get the path of the backups's index entry
std : : string indexEntry ( ) {
return INDEXFOLDER + " / " + m_name ;
}
2017-11-15 15:33:17 +08:00
public :
2018-11-13 19:00:59 +08:00
BackupContainerBlobStore ( Reference < BlobStoreEndpoint > bstore , std : : string name , const BlobStoreEndpoint : : ParametersT & params )
: m_bstore ( bstore ) , m_name ( name ) , m_bucket ( " FDB_BACKUPS_V2 " ) {
// Currently only one parameter is supported, "bucket"
for ( auto & kv : params ) {
if ( kv . first = = " bucket " ) {
m_bucket = kv . second ;
continue ;
}
TraceEvent ( SevWarn , " BackupContainerBlobStoreInvalidParameter " ) . detail ( " Name " , printable ( kv . first ) ) . detail ( " Value " , printable ( kv . second ) ) ;
throw backup_invalid_url ( ) ;
}
2017-05-26 04:48:44 +08:00
}
2017-11-15 15:33:17 +08:00
void addref ( ) { return ReferenceCounted < BackupContainerBlobStore > : : addref ( ) ; }
void delref ( ) { return ReferenceCounted < BackupContainerBlobStore > : : delref ( ) ; }
2017-05-26 04:48:44 +08:00
2018-11-13 22:23:58 +08:00
static std : : string getURLFormat ( ) {
return BlobStoreEndpoint : : getURLFormat ( true ) + " (Note: The 'bucket' parameter is required.) " ;
}
2017-05-26 04:48:44 +08:00
2017-11-15 15:33:17 +08:00
virtual ~ BackupContainerBlobStore ( ) { }
2017-05-26 04:48:44 +08:00
2017-11-15 15:33:17 +08:00
Future < Reference < IAsyncFile > > readFile ( std : : string path ) {
return Reference < IAsyncFile > (
new AsyncFileReadAheadCache (
2018-11-13 19:00:59 +08:00
Reference < IAsyncFile > ( new AsyncFileBlobStoreRead ( m_bstore , m_bucket , dataPath ( path ) ) ) ,
2017-11-15 15:33:17 +08:00
m_bstore - > knobs . read_block_size ,
m_bstore - > knobs . read_ahead_blocks ,
m_bstore - > knobs . concurrent_reads_per_file ,
m_bstore - > knobs . read_cache_blocks_per_file
)
) ;
2017-05-26 04:48:44 +08:00
}
2018-11-13 19:00:59 +08:00
ACTOR static Future < std : : vector < std : : string > > listURLs ( Reference < BlobStoreEndpoint > bstore , std : : string bucket ) {
2018-01-24 03:46:16 +08:00
state std : : string basePath = INDEXFOLDER + ' / ' ;
2018-11-13 19:00:59 +08:00
BlobStoreEndpoint : : ListResult contents = wait ( bstore - > listBucket ( bucket , basePath ) ) ;
2017-12-13 09:44:03 +08:00
std : : vector < std : : string > results ;
2018-01-24 03:46:16 +08:00
for ( auto & f : contents . objects ) {
results . push_back ( bstore - > getResourceURL ( f . name . substr ( basePath . size ( ) ) ) ) ;
2017-12-13 09:44:03 +08:00
}
return results ;
}
2017-11-15 15:33:17 +08:00
class BackupFile : public IBackupFile , ReferenceCounted < BackupFile > {
public :
BackupFile ( std : : string fileName , Reference < IAsyncFile > file ) : IBackupFile ( fileName ) , m_file ( file ) { }
2017-05-26 04:48:44 +08:00
2017-11-16 05:33:09 +08:00
Future < Void > append ( const void * data , int len ) {
Future < Void > r = m_file - > write ( data , len , m_offset ) ;
m_offset + = len ;
2017-11-15 15:33:17 +08:00
return r ;
}
2017-05-26 04:48:44 +08:00
2017-11-15 15:33:17 +08:00
Future < Void > finish ( ) {
Reference < BackupFile > self = Reference < BackupFile > : : addRef ( this ) ;
return map ( m_file - > sync ( ) , [ = ] ( Void _ ) { self - > m_file . clear ( ) ; return Void ( ) ; } ) ;
}
2017-05-26 04:48:44 +08:00
2017-11-15 15:33:17 +08:00
void addref ( ) { return ReferenceCounted < BackupFile > : : addref ( ) ; }
void delref ( ) { return ReferenceCounted < BackupFile > : : delref ( ) ; }
private :
Reference < IAsyncFile > m_file ;
} ;
2017-05-26 04:48:44 +08:00
2017-11-15 15:33:17 +08:00
Future < Reference < IBackupFile > > writeFile ( std : : string path ) {
2018-11-13 19:00:59 +08:00
return Reference < IBackupFile > ( new BackupFile ( path , Reference < IAsyncFile > ( new AsyncFileBlobStoreWrite ( m_bstore , m_bucket , dataPath ( path ) ) ) ) ) ;
2017-11-15 15:33:17 +08:00
}
2017-05-26 04:48:44 +08:00
2017-11-15 15:33:17 +08:00
Future < Void > deleteFile ( std : : string path ) {
2018-11-13 19:00:59 +08:00
return m_bstore - > deleteObject ( m_bucket , dataPath ( path ) ) ;
2017-11-15 15:33:17 +08:00
}
2017-05-26 04:48:44 +08:00
2018-01-17 20:09:43 +08:00
ACTOR static Future < FilesAndSizesT > listFiles_impl ( Reference < BackupContainerBlobStore > bc , std : : string path , std : : function < bool ( std : : string const & ) > pathFilter ) {
// pathFilter expects container based paths, so create a wrapper which converts a raw path
// to a container path by removing the known backup name prefix.
2018-01-24 03:46:16 +08:00
state int prefixTrim = bc - > dataPath ( " " ) . size ( ) ;
2018-01-17 20:09:43 +08:00
std : : function < bool ( std : : string const & ) > rawPathFilter = [ = ] ( const std : : string & folderPath ) {
2018-01-24 03:46:16 +08:00
ASSERT ( folderPath . size ( ) > = prefixTrim ) ;
2018-01-17 20:09:43 +08:00
return pathFilter ( folderPath . substr ( prefixTrim ) ) ;
} ;
2018-11-13 19:00:59 +08:00
state BlobStoreEndpoint : : ListResult result = wait ( bc - > m_bstore - > listBucket ( bc - > m_bucket , bc - > dataPath ( path ) , ' / ' , std : : numeric_limits < int > : : max ( ) , rawPathFilter ) ) ;
2017-11-15 15:33:17 +08:00
FilesAndSizesT files ;
2018-01-24 03:46:16 +08:00
for ( auto & o : result . objects ) {
ASSERT ( o . name . size ( ) > = prefixTrim ) ;
files . push_back ( { o . name . substr ( prefixTrim ) , o . size } ) ;
}
2017-11-15 15:33:17 +08:00
return files ;
}
2017-05-26 04:48:44 +08:00
2018-01-17 20:09:43 +08:00
Future < FilesAndSizesT > listFiles ( std : : string path , std : : function < bool ( std : : string const & ) > pathFilter ) {
return listFiles_impl ( Reference < BackupContainerBlobStore > : : addRef ( this ) , path , pathFilter ) ;
2017-05-26 04:48:44 +08:00
}
2017-11-19 20:28:22 +08:00
ACTOR static Future < Void > create_impl ( Reference < BackupContainerBlobStore > bc ) {
2018-11-14 08:06:39 +08:00
wait ( bc - > m_bstore - > createBucket ( bc - > m_bucket ) ) ;
2018-01-24 03:46:16 +08:00
// Check/create the index entry
2018-11-13 19:00:59 +08:00
bool exists = wait ( bc - > m_bstore - > objectExists ( bc - > m_bucket , bc - > indexEntry ( ) ) ) ;
2018-01-24 03:46:16 +08:00
if ( ! exists ) {
2018-11-14 08:06:39 +08:00
wait ( bc - > m_bstore - > writeEntireFile ( bc - > m_bucket , bc - > indexEntry ( ) , " " ) ) ;
2018-01-24 03:46:16 +08:00
}
2017-05-26 04:48:44 +08:00
2017-11-15 15:33:17 +08:00
return Void ( ) ;
}
2017-05-26 04:48:44 +08:00
2017-11-15 15:33:17 +08:00
Future < Void > create ( ) {
return create_impl ( Reference < BackupContainerBlobStore > : : addRef ( this ) ) ;
}
2017-05-26 04:48:44 +08:00
2017-11-15 15:33:17 +08:00
ACTOR static Future < Void > deleteContainer_impl ( Reference < BackupContainerBlobStore > bc , int * pNumDeleted ) {
2018-01-29 16:32:41 +08:00
// First delete everything under the data prefix in the bucket
2018-11-14 08:06:39 +08:00
wait ( bc - > m_bstore - > deleteRecursively ( bc - > m_bucket , bc - > dataPath ( " " ) , pNumDeleted ) ) ;
2018-01-06 15:06:39 +08:00
2018-01-24 03:46:16 +08:00
// Now that all files are deleted, delete the index entry
2018-11-14 08:06:39 +08:00
wait ( bc - > m_bstore - > deleteObject ( bc - > m_bucket , bc - > indexEntry ( ) ) ) ;
2018-01-24 03:46:16 +08:00
2017-11-15 15:33:17 +08:00
return Void ( ) ;
}
2017-05-26 04:48:44 +08:00
2017-11-15 15:33:17 +08:00
Future < Void > deleteContainer ( int * pNumDeleted ) {
return deleteContainer_impl ( Reference < BackupContainerBlobStore > : : addRef ( this ) , pNumDeleted ) ;
}
2018-11-13 19:00:59 +08:00
std : : string getBucket ( ) const {
return m_bucket ;
}
2017-11-15 15:33:17 +08:00
} ;
2017-05-26 04:48:44 +08:00
2018-01-24 03:46:16 +08:00
const std : : string BackupContainerBlobStore : : DATAFOLDER = " data " ;
const std : : string BackupContainerBlobStore : : INDEXFOLDER = " backups " ;
2017-11-15 15:33:17 +08:00
std : : string IBackupContainer : : lastOpenError ;
2017-05-26 04:48:44 +08:00
std : : vector < std : : string > IBackupContainer : : getURLFormats ( ) {
std : : vector < std : : string > formats ;
formats . push_back ( BackupContainerLocalDirectory : : getURLFormat ( ) ) ;
formats . push_back ( BackupContainerBlobStore : : getURLFormat ( ) ) ;
return formats ;
}
// Get an IBackupContainer based on a container URL string
2017-11-15 15:33:17 +08:00
Reference < IBackupContainer > IBackupContainer : : openContainer ( std : : string url )
2017-05-26 04:48:44 +08:00
{
static std : : map < std : : string , Reference < IBackupContainer > > m_cache ;
Reference < IBackupContainer > & r = m_cache [ url ] ;
if ( r )
return r ;
try {
StringRef u ( url ) ;
if ( u . startsWith ( LiteralStringRef ( " file:// " ) ) )
r = Reference < IBackupContainer > ( new BackupContainerLocalDirectory ( url ) ) ;
else if ( u . startsWith ( LiteralStringRef ( " blobstore:// " ) ) ) {
std : : string resource ;
2018-11-13 19:00:59 +08:00
// The URL parameters contain blobstore endpoint tunables as well as possible backup-specific options.
BlobStoreEndpoint : : ParametersT backupParams ;
Reference < BlobStoreEndpoint > bstore = BlobStoreEndpoint : : fromString ( url , & resource , & lastOpenError , & backupParams ) ;
2017-05-26 04:48:44 +08:00
if ( resource . empty ( ) )
2017-11-15 15:33:17 +08:00
throw backup_invalid_url ( ) ;
2017-05-26 04:48:44 +08:00
for ( auto c : resource )
2018-01-24 03:46:16 +08:00
if ( ! isalnum ( c ) & & c ! = ' _ ' & & c ! = ' - ' & & c ! = ' . ' & & c ! = ' / ' )
2017-11-15 15:33:17 +08:00
throw backup_invalid_url ( ) ;
2018-11-13 19:00:59 +08:00
r = Reference < IBackupContainer > ( new BackupContainerBlobStore ( bstore , resource , backupParams ) ) ;
2017-05-26 04:48:44 +08:00
}
2017-11-15 15:33:17 +08:00
else {
lastOpenError = " invalid URL prefix " ;
throw backup_invalid_url ( ) ;
}
2017-05-26 04:48:44 +08:00
2017-11-15 15:33:17 +08:00
r - > URL = url ;
2017-05-26 04:48:44 +08:00
return r ;
} catch ( Error & e ) {
2017-11-15 15:33:17 +08:00
if ( e . code ( ) = = error_code_actor_cancelled )
throw ;
TraceEvent m ( SevWarn , " BackupContainer " ) ;
m . detail ( " Description " , " Invalid container specification. See help. " ) . detail ( " URL " , url ) ;
if ( e . code ( ) = = error_code_backup_invalid_url )
2018-06-09 02:11:08 +08:00
m . detail ( " LastOpenError " , lastOpenError ) ;
2017-05-26 04:48:44 +08:00
throw ;
}
}
2017-12-13 09:44:03 +08:00
// Get a list of URLS to backup containers based on some a shorter URL. This function knows about some set of supported
// URL types which support this sort of backup discovery.
2017-12-21 05:48:31 +08:00
ACTOR Future < std : : vector < std : : string > > listContainers_impl ( std : : string baseURL ) {
2017-12-13 09:44:03 +08:00
try {
StringRef u ( baseURL ) ;
if ( u . startsWith ( LiteralStringRef ( " file:// " ) ) ) {
std : : vector < std : : string > results = wait ( BackupContainerLocalDirectory : : listURLs ( baseURL ) ) ;
return results ;
}
else if ( u . startsWith ( LiteralStringRef ( " blobstore:// " ) ) ) {
std : : string resource ;
2018-11-13 19:00:59 +08:00
BlobStoreEndpoint : : ParametersT backupParams ;
Reference < BlobStoreEndpoint > bstore = BlobStoreEndpoint : : fromString ( baseURL , & resource , & IBackupContainer : : lastOpenError , & backupParams ) ;
2017-12-13 09:44:03 +08:00
if ( ! resource . empty ( ) ) {
TraceEvent ( SevWarn , " BackupContainer " ) . detail ( " Description " , " Invalid backup container base URL, resource aka path should be blank. " ) . detail ( " URL " , baseURL ) ;
throw backup_invalid_url ( ) ;
}
2018-11-13 19:00:59 +08:00
// Create a dummy container to parse the backup-specific parameters from the URL and get a final bucket name
BackupContainerBlobStore dummy ( bstore , " dummy " , backupParams ) ;
std : : vector < std : : string > results = wait ( BackupContainerBlobStore : : listURLs ( bstore , dummy . getBucket ( ) ) ) ;
2017-12-13 09:44:03 +08:00
return results ;
}
else {
IBackupContainer : : lastOpenError = " invalid URL prefix " ;
throw backup_invalid_url ( ) ;
}
} catch ( Error & e ) {
if ( e . code ( ) = = error_code_actor_cancelled )
throw ;
TraceEvent m ( SevWarn , " BackupContainer " ) ;
m . detail ( " Description " , " Invalid backup container URL prefix. See help. " ) . detail ( " URL " , baseURL ) ;
if ( e . code ( ) = = error_code_backup_invalid_url )
2018-06-09 02:11:08 +08:00
m . detail ( " LastOpenError " , IBackupContainer : : lastOpenError ) ;
2017-12-13 09:44:03 +08:00
throw ;
}
}
2017-12-21 05:48:31 +08:00
// Public entry point for container discovery: starts listContainers_impl for baseURL and returns
// the resulting future to the caller.
Future<std::vector<std::string>> IBackupContainer::listContainers(std::string baseURL) {
	Future<std::vector<std::string>> discovered = listContainers_impl(baseURL);
	return discovered;
}
2018-01-23 16:19:51 +08:00
// Estimates the database Version that corresponds to a calendar date/time, using the timekeeper
// records stored under timeKeeperPrefixRange.
//
// datetime must have the form "YYYY-MM-DD.HH:MM:SS" (parsed with "%d-%d-%d.%d:%d:%d"). It is
// converted with mktime(), i.e. interpreted in the process's local time zone, with tm_isdst = -1
// so that the C library decides whether daylight saving time applies.
//
// The result is extrapolated from a single timekeeper record: the one found by the first range
// read (described below as the closest key less than the requested time), or failing that the
// first record at or after it. The estimate is clamped so it is never negative.
//
// Throws backup_error (after printing a message to stderr) when the string cannot be parsed, when
// mktime() cannot represent the date/time, or when no timekeeper record exists. Errors raised
// inside the transaction loop are handed to tr->onError().
ACTOR Future<Version> timeKeeperVersionFromDatetime(std::string datetime, Database db) {
	state KeyBackedMap<int64_t, Version> versionMap(timeKeeperPrefixRange.begin);
	state Reference<ReadYourWritesTransaction> tr = Reference<ReadYourWritesTransaction>(new ReadYourWritesTransaction(db));

	int year, month, day, hour, minute, second;
	if (sscanf(datetime.c_str(), "%d-%d-%d.%d:%d:%d", &year, &month, &day, &hour, &minute, &second) != 6) {
		fprintf(stderr, "ERROR: Incorrect date/time format.\n");
		throw backup_error();
	}

	struct tm expDateTime = {0};
	expDateTime.tm_year = year - 1900;
	expDateTime.tm_mon = month - 1;
	expDateTime.tm_mday = day;
	expDateTime.tm_hour = hour;
	expDateTime.tm_min = minute;
	expDateTime.tm_sec = second;
	expDateTime.tm_isdst = -1;

	// mktime() reports failure by returning (time_t)-1. Reject that instead of silently using -1
	// as the lookup key for the timekeeper queries below.
	time_t converted = mktime(&expDateTime);
	if (converted == (time_t)-1) {
		fprintf(stderr, "ERROR: Unable to convert the given date/time to a timestamp.\n");
		throw backup_error();
	}
	state int64_t epochSeconds = (int64_t)converted;

	loop {
		try {
			tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
			tr->setOption(FDBTransactionOptions::LOCK_AWARE);

			state std::vector<std::pair<int64_t, Version>> results = wait(versionMap.getRange(tr, 0, epochSeconds, 1, false, true));
			if (results.size() != 1) {
				// No key less than the requested time was found in the database.
				// Look for a key >= the requested time.
				wait(store(versionMap.getRange(tr, epochSeconds, std::numeric_limits<int64_t>::max(), 1), results));

				if (results.size() != 1) {
					fprintf(stderr, "ERROR: Unable to calculate a version for given date/time.\n");
					throw backup_error();
				}
			}

			// Adjust the version found by the delta between the requested time and the time of the
			// record, and clamp the result at 0.
			auto& result = results[0];
			return std::max<Version>(0, result.second + (epochSeconds - result.first) * CLIENT_KNOBS->CORE_VERSIONSPERSECOND);
		} catch (Error& e) {
			wait(tr->onError(e));
		}
	}
}
// Estimates the epoch time (in the units of the timekeeper keys) at which version v was current,
// by bisecting the timekeeper records under timeKeeperPrefixRange with the supplied transaction.
//
// Returns an empty Optional when there are no timekeeper records at all; otherwise the time of the
// chosen record adjusted by the version distance divided by CORE_VERSIONSPERSECOND.
ACTOR Future<Optional<int64_t>> timeKeeperEpochsFromVersion(Version v, Reference<ReadYourWritesTransaction> tr) {
	state KeyBackedMap<int64_t, Version> versionMap(timeKeeperPrefixRange.begin);

	// Binary search to find the closest date with a version <= v
	state int64_t lo = 0;
	state int64_t hi = (int64_t)now();
	state int64_t probe;
	state std::pair<int64_t, Version> anchor;

	tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
	tr->setOption(FDBTransactionOptions::LOCK_AWARE);

	loop {
		probe = (lo + hi + 1) / 2; // ceiling

		// Find the highest time < probe
		state std::vector<std::pair<int64_t, Version>> batch = wait(versionMap.getRange(tr, lo, probe, 1, false, true));

		if (batch.size() == 1) {
			anchor = batch[0];

			if (v >= anchor.second) {
				// The record is not newer than v; stop once the lower bound can no longer advance.
				if (anchor.first == lo) {
					break;
				}
				lo = anchor.first;
			}
			else {
				// The record is newer than v, so shrink the window from above.
				hi = anchor.first;
			}
		}
		else if (probe != lo) {
			// Nothing in [lo, probe): move the lower bound up and try again.
			lo = probe;
		}
		else {
			// There aren't any records having a version < v, so just look for any record having a time < now
			// and base a result on it
			wait(store(versionMap.getRange(tr, 0, (int64_t)now(), 1), batch));

			if (batch.size() != 1) {
				// There aren't any timekeeper records to base a result on so return nothing
				return Optional<int64_t>();
			}

			anchor = batch[0];
			break;
		}
	}

	return anchor.first + (v - anchor.second) / CLIENT_KNOBS->CORE_VERSIONSPERSECOND;
}
2017-12-22 13:15:26 +08:00
// Test helper: appends `size` random bytes to backup file f (nothing when size <= 0), finishes the
// file, then reads it back through container c and asserts that both the reported file size and
// the bytes read match what was written.
ACTOR Future<Void> writeAndVerifyFile(Reference<IBackupContainer> c, Reference<IBackupFile> f, int size) {
	state Standalone<StringRef> expected;

	if (size > 0) {
		expected = makeString(size);

		// Fill the buffer front to back, one random byte at a time.
		uint8_t* cursor = mutateString(expected);
		uint8_t* const stop = cursor + expected.size();
		while (cursor != stop) {
			*cursor = (uint8_t)g_random->randomInt(0, 256);
			++cursor;
		}

		wait(f->append(expected.begin(), expected.size()));
	}

	wait(f->finish());

	state Reference<IAsyncFile> reader = wait(c->readFile(f->getFileName()));

	int64_t storedSize = wait(reader->size());
	ASSERT(size == storedSize);

	if (size > 0) {
		state Standalone<StringRef> actual = makeString(size);
		int bytesRead = wait(reader->read(mutateString(actual), actual.size(), 0));
		ASSERT(bytesRead == actual.size());
		ASSERT(actual == expected);
	}

	return Void();
}
2017-11-15 15:33:17 +08:00
ACTOR Future < Void > testBackupContainer ( std : : string url ) {
printf ( " BackupContainerTest URL %s \n " , url . c_str ( ) ) ;
2017-05-26 04:48:44 +08:00
2017-11-15 15:33:17 +08:00
state Reference < IBackupContainer > c = IBackupContainer : : openContainer ( url ) ;
2018-01-18 16:36:28 +08:00
2017-12-13 09:44:03 +08:00
// Make sure container doesn't exist, then create it.
2018-01-18 16:36:28 +08:00
try {
2018-08-11 04:57:10 +08:00
wait ( c - > deleteContainer ( ) ) ;
2018-01-18 16:36:28 +08:00
} catch ( Error & e ) {
if ( e . code ( ) ! = error_code_backup_invalid_url )
throw ;
}
2018-08-11 04:57:10 +08:00
wait ( c - > create ( ) ) ;
2017-11-15 15:33:17 +08:00
2018-01-24 03:46:16 +08:00
state int64_t versionShift = g_random - > randomInt64 ( 0 , std : : numeric_limits < Version > : : max ( ) - 500 ) ;
2018-01-03 15:22:35 +08:00
2018-01-24 03:46:16 +08:00
state Reference < IBackupFile > log1 = wait ( c - > writeLogFile ( 100 + versionShift , 150 + versionShift , 10 ) ) ;
state Reference < IBackupFile > log2 = wait ( c - > writeLogFile ( 150 + versionShift , 300 + versionShift , 10 ) ) ;
state Reference < IBackupFile > range1 = wait ( c - > writeRangeFile ( 160 + versionShift , 10 ) ) ;
state Reference < IBackupFile > range2 = wait ( c - > writeRangeFile ( 300 + versionShift , 10 ) ) ;
state Reference < IBackupFile > range3 = wait ( c - > writeRangeFile ( 310 + versionShift , 10 ) ) ;
2017-11-17 08:19:56 +08:00
2018-08-11 04:57:10 +08:00
wait (
2018-02-08 02:38:31 +08:00
writeAndVerifyFile ( c , log1 , 0 )
& & writeAndVerifyFile ( c , log2 , g_random - > randomInt ( 0 , 10000000 ) )
& & writeAndVerifyFile ( c , range1 , g_random - > randomInt ( 0 , 1000 ) )
& & writeAndVerifyFile ( c , range2 , g_random - > randomInt ( 0 , 100000 ) )
& & writeAndVerifyFile ( c , range3 , g_random - > randomInt ( 0 , 3000000 ) )
) ;
2018-08-11 04:57:10 +08:00
wait (
2018-02-08 02:38:31 +08:00
c - > writeKeyspaceSnapshotFile ( { range1 - > getFileName ( ) , range2 - > getFileName ( ) } , range1 - > size ( ) + range2 - > size ( ) )
& & c - > writeKeyspaceSnapshotFile ( { range3 - > getFileName ( ) } , range3 - > size ( ) )
) ;
2017-11-15 15:33:17 +08:00
2018-01-24 03:46:16 +08:00
printf ( " Checking file list dump \n " ) ;
2018-01-17 20:09:43 +08:00
FullBackupListing listing = wait ( c - > dumpFileList ( ) ) ;
2017-11-19 20:28:22 +08:00
ASSERT ( listing . logs . size ( ) = = 2 ) ;
ASSERT ( listing . ranges . size ( ) = = 3 ) ;
ASSERT ( listing . snapshots . size ( ) = = 2 ) ;
2017-11-15 15:33:17 +08:00
state BackupDescription desc = wait ( c - > describeBackup ( ) ) ;
2018-01-18 16:36:28 +08:00
printf ( " Backup Description 1 \n %s " , desc . toString ( ) . c_str ( ) ) ;
2017-11-15 15:33:17 +08:00
ASSERT ( desc . maxRestorableVersion . present ( ) ) ;
Optional < RestorableFileSet > rest = wait ( c - > getRestoreSet ( desc . maxRestorableVersion . get ( ) ) ) ;
ASSERT ( rest . present ( ) ) ;
2017-11-17 08:19:56 +08:00
ASSERT ( rest . get ( ) . logs . size ( ) = = 0 ) ;
2017-11-15 15:33:17 +08:00
ASSERT ( rest . get ( ) . ranges . size ( ) = = 1 ) ;
2018-01-24 03:46:16 +08:00
Optional < RestorableFileSet > rest = wait ( c - > getRestoreSet ( 150 + versionShift ) ) ;
2017-11-17 08:19:56 +08:00
ASSERT ( ! rest . present ( ) ) ;
2017-11-15 15:33:17 +08:00
2018-01-24 03:46:16 +08:00
Optional < RestorableFileSet > rest = wait ( c - > getRestoreSet ( 300 + versionShift ) ) ;
2017-11-17 08:19:56 +08:00
ASSERT ( rest . present ( ) ) ;
ASSERT ( rest . get ( ) . logs . size ( ) = = 1 ) ;
ASSERT ( rest . get ( ) . ranges . size ( ) = = 2 ) ;
2018-01-18 16:36:28 +08:00
printf ( " Expire 1 \n " ) ;
2018-08-11 04:57:10 +08:00
wait ( c - > expireData ( 100 + versionShift ) ) ;
2017-11-17 08:19:56 +08:00
BackupDescription d = wait ( c - > describeBackup ( ) ) ;
2018-01-18 16:36:28 +08:00
printf ( " Backup Description 2 \n %s " , d . toString ( ) . c_str ( ) ) ;
2018-01-24 03:46:16 +08:00
ASSERT ( d . minLogBegin = = 100 + versionShift ) ;
2017-11-17 08:19:56 +08:00
ASSERT ( d . maxRestorableVersion = = desc . maxRestorableVersion ) ;
2018-01-18 16:36:28 +08:00
printf ( " Expire 2 \n " ) ;
2018-08-11 04:57:10 +08:00
wait ( c - > expireData ( 101 + versionShift ) ) ;
2017-11-17 08:19:56 +08:00
BackupDescription d = wait ( c - > describeBackup ( ) ) ;
2018-01-18 16:36:28 +08:00
printf ( " Backup Description 3 \n %s " , d . toString ( ) . c_str ( ) ) ;
2018-01-24 03:46:16 +08:00
ASSERT ( d . minLogBegin = = 100 + versionShift ) ;
2017-11-17 08:19:56 +08:00
ASSERT ( d . maxRestorableVersion = = desc . maxRestorableVersion ) ;
2018-01-18 16:36:28 +08:00
printf ( " Expire 3 \n " ) ;
2018-08-11 04:57:10 +08:00
wait ( c - > expireData ( 300 + versionShift ) ) ;
2017-11-17 08:19:56 +08:00
BackupDescription d = wait ( c - > describeBackup ( ) ) ;
2018-01-18 16:36:28 +08:00
printf ( " Backup Description 4 \n %s " , d . toString ( ) . c_str ( ) ) ;
ASSERT ( d . minLogBegin . present ( ) ) ;
2017-11-17 08:19:56 +08:00
ASSERT ( d . snapshots . size ( ) = = desc . snapshots . size ( ) ) ;
ASSERT ( d . maxRestorableVersion = = desc . maxRestorableVersion ) ;
2018-01-18 16:36:28 +08:00
printf ( " Expire 4 \n " ) ;
2018-08-11 04:57:10 +08:00
wait ( c - > expireData ( 301 + versionShift , true ) ) ;
2017-11-17 08:19:56 +08:00
BackupDescription d = wait ( c - > describeBackup ( ) ) ;
2018-01-18 16:36:28 +08:00
printf ( " Backup Description 4 \n %s " , d . toString ( ) . c_str ( ) ) ;
2017-11-17 08:19:56 +08:00
ASSERT ( d . snapshots . size ( ) = = 1 ) ;
2018-01-18 16:36:28 +08:00
ASSERT ( ! d . minLogBegin . present ( ) ) ;
2017-11-17 08:19:56 +08:00
2018-08-11 04:57:10 +08:00
wait ( c - > deleteContainer ( ) ) ;
2017-11-16 05:33:09 +08:00
2017-11-17 08:19:56 +08:00
BackupDescription d = wait ( c - > describeBackup ( ) ) ;
2018-01-18 16:36:28 +08:00
printf ( " Backup Description 5 \n %s " , d . toString ( ) . c_str ( ) ) ;
2017-11-17 08:19:56 +08:00
ASSERT ( d . snapshots . size ( ) = = 0 ) ;
2018-01-18 16:36:28 +08:00
ASSERT ( ! d . minLogBegin . present ( ) ) ;
2017-11-16 05:33:09 +08:00
2017-11-15 15:33:17 +08:00
printf ( " BackupContainerTest URL=%s PASSED. \n " , url . c_str ( ) ) ;
2017-05-26 04:48:44 +08:00
return Void ( ) ;
2017-11-15 15:33:17 +08:00
}
2018-10-06 13:09:58 +08:00
// Runs the backup container test against a local directory container. The directory name is
// derived from timer_int(); the parent directory depends on whether the network is simulated.
TEST_CASE("/backup/containers/localdir") {
	std::string containerURL = g_network->isSimulated()
		? format("file://simfdb/backups/%llx", timer_int())
		: format("file:///private/tmp/fdb_backups/%llx", timer_int());

	wait(testBackupContainer(containerURL));
	return Void();
};
2018-10-06 13:09:58 +08:00
// Runs the backup container test against the URL given in the FDB_TEST_BACKUP_URL environment
// variable. Does nothing under simulation; outside simulation the variable must be set.
TEST_CASE("/backup/containers/url") {
	if (g_network->isSimulated())
		return Void();

	const char* targetURL = getenv("FDB_TEST_BACKUP_URL");
	ASSERT(targetURL != nullptr);
	wait(testBackupContainer(targetURL));

	return Void();
};
2017-12-13 09:44:03 +08:00
2018-10-06 13:09:58 +08:00
// Lists the backup containers under the base URL given in the FDB_TEST_BACKUP_URL environment
// variable and prints each one. Does nothing under simulation; outside simulation the variable
// must be set.
TEST_CASE("/backup/containers_list") {
	if (g_network->isSimulated())
		return Void();

	state const char* baseURL = getenv("FDB_TEST_BACKUP_URL");
	ASSERT(baseURL != nullptr);

	printf("Listing %s\n", baseURL);
	std::vector<std::string> containers = wait(IBackupContainer::listContainers(baseURL));
	for (auto& entry : containers)
		printf("%s\n", entry.c_str());

	return Void();
};