From 2ee1782c19bf21b51a5c26bc218798e6f5ef5b66 Mon Sep 17 00:00:00 2001 From: Stephen Atherton Date: Fri, 25 Oct 2019 14:52:06 -0700 Subject: [PATCH 1/2] Bug fixes in Redwood. BTree height was not being reset when a new empty root is written. IKeyValueStore wrapper was not obeying the row limit in a reverse range query. Added yields to and delays to break up tasks and set IO priorities. --- fdbserver/VersionedBTree.actor.cpp | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/fdbserver/VersionedBTree.actor.cpp b/fdbserver/VersionedBTree.actor.cpp index 9f5db9a5f7..10f9636178 100644 --- a/fdbserver/VersionedBTree.actor.cpp +++ b/fdbserver/VersionedBTree.actor.cpp @@ -1092,6 +1092,10 @@ public: // If the user chosen physical page size is larger, then there will be a gap of unused space after // between the end of page 1 and the start of page 2. ACTOR static Future> readHeaderPage(COWPager *self, PhysicalPageID pageID) { + if(g_network->getCurrentTask() > TaskPriority::DiskRead) { + wait(delay(0, TaskPriority::DiskRead)); + } + state Reference page(new FastAllocatedPage(smallestPhysicalBlock, smallestPhysicalBlock)); int readBytes = wait(self->pageFile->read(page->mutate(), smallestPhysicalBlock, (int64_t)pageID * smallestPhysicalBlock)); debug_printf("COWPager(%s) header op=read_complete %s bytes=%d\n", self->filename.c_str(), toString(pageID).c_str(), readBytes); @@ -1100,6 +1104,10 @@ public: } ACTOR static Future> readPhysicalPage(COWPager *self, PhysicalPageID pageID) { + if(g_network->getCurrentTask() > TaskPriority::DiskRead) { + wait(delay(0, TaskPriority::DiskRead)); + } + state Reference page = self->newPageBuffer(); debug_printf("COWPager(%s) op=read_physical_start %s\n", self->filename.c_str(), toString(pageID).c_str()); int readBytes = wait(self->pageFile->read(page->mutate(), self->physicalPageSize, (int64_t)pageID * self->physicalPageSize)); @@ -1200,11 +1208,17 @@ public: debug_printf("COWPager(%s) Syncing\n", self->filename.c_str()); // Sync everything except the header + if(g_network->getCurrentTask() > TaskPriority::DiskWrite) { + wait(delay(0, TaskPriority::DiskWrite)); + } wait(self->pageFile->sync()); debug_printf("COWPager(%s) commit version %" PRId64 " sync 1\n", self->filename.c_str(), self->pHeader->committedVersion); // Update header on disk and sync again. wait(self->writeHeaderPage(0, self->headerPage)); + if(g_network->getCurrentTask() > TaskPriority::DiskWrite) { + wait(delay(0, TaskPriority::DiskWrite)); + } wait(self->pageFile->sync()); debug_printf("COWPager(%s) commit version %" PRId64 " sync 2\n", self->filename.c_str(), self->pHeader->committedVersion); @@ -2275,10 +2289,10 @@ struct BTreePage { } }; -static void makeEmptyPage(Reference page, uint8_t newFlags) { +static void makeEmptyRoot(Reference page) { BTreePage *btpage = (BTreePage *)page->begin(); btpage->formatVersion = BTreePage::FORMAT_VERSION; - btpage->flags = newFlags; + btpage->flags = BTreePage::IS_LEAF; btpage->height = 1; btpage->kvBytes = 0; btpage->itemCount = 0; @@ -2641,7 +2655,7 @@ public: self->m_header.height = 1; ++latest; Reference page = self->m_pager->newPageBuffer(); - makeEmptyPage(page, BTreePage::IS_LEAF); + makeEmptyRoot(page); self->m_pager->updatePage(id, page); self->m_pager->setCommitVersion(latest); @@ -3232,6 +3246,7 @@ private: childPageID.push_back(records.arena(), id); } } + wait(yield()); // Update activity counts ++counts.pageWrites; @@ -3331,7 +3346,7 @@ private: debug_printf("readPage() op=readForDeferredClear %s @%" PRId64 " \n", toString(id).c_str(), snapshot->getVersion()); } - wait(delay(0, TaskPriority::DiskRead)); + wait(yield()); state Reference page; @@ -3815,7 +3830,8 @@ private: debug_printf("Writing new empty root.\n"); LogicalPageID newRootID = wait(self->m_pager->newPageID()); Reference page = self->m_pager->newPageBuffer(); - makeEmptyPage(page, BTreePage::IS_LEAF); + makeEmptyRoot(page); + self->m_header.height = 1; self->m_pager->updatePage(newRootID, page); rootPageID = BTreePageID((LogicalPageID *)&newRootID, 1); } @@ -4513,7 +4529,7 @@ public: KeyValueRef kv(KeyRef(result.arena(), cur->getKey()), ValueRef(result.arena(), cur->getValue())); accumulatedBytes += kv.expectedSize(); result.push_back(result.arena(), kv); - if(--rowLimit == 0 || accumulatedBytes >= byteLimit) { + if(++rowLimit == 0 || accumulatedBytes >= byteLimit) { break; } wait(cur->prev(true)); From f175ed30b3ca9fecaacc187b52e4b76f8e6ec598 Mon Sep 17 00:00:00 2001 From: "A.J. Beamon" Date: Thu, 31 Oct 2019 09:52:21 -0700 Subject: [PATCH 2/2] Cleanup the fdbbackup cleanup command output. Add cleanup to the usage output printed for fdbbackup. --- fdbbackup/backup.actor.cpp | 7 ++++++- fdbclient/BackupAgentBase.actor.cpp | 32 ++++++++++++++++------------- 2 files changed, 24 insertions(+), 15 deletions(-) diff --git a/fdbbackup/backup.actor.cpp b/fdbbackup/backup.actor.cpp index 9e4a109648..b57c26ddfd 100644 --- a/fdbbackup/backup.actor.cpp +++ b/fdbbackup/backup.actor.cpp @@ -905,7 +905,7 @@ void printBackupContainerInfo() { static void printBackupUsage(bool devhelp) { printf("FoundationDB " FDB_VT_PACKAGE_NAME " (v" FDB_VT_VERSION ")\n"); - printf("Usage: %s (start | status | abort | wait | discontinue | pause | resume | expire | delete | describe | list) [OPTIONS]\n\n", exeBackup.toString().c_str()); + printf("Usage: %s (start | status | abort | wait | discontinue | pause | resume | expire | delete | describe | list | cleanup) [OPTIONS]\n\n", exeBackup.toString().c_str()); printf(" -C CONNFILE The path of a file containing the connection string for the\n" " FoundationDB cluster. The default is first the value of the\n" " FDB_CLUSTER_FILE environment variable, then `./fdb.cluster',\n" @@ -956,6 +956,11 @@ static void printBackupUsage(bool devhelp) { printf(" --trace_format FORMAT\n" " Select the format of the trace files. xml (the default) and json are supported.\n" " Has no effect unless --log is specified.\n"); + printf(" --max_cleanup_seconds SECONDS\n" + " Specifies the amount of time a backup or DR needs to be stale before cleanup will\n" + " remove mutations for it. By default this is set to one hour.\n"); + printf(" --delete_data\n" + " This flag will cause cleanup to remove mutations for the most stale backup or DR.\n"); #ifndef TLS_DISABLED printf(TLS_HELP); #endif diff --git a/fdbclient/BackupAgentBase.actor.cpp b/fdbclient/BackupAgentBase.actor.cpp index 5627a1a349..6a02bac4b3 100644 --- a/fdbclient/BackupAgentBase.actor.cpp +++ b/fdbclient/BackupAgentBase.actor.cpp @@ -862,29 +862,33 @@ ACTOR Future cleanupLogMutations(Database cx, Value destUidValue, bool del wait(success(foundDRKey) && success(foundBackupKey)); if(foundDRKey.get().present() && foundBackupKey.get().present()) { - printf("WARNING: Found a tag which looks like both a backup and a DR. This tag was %.4f hours behind.\n", (readVer - currVersion)/(3600.0*CLIENT_KNOBS->CORE_VERSIONSPERSECOND)); + printf("WARNING: Found a tag that looks like both a backup and a DR. This tag is %.4f hours behind.\n", (readVer - currVersion)/(3600.0*CLIENT_KNOBS->CORE_VERSIONSPERSECOND)); } else if(foundDRKey.get().present() && !foundBackupKey.get().present()) { - printf("Found a DR which was %.4f hours behind.\n", (readVer - currVersion)/(3600.0*CLIENT_KNOBS->CORE_VERSIONSPERSECOND)); + printf("Found a DR that is %.4f hours behind.\n", (readVer - currVersion)/(3600.0*CLIENT_KNOBS->CORE_VERSIONSPERSECOND)); } else if(!foundDRKey.get().present() && foundBackupKey.get().present()) { - printf("Found a Backup which was %.4f hours behind.\n", (readVer - currVersion)/(3600.0*CLIENT_KNOBS->CORE_VERSIONSPERSECOND)); + printf("Found a Backup that is %.4f hours behind.\n", (readVer - currVersion)/(3600.0*CLIENT_KNOBS->CORE_VERSIONSPERSECOND)); } else { - printf("WARNING: Found a unknown tag which was %.4f hours behind.\n", (readVer - currVersion)/(3600.0*CLIENT_KNOBS->CORE_VERSIONSPERSECOND)); + printf("WARNING: Found an unknown tag that is %.4f hours behind.\n", (readVer - currVersion)/(3600.0*CLIENT_KNOBS->CORE_VERSIONSPERSECOND)); } loggedLogUids.insert(currLogUid); } } - if( readVer - minVersion > CLIENT_KNOBS->MIN_CLEANUP_SECONDS*CLIENT_KNOBS->CORE_VERSIONSPERSECOND && deleteData && (!removingLogUid.present() || minVersionLogUid == removingLogUid.get()) ) { - removingLogUid = minVersionLogUid; - wait(eraseLogData(tr, minVersionLogUid, destUidValue)); - wait(tr->commit()); - printf("\nSuccessfully removed the tag which was %.4f hours behind.\n", (readVer - minVersion)/(3600.0*CLIENT_KNOBS->CORE_VERSIONSPERSECOND)); - } else if(removingLogUid.present() && minVersionLogUid != removingLogUid.get()) { - printf("\nWARNING: The oldest tag was possibly removed, run again without `--delete_data' to check.\n"); - } else if( deleteData ) { - printf("\nWARNING: Did not delete data because the tag was not at least %.4f hours behind. Change `--min_cleanup_seconds' to adjust this threshold.\n", CLIENT_KNOBS->MIN_CLEANUP_SECONDS/3600.0); + if(deleteData) { + if(readVer - minVersion > CLIENT_KNOBS->MIN_CLEANUP_SECONDS*CLIENT_KNOBS->CORE_VERSIONSPERSECOND && (!removingLogUid.present() || minVersionLogUid == removingLogUid.get())) { + removingLogUid = minVersionLogUid; + wait(eraseLogData(tr, minVersionLogUid, destUidValue)); + wait(tr->commit()); + printf("\nSuccessfully removed the tag that was %.4f hours behind.\n\n", (readVer - minVersion)/(3600.0*CLIENT_KNOBS->CORE_VERSIONSPERSECOND)); + } else if(removingLogUid.present() && minVersionLogUid != removingLogUid.get()) { + printf("\nWARNING: The oldest tag was possibly removed, run again without `--delete_data' to check.\n\n"); + } else { + printf("\nWARNING: Did not delete data because the tag is not at least %.4f hours behind. Change `--min_cleanup_seconds' to adjust this threshold.\n\n", CLIENT_KNOBS->MIN_CLEANUP_SECONDS/3600.0); + } + } else if(readVer - minVersion > CLIENT_KNOBS->MIN_CLEANUP_SECONDS*CLIENT_KNOBS->CORE_VERSIONSPERSECOND) { + printf("\nPassing `--delete_data' would delete the tag that is %.4f hours behind.\n\n", (readVer - minVersion)/(3600.0*CLIENT_KNOBS->CORE_VERSIONSPERSECOND)); } else { - printf("\nPassing `--delete_data' would delete the tag which was %.4f hours behind.\n", (readVer - minVersion)/(3600.0*CLIENT_KNOBS->CORE_VERSIONSPERSECOND)); + printf("\nPassing `--delete_data' would not delete the tag that is %.4f hours behind. Change `--min_cleanup_seconds' to adjust the cleanup threshold.\n\n", (readVer - minVersion)/(3600.0*CLIENT_KNOBS->CORE_VERSIONSPERSECOND)); } return Void();