2023-10-20 22:35:26 +08:00
|
|
|
// SPDX-FileCopyrightText: 2023 XWiki CryptPad Team <contact@cryptpad.org> and contributors
|
|
|
|
//
|
|
|
|
// SPDX-License-Identifier: AGPL-3.0-or-later
|
|
|
|
|
2020-09-29 20:34:51 +08:00
|
|
|
var nThen = require("nthen");
|
|
|
|
var Bloom = require("@mcrowe/minibloom");
|
|
|
|
var Util = require("../lib/common-util");
|
|
|
|
var Pins = require("../lib/pins");
|
2020-10-12 21:48:12 +08:00
|
|
|
var Keys = require("./keys");
|
2023-01-19 11:27:39 +08:00
|
|
|
var Path = require('node:path');
|
|
|
|
var config = require("./load-config");
|
|
|
|
var Fs = require("node:fs");
|
|
|
|
var Fse = require("fs-extra");
|
2020-09-29 20:34:51 +08:00
|
|
|
|
|
|
|
/*  Return the largest of the 'atime', 'ctime', and 'mtime' values
    found on the given stats object.

    stats: an object carrying 'atime', 'ctime', and 'mtime' properties
        which can be compared with '>' (Dates or numbers).

    When several fields are equal, the earliest one in the list wins,
    because a later field only replaces the current pick when it is
    strictly greater.
*/
var getNewestTime = function (stats) {
    return stats[['atime', 'ctime', 'mtime'].reduce(function (a, b) {
        return stats[b] > stats[a]? b: a;
    })];
};
|
|
|
|
|
|
|
|
/*

Env = {
    limits: {
        <unsafeKey>: <limit>,
    },
    archiveRetentionTime: <number of days>,
    accountRetentionTime: <number of days>,
    inactiveTime: <number of days>,
    paths: {
        pin: <path to pin storage>,
        archive: <path to archive storage>,
    },
    store,
    pinStore,
    Log,
    blobStore,
    DRY_RUN: <optional; when truthy, evictions are logged instead of performed>,
};

*/
|
|
|
|
|
2021-02-17 16:47:41 +08:00
|
|
|
// the number of ms artificially introduced between CPU-intensive operations
var THROTTLE_FACTOR = 10;
// a progress message is logged once for every PROGRESS_FACTOR items processed
var PROGRESS_FACTOR = 1000;
|
2021-02-17 16:47:41 +08:00
|
|
|
|
|
|
|
/*  Remove archived channels, archived blob ownership proofs, and archived
    blobs which have been archived for longer than Env.archiveRetentionTime
    (expressed in days).

    Env: the environment object sketched in the 'Env = {...}' comment near the
        top of this file. This function reads Env.store, Env.Log,
        Env.blobStore, Env.paths.archive, Env.archiveRetentionTime,
        and Env.DRY_RUN.
    cb: called as cb(undefined, report) once every step has completed.
        Errors encountered along the way are logged, not passed to cb.

    When Env.DRY_RUN is truthy the removals are only logged and counted.
    NOTE(review): the blob path migration is not gated by DRY_RUN and will
    still move files - confirm whether that is intended.
*/
var evictArchived = function (Env, cb) {
    var Log;
    var store;
    var blobs;
    // archived items whose ctime is more recent than this timestamp (ms) are kept.
    // This is NaN when Env.archiveRetentionTime is undefined, but in that
    // case every removal step below returns before using it.
    var retentionTime = +new Date() - (Env.archiveRetentionTime * 24 * 3600 * 1000);

    // the commented members list the fields other steps may add to a report;
    // only the 'archived*Removed' counters are filled in by this function
    var report = {
        // archivedChannelsRemoved,
        // archivedAccountsRemoved,
        // archivedBlobProofsRemoved,
        // archivedBlobsRemoved,

        // totalChannels,
        // activeChannels,

        // totalBlobs,
        // activeBlobs,

        // totalAccounts,
        // activeAccounts,

        // channelsArchived,

        launchTime: +new Date(),
        // runningTime,
    };

    // copy the handles we need out of Env
    var loadStorage = function () {
        store = Env.store;
        Log = Env.Log;
        blobs = Env.blobStore;
    };

    // Move every file found beneath the directory 'from' to the equivalent
    // relative location beneath 'to', removing empty directories found in 'from'.
    // Failures are logged and never thrown.
    var migrateBlobRoot = function (from, to) {
        // only migrate subpaths, leave everything else alone
        if (!Path.dirname(from).startsWith(Path.dirname(to))) { return; }

        // if the supposedly incorrect root is in fact the intended location
        // there is nothing to migrate, and attempting to move every
        // archived file onto itself is pointless at best
        if (Path.resolve(from) === Path.resolve(to)) { return; }

        // expects a directory
        var recurse = function (relativePath) {
            var src = Path.join(from, relativePath);
            var children;
            try {
                children = Fs.readdirSync(src);
            } catch (err) {
                // a missing directory simply means there is nothing to migrate
                if (err.code === 'ENOENT') { return; }
                // if you can't read a directory's contents
                // then nothing else will work, so just abort
                Log.verbose("EVICT_ARCHIVED_NOT_DIRECTORY", {
                    error: err,
                });
                return;
            }

            // only assigned when there are children to move
            var dest;
            if (children.length === 0) {
                try {
                    Fse.removeSync(src);
                } catch (err2) {
                    Log.error('EVICT_ARCHIVED_EMPTY_DIR_REMOVAL', {
                        error: err2,
                    });
                    // removal is non-essential, so we can continue
                }
            } else {
                // make an equivalent path in the target directory
                dest = Path.join(to, relativePath);

                try {
                    Fse.mkdirpSync(dest);
                } catch (err3) {
                    Log.error("EVICT_ARCHIVED_BLOB_MIGRATION", {
                        error: err3,
                    });

                    // failure to create the host directory
                    // will cause problems when we try to move
                    // so bail out here
                    return;
                }
            }

            children.forEach(function (child) {
                var childSrcPath = Path.join(src, child);
                var stat;
                try {
                    stat = Fs.statSync(childSrcPath);
                } catch (err5) {
                    // the entry could not be inspected (it may have vanished
                    // since the directory was listed), so skip it instead of
                    // letting the exception escape the migration
                    Log.error('EVICT_ARCHIVED_STAT_FAILURE', {
                        error: err5,
                        path: childSrcPath,
                    });
                    return;
                }
                if (stat.isDirectory()) {
                    return void recurse(Path.join(relativePath, child));
                }

                var childDestPath = Path.join(dest, child);

                try {
                    Log.verbose("EVICT_ARCHIVED_MOVE_FROM_DEPRECATED_PATH", {
                        from: childSrcPath,
                        to: childDestPath,
                    });
                    // never clobber a file which already exists at the destination
                    Fse.moveSync(childSrcPath, childDestPath, {
                        overwrite: false,
                    });
                } catch (err4) {
                    Log.error('EVICT_ARCHIVED_MOVE_FAILURE', {
                        error: err4,
                    });
                }
            });
        };
        recurse('');
    };

    /*  In CryptPad 5.2.0 we merged a patch which converted
        all of CryptPad's root filepaths to their absolute form,
        rather than the relative paths we'd been using until then.
        Unfortunately, we overlooked a case where two absolute
        paths were concatenated together, resulting in blobs being
        archived to an incorrect path.

        This migration detects evidence of incorrect archivals
        and moves such archived files to their intended location
        before continuing with the normal eviction procedure.
    */
    var migrateIncorrectBlobs = function () {
        var incorrectPaths = [
            Path.join(Env.paths.archive, config.blobPath),
            Path.join(Env.paths.archive, Path.resolve(config.blobPath))
        ];
        var correctPath = Path.join(Env.paths.archive, 'blob');
        incorrectPaths.forEach(root => {
            migrateBlobRoot(root, correctPath);
        });
    };

    var removeArchivedChannels = function (w) {
        // this block will iterate over archived channels and removes them
        // if they've been in cold storage for longer than your configured archive time

        // if the admin has not set an 'archiveRetentionTime', this block makes no sense
        // so just skip it
        if (typeof(Env.archiveRetentionTime) !== "number") { return; }

        // count the number of files which have been removed in this run
        // ids of length 32 are counted as channels, ids of length 44 as accounts
        var removed = 0;
        var accounts = 0;

        var handler = function (err, item, cb) {
            if (err) {
                return Log.error('EVICT_ARCHIVED_CHANNEL_ITERATION', err, cb);
            }
            // don't mess with files that are freshly stored in cold storage
            // based on ctime because that's changed when the file is moved...
            if (+new Date(item.ctime) > retentionTime) {
                return void cb();
            }

            // but if it's been stored for the configured time...
            // expire it
            if (Env.DRY_RUN) {
                if (item.channel.length === 32) { removed++; }
                else if (item.channel.length === 44) { accounts++; }
                return void Log.info("EVICT_ARCHIVED_CHANNEL_DRY_RUN", item.channel, cb);
            }
            store.removeArchivedChannel(item.channel, w(function (err) {
                if (err) {
                    return Log.error('EVICT_ARCHIVED_CHANNEL_REMOVAL_ERROR', {
                        error: err,
                        channel: item.channel,
                    }, cb);
                }

                if (item.channel.length === 32) {
                    removed++;
                } else if (item.channel.length === 44) {
                    accounts++;
                }

                Log.info('EVICT_ARCHIVED_CHANNEL_REMOVAL', item.channel, cb);
            }));
        };

        // if you hit an error, log it
        // otherwise, when there are no more channels to process
        // log some stats about how many were removed
        var done = function (err) {
            if (err) {
                return Log.error('EVICT_ARCHIVED_FINAL_ERROR', err);
            }
            report.archivedChannelsRemoved = removed;
            report.archivedAccountsRemoved = accounts;
            Log.info('EVICT_ARCHIVED_CHANNELS_REMOVED', removed);
            Log.info('EVICT_ARCHIVED_ACCOUNTS_REMOVED', accounts);
        };

        store.listArchivedChannels(handler, w(done));
    };

    var removeArchivedBlobProofs = function (w) {
        if (typeof(Env.archiveRetentionTime) !== "number") { return; }
        // Iterate over archive blob ownership proofs and remove them
        // if they are older than the specified retention time
        var removed = 0;
        blobs.list.archived.proofs(function (err, item, next) {
            next = Util.mkAsync(next, THROTTLE_FACTOR);
            if (err) {
                Log.error("EVICT_BLOB_LIST_ARCHIVED_PROOF_ERROR", err);
                return void next();
            }
            if (!item) {
                // there is nothing to inspect or remove, and the property
                // accesses below would throw, so log it and move on
                Log.error("EVICT_BLOB_LIST_ARCHIVED_PROOF_NO_ITEM", item);
                return void next();
            }
            // archived too recently to be removed
            if (item.ctime > retentionTime) { return void next(); }
            if (Env.DRY_RUN) {
                removed++;
                return void Log.info("EVICT_ARCHIVED_BLOB_PROOF_DRY_RUN", item, next);
            }
            blobs.remove.archived.proof(item.safeKey, item.blobId, function (err) {
                if (err) {
                    Log.error("EVICT_ARCHIVED_BLOB_PROOF_ERROR", item);
                    return void next();
                }
                Log.info("EVICT_ARCHIVED_BLOB_PROOF", item);
                removed++;
                next();
            });
        }, w(function () {
            report.archivedBlobProofsRemoved = removed;
            Log.info('EVICT_ARCHIVED_BLOB_PROOFS_REMOVED', removed);
        }));
    };

    var removeArchivedBlobs = function (w) {
        if (typeof(Env.archiveRetentionTime) !== "number") { return; }
        // Iterate over archived blobs and remove them
        // if they are older than the specified retention time
        var removed = 0;
        blobs.list.archived.blobs(function (err, item, next) {
            next = Util.mkAsync(next, THROTTLE_FACTOR);
            if (err) {
                Log.error("EVICT_BLOB_LIST_ARCHIVED_BLOBS_ERROR", err);
                return void next();
            }
            if (!item) {
                // there is nothing to inspect or remove, and the property
                // accesses below would throw, so log it and move on
                Log.error("EVICT_BLOB_LIST_ARCHIVED_BLOBS_NO_ITEM", item);
                return void next();
            }
            // archived too recently to be removed
            if (item.ctime > retentionTime) { return void next(); }
            if (Env.DRY_RUN) {
                removed++;
                return void Log.info("EVICT_ARCHIVED_BLOB_DRY_RUN", item, next);
            }
            blobs.remove.archived.blob(item.blobId, function (err) {
                if (err) {
                    Log.error("EVICT_ARCHIVED_BLOB_ERROR", item);
                    return void next();
                }
                Log.info("EVICT_ARCHIVED_BLOB", item);
                removed++;
                next();
            });
        }, w(function () {
            report.archivedBlobsRemoved = removed;
            Log.info('EVICT_ARCHIVED_BLOBS_REMOVED', removed);
        }));
    };

    if (Env.DRY_RUN) { Env.Log.info('DRY RUN'); }

    // each step is handed a 'w' function by nThen; the next step only
    // starts once every callback wrapped with 'w' has been called
    nThen(loadStorage)
    .nThen(migrateIncorrectBlobs)
    .nThen(removeArchivedChannels)
    .nThen(removeArchivedBlobProofs)
    .nThen(removeArchivedBlobs)
    .nThen(function () {
        cb(void 0, report);
    });
};
|
|
|
|
|
|
|
|
module.exports = function (Env, cb) {
|
|
|
|
var complete = Util.once(Util.mkAsync(cb));
|
|
|
|
var report = {
|
|
|
|
// archivedChannelsRemoved,
|
|
|
|
// archivedAccountsRemoved,
|
|
|
|
// archivedBlobProofsRemoved,
|
|
|
|
// archivedBlobsRemoved,
|
|
|
|
|
|
|
|
// totalChannels,
|
|
|
|
// activeChannels,
|
|
|
|
|
|
|
|
// totalBlobs,
|
|
|
|
// activeBlobs,
|
|
|
|
|
|
|
|
// totalAccounts,
|
|
|
|
// activeAccounts,
|
|
|
|
|
|
|
|
// channelsArchived,
|
|
|
|
|
|
|
|
launchTime: +new Date(),
|
|
|
|
// runningTime,
|
|
|
|
};
|
|
|
|
|
|
|
|
// the administrator should have set an 'inactiveTime' in their config
|
|
|
|
// if they didn't, just exit.
|
|
|
|
if (!Env.inactiveTime || typeof(Env.inactiveTime) !== "number") {
|
|
|
|
return void complete("NO_INACTIVE_TIME");
|
|
|
|
}
|
|
|
|
|
|
|
|
// get a list of premium accounts on this instance
|
|
|
|
// pre-converted to the 'safeKey' format so we can easily compare
|
|
|
|
// them against ids we see on the filesystem
|
|
|
|
var premiumSafeKeys = Object.keys(Env.limits || {})
|
|
|
|
.map(function (id) {
|
|
|
|
return Keys.canonicalize(id);
|
|
|
|
})
|
|
|
|
.filter(Boolean)
|
|
|
|
.map(Util.escapeKeyCharacters);
|
|
|
|
|
|
|
|
// files which have not been changed since before this date can be considered inactive
|
|
|
|
var inactiveTime = +new Date() - (Env.inactiveTime * 24 * 3600 * 1000);
|
|
|
|
|
|
|
|
// files which were archived before this date can be considered safe to remove
|
|
|
|
var retentionTime = +new Date() - (Env.archiveRetentionTime * 24 * 3600 * 1000);
|
|
|
|
|
|
|
|
var store;
|
|
|
|
var pinStore;
|
|
|
|
var Log;
|
|
|
|
var blobs;
|
|
|
|
|
|
|
|
/* It's fairly easy to know if a channel or blob is active
|
|
|
|
but knowing whether it is pinned requires that we
|
|
|
|
keep the set of pinned documents in memory.
|
|
|
|
|
|
|
|
Some users will share the same set of documents in their pin lists,
|
|
|
|
so the representation of pinned documents should scale sub-linearly
|
|
|
|
with the number of users and pinned documents.
|
|
|
|
|
|
|
|
That said, sub-linear isn't great...
|
|
|
|
A Bloom filter is "a space-efficient probabilistic data structure"
|
|
|
|
which lets us check whether an item is _probably_ or _definitely not_
|
|
|
|
in a set. This is good enough for our purposes since we just want to
|
|
|
|
know whether something can safely be removed and false negatives
|
|
|
|
(not safe to remove when it actually is) are acceptable.
|
|
|
|
|
|
|
|
We set our capacity to some large number, and the error rate to whatever
|
|
|
|
we think is acceptable.
|
|
|
|
|
|
|
|
TODO make this configurable ?
|
|
|
|
*/
|
2023-08-21 18:45:18 +08:00
|
|
|
var BLOOM_CAPACITY = (1 << 24) - 1; // over two million items
|
2023-01-23 10:59:36 +08:00
|
|
|
var BLOOM_ERROR = 1 / 10000; // an error rate of one in ten thousand
|
2021-02-17 16:47:41 +08:00
|
|
|
|
|
|
|
// we'll use one filter for the set of active documents
|
|
|
|
var activeDocs = Bloom.optimalFilter(BLOOM_CAPACITY, BLOOM_ERROR);
|
|
|
|
// and another one for the set of pinned documents
|
|
|
|
var pinnedDocs = Bloom. optimalFilter(BLOOM_CAPACITY, BLOOM_ERROR);
|
|
|
|
|
|
|
|
var startTime = +new Date();
|
|
|
|
var msSinceStart = function () {
|
|
|
|
return (+new Date()) - startTime;
|
|
|
|
};
|
|
|
|
|
|
|
|
var loadStorage = function () {
|
|
|
|
store = Env.store;
|
|
|
|
pinStore = Env.pinStore;
|
|
|
|
Log = Env.Log;
|
|
|
|
blobs = Env.blobStore;
|
|
|
|
};
|
|
|
|
|
2020-09-29 20:34:51 +08:00
|
|
|
var categorizeChannelsByActivity = function (w) {
|
|
|
|
var channels = 0;
|
|
|
|
var active = 0;
|
|
|
|
var handler = function (err, item, cb) {
|
|
|
|
channels++;
|
2023-01-19 12:44:22 +08:00
|
|
|
if (channels % PROGRESS_FACTOR === 0) {
|
|
|
|
Log.info('EVICT_CHANNEL_CATEGORIZATION_PROGRESS', {
|
|
|
|
channels: channels,
|
|
|
|
});
|
|
|
|
}
|
|
|
|
|
2020-09-29 20:34:51 +08:00
|
|
|
if (err) {
|
2023-08-23 16:57:11 +08:00
|
|
|
return Log.error('EVICT_CHANNEL_CATEGORIZATION', err, cb);
|
2020-09-29 20:34:51 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// if the channel has been modified recently
|
|
|
|
// we don't use mtime because we don't want to count access to the file, just modifications
|
|
|
|
if (+new Date(item.mtime) > inactiveTime) {
|
|
|
|
// add it to the set of activeDocs
|
|
|
|
activeDocs.add(item.channel);
|
|
|
|
active++;
|
|
|
|
return void cb();
|
|
|
|
}
|
|
|
|
|
|
|
|
return void cb();
|
|
|
|
};
|
|
|
|
|
|
|
|
var done = function () {
|
2020-10-14 19:53:53 +08:00
|
|
|
report.activeChannels = active;
|
|
|
|
report.totalChannels = channels;
|
2020-09-29 20:34:51 +08:00
|
|
|
Log.info('EVICT_CHANNELS_CATEGORIZED', {
|
|
|
|
active: active,
|
|
|
|
channels: channels,
|
2023-08-23 16:57:11 +08:00
|
|
|
}, w());
|
2020-09-29 20:34:51 +08:00
|
|
|
};
|
|
|
|
|
2023-01-19 12:44:22 +08:00
|
|
|
Log.info('EVICT_CHANNEL_ACTIVITY_START', 'Assessing channel activity');
|
2020-09-29 20:34:51 +08:00
|
|
|
store.listChannels(handler, w(done));
|
|
|
|
};
|
|
|
|
|
|
|
|
var categorizeBlobsByActivity = function (w) {
|
|
|
|
var n_blobs = 0;
|
|
|
|
var active = 0;
|
|
|
|
|
2023-01-19 12:44:22 +08:00
|
|
|
Log.info('EVICT_BLOBS_ACTIVITY_START', 'Assessing blob activity');
|
2020-09-29 20:34:51 +08:00
|
|
|
blobs.list.blobs(function (err, item, next) {
|
2020-10-14 19:52:31 +08:00
|
|
|
next = Util.mkAsync(next, THROTTLE_FACTOR);
|
2020-09-29 20:34:51 +08:00
|
|
|
n_blobs++;
|
2023-01-19 12:44:22 +08:00
|
|
|
if (n_blobs % PROGRESS_FACTOR === 0) {
|
|
|
|
Log.info('EVICT_BLOB_CATEGORIZATION_PROGRESS', {
|
|
|
|
blobs: n_blobs,
|
|
|
|
});
|
|
|
|
}
|
|
|
|
|
2020-09-29 20:34:51 +08:00
|
|
|
if (err) {
|
2023-08-23 16:57:11 +08:00
|
|
|
return Log.error("EVICT_BLOB_CATEGORIZATION", err, next);
|
2020-09-29 20:34:51 +08:00
|
|
|
}
|
|
|
|
if (!item) {
|
2023-08-23 16:57:11 +08:00
|
|
|
return void Log.error("EVICT_BLOB_CATEGORIZATION_INVALID", item, next);
|
2020-09-29 20:34:51 +08:00
|
|
|
}
|
2023-01-23 10:59:36 +08:00
|
|
|
if (item.mtime > inactiveTime) {
|
2020-09-29 20:34:51 +08:00
|
|
|
activeDocs.add(item.blobId);
|
|
|
|
active++;
|
|
|
|
return void next();
|
|
|
|
}
|
|
|
|
next();
|
|
|
|
}, w(function () {
|
2020-10-14 19:53:53 +08:00
|
|
|
report.totalBlobs = n_blobs;
|
|
|
|
report.activeBlobs = active;
|
2020-09-29 20:34:51 +08:00
|
|
|
Log.info('EVICT_BLOBS_CATEGORIZED', {
|
|
|
|
active: active,
|
|
|
|
blobs: n_blobs,
|
2023-08-23 16:57:11 +08:00
|
|
|
}, w());
|
2020-09-29 20:34:51 +08:00
|
|
|
}));
|
|
|
|
};
|
|
|
|
|
|
|
|
var categorizeAccountsByActivity = function (w) {
|
|
|
|
// iterate over all accounts
|
|
|
|
var accounts = 0;
|
|
|
|
var inactive = 0;
|
|
|
|
|
|
|
|
var accountRetentionTime;
|
2020-09-30 17:12:14 +08:00
|
|
|
if (typeof(Env.accountRetentionTime) === 'number' && Env.accountRetentionTime > 0) {
|
|
|
|
accountRetentionTime = +new Date() - (24 * 3600 * 1000 * Env.accountRetentionTime);
|
2020-09-29 20:34:51 +08:00
|
|
|
} else {
|
|
|
|
accountRetentionTime = -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
var pinAll = function (pinList) {
|
|
|
|
pinList.forEach(function (docId) {
|
|
|
|
pinnedDocs.add(docId);
|
|
|
|
});
|
|
|
|
};
|
|
|
|
|
2020-09-29 21:18:23 +08:00
|
|
|
var docIsActive = function (docId) {
|
|
|
|
return activeDocs.test(docId);
|
|
|
|
};
|
2020-09-29 20:34:51 +08:00
|
|
|
|
2020-10-12 21:48:12 +08:00
|
|
|
var accountIsActive = function (mtime, pinList) {
|
2020-09-30 17:12:14 +08:00
|
|
|
// console.log("id [%s] in premiumSafeKeys", id, premiumSafeKeys.indexOf(id) !== -1);
|
2020-09-29 20:34:51 +08:00
|
|
|
// if their pin log has changed recently then consider them active
|
|
|
|
if (mtime && mtime > accountRetentionTime) {
|
|
|
|
return true;
|
|
|
|
}
|
2020-09-29 21:18:23 +08:00
|
|
|
|
|
|
|
// iterate over their pinned documents until you find one that has been active
|
2020-10-12 21:48:12 +08:00
|
|
|
return pinList.some(docIsActive);
|
|
|
|
};
|
2020-09-29 21:18:23 +08:00
|
|
|
|
2020-10-12 21:48:12 +08:00
|
|
|
var isPremiumAccount = function (id) {
|
2020-09-29 21:18:23 +08:00
|
|
|
return premiumSafeKeys.indexOf(id) !== -1;
|
2020-09-29 20:34:51 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
var PRESERVE_INACTIVE_ACCOUNTS = accountRetentionTime <= 0;
|
|
|
|
|
|
|
|
// otherwise, we'll only retain data from active accounts
|
|
|
|
// so we need more heuristics
|
|
|
|
var handler = function (content, id, next) {
|
2020-10-14 19:52:31 +08:00
|
|
|
next = Util.mkAsync(next, THROTTLE_FACTOR);
|
2020-09-29 20:34:51 +08:00
|
|
|
accounts++;
|
2023-01-19 12:44:22 +08:00
|
|
|
if (accounts % PROGRESS_FACTOR === 0) {
|
|
|
|
Log.info('EVICT_ACCOUNT_CATEGORIZATION_PROGRESS', {
|
|
|
|
accounts: accounts,
|
|
|
|
});
|
|
|
|
}
|
2020-09-29 20:34:51 +08:00
|
|
|
|
|
|
|
var mtime = content.latest;
|
|
|
|
var pinList = Object.keys(content.pins);
|
|
|
|
|
2020-10-12 21:48:12 +08:00
|
|
|
if (accountIsActive(mtime, pinList)) {
|
2020-09-29 20:34:51 +08:00
|
|
|
// add active accounts' pinned documents to a second bloom filter
|
|
|
|
pinAll(pinList);
|
|
|
|
return void next();
|
|
|
|
}
|
|
|
|
|
|
|
|
// Otherwise they are inactive.
|
|
|
|
// We keep track of how many accounts are inactive whether or not
|
|
|
|
// we plan to delete them, because it may be interesting information
|
|
|
|
inactive++;
|
|
|
|
if (PRESERVE_INACTIVE_ACCOUNTS) {
|
2023-08-23 16:57:11 +08:00
|
|
|
pinAll(pinList);
|
|
|
|
return Log.info('EVICT_INACTIVE_ACCOUNT_PRESERVED', {
|
2020-10-14 19:53:53 +08:00
|
|
|
id: id,
|
|
|
|
mtime: mtime,
|
2023-08-23 16:57:11 +08:00
|
|
|
}, next);
|
2020-09-29 20:34:51 +08:00
|
|
|
}
|
|
|
|
|
2020-10-12 21:48:12 +08:00
|
|
|
if (isPremiumAccount(id)) {
|
2023-08-23 16:57:11 +08:00
|
|
|
pinAll(pinList);
|
|
|
|
return Log.info("EVICT_INACTIVE_PREMIUM_ACCOUNT", {
|
2020-10-12 21:48:12 +08:00
|
|
|
id: id,
|
|
|
|
mtime: mtime,
|
2023-08-23 16:57:11 +08:00
|
|
|
}, next);
|
2020-10-12 21:48:12 +08:00
|
|
|
}
|
|
|
|
|
2020-09-29 20:34:51 +08:00
|
|
|
// remove the pin logs of inactive accounts if inactive account removal is configured
|
2024-01-08 21:08:26 +08:00
|
|
|
if (Env.DRY_RUN) {
|
2024-01-09 02:11:20 +08:00
|
|
|
return void Log.info("EVICT_INACTIVE_ACCOUNT_DRY_RUN", id, next);
|
2024-01-08 21:08:26 +08:00
|
|
|
}
|
2023-09-07 23:03:20 +08:00
|
|
|
pinStore.archiveChannel(id, undefined, function (err) {
|
2020-09-29 20:34:51 +08:00
|
|
|
if (err) {
|
2023-08-23 16:57:11 +08:00
|
|
|
return Log.error('EVICT_INACTIVE_ACCOUNT_PIN_LOG', err, next);
|
2020-09-29 20:34:51 +08:00
|
|
|
}
|
2023-08-23 16:57:11 +08:00
|
|
|
Log.info('EVICT_INACTIVE_ACCOUNT_LOG', id, next);
|
2020-09-29 20:34:51 +08:00
|
|
|
});
|
|
|
|
};
|
|
|
|
|
|
|
|
var done = function () {
|
|
|
|
var label = PRESERVE_INACTIVE_ACCOUNTS?
|
|
|
|
"EVICT_COUNT_ACCOUNTS":
|
|
|
|
"EVICT_INACTIVE_ACCOUNTS";
|
|
|
|
|
2020-10-14 19:53:53 +08:00
|
|
|
report.totalAccounts = accounts;
|
|
|
|
report.activeAccounts = accounts - inactive;
|
2020-09-29 20:34:51 +08:00
|
|
|
Log.info(label, {
|
|
|
|
accounts: accounts,
|
|
|
|
inactive: inactive,
|
|
|
|
});
|
|
|
|
};
|
|
|
|
|
2023-01-19 12:44:22 +08:00
|
|
|
Log.info('EVICT_ACCOUNTS_ACTIVITY_START', 'Assessing account activity');
|
2020-09-29 20:34:51 +08:00
|
|
|
Pins.load(w(done), {
|
2020-09-30 17:12:14 +08:00
|
|
|
pinPath: Env.paths.pin,
|
2020-09-29 20:34:51 +08:00
|
|
|
handler: handler,
|
|
|
|
});
|
|
|
|
};
|
|
|
|
|
|
|
|
var archiveInactiveBlobs = function (w) {
|
|
|
|
// iterate over blobs and remove them
|
|
|
|
// if they have not been accessed within the specified retention time
|
|
|
|
var removed = 0;
|
2020-10-14 19:53:53 +08:00
|
|
|
var total = 0;
|
2023-01-19 12:44:22 +08:00
|
|
|
|
|
|
|
Log.info('EVICT_BLOB_START', {});
|
2020-09-29 20:34:51 +08:00
|
|
|
blobs.list.blobs(function (err, item, next) {
|
2020-10-14 19:53:53 +08:00
|
|
|
next = Util.mkAsync(next, THROTTLE_FACTOR);
|
2020-09-29 20:34:51 +08:00
|
|
|
if (err) {
|
2023-08-23 16:57:11 +08:00
|
|
|
return Log.error("EVICT_BLOB_LIST_BLOBS_ERROR", err, next);
|
2020-09-29 20:34:51 +08:00
|
|
|
}
|
|
|
|
if (!item) {
|
2023-08-23 16:57:11 +08:00
|
|
|
return void Log.error('EVICT_BLOB_LIST_BLOBS_NO_ITEM', item, next);
|
2020-09-29 20:34:51 +08:00
|
|
|
}
|
2020-10-14 19:53:53 +08:00
|
|
|
total++;
|
2023-01-19 12:44:22 +08:00
|
|
|
if (total % PROGRESS_FACTOR === 0) {
|
|
|
|
Log.info("EVICT_BLOB_PROGRESS", {
|
|
|
|
blobs: total,
|
|
|
|
});
|
|
|
|
}
|
|
|
|
|
2020-09-29 20:34:51 +08:00
|
|
|
if (pinnedDocs.test(item.blobId)) { return void next(); }
|
|
|
|
if (activeDocs.test(item.blobId)) { return void next(); }
|
|
|
|
|
|
|
|
// This seems redundant because we're already checking the bloom filter
|
|
|
|
// but we can't implement a 'fast mode' for the iterator
|
|
|
|
// unless we address this race condition with this last-minute double-check
|
2023-01-23 10:59:36 +08:00
|
|
|
if (item.mtime > inactiveTime) { return void next(); }
|
2020-09-29 20:34:51 +08:00
|
|
|
|
2024-01-08 21:08:26 +08:00
|
|
|
if (Env.DRY_RUN) {
|
|
|
|
removed++;
|
|
|
|
return void Log.info("EVICT_ARCHIVE_BLOB_DRY_RUN", {
|
|
|
|
item: item,
|
|
|
|
}, next);
|
|
|
|
}
|
2023-09-07 23:03:20 +08:00
|
|
|
blobs.archive.blob(item.blobId, 'INACTIVE', function (err) {
|
2020-09-29 20:34:51 +08:00
|
|
|
if (err) {
|
2023-08-23 16:57:11 +08:00
|
|
|
return Log.error("EVICT_ARCHIVE_BLOB_ERROR", {
|
2020-09-29 20:34:51 +08:00
|
|
|
error: err,
|
|
|
|
item: item,
|
2023-08-23 16:57:11 +08:00
|
|
|
}, next);
|
2020-09-29 20:34:51 +08:00
|
|
|
}
|
2024-01-08 21:08:26 +08:00
|
|
|
removed++;
|
2020-09-29 20:34:51 +08:00
|
|
|
Log.info("EVICT_ARCHIVE_BLOB", {
|
|
|
|
item: item,
|
2023-08-23 16:57:11 +08:00
|
|
|
}, next);
|
2020-09-29 20:34:51 +08:00
|
|
|
});
|
|
|
|
}, w(function () {
|
2020-10-14 19:53:53 +08:00
|
|
|
report.totalBlobs = total;
|
|
|
|
report.activeBlobs = total - removed;
|
2023-08-23 16:57:11 +08:00
|
|
|
Log.info('EVICT_BLOBS_REMOVED', removed, w());
|
2020-09-29 20:34:51 +08:00
|
|
|
}));
|
|
|
|
};
|
|
|
|
|
|
|
|
var archiveInactiveBlobProofs = function (w) {
|
|
|
|
// iterate over blob proofs and remove them
|
|
|
|
// if they don't correspond to a pinned or active file
|
|
|
|
var removed = 0;
|
2023-01-19 12:44:22 +08:00
|
|
|
var total = 0;
|
|
|
|
|
|
|
|
Log.info("EVICT_ARCHIVE_INACTIVE_BLOB_PROOFS_START", {});
|
2020-09-29 20:34:51 +08:00
|
|
|
blobs.list.proofs(function (err, item, next) {
|
2020-10-14 19:52:31 +08:00
|
|
|
next = Util.mkAsync(next, THROTTLE_FACTOR);
|
2020-09-29 20:34:51 +08:00
|
|
|
if (err) {
|
2023-08-23 16:57:11 +08:00
|
|
|
return void Log.error("EVICT_BLOB_LIST_PROOFS_ERROR", err, next);
|
2020-09-29 20:34:51 +08:00
|
|
|
}
|
|
|
|
if (!item) {
|
2023-08-23 16:57:11 +08:00
|
|
|
return void Log.error('EVICT_BLOB_LIST_PROOFS_NO_ITEM', item, next);
|
2020-09-29 20:34:51 +08:00
|
|
|
}
|
2023-01-19 12:44:22 +08:00
|
|
|
total++;
|
|
|
|
|
|
|
|
if (total % PROGRESS_FACTOR === 0) {
|
|
|
|
Log.info('EVICT_BLOB_PROOF_PROGRESS', {
|
|
|
|
proofs: total,
|
|
|
|
});
|
|
|
|
}
|
|
|
|
|
2020-09-29 20:34:51 +08:00
|
|
|
if (pinnedDocs.test(item.blobId)) { return void next(); }
|
2023-01-23 10:59:36 +08:00
|
|
|
if (item.mtime > inactiveTime) { return void next(); }
|
2020-09-29 20:34:51 +08:00
|
|
|
nThen(function (w) {
|
|
|
|
blobs.size(item.blobId, w(function (err, size) {
|
|
|
|
if (err) {
|
|
|
|
w.abort();
|
2023-08-23 16:57:11 +08:00
|
|
|
return void Log.error("EVICT_BLOB_LIST_PROOFS_ERROR", err, next);
|
2020-09-29 20:34:51 +08:00
|
|
|
}
|
|
|
|
if (size !== 0) {
|
|
|
|
w.abort();
|
|
|
|
next();
|
|
|
|
}
|
|
|
|
}));
|
|
|
|
}).nThen(function () {
|
2024-01-08 21:08:26 +08:00
|
|
|
if (Env.DRY_RUN) {
|
|
|
|
removed++;
|
|
|
|
return void Log.info("EVICT_BLOB_PROOF_LONELY_DRY_RUN", item, next);
|
|
|
|
}
|
2020-09-29 20:34:51 +08:00
|
|
|
blobs.remove.proof(item.safeKey, item.blobId, function (err) {
|
|
|
|
if (err) {
|
2023-08-23 16:57:11 +08:00
|
|
|
return Log.error("EVICT_BLOB_PROOF_LONELY_ERROR", item, next);
|
2020-09-29 20:34:51 +08:00
|
|
|
}
|
|
|
|
removed++;
|
2023-08-23 16:57:11 +08:00
|
|
|
return Log.info("EVICT_BLOB_PROOF_LONELY", item, next);
|
2020-09-29 20:34:51 +08:00
|
|
|
});
|
|
|
|
});
|
|
|
|
}, w(function () {
|
2023-01-19 12:44:22 +08:00
|
|
|
Log.info("EVICT_BLOB_PROOFS_REMOVED", {
|
|
|
|
removed,
|
|
|
|
total,
|
2023-08-23 16:57:11 +08:00
|
|
|
}, w());
|
2020-09-29 20:34:51 +08:00
|
|
|
}));
|
|
|
|
};
|
|
|
|
|
|
|
|
var archiveInactiveChannels = function (w) {
|
|
|
|
var channels = 0;
|
|
|
|
var archived = 0;
|
|
|
|
|
|
|
|
var handler = function (err, item, cb) {
|
2020-10-14 19:52:31 +08:00
|
|
|
cb = Util.mkAsync(cb, THROTTLE_FACTOR);
|
2020-09-29 20:34:51 +08:00
|
|
|
channels++;
|
2023-01-19 12:44:22 +08:00
|
|
|
if (channels % PROGRESS_FACTOR === 0) {
|
|
|
|
Log.info('EVICT_INACTIVE_CHANNELS_PROGRESS', {
|
|
|
|
channels,
|
|
|
|
archived,
|
|
|
|
});
|
|
|
|
}
|
|
|
|
|
2020-09-29 20:34:51 +08:00
|
|
|
if (err) {
|
2023-08-23 16:57:11 +08:00
|
|
|
return Log.error('EVICT_CHANNEL_ITERATION', err, cb);
|
2020-09-29 20:34:51 +08:00
|
|
|
}
|
2023-01-23 10:59:36 +08:00
|
|
|
|
|
|
|
// ignore the special admin broadcast channel
|
|
|
|
if (item.channel.length === 33) { return void cb(); }
|
|
|
|
|
2020-09-29 20:34:51 +08:00
|
|
|
// check if the database has any ephemeral channels
|
|
|
|
// if it does it's because of a bug, and they should be removed
|
|
|
|
if (item.channel.length === 34) {
|
2024-01-08 21:08:26 +08:00
|
|
|
if (Env.DRY_RUN) {
|
|
|
|
return void Log.info("EVICT_EPHEMERAL_DRY_RUN", item.channel, cb);
|
|
|
|
}
|
2020-09-29 20:34:51 +08:00
|
|
|
return void store.removeChannel(item.channel, w(function (err) {
|
|
|
|
if (err) {
|
2023-08-23 16:57:11 +08:00
|
|
|
return Log.error('EVICT_EPHEMERAL_CHANNEL_REMOVAL_ERROR', {
|
2020-09-29 20:34:51 +08:00
|
|
|
error: err,
|
|
|
|
channel: item.channel,
|
2023-08-23 16:57:11 +08:00
|
|
|
}, cb);
|
2020-09-29 20:34:51 +08:00
|
|
|
}
|
2023-08-23 16:57:11 +08:00
|
|
|
Log.info('EVICT_EPHEMERAL_CHANNEL_REMOVAL', item.channel, cb);
|
2020-09-29 20:34:51 +08:00
|
|
|
}));
|
|
|
|
}
|
|
|
|
|
|
|
|
// bail out if the channel is in the set of activeDocs
|
|
|
|
if (activeDocs.test(item.channel)) { return void cb(); }
|
|
|
|
|
|
|
|
// ignore the channel if it's pinned
|
|
|
|
if (pinnedDocs.test(item.channel)) { return void cb(); }
|
|
|
|
|
|
|
|
nThen(function (w) {
|
|
|
|
// double check that the channel really is inactive before archiving it
|
|
|
|
// because it might have been created after the initial activity scan
|
|
|
|
store.getChannelStats(item.channel, w(function (err, newerItem) {
|
|
|
|
if (err) { return; }
|
2023-01-23 10:59:36 +08:00
|
|
|
if (newerItem && getNewestTime(newerItem) > retentionTime) {
|
2020-09-29 20:34:51 +08:00
|
|
|
// it's actually active, so don't archive it.
|
|
|
|
w.abort();
|
|
|
|
cb();
|
|
|
|
}
|
|
|
|
// else fall through to the archival
|
|
|
|
}));
|
2023-08-23 16:57:11 +08:00
|
|
|
}).nThen(function (w) {
|
2024-01-08 21:08:26 +08:00
|
|
|
if (Env.DRY_RUN) {
|
|
|
|
archived++;
|
2024-01-09 22:09:41 +08:00
|
|
|
w.abort();
|
2024-01-08 21:08:26 +08:00
|
|
|
return void Log.info("EVICT_CHANNEL_ARCHIVAL_DRY_RUN", item.channel, cb);
|
|
|
|
}
|
2023-09-07 23:03:20 +08:00
|
|
|
return void store.archiveChannel(item.channel, 'INACTIVE', w(function (err) {
|
2020-09-29 20:34:51 +08:00
|
|
|
if (err) {
|
|
|
|
Log.error('EVICT_CHANNEL_ARCHIVAL_ERROR', {
|
|
|
|
error: err,
|
|
|
|
channel: item.channel,
|
2023-08-23 16:57:11 +08:00
|
|
|
}, w());
|
|
|
|
return;
|
2020-09-29 20:34:51 +08:00
|
|
|
}
|
|
|
|
archived++;
|
2024-01-08 21:08:26 +08:00
|
|
|
Log.info('EVICT_CHANNEL_ARCHIVAL', item.channel, w());
|
2020-09-29 20:34:51 +08:00
|
|
|
}));
|
2023-08-23 16:57:11 +08:00
|
|
|
}).nThen(cb);
|
2020-09-29 20:34:51 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
var done = function () {
|
2020-10-14 19:53:53 +08:00
|
|
|
report.channelsArchived = archived;
|
2023-01-19 12:44:22 +08:00
|
|
|
return void Log.info('EVICT_CHANNELS_ARCHIVED', {
|
|
|
|
channels,
|
|
|
|
archived,
|
|
|
|
});
|
2020-09-29 20:34:51 +08:00
|
|
|
};
|
|
|
|
|
2023-01-19 12:44:22 +08:00
|
|
|
Log.info('EVICT_INACTIVE_CHANNELS_START', {});
|
2020-09-29 20:34:51 +08:00
|
|
|
store.listChannels(handler, w(done), true); // using a hacky "fast mode" since we only need the channel id
|
|
|
|
};
|
|
|
|
|
2024-01-08 21:08:26 +08:00
|
|
|
if (Env.DRY_RUN) { Env.Log.info('DRY RUN'); }
|
2020-09-29 20:34:51 +08:00
|
|
|
nThen(loadStorage)
|
|
|
|
|
|
|
|
// iterate over all documents and add them to a bloom filter if they have been active
|
|
|
|
.nThen(categorizeChannelsByActivity)
|
|
|
|
.nThen(categorizeBlobsByActivity)
|
|
|
|
|
|
|
|
// iterate over all accounts and add them to a bloom filter if they are active
|
|
|
|
.nThen(categorizeAccountsByActivity)
|
|
|
|
|
|
|
|
// iterate again and archive inactive unpinned documents
|
|
|
|
// (documents which are not in either bloom filter)
|
|
|
|
|
|
|
|
.nThen(archiveInactiveBlobs)
|
|
|
|
.nThen(archiveInactiveBlobProofs)
|
|
|
|
.nThen(archiveInactiveChannels)
|
|
|
|
.nThen(function () {
|
2020-10-14 19:53:53 +08:00
|
|
|
var runningTime = report.runningTime = msSinceStart();
|
|
|
|
Log.info("EVICT_TIME_TO_RUN_SCRIPT", runningTime);
|
2020-09-29 20:34:51 +08:00
|
|
|
}).nThen(function () {
|
2020-10-14 19:53:53 +08:00
|
|
|
complete(void 0, report);
|
2020-09-29 20:34:51 +08:00
|
|
|
});
|
|
|
|
};
|
2021-02-17 16:47:41 +08:00
|
|
|
|
|
|
|
module.exports.archived = evictArchived;
|