2023-04-12 09:59:56 +08:00
|
|
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
2017-10-18 12:37:34 +08:00
|
|
|
/*
|
2023-04-12 09:59:57 +08:00
|
|
|
* Copyright (C) 2017-2023 Oracle. All Rights Reserved.
|
2023-04-12 09:59:56 +08:00
|
|
|
* Author: Darrick J. Wong <djwong@kernel.org>
|
2017-10-18 12:37:34 +08:00
|
|
|
*/
|
|
|
|
#include "xfs.h"
|
|
|
|
#include "xfs_fs.h"
|
|
|
|
#include "xfs_shared.h"
|
|
|
|
#include "xfs_format.h"
|
|
|
|
#include "xfs_trans_resv.h"
|
|
|
|
#include "xfs_mount.h"
|
|
|
|
#include "xfs_log_format.h"
|
|
|
|
#include "xfs_trans.h"
|
|
|
|
#include "xfs_inode.h"
|
2018-05-10 01:02:00 +08:00
|
|
|
#include "xfs_quota.h"
|
|
|
|
#include "xfs_qm.h"
|
2018-05-14 21:34:36 +08:00
|
|
|
#include "xfs_errortag.h"
|
|
|
|
#include "xfs_error.h"
|
2019-11-07 09:19:33 +08:00
|
|
|
#include "xfs_scrub.h"
|
2017-10-18 12:37:34 +08:00
|
|
|
#include "scrub/scrub.h"
|
2017-10-18 12:37:36 +08:00
|
|
|
#include "scrub/common.h"
|
2017-10-18 12:37:34 +08:00
|
|
|
#include "scrub/trace.h"
|
2018-05-14 21:34:36 +08:00
|
|
|
#include "scrub/repair.h"
|
2019-04-16 23:22:00 +08:00
|
|
|
#include "scrub/health.h"
|
2017-10-18 12:37:34 +08:00
|
|
|
|
2017-10-18 12:37:35 +08:00
|
|
|
/*
|
|
|
|
* Online Scrub and Repair
|
|
|
|
*
|
|
|
|
* Traditionally, XFS (the kernel driver) did not know how to check or
|
|
|
|
* repair on-disk data structures. That task was left to the xfs_check
|
|
|
|
* and xfs_repair tools, both of which require taking the filesystem
|
|
|
|
* offline for a thorough but time-consuming examination. Online
|
|
|
|
* scrub & repair, on the other hand, enables us to check the metadata
|
|
|
|
* for obvious errors while carefully stepping around the filesystem's
|
|
|
|
* ongoing operations, locking rules, etc.
|
|
|
|
*
|
|
|
|
* Given that most XFS metadata consist of records stored in a btree,
|
|
|
|
* most of the checking functions iterate the btree blocks themselves
|
|
|
|
* looking for irregularities. When a record block is encountered, each
|
|
|
|
* record can be checked for obviously bad values. Record values can
|
|
|
|
* also be cross-referenced against other btrees to look for potential
|
|
|
|
* misunderstandings between pieces of metadata.
|
|
|
|
*
|
|
|
|
* It is expected that the checkers responsible for per-AG metadata
|
|
|
|
* structures will lock the AG headers (AGI, AGF, AGFL), iterate the
|
|
|
|
* metadata structure, and perform any relevant cross-referencing before
|
|
|
|
* unlocking the AG and returning the results to userspace. These
|
|
|
|
* scrubbers must not keep an AG locked for too long to avoid tying up
|
|
|
|
* the block and inode allocators.
|
|
|
|
*
|
|
|
|
* Block maps and b-trees rooted in an inode present a special challenge
|
|
|
|
* because they can involve extents from any AG. The general scrubber
|
|
|
|
* structure of lock -> check -> xref -> unlock still holds, but AG
|
|
|
|
* locking order rules /must/ be obeyed to avoid deadlocks. The
|
|
|
|
* ordering rule, of course, is that we must lock in increasing AG
|
|
|
|
* order. Helper functions are provided to track which AG headers we've
|
|
|
|
* already locked. If we detect an imminent locking order violation, we
|
|
|
|
* can signal a potential deadlock, in which case the scrubber can jump
|
|
|
|
* out to the top level, lock all the AGs in order, and retry the scrub.
|
|
|
|
*
|
|
|
|
* For file data (directories, extended attributes, symlinks) scrub, we
|
|
|
|
* can simply lock the inode and walk the data. For btree data
|
|
|
|
* (directories and attributes) we follow the same btree-scrubbing
|
|
|
|
* strategy outlined previously to check the records.
|
|
|
|
*
|
|
|
|
* We use a bit of trickery with transactions to avoid buffer deadlocks
|
|
|
|
* if there is a cycle in the metadata. The basic problem is that
|
|
|
|
* travelling down a btree involves locking the current buffer at each
|
|
|
|
* tree level. If a pointer should somehow point back to a buffer that
|
|
|
|
* we've already examined, we will deadlock due to the second buffer
|
|
|
|
* locking attempt. Note however that grabbing a buffer in transaction
|
|
|
|
* context links the locked buffer to the transaction. If we try to
|
|
|
|
* re-grab the buffer in the context of the same transaction, we avoid
|
|
|
|
* the second lock attempt and continue. Between the verifier and the
|
|
|
|
* scrubber, something will notice that something is amiss and report
|
|
|
|
* the corruption. Therefore, each scrubber will allocate an empty
|
|
|
|
* transaction, attach buffers to it, and cancel the transaction at the
|
|
|
|
* end of the scrub run. Cancelling a non-dirty transaction simply
|
|
|
|
* unlocks the buffers.
|
|
|
|
*
|
|
|
|
* There are four pieces of data that scrub can communicate to
|
|
|
|
* userspace. The first is the error code (errno), which can be used to
|
|
|
|
* communicate operational errors in performing the scrub. There are
|
|
|
|
* also three flags that can be set in the scrub context. If the data
|
|
|
|
* structure itself is corrupt, the CORRUPT flag will be set. If
|
|
|
|
* the metadata is correct but otherwise suboptimal, the PREEN flag
|
|
|
|
* will be set.
|
2018-01-17 10:52:14 +08:00
|
|
|
*
|
|
|
|
* We perform secondary validation of filesystem metadata by
|
|
|
|
* cross-referencing every record with all other available metadata.
|
|
|
|
* For example, for block mapping extents, we verify that there are no
|
|
|
|
* records in the free space and inode btrees corresponding to that
|
|
|
|
* space extent and that there is a corresponding entry in the reverse
|
|
|
|
* mapping btree. Inconsistent metadata is noted by setting the
|
|
|
|
* XCORRUPT flag; btree query function errors are noted by setting the
|
|
|
|
* XFAIL flag and deleting the cursor to prevent further attempts to
|
|
|
|
* cross-reference with a defective btree.
|
2018-05-14 21:34:36 +08:00
|
|
|
*
|
|
|
|
* If a piece of metadata proves corrupt or suboptimal, the userspace
|
|
|
|
* program can ask the kernel to apply some tender loving care (TLC) to
|
|
|
|
* the metadata object by setting the REPAIR flag and re-calling the
|
|
|
|
* scrub ioctl. "Corruption" is defined by metadata violating the
|
|
|
|
* on-disk specification; operations cannot continue if the violation is
|
|
|
|
* left untreated. It is possible for XFS to continue if an object is
|
|
|
|
* "suboptimal"; however, performance may be degraded. Repairs are
|
|
|
|
* usually performed by rebuilding the metadata entirely out of
|
|
|
|
* redundant metadata. Optimizing, on the other hand, can sometimes be
|
|
|
|
* done without rebuilding entire structures.
|
|
|
|
*
|
|
|
|
* Generally speaking, the repair code has the following code structure:
|
|
|
|
* Lock -> scrub -> repair -> commit -> re-lock -> re-scrub -> unlock.
|
|
|
|
* The first check helps us figure out if we need to rebuild or simply
|
|
|
|
* optimize the structure so that the rebuild knows what to do. The
|
|
|
|
* second check evaluates the completeness of the repair; that is what
|
|
|
|
* is reported to userspace.
|
2018-07-20 03:29:11 +08:00
|
|
|
*
|
|
|
|
* A quick note on symbol prefixes:
|
|
|
|
* - "xfs_" are general XFS symbols.
|
|
|
|
* - "xchk_" are symbols related to metadata checking.
|
|
|
|
* - "xrep_" are symbols related to metadata repair.
|
|
|
|
* - "xfs_scrub_" are symbols that tie online fsck to the rest of XFS.
|
2017-10-18 12:37:35 +08:00
|
|
|
*/
|
|
|
|
|
2017-10-18 12:37:36 +08:00
|
|
|
/*
 * Scrub probe -- the "is anybody home?" scrub type.
 *
 * xfs_scrub uses this to find out whether the running kernel is willing to
 * scrub (and repair) the metadata of a given mountpoint.  By the time this
 * function runs, the ioctl inputs have been validated and the filesystem has
 * been prepared for a scrub (or repair) operation; there is nothing to check,
 * so we return to userspace right away.  Userspace can use the returned errno
 * and structure state to decide (in broad terms) whether scrub/repair are
 * supported.
 */
static int
xchk_probe(
	struct xfs_scrub	*sc)
{
	int			ret = 0;

	/*
	 * If xchk_should_terminate() tells us to stop, pass along the error
	 * code it filled in; otherwise the probe trivially succeeds.
	 */
	return xchk_should_terminate(sc, &ret) ? ret : 0;
}
|
|
|
|
|
2017-10-18 12:37:35 +08:00
|
|
|
/* Scrub setup and teardown */
|
|
|
|
|
2023-04-12 09:59:59 +08:00
|
|
|
static inline void
|
|
|
|
xchk_fsgates_disable(
|
|
|
|
struct xfs_scrub *sc)
|
|
|
|
{
|
|
|
|
if (!(sc->flags & XCHK_FSGATES_ALL))
|
|
|
|
return;
|
|
|
|
|
|
|
|
trace_xchk_fsgates_disable(sc, sc->flags & XCHK_FSGATES_ALL);
|
|
|
|
|
|
|
|
if (sc->flags & XCHK_FSGATES_DRAIN)
|
|
|
|
xfs_drain_wait_disable();
|
|
|
|
|
|
|
|
sc->flags &= ~XCHK_FSGATES_ALL;
|
|
|
|
}
|
|
|
|
|
2017-10-18 12:37:35 +08:00
|
|
|
/* Free all the resources and finish the transactions. */
|
|
|
|
STATIC int
|
2018-07-20 03:29:11 +08:00
|
|
|
xchk_teardown(
|
2018-07-20 03:29:12 +08:00
|
|
|
struct xfs_scrub *sc,
|
2018-07-20 03:29:12 +08:00
|
|
|
int error)
|
2017-10-18 12:37:35 +08:00
|
|
|
{
|
2021-03-23 00:51:51 +08:00
|
|
|
struct xfs_inode *ip_in = XFS_I(file_inode(sc->file));
|
|
|
|
|
2018-07-20 03:29:11 +08:00
|
|
|
xchk_ag_free(sc, &sc->sa);
|
2017-10-18 12:37:35 +08:00
|
|
|
if (sc->tp) {
|
2018-05-14 21:34:36 +08:00
|
|
|
if (error == 0 && (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR))
|
|
|
|
error = xfs_trans_commit(sc->tp);
|
|
|
|
else
|
|
|
|
xfs_trans_cancel(sc->tp);
|
2017-10-18 12:37:35 +08:00
|
|
|
sc->tp = NULL;
|
|
|
|
}
|
2017-10-18 12:37:42 +08:00
|
|
|
if (sc->ip) {
|
2018-01-17 10:53:57 +08:00
|
|
|
if (sc->ilock_flags)
|
|
|
|
xfs_iunlock(sc->ip, sc->ilock_flags);
|
2017-10-18 12:37:42 +08:00
|
|
|
if (sc->ip != ip_in &&
|
|
|
|
!xfs_internal_inum(sc->mp, sc->ip->i_ino))
|
xfs: manage inode DONTCACHE status at irele time
Right now, there are statements scattered all over the online fsck
codebase about how we can't use XFS_IGET_DONTCACHE because of concerns
about scrub's unusual practice of releasing inodes with transactions
held.
However, iget is the wrong place to handle this -- the DONTCACHE state
doesn't matter at all until we try to *release* the inode, and here we
get things wrong in multiple ways:
First, if we /do/ have a transaction, we must NOT drop the inode,
because the inode could have dirty pages, dropping the inode will
trigger writeback, and writeback can trigger a nested transaction.
Second, if the inode already had an active reference and the DONTCACHE
flag set, the icache hit when scrub grabs another ref will not clear
DONTCACHE. This is sort of by design, since DONTCACHE is now used to
initiate cache drops so that sysadmins can change a file's access mode
between pagecache and DAX.
Third, if we do actually have the last active reference to the inode, we
can set DONTCACHE to avoid polluting the cache. This is the /one/ case
where we actually want that flag.
Create an xchk_irele helper to encode all that logic and switch the
online fsck code to use it. Since this now means that nearly all
scrubbers use the same xfs_iget flags, we can wrap them too.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
2023-04-12 10:00:20 +08:00
|
|
|
xchk_irele(sc, sc->ip);
|
2017-10-18 12:37:42 +08:00
|
|
|
sc->ip = NULL;
|
|
|
|
}
|
2021-03-23 00:51:51 +08:00
|
|
|
if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR)
|
|
|
|
mnt_drop_write_file(sc->file);
|
2017-10-18 12:37:45 +08:00
|
|
|
if (sc->buf) {
|
2023-04-12 10:00:30 +08:00
|
|
|
if (sc->buf_cleanup)
|
|
|
|
sc->buf_cleanup(sc->buf);
|
2022-11-07 09:03:16 +08:00
|
|
|
kvfree(sc->buf);
|
2023-04-12 10:00:30 +08:00
|
|
|
sc->buf_cleanup = NULL;
|
2017-10-18 12:37:45 +08:00
|
|
|
sc->buf = NULL;
|
|
|
|
}
|
2023-04-12 09:59:59 +08:00
|
|
|
|
|
|
|
xchk_fsgates_disable(sc);
|
2017-10-18 12:37:35 +08:00
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Scrubbing dispatch. */
|
|
|
|
|
2018-07-20 03:29:11 +08:00
|
|
|
static const struct xchk_meta_ops meta_scrub_ops[] = {
|
2018-01-09 02:41:33 +08:00
|
|
|
[XFS_SCRUB_TYPE_PROBE] = { /* ioctl presence test */
|
2018-01-09 02:41:34 +08:00
|
|
|
.type = ST_NONE,
|
2018-07-20 03:29:11 +08:00
|
|
|
.setup = xchk_setup_fs,
|
|
|
|
.scrub = xchk_probe,
|
2018-07-20 03:29:11 +08:00
|
|
|
.repair = xrep_probe,
|
2017-10-18 12:37:36 +08:00
|
|
|
},
|
2018-01-09 02:41:33 +08:00
|
|
|
[XFS_SCRUB_TYPE_SB] = { /* superblock */
|
2018-01-09 02:41:34 +08:00
|
|
|
.type = ST_PERAG,
|
2023-04-12 09:59:59 +08:00
|
|
|
.setup = xchk_setup_agheader,
|
2018-07-20 03:29:11 +08:00
|
|
|
.scrub = xchk_superblock,
|
2018-07-20 03:29:11 +08:00
|
|
|
.repair = xrep_superblock,
|
2017-10-18 12:37:38 +08:00
|
|
|
},
|
2018-01-09 02:41:33 +08:00
|
|
|
[XFS_SCRUB_TYPE_AGF] = { /* agf */
|
2018-01-09 02:41:34 +08:00
|
|
|
.type = ST_PERAG,
|
2023-04-12 09:59:59 +08:00
|
|
|
.setup = xchk_setup_agheader,
|
2018-07-20 03:29:11 +08:00
|
|
|
.scrub = xchk_agf,
|
2018-08-10 13:42:53 +08:00
|
|
|
.repair = xrep_agf,
|
2017-10-18 12:37:39 +08:00
|
|
|
},
|
2018-01-09 02:41:33 +08:00
|
|
|
[XFS_SCRUB_TYPE_AGFL]= { /* agfl */
|
2018-01-09 02:41:34 +08:00
|
|
|
.type = ST_PERAG,
|
2023-04-12 09:59:59 +08:00
|
|
|
.setup = xchk_setup_agheader,
|
2018-07-20 03:29:11 +08:00
|
|
|
.scrub = xchk_agfl,
|
2018-08-10 13:43:02 +08:00
|
|
|
.repair = xrep_agfl,
|
2017-10-18 12:37:39 +08:00
|
|
|
},
|
2018-01-09 02:41:33 +08:00
|
|
|
[XFS_SCRUB_TYPE_AGI] = { /* agi */
|
2018-01-09 02:41:34 +08:00
|
|
|
.type = ST_PERAG,
|
2023-04-12 09:59:59 +08:00
|
|
|
.setup = xchk_setup_agheader,
|
2018-07-20 03:29:11 +08:00
|
|
|
.scrub = xchk_agi,
|
2018-08-10 13:43:04 +08:00
|
|
|
.repair = xrep_agi,
|
2017-10-18 12:37:39 +08:00
|
|
|
},
|
2018-01-09 02:41:33 +08:00
|
|
|
[XFS_SCRUB_TYPE_BNOBT] = { /* bnobt */
|
2018-01-09 02:41:34 +08:00
|
|
|
.type = ST_PERAG,
|
2018-07-20 03:29:11 +08:00
|
|
|
.setup = xchk_setup_ag_allocbt,
|
|
|
|
.scrub = xchk_bnobt,
|
2018-07-20 03:29:11 +08:00
|
|
|
.repair = xrep_notsupported,
|
2017-10-18 12:37:40 +08:00
|
|
|
},
|
2018-01-09 02:41:33 +08:00
|
|
|
[XFS_SCRUB_TYPE_CNTBT] = { /* cntbt */
|
2018-01-09 02:41:34 +08:00
|
|
|
.type = ST_PERAG,
|
2018-07-20 03:29:11 +08:00
|
|
|
.setup = xchk_setup_ag_allocbt,
|
|
|
|
.scrub = xchk_cntbt,
|
2018-07-20 03:29:11 +08:00
|
|
|
.repair = xrep_notsupported,
|
2017-10-18 12:37:40 +08:00
|
|
|
},
|
2018-01-09 02:41:33 +08:00
|
|
|
[XFS_SCRUB_TYPE_INOBT] = { /* inobt */
|
2018-01-09 02:41:34 +08:00
|
|
|
.type = ST_PERAG,
|
2018-07-20 03:29:11 +08:00
|
|
|
.setup = xchk_setup_ag_iallocbt,
|
|
|
|
.scrub = xchk_inobt,
|
2018-07-20 03:29:11 +08:00
|
|
|
.repair = xrep_notsupported,
|
2017-10-18 12:37:40 +08:00
|
|
|
},
|
2018-01-09 02:41:33 +08:00
|
|
|
[XFS_SCRUB_TYPE_FINOBT] = { /* finobt */
|
2018-01-09 02:41:34 +08:00
|
|
|
.type = ST_PERAG,
|
2018-07-20 03:29:11 +08:00
|
|
|
.setup = xchk_setup_ag_iallocbt,
|
|
|
|
.scrub = xchk_finobt,
|
2021-08-19 09:46:54 +08:00
|
|
|
.has = xfs_has_finobt,
|
2018-07-20 03:29:11 +08:00
|
|
|
.repair = xrep_notsupported,
|
2017-10-18 12:37:40 +08:00
|
|
|
},
|
2018-01-09 02:41:33 +08:00
|
|
|
[XFS_SCRUB_TYPE_RMAPBT] = { /* rmapbt */
|
2018-01-09 02:41:34 +08:00
|
|
|
.type = ST_PERAG,
|
2018-07-20 03:29:11 +08:00
|
|
|
.setup = xchk_setup_ag_rmapbt,
|
|
|
|
.scrub = xchk_rmapbt,
|
2021-08-19 09:46:54 +08:00
|
|
|
.has = xfs_has_rmapbt,
|
2018-07-20 03:29:11 +08:00
|
|
|
.repair = xrep_notsupported,
|
2017-10-18 12:37:41 +08:00
|
|
|
},
|
2018-01-09 02:41:33 +08:00
|
|
|
[XFS_SCRUB_TYPE_REFCNTBT] = { /* refcountbt */
|
2018-01-09 02:41:34 +08:00
|
|
|
.type = ST_PERAG,
|
2018-07-20 03:29:11 +08:00
|
|
|
.setup = xchk_setup_ag_refcountbt,
|
|
|
|
.scrub = xchk_refcountbt,
|
2021-08-19 09:46:54 +08:00
|
|
|
.has = xfs_has_reflink,
|
2018-07-20 03:29:11 +08:00
|
|
|
.repair = xrep_notsupported,
|
2017-10-18 12:37:41 +08:00
|
|
|
},
|
2018-01-09 02:41:33 +08:00
|
|
|
[XFS_SCRUB_TYPE_INODE] = { /* inode record */
|
2018-01-09 02:41:34 +08:00
|
|
|
.type = ST_INODE,
|
2018-07-20 03:29:11 +08:00
|
|
|
.setup = xchk_setup_inode,
|
|
|
|
.scrub = xchk_inode,
|
2018-07-20 03:29:11 +08:00
|
|
|
.repair = xrep_notsupported,
|
2017-10-18 12:37:42 +08:00
|
|
|
},
|
2018-01-09 02:41:33 +08:00
|
|
|
[XFS_SCRUB_TYPE_BMBTD] = { /* inode data fork */
|
2018-01-09 02:41:34 +08:00
|
|
|
.type = ST_INODE,
|
2018-07-20 03:29:11 +08:00
|
|
|
.setup = xchk_setup_inode_bmap,
|
|
|
|
.scrub = xchk_bmap_data,
|
2018-07-20 03:29:11 +08:00
|
|
|
.repair = xrep_notsupported,
|
2017-10-18 12:37:43 +08:00
|
|
|
},
|
2018-01-09 02:41:33 +08:00
|
|
|
[XFS_SCRUB_TYPE_BMBTA] = { /* inode attr fork */
|
2018-01-09 02:41:34 +08:00
|
|
|
.type = ST_INODE,
|
2018-07-20 03:29:11 +08:00
|
|
|
.setup = xchk_setup_inode_bmap,
|
|
|
|
.scrub = xchk_bmap_attr,
|
2018-07-20 03:29:11 +08:00
|
|
|
.repair = xrep_notsupported,
|
2017-10-18 12:37:43 +08:00
|
|
|
},
|
2018-01-09 02:41:33 +08:00
|
|
|
[XFS_SCRUB_TYPE_BMBTC] = { /* inode CoW fork */
|
2018-01-09 02:41:34 +08:00
|
|
|
.type = ST_INODE,
|
2018-07-20 03:29:11 +08:00
|
|
|
.setup = xchk_setup_inode_bmap,
|
|
|
|
.scrub = xchk_bmap_cow,
|
2018-07-20 03:29:11 +08:00
|
|
|
.repair = xrep_notsupported,
|
2017-10-18 12:37:43 +08:00
|
|
|
},
|
2018-01-09 02:41:33 +08:00
|
|
|
[XFS_SCRUB_TYPE_DIR] = { /* directory */
|
2018-01-09 02:41:34 +08:00
|
|
|
.type = ST_INODE,
|
2018-07-20 03:29:11 +08:00
|
|
|
.setup = xchk_setup_directory,
|
|
|
|
.scrub = xchk_directory,
|
2018-07-20 03:29:11 +08:00
|
|
|
.repair = xrep_notsupported,
|
2017-10-18 12:37:44 +08:00
|
|
|
},
|
2018-01-09 02:41:33 +08:00
|
|
|
[XFS_SCRUB_TYPE_XATTR] = { /* extended attributes */
|
2018-01-09 02:41:34 +08:00
|
|
|
.type = ST_INODE,
|
2018-07-20 03:29:11 +08:00
|
|
|
.setup = xchk_setup_xattr,
|
|
|
|
.scrub = xchk_xattr,
|
2018-07-20 03:29:11 +08:00
|
|
|
.repair = xrep_notsupported,
|
2017-10-18 12:37:45 +08:00
|
|
|
},
|
2018-01-09 02:41:33 +08:00
|
|
|
[XFS_SCRUB_TYPE_SYMLINK] = { /* symbolic link */
|
2018-01-09 02:41:34 +08:00
|
|
|
.type = ST_INODE,
|
2018-07-20 03:29:11 +08:00
|
|
|
.setup = xchk_setup_symlink,
|
|
|
|
.scrub = xchk_symlink,
|
2018-07-20 03:29:11 +08:00
|
|
|
.repair = xrep_notsupported,
|
2017-10-18 12:37:45 +08:00
|
|
|
},
|
2018-01-09 02:41:33 +08:00
|
|
|
[XFS_SCRUB_TYPE_PARENT] = { /* parent pointers */
|
2018-01-09 02:41:34 +08:00
|
|
|
.type = ST_INODE,
|
2018-07-20 03:29:11 +08:00
|
|
|
.setup = xchk_setup_parent,
|
|
|
|
.scrub = xchk_parent,
|
2018-07-20 03:29:11 +08:00
|
|
|
.repair = xrep_notsupported,
|
2017-10-18 12:37:46 +08:00
|
|
|
},
|
2018-01-09 02:41:33 +08:00
|
|
|
[XFS_SCRUB_TYPE_RTBITMAP] = { /* realtime bitmap */
|
2018-01-09 02:41:34 +08:00
|
|
|
.type = ST_FS,
|
2018-07-20 03:29:11 +08:00
|
|
|
.setup = xchk_setup_rt,
|
|
|
|
.scrub = xchk_rtbitmap,
|
2021-08-19 09:46:54 +08:00
|
|
|
.has = xfs_has_realtime,
|
2018-07-20 03:29:11 +08:00
|
|
|
.repair = xrep_notsupported,
|
2017-10-18 12:37:46 +08:00
|
|
|
},
|
2018-01-09 02:41:33 +08:00
|
|
|
[XFS_SCRUB_TYPE_RTSUM] = { /* realtime summary */
|
2018-01-09 02:41:34 +08:00
|
|
|
.type = ST_FS,
|
2018-07-20 03:29:11 +08:00
|
|
|
.setup = xchk_setup_rt,
|
|
|
|
.scrub = xchk_rtsummary,
|
2021-08-19 09:46:54 +08:00
|
|
|
.has = xfs_has_realtime,
|
2018-07-20 03:29:11 +08:00
|
|
|
.repair = xrep_notsupported,
|
2017-10-18 12:37:46 +08:00
|
|
|
},
|
2018-01-09 02:41:33 +08:00
|
|
|
[XFS_SCRUB_TYPE_UQUOTA] = { /* user quota */
|
2018-01-09 02:41:34 +08:00
|
|
|
.type = ST_FS,
|
2018-07-20 03:29:11 +08:00
|
|
|
.setup = xchk_setup_quota,
|
|
|
|
.scrub = xchk_quota,
|
2018-07-20 03:29:11 +08:00
|
|
|
.repair = xrep_notsupported,
|
2017-10-18 12:37:47 +08:00
|
|
|
},
|
2018-01-09 02:41:33 +08:00
|
|
|
[XFS_SCRUB_TYPE_GQUOTA] = { /* group quota */
|
2018-01-09 02:41:34 +08:00
|
|
|
.type = ST_FS,
|
2018-07-20 03:29:11 +08:00
|
|
|
.setup = xchk_setup_quota,
|
|
|
|
.scrub = xchk_quota,
|
2018-07-20 03:29:11 +08:00
|
|
|
.repair = xrep_notsupported,
|
2017-10-18 12:37:47 +08:00
|
|
|
},
|
2018-01-09 02:41:33 +08:00
|
|
|
[XFS_SCRUB_TYPE_PQUOTA] = { /* project quota */
|
2018-01-09 02:41:34 +08:00
|
|
|
.type = ST_FS,
|
2018-07-20 03:29:11 +08:00
|
|
|
.setup = xchk_setup_quota,
|
|
|
|
.scrub = xchk_quota,
|
2018-07-20 03:29:11 +08:00
|
|
|
.repair = xrep_notsupported,
|
2017-10-18 12:37:47 +08:00
|
|
|
},
|
2019-04-26 09:26:24 +08:00
|
|
|
[XFS_SCRUB_TYPE_FSCOUNTERS] = { /* fs summary counters */
|
|
|
|
.type = ST_FS,
|
|
|
|
.setup = xchk_setup_fscounters,
|
|
|
|
.scrub = xchk_fscounters,
|
|
|
|
.repair = xrep_notsupported,
|
|
|
|
},
|
2017-10-18 12:37:35 +08:00
|
|
|
};
|
|
|
|
|
2018-01-09 02:41:34 +08:00
|
|
|
static int
|
2018-07-20 03:29:11 +08:00
|
|
|
xchk_validate_inputs(
|
2018-01-09 02:41:34 +08:00
|
|
|
struct xfs_mount *mp,
|
2017-10-18 12:37:34 +08:00
|
|
|
struct xfs_scrub_metadata *sm)
|
|
|
|
{
|
2018-01-09 02:41:34 +08:00
|
|
|
int error;
|
2018-07-20 03:29:11 +08:00
|
|
|
const struct xchk_meta_ops *ops;
|
2017-10-18 12:37:35 +08:00
|
|
|
|
|
|
|
error = -EINVAL;
|
2018-01-09 02:41:34 +08:00
|
|
|
/* Check our inputs. */
|
2017-10-18 12:37:35 +08:00
|
|
|
sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;
|
|
|
|
if (sm->sm_flags & ~XFS_SCRUB_FLAGS_IN)
|
|
|
|
goto out;
|
2018-01-09 02:41:34 +08:00
|
|
|
/* sm_reserved[] must be zero */
|
2017-10-18 12:37:35 +08:00
|
|
|
if (memchr_inv(sm->sm_reserved, 0, sizeof(sm->sm_reserved)))
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
error = -ENOENT;
|
2018-01-09 02:41:34 +08:00
|
|
|
/* Do we know about this type of metadata? */
|
2017-10-18 12:37:35 +08:00
|
|
|
if (sm->sm_type >= XFS_SCRUB_TYPE_NR)
|
|
|
|
goto out;
|
|
|
|
ops = &meta_scrub_ops[sm->sm_type];
|
2018-01-09 02:41:33 +08:00
|
|
|
if (ops->setup == NULL || ops->scrub == NULL)
|
2017-10-18 12:37:35 +08:00
|
|
|
goto out;
|
2018-01-09 02:41:34 +08:00
|
|
|
/* Does this fs even support this type of metadata? */
|
2021-08-19 09:46:54 +08:00
|
|
|
if (ops->has && !ops->has(mp))
|
2018-01-09 02:41:34 +08:00
|
|
|
goto out;
|
2017-10-18 12:37:35 +08:00
|
|
|
|
2018-01-09 02:41:34 +08:00
|
|
|
error = -EINVAL;
|
|
|
|
/* restricting fields must be appropriate for type */
|
|
|
|
switch (ops->type) {
|
|
|
|
case ST_NONE:
|
|
|
|
case ST_FS:
|
|
|
|
if (sm->sm_ino || sm->sm_gen || sm->sm_agno)
|
|
|
|
goto out;
|
|
|
|
break;
|
|
|
|
case ST_PERAG:
|
|
|
|
if (sm->sm_ino || sm->sm_gen ||
|
|
|
|
sm->sm_agno >= mp->m_sb.sb_agcount)
|
|
|
|
goto out;
|
|
|
|
break;
|
|
|
|
case ST_INODE:
|
|
|
|
if (sm->sm_agno || (sm->sm_gen && !sm->sm_ino))
|
|
|
|
goto out;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
2018-05-14 21:34:36 +08:00
|
|
|
/*
|
|
|
|
* We only want to repair read-write v5+ filesystems. Defer the check
|
|
|
|
* for ops->repair until after our scrub confirms that we need to
|
|
|
|
* perform repairs so that we avoid failing due to not supporting
|
|
|
|
* repairing an object that doesn't need repairs.
|
|
|
|
*/
|
|
|
|
if (sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) {
|
|
|
|
error = -EOPNOTSUPP;
|
2021-08-19 09:46:37 +08:00
|
|
|
if (!xfs_has_crc(mp))
|
2018-05-14 21:34:36 +08:00
|
|
|
goto out;
|
|
|
|
|
|
|
|
error = -EROFS;
|
2021-08-19 09:46:52 +08:00
|
|
|
if (xfs_is_readonly(mp))
|
2018-05-14 21:34:36 +08:00
|
|
|
goto out;
|
|
|
|
}
|
2017-10-18 12:37:35 +08:00
|
|
|
|
2018-01-09 02:41:34 +08:00
|
|
|
error = 0;
|
|
|
|
out:
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
2018-05-14 21:34:36 +08:00
|
|
|
#ifdef CONFIG_XFS_ONLINE_REPAIR
|
2018-07-20 03:29:12 +08:00
|
|
|
static inline void xchk_postmortem(struct xfs_scrub *sc)
|
2018-05-14 21:34:36 +08:00
|
|
|
{
|
|
|
|
/*
|
|
|
|
* Userspace asked us to repair something, we repaired it, rescanned
|
|
|
|
* it, and the rescan says it's still broken. Scream about this in
|
|
|
|
* the system logs.
|
|
|
|
*/
|
|
|
|
if ((sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) &&
|
|
|
|
(sc->sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT |
|
|
|
|
XFS_SCRUB_OFLAG_XCORRUPT)))
|
2018-07-20 03:29:11 +08:00
|
|
|
xrep_failure(sc->mp);
|
2018-05-14 21:34:36 +08:00
|
|
|
}
|
|
|
|
#else
|
2018-07-20 03:29:12 +08:00
|
|
|
static inline void xchk_postmortem(struct xfs_scrub *sc)
|
2018-05-14 21:34:36 +08:00
|
|
|
{
|
|
|
|
/*
|
|
|
|
* Userspace asked us to scrub something, it's broken, and we have no
|
|
|
|
* way of fixing it. Scream in the logs.
|
|
|
|
*/
|
|
|
|
if (sc->sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT |
|
|
|
|
XFS_SCRUB_OFLAG_XCORRUPT))
|
|
|
|
xfs_alert_ratelimited(sc->mp,
|
|
|
|
"Corruption detected during scrub.");
|
|
|
|
}
|
|
|
|
#endif /* CONFIG_XFS_ONLINE_REPAIR */
|
|
|
|
|
2018-01-09 02:41:34 +08:00
|
|
|
/* Dispatch metadata scrubbing. */
|
|
|
|
int
|
|
|
|
xfs_scrub_metadata(
|
2021-03-23 00:51:51 +08:00
|
|
|
struct file *file,
|
2018-01-09 02:41:34 +08:00
|
|
|
struct xfs_scrub_metadata *sm)
|
|
|
|
{
|
2021-09-17 03:19:00 +08:00
|
|
|
struct xfs_scrub *sc;
|
2021-04-08 08:59:39 +08:00
|
|
|
struct xfs_mount *mp = XFS_I(file_inode(file))->i_mount;
|
2018-01-09 02:41:34 +08:00
|
|
|
int error = 0;
|
|
|
|
|
|
|
|
BUILD_BUG_ON(sizeof(meta_scrub_ops) !=
|
2018-07-20 03:29:11 +08:00
|
|
|
(sizeof(struct xchk_meta_ops) * XFS_SCRUB_TYPE_NR));
|
2018-01-09 02:41:34 +08:00
|
|
|
|
2021-04-08 08:59:39 +08:00
|
|
|
trace_xchk_start(XFS_I(file_inode(file)), sm, error);
|
2018-01-09 02:41:34 +08:00
|
|
|
|
|
|
|
/* Forbidden if we are shut down or mounted norecovery. */
|
|
|
|
error = -ESHUTDOWN;
|
2021-08-19 09:46:53 +08:00
|
|
|
if (xfs_is_shutdown(mp))
|
2018-01-09 02:41:34 +08:00
|
|
|
goto out;
|
|
|
|
error = -ENOTRECOVERABLE;
|
2021-08-19 09:46:52 +08:00
|
|
|
if (xfs_has_norecovery(mp))
|
2018-01-09 02:41:34 +08:00
|
|
|
goto out;
|
|
|
|
|
2018-07-20 03:29:11 +08:00
|
|
|
error = xchk_validate_inputs(mp, sm);
|
2018-01-09 02:41:34 +08:00
|
|
|
if (error)
|
|
|
|
goto out;
|
|
|
|
|
2022-05-27 08:31:34 +08:00
|
|
|
xfs_warn_mount(mp, XFS_OPSTATE_WARNED_SCRUB,
|
|
|
|
"EXPERIMENTAL online scrub feature in use. Use at your own risk!");
|
2017-10-18 12:37:35 +08:00
|
|
|
|
2022-11-07 09:03:16 +08:00
|
|
|
sc = kzalloc(sizeof(struct xfs_scrub), XCHK_GFP_FLAGS);
|
2021-09-17 03:19:00 +08:00
|
|
|
if (!sc) {
|
|
|
|
error = -ENOMEM;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
sc->mp = mp;
|
|
|
|
sc->file = file;
|
|
|
|
sc->sm = sm;
|
|
|
|
sc->ops = &meta_scrub_ops[sm->sm_type];
|
|
|
|
sc->sick_mask = xchk_health_mask_for_scrub_type(sm->sm_type);
|
2017-10-18 12:37:35 +08:00
|
|
|
retry_op:
|
2020-03-25 14:03:24 +08:00
|
|
|
/*
|
2021-03-23 00:51:51 +08:00
|
|
|
* When repairs are allowed, prevent freezing or readonly remount while
|
|
|
|
* scrub is running with a real transaction.
|
2020-03-25 14:03:24 +08:00
|
|
|
*/
|
2021-03-23 00:51:51 +08:00
|
|
|
if (sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) {
|
2021-09-17 03:19:00 +08:00
|
|
|
error = mnt_want_write_file(sc->file);
|
2021-03-23 00:51:51 +08:00
|
|
|
if (error)
|
2021-09-17 03:19:00 +08:00
|
|
|
goto out_sc;
|
2021-03-23 00:51:51 +08:00
|
|
|
}
|
2020-03-25 14:03:24 +08:00
|
|
|
|
2017-10-18 12:37:35 +08:00
|
|
|
/* Set up for the operation. */
|
2021-09-17 03:19:00 +08:00
|
|
|
error = sc->ops->setup(sc);
|
2023-04-12 09:59:59 +08:00
|
|
|
if (error == -EDEADLOCK && !(sc->flags & XCHK_TRY_HARDER))
|
|
|
|
goto try_harder;
|
2023-04-12 10:00:00 +08:00
|
|
|
if (error == -ECHRNG && !(sc->flags & XCHK_NEED_DRAIN))
|
|
|
|
goto need_drain;
|
2017-10-18 12:37:35 +08:00
|
|
|
if (error)
|
|
|
|
goto out_teardown;
|
|
|
|
|
|
|
|
/* Scrub for errors. */
|
2021-09-17 03:19:00 +08:00
|
|
|
error = sc->ops->scrub(sc);
|
2023-04-12 09:59:59 +08:00
|
|
|
if (error == -EDEADLOCK && !(sc->flags & XCHK_TRY_HARDER))
|
|
|
|
goto try_harder;
|
2023-04-12 10:00:00 +08:00
|
|
|
if (error == -ECHRNG && !(sc->flags & XCHK_NEED_DRAIN))
|
|
|
|
goto need_drain;
|
2023-04-12 09:59:59 +08:00
|
|
|
if (error || (sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE))
|
2017-10-18 12:37:35 +08:00
|
|
|
goto out_teardown;
|
|
|
|
|
2021-09-17 03:19:00 +08:00
|
|
|
xchk_update_health(sc);
|
2019-04-16 23:22:00 +08:00
|
|
|
|
2021-09-17 03:19:00 +08:00
|
|
|
if ((sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) &&
|
|
|
|
!(sc->flags & XREP_ALREADY_FIXED)) {
|
2018-05-14 21:34:36 +08:00
|
|
|
bool needs_fix;
|
|
|
|
|
|
|
|
/* Let debug users force us into the repair routines. */
|
|
|
|
if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR))
|
2021-09-17 03:19:00 +08:00
|
|
|
sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
|
2018-05-14 21:34:36 +08:00
|
|
|
|
2021-09-17 03:19:00 +08:00
|
|
|
needs_fix = (sc->sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT |
|
|
|
|
XFS_SCRUB_OFLAG_XCORRUPT |
|
|
|
|
XFS_SCRUB_OFLAG_PREEN));
|
2018-05-14 21:34:36 +08:00
|
|
|
/*
|
|
|
|
* If userspace asked for a repair but it wasn't necessary,
|
|
|
|
* report that back to userspace.
|
|
|
|
*/
|
|
|
|
if (!needs_fix) {
|
2021-09-17 03:19:00 +08:00
|
|
|
sc->sm->sm_flags |= XFS_SCRUB_OFLAG_NO_REPAIR_NEEDED;
|
2018-05-14 21:34:36 +08:00
|
|
|
goto out_nofix;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If it's broken, userspace wants us to fix it, and we haven't
|
|
|
|
* already tried to fix it, then attempt a repair.
|
|
|
|
*/
|
2021-09-17 03:19:00 +08:00
|
|
|
error = xrep_attempt(sc);
|
2018-05-14 21:34:36 +08:00
|
|
|
if (error == -EAGAIN) {
|
2019-04-16 23:21:59 +08:00
|
|
|
/*
|
|
|
|
* Either the repair function succeeded or it couldn't
|
|
|
|
* get all the resources it needs; either way, we go
|
|
|
|
* back to the beginning and call the scrub function.
|
|
|
|
*/
|
2021-09-17 03:19:00 +08:00
|
|
|
error = xchk_teardown(sc, 0);
|
2018-05-14 21:34:36 +08:00
|
|
|
if (error) {
|
2018-07-20 03:29:11 +08:00
|
|
|
xrep_failure(mp);
|
2021-09-17 03:19:00 +08:00
|
|
|
goto out_sc;
|
2018-05-14 21:34:36 +08:00
|
|
|
}
|
|
|
|
goto retry_op;
|
|
|
|
}
|
|
|
|
}
|
2017-10-18 12:37:35 +08:00
|
|
|
|
2018-05-14 21:34:36 +08:00
|
|
|
out_nofix:
|
2021-09-17 03:19:00 +08:00
|
|
|
xchk_postmortem(sc);
|
2017-10-18 12:37:35 +08:00
|
|
|
out_teardown:
|
2021-09-17 03:19:00 +08:00
|
|
|
error = xchk_teardown(sc, error);
|
|
|
|
out_sc:
|
2022-11-07 09:03:16 +08:00
|
|
|
kfree(sc);
|
2017-10-18 12:37:35 +08:00
|
|
|
out:
|
2021-04-08 08:59:39 +08:00
|
|
|
trace_xchk_done(XFS_I(file_inode(file)), sm, error);
|
2017-10-18 12:37:35 +08:00
|
|
|
if (error == -EFSCORRUPTED || error == -EFSBADCRC) {
|
|
|
|
sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
|
|
|
|
error = 0;
|
|
|
|
}
|
|
|
|
return error;
|
2023-04-12 10:00:00 +08:00
|
|
|
need_drain:
|
|
|
|
error = xchk_teardown(sc, 0);
|
|
|
|
if (error)
|
|
|
|
goto out_sc;
|
|
|
|
sc->flags |= XCHK_NEED_DRAIN;
|
|
|
|
goto retry_op;
|
2023-04-12 09:59:59 +08:00
|
|
|
try_harder:
|
|
|
|
/*
|
|
|
|
* Scrubbers return -EDEADLOCK to mean 'try harder'. Tear down
|
|
|
|
* everything we hold, then set up again with preparation for
|
|
|
|
* worst-case scenarios.
|
|
|
|
*/
|
|
|
|
error = xchk_teardown(sc, 0);
|
|
|
|
if (error)
|
|
|
|
goto out_sc;
|
|
|
|
sc->flags |= XCHK_TRY_HARDER;
|
|
|
|
goto retry_op;
|
2017-10-18 12:37:34 +08:00
|
|
|
}
|