OpenCloudOS-Kernel/fs/xfs/scrub/btree.c

811 lines
21 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2017-2023 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
#include "scrub/trace.h"
/* btree scrubbing */
/*
* Check for btree operation errors. See the section about handling
* operational errors in common.c.
*/
static bool
__xchk_btree_process_error(
struct xfs_scrub *sc,
struct xfs_btree_cur *cur,
int level,
int *error,
__u32 errflag,
void *ret_ip)
{
if (*error == 0)
return true;
switch (*error) {
case -EDEADLOCK:
case -ECHRNG:
/* Used to restart an op with deadlock avoidance. */
trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
break;
case -EFSBADCRC:
case -EFSCORRUPTED:
/* Note the badness but don't abort. */
sc->sm->sm_flags |= errflag;
*error = 0;
xfs: Fix fall-through warnings for Clang In preparation to enable -Wimplicit-fallthrough for Clang, fix the following warnings by replacing /* fall through */ comments, and its variants, with the new pseudo-keyword macro fallthrough: fs/xfs/libxfs/xfs_alloc.c:3167:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] fs/xfs/libxfs/xfs_da_btree.c:286:3: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] fs/xfs/libxfs/xfs_ag_resv.c:346:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] fs/xfs/libxfs/xfs_ag_resv.c:388:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] fs/xfs/xfs_bmap_util.c:246:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] fs/xfs/xfs_export.c:88:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] fs/xfs/xfs_export.c:96:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] fs/xfs/xfs_file.c:867:3: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] fs/xfs/xfs_ioctl.c:562:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] fs/xfs/xfs_ioctl.c:1548:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] fs/xfs/xfs_iomap.c:1040:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] fs/xfs/xfs_inode.c:852:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] fs/xfs/xfs_log.c:2627:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] fs/xfs/xfs_trans_buf.c:298:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] fs/xfs/scrub/bmap.c:275:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] fs/xfs/scrub/btree.c:48:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] fs/xfs/scrub/common.c:85:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] fs/xfs/scrub/common.c:138:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] fs/xfs/scrub/common.c:698:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] fs/xfs/scrub/dabtree.c:51:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] fs/xfs/scrub/repair.c:951:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] fs/xfs/scrub/agheader.c:89:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] Notice that Clang doesn't recognize /* fall through */ comments as implicit fall-through markings, so in order to globally enable -Wimplicit-fallthrough for Clang, these comments need to be replaced with fallthrough; in the whole codebase. Link: https://github.com/KSPP/linux/issues/115 Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
2021-04-21 06:54:36 +08:00
fallthrough;
default:
if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
trace_xchk_ifork_btree_op_error(sc, cur, level,
*error, ret_ip);
else
trace_xchk_btree_op_error(sc, cur, level,
*error, ret_ip);
break;
}
return false;
}
bool
xchk_btree_process_error(
struct xfs_scrub *sc,
struct xfs_btree_cur *cur,
int level,
int *error)
{
return __xchk_btree_process_error(sc, cur, level, error,
XFS_SCRUB_OFLAG_CORRUPT, __return_address);
}
bool
xchk_btree_xref_process_error(
struct xfs_scrub *sc,
struct xfs_btree_cur *cur,
int level,
int *error)
{
return __xchk_btree_process_error(sc, cur, level, error,
XFS_SCRUB_OFLAG_XFAIL, __return_address);
}
/* Record btree block corruption. */
static void
__xchk_btree_set_corrupt(
struct xfs_scrub *sc,
struct xfs_btree_cur *cur,
int level,
__u32 errflag,
void *ret_ip)
{
sc->sm->sm_flags |= errflag;
if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
trace_xchk_ifork_btree_error(sc, cur, level,
ret_ip);
else
trace_xchk_btree_error(sc, cur, level,
ret_ip);
}
void
xchk_btree_set_corrupt(
struct xfs_scrub *sc,
struct xfs_btree_cur *cur,
int level)
{
__xchk_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_CORRUPT,
__return_address);
}
void
xchk_btree_xref_set_corrupt(
struct xfs_scrub *sc,
struct xfs_btree_cur *cur,
int level)
{
__xchk_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_XCORRUPT,
__return_address);
}
void
xchk_btree_set_preen(
struct xfs_scrub *sc,
struct xfs_btree_cur *cur,
int level)
{
__xchk_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_PREEN,
__return_address);
}
/*
* Make sure this record is in order and doesn't stray outside of the parent
* keys.
*/
STATIC void
xchk_btree_rec(
struct xchk_btree *bs)
{
struct xfs_btree_cur *cur = bs->cur;
union xfs_btree_rec *rec;
union xfs_btree_key key;
union xfs_btree_key hkey;
union xfs_btree_key *keyp;
struct xfs_btree_block *block;
struct xfs_btree_block *keyblock;
struct xfs_buf *bp;
block = xfs_btree_get_block(cur, 0, &bp);
rec = xfs_btree_rec_addr(cur, cur->bc_levels[0].ptr, block);
trace_xchk_btree_rec(bs->sc, cur, 0);
/* Are all records across all record blocks in order? */
if (bs->lastrec_valid &&
!cur->bc_ops->recs_inorder(cur, &bs->lastrec, rec))
xchk_btree_set_corrupt(bs->sc, cur, 0);
memcpy(&bs->lastrec, rec, cur->bc_ops->rec_len);
bs->lastrec_valid = true;
if (cur->bc_nlevels == 1)
return;
/* Is low_key(rec) at least as large as the parent low key? */
cur->bc_ops->init_key_from_rec(&key, rec);
keyblock = xfs_btree_get_block(cur, 1, &bp);
keyp = xfs_btree_key_addr(cur, cur->bc_levels[1].ptr, keyblock);
if (xfs_btree_keycmp_lt(cur, &key, keyp))
xchk_btree_set_corrupt(bs->sc, cur, 1);
if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
return;
/* Is high_key(rec) no larger than the parent high key? */
cur->bc_ops->init_high_key_from_rec(&hkey, rec);
keyp = xfs_btree_high_key_addr(cur, cur->bc_levels[1].ptr, keyblock);
if (xfs_btree_keycmp_lt(cur, keyp, &hkey))
xchk_btree_set_corrupt(bs->sc, cur, 1);
}
/*
* Make sure this key is in order and doesn't stray outside of the parent
* keys.
*/
STATIC void
xchk_btree_key(
struct xchk_btree *bs,
int level)
{
struct xfs_btree_cur *cur = bs->cur;
union xfs_btree_key *key;
union xfs_btree_key *keyp;
struct xfs_btree_block *block;
struct xfs_btree_block *keyblock;
struct xfs_buf *bp;
block = xfs_btree_get_block(cur, level, &bp);
key = xfs_btree_key_addr(cur, cur->bc_levels[level].ptr, block);
trace_xchk_btree_key(bs->sc, cur, level);
/* Are all low keys across all node blocks in order? */
if (bs->lastkey[level - 1].valid &&
!cur->bc_ops->keys_inorder(cur, &bs->lastkey[level - 1].key, key))
xchk_btree_set_corrupt(bs->sc, cur, level);
memcpy(&bs->lastkey[level - 1].key, key, cur->bc_ops->key_len);
bs->lastkey[level - 1].valid = true;
if (level + 1 >= cur->bc_nlevels)
return;
/* Is this block's low key at least as large as the parent low key? */
keyblock = xfs_btree_get_block(cur, level + 1, &bp);
keyp = xfs_btree_key_addr(cur, cur->bc_levels[level + 1].ptr, keyblock);
if (xfs_btree_keycmp_lt(cur, key, keyp))
xchk_btree_set_corrupt(bs->sc, cur, level);
if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
return;
/* Is this block's high key no larger than the parent high key? */
key = xfs_btree_high_key_addr(cur, cur->bc_levels[level].ptr, block);
keyp = xfs_btree_high_key_addr(cur, cur->bc_levels[level + 1].ptr,
keyblock);
if (xfs_btree_keycmp_lt(cur, keyp, key))
xchk_btree_set_corrupt(bs->sc, cur, level);
}
/*
* Check a btree pointer. Returns true if it's ok to use this pointer.
* Callers do not need to set the corrupt flag.
*/
static bool
xchk_btree_ptr_ok(
struct xchk_btree *bs,
int level,
union xfs_btree_ptr *ptr)
{
bool res;
/* A btree rooted in an inode has no block pointer to the root. */
if ((bs->cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
level == bs->cur->bc_nlevels)
return true;
/* Otherwise, check the pointers. */
if (bs->cur->bc_flags & XFS_BTREE_LONG_PTRS)
res = xfs_btree_check_lptr(bs->cur, be64_to_cpu(ptr->l), level);
else
res = xfs_btree_check_sptr(bs->cur, be32_to_cpu(ptr->s), level);
if (!res)
xchk_btree_set_corrupt(bs->sc, bs->cur, level);
return res;
}
/* Check that a btree block's sibling matches what we expect it. */
STATIC int
xchk_btree_block_check_sibling(
struct xchk_btree *bs,
int level,
int direction,
union xfs_btree_ptr *sibling)
{
struct xfs_btree_cur *cur = bs->cur;
struct xfs_btree_block *pblock;
struct xfs_buf *pbp;
struct xfs_btree_cur *ncur = NULL;
union xfs_btree_ptr *pp;
int success;
int error;
error = xfs_btree_dup_cursor(cur, &ncur);
if (!xchk_btree_process_error(bs->sc, cur, level + 1, &error) ||
!ncur)
return error;
/*
* If the pointer is null, we shouldn't be able to move the upper
* level pointer anywhere.
*/
if (xfs_btree_ptr_is_null(cur, sibling)) {
if (direction > 0)
error = xfs_btree_increment(ncur, level + 1, &success);
else
error = xfs_btree_decrement(ncur, level + 1, &success);
if (error == 0 && success)
xchk_btree_set_corrupt(bs->sc, cur, level);
error = 0;
goto out;
}
/* Increment upper level pointer. */
if (direction > 0)
error = xfs_btree_increment(ncur, level + 1, &success);
else
error = xfs_btree_decrement(ncur, level + 1, &success);
if (!xchk_btree_process_error(bs->sc, cur, level + 1, &error))
goto out;
if (!success) {
xchk_btree_set_corrupt(bs->sc, cur, level + 1);
goto out;
}
/* Compare upper level pointer to sibling pointer. */
pblock = xfs_btree_get_block(ncur, level + 1, &pbp);
pp = xfs_btree_ptr_addr(ncur, ncur->bc_levels[level + 1].ptr, pblock);
if (!xchk_btree_ptr_ok(bs, level + 1, pp))
goto out;
if (pbp)
xchk_buffer_recheck(bs->sc, pbp);
if (xfs_btree_diff_two_ptrs(cur, pp, sibling))
xchk_btree_set_corrupt(bs->sc, cur, level);
out:
xfs_btree_del_cursor(ncur, XFS_BTREE_ERROR);
return error;
}
/* Check the siblings of a btree block. */
STATIC int
xchk_btree_block_check_siblings(
struct xchk_btree *bs,
struct xfs_btree_block *block)
{
struct xfs_btree_cur *cur = bs->cur;
union xfs_btree_ptr leftsib;
union xfs_btree_ptr rightsib;
int level;
int error = 0;
xfs_btree_get_sibling(cur, block, &leftsib, XFS_BB_LEFTSIB);
xfs_btree_get_sibling(cur, block, &rightsib, XFS_BB_RIGHTSIB);
level = xfs_btree_get_level(block);
/* Root block should never have siblings. */
if (level == cur->bc_nlevels - 1) {
if (!xfs_btree_ptr_is_null(cur, &leftsib) ||
!xfs_btree_ptr_is_null(cur, &rightsib))
xchk_btree_set_corrupt(bs->sc, cur, level);
goto out;
}
/*
* Does the left & right sibling pointers match the adjacent
* parent level pointers?
* (These function absorbs error codes for us.)
*/
error = xchk_btree_block_check_sibling(bs, level, -1, &leftsib);
if (error)
return error;
error = xchk_btree_block_check_sibling(bs, level, 1, &rightsib);
if (error)
return error;
out:
return error;
}
struct check_owner {
struct list_head list;
xfs_daddr_t daddr;
int level;
};
/*
* Make sure this btree block isn't in the free list and that there's
* an rmap record for it.
*/
STATIC int
xchk_btree_check_block_owner(
struct xchk_btree *bs,
int level,
xfs_daddr_t daddr)
{
xfs_agnumber_t agno;
xfs_agblock_t agbno;
xfs_btnum_t btnum;
bool init_sa;
int error = 0;
if (!bs->cur)
return 0;
btnum = bs->cur->bc_btnum;
agno = xfs_daddr_to_agno(bs->cur->bc_mp, daddr);
agbno = xfs_daddr_to_agbno(bs->cur->bc_mp, daddr);
init_sa = bs->cur->bc_flags & XFS_BTREE_LONG_PTRS;
if (init_sa) {
error = xchk_ag_init_existing(bs->sc, agno, &bs->sc->sa);
if (!xchk_btree_xref_process_error(bs->sc, bs->cur,
level, &error))
xfs: fix perag structure refcounting error when scrub fails The kernel test robot found the following bug when running xfs/355 to scrub a bmap btree: XFS: Assertion failed: !sa->pag, file: fs/xfs/scrub/common.c, line: 412 ------------[ cut here ]------------ kernel BUG at fs/xfs/xfs_message.c:110! invalid opcode: 0000 [#1] SMP PTI CPU: 2 PID: 1415 Comm: xfs_scrub Not tainted 5.14.0-rc4-00021-g48c6615cc557 #1 Hardware name: Hewlett-Packard p6-1451cx/2ADA, BIOS 8.15 02/05/2013 RIP: 0010:assfail+0x23/0x28 [xfs] RSP: 0018:ffffc9000aacb890 EFLAGS: 00010202 RAX: 0000000000000000 RBX: ffffc9000aacbcc8 RCX: 0000000000000000 RDX: 00000000ffffffc0 RSI: 000000000000000a RDI: ffffffffc09e7dcd RBP: ffffc9000aacbc80 R08: ffff8881fdf17d50 R09: 0000000000000000 R10: 000000000000000a R11: f000000000000000 R12: 0000000000000000 R13: ffff88820c7ed000 R14: 0000000000000001 R15: ffffc9000aacb980 FS: 00007f185b955700(0000) GS:ffff8881fdf00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f7f6ef43000 CR3: 000000020de38002 CR4: 00000000001706e0 Call Trace: xchk_ag_read_headers+0xda/0x100 [xfs] xchk_ag_init+0x15/0x40 [xfs] xchk_btree_check_block_owner+0x76/0x180 [xfs] xchk_btree_get_block+0xd0/0x140 [xfs] xchk_btree+0x32e/0x440 [xfs] xchk_bmap_btree+0xd4/0x140 [xfs] xchk_bmap+0x1eb/0x3c0 [xfs] xfs_scrub_metadata+0x227/0x4c0 [xfs] xfs_ioc_scrub_metadata+0x50/0xc0 [xfs] xfs_file_ioctl+0x90c/0xc40 [xfs] __x64_sys_ioctl+0x83/0xc0 do_syscall_64+0x3b/0xc0 The unusual handling of errors while initializing struct xchk_ag is the root cause here. Since the beginning of xfs_scrub, the goal of xchk_ag_read_headers has been to read all three AG header buffers and attach them both to the xchk_ag structure and the scrub transaction. Corruption errors on any of the three headers doesn't necessarily trigger an immediate return to userspace, because xfs_scrub can also tell us to /fix/ the problem. In other words, it's possible for the xchk_ag init functions to return an error code and a partially filled out structure so that scrub can use however much information it managed to pull. Before 5.15, it was sufficient to cancel (or commit) the scrub transaction on the way out of the scrub code to release the buffers. Ccommit 48c6615cc557 added a reference to the perag structure to struct xchk_ag. Since perag structures are not attached to transactions like buffers are, this adds the requirement that the perag ref be released explicitly. The scrub teardown function xchk_teardown was amended to do this for the xchk_ag embedded in struct xfs_scrub. Unfortunately, I forgot that certain parts of the scrub code probe multiple AGs and therefore handle the initialization and cleanup on their own. Specifically, the bmbt scrubber will initialize it long enough to cross-reference AG metadata for btree blocks and for the extent mappings in the bmbt. If one of the AG headers is corrupt, the init function returns with a live perag structure reference and some of the AG header buffers. If an error occurs, the cross referencing will be noted as XCORRUPTion and skipped, but the main scrub process will move on to the next record. It is now necessary to release the perag reference before we try to analyze something from a different AG, or else we'll trip over the assertion noted above. Fixes: 48c6615cc557 ("xfs: grab active perag ref when reading AG headers") Reported-by: kernel test robot <oliver.sang@intel.com> Signed-off-by: Darrick J. Wong <djwong@kernel.org> Reviewed-by: Chandan Babu R <chandanrlinux@gmail.com>
2021-08-20 03:07:49 +08:00
goto out_free;
}
xchk_xref_is_used_space(bs->sc, agbno, 1);
/*
* The bnobt scrubber aliases bs->cur to bs->sc->sa.bno_cur, so we
* have to nullify it (to shut down further block owner checks) if
* self-xref encounters problems.
*/
if (!bs->sc->sa.bno_cur && btnum == XFS_BTNUM_BNO)
bs->cur = NULL;
xchk_xref_is_only_owned_by(bs->sc, agbno, 1, bs->oinfo);
if (!bs->sc->sa.rmap_cur && btnum == XFS_BTNUM_RMAP)
bs->cur = NULL;
xfs: fix perag structure refcounting error when scrub fails The kernel test robot found the following bug when running xfs/355 to scrub a bmap btree: XFS: Assertion failed: !sa->pag, file: fs/xfs/scrub/common.c, line: 412 ------------[ cut here ]------------ kernel BUG at fs/xfs/xfs_message.c:110! invalid opcode: 0000 [#1] SMP PTI CPU: 2 PID: 1415 Comm: xfs_scrub Not tainted 5.14.0-rc4-00021-g48c6615cc557 #1 Hardware name: Hewlett-Packard p6-1451cx/2ADA, BIOS 8.15 02/05/2013 RIP: 0010:assfail+0x23/0x28 [xfs] RSP: 0018:ffffc9000aacb890 EFLAGS: 00010202 RAX: 0000000000000000 RBX: ffffc9000aacbcc8 RCX: 0000000000000000 RDX: 00000000ffffffc0 RSI: 000000000000000a RDI: ffffffffc09e7dcd RBP: ffffc9000aacbc80 R08: ffff8881fdf17d50 R09: 0000000000000000 R10: 000000000000000a R11: f000000000000000 R12: 0000000000000000 R13: ffff88820c7ed000 R14: 0000000000000001 R15: ffffc9000aacb980 FS: 00007f185b955700(0000) GS:ffff8881fdf00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f7f6ef43000 CR3: 000000020de38002 CR4: 00000000001706e0 Call Trace: xchk_ag_read_headers+0xda/0x100 [xfs] xchk_ag_init+0x15/0x40 [xfs] xchk_btree_check_block_owner+0x76/0x180 [xfs] xchk_btree_get_block+0xd0/0x140 [xfs] xchk_btree+0x32e/0x440 [xfs] xchk_bmap_btree+0xd4/0x140 [xfs] xchk_bmap+0x1eb/0x3c0 [xfs] xfs_scrub_metadata+0x227/0x4c0 [xfs] xfs_ioc_scrub_metadata+0x50/0xc0 [xfs] xfs_file_ioctl+0x90c/0xc40 [xfs] __x64_sys_ioctl+0x83/0xc0 do_syscall_64+0x3b/0xc0 The unusual handling of errors while initializing struct xchk_ag is the root cause here. Since the beginning of xfs_scrub, the goal of xchk_ag_read_headers has been to read all three AG header buffers and attach them both to the xchk_ag structure and the scrub transaction. Corruption errors on any of the three headers doesn't necessarily trigger an immediate return to userspace, because xfs_scrub can also tell us to /fix/ the problem. In other words, it's possible for the xchk_ag init functions to return an error code and a partially filled out structure so that scrub can use however much information it managed to pull. Before 5.15, it was sufficient to cancel (or commit) the scrub transaction on the way out of the scrub code to release the buffers. Ccommit 48c6615cc557 added a reference to the perag structure to struct xchk_ag. Since perag structures are not attached to transactions like buffers are, this adds the requirement that the perag ref be released explicitly. The scrub teardown function xchk_teardown was amended to do this for the xchk_ag embedded in struct xfs_scrub. Unfortunately, I forgot that certain parts of the scrub code probe multiple AGs and therefore handle the initialization and cleanup on their own. Specifically, the bmbt scrubber will initialize it long enough to cross-reference AG metadata for btree blocks and for the extent mappings in the bmbt. If one of the AG headers is corrupt, the init function returns with a live perag structure reference and some of the AG header buffers. If an error occurs, the cross referencing will be noted as XCORRUPTion and skipped, but the main scrub process will move on to the next record. It is now necessary to release the perag reference before we try to analyze something from a different AG, or else we'll trip over the assertion noted above. Fixes: 48c6615cc557 ("xfs: grab active perag ref when reading AG headers") Reported-by: kernel test robot <oliver.sang@intel.com> Signed-off-by: Darrick J. Wong <djwong@kernel.org> Reviewed-by: Chandan Babu R <chandanrlinux@gmail.com>
2021-08-20 03:07:49 +08:00
out_free:
if (init_sa)
xchk_ag_free(bs->sc, &bs->sc->sa);
return error;
}
/* Check the owner of a btree block. */
STATIC int
xchk_btree_check_owner(
struct xchk_btree *bs,
int level,
struct xfs_buf *bp)
{
struct xfs_btree_cur *cur = bs->cur;
/*
* In theory, xfs_btree_get_block should only give us a null buffer
* pointer for the root of a root-in-inode btree type, but we need
* to check defensively here in case the cursor state is also screwed
* up.
*/
if (bp == NULL) {
if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE))
xchk_btree_set_corrupt(bs->sc, bs->cur, level);
return 0;
}
/*
* We want to cross-reference each btree block with the bnobt
* and the rmapbt. We cannot cross-reference the bnobt or
* rmapbt while scanning the bnobt or rmapbt, respectively,
* because we cannot alter the cursor and we'd prefer not to
* duplicate cursors. Therefore, save the buffer daddr for
* later scanning.
*/
if (cur->bc_btnum == XFS_BTNUM_BNO || cur->bc_btnum == XFS_BTNUM_RMAP) {
struct check_owner *co;
co = kmalloc(sizeof(struct check_owner), XCHK_GFP_FLAGS);
if (!co)
return -ENOMEM;
INIT_LIST_HEAD(&co->list);
co->level = level;
co->daddr = xfs_buf_daddr(bp);
list_add_tail(&co->list, &bs->to_check);
return 0;
}
return xchk_btree_check_block_owner(bs, level, xfs_buf_daddr(bp));
}
/* Decide if we want to check minrecs of a btree block in the inode root. */
static inline bool
xchk_btree_check_iroot_minrecs(
struct xchk_btree *bs)
{
/*
* xfs_bmap_add_attrfork_btree had an implementation bug wherein it
* would miscalculate the space required for the data fork bmbt root
* when adding an attr fork, and promote the iroot contents to an
* external block unnecessarily. This went unnoticed for many years
* until scrub found filesystems in this state. Inode rooted btrees are
* not supposed to have immediate child blocks that are small enough
* that the contents could fit in the inode root, but we can't fail
* existing filesystems, so instead we disable the check for data fork
* bmap btrees when there's an attr fork.
*/
if (bs->cur->bc_btnum == XFS_BTNUM_BMAP &&
bs->cur->bc_ino.whichfork == XFS_DATA_FORK &&
xfs_inode_has_attr_fork(bs->sc->ip))
return false;
return true;
}
/*
* Check that this btree block has at least minrecs records or is one of the
* special blocks that don't require that.
*/
STATIC void
xchk_btree_check_minrecs(
struct xchk_btree *bs,
int level,
struct xfs_btree_block *block)
{
struct xfs_btree_cur *cur = bs->cur;
unsigned int root_level = cur->bc_nlevels - 1;
unsigned int numrecs = be16_to_cpu(block->bb_numrecs);
/* More records than minrecs means the block is ok. */
if (numrecs >= cur->bc_ops->get_minrecs(cur, level))
return;
/*
* For btrees rooted in the inode, it's possible that the root block
* contents spilled into a regular ondisk block because there wasn't
* enough space in the inode root. The number of records in that
* child block might be less than the standard minrecs, but that's ok
* provided that there's only one direct child of the root.
*/
if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
level == cur->bc_nlevels - 2) {
struct xfs_btree_block *root_block;
struct xfs_buf *root_bp;
int root_maxrecs;
root_block = xfs_btree_get_block(cur, root_level, &root_bp);
root_maxrecs = cur->bc_ops->get_dmaxrecs(cur, root_level);
if (xchk_btree_check_iroot_minrecs(bs) &&
(be16_to_cpu(root_block->bb_numrecs) != 1 ||
numrecs <= root_maxrecs))
xchk_btree_set_corrupt(bs->sc, cur, level);
return;
}
/*
* Otherwise, only the root level is allowed to have fewer than minrecs
* records or keyptrs.
*/
if (level < root_level)
xchk_btree_set_corrupt(bs->sc, cur, level);
}
/*
* If this btree block has a parent, make sure that the parent's keys capture
* the keyspace contained in this block.
*/
STATIC void
xchk_btree_block_check_keys(
struct xchk_btree *bs,
int level,
struct xfs_btree_block *block)
{
union xfs_btree_key block_key;
union xfs_btree_key *block_high_key;
union xfs_btree_key *parent_low_key, *parent_high_key;
struct xfs_btree_cur *cur = bs->cur;
struct xfs_btree_block *parent_block;
struct xfs_buf *bp;
if (level == cur->bc_nlevels - 1)
return;
xfs_btree_get_keys(cur, block, &block_key);
/* Make sure the low key of this block matches the parent. */
parent_block = xfs_btree_get_block(cur, level + 1, &bp);
parent_low_key = xfs_btree_key_addr(cur, cur->bc_levels[level + 1].ptr,
parent_block);
if (xfs_btree_keycmp_ne(cur, &block_key, parent_low_key)) {
xchk_btree_set_corrupt(bs->sc, bs->cur, level);
return;
}
if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
return;
/* Make sure the high key of this block matches the parent. */
parent_high_key = xfs_btree_high_key_addr(cur,
cur->bc_levels[level + 1].ptr, parent_block);
block_high_key = xfs_btree_high_key_from_key(cur, &block_key);
if (xfs_btree_keycmp_ne(cur, block_high_key, parent_high_key))
xchk_btree_set_corrupt(bs->sc, bs->cur, level);
}
/*
* Grab and scrub a btree block given a btree pointer. Returns block
* and buffer pointers (if applicable) if they're ok to use.
*/
STATIC int
xchk_btree_get_block(
struct xchk_btree *bs,
int level,
union xfs_btree_ptr *pp,
struct xfs_btree_block **pblock,
struct xfs_buf **pbp)
{
xfs_failaddr_t failed_at;
int error;
*pblock = NULL;
*pbp = NULL;
error = xfs_btree_lookup_get_block(bs->cur, level, pp, pblock);
if (!xchk_btree_process_error(bs->sc, bs->cur, level, &error) ||
!*pblock)
return error;
xfs_btree_get_block(bs->cur, level, pbp);
if (bs->cur->bc_flags & XFS_BTREE_LONG_PTRS)
failed_at = __xfs_btree_check_lblock(bs->cur, *pblock,
level, *pbp);
else
failed_at = __xfs_btree_check_sblock(bs->cur, *pblock,
level, *pbp);
if (failed_at) {
xchk_btree_set_corrupt(bs->sc, bs->cur, level);
return 0;
}
if (*pbp)
xchk_buffer_recheck(bs->sc, *pbp);
xchk_btree_check_minrecs(bs, level, *pblock);
/*
* Check the block's owner; this function absorbs error codes
* for us.
*/
error = xchk_btree_check_owner(bs, level, *pbp);
if (error)
return error;
/*
* Check the block's siblings; this function absorbs error codes
* for us.
*/
error = xchk_btree_block_check_siblings(bs, *pblock);
if (error)
return error;
xchk_btree_block_check_keys(bs, level, *pblock);
return 0;
}
/*
* Check that the low and high keys of this block match the keys stored
* in the parent block.
*/
STATIC void
xchk_btree_block_keys(
struct xchk_btree *bs,
int level,
struct xfs_btree_block *block)
{
union xfs_btree_key block_keys;
struct xfs_btree_cur *cur = bs->cur;
union xfs_btree_key *high_bk;
union xfs_btree_key *parent_keys;
union xfs_btree_key *high_pk;
struct xfs_btree_block *parent_block;
struct xfs_buf *bp;
if (level >= cur->bc_nlevels - 1)
return;
/* Calculate the keys for this block. */
xfs_btree_get_keys(cur, block, &block_keys);
/* Obtain the parent's copy of the keys for this block. */
parent_block = xfs_btree_get_block(cur, level + 1, &bp);
parent_keys = xfs_btree_key_addr(cur, cur->bc_levels[level + 1].ptr,
parent_block);
if (xfs_btree_keycmp_ne(cur, &block_keys, parent_keys))
xchk_btree_set_corrupt(bs->sc, cur, 1);
if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
return;
/* Get high keys */
high_bk = xfs_btree_high_key_from_key(cur, &block_keys);
high_pk = xfs_btree_high_key_addr(cur, cur->bc_levels[level + 1].ptr,
parent_block);
if (xfs_btree_keycmp_ne(cur, high_bk, high_pk))
xchk_btree_set_corrupt(bs->sc, cur, 1);
}
/*
* Visit all nodes and leaves of a btree. Check that all pointers and
* records are in order, that the keys reflect the records, and use a callback
* so that the caller can verify individual records.
*/
int
xchk_btree(
struct xfs_scrub *sc,
struct xfs_btree_cur *cur,
xchk_btree_rec_fn scrub_fn,
const struct xfs_owner_info *oinfo,
void *private)
{
union xfs_btree_ptr ptr;
struct xchk_btree *bs;
union xfs_btree_ptr *pp;
union xfs_btree_rec *recp;
struct xfs_btree_block *block;
struct xfs_buf *bp;
struct check_owner *co;
struct check_owner *n;
size_t cur_sz;
int level;
int error = 0;
/*
* Allocate the btree scrub context from the heap, because this
* structure can get rather large. Don't let a caller feed us a
* totally absurd size.
*/
cur_sz = xchk_btree_sizeof(cur->bc_nlevels);
if (cur_sz > PAGE_SIZE) {
xchk_btree_set_corrupt(sc, cur, 0);
return 0;
}
bs = kzalloc(cur_sz, XCHK_GFP_FLAGS);
if (!bs)
return -ENOMEM;
bs->cur = cur;
bs->scrub_rec = scrub_fn;
bs->oinfo = oinfo;
bs->private = private;
bs->sc = sc;
/* Initialize scrub state */
INIT_LIST_HEAD(&bs->to_check);
/*
* Load the root of the btree. The helper function absorbs
* error codes for us.
*/
level = cur->bc_nlevels - 1;
cur->bc_ops->init_ptr_from_cur(cur, &ptr);
if (!xchk_btree_ptr_ok(bs, cur->bc_nlevels, &ptr))
goto out;
error = xchk_btree_get_block(bs, level, &ptr, &block, &bp);
if (error || !block)
goto out;
cur->bc_levels[level].ptr = 1;
while (level < cur->bc_nlevels) {
block = xfs_btree_get_block(cur, level, &bp);
if (level == 0) {
/* End of leaf, pop back towards the root. */
if (cur->bc_levels[level].ptr >
be16_to_cpu(block->bb_numrecs)) {
xchk_btree_block_keys(bs, level, block);
if (level < cur->bc_nlevels - 1)
cur->bc_levels[level + 1].ptr++;
level++;
continue;
}
/* Records in order for scrub? */
xchk_btree_rec(bs);
/* Call out to the record checker. */
recp = xfs_btree_rec_addr(cur, cur->bc_levels[0].ptr,
block);
error = bs->scrub_rec(bs, recp);
if (error)
break;
if (xchk_should_terminate(sc, &error) ||
(sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
break;
cur->bc_levels[level].ptr++;
continue;
}
/* End of node, pop back towards the root. */
if (cur->bc_levels[level].ptr >
be16_to_cpu(block->bb_numrecs)) {
xchk_btree_block_keys(bs, level, block);
if (level < cur->bc_nlevels - 1)
cur->bc_levels[level + 1].ptr++;
level++;
continue;
}
/* Keys in order for scrub? */
xchk_btree_key(bs, level);
/* Drill another level deeper. */
pp = xfs_btree_ptr_addr(cur, cur->bc_levels[level].ptr, block);
if (!xchk_btree_ptr_ok(bs, level, pp)) {
cur->bc_levels[level].ptr++;
continue;
}
level--;
error = xchk_btree_get_block(bs, level, pp, &block, &bp);
if (error || !block)
goto out;
cur->bc_levels[level].ptr = 1;
}
out:
/* Process deferred owner checks on btree blocks. */
list_for_each_entry_safe(co, n, &bs->to_check, list) {
if (!error && bs->cur)
error = xchk_btree_check_block_owner(bs, co->level,
co->daddr);
list_del(&co->list);
kfree(co);
}
kfree(bs);
return error;
}