xfs: reload the last iunlink item
It turns out that there are some serious bugs in how xfs handles the unlinked inode lists. Way back before 4.14, there was a bug where a ro mount of a dirty filesystem would recover the log but neglect to purge the unlinked list. This leads to cleanly unmounted filesystems with unlinked inodes. Starting around 5.15, we also converted the codebase to maintain a doubly-linked incore unlinked list. However, we never provided the ability to load the incore list from disk. If someone tries to allocate an O_TMPFILE file on a clean fs with a pre-existing unlinked list or even deletes a file, the code will fail and the fs shuts down. This first part of the correction effort adds the ability to load the first inode in the bucket when unlinking a file; and to load the next inode in the list when inactivating (freeing) an inode. This has been lightly tested with fstests. Enjoy! Signed-off-by: Darrick J. Wong <djwong@kernel.org> -----BEGIN PGP SIGNATURE----- iHUEABYKAB0WIQQ2qTKExjcn+O1o2YRKO3ySh0YRpgUCZQChOQAKCRBKO3ySh0YR plJvAQC0s843w2nvluXlIE8P9nBqk2ht6zwNOJpiZbWnf0zeLAD/a6v0HVVLbGN5 qHVd/abQ5QIW55Ybm3Qko6PKvV4Nlgo= =WcRN -----END PGP SIGNATURE----- Merge tag 'fix-iunlink-6.6_2023-09-12' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux into xfs-6.6-fixesA xfs: reload the last iunlink item It turns out that there are some serious bugs in how xfs handles the unlinked inode lists. Way back before 4.14, there was a bug where a ro mount of a dirty filesystem would recover the log but neglect to purge the unlinked list. This leads to cleanly unmounted filesystems with unlinked inodes. Starting around 5.15, we also converted the codebase to maintain a doubly-linked incore unlinked list. However, we never provided the ability to load the incore list from disk. If someone tries to allocate an O_TMPFILE file on a clean fs with a pre-existing unlinked list or even deletes a file, the code will fail and the fs shuts down. 
This first part of the correction effort adds the ability to load the first inode in the bucket when unlinking a file; and to load the next inode in the list when inactivating (freeing) an inode. Signed-off-by: Darrick J. Wong <djwong@kernel.org> Signed-off-by: Chandan Babu R <chandanbabu@kernel.org> * tag 'fix-iunlink-6.6_2023-09-12' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux: xfs: load uncached unlinked inodes into memory on demand
This commit is contained in:
commit
fffcdcc31f
|
@ -1828,12 +1828,17 @@ xfs_iunlink_lookup(
|
|||
|
||||
rcu_read_lock();
|
||||
ip = radix_tree_lookup(&pag->pag_ici_root, agino);
|
||||
if (!ip) {
|
||||
/* Caller can handle inode not being in memory. */
|
||||
rcu_read_unlock();
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Inode not in memory or in RCU freeing limbo should not happen.
|
||||
* Warn about this and let the caller handle the failure.
|
||||
* Inode in RCU freeing limbo should not happen. Warn about this and
|
||||
* let the caller handle the failure.
|
||||
*/
|
||||
if (WARN_ON_ONCE(!ip || !ip->i_ino)) {
|
||||
if (WARN_ON_ONCE(!ip->i_ino)) {
|
||||
rcu_read_unlock();
|
||||
return NULL;
|
||||
}
|
||||
|
@ -1842,7 +1847,10 @@ xfs_iunlink_lookup(
|
|||
return ip;
|
||||
}
|
||||
|
||||
/* Update the prev pointer of the next agino. */
|
||||
/*
|
||||
* Update the prev pointer of the next agino. Returns -ENOLINK if the inode
|
||||
* is not in cache.
|
||||
*/
|
||||
static int
|
||||
xfs_iunlink_update_backref(
|
||||
struct xfs_perag *pag,
|
||||
|
@ -1857,7 +1865,8 @@ xfs_iunlink_update_backref(
|
|||
|
||||
ip = xfs_iunlink_lookup(pag, next_agino);
|
||||
if (!ip)
|
||||
return -EFSCORRUPTED;
|
||||
return -ENOLINK;
|
||||
|
||||
ip->i_prev_unlinked = prev_agino;
|
||||
return 0;
|
||||
}
|
||||
|
@ -1901,6 +1910,62 @@ xfs_iunlink_update_bucket(
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Load the inode @next_agino into the cache and set its prev_unlinked pointer
|
||||
* to @prev_agino. Caller must hold the AGI to synchronize with other changes
|
||||
* to the unlinked list.
|
||||
*/
|
||||
STATIC int
|
||||
xfs_iunlink_reload_next(
|
||||
struct xfs_trans *tp,
|
||||
struct xfs_buf *agibp,
|
||||
xfs_agino_t prev_agino,
|
||||
xfs_agino_t next_agino)
|
||||
{
|
||||
struct xfs_perag *pag = agibp->b_pag;
|
||||
struct xfs_mount *mp = pag->pag_mount;
|
||||
struct xfs_inode *next_ip = NULL;
|
||||
xfs_ino_t ino;
|
||||
int error;
|
||||
|
||||
ASSERT(next_agino != NULLAGINO);
|
||||
|
||||
#ifdef DEBUG
|
||||
rcu_read_lock();
|
||||
next_ip = radix_tree_lookup(&pag->pag_ici_root, next_agino);
|
||||
ASSERT(next_ip == NULL);
|
||||
rcu_read_unlock();
|
||||
#endif
|
||||
|
||||
xfs_info_ratelimited(mp,
|
||||
"Found unrecovered unlinked inode 0x%x in AG 0x%x. Initiating recovery.",
|
||||
next_agino, pag->pag_agno);
|
||||
|
||||
/*
|
||||
* Use an untrusted lookup just to be cautious in case the AGI has been
|
||||
* corrupted and now points at a free inode. That shouldn't happen,
|
||||
* but we'd rather shut down now since we're already running in a weird
|
||||
* situation.
|
||||
*/
|
||||
ino = XFS_AGINO_TO_INO(mp, pag->pag_agno, next_agino);
|
||||
error = xfs_iget(mp, tp, ino, XFS_IGET_UNTRUSTED, 0, &next_ip);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
/* If this is not an unlinked inode, something is very wrong. */
|
||||
if (VFS_I(next_ip)->i_nlink != 0) {
|
||||
error = -EFSCORRUPTED;
|
||||
goto rele;
|
||||
}
|
||||
|
||||
next_ip->i_prev_unlinked = prev_agino;
|
||||
trace_xfs_iunlink_reload_next(next_ip);
|
||||
rele:
|
||||
ASSERT(!(VFS_I(next_ip)->i_state & I_DONTCACHE));
|
||||
xfs_irele(next_ip);
|
||||
return error;
|
||||
}
|
||||
|
||||
static int
|
||||
xfs_iunlink_insert_inode(
|
||||
struct xfs_trans *tp,
|
||||
|
@ -1932,6 +1997,8 @@ xfs_iunlink_insert_inode(
|
|||
* inode.
|
||||
*/
|
||||
error = xfs_iunlink_update_backref(pag, agino, next_agino);
|
||||
if (error == -ENOLINK)
|
||||
error = xfs_iunlink_reload_next(tp, agibp, agino, next_agino);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
|
@ -2026,6 +2093,9 @@ xfs_iunlink_remove_inode(
|
|||
*/
|
||||
error = xfs_iunlink_update_backref(pag, ip->i_prev_unlinked,
|
||||
ip->i_next_unlinked);
|
||||
if (error == -ENOLINK)
|
||||
error = xfs_iunlink_reload_next(tp, agibp, ip->i_prev_unlinked,
|
||||
ip->i_next_unlinked);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
|
|
|
@ -3824,6 +3824,31 @@ TRACE_EVENT(xfs_iunlink_update_dinode,
|
|||
__entry->new_ptr)
|
||||
);
|
||||
|
||||
TRACE_EVENT(xfs_iunlink_reload_next,
|
||||
TP_PROTO(struct xfs_inode *ip),
|
||||
TP_ARGS(ip),
|
||||
TP_STRUCT__entry(
|
||||
__field(dev_t, dev)
|
||||
__field(xfs_agnumber_t, agno)
|
||||
__field(xfs_agino_t, agino)
|
||||
__field(xfs_agino_t, prev_agino)
|
||||
__field(xfs_agino_t, next_agino)
|
||||
),
|
||||
TP_fast_assign(
|
||||
__entry->dev = ip->i_mount->m_super->s_dev;
|
||||
__entry->agno = XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino);
|
||||
__entry->agino = XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino);
|
||||
__entry->prev_agino = ip->i_prev_unlinked;
|
||||
__entry->next_agino = ip->i_next_unlinked;
|
||||
),
|
||||
TP_printk("dev %d:%d agno 0x%x agino 0x%x prev_unlinked 0x%x next_unlinked 0x%x",
|
||||
MAJOR(__entry->dev), MINOR(__entry->dev),
|
||||
__entry->agno,
|
||||
__entry->agino,
|
||||
__entry->prev_agino,
|
||||
__entry->next_agino)
|
||||
);
|
||||
|
||||
DECLARE_EVENT_CLASS(xfs_ag_inode_class,
|
||||
TP_PROTO(struct xfs_inode *ip),
|
||||
TP_ARGS(ip),
|
||||
|
|
Loading…
Reference in New Issue