Changes for this release:

- Various cleanups and code fixes
 - Implement lazytime as a mount option
 - Convert various on-disk metadata checks from asserts to -EFSCORRUPTED
 - Fix accounting problems with the rmap per-ag reservations
 - Refactorings and cleanups for xfs_log_force
 - Various bugfixes for the reflink code
 - Work around v5 AGFL padding problems to prevent fs shutdowns
 - Establish inode fork verifiers to inspect on-disk metadata correctness
 - Various online scrub fixes
 - Fix v5 swapext blowing up on deleted inodes
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v2
 
 iQIcBAABCgAGBQJawZs5AAoJEPh/dxk0SrTrUbAQAKCT0zaYDHViC6p0yxVMTa1z
 7fivnwtKNYc2LiihV6wPp+Hj5YtTGExYncJOLuTsAIuBZ6px+jlV9bpA8X9mWgbN
 e5XXyqz1O8nn/5iBwKRQm2yFdSnsQQfWXNm0XPNTuPGxuzlzxF/rpFN4UlWGdZul
 tigHom5gZD//GYfYHrsOb/7CIRGw90ebpqM3Nt4eAi5o0H5eK46sHKUYtAngSfPm
 FdPHJwmw5Kx+yZW5EdR+ELbLqGsBKsOfsp9SG+un0R+kvj/CKC2ovgwS6tuU+gsi
 MRD8C0zHlz4ikQrmJ0bV+no7T+9bC8fQDIZu0h7dQ1acWb2F1Epr1LRIxNH/1bLi
 qbtchVZkCNXiV0GMQ2iNo1cDJO3AICsQwTuktpoUMU1QOWgQenvzdZCUOQAUqne6
 xwnrCq19UbmNlCdkRWChrVn9Gb7FNYVhe15W/y0qZhzJxWam6yIzKBm91Zc/XLp8
 L5VUc+FVmtSiHXpEVttSwVeMSzhDfG6qOL42dFmw7xwh7JO/vXi0MlxjGe215ApS
 lhBWjEOGB9kbUxMjhqS5KsFn8E1DhL0AMD7N53z7eBTh5Eani81ytf1PzXWhvLbI
 1auY0+7cVggXFltcW6rfAJFC0EEuw6wsx86rl3G+dQ9vmlhy4zaWlt0EJEGmNC90
 Kw4GpFLDmtV93K++lD1C
 =fdIf
 -----END PGP SIGNATURE-----

Merge tag 'xfs-4.17-merge-1' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull xfs updates from Darrick Wong:
 "Here's the first round of fixes for XFS for 4.17.

  The biggest new features this time around are the addition of lazytime
  support, further enhancement of the on-disk inode metadata verifiers,
  and a patch to smooth over some of the AGFL padding problems that have
  intermittently plagued users since 4.5. I forsee sending a second pull
  request next week with further bug fixes and speedups in the online
  scrub code and elsewhere.

  This series has been run through a full xfstests run over the weekend
  and through a quick xfstests run against this morning's master, with
  no major failures reported.

  Summary of changes for this release:

   - Various cleanups and code fixes

   - Implement lazytime as a mount option

   - Convert various on-disk metadata checks from asserts to -EFSCORRUPTED

   - Fix accounting problems with the rmap per-ag reservations

   - Refactorings and cleanups for xfs_log_force

   - Various bugfixes for the reflink code

   - Work around v5 AGFL padding problems to prevent fs shutdowns

   - Establish inode fork verifiers to inspect on-disk metadata
     correctness

   - Various online scrub fixes

   - Fix v5 swapext blowing up on deleted inodes"

* tag 'xfs-4.17-merge-1' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (49 commits)
  xfs: do not log/recover swapext extent owner changes for deleted inodes
  xfs: clean up xfs_mount allocation and dynamic initializers
  xfs: remove dead inode version setting code
  xfs: catch inode allocation state mismatch corruption
  xfs: xfs_scrub_iallocbt_xref_rmap_inodes should use xref_set_corrupt
  xfs: flag inode corruption if parent ptr doesn't get us a real inode
  xfs: don't accept inode buffers with suspicious unlinked chains
  xfs: move inode extent size hint validation to libxfs
  xfs: record inode buf errors as a xref error in inobt scrubber
  xfs: remove xfs_buf parameter from inode scrub methods
  xfs: inode scrubber shouldn't bother with raw checks
  xfs: bmap scrubber should do rmap xref with bmap for sparse files
  xfs: refactor inode buffer verifier error logging
  xfs: refactor inode verifier error logging
  xfs: refactor bmap record validation
  xfs: sanity-check the unused space before trying to use it
  xfs: detect agfl count corruption and reset agfl
  xfs: unwind the try_again loop in xfs_log_force
  xfs: refactor xfs_log_force_lsn
  xfs: minor cleanup for xfs_reflink_end_cow
  ...
This commit is contained in:
Linus Torvalds 2018-04-04 12:44:02 -07:00
commit 547c43d777
71 changed files with 1386 additions and 1188 deletions

View File

@ -346,9 +346,8 @@ void inc_nlink(struct inode *inode)
}
EXPORT_SYMBOL(inc_nlink);
void address_space_init_once(struct address_space *mapping)
static void __address_space_init_once(struct address_space *mapping)
{
memset(mapping, 0, sizeof(*mapping));
INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC | __GFP_ACCOUNT);
spin_lock_init(&mapping->tree_lock);
init_rwsem(&mapping->i_mmap_rwsem);
@ -356,6 +355,12 @@ void address_space_init_once(struct address_space *mapping)
spin_lock_init(&mapping->private_lock);
mapping->i_mmap = RB_ROOT_CACHED;
}
void address_space_init_once(struct address_space *mapping)
{
memset(mapping, 0, sizeof(*mapping));
__address_space_init_once(mapping);
}
EXPORT_SYMBOL(address_space_init_once);
/*
@ -371,7 +376,7 @@ void inode_init_once(struct inode *inode)
INIT_LIST_HEAD(&inode->i_io_list);
INIT_LIST_HEAD(&inode->i_wb_list);
INIT_LIST_HEAD(&inode->i_lru);
address_space_init_once(&inode->i_data);
__address_space_init_once(&inode->i_data);
i_size_ordered_init(inode);
}
EXPORT_SYMBOL(inode_init_once);
@ -1533,7 +1538,6 @@ retry:
if (atomic_dec_and_lock(&inode->i_count, &inode->i_lock)) {
if (inode->i_nlink && (inode->i_state & I_DIRTY_TIME)) {
atomic_inc(&inode->i_count);
inode->i_state &= ~I_DIRTY_TIME;
spin_unlock(&inode->i_lock);
trace_writeback_lazytime_iput(inode);
mark_inode_dirty_sync(inode);

View File

@ -192,12 +192,8 @@ int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync)
if (!file->f_op->fsync)
return -EINVAL;
if (!datasync && (inode->i_state & I_DIRTY_TIME)) {
spin_lock(&inode->i_lock);
inode->i_state &= ~I_DIRTY_TIME;
spin_unlock(&inode->i_lock);
if (!datasync && (inode->i_state & I_DIRTY_TIME))
mark_inode_dirty_sync(inode);
}
return file->f_op->fsync(file, start, end, datasync);
}
EXPORT_SYMBOL(vfs_fsync_range);

View File

@ -46,13 +46,13 @@ kmem_alloc(size_t size, xfs_km_flags_t flags)
}
void *
kmem_zalloc_large(size_t size, xfs_km_flags_t flags)
kmem_alloc_large(size_t size, xfs_km_flags_t flags)
{
unsigned nofs_flag = 0;
void *ptr;
gfp_t lflags;
ptr = kmem_zalloc(size, flags | KM_MAYFAIL);
ptr = kmem_alloc(size, flags | KM_MAYFAIL);
if (ptr)
return ptr;
@ -67,7 +67,7 @@ kmem_zalloc_large(size_t size, xfs_km_flags_t flags)
nofs_flag = memalloc_nofs_save();
lflags = kmem_flags_convert(flags);
ptr = __vmalloc(size, lflags | __GFP_ZERO, PAGE_KERNEL);
ptr = __vmalloc(size, lflags, PAGE_KERNEL);
if (flags & KM_NOFS)
memalloc_nofs_restore(nofs_flag);

View File

@ -71,7 +71,7 @@ kmem_flags_convert(xfs_km_flags_t flags)
}
extern void *kmem_alloc(size_t, xfs_km_flags_t);
extern void *kmem_zalloc_large(size_t size, xfs_km_flags_t);
extern void *kmem_alloc_large(size_t size, xfs_km_flags_t);
extern void *kmem_realloc(const void *, size_t, xfs_km_flags_t);
static inline void kmem_free(const void *ptr)
{
@ -85,6 +85,12 @@ kmem_zalloc(size_t size, xfs_km_flags_t flags)
return kmem_alloc(size, flags | KM_ZERO);
}
static inline void *
kmem_zalloc_large(size_t size, xfs_km_flags_t flags)
{
return kmem_alloc_large(size, flags | KM_ZERO);
}
/*
* Zone interfaces
*/

View File

@ -95,13 +95,13 @@ xfs_ag_resv_critical(
switch (type) {
case XFS_AG_RESV_METADATA:
avail = pag->pagf_freeblks - pag->pag_agfl_resv.ar_reserved;
avail = pag->pagf_freeblks - pag->pag_rmapbt_resv.ar_reserved;
orig = pag->pag_meta_resv.ar_asked;
break;
case XFS_AG_RESV_AGFL:
case XFS_AG_RESV_RMAPBT:
avail = pag->pagf_freeblks + pag->pagf_flcount -
pag->pag_meta_resv.ar_reserved;
orig = pag->pag_agfl_resv.ar_asked;
orig = pag->pag_rmapbt_resv.ar_asked;
break;
default:
ASSERT(0);
@ -126,10 +126,10 @@ xfs_ag_resv_needed(
{
xfs_extlen_t len;
len = pag->pag_meta_resv.ar_reserved + pag->pag_agfl_resv.ar_reserved;
len = pag->pag_meta_resv.ar_reserved + pag->pag_rmapbt_resv.ar_reserved;
switch (type) {
case XFS_AG_RESV_METADATA:
case XFS_AG_RESV_AGFL:
case XFS_AG_RESV_RMAPBT:
len -= xfs_perag_resv(pag, type)->ar_reserved;
break;
case XFS_AG_RESV_NONE:
@ -160,10 +160,11 @@ __xfs_ag_resv_free(
if (pag->pag_agno == 0)
pag->pag_mount->m_ag_max_usable += resv->ar_asked;
/*
* AGFL blocks are always considered "free", so whatever
* was reserved at mount time must be given back at umount.
* RMAPBT blocks come from the AGFL and AGFL blocks are always
* considered "free", so whatever was reserved at mount time must be
* given back at umount.
*/
if (type == XFS_AG_RESV_AGFL)
if (type == XFS_AG_RESV_RMAPBT)
oldresv = resv->ar_orig_reserved;
else
oldresv = resv->ar_reserved;
@ -185,7 +186,7 @@ xfs_ag_resv_free(
int error;
int err2;
error = __xfs_ag_resv_free(pag, XFS_AG_RESV_AGFL);
error = __xfs_ag_resv_free(pag, XFS_AG_RESV_RMAPBT);
err2 = __xfs_ag_resv_free(pag, XFS_AG_RESV_METADATA);
if (err2 && !error)
error = err2;
@ -284,15 +285,15 @@ xfs_ag_resv_init(
}
}
/* Create the AGFL metadata reservation */
if (pag->pag_agfl_resv.ar_asked == 0) {
/* Create the RMAPBT metadata reservation */
if (pag->pag_rmapbt_resv.ar_asked == 0) {
ask = used = 0;
error = xfs_rmapbt_calc_reserves(mp, agno, &ask, &used);
if (error)
goto out;
error = __xfs_ag_resv_init(pag, XFS_AG_RESV_AGFL, ask, used);
error = __xfs_ag_resv_init(pag, XFS_AG_RESV_RMAPBT, ask, used);
if (error)
goto out;
}
@ -304,7 +305,7 @@ xfs_ag_resv_init(
return error;
ASSERT(xfs_perag_resv(pag, XFS_AG_RESV_METADATA)->ar_reserved +
xfs_perag_resv(pag, XFS_AG_RESV_AGFL)->ar_reserved <=
xfs_perag_resv(pag, XFS_AG_RESV_RMAPBT)->ar_reserved <=
pag->pagf_freeblks + pag->pagf_flcount);
#endif
out:
@ -325,8 +326,10 @@ xfs_ag_resv_alloc_extent(
trace_xfs_ag_resv_alloc_extent(pag, type, args->len);
switch (type) {
case XFS_AG_RESV_METADATA:
case XFS_AG_RESV_AGFL:
return;
case XFS_AG_RESV_METADATA:
case XFS_AG_RESV_RMAPBT:
resv = xfs_perag_resv(pag, type);
break;
default:
@ -341,7 +344,7 @@ xfs_ag_resv_alloc_extent(
len = min_t(xfs_extlen_t, args->len, resv->ar_reserved);
resv->ar_reserved -= len;
if (type == XFS_AG_RESV_AGFL)
if (type == XFS_AG_RESV_RMAPBT)
return;
/* Allocations of reserved blocks only need on-disk sb updates... */
xfs_trans_mod_sb(args->tp, XFS_TRANS_SB_RES_FDBLOCKS, -(int64_t)len);
@ -365,8 +368,10 @@ xfs_ag_resv_free_extent(
trace_xfs_ag_resv_free_extent(pag, type, len);
switch (type) {
case XFS_AG_RESV_METADATA:
case XFS_AG_RESV_AGFL:
return;
case XFS_AG_RESV_METADATA:
case XFS_AG_RESV_RMAPBT:
resv = xfs_perag_resv(pag, type);
break;
default:
@ -379,7 +384,7 @@ xfs_ag_resv_free_extent(
leftover = min_t(xfs_extlen_t, len, resv->ar_asked - resv->ar_reserved);
resv->ar_reserved += leftover;
if (type == XFS_AG_RESV_AGFL)
if (type == XFS_AG_RESV_RMAPBT)
return;
/* Freeing into the reserved pool only requires on-disk update... */
xfs_trans_mod_sb(tp, XFS_TRANS_SB_RES_FDBLOCKS, len);

View File

@ -32,4 +32,35 @@ void xfs_ag_resv_alloc_extent(struct xfs_perag *pag, enum xfs_ag_resv_type type,
void xfs_ag_resv_free_extent(struct xfs_perag *pag, enum xfs_ag_resv_type type,
struct xfs_trans *tp, xfs_extlen_t len);
/*
* RMAPBT reservation accounting wrappers. Since rmapbt blocks are sourced from
* the AGFL, they are allocated one at a time and the reservation updates don't
* require a transaction.
*/
static inline void
xfs_ag_resv_rmapbt_alloc(
struct xfs_mount *mp,
xfs_agnumber_t agno)
{
struct xfs_alloc_arg args = {0};
struct xfs_perag *pag;
args.len = 1;
pag = xfs_perag_get(mp, agno);
xfs_ag_resv_alloc_extent(pag, XFS_AG_RESV_RMAPBT, &args);
xfs_perag_put(pag);
}
static inline void
xfs_ag_resv_rmapbt_free(
struct xfs_mount *mp,
xfs_agnumber_t agno)
{
struct xfs_perag *pag;
pag = xfs_perag_get(mp, agno);
xfs_ag_resv_free_extent(pag, XFS_AG_RESV_RMAPBT, NULL, 1);
xfs_perag_put(pag);
}
#endif /* __XFS_AG_RESV_H__ */

View File

@ -53,6 +53,23 @@ STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
/*
* Size of the AGFL. For CRC-enabled filesystes we steal a couple of slots in
* the beginning of the block for a proper header with the location information
* and CRC.
*/
unsigned int
xfs_agfl_size(
struct xfs_mount *mp)
{
unsigned int size = mp->m_sb.sb_sectsize;
if (xfs_sb_version_hascrc(&mp->m_sb))
size -= sizeof(struct xfs_agfl);
return size / sizeof(xfs_agblock_t);
}
unsigned int
xfs_refc_block(
struct xfs_mount *mp)
@ -550,7 +567,7 @@ xfs_agfl_verify(
if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno)
return __this_address;
for (i = 0; i < XFS_AGFL_SIZE(mp); i++) {
for (i = 0; i < xfs_agfl_size(mp); i++) {
if (be32_to_cpu(agfl->agfl_bno[i]) != NULLAGBLOCK &&
be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks)
return __this_address;
@ -1564,7 +1581,6 @@ xfs_alloc_ag_vextent_small(
int *stat) /* status: 0-freelist, 1-normal/none */
{
struct xfs_owner_info oinfo;
struct xfs_perag *pag;
int error;
xfs_agblock_t fbno;
xfs_extlen_t flen;
@ -1616,18 +1632,13 @@ xfs_alloc_ag_vextent_small(
/*
* If we're feeding an AGFL block to something that
* doesn't live in the free space, we need to clear
* out the OWN_AG rmap and add the block back to
* the AGFL per-AG reservation.
* out the OWN_AG rmap.
*/
xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG);
error = xfs_rmap_free(args->tp, args->agbp, args->agno,
fbno, 1, &oinfo);
if (error)
goto error0;
pag = xfs_perag_get(args->mp, args->agno);
xfs_ag_resv_free_extent(pag, XFS_AG_RESV_AGFL,
args->tp, 1);
xfs_perag_put(pag);
*stat = 0;
return 0;
@ -1911,14 +1922,12 @@ xfs_free_ag_extent(
XFS_STATS_INC(mp, xs_freex);
XFS_STATS_ADD(mp, xs_freeb, len);
trace_xfs_free_extent(mp, agno, bno, len, type == XFS_AG_RESV_AGFL,
haveleft, haveright);
trace_xfs_free_extent(mp, agno, bno, len, type, haveleft, haveright);
return 0;
error0:
trace_xfs_free_extent(mp, agno, bno, len, type == XFS_AG_RESV_AGFL,
-1, -1);
trace_xfs_free_extent(mp, agno, bno, len, type, -1, -1);
if (bno_cur)
xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
if (cnt_cur)
@ -2053,6 +2062,93 @@ xfs_alloc_space_available(
return true;
}
/*
* Check the agfl fields of the agf for inconsistency or corruption. The purpose
* is to detect an agfl header padding mismatch between current and early v5
* kernels. This problem manifests as a 1-slot size difference between the
* on-disk flcount and the active [first, last] range of a wrapped agfl. This
* may also catch variants of agfl count corruption unrelated to padding. Either
* way, we'll reset the agfl and warn the user.
*
* Return true if a reset is required before the agfl can be used, false
* otherwise.
*/
static bool
xfs_agfl_needs_reset(
struct xfs_mount *mp,
struct xfs_agf *agf)
{
uint32_t f = be32_to_cpu(agf->agf_flfirst);
uint32_t l = be32_to_cpu(agf->agf_fllast);
uint32_t c = be32_to_cpu(agf->agf_flcount);
int agfl_size = xfs_agfl_size(mp);
int active;
/* no agfl header on v4 supers */
if (!xfs_sb_version_hascrc(&mp->m_sb))
return false;
/*
* The agf read verifier catches severe corruption of these fields.
* Repeat some sanity checks to cover a packed -> unpacked mismatch if
* the verifier allows it.
*/
if (f >= agfl_size || l >= agfl_size)
return true;
if (c > agfl_size)
return true;
/*
* Check consistency between the on-disk count and the active range. An
* agfl padding mismatch manifests as an inconsistent flcount.
*/
if (c && l >= f)
active = l - f + 1;
else if (c)
active = agfl_size - f + l + 1;
else
active = 0;
return active != c;
}
/*
* Reset the agfl to an empty state. Ignore/drop any existing blocks since the
* agfl content cannot be trusted. Warn the user that a repair is required to
* recover leaked blocks.
*
* The purpose of this mechanism is to handle filesystems affected by the agfl
* header padding mismatch problem. A reset keeps the filesystem online with a
* relatively minor free space accounting inconsistency rather than suffer the
* inevitable crash from use of an invalid agfl block.
*/
static void
xfs_agfl_reset(
struct xfs_trans *tp,
struct xfs_buf *agbp,
struct xfs_perag *pag)
{
struct xfs_mount *mp = tp->t_mountp;
struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
ASSERT(pag->pagf_agflreset);
trace_xfs_agfl_reset(mp, agf, 0, _RET_IP_);
xfs_warn(mp,
"WARNING: Reset corrupted AGFL on AG %u. %d blocks leaked. "
"Please unmount and run xfs_repair.",
pag->pag_agno, pag->pagf_flcount);
agf->agf_flfirst = 0;
agf->agf_fllast = cpu_to_be32(xfs_agfl_size(mp) - 1);
agf->agf_flcount = 0;
xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLFIRST | XFS_AGF_FLLAST |
XFS_AGF_FLCOUNT);
pag->pagf_flcount = 0;
pag->pagf_agflreset = false;
}
/*
* Decide whether to use this allocation group for this allocation.
* If so, fix up the btree freelist's size.
@ -2114,6 +2210,10 @@ xfs_alloc_fix_freelist(
}
}
/* reset a padding mismatched agfl before final free space check */
if (pag->pagf_agflreset)
xfs_agfl_reset(tp, agbp, pag);
/* If there isn't enough total space or single-extent, reject it. */
need = xfs_alloc_min_freelist(mp, pag);
if (!xfs_alloc_space_available(args, need, flags))
@ -2266,10 +2366,11 @@ xfs_alloc_get_freelist(
bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
be32_add_cpu(&agf->agf_flfirst, 1);
xfs_trans_brelse(tp, agflbp);
if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp))
if (be32_to_cpu(agf->agf_flfirst) == xfs_agfl_size(mp))
agf->agf_flfirst = 0;
pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
ASSERT(!pag->pagf_agflreset);
be32_add_cpu(&agf->agf_flcount, -1);
xfs_trans_agflist_delta(tp, -1);
pag->pagf_flcount--;
@ -2377,10 +2478,11 @@ xfs_alloc_put_freelist(
be32_to_cpu(agf->agf_seqno), &agflbp)))
return error;
be32_add_cpu(&agf->agf_fllast, 1);
if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp))
if (be32_to_cpu(agf->agf_fllast) == xfs_agfl_size(mp))
agf->agf_fllast = 0;
pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
ASSERT(!pag->pagf_agflreset);
be32_add_cpu(&agf->agf_flcount, 1);
xfs_trans_agflist_delta(tp, 1);
pag->pagf_flcount++;
@ -2395,7 +2497,7 @@ xfs_alloc_put_freelist(
xfs_alloc_log_agf(tp, agbp, logflags);
ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp));
ASSERT(be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp));
agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)];
@ -2428,9 +2530,9 @@ xfs_agf_verify(
if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) &&
XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) &&
be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) &&
be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp)))
be32_to_cpu(agf->agf_flfirst) < xfs_agfl_size(mp) &&
be32_to_cpu(agf->agf_fllast) < xfs_agfl_size(mp) &&
be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp)))
return __this_address;
if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 ||
@ -2588,6 +2690,7 @@ xfs_alloc_read_agf(
pag->pagb_count = 0;
pag->pagb_tree = RB_ROOT;
pag->pagf_init = 1;
pag->pagf_agflreset = xfs_agfl_needs_reset(mp, agf);
}
#ifdef DEBUG
else if (!XFS_FORCED_SHUTDOWN(mp)) {

View File

@ -26,6 +26,8 @@ struct xfs_trans;
extern struct workqueue_struct *xfs_alloc_wq;
unsigned int xfs_agfl_size(struct xfs_mount *mp);
/*
* Freespace allocation types. Argument to xfs_alloc_[v]extent.
*/

View File

@ -74,18 +74,13 @@ xfs_allocbt_alloc_block(
int error;
xfs_agblock_t bno;
XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
/* Allocate the new block from the freelist. If we can't, give up. */
error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp,
&bno, 1);
if (error) {
XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
if (error)
return error;
}
if (bno == NULLAGBLOCK) {
XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 0;
return 0;
}
@ -95,7 +90,6 @@ xfs_allocbt_alloc_block(
xfs_trans_agbtree_delta(cur->bc_tp, 1);
new->s = cpu_to_be32(bno);
XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 1;
return 0;
}

View File

@ -1244,8 +1244,9 @@ xfs_iread_extents(
xfs_warn(ip->i_mount,
"corrupt dinode %Lu, (btree extents).",
(unsigned long long) ip->i_ino);
XFS_CORRUPTION_ERROR(__func__,
XFS_ERRLEVEL_LOW, ip->i_mount, block);
xfs_inode_verifier_error(ip, -EFSCORRUPTED,
__func__, block, sizeof(*block),
__this_address);
error = -EFSCORRUPTED;
goto out_brelse;
}
@ -1261,11 +1262,15 @@ xfs_iread_extents(
*/
frp = XFS_BMBT_REC_ADDR(mp, block, 1);
for (j = 0; j < num_recs; j++, frp++, i++) {
xfs_failaddr_t fa;
xfs_bmbt_disk_get_all(frp, &new);
if (!xfs_bmbt_validate_extent(mp, whichfork, &new)) {
XFS_ERROR_REPORT("xfs_bmap_read_extents(2)",
XFS_ERRLEVEL_LOW, mp);
fa = xfs_bmap_validate_extent(ip, whichfork, &new);
if (fa) {
error = -EFSCORRUPTED;
xfs_inode_verifier_error(ip, error,
"xfs_iread_extents(2)",
frp, sizeof(*frp), fa);
goto out_brelse;
}
xfs_iext_insert(ip, &icur, &new, state);
@ -6154,3 +6159,39 @@ xfs_bmap_finish_one(
return error;
}
/* Check that an inode's extent does not have invalid flags or bad ranges. */
xfs_failaddr_t
xfs_bmap_validate_extent(
struct xfs_inode *ip,
int whichfork,
struct xfs_bmbt_irec *irec)
{
struct xfs_mount *mp = ip->i_mount;
xfs_fsblock_t endfsb;
bool isrt;
isrt = XFS_IS_REALTIME_INODE(ip);
endfsb = irec->br_startblock + irec->br_blockcount - 1;
if (isrt) {
if (!xfs_verify_rtbno(mp, irec->br_startblock))
return __this_address;
if (!xfs_verify_rtbno(mp, endfsb))
return __this_address;
} else {
if (!xfs_verify_fsbno(mp, irec->br_startblock))
return __this_address;
if (!xfs_verify_fsbno(mp, endfsb))
return __this_address;
if (XFS_FSB_TO_AGNO(mp, irec->br_startblock) !=
XFS_FSB_TO_AGNO(mp, endfsb))
return __this_address;
}
if (irec->br_state != XFS_EXT_NORM) {
if (whichfork != XFS_DATA_FORK)
return __this_address;
if (!xfs_sb_version_hasextflgbit(&mp->m_sb))
return __this_address;
}
return NULL;
}

View File

@ -274,4 +274,7 @@ static inline int xfs_bmap_fork_to_state(int whichfork)
}
}
xfs_failaddr_t xfs_bmap_validate_extent(struct xfs_inode *ip, int whichfork,
struct xfs_bmbt_irec *irec);
#endif /* __XFS_BMAP_H__ */

View File

@ -272,10 +272,10 @@ xfs_bmbt_alloc_block(
cur->bc_private.b.dfops->dop_low = true;
}
if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {
XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 0;
return 0;
}
ASSERT(args.len == 1);
cur->bc_private.b.firstblock = args.fsbno;
cur->bc_private.b.allocated++;
@ -286,12 +286,10 @@ xfs_bmbt_alloc_block(
new->l = cpu_to_be64(args.fsbno);
XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 1;
return 0;
error0:
XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
return error;
}

View File

@ -118,18 +118,4 @@ extern int xfs_bmbt_change_owner(struct xfs_trans *tp, struct xfs_inode *ip,
extern struct xfs_btree_cur *xfs_bmbt_init_cursor(struct xfs_mount *,
struct xfs_trans *, struct xfs_inode *, int);
/*
* Check that the extent does not contain an invalid unwritten extent flag.
*/
static inline bool xfs_bmbt_validate_extent(struct xfs_mount *mp, int whichfork,
struct xfs_bmbt_irec *irec)
{
if (irec->br_state == XFS_EXT_NORM)
return true;
if (whichfork == XFS_DATA_FORK &&
xfs_sb_version_hasextflgbit(&mp->m_sb))
return true;
return false;
}
#endif /* __XFS_BMAP_BTREE_H__ */

View File

@ -1438,8 +1438,6 @@ xfs_btree_log_keys(
int first,
int last)
{
XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
if (bp) {
xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
@ -1450,8 +1448,6 @@ xfs_btree_log_keys(
xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
xfs_ilog_fbroot(cur->bc_private.b.whichfork));
}
XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
}
/*
@ -1464,15 +1460,12 @@ xfs_btree_log_recs(
int first,
int last)
{
XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
xfs_trans_log_buf(cur->bc_tp, bp,
xfs_btree_rec_offset(cur, first),
xfs_btree_rec_offset(cur, last + 1) - 1);
XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
}
/*
@ -1485,8 +1478,6 @@ xfs_btree_log_ptrs(
int first, /* index of first pointer to log */
int last) /* index of last pointer to log */
{
XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
if (bp) {
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
@ -1501,7 +1492,6 @@ xfs_btree_log_ptrs(
xfs_ilog_fbroot(cur->bc_private.b.whichfork));
}
XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
}
/*
@ -1543,9 +1533,6 @@ xfs_btree_log_block(
XFS_BTREE_LBLOCK_CRC_LEN
};
XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
XFS_BTREE_TRACE_ARGBI(cur, bp, fields);
if (bp) {
int nbits;
@ -1573,8 +1560,6 @@ xfs_btree_log_block(
xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
xfs_ilog_fbroot(cur->bc_private.b.whichfork));
}
XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
}
/*
@ -1593,9 +1578,6 @@ xfs_btree_increment(
int error; /* error return value */
int lev;
XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
XFS_BTREE_TRACE_ARGI(cur, level);
ASSERT(level < cur->bc_nlevels);
/* Read-ahead to the right at this level. */
@ -1671,17 +1653,14 @@ xfs_btree_increment(
cur->bc_ptrs[lev] = 1;
}
out1:
XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 1;
return 0;
out0:
XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 0;
return 0;
error0:
XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
return error;
}
@ -1701,9 +1680,6 @@ xfs_btree_decrement(
int lev;
union xfs_btree_ptr ptr;
XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
XFS_BTREE_TRACE_ARGI(cur, level);
ASSERT(level < cur->bc_nlevels);
/* Read-ahead to the left at this level. */
@ -1769,17 +1745,14 @@ xfs_btree_decrement(
cur->bc_ptrs[lev] = xfs_btree_get_numrecs(block);
}
out1:
XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 1;
return 0;
out0:
XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 0;
return 0;
error0:
XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
return error;
}
@ -1881,9 +1854,6 @@ xfs_btree_lookup(
union xfs_btree_ptr *pp; /* ptr to btree block */
union xfs_btree_ptr ptr; /* ptr to btree block */
XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
XFS_BTREE_TRACE_ARGI(cur, dir);
XFS_BTREE_STATS_INC(cur, lookup);
/* No such thing as a zero-level tree. */
@ -1929,7 +1899,6 @@ xfs_btree_lookup(
ASSERT(level == 0 && cur->bc_nlevels == 1);
cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 0;
return 0;
}
@ -2004,7 +1973,6 @@ xfs_btree_lookup(
if (error)
goto error0;
XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 1;
return 0;
}
@ -2019,11 +1987,9 @@ xfs_btree_lookup(
*stat = 1;
else
*stat = 0;
XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
return 0;
error0:
XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
return error;
}
@ -2169,10 +2135,8 @@ __xfs_btree_updkeys(
trace_xfs_btree_updkeys(cur, level, bp);
#ifdef DEBUG
error = xfs_btree_check_block(cur, block, level, bp);
if (error) {
XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
if (error)
return error;
}
#endif
ptr = cur->bc_ptrs[level];
nlkey = xfs_btree_key_addr(cur, ptr, block);
@ -2224,9 +2188,6 @@ xfs_btree_update_keys(
if (cur->bc_flags & XFS_BTREE_OVERLAPPING)
return __xfs_btree_updkeys(cur, level, block, bp, false);
XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
XFS_BTREE_TRACE_ARGIK(cur, level, keyp);
/*
* Go up the tree from this level toward the root.
* At each level, update the key value to the value input.
@ -2241,10 +2202,8 @@ xfs_btree_update_keys(
block = xfs_btree_get_block(cur, level, &bp);
#ifdef DEBUG
error = xfs_btree_check_block(cur, block, level, bp);
if (error) {
XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
if (error)
return error;
}
#endif
ptr = cur->bc_ptrs[level];
kp = xfs_btree_key_addr(cur, ptr, block);
@ -2252,7 +2211,6 @@ xfs_btree_update_keys(
xfs_btree_log_keys(cur, bp, ptr, ptr);
}
XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
return 0;
}
@ -2272,9 +2230,6 @@ xfs_btree_update(
int ptr;
union xfs_btree_rec *rp;
XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
XFS_BTREE_TRACE_ARGR(cur, rec);
/* Pick up the current block. */
block = xfs_btree_get_block(cur, 0, &bp);
@ -2307,11 +2262,9 @@ xfs_btree_update(
goto error0;
}
XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
return 0;
error0:
XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
return error;
}
@ -2339,9 +2292,6 @@ xfs_btree_lshift(
int error; /* error return value */
int i;
XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
XFS_BTREE_TRACE_ARGI(cur, level);
if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
level == cur->bc_nlevels - 1)
goto out0;
@ -2500,21 +2450,17 @@ xfs_btree_lshift(
/* Slide the cursor value left one. */
cur->bc_ptrs[level]--;
XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 1;
return 0;
out0:
XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 0;
return 0;
error0:
XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
return error;
error1:
XFS_BTREE_TRACE_CURSOR(tcur, XBT_ERROR);
xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
return error;
}
@ -2541,9 +2487,6 @@ xfs_btree_rshift(
int error; /* error return value */
int i; /* loop counter */
XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
XFS_BTREE_TRACE_ARGI(cur, level);
if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
(level == cur->bc_nlevels - 1))
goto out0;
@ -2676,21 +2619,17 @@ xfs_btree_rshift(
xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 1;
return 0;
out0:
XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 0;
return 0;
error0:
XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
return error;
error1:
XFS_BTREE_TRACE_CURSOR(tcur, XBT_ERROR);
xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
return error;
}
@ -2726,9 +2665,6 @@ __xfs_btree_split(
int i;
#endif
XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
XFS_BTREE_TRACE_ARGIPK(cur, level, *ptrp, key);
XFS_BTREE_STATS_INC(cur, split);
/* Set up left block (current one). */
@ -2878,16 +2814,13 @@ __xfs_btree_split(
(*curp)->bc_ptrs[level + 1]++;
}
*ptrp = rptr;
XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 1;
return 0;
out0:
XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 0;
return 0;
error0:
XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
return error;
}
@ -2994,7 +2927,6 @@ xfs_btree_new_iroot(
int i; /* loop counter */
#endif
XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
XFS_BTREE_STATS_INC(cur, newroot);
ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
@ -3008,10 +2940,9 @@ xfs_btree_new_iroot(
error = cur->bc_ops->alloc_block(cur, pp, &nptr, stat);
if (error)
goto error0;
if (*stat == 0) {
XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
if (*stat == 0)
return 0;
}
XFS_BTREE_STATS_INC(cur, alloc);
/* Copy the root into a real block. */
@ -3074,10 +3005,8 @@ xfs_btree_new_iroot(
*logflags |=
XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork);
*stat = 1;
XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
return 0;
error0:
XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
return error;
}
@ -3102,7 +3031,6 @@ xfs_btree_new_root(
union xfs_btree_ptr rptr;
union xfs_btree_ptr lptr;
XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
XFS_BTREE_STATS_INC(cur, newroot);
/* initialise our start point from the cursor */
@ -3202,14 +3130,11 @@ xfs_btree_new_root(
xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
cur->bc_ptrs[cur->bc_nlevels] = nptr;
cur->bc_nlevels++;
XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 1;
return 0;
error0:
XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
return error;
out0:
XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 0;
return 0;
}
@ -3230,7 +3155,7 @@ xfs_btree_make_block_unfull(
if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
level == cur->bc_nlevels - 1) {
struct xfs_inode *ip = cur->bc_private.b.ip;
struct xfs_inode *ip = cur->bc_private.b.ip;
if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) {
/* A root block that can be made bigger. */
@ -3309,9 +3234,6 @@ xfs_btree_insrec(
#endif
xfs_daddr_t old_bn;
XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
XFS_BTREE_TRACE_ARGIPR(cur, level, *ptrp, &rec);
ncur = NULL;
lkey = &nkey;
@ -3324,14 +3246,12 @@ xfs_btree_insrec(
error = xfs_btree_new_root(cur, stat);
xfs_btree_set_ptr_null(cur, ptrp);
XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
return error;
}
/* If we're off the left edge, return failure. */
ptr = cur->bc_ptrs[level];
if (ptr == 0) {
XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 0;
return 0;
}
@ -3489,12 +3409,10 @@ xfs_btree_insrec(
*curp = ncur;
}
XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 1;
return 0;
error0:
XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
return error;
}
@ -3572,11 +3490,9 @@ xfs_btree_insert(
}
} while (!xfs_btree_ptr_is_null(cur, &nptr));
XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = i;
return 0;
error0:
XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
return error;
}
@ -3611,8 +3527,6 @@ xfs_btree_kill_iroot(
int i;
#endif
XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
ASSERT(cur->bc_nlevels > 1);
@ -3670,19 +3584,15 @@ xfs_btree_kill_iroot(
#ifdef DEBUG
for (i = 0; i < numrecs; i++) {
error = xfs_btree_check_ptr(cur, cpp, i, level - 1);
if (error) {
XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
if (error)
return error;
}
}
#endif
xfs_btree_copy_ptrs(cur, pp, cpp, numrecs);
error = xfs_btree_free_block(cur, cbp);
if (error) {
XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
if (error)
return error;
}
cur->bc_bufs[level - 1] = NULL;
be16_add_cpu(&block->bb_level, -1);
@ -3690,7 +3600,6 @@ xfs_btree_kill_iroot(
XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork));
cur->bc_nlevels--;
out0:
XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
return 0;
}
@ -3706,7 +3615,6 @@ xfs_btree_kill_root(
{
int error;
XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
XFS_BTREE_STATS_INC(cur, killroot);
/*
@ -3716,16 +3624,13 @@ xfs_btree_kill_root(
cur->bc_ops->set_root(cur, newroot, -1);
error = xfs_btree_free_block(cur, bp);
if (error) {
XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
if (error)
return error;
}
cur->bc_bufs[level] = NULL;
cur->bc_ra[level] = 0;
cur->bc_nlevels--;
XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
return 0;
}
@ -3744,7 +3649,6 @@ xfs_btree_dec_cursor(
return error;
}
XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 1;
return 0;
}
@ -3780,15 +3684,11 @@ xfs_btree_delrec(
struct xfs_btree_cur *tcur; /* temporary btree cursor */
int numrecs; /* temporary numrec count */
XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
XFS_BTREE_TRACE_ARGI(cur, level);
tcur = NULL;
/* Get the index of the entry being deleted, check for nothing there. */
ptr = cur->bc_ptrs[level];
if (ptr == 0) {
XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 0;
return 0;
}
@ -3805,7 +3705,6 @@ xfs_btree_delrec(
/* Fail if we're off the end of the block. */
if (ptr > numrecs) {
XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 0;
return 0;
}
@ -4080,7 +3979,7 @@ xfs_btree_delrec(
tcur = NULL;
if (level == 0)
cur->bc_ptrs[0]++;
XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 1;
return 0;
}
@ -4250,13 +4149,11 @@ xfs_btree_delrec(
* call updkeys directly.
*/
XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
/* Return value means the next level up has something to do. */
*stat = 2;
return 0;
error0:
XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
if (tcur)
xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
return error;
@ -4277,8 +4174,6 @@ xfs_btree_delete(
int i;
bool joined = false;
XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
/*
* Go up the tree, starting at leaf level.
*
@ -4314,11 +4209,9 @@ xfs_btree_delete(
}
}
XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = i;
return 0;
error0:
XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
return error;
}

View File

@ -473,25 +473,6 @@ static inline int xfs_btree_get_level(struct xfs_btree_block *block)
#define XFS_FILBLKS_MIN(a,b) min_t(xfs_filblks_t, (a), (b))
#define XFS_FILBLKS_MAX(a,b) max_t(xfs_filblks_t, (a), (b))
/*
* Trace hooks. Currently not implemented as they need to be ported
* over to the generic tracing functionality, which is some effort.
*
* i,j = integer (32 bit)
* b = btree block buffer (xfs_buf_t)
* p = btree ptr
* r = btree record
* k = btree key
*/
#define XFS_BTREE_TRACE_ARGBI(c, b, i)
#define XFS_BTREE_TRACE_ARGBII(c, b, i, j)
#define XFS_BTREE_TRACE_ARGI(c, i)
#define XFS_BTREE_TRACE_ARGIPK(c, i, p, s)
#define XFS_BTREE_TRACE_ARGIPR(c, i, p, r)
#define XFS_BTREE_TRACE_ARGIK(c, i, k)
#define XFS_BTREE_TRACE_ARGR(c, r)
#define XFS_BTREE_TRACE_CURSOR(c, t)
xfs_failaddr_t xfs_btree_sblock_v5hdr_verify(struct xfs_buf *bp);
xfs_failaddr_t xfs_btree_sblock_verify(struct xfs_buf *bp,
unsigned int max_recs);

View File

@ -173,7 +173,7 @@ extern void xfs_dir2_data_log_unused(struct xfs_da_args *args,
extern void xfs_dir2_data_make_free(struct xfs_da_args *args,
struct xfs_buf *bp, xfs_dir2_data_aoff_t offset,
xfs_dir2_data_aoff_t len, int *needlogp, int *needscanp);
extern void xfs_dir2_data_use_free(struct xfs_da_args *args,
extern int xfs_dir2_data_use_free(struct xfs_da_args *args,
struct xfs_buf *bp, struct xfs_dir2_data_unused *dup,
xfs_dir2_data_aoff_t offset, xfs_dir2_data_aoff_t len,
int *needlogp, int *needscanp);

View File

@ -451,15 +451,19 @@ xfs_dir2_block_addname(
* No stale entries, will use enddup space to hold new leaf.
*/
if (!btp->stale) {
xfs_dir2_data_aoff_t aoff;
/*
* Mark the space needed for the new leaf entry, now in use.
*/
xfs_dir2_data_use_free(args, bp, enddup,
(xfs_dir2_data_aoff_t)
((char *)enddup - (char *)hdr + be16_to_cpu(enddup->length) -
sizeof(*blp)),
(xfs_dir2_data_aoff_t)sizeof(*blp),
&needlog, &needscan);
aoff = (xfs_dir2_data_aoff_t)((char *)enddup - (char *)hdr +
be16_to_cpu(enddup->length) - sizeof(*blp));
error = xfs_dir2_data_use_free(args, bp, enddup, aoff,
(xfs_dir2_data_aoff_t)sizeof(*blp), &needlog,
&needscan);
if (error)
return error;
/*
* Update the tail (entry count).
*/
@ -541,9 +545,11 @@ xfs_dir2_block_addname(
/*
* Mark space for the data entry used.
*/
xfs_dir2_data_use_free(args, bp, dup,
(xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr),
(xfs_dir2_data_aoff_t)len, &needlog, &needscan);
error = xfs_dir2_data_use_free(args, bp, dup,
(xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr),
(xfs_dir2_data_aoff_t)len, &needlog, &needscan);
if (error)
return error;
/*
* Create the new data entry.
*/
@ -997,8 +1003,10 @@ xfs_dir2_leaf_to_block(
/*
* Use up the space at the end of the block (blp/btp).
*/
xfs_dir2_data_use_free(args, dbp, dup, args->geo->blksize - size, size,
&needlog, &needscan);
error = xfs_dir2_data_use_free(args, dbp, dup,
args->geo->blksize - size, size, &needlog, &needscan);
if (error)
return error;
/*
* Initialize the block tail.
*/
@ -1110,18 +1118,14 @@ xfs_dir2_sf_to_block(
* Add block 0 to the inode.
*/
error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE, &blkno);
if (error) {
kmem_free(sfp);
return error;
}
if (error)
goto out_free;
/*
* Initialize the data block, then convert it to block format.
*/
error = xfs_dir3_data_init(args, blkno, &bp);
if (error) {
kmem_free(sfp);
return error;
}
if (error)
goto out_free;
xfs_dir3_block_init(mp, tp, bp, dp);
hdr = bp->b_addr;
@ -1136,8 +1140,10 @@ xfs_dir2_sf_to_block(
*/
dup = dp->d_ops->data_unused_p(hdr);
needlog = needscan = 0;
xfs_dir2_data_use_free(args, bp, dup, args->geo->blksize - i,
i, &needlog, &needscan);
error = xfs_dir2_data_use_free(args, bp, dup, args->geo->blksize - i,
i, &needlog, &needscan);
if (error)
goto out_free;
ASSERT(needscan == 0);
/*
* Fill in the tail.
@ -1150,9 +1156,11 @@ xfs_dir2_sf_to_block(
/*
* Remove the freespace, we'll manage it.
*/
xfs_dir2_data_use_free(args, bp, dup,
(xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr),
be16_to_cpu(dup->length), &needlog, &needscan);
error = xfs_dir2_data_use_free(args, bp, dup,
(xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr),
be16_to_cpu(dup->length), &needlog, &needscan);
if (error)
goto out_free;
/*
* Create entry for .
*/
@ -1256,4 +1264,7 @@ xfs_dir2_sf_to_block(
xfs_dir2_block_log_tail(tp, bp);
xfs_dir3_data_check(dp, bp);
return 0;
out_free:
kmem_free(sfp);
return error;
}

View File

@ -932,10 +932,51 @@ xfs_dir2_data_make_free(
*needscanp = needscan;
}
/* Check our free data for obvious signs of corruption. */
static inline xfs_failaddr_t
xfs_dir2_data_check_free(
struct xfs_dir2_data_hdr *hdr,
struct xfs_dir2_data_unused *dup,
xfs_dir2_data_aoff_t offset,
xfs_dir2_data_aoff_t len)
{
if (hdr->magic != cpu_to_be32(XFS_DIR2_DATA_MAGIC) &&
hdr->magic != cpu_to_be32(XFS_DIR3_DATA_MAGIC) &&
hdr->magic != cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) &&
hdr->magic != cpu_to_be32(XFS_DIR3_BLOCK_MAGIC))
return __this_address;
if (be16_to_cpu(dup->freetag) != XFS_DIR2_DATA_FREE_TAG)
return __this_address;
if (offset < (char *)dup - (char *)hdr)
return __this_address;
if (offset + len > (char *)dup + be16_to_cpu(dup->length) - (char *)hdr)
return __this_address;
if ((char *)dup - (char *)hdr !=
be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)))
return __this_address;
return NULL;
}
/* Sanity-check a new bestfree entry. */
static inline xfs_failaddr_t
xfs_dir2_data_check_new_free(
struct xfs_dir2_data_hdr *hdr,
struct xfs_dir2_data_free *dfp,
struct xfs_dir2_data_unused *newdup)
{
if (dfp == NULL)
return __this_address;
if (dfp->length != newdup->length)
return __this_address;
if (be16_to_cpu(dfp->offset) != (char *)newdup - (char *)hdr)
return __this_address;
return NULL;
}
/*
* Take a byte range out of an existing unused space and make it un-free.
*/
void
int
xfs_dir2_data_use_free(
struct xfs_da_args *args,
struct xfs_buf *bp,
@ -947,23 +988,19 @@ xfs_dir2_data_use_free(
{
xfs_dir2_data_hdr_t *hdr; /* data block header */
xfs_dir2_data_free_t *dfp; /* bestfree pointer */
xfs_dir2_data_unused_t *newdup; /* new unused entry */
xfs_dir2_data_unused_t *newdup2; /* another new unused entry */
struct xfs_dir2_data_free *bf;
xfs_failaddr_t fa;
int matchback; /* matches end of freespace */
int matchfront; /* matches start of freespace */
int needscan; /* need to regen bestfree */
xfs_dir2_data_unused_t *newdup; /* new unused entry */
xfs_dir2_data_unused_t *newdup2; /* another new unused entry */
int oldlen; /* old unused entry's length */
struct xfs_dir2_data_free *bf;
hdr = bp->b_addr;
ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
ASSERT(be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG);
ASSERT(offset >= (char *)dup - (char *)hdr);
ASSERT(offset + len <= (char *)dup + be16_to_cpu(dup->length) - (char *)hdr);
ASSERT((char *)dup - (char *)hdr == be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)));
fa = xfs_dir2_data_check_free(hdr, dup, offset, len);
if (fa)
goto corrupt;
/*
* Look up the entry in the bestfree table.
*/
@ -1008,9 +1045,9 @@ xfs_dir2_data_use_free(
xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp);
dfp = xfs_dir2_data_freeinsert(hdr, bf, newdup,
needlogp);
ASSERT(dfp != NULL);
ASSERT(dfp->length == newdup->length);
ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)hdr);
fa = xfs_dir2_data_check_new_free(hdr, dfp, newdup);
if (fa)
goto corrupt;
/*
* If we got inserted at the last slot,
* that means we don't know if there was a better
@ -1036,9 +1073,9 @@ xfs_dir2_data_use_free(
xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp);
dfp = xfs_dir2_data_freeinsert(hdr, bf, newdup,
needlogp);
ASSERT(dfp != NULL);
ASSERT(dfp->length == newdup->length);
ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)hdr);
fa = xfs_dir2_data_check_new_free(hdr, dfp, newdup);
if (fa)
goto corrupt;
/*
* If we got inserted at the last slot,
* that means we don't know if there was a better
@ -1084,6 +1121,11 @@ xfs_dir2_data_use_free(
}
}
*needscanp = needscan;
return 0;
corrupt:
xfs_corruption_error(__func__, XFS_ERRLEVEL_LOW, args->dp->i_mount,
hdr, __FILE__, __LINE__, fa);
return -EFSCORRUPTED;
}
/* Find the end of the entry data in a data/block format dir block. */

View File

@ -877,9 +877,13 @@ xfs_dir2_leaf_addname(
/*
* Mark the initial part of our freespace in use for the new entry.
*/
xfs_dir2_data_use_free(args, dbp, dup,
(xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length,
&needlog, &needscan);
error = xfs_dir2_data_use_free(args, dbp, dup,
(xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr),
length, &needlog, &needscan);
if (error) {
xfs_trans_brelse(tp, lbp);
return error;
}
/*
* Initialize our new entry (at last).
*/
@ -1415,7 +1419,8 @@ xfs_dir2_leaf_removename(
oldbest = be16_to_cpu(bf[0].length);
ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
bestsp = xfs_dir2_leaf_bests_p(ltp);
ASSERT(be16_to_cpu(bestsp[db]) == oldbest);
if (be16_to_cpu(bestsp[db]) != oldbest)
return -EFSCORRUPTED;
/*
* Mark the former data entry unused.
*/

View File

@ -387,8 +387,9 @@ xfs_dir2_leaf_to_node(
dp->d_ops->free_hdr_from_disk(&freehdr, free);
leaf = lbp->b_addr;
ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
ASSERT(be32_to_cpu(ltp->bestcount) <=
(uint)dp->i_d.di_size / args->geo->blksize);
if (be32_to_cpu(ltp->bestcount) >
(uint)dp->i_d.di_size / args->geo->blksize)
return -EFSCORRUPTED;
/*
* Copy freespace entries from the leaf block to the new block.
@ -1728,6 +1729,7 @@ xfs_dir2_node_addname_int(
__be16 *bests;
struct xfs_dir3_icfree_hdr freehdr;
struct xfs_dir2_data_free *bf;
xfs_dir2_data_aoff_t aoff;
dp = args->dp;
mp = dp->i_mount;
@ -2022,9 +2024,13 @@ xfs_dir2_node_addname_int(
/*
* Mark the first part of the unused space, inuse for us.
*/
xfs_dir2_data_use_free(args, dbp, dup,
(xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length,
&needlog, &needscan);
aoff = (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr);
error = xfs_dir2_data_use_free(args, dbp, dup, aoff, length,
&needlog, &needscan);
if (error) {
xfs_trans_brelse(tp, dbp);
return error;
}
/*
* Fill in the new entry and log it.
*/

View File

@ -803,24 +803,13 @@ typedef struct xfs_agi {
&(XFS_BUF_TO_AGFL(bp)->agfl_bno[0]) : \
(__be32 *)(bp)->b_addr)
/*
* Size of the AGFL. For CRC-enabled filesystes we steal a couple of
* slots in the beginning of the block for a proper header with the
* location information and CRC.
*/
#define XFS_AGFL_SIZE(mp) \
(((mp)->m_sb.sb_sectsize - \
(xfs_sb_version_hascrc(&((mp)->m_sb)) ? \
sizeof(struct xfs_agfl) : 0)) / \
sizeof(xfs_agblock_t))
typedef struct xfs_agfl {
__be32 agfl_magicnum;
__be32 agfl_seqno;
uuid_t agfl_uuid;
__be64 agfl_lsn;
__be32 agfl_crc;
__be32 agfl_bno[]; /* actually XFS_AGFL_SIZE(mp) */
__be32 agfl_bno[]; /* actually xfs_agfl_size(mp) */
} __attribute__((packed)) xfs_agfl_t;
#define XFS_AGFL_CRC_OFF offsetof(struct xfs_agfl, agfl_crc)

View File

@ -93,8 +93,6 @@ __xfs_inobt_alloc_block(
int error; /* error return value */
xfs_agblock_t sbno = be32_to_cpu(start->s);
XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
memset(&args, 0, sizeof(args));
args.tp = cur->bc_tp;
args.mp = cur->bc_mp;
@ -107,17 +105,14 @@ __xfs_inobt_alloc_block(
args.resv = resv;
error = xfs_alloc_vextent(&args);
if (error) {
XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
if (error)
return error;
}
if (args.fsbno == NULLFSBLOCK) {
XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 0;
return 0;
}
ASSERT(args.len == 1);
XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
new->s = cpu_to_be32(XFS_FSB_TO_AGBNO(args.mp, args.fsbno));
*stat = 1;

View File

@ -93,20 +93,26 @@ xfs_inode_buf_verify(
bool readahead)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
xfs_agnumber_t agno;
int i;
int ni;
/*
* Validate the magic number and version of every inode in the buffer
*/
agno = xfs_daddr_to_agno(mp, XFS_BUF_ADDR(bp));
ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock;
for (i = 0; i < ni; i++) {
int di_ok;
xfs_dinode_t *dip;
xfs_agino_t unlinked_ino;
dip = xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog));
unlinked_ino = be32_to_cpu(dip->di_next_unlinked);
di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
xfs_dinode_good_version(mp, dip->di_version);
xfs_dinode_good_version(mp, dip->di_version) &&
(unlinked_ino == NULLAGINO ||
xfs_verify_agino(mp, agno, unlinked_ino));
if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
XFS_ERRTAG_ITOBP_INOTOBP))) {
if (readahead) {
@ -115,16 +121,18 @@ xfs_inode_buf_verify(
return;
}
xfs_verifier_error(bp, -EFSCORRUPTED, __this_address);
#ifdef DEBUG
xfs_alert(mp,
"bad inode magic/vsn daddr %lld #%d (magic=%x)",
(unsigned long long)bp->b_bn, i,
be16_to_cpu(dip->di_magic));
#endif
xfs_buf_verifier_error(bp, -EFSCORRUPTED,
__func__, dip, sizeof(*dip),
NULL);
return;
}
}
xfs_inobp_check(mp, bp);
}
@ -564,10 +572,7 @@ xfs_iread(
/* initialise the on-disk inode core */
memset(&ip->i_d, 0, sizeof(ip->i_d));
VFS_I(ip)->i_generation = prandom_u32();
if (xfs_sb_version_hascrc(&mp->m_sb))
ip->i_d.di_version = 3;
else
ip->i_d.di_version = 2;
ip->i_d.di_version = 3;
return 0;
}
@ -649,3 +654,108 @@ xfs_iread(
xfs_trans_brelse(tp, bp);
return error;
}
/*
* Validate di_extsize hint.
*
* The rules are documented at xfs_ioctl_setattr_check_extsize().
* These functions must be kept in sync with each other.
*/
xfs_failaddr_t
xfs_inode_validate_extsize(
struct xfs_mount *mp,
uint32_t extsize,
uint16_t mode,
uint16_t flags)
{
bool rt_flag;
bool hint_flag;
bool inherit_flag;
uint32_t extsize_bytes;
uint32_t blocksize_bytes;
rt_flag = (flags & XFS_DIFLAG_REALTIME);
hint_flag = (flags & XFS_DIFLAG_EXTSIZE);
inherit_flag = (flags & XFS_DIFLAG_EXTSZINHERIT);
extsize_bytes = XFS_FSB_TO_B(mp, extsize);
if (rt_flag)
blocksize_bytes = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog;
else
blocksize_bytes = mp->m_sb.sb_blocksize;
if ((hint_flag || inherit_flag) && !(S_ISDIR(mode) || S_ISREG(mode)))
return __this_address;
if (hint_flag && !S_ISREG(mode))
return __this_address;
if (inherit_flag && !S_ISDIR(mode))
return __this_address;
if ((hint_flag || inherit_flag) && extsize == 0)
return __this_address;
if (!(hint_flag || inherit_flag) && extsize != 0)
return __this_address;
if (extsize_bytes % blocksize_bytes)
return __this_address;
if (extsize > MAXEXTLEN)
return __this_address;
if (!rt_flag && extsize > mp->m_sb.sb_agblocks / 2)
return __this_address;
return NULL;
}
/*
* Validate di_cowextsize hint.
*
* The rules are documented at xfs_ioctl_setattr_check_cowextsize().
* These functions must be kept in sync with each other.
*/
xfs_failaddr_t
xfs_inode_validate_cowextsize(
struct xfs_mount *mp,
uint32_t cowextsize,
uint16_t mode,
uint16_t flags,
uint64_t flags2)
{
bool rt_flag;
bool hint_flag;
uint32_t cowextsize_bytes;
rt_flag = (flags & XFS_DIFLAG_REALTIME);
hint_flag = (flags2 & XFS_DIFLAG2_COWEXTSIZE);
cowextsize_bytes = XFS_FSB_TO_B(mp, cowextsize);
if (hint_flag && !xfs_sb_version_hasreflink(&mp->m_sb))
return __this_address;
if (hint_flag && !(S_ISDIR(mode) || S_ISREG(mode)))
return __this_address;
if (hint_flag && cowextsize == 0)
return __this_address;
if (!hint_flag && cowextsize != 0)
return __this_address;
if (hint_flag && rt_flag)
return __this_address;
if (cowextsize_bytes % mp->m_sb.sb_blocksize)
return __this_address;
if (cowextsize > MAXEXTLEN)
return __this_address;
if (cowextsize > mp->m_sb.sb_agblocks / 2)
return __this_address;
return NULL;
}

View File

@ -84,5 +84,10 @@ void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *);
xfs_failaddr_t xfs_dinode_verify(struct xfs_mount *mp, xfs_ino_t ino,
struct xfs_dinode *dip);
xfs_failaddr_t xfs_inode_validate_extsize(struct xfs_mount *mp,
uint32_t extsize, uint16_t mode, uint16_t flags);
xfs_failaddr_t xfs_inode_validate_cowextsize(struct xfs_mount *mp,
uint32_t cowextsize, uint16_t mode, uint16_t flags,
uint64_t flags2);
#endif /* __XFS_INODE_BUF_H__ */

View File

@ -195,8 +195,9 @@ xfs_iformat_local(
"corrupt inode %Lu (bad size %d for local fork, size = %d).",
(unsigned long long) ip->i_ino, size,
XFS_DFORK_SIZE(dip, ip->i_mount, whichfork));
XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW,
ip->i_mount, dip);
xfs_inode_verifier_error(ip, -EFSCORRUPTED,
"xfs_iformat_local", dip, sizeof(*dip),
__this_address);
return -EFSCORRUPTED;
}
@ -231,8 +232,9 @@ xfs_iformat_extents(
if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, mp, whichfork))) {
xfs_warn(ip->i_mount, "corrupt inode %Lu ((a)extents = %d).",
(unsigned long long) ip->i_ino, nex);
XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW,
mp, dip);
xfs_inode_verifier_error(ip, -EFSCORRUPTED,
"xfs_iformat_extents(1)", dip, sizeof(*dip),
__this_address);
return -EFSCORRUPTED;
}
@ -245,10 +247,14 @@ xfs_iformat_extents(
xfs_iext_first(ifp, &icur);
for (i = 0; i < nex; i++, dp++) {
xfs_failaddr_t fa;
xfs_bmbt_disk_get_all(dp, &new);
if (!xfs_bmbt_validate_extent(mp, whichfork, &new)) {
XFS_ERROR_REPORT("xfs_iformat_extents(2)",
XFS_ERRLEVEL_LOW, mp);
fa = xfs_bmap_validate_extent(ip, whichfork, &new);
if (fa) {
xfs_inode_verifier_error(ip, -EFSCORRUPTED,
"xfs_iformat_extents(2)",
dp, sizeof(*dp), fa);
return -EFSCORRUPTED;
}
@ -305,8 +311,9 @@ xfs_iformat_btree(
level == 0 || level > XFS_BTREE_MAXLEVELS) {
xfs_warn(mp, "corrupt inode %Lu (btree).",
(unsigned long long) ip->i_ino);
XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW,
mp, dip);
xfs_inode_verifier_error(ip, -EFSCORRUPTED,
"xfs_iformat_btree", dfp, size,
__this_address);
return -EFSCORRUPTED;
}
@ -595,7 +602,7 @@ xfs_iextents_copy(
for_each_xfs_iext(ifp, &icur, &rec) {
if (isnullstartblock(rec.br_startblock))
continue;
ASSERT(xfs_bmbt_validate_extent(ip->i_mount, whichfork, &rec));
ASSERT(xfs_bmap_validate_extent(ip, whichfork, &rec) == NULL);
xfs_bmbt_disk_set_all(dp, &rec);
trace_xfs_write_extent(ip, &icur, state, _RET_IP_);
copied += sizeof(struct xfs_bmbt_rec);

View File

@ -79,8 +79,6 @@ xfs_refcountbt_alloc_block(
struct xfs_alloc_arg args; /* block allocation args */
int error; /* error return value */
XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
memset(&args, 0, sizeof(args));
args.tp = cur->bc_tp;
args.mp = cur->bc_mp;
@ -98,7 +96,6 @@ xfs_refcountbt_alloc_block(
trace_xfs_refcountbt_alloc_block(cur->bc_mp, cur->bc_private.a.agno,
args.agbno, 1);
if (args.fsbno == NULLFSBLOCK) {
XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 0;
return 0;
}
@ -109,12 +106,10 @@ xfs_refcountbt_alloc_block(
be32_add_cpu(&agf->agf_refcount_blocks, 1);
xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_REFCOUNT_BLOCKS);
XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 1;
return 0;
out_error:
XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
return error;
}

View File

@ -104,20 +104,15 @@ xfs_rmapbt_alloc_block(
int error;
xfs_agblock_t bno;
XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
/* Allocate the new block from the freelist. If we can't, give up. */
error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp,
&bno, 1);
if (error) {
XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
if (error)
return error;
}
trace_xfs_rmapbt_alloc_block(cur->bc_mp, cur->bc_private.a.agno,
bno, 1);
if (bno == NULLAGBLOCK) {
XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 0;
return 0;
}
@ -130,7 +125,8 @@ xfs_rmapbt_alloc_block(
be32_add_cpu(&agf->agf_rmap_blocks, 1);
xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS);
XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
xfs_ag_resv_rmapbt_alloc(cur->bc_mp, cur->bc_private.a.agno);
*stat = 1;
return 0;
}
@ -158,6 +154,8 @@ xfs_rmapbt_free_block(
XFS_EXTENT_BUSY_SKIP_DISCARD);
xfs_trans_agbtree_delta(cur->bc_tp, -1);
xfs_ag_resv_rmapbt_free(cur->bc_mp, cur->bc_private.a.agno);
return 0;
}

View File

@ -731,7 +731,6 @@ xfs_sb_mount_common(
struct xfs_sb *sbp)
{
mp->m_agfrotor = mp->m_agirotor = 0;
spin_lock_init(&mp->m_agirotor_lock);
mp->m_maxagi = mp->m_sb.sb_agcount;
mp->m_blkbit_log = sbp->sb_blocklog + XFS_NBBYLOG;
mp->m_blkbb_log = sbp->sb_blocklog - BBSHIFT;

View File

@ -80,7 +80,7 @@ xfs_scrub_walk_agfl(
}
/* first to the end */
for (i = flfirst; i < XFS_AGFL_SIZE(mp); i++) {
for (i = flfirst; i < xfs_agfl_size(mp); i++) {
error = fn(sc, be32_to_cpu(agfl_bno[i]), priv);
if (error)
return error;
@ -664,7 +664,7 @@ xfs_scrub_agf(
if (agfl_last > agfl_first)
fl_count = agfl_last - agfl_first + 1;
else
fl_count = XFS_AGFL_SIZE(mp) - agfl_first + agfl_last + 1;
fl_count = xfs_agfl_size(mp) - agfl_first + agfl_last + 1;
if (agfl_count != 0 && fl_count != agfl_count)
xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);
@ -791,7 +791,7 @@ xfs_scrub_agfl(
/* Allocate buffer to ensure uniqueness of AGFL entries. */
agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
agflcount = be32_to_cpu(agf->agf_flcount);
if (agflcount > XFS_AGFL_SIZE(sc->mp)) {
if (agflcount > xfs_agfl_size(sc->mp)) {
xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);
goto out;
}

View File

@ -98,7 +98,7 @@ xfs_scrub_xattr_listent(
if (flags & XFS_ATTR_INCOMPLETE) {
/* Incomplete attr key, just mark the inode for preening. */
xfs_scrub_ino_set_preen(sx->sc, context->dp->i_ino, NULL);
xfs_scrub_ino_set_preen(sx->sc, context->dp->i_ino);
return;
}

View File

@ -37,6 +37,7 @@
#include "xfs_bmap_util.h"
#include "xfs_bmap_btree.h"
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
#include "xfs_refcount.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
@ -423,6 +424,169 @@ xfs_scrub_bmap_btree(
return error;
}
struct xfs_scrub_bmap_check_rmap_info {
struct xfs_scrub_context *sc;
int whichfork;
struct xfs_iext_cursor icur;
};
/* Can we find bmaps that fit this rmap? */
STATIC int
xfs_scrub_bmap_check_rmap(
struct xfs_btree_cur *cur,
struct xfs_rmap_irec *rec,
void *priv)
{
struct xfs_bmbt_irec irec;
struct xfs_scrub_bmap_check_rmap_info *sbcri = priv;
struct xfs_ifork *ifp;
struct xfs_scrub_context *sc = sbcri->sc;
bool have_map;
/* Is this even the right fork? */
if (rec->rm_owner != sc->ip->i_ino)
return 0;
if ((sbcri->whichfork == XFS_ATTR_FORK) ^
!!(rec->rm_flags & XFS_RMAP_ATTR_FORK))
return 0;
if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK)
return 0;
/* Now look up the bmbt record. */
ifp = XFS_IFORK_PTR(sc->ip, sbcri->whichfork);
if (!ifp) {
xfs_scrub_fblock_set_corrupt(sc, sbcri->whichfork,
rec->rm_offset);
goto out;
}
have_map = xfs_iext_lookup_extent(sc->ip, ifp, rec->rm_offset,
&sbcri->icur, &irec);
if (!have_map)
xfs_scrub_fblock_set_corrupt(sc, sbcri->whichfork,
rec->rm_offset);
/*
* bmap extent record lengths are constrained to 2^21 blocks in length
* because of space constraints in the on-disk metadata structure.
* However, rmap extent record lengths are constrained only by AG
* length, so we have to loop through the bmbt to make sure that the
* entire rmap is covered by bmbt records.
*/
while (have_map) {
if (irec.br_startoff != rec->rm_offset)
xfs_scrub_fblock_set_corrupt(sc, sbcri->whichfork,
rec->rm_offset);
if (irec.br_startblock != XFS_AGB_TO_FSB(sc->mp,
cur->bc_private.a.agno, rec->rm_startblock))
xfs_scrub_fblock_set_corrupt(sc, sbcri->whichfork,
rec->rm_offset);
if (irec.br_blockcount > rec->rm_blockcount)
xfs_scrub_fblock_set_corrupt(sc, sbcri->whichfork,
rec->rm_offset);
if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
break;
rec->rm_startblock += irec.br_blockcount;
rec->rm_offset += irec.br_blockcount;
rec->rm_blockcount -= irec.br_blockcount;
if (rec->rm_blockcount == 0)
break;
have_map = xfs_iext_next_extent(ifp, &sbcri->icur, &irec);
if (!have_map)
xfs_scrub_fblock_set_corrupt(sc, sbcri->whichfork,
rec->rm_offset);
}
out:
if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
return XFS_BTREE_QUERY_RANGE_ABORT;
return 0;
}
/* Make sure each rmap has a corresponding bmbt entry. */
STATIC int
xfs_scrub_bmap_check_ag_rmaps(
struct xfs_scrub_context *sc,
int whichfork,
xfs_agnumber_t agno)
{
struct xfs_scrub_bmap_check_rmap_info sbcri;
struct xfs_btree_cur *cur;
struct xfs_buf *agf;
int error;
error = xfs_alloc_read_agf(sc->mp, sc->tp, agno, 0, &agf);
if (error)
return error;
cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, agf, agno);
if (!cur) {
error = -ENOMEM;
goto out_agf;
}
sbcri.sc = sc;
sbcri.whichfork = whichfork;
error = xfs_rmap_query_all(cur, xfs_scrub_bmap_check_rmap, &sbcri);
if (error == XFS_BTREE_QUERY_RANGE_ABORT)
error = 0;
xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
out_agf:
xfs_trans_brelse(sc->tp, agf);
return error;
}
/* Make sure each rmap has a corresponding bmbt entry. */
STATIC int
xfs_scrub_bmap_check_rmaps(
struct xfs_scrub_context *sc,
int whichfork)
{
loff_t size;
xfs_agnumber_t agno;
int error;
if (!xfs_sb_version_hasrmapbt(&sc->mp->m_sb) ||
whichfork == XFS_COW_FORK ||
(sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
return 0;
/* Don't support realtime rmap checks yet. */
if (XFS_IS_REALTIME_INODE(sc->ip) && whichfork == XFS_DATA_FORK)
return 0;
/*
* Only do this for complex maps that are in btree format, or for
* situations where we would seem to have a size but zero extents.
* The inode repair code can zap broken iforks, which means we have
* to flag this bmap as corrupt if there are rmaps that need to be
* reattached.
*/
switch (whichfork) {
case XFS_DATA_FORK:
size = i_size_read(VFS_I(sc->ip));
break;
case XFS_ATTR_FORK:
size = XFS_IFORK_Q(sc->ip);
break;
default:
size = 0;
break;
}
if (XFS_IFORK_FORMAT(sc->ip, whichfork) != XFS_DINODE_FMT_BTREE &&
(size == 0 || XFS_IFORK_NEXTENTS(sc->ip, whichfork) > 0))
return 0;
for (agno = 0; agno < sc->mp->m_sb.sb_agcount; agno++) {
error = xfs_scrub_bmap_check_ag_rmaps(sc, whichfork, agno);
if (error)
return error;
if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
break;
}
return 0;
}
/*
* Scrub an inode fork's block mappings.
*
@ -457,16 +621,16 @@ xfs_scrub_bmap(
goto out;
/* No CoW forks on non-reflink inodes/filesystems. */
if (!xfs_is_reflink_inode(ip)) {
xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino, NULL);
xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino);
goto out;
}
break;
case XFS_ATTR_FORK:
if (!ifp)
goto out;
goto out_check_rmap;
if (!xfs_sb_version_hasattr(&mp->m_sb) &&
!xfs_sb_version_hasattr2(&mp->m_sb))
xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino, NULL);
xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino);
break;
default:
ASSERT(whichfork == XFS_DATA_FORK);
@ -534,6 +698,10 @@ xfs_scrub_bmap(
goto out;
}
out_check_rmap:
error = xfs_scrub_bmap_check_rmaps(sc, whichfork);
if (!xfs_scrub_fblock_xref_process_error(sc, whichfork, 0, &error))
goto out;
out:
return error;
}

View File

@ -213,12 +213,10 @@ xfs_scrub_block_set_preen(
void
xfs_scrub_ino_set_preen(
struct xfs_scrub_context *sc,
xfs_ino_t ino,
struct xfs_buf *bp)
xfs_ino_t ino)
{
sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN;
trace_xfs_scrub_ino_preen(sc, ino, bp ? bp->b_bn : 0,
__return_address);
trace_xfs_scrub_ino_preen(sc, ino, __return_address);
}
/* Record a corrupt block. */
@ -249,22 +247,20 @@ xfs_scrub_block_xref_set_corrupt(
void
xfs_scrub_ino_set_corrupt(
struct xfs_scrub_context *sc,
xfs_ino_t ino,
struct xfs_buf *bp)
xfs_ino_t ino)
{
sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
trace_xfs_scrub_ino_error(sc, ino, bp ? bp->b_bn : 0, __return_address);
trace_xfs_scrub_ino_error(sc, ino, __return_address);
}
/* Record a corruption while cross-referencing with an inode. */
void
xfs_scrub_ino_xref_set_corrupt(
struct xfs_scrub_context *sc,
xfs_ino_t ino,
struct xfs_buf *bp)
xfs_ino_t ino)
{
sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
trace_xfs_scrub_ino_error(sc, ino, bp ? bp->b_bn : 0, __return_address);
trace_xfs_scrub_ino_error(sc, ino, __return_address);
}
/* Record corruption in a block indexed by a file fork. */
@ -296,12 +292,10 @@ xfs_scrub_fblock_xref_set_corrupt(
void
xfs_scrub_ino_set_warning(
struct xfs_scrub_context *sc,
xfs_ino_t ino,
struct xfs_buf *bp)
xfs_ino_t ino)
{
sc->sm->sm_flags |= XFS_SCRUB_OFLAG_WARNING;
trace_xfs_scrub_ino_warning(sc, ino, bp ? bp->b_bn : 0,
__return_address);
trace_xfs_scrub_ino_warning(sc, ino, __return_address);
}
/* Warn about a block indexed by a file fork that needs review. */
@ -619,7 +613,7 @@ xfs_scrub_checkpoint_log(
{
int error;
error = _xfs_log_force(mp, XFS_LOG_SYNC, NULL);
error = xfs_log_force(mp, XFS_LOG_SYNC);
if (error)
return error;
xfs_ail_push_all_sync(mp->m_ail);

View File

@ -63,25 +63,22 @@ bool xfs_scrub_fblock_xref_process_error(struct xfs_scrub_context *sc,
void xfs_scrub_block_set_preen(struct xfs_scrub_context *sc,
struct xfs_buf *bp);
void xfs_scrub_ino_set_preen(struct xfs_scrub_context *sc, xfs_ino_t ino,
struct xfs_buf *bp);
void xfs_scrub_ino_set_preen(struct xfs_scrub_context *sc, xfs_ino_t ino);
void xfs_scrub_block_set_corrupt(struct xfs_scrub_context *sc,
struct xfs_buf *bp);
void xfs_scrub_ino_set_corrupt(struct xfs_scrub_context *sc, xfs_ino_t ino,
struct xfs_buf *bp);
void xfs_scrub_ino_set_corrupt(struct xfs_scrub_context *sc, xfs_ino_t ino);
void xfs_scrub_fblock_set_corrupt(struct xfs_scrub_context *sc, int whichfork,
xfs_fileoff_t offset);
void xfs_scrub_block_xref_set_corrupt(struct xfs_scrub_context *sc,
struct xfs_buf *bp);
void xfs_scrub_ino_xref_set_corrupt(struct xfs_scrub_context *sc, xfs_ino_t ino,
struct xfs_buf *bp);
void xfs_scrub_ino_xref_set_corrupt(struct xfs_scrub_context *sc,
xfs_ino_t ino);
void xfs_scrub_fblock_xref_set_corrupt(struct xfs_scrub_context *sc,
int whichfork, xfs_fileoff_t offset);
void xfs_scrub_ino_set_warning(struct xfs_scrub_context *sc, xfs_ino_t ino,
struct xfs_buf *bp);
void xfs_scrub_ino_set_warning(struct xfs_scrub_context *sc, xfs_ino_t ino);
void xfs_scrub_fblock_set_warning(struct xfs_scrub_context *sc, int whichfork,
xfs_fileoff_t offset);

View File

@ -781,7 +781,7 @@ xfs_scrub_directory(
/* Plausible size? */
if (sc->ip->i_d.di_size < xfs_dir2_sf_hdr_size(0)) {
xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino, NULL);
xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino);
goto out;
}

View File

@ -259,7 +259,8 @@ xfs_scrub_iallocbt_check_freemask(
error = xfs_imap_to_bp(mp, bs->cur->bc_tp, &imap,
&dip, &bp, 0, 0);
if (!xfs_scrub_btree_process_error(bs->sc, bs->cur, 0, &error))
if (!xfs_scrub_btree_xref_process_error(bs->sc, bs->cur, 0,
&error))
continue;
/* Which inodes are free? */
@ -433,7 +434,7 @@ xfs_scrub_iallocbt_xref_rmap_inodes(
if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur))
return;
if (blocks != inode_blocks)
xfs_scrub_btree_set_corrupt(sc, sc->sa.ino_cur, 0);
xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
}
/* Scrub the inode btrees for some AG. */

View File

@ -89,67 +89,21 @@ out:
/* Inode core */
/*
* Validate di_extsize hint.
*
* The rules are documented at xfs_ioctl_setattr_check_extsize().
* These functions must be kept in sync with each other.
*/
/* Validate di_extsize hint. */
STATIC void
xfs_scrub_inode_extsize(
struct xfs_scrub_context *sc,
struct xfs_buf *bp,
struct xfs_dinode *dip,
xfs_ino_t ino,
uint16_t mode,
uint16_t flags)
{
struct xfs_mount *mp = sc->mp;
bool rt_flag;
bool hint_flag;
bool inherit_flag;
uint32_t extsize;
uint32_t extsize_bytes;
uint32_t blocksize_bytes;
xfs_failaddr_t fa;
rt_flag = (flags & XFS_DIFLAG_REALTIME);
hint_flag = (flags & XFS_DIFLAG_EXTSIZE);
inherit_flag = (flags & XFS_DIFLAG_EXTSZINHERIT);
extsize = be32_to_cpu(dip->di_extsize);
extsize_bytes = XFS_FSB_TO_B(sc->mp, extsize);
if (rt_flag)
blocksize_bytes = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog;
else
blocksize_bytes = mp->m_sb.sb_blocksize;
if ((hint_flag || inherit_flag) && !(S_ISDIR(mode) || S_ISREG(mode)))
goto bad;
if (hint_flag && !S_ISREG(mode))
goto bad;
if (inherit_flag && !S_ISDIR(mode))
goto bad;
if ((hint_flag || inherit_flag) && extsize == 0)
goto bad;
if (!(hint_flag || inherit_flag) && extsize != 0)
goto bad;
if (extsize_bytes % blocksize_bytes)
goto bad;
if (extsize > MAXEXTLEN)
goto bad;
if (!rt_flag && extsize > mp->m_sb.sb_agblocks / 2)
goto bad;
return;
bad:
xfs_scrub_ino_set_corrupt(sc, ino, bp);
fa = xfs_inode_validate_extsize(sc->mp, be32_to_cpu(dip->di_extsize),
mode, flags);
if (fa)
xfs_scrub_ino_set_corrupt(sc, ino);
}
/*
@ -161,58 +115,25 @@ bad:
STATIC void
xfs_scrub_inode_cowextsize(
struct xfs_scrub_context *sc,
struct xfs_buf *bp,
struct xfs_dinode *dip,
xfs_ino_t ino,
uint16_t mode,
uint16_t flags,
uint64_t flags2)
{
struct xfs_mount *mp = sc->mp;
bool rt_flag;
bool hint_flag;
uint32_t extsize;
uint32_t extsize_bytes;
xfs_failaddr_t fa;
rt_flag = (flags & XFS_DIFLAG_REALTIME);
hint_flag = (flags2 & XFS_DIFLAG2_COWEXTSIZE);
extsize = be32_to_cpu(dip->di_cowextsize);
extsize_bytes = XFS_FSB_TO_B(sc->mp, extsize);
if (hint_flag && !xfs_sb_version_hasreflink(&mp->m_sb))
goto bad;
if (hint_flag && !(S_ISDIR(mode) || S_ISREG(mode)))
goto bad;
if (hint_flag && extsize == 0)
goto bad;
if (!hint_flag && extsize != 0)
goto bad;
if (hint_flag && rt_flag)
goto bad;
if (extsize_bytes % mp->m_sb.sb_blocksize)
goto bad;
if (extsize > MAXEXTLEN)
goto bad;
if (extsize > mp->m_sb.sb_agblocks / 2)
goto bad;
return;
bad:
xfs_scrub_ino_set_corrupt(sc, ino, bp);
fa = xfs_inode_validate_cowextsize(sc->mp,
be32_to_cpu(dip->di_cowextsize), mode, flags,
flags2);
if (fa)
xfs_scrub_ino_set_corrupt(sc, ino);
}
/* Make sure the di_flags make sense for the inode. */
STATIC void
xfs_scrub_inode_flags(
struct xfs_scrub_context *sc,
struct xfs_buf *bp,
struct xfs_dinode *dip,
xfs_ino_t ino,
uint16_t mode,
@ -251,14 +172,13 @@ xfs_scrub_inode_flags(
return;
bad:
xfs_scrub_ino_set_corrupt(sc, ino, bp);
xfs_scrub_ino_set_corrupt(sc, ino);
}
/* Make sure the di_flags2 make sense for the inode. */
STATIC void
xfs_scrub_inode_flags2(
struct xfs_scrub_context *sc,
struct xfs_buf *bp,
struct xfs_dinode *dip,
xfs_ino_t ino,
uint16_t mode,
@ -295,14 +215,13 @@ xfs_scrub_inode_flags2(
return;
bad:
xfs_scrub_ino_set_corrupt(sc, ino, bp);
xfs_scrub_ino_set_corrupt(sc, ino);
}
/* Scrub all the ondisk inode fields. */
STATIC void
xfs_scrub_dinode(
struct xfs_scrub_context *sc,
struct xfs_buf *bp,
struct xfs_dinode *dip,
xfs_ino_t ino)
{
@ -333,7 +252,7 @@ xfs_scrub_dinode(
/* mode is recognized */
break;
default:
xfs_scrub_ino_set_corrupt(sc, ino, bp);
xfs_scrub_ino_set_corrupt(sc, ino);
break;
}
@ -344,22 +263,22 @@ xfs_scrub_dinode(
* We autoconvert v1 inodes into v2 inodes on writeout,
* so just mark this inode for preening.
*/
xfs_scrub_ino_set_preen(sc, ino, bp);
xfs_scrub_ino_set_preen(sc, ino);
break;
case 2:
case 3:
if (dip->di_onlink != 0)
xfs_scrub_ino_set_corrupt(sc, ino, bp);
xfs_scrub_ino_set_corrupt(sc, ino);
if (dip->di_mode == 0 && sc->ip)
xfs_scrub_ino_set_corrupt(sc, ino, bp);
xfs_scrub_ino_set_corrupt(sc, ino);
if (dip->di_projid_hi != 0 &&
!xfs_sb_version_hasprojid32bit(&mp->m_sb))
xfs_scrub_ino_set_corrupt(sc, ino, bp);
xfs_scrub_ino_set_corrupt(sc, ino);
break;
default:
xfs_scrub_ino_set_corrupt(sc, ino, bp);
xfs_scrub_ino_set_corrupt(sc, ino);
return;
}
@ -369,40 +288,40 @@ xfs_scrub_dinode(
*/
if (dip->di_uid == cpu_to_be32(-1U) ||
dip->di_gid == cpu_to_be32(-1U))
xfs_scrub_ino_set_warning(sc, ino, bp);
xfs_scrub_ino_set_warning(sc, ino);
/* di_format */
switch (dip->di_format) {
case XFS_DINODE_FMT_DEV:
if (!S_ISCHR(mode) && !S_ISBLK(mode) &&
!S_ISFIFO(mode) && !S_ISSOCK(mode))
xfs_scrub_ino_set_corrupt(sc, ino, bp);
xfs_scrub_ino_set_corrupt(sc, ino);
break;
case XFS_DINODE_FMT_LOCAL:
if (!S_ISDIR(mode) && !S_ISLNK(mode))
xfs_scrub_ino_set_corrupt(sc, ino, bp);
xfs_scrub_ino_set_corrupt(sc, ino);
break;
case XFS_DINODE_FMT_EXTENTS:
if (!S_ISREG(mode) && !S_ISDIR(mode) && !S_ISLNK(mode))
xfs_scrub_ino_set_corrupt(sc, ino, bp);
xfs_scrub_ino_set_corrupt(sc, ino);
break;
case XFS_DINODE_FMT_BTREE:
if (!S_ISREG(mode) && !S_ISDIR(mode))
xfs_scrub_ino_set_corrupt(sc, ino, bp);
xfs_scrub_ino_set_corrupt(sc, ino);
break;
case XFS_DINODE_FMT_UUID:
default:
xfs_scrub_ino_set_corrupt(sc, ino, bp);
xfs_scrub_ino_set_corrupt(sc, ino);
break;
}
/* di_[amc]time.nsec */
if (be32_to_cpu(dip->di_atime.t_nsec) >= NSEC_PER_SEC)
xfs_scrub_ino_set_corrupt(sc, ino, bp);
xfs_scrub_ino_set_corrupt(sc, ino);
if (be32_to_cpu(dip->di_mtime.t_nsec) >= NSEC_PER_SEC)
xfs_scrub_ino_set_corrupt(sc, ino, bp);
xfs_scrub_ino_set_corrupt(sc, ino);
if (be32_to_cpu(dip->di_ctime.t_nsec) >= NSEC_PER_SEC)
xfs_scrub_ino_set_corrupt(sc, ino, bp);
xfs_scrub_ino_set_corrupt(sc, ino);
/*
* di_size. xfs_dinode_verify checks for things that screw up
@ -411,19 +330,19 @@ xfs_scrub_dinode(
*/
isize = be64_to_cpu(dip->di_size);
if (isize & (1ULL << 63))
xfs_scrub_ino_set_corrupt(sc, ino, bp);
xfs_scrub_ino_set_corrupt(sc, ino);
/* Devices, fifos, and sockets must have zero size */
if (!S_ISDIR(mode) && !S_ISREG(mode) && !S_ISLNK(mode) && isize != 0)
xfs_scrub_ino_set_corrupt(sc, ino, bp);
xfs_scrub_ino_set_corrupt(sc, ino);
/* Directories can't be larger than the data section size (32G) */
if (S_ISDIR(mode) && (isize == 0 || isize >= XFS_DIR2_SPACE_SIZE))
xfs_scrub_ino_set_corrupt(sc, ino, bp);
xfs_scrub_ino_set_corrupt(sc, ino);
/* Symlinks can't be larger than SYMLINK_MAXLEN */
if (S_ISLNK(mode) && (isize == 0 || isize >= XFS_SYMLINK_MAXLEN))
xfs_scrub_ino_set_corrupt(sc, ino, bp);
xfs_scrub_ino_set_corrupt(sc, ino);
/*
* Warn if the running kernel can't handle the kinds of offsets
@ -432,7 +351,7 @@ xfs_scrub_dinode(
* overly large offsets, flag the inode for admin review.
*/
if (isize >= mp->m_super->s_maxbytes)
xfs_scrub_ino_set_warning(sc, ino, bp);
xfs_scrub_ino_set_warning(sc, ino);
/* di_nblocks */
if (flags2 & XFS_DIFLAG2_REFLINK) {
@ -447,15 +366,15 @@ xfs_scrub_dinode(
*/
if (be64_to_cpu(dip->di_nblocks) >=
mp->m_sb.sb_dblocks + mp->m_sb.sb_rblocks)
xfs_scrub_ino_set_corrupt(sc, ino, bp);
xfs_scrub_ino_set_corrupt(sc, ino);
} else {
if (be64_to_cpu(dip->di_nblocks) >= mp->m_sb.sb_dblocks)
xfs_scrub_ino_set_corrupt(sc, ino, bp);
xfs_scrub_ino_set_corrupt(sc, ino);
}
xfs_scrub_inode_flags(sc, bp, dip, ino, mode, flags);
xfs_scrub_inode_flags(sc, dip, ino, mode, flags);
xfs_scrub_inode_extsize(sc, bp, dip, ino, mode, flags);
xfs_scrub_inode_extsize(sc, dip, ino, mode, flags);
/* di_nextents */
nextents = be32_to_cpu(dip->di_nextents);
@ -463,31 +382,31 @@ xfs_scrub_dinode(
switch (dip->di_format) {
case XFS_DINODE_FMT_EXTENTS:
if (nextents > fork_recs)
xfs_scrub_ino_set_corrupt(sc, ino, bp);
xfs_scrub_ino_set_corrupt(sc, ino);
break;
case XFS_DINODE_FMT_BTREE:
if (nextents <= fork_recs)
xfs_scrub_ino_set_corrupt(sc, ino, bp);
xfs_scrub_ino_set_corrupt(sc, ino);
break;
default:
if (nextents != 0)
xfs_scrub_ino_set_corrupt(sc, ino, bp);
xfs_scrub_ino_set_corrupt(sc, ino);
break;
}
/* di_forkoff */
if (XFS_DFORK_APTR(dip) >= (char *)dip + mp->m_sb.sb_inodesize)
xfs_scrub_ino_set_corrupt(sc, ino, bp);
xfs_scrub_ino_set_corrupt(sc, ino);
if (dip->di_anextents != 0 && dip->di_forkoff == 0)
xfs_scrub_ino_set_corrupt(sc, ino, bp);
xfs_scrub_ino_set_corrupt(sc, ino);
if (dip->di_forkoff == 0 && dip->di_aformat != XFS_DINODE_FMT_EXTENTS)
xfs_scrub_ino_set_corrupt(sc, ino, bp);
xfs_scrub_ino_set_corrupt(sc, ino);
/* di_aformat */
if (dip->di_aformat != XFS_DINODE_FMT_LOCAL &&
dip->di_aformat != XFS_DINODE_FMT_EXTENTS &&
dip->di_aformat != XFS_DINODE_FMT_BTREE)
xfs_scrub_ino_set_corrupt(sc, ino, bp);
xfs_scrub_ino_set_corrupt(sc, ino);
/* di_anextents */
nextents = be16_to_cpu(dip->di_anextents);
@ -495,92 +414,26 @@ xfs_scrub_dinode(
switch (dip->di_aformat) {
case XFS_DINODE_FMT_EXTENTS:
if (nextents > fork_recs)
xfs_scrub_ino_set_corrupt(sc, ino, bp);
xfs_scrub_ino_set_corrupt(sc, ino);
break;
case XFS_DINODE_FMT_BTREE:
if (nextents <= fork_recs)
xfs_scrub_ino_set_corrupt(sc, ino, bp);
xfs_scrub_ino_set_corrupt(sc, ino);
break;
default:
if (nextents != 0)
xfs_scrub_ino_set_corrupt(sc, ino, bp);
xfs_scrub_ino_set_corrupt(sc, ino);
}
if (dip->di_version >= 3) {
if (be32_to_cpu(dip->di_crtime.t_nsec) >= NSEC_PER_SEC)
xfs_scrub_ino_set_corrupt(sc, ino, bp);
xfs_scrub_inode_flags2(sc, bp, dip, ino, mode, flags, flags2);
xfs_scrub_inode_cowextsize(sc, bp, dip, ino, mode, flags,
xfs_scrub_ino_set_corrupt(sc, ino);
xfs_scrub_inode_flags2(sc, dip, ino, mode, flags, flags2);
xfs_scrub_inode_cowextsize(sc, dip, ino, mode, flags,
flags2);
}
}
/* Map and read a raw inode. */
STATIC int
xfs_scrub_inode_map_raw(
struct xfs_scrub_context *sc,
xfs_ino_t ino,
struct xfs_buf **bpp,
struct xfs_dinode **dipp)
{
struct xfs_imap imap;
struct xfs_mount *mp = sc->mp;
struct xfs_buf *bp = NULL;
struct xfs_dinode *dip;
int error;
error = xfs_imap(mp, sc->tp, ino, &imap, XFS_IGET_UNTRUSTED);
if (error == -EINVAL) {
/*
* Inode could have gotten deleted out from under us;
* just forget about it.
*/
error = -ENOENT;
goto out;
}
if (!xfs_scrub_process_error(sc, XFS_INO_TO_AGNO(mp, ino),
XFS_INO_TO_AGBNO(mp, ino), &error))
goto out;
error = xfs_trans_read_buf(mp, sc->tp, mp->m_ddev_targp,
imap.im_blkno, imap.im_len, XBF_UNMAPPED, &bp,
NULL);
if (!xfs_scrub_process_error(sc, XFS_INO_TO_AGNO(mp, ino),
XFS_INO_TO_AGBNO(mp, ino), &error))
goto out;
/*
* Is this really an inode? We disabled verifiers in the above
* xfs_trans_read_buf call because the inode buffer verifier
* fails on /any/ inode record in the inode cluster with a bad
* magic or version number, not just the one that we're
* checking. Therefore, grab the buffer unconditionally, attach
* the inode verifiers by hand, and run the inode verifier only
* on the one inode we want.
*/
bp->b_ops = &xfs_inode_buf_ops;
dip = xfs_buf_offset(bp, imap.im_boffset);
if (xfs_dinode_verify(mp, ino, dip) != NULL ||
!xfs_dinode_good_version(mp, dip->di_version)) {
xfs_scrub_ino_set_corrupt(sc, ino, bp);
goto out_buf;
}
/* ...and is it the one we asked for? */
if (be32_to_cpu(dip->di_gen) != sc->sm->sm_gen) {
error = -ENOENT;
goto out_buf;
}
*dipp = dip;
*bpp = bp;
out:
return error;
out_buf:
xfs_trans_brelse(sc->tp, bp);
return error;
}
/*
* Make sure the finobt doesn't think this inode is free.
* We don't have to check the inobt ourselves because we got the inode via
@ -645,18 +498,18 @@ xfs_scrub_inode_xref_bmap(
if (!xfs_scrub_should_check_xref(sc, &error, NULL))
return;
if (nextents < be32_to_cpu(dip->di_nextents))
xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino, NULL);
xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino);
error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_ATTR_FORK,
&nextents, &acount);
if (!xfs_scrub_should_check_xref(sc, &error, NULL))
return;
if (nextents != be16_to_cpu(dip->di_anextents))
xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino, NULL);
xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino);
/* Check nblocks against the inode. */
if (count + acount != be64_to_cpu(dip->di_nblocks))
xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino, NULL);
xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino);
}
/* Cross-reference with the other btrees. */
@ -700,8 +553,7 @@ xfs_scrub_inode_xref(
static void
xfs_scrub_inode_check_reflink_iflag(
struct xfs_scrub_context *sc,
xfs_ino_t ino,
struct xfs_buf *bp)
xfs_ino_t ino)
{
struct xfs_mount *mp = sc->mp;
bool has_shared;
@ -716,9 +568,9 @@ xfs_scrub_inode_check_reflink_iflag(
XFS_INO_TO_AGBNO(mp, ino), &error))
return;
if (xfs_is_reflink_inode(sc->ip) && !has_shared)
xfs_scrub_ino_set_preen(sc, ino, bp);
xfs_scrub_ino_set_preen(sc, ino);
else if (!xfs_is_reflink_inode(sc->ip) && has_shared)
xfs_scrub_ino_set_corrupt(sc, ino, bp);
xfs_scrub_ino_set_corrupt(sc, ino);
}
/* Scrub an inode. */
@ -727,43 +579,33 @@ xfs_scrub_inode(
struct xfs_scrub_context *sc)
{
struct xfs_dinode di;
struct xfs_buf *bp = NULL;
struct xfs_dinode *dip;
xfs_ino_t ino;
int error = 0;
/* Did we get the in-core inode, or are we doing this manually? */
if (sc->ip) {
ino = sc->ip->i_ino;
xfs_inode_to_disk(sc->ip, &di, 0);
dip = &di;
} else {
/* Map & read inode. */
ino = sc->sm->sm_ino;
error = xfs_scrub_inode_map_raw(sc, ino, &bp, &dip);
if (error || !bp)
goto out;
/*
* If sc->ip is NULL, that means that the setup function called
* xfs_iget to look up the inode. xfs_iget returned a EFSCORRUPTED
* and a NULL inode, so flag the corruption error and return.
*/
if (!sc->ip) {
xfs_scrub_ino_set_corrupt(sc, sc->sm->sm_ino);
return 0;
}
xfs_scrub_dinode(sc, bp, dip, ino);
/* Scrub the inode core. */
xfs_inode_to_disk(sc->ip, &di, 0);
xfs_scrub_dinode(sc, &di, sc->ip->i_ino);
if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
goto out;
/* Now let's do the things that require a live inode. */
if (!sc->ip)
goto out;
/*
* Look for discrepancies between file's data blocks and the reflink
* iflag. We already checked the iflag against the file mode when
* we scrubbed the dinode.
*/
if (S_ISREG(VFS_I(sc->ip)->i_mode))
xfs_scrub_inode_check_reflink_iflag(sc, ino, bp);
xfs_scrub_inode_check_reflink_iflag(sc, sc->ip->i_ino);
xfs_scrub_inode_xref(sc, ino, dip);
xfs_scrub_inode_xref(sc, sc->ip->i_ino, &di);
out:
if (bp)
xfs_trans_brelse(sc->tp, bp);
return error;
}

View File

@ -167,8 +167,18 @@ xfs_scrub_parent_validate(
* if the parent pointer erroneously points to a file, we
* can't use DONTCACHE here because DONTCACHE inodes can trigger
* immediate inactive cleanup of the inode.
*
* If _iget returns -EINVAL then the parent inode number is garbage
* and the directory is corrupt. If the _iget returns -EFSCORRUPTED
* or -EFSBADCRC then the parent is corrupt which is a cross
* referencing error. Any other error is an operational error.
*/
error = xfs_iget(mp, sc->tp, dnum, 0, 0, &dp);
error = xfs_iget(mp, sc->tp, dnum, XFS_IGET_UNTRUSTED, 0, &dp);
if (error == -EINVAL) {
error = -EFSCORRUPTED;
xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error);
goto out;
}
if (!xfs_scrub_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error))
goto out;
if (dp == sc->ip || !S_ISDIR(VFS_I(dp)->i_mode)) {

View File

@ -219,7 +219,7 @@ xfs_scrub_quota(
/* Look for problem extents. */
xfs_ilock(ip, XFS_ILOCK_EXCL);
if (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) {
xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino, NULL);
xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino);
goto out_unlock_inode;
}
max_dqid_off = ((xfs_dqid_t)-1) / qi->qi_dqperchunk;

View File

@ -116,8 +116,7 @@ xfs_scrub_xref_is_used_rt_space(
if (!xfs_scrub_should_check_xref(sc, &error, NULL))
goto out_unlock;
if (is_free)
xfs_scrub_ino_xref_set_corrupt(sc, sc->mp->m_rbmip->i_ino,
NULL);
xfs_scrub_ino_xref_set_corrupt(sc, sc->mp->m_rbmip->i_ino);
out_unlock:
xfs_iunlock(sc->mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
}

View File

@ -174,53 +174,32 @@ DEFINE_SCRUB_BLOCK_ERROR_EVENT(xfs_scrub_block_error);
DEFINE_SCRUB_BLOCK_ERROR_EVENT(xfs_scrub_block_preen);
DECLARE_EVENT_CLASS(xfs_scrub_ino_error_class,
TP_PROTO(struct xfs_scrub_context *sc, xfs_ino_t ino, xfs_daddr_t daddr,
void *ret_ip),
TP_ARGS(sc, ino, daddr, ret_ip),
TP_PROTO(struct xfs_scrub_context *sc, xfs_ino_t ino, void *ret_ip),
TP_ARGS(sc, ino, ret_ip),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_ino_t, ino)
__field(unsigned int, type)
__field(xfs_agnumber_t, agno)
__field(xfs_agblock_t, bno)
__field(void *, ret_ip)
),
TP_fast_assign(
xfs_fsblock_t fsbno;
xfs_agnumber_t agno;
xfs_agblock_t bno;
if (daddr) {
fsbno = XFS_DADDR_TO_FSB(sc->mp, daddr);
agno = XFS_FSB_TO_AGNO(sc->mp, fsbno);
bno = XFS_FSB_TO_AGBNO(sc->mp, fsbno);
} else {
agno = XFS_INO_TO_AGNO(sc->mp, ino);
bno = XFS_AGINO_TO_AGBNO(sc->mp,
XFS_INO_TO_AGINO(sc->mp, ino));
}
__entry->dev = sc->mp->m_super->s_dev;
__entry->ino = ino;
__entry->type = sc->sm->sm_type;
__entry->agno = agno;
__entry->bno = bno;
__entry->ret_ip = ret_ip;
),
TP_printk("dev %d:%d ino 0x%llx type %u agno %u agbno %u ret_ip %pS",
TP_printk("dev %d:%d ino 0x%llx type %u ret_ip %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->type,
__entry->agno,
__entry->bno,
__entry->ret_ip)
)
#define DEFINE_SCRUB_INO_ERROR_EVENT(name) \
DEFINE_EVENT(xfs_scrub_ino_error_class, name, \
TP_PROTO(struct xfs_scrub_context *sc, xfs_ino_t ino, \
xfs_daddr_t daddr, void *ret_ip), \
TP_ARGS(sc, ino, daddr, ret_ip))
void *ret_ip), \
TP_ARGS(sc, ino, ret_ip))
DEFINE_SCRUB_INO_ERROR_EVENT(xfs_scrub_ino_error);
DEFINE_SCRUB_INO_ERROR_EVENT(xfs_scrub_ino_preen);

View File

@ -209,7 +209,8 @@ xfs_setfilesize_trans_alloc(
struct xfs_trans *tp;
int error;
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp);
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0,
XFS_TRANS_NOFS, &tp);
if (error)
return error;
@ -1330,21 +1331,20 @@ xfs_get_blocks(
end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size);
offset_fsb = XFS_B_TO_FSBT(mp, offset);
error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
&imap, &nimaps, XFS_BMAPI_ENTIRE);
error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
&nimaps, 0);
if (error)
goto out_unlock;
if (nimaps) {
trace_xfs_get_blocks_found(ip, offset, size,
imap.br_state == XFS_EXT_UNWRITTEN ?
XFS_IO_UNWRITTEN : XFS_IO_OVERWRITE, &imap);
xfs_iunlock(ip, lockmode);
} else {
if (!nimaps) {
trace_xfs_get_blocks_notfound(ip, offset, size);
goto out_unlock;
}
trace_xfs_get_blocks_found(ip, offset, size,
imap.br_state == XFS_EXT_UNWRITTEN ?
XFS_IO_UNWRITTEN : XFS_IO_OVERWRITE, &imap);
xfs_iunlock(ip, lockmode);
/* trim mapping down to size requested */
xfs_map_trim_size(inode, iblock, bh_result, &imap, offset, size);

View File

@ -1208,18 +1208,15 @@ xfs_free_file_space(
/*
* Now that we've unmap all full blocks we'll have to zero out any
* partial block at the beginning and/or end. xfs_zero_range is
* smart enough to skip any holes, including those we just created,
* but we must take care not to zero beyond EOF and enlarge i_size.
* partial block at the beginning and/or end. iomap_zero_range is smart
* enough to skip any holes, including those we just created, but we
* must take care not to zero beyond EOF and enlarge i_size.
*/
if (offset >= XFS_ISIZE(ip))
return 0;
if (offset + len > XFS_ISIZE(ip))
len = XFS_ISIZE(ip) - offset;
return xfs_zero_range(ip, offset, len, NULL);
return iomap_zero_range(VFS_I(ip), offset, len, NULL, &xfs_iomap_ops);
}
/*
@ -1899,17 +1896,28 @@ xfs_swap_extents(
* performed with log redo items!
*/
if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
int w = XFS_DATA_FORK;
uint32_t ipnext = XFS_IFORK_NEXTENTS(ip, w);
uint32_t tipnext = XFS_IFORK_NEXTENTS(tip, w);
/*
* Conceptually this shouldn't affect the shape of either
* bmbt, but since we atomically move extents one by one,
* we reserve enough space to rebuild both trees.
* Conceptually this shouldn't affect the shape of either bmbt,
* but since we atomically move extents one by one, we reserve
* enough space to rebuild both trees.
*/
resblks = XFS_SWAP_RMAP_SPACE_RES(mp,
XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK),
XFS_DATA_FORK) +
XFS_SWAP_RMAP_SPACE_RES(mp,
XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK),
XFS_DATA_FORK);
resblks = XFS_SWAP_RMAP_SPACE_RES(mp, ipnext, w);
resblks += XFS_SWAP_RMAP_SPACE_RES(mp, tipnext, w);
/*
* Handle the corner case where either inode might straddle the
* btree format boundary. If so, the inode could bounce between
* btree <-> extent format on unmap -> remap cycles, freeing and
* allocating a bmapbt block each time.
*/
if (ipnext == (XFS_IFORK_MAXEXT(ip, w) + 1))
resblks += XFS_IFORK_MAXEXT(ip, w);
if (tipnext == (XFS_IFORK_MAXEXT(tip, w) + 1))
resblks += XFS_IFORK_MAXEXT(tip, w);
}
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp);
if (error)
@ -2003,11 +2011,11 @@ xfs_swap_extents(
ip->i_cowfp = tip->i_cowfp;
tip->i_cowfp = cowfp;
if (ip->i_cowfp && ip->i_cnextents)
if (ip->i_cowfp && ip->i_cowfp->if_bytes)
xfs_inode_set_cowblocks_tag(ip);
else
xfs_inode_clear_cowblocks_tag(ip);
if (tip->i_cowfp && tip->i_cnextents)
if (tip->i_cowfp && tip->i_cowfp->if_bytes)
xfs_inode_set_cowblocks_tag(tip);
else
xfs_inode_clear_cowblocks_tag(tip);

View File

@ -1708,7 +1708,7 @@ xfs_buftarg_isolate(
* zero. If the value is already zero, we need to reclaim the
* buffer, otherwise it gets another trip through the LRU.
*/
if (!atomic_add_unless(&bp->b_lru_ref, -1, 0)) {
if (atomic_add_unless(&bp->b_lru_ref, -1, 0)) {
spin_unlock(&bp->b_lock);
return LRU_ROTATE;
}

View File

@ -460,7 +460,7 @@ xfs_buf_item_unpin(
list_del_init(&bp->b_li_list);
bp->b_iodone = NULL;
} else {
spin_lock(&ailp->xa_lock);
spin_lock(&ailp->ail_lock);
xfs_trans_ail_delete(ailp, lip, SHUTDOWN_LOG_IO_ERROR);
xfs_buf_item_relse(bp);
ASSERT(bp->b_log_item == NULL);
@ -1057,12 +1057,12 @@ xfs_buf_do_callbacks_fail(
lip = list_first_entry(&bp->b_li_list, struct xfs_log_item,
li_bio_list);
ailp = lip->li_ailp;
spin_lock(&ailp->xa_lock);
spin_lock(&ailp->ail_lock);
list_for_each_entry(lip, &bp->b_li_list, li_bio_list) {
if (lip->li_ops->iop_error)
lip->li_ops->iop_error(lip, bp);
}
spin_unlock(&ailp->xa_lock);
spin_unlock(&ailp->ail_lock);
}
static bool
@ -1226,7 +1226,7 @@ xfs_buf_iodone(
*
* Either way, AIL is useless if we're forcing a shutdown.
*/
spin_lock(&ailp->xa_lock);
spin_lock(&ailp->ail_lock);
xfs_trans_ail_delete(ailp, lip, SHUTDOWN_CORRUPT_INCORE);
xfs_buf_item_free(BUF_ITEM(lip));
}
@ -1246,7 +1246,7 @@ xfs_buf_resubmit_failed_buffers(
/*
* Clear XFS_LI_FAILED flag from all items before resubmit
*
* XFS_LI_FAILED set/clear is protected by xa_lock, caller this
* XFS_LI_FAILED set/clear is protected by ail_lock, caller this
* function already have it acquired
*/
list_for_each_entry(lip, &bp->b_li_list, li_bio_list)

View File

@ -394,8 +394,6 @@ xfs_qm_dqalloc(
error1:
xfs_defer_cancel(&dfops);
error0:
xfs_iunlock(quotip, XFS_ILOCK_EXCL);
return error;
}
@ -920,7 +918,7 @@ xfs_qm_dqflush_done(
(lip->li_flags & XFS_LI_FAILED))) {
/* xfs_trans_ail_delete() drops the AIL lock. */
spin_lock(&ailp->xa_lock);
spin_lock(&ailp->ail_lock);
if (lip->li_lsn == qip->qli_flush_lsn) {
xfs_trans_ail_delete(ailp, lip, SHUTDOWN_CORRUPT_INCORE);
} else {
@ -930,7 +928,7 @@ xfs_qm_dqflush_done(
*/
if (lip->li_flags & XFS_LI_FAILED)
xfs_clear_li_failed(lip);
spin_unlock(&ailp->xa_lock);
spin_unlock(&ailp->ail_lock);
}
}

View File

@ -157,8 +157,9 @@ xfs_dquot_item_error(
STATIC uint
xfs_qm_dquot_logitem_push(
struct xfs_log_item *lip,
struct list_head *buffer_list) __releases(&lip->li_ailp->xa_lock)
__acquires(&lip->li_ailp->xa_lock)
struct list_head *buffer_list)
__releases(&lip->li_ailp->ail_lock)
__acquires(&lip->li_ailp->ail_lock)
{
struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot;
struct xfs_buf *bp = lip->li_buf;
@ -205,7 +206,7 @@ xfs_qm_dquot_logitem_push(
goto out_unlock;
}
spin_unlock(&lip->li_ailp->xa_lock);
spin_unlock(&lip->li_ailp->ail_lock);
error = xfs_qm_dqflush(dqp, &bp);
if (error) {
@ -217,7 +218,7 @@ xfs_qm_dquot_logitem_push(
xfs_buf_relse(bp);
}
spin_lock(&lip->li_ailp->xa_lock);
spin_lock(&lip->li_ailp->ail_lock);
out_unlock:
xfs_dqunlock(dqp);
return rval;
@ -400,7 +401,7 @@ xfs_qm_qoffend_logitem_committed(
* Delete the qoff-start logitem from the AIL.
* xfs_trans_ail_delete() drops the AIL lock.
*/
spin_lock(&ailp->xa_lock);
spin_lock(&ailp->ail_lock);
xfs_trans_ail_delete(ailp, &qfs->qql_item, SHUTDOWN_LOG_IO_ERROR);
kmem_free(qfs->qql_item.li_lv_shadow);

View File

@ -342,6 +342,43 @@ xfs_corruption_error(
xfs_alert(mp, "Corruption detected. Unmount and run xfs_repair");
}
/*
* Warnings specifically for verifier errors. Differentiate CRC vs. invalid
* values, and omit the stack trace unless the error level is tuned high.
*/
void
xfs_buf_verifier_error(
struct xfs_buf *bp,
int error,
const char *name,
void *buf,
size_t bufsz,
xfs_failaddr_t failaddr)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
xfs_failaddr_t fa;
int sz;
fa = failaddr ? failaddr : __return_address;
__xfs_buf_ioerror(bp, error, fa);
xfs_alert(mp, "Metadata %s detected at %pS, %s block 0x%llx %s",
bp->b_error == -EFSBADCRC ? "CRC error" : "corruption",
fa, bp->b_ops->name, bp->b_bn, name);
xfs_alert(mp, "Unmount and run xfs_repair");
if (xfs_error_level >= XFS_ERRLEVEL_LOW) {
sz = min_t(size_t, XFS_CORRUPTION_DUMP_LEN, bufsz);
xfs_alert(mp, "First %d bytes of corrupted metadata buffer:",
sz);
xfs_hex_dump(buf, sz);
}
if (xfs_error_level >= XFS_ERRLEVEL_HIGH)
xfs_stack_trace();
}
/*
* Warnings specifically for verifier errors. Differentiate CRC vs. invalid
* values, and omit the stack trace unless the error level is tuned high.
@ -352,26 +389,8 @@ xfs_verifier_error(
int error,
xfs_failaddr_t failaddr)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
xfs_failaddr_t fa;
fa = failaddr ? failaddr : __return_address;
__xfs_buf_ioerror(bp, error, fa);
xfs_alert(mp, "Metadata %s detected at %pS, %s block 0x%llx",
bp->b_error == -EFSBADCRC ? "CRC error" : "corruption",
fa, bp->b_ops->name, bp->b_bn);
xfs_alert(mp, "Unmount and run xfs_repair");
if (xfs_error_level >= XFS_ERRLEVEL_LOW) {
xfs_alert(mp, "First %d bytes of corrupted metadata buffer:",
XFS_CORRUPTION_DUMP_LEN);
xfs_hex_dump(xfs_buf_offset(bp, 0), XFS_CORRUPTION_DUMP_LEN);
}
if (xfs_error_level >= XFS_ERRLEVEL_HIGH)
xfs_stack_trace();
return xfs_buf_verifier_error(bp, error, "", xfs_buf_offset(bp, 0),
XFS_CORRUPTION_DUMP_LEN, failaddr);
}
/*

View File

@ -26,6 +26,9 @@ extern void xfs_error_report(const char *tag, int level, struct xfs_mount *mp,
extern void xfs_corruption_error(const char *tag, int level,
struct xfs_mount *mp, void *p, const char *filename,
int linenum, xfs_failaddr_t failaddr);
extern void xfs_buf_verifier_error(struct xfs_buf *bp, int error,
const char *name, void *buf, size_t bufsz,
xfs_failaddr_t failaddr);
extern void xfs_verifier_error(struct xfs_buf *bp, int error,
xfs_failaddr_t failaddr);
extern void xfs_inode_verifier_error(struct xfs_inode *ip, int error,

View File

@ -237,7 +237,7 @@ xfs_fs_nfs_commit_metadata(
if (!lsn)
return 0;
return _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL);
return xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL);
}
const struct export_operations xfs_export_operations = {

View File

@ -611,10 +611,9 @@ xfs_extent_busy_flush(
unsigned busy_gen)
{
DEFINE_WAIT (wait);
int log_flushed = 0, error;
int error;
trace_xfs_log_force(mp, 0, _THIS_IP_);
error = _xfs_log_force(mp, XFS_LOG_SYNC, &log_flushed);
error = xfs_log_force(mp, XFS_LOG_SYNC);
if (error)
return;

View File

@ -48,20 +48,6 @@
static const struct vm_operations_struct xfs_file_vm_ops;
/*
* Clear the specified ranges to zero through either the pagecache or DAX.
* Holes and unwritten extents will be left as-is as they already are zeroed.
*/
int
xfs_zero_range(
struct xfs_inode *ip,
xfs_off_t pos,
xfs_off_t count,
bool *did_zero)
{
return iomap_zero_range(VFS_I(ip), pos, count, did_zero, &xfs_iomap_ops);
}
int
xfs_update_prealloc_flags(
struct xfs_inode *ip,
@ -122,7 +108,7 @@ xfs_dir_fsync(
if (!lsn)
return 0;
return _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL);
return xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL);
}
STATIC int
@ -182,7 +168,7 @@ xfs_file_fsync(
}
if (lsn) {
error = _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed);
error = xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed);
ip->i_itemp->ili_fsync_fields = 0;
}
xfs_iunlock(ip, XFS_ILOCK_SHARED);
@ -300,31 +286,6 @@ xfs_file_read_iter(
return ret;
}
/*
* Zero any on disk space between the current EOF and the new, larger EOF.
*
* This handles the normal case of zeroing the remainder of the last block in
* the file and the unusual case of zeroing blocks out beyond the size of the
* file. This second case only happens with fixed size extents and when the
* system crashes before the inode size was updated but after blocks were
* allocated.
*
* Expects the iolock to be held exclusive, and will take the ilock internally.
*/
int /* error (positive) */
xfs_zero_eof(
struct xfs_inode *ip,
xfs_off_t offset, /* starting I/O offset */
xfs_fsize_t isize, /* current inode size */
bool *did_zeroing)
{
ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
ASSERT(offset > isize);
trace_xfs_zero_eof(ip, isize, offset - isize);
return xfs_zero_range(ip, isize, offset - isize, did_zeroing);
}
/*
* Common pre-write limit and setup checks.
*
@ -344,6 +305,7 @@ xfs_file_aio_write_checks(
ssize_t error = 0;
size_t count = iov_iter_count(from);
bool drained_dio = false;
loff_t isize;
restart:
error = generic_write_checks(iocb, from);
@ -380,7 +342,8 @@ restart:
* and hence be able to correctly determine if we need to run zeroing.
*/
spin_lock(&ip->i_flags_lock);
if (iocb->ki_pos > i_size_read(inode)) {
isize = i_size_read(inode);
if (iocb->ki_pos > isize) {
spin_unlock(&ip->i_flags_lock);
if (!drained_dio) {
if (*iolock == XFS_IOLOCK_SHARED) {
@ -401,7 +364,10 @@ restart:
drained_dio = true;
goto restart;
}
error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), NULL);
trace_xfs_zero_eof(ip, isize, iocb->ki_pos - isize);
error = iomap_zero_range(inode, isize, iocb->ki_pos - isize,
NULL, &xfs_iomap_ops);
if (error)
return error;
} else

View File

@ -217,7 +217,7 @@ xfs_growfs_data_private(
}
agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, bp);
for (bucket = 0; bucket < XFS_AGFL_SIZE(mp); bucket++)
for (bucket = 0; bucket < xfs_agfl_size(mp); bucket++)
agfl_bno[bucket] = cpu_to_be32(NULLAGBLOCK);
error = xfs_bwrite(bp);

View File

@ -483,7 +483,28 @@ xfs_iget_cache_miss(
trace_xfs_iget_miss(ip);
if ((VFS_I(ip)->i_mode == 0) && !(flags & XFS_IGET_CREATE)) {
/*
* If we are allocating a new inode, then check what was returned is
* actually a free, empty inode. If we are not allocating an inode,
* the check we didn't find a free inode.
*/
if (flags & XFS_IGET_CREATE) {
if (VFS_I(ip)->i_mode != 0) {
xfs_warn(mp,
"Corruption detected! Free inode 0x%llx not marked free on disk",
ino);
error = -EFSCORRUPTED;
goto out_destroy;
}
if (ip->i_d.di_nblocks != 0) {
xfs_warn(mp,
"Corruption detected! Free inode 0x%llx has blocks allocated!",
ino);
error = -EFSCORRUPTED;
goto out_destroy;
}
} else if (VFS_I(ip)->i_mode == 0) {
error = -ENOENT;
goto out_destroy;
}

View File

@ -1872,6 +1872,7 @@ xfs_inactive(
xfs_inode_t *ip)
{
struct xfs_mount *mp;
struct xfs_ifork *cow_ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
int error;
int truncate = 0;
@ -1892,6 +1893,10 @@ xfs_inactive(
if (mp->m_flags & XFS_MOUNT_RDONLY)
return;
/* Try to clean out the cow blocks if there are any. */
if (xfs_is_reflink_inode(ip) && cow_ifp->if_bytes > 0)
xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true);
if (VFS_I(ip)->i_nlink != 0) {
/*
* force is true because we are evicting an inode from the
@ -2470,6 +2475,10 @@ xfs_ifree(
ip->i_d.di_forkoff = 0; /* mark the attr fork not in use */
ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
/* Don't attempt to replay owner changes for a deleted inode */
ip->i_itemp->ili_fields &= ~(XFS_ILOG_AOWNER|XFS_ILOG_DOWNER);
/*
* Bump the generation count so no one will be confused
* by reincarnations of this inode.
@ -2497,7 +2506,7 @@ xfs_iunpin(
trace_xfs_inode_unpin_nowait(ip, _RET_IP_);
/* Give the log a push to start the unpinning I/O */
xfs_log_force_lsn(ip->i_mount, ip->i_itemp->ili_last_lsn, 0);
xfs_log_force_lsn(ip->i_mount, ip->i_itemp->ili_last_lsn, 0, NULL);
}

View File

@ -443,10 +443,6 @@ enum xfs_prealloc_flags {
int xfs_update_prealloc_flags(struct xfs_inode *ip,
enum xfs_prealloc_flags flags);
int xfs_zero_eof(struct xfs_inode *ip, xfs_off_t offset,
xfs_fsize_t isize, bool *did_zeroing);
int xfs_zero_range(struct xfs_inode *ip, xfs_off_t pos, xfs_off_t count,
bool *did_zero);
/* from xfs_iops.c */
extern void xfs_setup_inode(struct xfs_inode *ip);

View File

@ -502,8 +502,8 @@ STATIC uint
xfs_inode_item_push(
struct xfs_log_item *lip,
struct list_head *buffer_list)
__releases(&lip->li_ailp->xa_lock)
__acquires(&lip->li_ailp->xa_lock)
__releases(&lip->li_ailp->ail_lock)
__acquires(&lip->li_ailp->ail_lock)
{
struct xfs_inode_log_item *iip = INODE_ITEM(lip);
struct xfs_inode *ip = iip->ili_inode;
@ -562,7 +562,7 @@ xfs_inode_item_push(
ASSERT(iip->ili_fields != 0 || XFS_FORCED_SHUTDOWN(ip->i_mount));
ASSERT(iip->ili_logged == 0 || XFS_FORCED_SHUTDOWN(ip->i_mount));
spin_unlock(&lip->li_ailp->xa_lock);
spin_unlock(&lip->li_ailp->ail_lock);
error = xfs_iflush(ip, &bp);
if (!error) {
@ -571,7 +571,7 @@ xfs_inode_item_push(
xfs_buf_relse(bp);
}
spin_lock(&lip->li_ailp->xa_lock);
spin_lock(&lip->li_ailp->ail_lock);
out_unlock:
xfs_iunlock(ip, XFS_ILOCK_SHARED);
return rval;
@ -579,9 +579,6 @@ out_unlock:
/*
* Unlock the inode associated with the inode log item.
* Clear the fields of the inode and inode log item that
* are specific to the current transaction. If the
* hold flags is set, do not unlock the inode.
*/
STATIC void
xfs_inode_item_unlock(
@ -637,10 +634,6 @@ xfs_inode_item_committed(
return lsn;
}
/*
* XXX rcc - this one really has to do something. Probably needs
* to stamp in a new field in the incore inode.
*/
STATIC void
xfs_inode_item_committing(
struct xfs_log_item *lip,
@ -759,7 +752,7 @@ xfs_iflush_done(
bool mlip_changed = false;
/* this is an opencoded batch version of xfs_trans_ail_delete */
spin_lock(&ailp->xa_lock);
spin_lock(&ailp->ail_lock);
list_for_each_entry(blip, &tmp, li_bio_list) {
if (INODE_ITEM(blip)->ili_logged &&
blip->li_lsn == INODE_ITEM(blip)->ili_flush_lsn)
@ -770,15 +763,15 @@ xfs_iflush_done(
}
if (mlip_changed) {
if (!XFS_FORCED_SHUTDOWN(ailp->xa_mount))
xlog_assign_tail_lsn_locked(ailp->xa_mount);
if (list_empty(&ailp->xa_ail))
wake_up_all(&ailp->xa_empty);
if (!XFS_FORCED_SHUTDOWN(ailp->ail_mount))
xlog_assign_tail_lsn_locked(ailp->ail_mount);
if (list_empty(&ailp->ail_head))
wake_up_all(&ailp->ail_empty);
}
spin_unlock(&ailp->xa_lock);
spin_unlock(&ailp->ail_lock);
if (mlip_changed)
xfs_log_space_wake(ailp->xa_mount);
xfs_log_space_wake(ailp->ail_mount);
}
/*

View File

@ -46,6 +46,7 @@
#include <linux/security.h>
#include <linux/iomap.h>
#include <linux/slab.h>
#include <linux/iversion.h>
/*
* Directories have different lock order w.r.t. mmap_sem compared to regular
@ -874,7 +875,9 @@ xfs_setattr_size(
* truncate.
*/
if (newsize > oldsize) {
error = xfs_zero_eof(ip, newsize, oldsize, &did_zeroing);
trace_xfs_zero_eof(ip, oldsize, newsize - oldsize);
error = iomap_zero_range(inode, oldsize, newsize - oldsize,
&did_zeroing, &xfs_iomap_ops);
} else {
error = iomap_truncate_page(inode, newsize, &did_zeroing,
&xfs_iomap_ops);
@ -1052,11 +1055,21 @@ xfs_vn_update_time(
{
struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount;
int log_flags = XFS_ILOG_TIMESTAMP;
struct xfs_trans *tp;
int error;
trace_xfs_update_time(ip);
if (inode->i_sb->s_flags & SB_LAZYTIME) {
if (!((flags & S_VERSION) &&
inode_maybe_inc_iversion(inode, false)))
return generic_update_time(inode, now, flags);
/* Capture the iversion update that just occurred */
log_flags |= XFS_ILOG_CORE;
}
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp);
if (error)
return error;
@ -1070,7 +1083,7 @@ xfs_vn_update_time(
inode->i_atime = *now;
xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP);
xfs_trans_log_inode(tp, ip, log_flags);
return xfs_trans_commit(tp);
}

View File

@ -869,7 +869,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
return 0;
}
error = _xfs_log_force(mp, XFS_LOG_SYNC, NULL);
error = xfs_log_force(mp, XFS_LOG_SYNC);
ASSERT(error || !(XLOG_FORCED_SHUTDOWN(log)));
#ifdef DEBUG
@ -1149,7 +1149,7 @@ xlog_assign_tail_lsn_locked(
struct xfs_log_item *lip;
xfs_lsn_t tail_lsn;
assert_spin_locked(&mp->m_ail->xa_lock);
assert_spin_locked(&mp->m_ail->ail_lock);
/*
* To make sure we always have a valid LSN for the log tail we keep
@ -1172,9 +1172,9 @@ xlog_assign_tail_lsn(
{
xfs_lsn_t tail_lsn;
spin_lock(&mp->m_ail->xa_lock);
spin_lock(&mp->m_ail->ail_lock);
tail_lsn = xlog_assign_tail_lsn_locked(mp);
spin_unlock(&mp->m_ail->xa_lock);
spin_unlock(&mp->m_ail->ail_lock);
return tail_lsn;
}
@ -3304,129 +3304,178 @@ xlog_state_switch_iclogs(
* not in the active nor dirty state.
*/
int
_xfs_log_force(
xfs_log_force(
struct xfs_mount *mp,
uint flags,
int *log_flushed)
uint flags)
{
struct xlog *log = mp->m_log;
struct xlog_in_core *iclog;
xfs_lsn_t lsn;
XFS_STATS_INC(mp, xs_log_force);
trace_xfs_log_force(mp, 0, _RET_IP_);
xlog_cil_force(log);
spin_lock(&log->l_icloglock);
iclog = log->l_iclog;
if (iclog->ic_state & XLOG_STATE_IOERROR) {
spin_unlock(&log->l_icloglock);
return -EIO;
}
if (iclog->ic_state & XLOG_STATE_IOERROR)
goto out_error;
/* If the head iclog is not active nor dirty, we just attach
* ourselves to the head and go to sleep.
*/
if (iclog->ic_state == XLOG_STATE_ACTIVE ||
iclog->ic_state == XLOG_STATE_DIRTY) {
if (iclog->ic_state == XLOG_STATE_DIRTY ||
(iclog->ic_state == XLOG_STATE_ACTIVE &&
atomic_read(&iclog->ic_refcnt) == 0 && iclog->ic_offset == 0)) {
/*
* If the head is dirty or (active and empty), then
* we need to look at the previous iclog. If the previous
* iclog is active or dirty we are done. There is nothing
* to sync out. Otherwise, we attach ourselves to the
* If the head is dirty or (active and empty), then we need to
* look at the previous iclog.
*
* If the previous iclog is active or dirty we are done. There
* is nothing to sync out. Otherwise, we attach ourselves to the
* previous iclog and go to sleep.
*/
if (iclog->ic_state == XLOG_STATE_DIRTY ||
(atomic_read(&iclog->ic_refcnt) == 0
&& iclog->ic_offset == 0)) {
iclog = iclog->ic_prev;
if (iclog->ic_state == XLOG_STATE_ACTIVE ||
iclog->ic_state == XLOG_STATE_DIRTY)
goto no_sleep;
else
goto maybe_sleep;
} else {
if (atomic_read(&iclog->ic_refcnt) == 0) {
/* We are the only one with access to this
* iclog. Flush it out now. There should
* be a roundoff of zero to show that someone
* has already taken care of the roundoff from
* the previous sync.
*/
atomic_inc(&iclog->ic_refcnt);
lsn = be64_to_cpu(iclog->ic_header.h_lsn);
xlog_state_switch_iclogs(log, iclog, 0);
spin_unlock(&log->l_icloglock);
if (xlog_state_release_iclog(log, iclog))
return -EIO;
if (log_flushed)
*log_flushed = 1;
spin_lock(&log->l_icloglock);
if (be64_to_cpu(iclog->ic_header.h_lsn) == lsn &&
iclog->ic_state != XLOG_STATE_DIRTY)
goto maybe_sleep;
else
goto no_sleep;
} else {
/* Someone else is writing to this iclog.
* Use its call to flush out the data. However,
* the other thread may not force out this LR,
* so we mark it WANT_SYNC.
*/
xlog_state_switch_iclogs(log, iclog, 0);
goto maybe_sleep;
}
}
}
/* By the time we come around again, the iclog could've been filled
* which would give it another lsn. If we have a new lsn, just
* return because the relevant data has been flushed.
*/
maybe_sleep:
if (flags & XFS_LOG_SYNC) {
/*
* We must check if we're shutting down here, before
* we wait, while we're holding the l_icloglock.
* Then we check again after waking up, in case our
* sleep was disturbed by a bad news.
*/
if (iclog->ic_state & XLOG_STATE_IOERROR) {
iclog = iclog->ic_prev;
if (iclog->ic_state == XLOG_STATE_ACTIVE ||
iclog->ic_state == XLOG_STATE_DIRTY)
goto out_unlock;
} else if (iclog->ic_state == XLOG_STATE_ACTIVE) {
if (atomic_read(&iclog->ic_refcnt) == 0) {
/*
* We are the only one with access to this iclog.
*
* Flush it out now. There should be a roundoff of zero
* to show that someone has already taken care of the
* roundoff from the previous sync.
*/
atomic_inc(&iclog->ic_refcnt);
lsn = be64_to_cpu(iclog->ic_header.h_lsn);
xlog_state_switch_iclogs(log, iclog, 0);
spin_unlock(&log->l_icloglock);
return -EIO;
}
XFS_STATS_INC(mp, xs_log_force_sleep);
xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
/*
* No need to grab the log lock here since we're
* only deciding whether or not to return EIO
* and the memory read should be atomic.
*/
if (iclog->ic_state & XLOG_STATE_IOERROR)
return -EIO;
} else {
no_sleep:
spin_unlock(&log->l_icloglock);
if (xlog_state_release_iclog(log, iclog))
return -EIO;
spin_lock(&log->l_icloglock);
if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn ||
iclog->ic_state == XLOG_STATE_DIRTY)
goto out_unlock;
} else {
/*
* Someone else is writing to this iclog.
*
* Use its call to flush out the data. However, the
* other thread may not force out this LR, so we mark
* it WANT_SYNC.
*/
xlog_state_switch_iclogs(log, iclog, 0);
}
} else {
/*
* If the head iclog is not active nor dirty, we just attach
* ourselves to the head and go to sleep if necessary.
*/
;
}
if (!(flags & XFS_LOG_SYNC))
goto out_unlock;
if (iclog->ic_state & XLOG_STATE_IOERROR)
goto out_error;
XFS_STATS_INC(mp, xs_log_force_sleep);
xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
if (iclog->ic_state & XLOG_STATE_IOERROR)
return -EIO;
return 0;
out_unlock:
spin_unlock(&log->l_icloglock);
return 0;
out_error:
spin_unlock(&log->l_icloglock);
return -EIO;
}
/*
* Wrapper for _xfs_log_force(), to be used when caller doesn't care
* about errors or whether the log was flushed or not. This is the normal
* interface to use when trying to unpin items or move the log forward.
*/
void
xfs_log_force(
xfs_mount_t *mp,
uint flags)
static int
__xfs_log_force_lsn(
struct xfs_mount *mp,
xfs_lsn_t lsn,
uint flags,
int *log_flushed,
bool already_slept)
{
trace_xfs_log_force(mp, 0, _RET_IP_);
_xfs_log_force(mp, flags, NULL);
struct xlog *log = mp->m_log;
struct xlog_in_core *iclog;
spin_lock(&log->l_icloglock);
iclog = log->l_iclog;
if (iclog->ic_state & XLOG_STATE_IOERROR)
goto out_error;
while (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) {
iclog = iclog->ic_next;
if (iclog == log->l_iclog)
goto out_unlock;
}
if (iclog->ic_state == XLOG_STATE_DIRTY)
goto out_unlock;
if (iclog->ic_state == XLOG_STATE_ACTIVE) {
/*
* We sleep here if we haven't already slept (e.g. this is the
* first time we've looked at the correct iclog buf) and the
* buffer before us is going to be sync'ed. The reason for this
* is that if we are doing sync transactions here, by waiting
* for the previous I/O to complete, we can allow a few more
* transactions into this iclog before we close it down.
*
* Otherwise, we mark the buffer WANT_SYNC, and bump up the
* refcnt so we can release the log (which drops the ref count).
* The state switch keeps new transaction commits from using
* this buffer. When the current commits finish writing into
* the buffer, the refcount will drop to zero and the buffer
* will go out then.
*/
if (!already_slept &&
(iclog->ic_prev->ic_state &
(XLOG_STATE_WANT_SYNC | XLOG_STATE_SYNCING))) {
ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR));
XFS_STATS_INC(mp, xs_log_force_sleep);
xlog_wait(&iclog->ic_prev->ic_write_wait,
&log->l_icloglock);
return -EAGAIN;
}
atomic_inc(&iclog->ic_refcnt);
xlog_state_switch_iclogs(log, iclog, 0);
spin_unlock(&log->l_icloglock);
if (xlog_state_release_iclog(log, iclog))
return -EIO;
if (log_flushed)
*log_flushed = 1;
spin_lock(&log->l_icloglock);
}
if (!(flags & XFS_LOG_SYNC) ||
(iclog->ic_state & (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY)))
goto out_unlock;
if (iclog->ic_state & XLOG_STATE_IOERROR)
goto out_error;
XFS_STATS_INC(mp, xs_log_force_sleep);
xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
if (iclog->ic_state & XLOG_STATE_IOERROR)
return -EIO;
return 0;
out_unlock:
spin_unlock(&log->l_icloglock);
return 0;
out_error:
spin_unlock(&log->l_icloglock);
return -EIO;
}
/*
@ -3438,135 +3487,32 @@ xfs_log_force(
* state and go to sleep or return.
* If it is in any other state, go to sleep or return.
*
* Synchronous forces are implemented with a signal variable. All callers
* to force a given lsn to disk will wait on a the sv attached to the
* specific in-core log. When given in-core log finally completes its
* write to disk, that thread will wake up all threads waiting on the
* sv.
* Synchronous forces are implemented with a wait queue. All callers trying
* to force a given lsn to disk must wait on the queue attached to the
* specific in-core log. When given in-core log finally completes its write
* to disk, that thread will wake up all threads waiting on the queue.
*/
int
_xfs_log_force_lsn(
xfs_log_force_lsn(
struct xfs_mount *mp,
xfs_lsn_t lsn,
uint flags,
int *log_flushed)
{
struct xlog *log = mp->m_log;
struct xlog_in_core *iclog;
int already_slept = 0;
int ret;
ASSERT(lsn != 0);
XFS_STATS_INC(mp, xs_log_force);
trace_xfs_log_force(mp, lsn, _RET_IP_);
lsn = xlog_cil_force_lsn(log, lsn);
lsn = xlog_cil_force_lsn(mp->m_log, lsn);
if (lsn == NULLCOMMITLSN)
return 0;
try_again:
spin_lock(&log->l_icloglock);
iclog = log->l_iclog;
if (iclog->ic_state & XLOG_STATE_IOERROR) {
spin_unlock(&log->l_icloglock);
return -EIO;
}
do {
if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) {
iclog = iclog->ic_next;
continue;
}
if (iclog->ic_state == XLOG_STATE_DIRTY) {
spin_unlock(&log->l_icloglock);
return 0;
}
if (iclog->ic_state == XLOG_STATE_ACTIVE) {
/*
* We sleep here if we haven't already slept (e.g.
* this is the first time we've looked at the correct
* iclog buf) and the buffer before us is going to
* be sync'ed. The reason for this is that if we
* are doing sync transactions here, by waiting for
* the previous I/O to complete, we can allow a few
* more transactions into this iclog before we close
* it down.
*
* Otherwise, we mark the buffer WANT_SYNC, and bump
* up the refcnt so we can release the log (which
* drops the ref count). The state switch keeps new
* transaction commits from using this buffer. When
* the current commits finish writing into the buffer,
* the refcount will drop to zero and the buffer will
* go out then.
*/
if (!already_slept &&
(iclog->ic_prev->ic_state &
(XLOG_STATE_WANT_SYNC | XLOG_STATE_SYNCING))) {
ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR));
XFS_STATS_INC(mp, xs_log_force_sleep);
xlog_wait(&iclog->ic_prev->ic_write_wait,
&log->l_icloglock);
already_slept = 1;
goto try_again;
}
atomic_inc(&iclog->ic_refcnt);
xlog_state_switch_iclogs(log, iclog, 0);
spin_unlock(&log->l_icloglock);
if (xlog_state_release_iclog(log, iclog))
return -EIO;
if (log_flushed)
*log_flushed = 1;
spin_lock(&log->l_icloglock);
}
if ((flags & XFS_LOG_SYNC) && /* sleep */
!(iclog->ic_state &
(XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))) {
/*
* Don't wait on completion if we know that we've
* gotten a log write error.
*/
if (iclog->ic_state & XLOG_STATE_IOERROR) {
spin_unlock(&log->l_icloglock);
return -EIO;
}
XFS_STATS_INC(mp, xs_log_force_sleep);
xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
/*
* No need to grab the log lock here since we're
* only deciding whether or not to return EIO
* and the memory read should be atomic.
*/
if (iclog->ic_state & XLOG_STATE_IOERROR)
return -EIO;
} else { /* just return */
spin_unlock(&log->l_icloglock);
}
return 0;
} while (iclog != log->l_iclog);
spin_unlock(&log->l_icloglock);
return 0;
}
/*
* Wrapper for _xfs_log_force_lsn(), to be used when caller doesn't care
* about errors or whether the log was flushed or not. This is the normal
* interface to use when trying to unpin items or move the log forward.
*/
void
xfs_log_force_lsn(
xfs_mount_t *mp,
xfs_lsn_t lsn,
uint flags)
{
trace_xfs_log_force(mp, lsn, _RET_IP_);
_xfs_log_force_lsn(mp, lsn, flags, NULL);
ret = __xfs_log_force_lsn(mp, lsn, flags, log_flushed, false);
if (ret == -EAGAIN)
ret = __xfs_log_force_lsn(mp, lsn, flags, log_flushed, true);
return ret;
}
/*
@ -4035,7 +3981,7 @@ xfs_log_force_umount(
* to guarantee this.
*/
if (!logerror)
_xfs_log_force(mp, XFS_LOG_SYNC, NULL);
xfs_log_force(mp, XFS_LOG_SYNC);
/*
* mark the filesystem and the as in a shutdown state and wake

View File

@ -129,18 +129,9 @@ xfs_lsn_t xfs_log_done(struct xfs_mount *mp,
struct xlog_ticket *ticket,
struct xlog_in_core **iclog,
bool regrant);
int _xfs_log_force(struct xfs_mount *mp,
uint flags,
int *log_forced);
void xfs_log_force(struct xfs_mount *mp,
uint flags);
int _xfs_log_force_lsn(struct xfs_mount *mp,
xfs_lsn_t lsn,
uint flags,
int *log_forced);
void xfs_log_force_lsn(struct xfs_mount *mp,
xfs_lsn_t lsn,
uint flags);
int xfs_log_force(struct xfs_mount *mp, uint flags);
int xfs_log_force_lsn(struct xfs_mount *mp, xfs_lsn_t lsn, uint flags,
int *log_forced);
int xfs_log_mount(struct xfs_mount *mp,
struct xfs_buftarg *log_target,
xfs_daddr_t start_block,

View File

@ -202,7 +202,7 @@ xlog_cil_alloc_shadow_bufs(
*/
kmem_free(lip->li_lv_shadow);
lv = kmem_alloc(buf_size, KM_SLEEP|KM_NOFS);
lv = kmem_alloc_large(buf_size, KM_SLEEP | KM_NOFS);
memset(lv, 0, xlog_cil_iovec_space(niovecs));
lv->lv_item = lip;

View File

@ -3173,13 +3173,6 @@ xlog_recover_inode_pass2(
/* recover the log dinode inode into the on disk inode */
xfs_log_dinode_to_disk(ldip, dip);
/* the rest is in on-disk format */
if (item->ri_buf[1].i_len > isize) {
memcpy((char *)dip + isize,
item->ri_buf[1].i_addr + isize,
item->ri_buf[1].i_len - isize);
}
fields = in_f->ilf_fields;
if (fields & XFS_ILOG_DEV)
xfs_dinode_put_rdev(dip, in_f->ilf_u.ilfu_rdev);
@ -3252,7 +3245,9 @@ xlog_recover_inode_pass2(
}
out_owner_change:
if (in_f->ilf_fields & (XFS_ILOG_DOWNER|XFS_ILOG_AOWNER))
/* Recover the swapext owner change unless inode has been deleted */
if ((in_f->ilf_fields & (XFS_ILOG_DOWNER|XFS_ILOG_AOWNER)) &&
(dip->di_mode != 0))
error = xfs_recover_inode_owner_change(mp, dip, in_f,
buffer_list);
/* re-generate the checksum. */
@ -3434,7 +3429,7 @@ xlog_recover_efi_pass2(
}
atomic_set(&efip->efi_next_extent, efi_formatp->efi_nextents);
spin_lock(&log->l_ailp->xa_lock);
spin_lock(&log->l_ailp->ail_lock);
/*
* The EFI has two references. One for the EFD and one for EFI to ensure
* it makes it into the AIL. Insert the EFI into the AIL directly and
@ -3477,7 +3472,7 @@ xlog_recover_efd_pass2(
* Search for the EFI with the id in the EFD format structure in the
* AIL.
*/
spin_lock(&ailp->xa_lock);
spin_lock(&ailp->ail_lock);
lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
while (lip != NULL) {
if (lip->li_type == XFS_LI_EFI) {
@ -3487,9 +3482,9 @@ xlog_recover_efd_pass2(
* Drop the EFD reference to the EFI. This
* removes the EFI from the AIL and frees it.
*/
spin_unlock(&ailp->xa_lock);
spin_unlock(&ailp->ail_lock);
xfs_efi_release(efip);
spin_lock(&ailp->xa_lock);
spin_lock(&ailp->ail_lock);
break;
}
}
@ -3497,7 +3492,7 @@ xlog_recover_efd_pass2(
}
xfs_trans_ail_cursor_done(&cur);
spin_unlock(&ailp->xa_lock);
spin_unlock(&ailp->ail_lock);
return 0;
}
@ -3530,7 +3525,7 @@ xlog_recover_rui_pass2(
}
atomic_set(&ruip->rui_next_extent, rui_formatp->rui_nextents);
spin_lock(&log->l_ailp->xa_lock);
spin_lock(&log->l_ailp->ail_lock);
/*
* The RUI has two references. One for the RUD and one for RUI to ensure
* it makes it into the AIL. Insert the RUI into the AIL directly and
@ -3570,7 +3565,7 @@ xlog_recover_rud_pass2(
* Search for the RUI with the id in the RUD format structure in the
* AIL.
*/
spin_lock(&ailp->xa_lock);
spin_lock(&ailp->ail_lock);
lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
while (lip != NULL) {
if (lip->li_type == XFS_LI_RUI) {
@ -3580,9 +3575,9 @@ xlog_recover_rud_pass2(
* Drop the RUD reference to the RUI. This
* removes the RUI from the AIL and frees it.
*/
spin_unlock(&ailp->xa_lock);
spin_unlock(&ailp->ail_lock);
xfs_rui_release(ruip);
spin_lock(&ailp->xa_lock);
spin_lock(&ailp->ail_lock);
break;
}
}
@ -3590,7 +3585,7 @@ xlog_recover_rud_pass2(
}
xfs_trans_ail_cursor_done(&cur);
spin_unlock(&ailp->xa_lock);
spin_unlock(&ailp->ail_lock);
return 0;
}
@ -3646,7 +3641,7 @@ xlog_recover_cui_pass2(
}
atomic_set(&cuip->cui_next_extent, cui_formatp->cui_nextents);
spin_lock(&log->l_ailp->xa_lock);
spin_lock(&log->l_ailp->ail_lock);
/*
* The CUI has two references. One for the CUD and one for CUI to ensure
* it makes it into the AIL. Insert the CUI into the AIL directly and
@ -3687,7 +3682,7 @@ xlog_recover_cud_pass2(
* Search for the CUI with the id in the CUD format structure in the
* AIL.
*/
spin_lock(&ailp->xa_lock);
spin_lock(&ailp->ail_lock);
lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
while (lip != NULL) {
if (lip->li_type == XFS_LI_CUI) {
@ -3697,9 +3692,9 @@ xlog_recover_cud_pass2(
* Drop the CUD reference to the CUI. This
* removes the CUI from the AIL and frees it.
*/
spin_unlock(&ailp->xa_lock);
spin_unlock(&ailp->ail_lock);
xfs_cui_release(cuip);
spin_lock(&ailp->xa_lock);
spin_lock(&ailp->ail_lock);
break;
}
}
@ -3707,7 +3702,7 @@ xlog_recover_cud_pass2(
}
xfs_trans_ail_cursor_done(&cur);
spin_unlock(&ailp->xa_lock);
spin_unlock(&ailp->ail_lock);
return 0;
}
@ -3765,7 +3760,7 @@ xlog_recover_bui_pass2(
}
atomic_set(&buip->bui_next_extent, bui_formatp->bui_nextents);
spin_lock(&log->l_ailp->xa_lock);
spin_lock(&log->l_ailp->ail_lock);
/*
* The RUI has two references. One for the RUD and one for RUI to ensure
* it makes it into the AIL. Insert the RUI into the AIL directly and
@ -3806,7 +3801,7 @@ xlog_recover_bud_pass2(
* Search for the BUI with the id in the BUD format structure in the
* AIL.
*/
spin_lock(&ailp->xa_lock);
spin_lock(&ailp->ail_lock);
lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
while (lip != NULL) {
if (lip->li_type == XFS_LI_BUI) {
@ -3816,9 +3811,9 @@ xlog_recover_bud_pass2(
* Drop the BUD reference to the BUI. This
* removes the BUI from the AIL and frees it.
*/
spin_unlock(&ailp->xa_lock);
spin_unlock(&ailp->ail_lock);
xfs_bui_release(buip);
spin_lock(&ailp->xa_lock);
spin_lock(&ailp->ail_lock);
break;
}
}
@ -3826,7 +3821,7 @@ xlog_recover_bud_pass2(
}
xfs_trans_ail_cursor_done(&cur);
spin_unlock(&ailp->xa_lock);
spin_unlock(&ailp->ail_lock);
return 0;
}
@ -4659,9 +4654,9 @@ xlog_recover_process_efi(
if (test_bit(XFS_EFI_RECOVERED, &efip->efi_flags))
return 0;
spin_unlock(&ailp->xa_lock);
spin_unlock(&ailp->ail_lock);
error = xfs_efi_recover(mp, efip);
spin_lock(&ailp->xa_lock);
spin_lock(&ailp->ail_lock);
return error;
}
@ -4677,9 +4672,9 @@ xlog_recover_cancel_efi(
efip = container_of(lip, struct xfs_efi_log_item, efi_item);
spin_unlock(&ailp->xa_lock);
spin_unlock(&ailp->ail_lock);
xfs_efi_release(efip);
spin_lock(&ailp->xa_lock);
spin_lock(&ailp->ail_lock);
}
/* Recover the RUI if necessary. */
@ -4699,9 +4694,9 @@ xlog_recover_process_rui(
if (test_bit(XFS_RUI_RECOVERED, &ruip->rui_flags))
return 0;
spin_unlock(&ailp->xa_lock);
spin_unlock(&ailp->ail_lock);
error = xfs_rui_recover(mp, ruip);
spin_lock(&ailp->xa_lock);
spin_lock(&ailp->ail_lock);
return error;
}
@ -4717,9 +4712,9 @@ xlog_recover_cancel_rui(
ruip = container_of(lip, struct xfs_rui_log_item, rui_item);
spin_unlock(&ailp->xa_lock);
spin_unlock(&ailp->ail_lock);
xfs_rui_release(ruip);
spin_lock(&ailp->xa_lock);
spin_lock(&ailp->ail_lock);
}
/* Recover the CUI if necessary. */
@ -4740,9 +4735,9 @@ xlog_recover_process_cui(
if (test_bit(XFS_CUI_RECOVERED, &cuip->cui_flags))
return 0;
spin_unlock(&ailp->xa_lock);
spin_unlock(&ailp->ail_lock);
error = xfs_cui_recover(mp, cuip, dfops);
spin_lock(&ailp->xa_lock);
spin_lock(&ailp->ail_lock);
return error;
}
@ -4758,9 +4753,9 @@ xlog_recover_cancel_cui(
cuip = container_of(lip, struct xfs_cui_log_item, cui_item);
spin_unlock(&ailp->xa_lock);
spin_unlock(&ailp->ail_lock);
xfs_cui_release(cuip);
spin_lock(&ailp->xa_lock);
spin_lock(&ailp->ail_lock);
}
/* Recover the BUI if necessary. */
@ -4781,9 +4776,9 @@ xlog_recover_process_bui(
if (test_bit(XFS_BUI_RECOVERED, &buip->bui_flags))
return 0;
spin_unlock(&ailp->xa_lock);
spin_unlock(&ailp->ail_lock);
error = xfs_bui_recover(mp, buip, dfops);
spin_lock(&ailp->xa_lock);
spin_lock(&ailp->ail_lock);
return error;
}
@ -4799,9 +4794,9 @@ xlog_recover_cancel_bui(
buip = container_of(lip, struct xfs_bui_log_item, bui_item);
spin_unlock(&ailp->xa_lock);
spin_unlock(&ailp->ail_lock);
xfs_bui_release(buip);
spin_lock(&ailp->xa_lock);
spin_lock(&ailp->ail_lock);
}
/* Is this log item a deferred action intent? */
@ -4889,7 +4884,7 @@ xlog_recover_process_intents(
#endif
ailp = log->l_ailp;
spin_lock(&ailp->xa_lock);
spin_lock(&ailp->ail_lock);
lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
#if defined(DEBUG) || defined(XFS_WARN)
last_lsn = xlog_assign_lsn(log->l_curr_cycle, log->l_curr_block);
@ -4943,7 +4938,7 @@ xlog_recover_process_intents(
}
out:
xfs_trans_ail_cursor_done(&cur);
spin_unlock(&ailp->xa_lock);
spin_unlock(&ailp->ail_lock);
if (error)
xfs_defer_cancel(&dfops);
else
@ -4966,7 +4961,7 @@ xlog_recover_cancel_intents(
struct xfs_ail *ailp;
ailp = log->l_ailp;
spin_lock(&ailp->xa_lock);
spin_lock(&ailp->ail_lock);
lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
while (lip != NULL) {
/*
@ -5000,7 +4995,7 @@ xlog_recover_cancel_intents(
}
xfs_trans_ail_cursor_done(&cur);
spin_unlock(&ailp->xa_lock);
spin_unlock(&ailp->ail_lock);
return error;
}
@ -5127,16 +5122,9 @@ xlog_recover_process_iunlinks(
xfs_agino_t agino;
int bucket;
int error;
uint mp_dmevmask;
mp = log->l_mp;
/*
* Prevent any DMAPI event from being sent while in this function.
*/
mp_dmevmask = mp->m_dmevmask;
mp->m_dmevmask = 0;
for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
/*
* Find the agi for this ag.
@ -5172,8 +5160,6 @@ xlog_recover_process_iunlinks(
}
xfs_buf_rele(agibp);
}
mp->m_dmevmask = mp_dmevmask;
}
STATIC int

View File

@ -803,8 +803,6 @@ xfs_mountfs(
get_unaligned_be16(&sbp->sb_uuid.b[4]);
mp->m_fixedfsid[1] = get_unaligned_be32(&sbp->sb_uuid.b[0]);
mp->m_dmevmask = 0; /* not persistent; set after each mount */
error = xfs_da_mount(mp);
if (error) {
xfs_warn(mp, "Failed dir/attr init: %d", error);
@ -819,8 +817,6 @@ xfs_mountfs(
/*
* Allocate and initialize the per-ag data.
*/
spin_lock_init(&mp->m_perag_lock);
INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC);
error = xfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi);
if (error) {
xfs_warn(mp, "Failed per-ag init: %d", error);

View File

@ -138,7 +138,6 @@ typedef struct xfs_mount {
spinlock_t m_perag_lock; /* lock for m_perag_tree */
struct mutex m_growlock; /* growfs mutex */
int m_fixedfsid[2]; /* unchanged for life of FS */
uint m_dmevmask; /* DMI events for this FS */
uint64_t m_flags; /* global mount flags */
bool m_inotbt_nores; /* no per-AG finobt resv. */
int m_ialloc_inos; /* inodes in inode allocation */
@ -326,8 +325,9 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
/* per-AG block reservation data structures*/
enum xfs_ag_resv_type {
XFS_AG_RESV_NONE = 0,
XFS_AG_RESV_METADATA,
XFS_AG_RESV_AGFL,
XFS_AG_RESV_METADATA,
XFS_AG_RESV_RMAPBT,
};
struct xfs_ag_resv {
@ -353,6 +353,7 @@ typedef struct xfs_perag {
char pagi_inodeok; /* The agi is ok for inodes */
uint8_t pagf_levels[XFS_BTNUM_AGF];
/* # of levels in bno & cnt btree */
bool pagf_agflreset; /* agfl requires reset before use */
uint32_t pagf_flcount; /* count of blocks in freelist */
xfs_extlen_t pagf_freeblks; /* total free blocks */
xfs_extlen_t pagf_longest; /* longest free space */
@ -391,8 +392,8 @@ typedef struct xfs_perag {
/* Blocks reserved for all kinds of metadata. */
struct xfs_ag_resv pag_meta_resv;
/* Blocks reserved for just AGFL-based metadata. */
struct xfs_ag_resv pag_agfl_resv;
/* Blocks reserved for the reverse mapping btree. */
struct xfs_ag_resv pag_rmapbt_resv;
/* reference count */
uint8_t pagf_refcount_level;
@ -406,8 +407,8 @@ xfs_perag_resv(
switch (type) {
case XFS_AG_RESV_METADATA:
return &pag->pag_meta_resv;
case XFS_AG_RESV_AGFL:
return &pag->pag_agfl_resv;
case XFS_AG_RESV_RMAPBT:
return &pag->pag_rmapbt_resv;
default:
return NULL;
}

View File

@ -394,7 +394,7 @@ xfs_reflink_allocate_cow(
retry:
ASSERT(xfs_is_reflink_inode(ip));
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED));
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
/*
* Even if the extent is not shared we might have a preallocation for
@ -668,7 +668,7 @@ xfs_reflink_cancel_cow_range(
/* Start a rolling transaction to remove the mappings */
error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write,
0, 0, 0, &tp);
0, 0, XFS_TRANS_NOFS, &tp);
if (error)
goto out;
@ -741,7 +741,7 @@ xfs_reflink_end_cow(
(unsigned int)(end_fsb - offset_fsb),
XFS_DATA_FORK);
error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write,
resblks, 0, XFS_TRANS_RESERVE, &tp);
resblks, 0, XFS_TRANS_RESERVE | XFS_TRANS_NOFS, &tp);
if (error)
goto out;
@ -762,10 +762,8 @@ xfs_reflink_end_cow(
xfs_trim_extent(&del, offset_fsb, end_fsb - offset_fsb);
/* Extent delete may have bumped ext forward */
if (!del.br_blockcount) {
xfs_iext_prev(ifp, &icur);
goto next_extent;
}
if (!del.br_blockcount)
goto prev_extent;
ASSERT(!isnullstartblock(got.br_startblock));
@ -774,10 +772,8 @@ xfs_reflink_end_cow(
* speculatively preallocated CoW extents that have been
* allocated but have not yet been involved in a write.
*/
if (got.br_state == XFS_EXT_UNWRITTEN) {
xfs_iext_prev(ifp, &icur);
goto next_extent;
}
if (got.br_state == XFS_EXT_UNWRITTEN)
goto prev_extent;
/* Unmap the old blocks in the data fork. */
xfs_defer_init(&dfops, &firstfsb);
@ -816,9 +812,12 @@ xfs_reflink_end_cow(
error = xfs_defer_finish(&tp, &dfops);
if (error)
goto out_defer;
next_extent:
if (!xfs_iext_get_extent(ifp, &icur, &got))
break;
continue;
prev_extent:
if (!xfs_iext_prev_extent(ifp, &icur, &got))
break;
}
error = xfs_trans_commit(tp);
@ -1061,7 +1060,7 @@ xfs_reflink_ag_has_free_space(
return 0;
pag = xfs_perag_get(mp, agno);
if (xfs_ag_resv_critical(pag, XFS_AG_RESV_AGFL) ||
if (xfs_ag_resv_critical(pag, XFS_AG_RESV_RMAPBT) ||
xfs_ag_resv_critical(pag, XFS_AG_RESV_METADATA))
error = -ENOSPC;
xfs_perag_put(pag);

View File

@ -972,7 +972,6 @@ xfs_fs_destroy_inode(
struct inode *inode)
{
struct xfs_inode *ip = XFS_I(inode);
int error;
trace_xfs_destroy_inode(ip);
@ -980,14 +979,6 @@ xfs_fs_destroy_inode(
XFS_STATS_INC(ip->i_mount, vn_rele);
XFS_STATS_INC(ip->i_mount, vn_remove);
if (xfs_is_reflink_inode(ip)) {
error = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true);
if (error && !XFS_FORCED_SHUTDOWN(ip->i_mount))
xfs_warn(ip->i_mount,
"Error %d while evicting CoW blocks for inode %llu.",
error, ip->i_ino);
}
xfs_inactive(ip);
ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
@ -1009,6 +1000,28 @@ xfs_fs_destroy_inode(
xfs_inode_set_reclaim_tag(ip);
}
static void
xfs_fs_dirty_inode(
struct inode *inode,
int flag)
{
struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount;
struct xfs_trans *tp;
if (!(inode->i_sb->s_flags & SB_LAZYTIME))
return;
if (flag != I_DIRTY_SYNC || !(inode->i_state & I_DIRTY_TIME))
return;
if (xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp))
return;
xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP);
xfs_trans_commit(tp);
}
/*
* Slab object creation initialisation for the XFS inode.
* This covers only the idempotent fields in the XFS inode;
@ -1566,6 +1579,31 @@ xfs_destroy_percpu_counters(
percpu_counter_destroy(&mp->m_fdblocks);
}
static struct xfs_mount *
xfs_mount_alloc(
struct super_block *sb)
{
struct xfs_mount *mp;
mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL);
if (!mp)
return NULL;
mp->m_super = sb;
spin_lock_init(&mp->m_sb_lock);
spin_lock_init(&mp->m_agirotor_lock);
INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC);
spin_lock_init(&mp->m_perag_lock);
mutex_init(&mp->m_growlock);
atomic_set(&mp->m_active_trans, 0);
INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
INIT_DELAYED_WORK(&mp->m_eofblocks_work, xfs_eofblocks_worker);
INIT_DELAYED_WORK(&mp->m_cowblocks_work, xfs_cowblocks_worker);
mp->m_kobj.kobject.kset = xfs_kset;
return mp;
}
STATIC int
xfs_fs_fill_super(
struct super_block *sb,
@ -1576,19 +1614,13 @@ xfs_fs_fill_super(
struct xfs_mount *mp = NULL;
int flags = 0, error = -ENOMEM;
mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL);
/*
* allocate mp and do all low-level struct initializations before we
* attach it to the super
*/
mp = xfs_mount_alloc(sb);
if (!mp)
goto out;
spin_lock_init(&mp->m_sb_lock);
mutex_init(&mp->m_growlock);
atomic_set(&mp->m_active_trans, 0);
INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
INIT_DELAYED_WORK(&mp->m_eofblocks_work, xfs_eofblocks_worker);
INIT_DELAYED_WORK(&mp->m_cowblocks_work, xfs_cowblocks_worker);
mp->m_kobj.kobject.kset = xfs_kset;
mp->m_super = sb;
sb->s_fs_info = mp;
error = xfs_parseargs(mp, (char *)data);
@ -1789,6 +1821,7 @@ xfs_fs_free_cached_objects(
static const struct super_operations xfs_super_operations = {
.alloc_inode = xfs_fs_alloc_inode,
.destroy_inode = xfs_fs_destroy_inode,
.dirty_inode = xfs_fs_dirty_inode,
.drop_inode = xfs_fs_drop_inode,
.put_super = xfs_fs_put_super,
.sync_fs = xfs_fs_sync_fs,

View File

@ -1477,7 +1477,7 @@ TRACE_EVENT(xfs_extent_busy_trim,
__entry->tlen)
);
TRACE_EVENT(xfs_agf,
DECLARE_EVENT_CLASS(xfs_agf_class,
TP_PROTO(struct xfs_mount *mp, struct xfs_agf *agf, int flags,
unsigned long caller_ip),
TP_ARGS(mp, agf, flags, caller_ip),
@ -1533,6 +1533,13 @@ TRACE_EVENT(xfs_agf,
__entry->longest,
(void *)__entry->caller_ip)
);
#define DEFINE_AGF_EVENT(name) \
DEFINE_EVENT(xfs_agf_class, name, \
TP_PROTO(struct xfs_mount *mp, struct xfs_agf *agf, int flags, \
unsigned long caller_ip), \
TP_ARGS(mp, agf, flags, caller_ip))
DEFINE_AGF_EVENT(xfs_agf);
DEFINE_AGF_EVENT(xfs_agfl_reset);
TRACE_EVENT(xfs_free_extent,
TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno,

View File

@ -119,8 +119,11 @@ xfs_trans_dup(
/* We gave our writer reference to the new transaction */
tp->t_flags |= XFS_TRANS_NO_WRITECOUNT;
ntp->t_ticket = xfs_log_ticket_get(tp->t_ticket);
ASSERT(tp->t_blk_res >= tp->t_blk_res_used);
ntp->t_blk_res = tp->t_blk_res - tp->t_blk_res_used;
tp->t_blk_res = tp->t_blk_res_used;
ntp->t_rtx_res = tp->t_rtx_res - tp->t_rtx_res_used;
tp->t_rtx_res = tp->t_rtx_res_used;
ntp->t_pflags = tp->t_pflags;
@ -344,13 +347,14 @@ xfs_trans_mod_sb(
break;
case XFS_TRANS_SB_FDBLOCKS:
/*
* Track the number of blocks allocated in the
* transaction. Make sure it does not exceed the
* number reserved.
* Track the number of blocks allocated in the transaction.
* Make sure it does not exceed the number reserved. If so,
* shutdown as this can lead to accounting inconsistency.
*/
if (delta < 0) {
tp->t_blk_res_used += (uint)-delta;
ASSERT(tp->t_blk_res_used <= tp->t_blk_res);
if (tp->t_blk_res_used > tp->t_blk_res)
xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
}
tp->t_fdblocks_delta += delta;
if (xfs_sb_version_haslazysbcount(&mp->m_sb))
@ -803,8 +807,8 @@ xfs_log_item_batch_insert(
{
int i;
spin_lock(&ailp->xa_lock);
/* xfs_trans_ail_update_bulk drops ailp->xa_lock */
spin_lock(&ailp->ail_lock);
/* xfs_trans_ail_update_bulk drops ailp->ail_lock */
xfs_trans_ail_update_bulk(ailp, cur, log_items, nr_items, commit_lsn);
for (i = 0; i < nr_items; i++) {
@ -847,9 +851,9 @@ xfs_trans_committed_bulk(
struct xfs_ail_cursor cur;
int i = 0;
spin_lock(&ailp->xa_lock);
spin_lock(&ailp->ail_lock);
xfs_trans_ail_cursor_last(ailp, &cur, commit_lsn);
spin_unlock(&ailp->xa_lock);
spin_unlock(&ailp->ail_lock);
/* unpin all the log items */
for (lv = log_vector; lv; lv = lv->lv_next ) {
@ -869,7 +873,7 @@ xfs_trans_committed_bulk(
* object into the AIL as we are in a shutdown situation.
*/
if (aborted) {
ASSERT(XFS_FORCED_SHUTDOWN(ailp->xa_mount));
ASSERT(XFS_FORCED_SHUTDOWN(ailp->ail_mount));
lip->li_ops->iop_unpin(lip, 1);
continue;
}
@ -883,11 +887,11 @@ xfs_trans_committed_bulk(
* not affect the AIL cursor the bulk insert path is
* using.
*/
spin_lock(&ailp->xa_lock);
spin_lock(&ailp->ail_lock);
if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0)
xfs_trans_ail_update(ailp, lip, item_lsn);
else
spin_unlock(&ailp->xa_lock);
spin_unlock(&ailp->ail_lock);
lip->li_ops->iop_unpin(lip, 0);
continue;
}
@ -905,9 +909,9 @@ xfs_trans_committed_bulk(
if (i)
xfs_log_item_batch_insert(ailp, &cur, log_items, i, commit_lsn);
spin_lock(&ailp->xa_lock);
spin_lock(&ailp->ail_lock);
xfs_trans_ail_cursor_done(&cur);
spin_unlock(&ailp->xa_lock);
spin_unlock(&ailp->ail_lock);
}
/*
@ -966,7 +970,7 @@ __xfs_trans_commit(
* log out now and wait for it.
*/
if (sync) {
error = _xfs_log_force_lsn(mp, commit_lsn, XFS_LOG_SYNC, NULL);
error = xfs_log_force_lsn(mp, commit_lsn, XFS_LOG_SYNC, NULL);
XFS_STATS_INC(mp, xs_trans_sync);
} else {
XFS_STATS_INC(mp, xs_trans_async);

View File

@ -40,7 +40,7 @@ xfs_ail_check(
{
xfs_log_item_t *prev_lip;
if (list_empty(&ailp->xa_ail))
if (list_empty(&ailp->ail_head))
return;
/*
@ -48,11 +48,11 @@ xfs_ail_check(
*/
ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0);
prev_lip = list_entry(lip->li_ail.prev, xfs_log_item_t, li_ail);
if (&prev_lip->li_ail != &ailp->xa_ail)
if (&prev_lip->li_ail != &ailp->ail_head)
ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0);
prev_lip = list_entry(lip->li_ail.next, xfs_log_item_t, li_ail);
if (&prev_lip->li_ail != &ailp->xa_ail)
if (&prev_lip->li_ail != &ailp->ail_head)
ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) >= 0);
@ -69,10 +69,10 @@ static xfs_log_item_t *
xfs_ail_max(
struct xfs_ail *ailp)
{
if (list_empty(&ailp->xa_ail))
if (list_empty(&ailp->ail_head))
return NULL;
return list_entry(ailp->xa_ail.prev, xfs_log_item_t, li_ail);
return list_entry(ailp->ail_head.prev, xfs_log_item_t, li_ail);
}
/*
@ -84,7 +84,7 @@ xfs_ail_next(
struct xfs_ail *ailp,
xfs_log_item_t *lip)
{
if (lip->li_ail.next == &ailp->xa_ail)
if (lip->li_ail.next == &ailp->ail_head)
return NULL;
return list_first_entry(&lip->li_ail, xfs_log_item_t, li_ail);
@ -105,11 +105,11 @@ xfs_ail_min_lsn(
xfs_lsn_t lsn = 0;
xfs_log_item_t *lip;
spin_lock(&ailp->xa_lock);
spin_lock(&ailp->ail_lock);
lip = xfs_ail_min(ailp);
if (lip)
lsn = lip->li_lsn;
spin_unlock(&ailp->xa_lock);
spin_unlock(&ailp->ail_lock);
return lsn;
}
@ -124,11 +124,11 @@ xfs_ail_max_lsn(
xfs_lsn_t lsn = 0;
xfs_log_item_t *lip;
spin_lock(&ailp->xa_lock);
spin_lock(&ailp->ail_lock);
lip = xfs_ail_max(ailp);
if (lip)
lsn = lip->li_lsn;
spin_unlock(&ailp->xa_lock);
spin_unlock(&ailp->ail_lock);
return lsn;
}
@ -146,7 +146,7 @@ xfs_trans_ail_cursor_init(
struct xfs_ail_cursor *cur)
{
cur->item = NULL;
list_add_tail(&cur->list, &ailp->xa_cursors);
list_add_tail(&cur->list, &ailp->ail_cursors);
}
/*
@ -194,7 +194,7 @@ xfs_trans_ail_cursor_clear(
{
struct xfs_ail_cursor *cur;
list_for_each_entry(cur, &ailp->xa_cursors, list) {
list_for_each_entry(cur, &ailp->ail_cursors, list) {
if (cur->item == lip)
cur->item = (struct xfs_log_item *)
((uintptr_t)cur->item | 1);
@ -222,7 +222,7 @@ xfs_trans_ail_cursor_first(
goto out;
}
list_for_each_entry(lip, &ailp->xa_ail, li_ail) {
list_for_each_entry(lip, &ailp->ail_head, li_ail) {
if (XFS_LSN_CMP(lip->li_lsn, lsn) >= 0)
goto out;
}
@ -241,7 +241,7 @@ __xfs_trans_ail_cursor_last(
{
xfs_log_item_t *lip;
list_for_each_entry_reverse(lip, &ailp->xa_ail, li_ail) {
list_for_each_entry_reverse(lip, &ailp->ail_head, li_ail) {
if (XFS_LSN_CMP(lip->li_lsn, lsn) <= 0)
return lip;
}
@ -310,7 +310,7 @@ xfs_ail_splice(
if (lip)
list_splice(list, &lip->li_ail);
else
list_splice(list, &ailp->xa_ail);
list_splice(list, &ailp->ail_head);
}
/*
@ -335,17 +335,17 @@ xfsaild_push_item(
* If log item pinning is enabled, skip the push and track the item as
* pinned. This can help induce head-behind-tail conditions.
*/
if (XFS_TEST_ERROR(false, ailp->xa_mount, XFS_ERRTAG_LOG_ITEM_PIN))
if (XFS_TEST_ERROR(false, ailp->ail_mount, XFS_ERRTAG_LOG_ITEM_PIN))
return XFS_ITEM_PINNED;
return lip->li_ops->iop_push(lip, &ailp->xa_buf_list);
return lip->li_ops->iop_push(lip, &ailp->ail_buf_list);
}
static long
xfsaild_push(
struct xfs_ail *ailp)
{
xfs_mount_t *mp = ailp->xa_mount;
xfs_mount_t *mp = ailp->ail_mount;
struct xfs_ail_cursor cur;
xfs_log_item_t *lip;
xfs_lsn_t lsn;
@ -360,30 +360,30 @@ xfsaild_push(
* buffers the last time we ran, force the log first and wait for it
* before pushing again.
*/
if (ailp->xa_log_flush && ailp->xa_last_pushed_lsn == 0 &&
(!list_empty_careful(&ailp->xa_buf_list) ||
if (ailp->ail_log_flush && ailp->ail_last_pushed_lsn == 0 &&
(!list_empty_careful(&ailp->ail_buf_list) ||
xfs_ail_min_lsn(ailp))) {
ailp->xa_log_flush = 0;
ailp->ail_log_flush = 0;
XFS_STATS_INC(mp, xs_push_ail_flush);
xfs_log_force(mp, XFS_LOG_SYNC);
}
spin_lock(&ailp->xa_lock);
spin_lock(&ailp->ail_lock);
/* barrier matches the xa_target update in xfs_ail_push() */
/* barrier matches the ail_target update in xfs_ail_push() */
smp_rmb();
target = ailp->xa_target;
ailp->xa_target_prev = target;
target = ailp->ail_target;
ailp->ail_target_prev = target;
lip = xfs_trans_ail_cursor_first(ailp, &cur, ailp->xa_last_pushed_lsn);
lip = xfs_trans_ail_cursor_first(ailp, &cur, ailp->ail_last_pushed_lsn);
if (!lip) {
/*
* If the AIL is empty or our push has reached the end we are
* done now.
*/
xfs_trans_ail_cursor_done(&cur);
spin_unlock(&ailp->xa_lock);
spin_unlock(&ailp->ail_lock);
goto out_done;
}
@ -404,7 +404,7 @@ xfsaild_push(
XFS_STATS_INC(mp, xs_push_ail_success);
trace_xfs_ail_push(lip);
ailp->xa_last_pushed_lsn = lsn;
ailp->ail_last_pushed_lsn = lsn;
break;
case XFS_ITEM_FLUSHING:
@ -423,7 +423,7 @@ xfsaild_push(
trace_xfs_ail_flushing(lip);
flushing++;
ailp->xa_last_pushed_lsn = lsn;
ailp->ail_last_pushed_lsn = lsn;
break;
case XFS_ITEM_PINNED:
@ -431,7 +431,7 @@ xfsaild_push(
trace_xfs_ail_pinned(lip);
stuck++;
ailp->xa_log_flush++;
ailp->ail_log_flush++;
break;
case XFS_ITEM_LOCKED:
XFS_STATS_INC(mp, xs_push_ail_locked);
@ -468,10 +468,10 @@ xfsaild_push(
lsn = lip->li_lsn;
}
xfs_trans_ail_cursor_done(&cur);
spin_unlock(&ailp->xa_lock);
spin_unlock(&ailp->ail_lock);
if (xfs_buf_delwri_submit_nowait(&ailp->xa_buf_list))
ailp->xa_log_flush++;
if (xfs_buf_delwri_submit_nowait(&ailp->ail_buf_list))
ailp->ail_log_flush++;
if (!count || XFS_LSN_CMP(lsn, target) >= 0) {
out_done:
@ -481,7 +481,7 @@ out_done:
* AIL before we start the next scan from the start of the AIL.
*/
tout = 50;
ailp->xa_last_pushed_lsn = 0;
ailp->ail_last_pushed_lsn = 0;
} else if (((stuck + flushing) * 100) / count > 90) {
/*
* Either there is a lot of contention on the AIL or we are
@ -494,7 +494,7 @@ out_done:
* the restart to issue a log force to unpin the stuck items.
*/
tout = 20;
ailp->xa_last_pushed_lsn = 0;
ailp->ail_last_pushed_lsn = 0;
} else {
/*
* Assume we have more work to do in a short while.
@ -536,26 +536,26 @@ xfsaild(
break;
}
spin_lock(&ailp->xa_lock);
spin_lock(&ailp->ail_lock);
/*
* Idle if the AIL is empty and we are not racing with a target
* update. We check the AIL after we set the task to a sleep
* state to guarantee that we either catch an xa_target update
* state to guarantee that we either catch an ail_target update
* or that a wake_up resets the state to TASK_RUNNING.
* Otherwise, we run the risk of sleeping indefinitely.
*
* The barrier matches the xa_target update in xfs_ail_push().
* The barrier matches the ail_target update in xfs_ail_push().
*/
smp_rmb();
if (!xfs_ail_min(ailp) &&
ailp->xa_target == ailp->xa_target_prev) {
spin_unlock(&ailp->xa_lock);
ailp->ail_target == ailp->ail_target_prev) {
spin_unlock(&ailp->ail_lock);
freezable_schedule();
tout = 0;
continue;
}
spin_unlock(&ailp->xa_lock);
spin_unlock(&ailp->ail_lock);
if (tout)
freezable_schedule_timeout(msecs_to_jiffies(tout));
@ -592,8 +592,8 @@ xfs_ail_push(
xfs_log_item_t *lip;
lip = xfs_ail_min(ailp);
if (!lip || XFS_FORCED_SHUTDOWN(ailp->xa_mount) ||
XFS_LSN_CMP(threshold_lsn, ailp->xa_target) <= 0)
if (!lip || XFS_FORCED_SHUTDOWN(ailp->ail_mount) ||
XFS_LSN_CMP(threshold_lsn, ailp->ail_target) <= 0)
return;
/*
@ -601,10 +601,10 @@ xfs_ail_push(
* the XFS_AIL_PUSHING_BIT.
*/
smp_wmb();
xfs_trans_ail_copy_lsn(ailp, &ailp->xa_target, &threshold_lsn);
xfs_trans_ail_copy_lsn(ailp, &ailp->ail_target, &threshold_lsn);
smp_wmb();
wake_up_process(ailp->xa_task);
wake_up_process(ailp->ail_task);
}
/*
@ -630,18 +630,18 @@ xfs_ail_push_all_sync(
struct xfs_log_item *lip;
DEFINE_WAIT(wait);
spin_lock(&ailp->xa_lock);
spin_lock(&ailp->ail_lock);
while ((lip = xfs_ail_max(ailp)) != NULL) {
prepare_to_wait(&ailp->xa_empty, &wait, TASK_UNINTERRUPTIBLE);
ailp->xa_target = lip->li_lsn;
wake_up_process(ailp->xa_task);
spin_unlock(&ailp->xa_lock);
prepare_to_wait(&ailp->ail_empty, &wait, TASK_UNINTERRUPTIBLE);
ailp->ail_target = lip->li_lsn;
wake_up_process(ailp->ail_task);
spin_unlock(&ailp->ail_lock);
schedule();
spin_lock(&ailp->xa_lock);
spin_lock(&ailp->ail_lock);
}
spin_unlock(&ailp->xa_lock);
spin_unlock(&ailp->ail_lock);
finish_wait(&ailp->xa_empty, &wait);
finish_wait(&ailp->ail_empty, &wait);
}
/*
@ -672,7 +672,7 @@ xfs_trans_ail_update_bulk(
struct xfs_ail_cursor *cur,
struct xfs_log_item **log_items,
int nr_items,
xfs_lsn_t lsn) __releases(ailp->xa_lock)
xfs_lsn_t lsn) __releases(ailp->ail_lock)
{
xfs_log_item_t *mlip;
int mlip_changed = 0;
@ -705,13 +705,13 @@ xfs_trans_ail_update_bulk(
xfs_ail_splice(ailp, cur, &tmp, lsn);
if (mlip_changed) {
if (!XFS_FORCED_SHUTDOWN(ailp->xa_mount))
xlog_assign_tail_lsn_locked(ailp->xa_mount);
spin_unlock(&ailp->xa_lock);
if (!XFS_FORCED_SHUTDOWN(ailp->ail_mount))
xlog_assign_tail_lsn_locked(ailp->ail_mount);
spin_unlock(&ailp->ail_lock);
xfs_log_space_wake(ailp->xa_mount);
xfs_log_space_wake(ailp->ail_mount);
} else {
spin_unlock(&ailp->xa_lock);
spin_unlock(&ailp->ail_lock);
}
}
@ -756,13 +756,13 @@ void
xfs_trans_ail_delete(
struct xfs_ail *ailp,
struct xfs_log_item *lip,
int shutdown_type) __releases(ailp->xa_lock)
int shutdown_type) __releases(ailp->ail_lock)
{
struct xfs_mount *mp = ailp->xa_mount;
struct xfs_mount *mp = ailp->ail_mount;
bool mlip_changed;
if (!(lip->li_flags & XFS_LI_IN_AIL)) {
spin_unlock(&ailp->xa_lock);
spin_unlock(&ailp->ail_lock);
if (!XFS_FORCED_SHUTDOWN(mp)) {
xfs_alert_tag(mp, XFS_PTAG_AILDELETE,
"%s: attempting to delete a log item that is not in the AIL",
@ -776,13 +776,13 @@ xfs_trans_ail_delete(
if (mlip_changed) {
if (!XFS_FORCED_SHUTDOWN(mp))
xlog_assign_tail_lsn_locked(mp);
if (list_empty(&ailp->xa_ail))
wake_up_all(&ailp->xa_empty);
if (list_empty(&ailp->ail_head))
wake_up_all(&ailp->ail_empty);
}
spin_unlock(&ailp->xa_lock);
spin_unlock(&ailp->ail_lock);
if (mlip_changed)
xfs_log_space_wake(ailp->xa_mount);
xfs_log_space_wake(ailp->ail_mount);
}
int
@ -795,16 +795,16 @@ xfs_trans_ail_init(
if (!ailp)
return -ENOMEM;
ailp->xa_mount = mp;
INIT_LIST_HEAD(&ailp->xa_ail);
INIT_LIST_HEAD(&ailp->xa_cursors);
spin_lock_init(&ailp->xa_lock);
INIT_LIST_HEAD(&ailp->xa_buf_list);
init_waitqueue_head(&ailp->xa_empty);
ailp->ail_mount = mp;
INIT_LIST_HEAD(&ailp->ail_head);
INIT_LIST_HEAD(&ailp->ail_cursors);
spin_lock_init(&ailp->ail_lock);
INIT_LIST_HEAD(&ailp->ail_buf_list);
init_waitqueue_head(&ailp->ail_empty);
ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild/%s",
ailp->xa_mount->m_fsname);
if (IS_ERR(ailp->xa_task))
ailp->ail_task = kthread_run(xfsaild, ailp, "xfsaild/%s",
ailp->ail_mount->m_fsname);
if (IS_ERR(ailp->ail_task))
goto out_free_ailp;
mp->m_ail = ailp;
@ -821,6 +821,6 @@ xfs_trans_ail_destroy(
{
struct xfs_ail *ailp = mp->m_ail;
kthread_stop(ailp->xa_task);
kthread_stop(ailp->ail_task);
kmem_free(ailp);
}

View File

@ -431,8 +431,8 @@ xfs_trans_brelse(
* If the fs has shutdown and we dropped the last reference, it may fall
* on us to release a (possibly dirty) bli if it never made it to the
* AIL (e.g., the aborted unpin already happened and didn't release it
* due to our reference). Since we're already shutdown and need xa_lock,
* just force remove from the AIL and release the bli here.
* due to our reference). Since we're already shutdown and need
* ail_lock, just force remove from the AIL and release the bli here.
*/
if (XFS_FORCED_SHUTDOWN(tp->t_mountp) && freed) {
xfs_trans_ail_remove(&bip->bli_item, SHUTDOWN_LOG_IO_ERROR);

View File

@ -98,9 +98,23 @@ xfs_trans_log_inode(
xfs_inode_t *ip,
uint flags)
{
struct inode *inode = VFS_I(ip);
ASSERT(ip->i_itemp != NULL);
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
/*
* Don't bother with i_lock for the I_DIRTY_TIME check here, as races
* don't matter - we either will need an extra transaction in 24 hours
* to log the timestamps, or will clear already cleared fields in the
* worst case.
*/
if (inode->i_state & (I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED)) {
spin_lock(&inode->i_lock);
inode->i_state &= ~(I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED);
spin_unlock(&inode->i_lock);
}
/*
* Record the specific change for fdatasync optimisation. This
* allows fdatasync to skip log forces for inodes that are only

View File

@ -65,17 +65,17 @@ struct xfs_ail_cursor {
* Eventually we need to drive the locking in here as well.
*/
struct xfs_ail {
struct xfs_mount *xa_mount;
struct task_struct *xa_task;
struct list_head xa_ail;
xfs_lsn_t xa_target;
xfs_lsn_t xa_target_prev;
struct list_head xa_cursors;
spinlock_t xa_lock;
xfs_lsn_t xa_last_pushed_lsn;
int xa_log_flush;
struct list_head xa_buf_list;
wait_queue_head_t xa_empty;
struct xfs_mount *ail_mount;
struct task_struct *ail_task;
struct list_head ail_head;
xfs_lsn_t ail_target;
xfs_lsn_t ail_target_prev;
struct list_head ail_cursors;
spinlock_t ail_lock;
xfs_lsn_t ail_last_pushed_lsn;
int ail_log_flush;
struct list_head ail_buf_list;
wait_queue_head_t ail_empty;
};
/*
@ -84,7 +84,7 @@ struct xfs_ail {
void xfs_trans_ail_update_bulk(struct xfs_ail *ailp,
struct xfs_ail_cursor *cur,
struct xfs_log_item **log_items, int nr_items,
xfs_lsn_t lsn) __releases(ailp->xa_lock);
xfs_lsn_t lsn) __releases(ailp->ail_lock);
/*
* Return a pointer to the first item in the AIL. If the AIL is empty, then
* return NULL.
@ -93,7 +93,7 @@ static inline struct xfs_log_item *
xfs_ail_min(
struct xfs_ail *ailp)
{
return list_first_entry_or_null(&ailp->xa_ail, struct xfs_log_item,
return list_first_entry_or_null(&ailp->ail_head, struct xfs_log_item,
li_ail);
}
@ -101,14 +101,14 @@ static inline void
xfs_trans_ail_update(
struct xfs_ail *ailp,
struct xfs_log_item *lip,
xfs_lsn_t lsn) __releases(ailp->xa_lock)
xfs_lsn_t lsn) __releases(ailp->ail_lock)
{
xfs_trans_ail_update_bulk(ailp, NULL, &lip, 1, lsn);
}
bool xfs_ail_delete_one(struct xfs_ail *ailp, struct xfs_log_item *lip);
void xfs_trans_ail_delete(struct xfs_ail *ailp, struct xfs_log_item *lip,
int shutdown_type) __releases(ailp->xa_lock);
int shutdown_type) __releases(ailp->ail_lock);
static inline void
xfs_trans_ail_remove(
@ -117,12 +117,12 @@ xfs_trans_ail_remove(
{
struct xfs_ail *ailp = lip->li_ailp;
spin_lock(&ailp->xa_lock);
spin_lock(&ailp->ail_lock);
/* xfs_trans_ail_delete() drops the AIL lock */
if (lip->li_flags & XFS_LI_IN_AIL)
xfs_trans_ail_delete(ailp, lip, shutdown_type);
else
spin_unlock(&ailp->xa_lock);
spin_unlock(&ailp->ail_lock);
}
void xfs_ail_push(struct xfs_ail *, xfs_lsn_t);
@ -149,9 +149,9 @@ xfs_trans_ail_copy_lsn(
xfs_lsn_t *src)
{
ASSERT(sizeof(xfs_lsn_t) == 8); /* don't lock if it shrinks */
spin_lock(&ailp->xa_lock);
spin_lock(&ailp->ail_lock);
*dst = *src;
spin_unlock(&ailp->xa_lock);
spin_unlock(&ailp->ail_lock);
}
#else
static inline void
@ -172,7 +172,7 @@ xfs_clear_li_failed(
struct xfs_buf *bp = lip->li_buf;
ASSERT(lip->li_flags & XFS_LI_IN_AIL);
lockdep_assert_held(&lip->li_ailp->xa_lock);
lockdep_assert_held(&lip->li_ailp->ail_lock);
if (lip->li_flags & XFS_LI_FAILED) {
lip->li_flags &= ~XFS_LI_FAILED;
@ -186,7 +186,7 @@ xfs_set_li_failed(
struct xfs_log_item *lip,
struct xfs_buf *bp)
{
lockdep_assert_held(&lip->li_ailp->xa_lock);
lockdep_assert_held(&lip->li_ailp->ail_lock);
if (!(lip->li_flags & XFS_LI_FAILED)) {
xfs_buf_hold(bp);