linux-sg2042/fs/xfs/libxfs/xfs_bmap.c

6270 lines
166 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2000-2006 Silicon Graphics, Inc.
* All Rights Reserved.
*/
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_dir2.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_trans.h"
#include "xfs_alloc.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include "xfs_bmap_btree.h"
#include "xfs_rtalloc.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_quota.h"
#include "xfs_trans_space.h"
#include "xfs_buf_item.h"
xfs: event tracing support Convert the old xfs tracing support that could only be used with the out of tree kdb and xfsidbg patches to use the generic event tracer. To use it make sure CONFIG_EVENT_TRACING is enabled and then enable all xfs trace channels by: echo 1 > /sys/kernel/debug/tracing/events/xfs/enable or alternatively enable single events by just doing the same in one event subdirectory, e.g. echo 1 > /sys/kernel/debug/tracing/events/xfs/xfs_ihold/enable or set more complex filters, etc. In Documentation/trace/events.txt all this is desctribed in more detail. To reads the events do a cat /sys/kernel/debug/tracing/trace Compared to the last posting this patch converts the tracing mostly to the one tracepoint per callsite model that other users of the new tracing facility also employ. This allows a very fine-grained control of the tracing, a cleaner output of the traces and also enables the perf tool to use each tracepoint as a virtual performance counter, allowing us to e.g. count how often certain workloads git various spots in XFS. Take a look at http://lwn.net/Articles/346470/ for some examples. Also the btree tracing isn't included at all yet, as it will require additional core tracing features not in mainline yet, I plan to deliver it later. And the really nice thing about this patch is that it actually removes many lines of code while adding this nice functionality: fs/xfs/Makefile | 8 fs/xfs/linux-2.6/xfs_acl.c | 1 fs/xfs/linux-2.6/xfs_aops.c | 52 - fs/xfs/linux-2.6/xfs_aops.h | 2 fs/xfs/linux-2.6/xfs_buf.c | 117 +-- fs/xfs/linux-2.6/xfs_buf.h | 33 fs/xfs/linux-2.6/xfs_fs_subr.c | 3 fs/xfs/linux-2.6/xfs_ioctl.c | 1 fs/xfs/linux-2.6/xfs_ioctl32.c | 1 fs/xfs/linux-2.6/xfs_iops.c | 1 fs/xfs/linux-2.6/xfs_linux.h | 1 fs/xfs/linux-2.6/xfs_lrw.c | 87 -- fs/xfs/linux-2.6/xfs_lrw.h | 45 - fs/xfs/linux-2.6/xfs_super.c | 104 --- fs/xfs/linux-2.6/xfs_super.h | 7 fs/xfs/linux-2.6/xfs_sync.c | 1 fs/xfs/linux-2.6/xfs_trace.c | 75 ++ fs/xfs/linux-2.6/xfs_trace.h | 1369 +++++++++++++++++++++++++++++++++++++++++ fs/xfs/linux-2.6/xfs_vnode.h | 4 fs/xfs/quota/xfs_dquot.c | 110 --- fs/xfs/quota/xfs_dquot.h | 21 fs/xfs/quota/xfs_qm.c | 40 - fs/xfs/quota/xfs_qm_syscalls.c | 4 fs/xfs/support/ktrace.c | 323 --------- fs/xfs/support/ktrace.h | 85 -- fs/xfs/xfs.h | 16 fs/xfs/xfs_ag.h | 14 fs/xfs/xfs_alloc.c | 230 +----- fs/xfs/xfs_alloc.h | 27 fs/xfs/xfs_alloc_btree.c | 1 fs/xfs/xfs_attr.c | 107 --- fs/xfs/xfs_attr.h | 10 fs/xfs/xfs_attr_leaf.c | 14 fs/xfs/xfs_attr_sf.h | 40 - fs/xfs/xfs_bmap.c | 507 +++------------ fs/xfs/xfs_bmap.h | 49 - fs/xfs/xfs_bmap_btree.c | 6 fs/xfs/xfs_btree.c | 5 fs/xfs/xfs_btree_trace.h | 17 fs/xfs/xfs_buf_item.c | 87 -- fs/xfs/xfs_buf_item.h | 20 fs/xfs/xfs_da_btree.c | 3 fs/xfs/xfs_da_btree.h | 7 fs/xfs/xfs_dfrag.c | 2 fs/xfs/xfs_dir2.c | 8 fs/xfs/xfs_dir2_block.c | 20 fs/xfs/xfs_dir2_leaf.c | 21 fs/xfs/xfs_dir2_node.c | 27 fs/xfs/xfs_dir2_sf.c | 26 fs/xfs/xfs_dir2_trace.c | 216 ------ fs/xfs/xfs_dir2_trace.h | 72 -- fs/xfs/xfs_filestream.c | 8 fs/xfs/xfs_fsops.c | 2 fs/xfs/xfs_iget.c | 111 --- fs/xfs/xfs_inode.c | 67 -- fs/xfs/xfs_inode.h | 76 -- fs/xfs/xfs_inode_item.c | 5 fs/xfs/xfs_iomap.c | 85 -- fs/xfs/xfs_iomap.h | 8 fs/xfs/xfs_log.c | 181 +---- fs/xfs/xfs_log_priv.h | 20 fs/xfs/xfs_log_recover.c | 1 fs/xfs/xfs_mount.c | 2 fs/xfs/xfs_quota.h | 8 fs/xfs/xfs_rename.c | 1 fs/xfs/xfs_rtalloc.c | 1 fs/xfs/xfs_rw.c | 3 fs/xfs/xfs_trans.h | 47 + fs/xfs/xfs_trans_buf.c | 62 - fs/xfs/xfs_vnodeops.c | 8 70 files changed, 2151 insertions(+), 2592 deletions(-) Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Alex Elder <aelder@sgi.com>
2009-12-15 07:14:59 +08:00
#include "xfs_trace.h"
#include "xfs_attr_leaf.h"
#include "xfs_filestream.h"
xfs: add owner field to extent allocation and freeing For the rmap btree to work, we have to feed the extent owner information to the the allocation and freeing functions. This information is what will end up in the rmap btree that tracks allocated extents. While we technically don't need the owner information when freeing extents, passing it allows us to validate that the extent we are removing from the rmap btree actually belonged to the owner we expected it to belong to. We also define a special set of owner values for internal metadata that would otherwise have no owner. This allows us to tell the difference between metadata owned by different per-ag btrees, as well as static fs metadata (e.g. AG headers) and internal journal blocks. There are also a couple of special cases we need to take care of - during EFI recovery, we don't actually know who the original owner was, so we need to pass a wildcard to indicate that we aren't checking the owner for validity. We also need special handling in growfs, as we "free" the space in the last AG when extending it, but because it's new space it has no actual owner... While touching the xfs_bmap_add_free() function, re-order the parameters to put the struct xfs_mount first. Extend the owner field to include both the owner type and some sort of index within the owner. The index field will be used to support reverse mappings when reflink is enabled. When we're freeing extents from an EFI, we don't have the owner information available (rmap updates have their own redo items). xfs_free_extent therefore doesn't need to do an rmap update. Make sure that the log replay code signals this correctly. This is based upon a patch originally from Dave Chinner. It has been extended to add more owner information with the intent of helping recovery operations when things go wrong (e.g. offset of user data block in a file). [dchinner: de-shout the xfs_rmap_*_owner helpers] [darrick: minor style fixes suggested by Christoph Hellwig] Signed-off-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-08-03 09:33:42 +08:00
#include "xfs_rmap.h"
#include "xfs_ag.h"
xfs: set up per-AG free space reservations One unfortunate quirk of the reference count and reverse mapping btrees -- they can expand in size when blocks are written to *other* allocation groups if, say, one large extent becomes a lot of tiny extents. Since we don't want to start throwing errors in the middle of CoWing, we need to reserve some blocks to handle future expansion. The transaction block reservation counters aren't sufficient here because we have to have a reserve of blocks in every AG, not just somewhere in the filesystem. Therefore, create two per-AG block reservation pools. One feeds the AGFL so that rmapbt expansion always succeeds, and the other feeds all other metadata so that refcountbt expansion never fails. Use the count of how many reserved blocks we need to have on hand to create a virtual reservation in the AG. Through selective clamping of the maximum length of allocation requests and of the length of the longest free extent, we can make it look like there's less free space in the AG unless the reservation owner is asking for blocks. In other words, play some accounting tricks in-core to make sure that we always have blocks available. On the plus side, there's nothing to clean up if we crash, which is contrast to the strategy that the rough draft used (actually removing extents from the freespace btrees). Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-09-19 08:30:52 +08:00
#include "xfs_ag_resv.h"
#include "xfs_refcount.h"
xfs: track preallocation separately in xfs_bmapi_reserve_delalloc() Speculative preallocation is currently processed entirely by the callers of xfs_bmapi_reserve_delalloc(). The caller determines how much preallocation to include, adjusts the extent length and passes down the resulting request. While this works fine for post-eof speculative preallocation, it is not as reliable for COW fork preallocation. COW fork preallocation is implemented via the cowextszhint, which aligns the start offset as well as the length of the extent. Further, it is difficult for the caller to accurately identify when preallocation occurs because the returned extent could have been merged with neighboring extents in the fork. To simplify this situation and facilitate further COW fork preallocation enhancements, update xfs_bmapi_reserve_delalloc() to take a separate preallocation parameter to incorporate into the allocation request. The preallocation blocks value is tacked onto the end of the request and adjusted to accommodate neighboring extents and extent size limits. Since xfs_bmapi_reserve_delalloc() now knows precisely how much preallocation was included in the allocation, it can also tag the inodes appropriately to support preallocation reclaim. Note that xfs_bmapi_reserve_delalloc() callers are not yet updated to use the preallocation mechanism. This patch should not change behavior outside of correctly tagging reflink inodes when start offset preallocation occurs (which the caller does not handle correctly). Signed-off-by: Brian Foster <bfoster@redhat.com> Reviewed-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-11-28 11:57:42 +08:00
#include "xfs_icache.h"
#include "xfs_iomap.h"
struct kmem_cache *xfs_bmap_intent_cache;
/*
* Miscellaneous helper functions
*/
/*
* Compute and fill in the value of the maximum depth of a bmap btree
* in this filesystem. Done once, during mount.
*/
void
xfs_bmap_compute_maxlevels(
xfs_mount_t *mp, /* file system mount structure */
int whichfork) /* data or attr fork */
{
int level; /* btree level */
uint maxblocks; /* max blocks at this level */
uint maxleafents; /* max leaf entries possible */
int maxrootrecs; /* max records in root block */
int minleafrecs; /* min records in leaf block */
int minnoderecs; /* min records in node block */
int sz; /* root block size */
/*
* The maximum number of extents in a file, hence the maximum number of
* leaf entries, is controlled by the size of the on-disk extent count,
* either a signed 32-bit number for the data fork, or a signed 16-bit
* number for the attr fork.
*
* Note that we can no longer assume that if we are in ATTR1 that the
* fork offset of all the inodes will be
* (xfs_default_attroffset(ip) >> 3) because we could have mounted with
* ATTR2 and then mounted back with ATTR1, keeping the i_forkoff's fixed
* but probably at various positions. Therefore, for both ATTR1 and
* ATTR2 we have to assume the worst case scenario of a minimum size
* available.
*/
if (whichfork == XFS_DATA_FORK) {
maxleafents = MAXEXTNUM;
sz = XFS_BMDR_SPACE_CALC(MINDBTPTRS);
} else {
maxleafents = MAXAEXTNUM;
sz = XFS_BMDR_SPACE_CALC(MINABTPTRS);
}
maxrootrecs = xfs_bmdr_maxrecs(sz, 0);
minleafrecs = mp->m_bmap_dmnr[0];
minnoderecs = mp->m_bmap_dmnr[1];
maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
for (level = 1; maxblocks > 1; level++) {
if (maxblocks <= maxrootrecs)
maxblocks = 1;
else
maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
}
mp->m_bm_maxlevels[whichfork] = level;
ASSERT(mp->m_bm_maxlevels[whichfork] <= xfs_bmbt_maxlevels_ondisk());
}
unsigned int
xfs_bmap_compute_attr_offset(
struct xfs_mount *mp)
{
if (mp->m_sb.sb_inodesize == 256)
return XFS_LITINO(mp) - XFS_BMDR_SPACE_CALC(MINABTPTRS);
return XFS_BMDR_SPACE_CALC(6 * MINABTPTRS);
}
STATIC int /* error */
xfs_bmbt_lookup_eq(
struct xfs_btree_cur *cur,
struct xfs_bmbt_irec *irec,
int *stat) /* success/failure */
{
cur->bc_rec.b = *irec;
return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
}
STATIC int /* error */
xfs_bmbt_lookup_first(
struct xfs_btree_cur *cur,
int *stat) /* success/failure */
{
cur->bc_rec.b.br_startoff = 0;
cur->bc_rec.b.br_startblock = 0;
cur->bc_rec.b.br_blockcount = 0;
return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
}
/*
* Check if the inode needs to be converted to btree format.
*/
static inline bool xfs_bmap_needs_btree(struct xfs_inode *ip, int whichfork)
{
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
return whichfork != XFS_COW_FORK &&
ifp->if_format == XFS_DINODE_FMT_EXTENTS &&
ifp->if_nextents > XFS_IFORK_MAXEXT(ip, whichfork);
}
/*
* Check if the inode should be converted to extent format.
*/
static inline bool xfs_bmap_wants_extents(struct xfs_inode *ip, int whichfork)
{
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
return whichfork != XFS_COW_FORK &&
ifp->if_format == XFS_DINODE_FMT_BTREE &&
ifp->if_nextents <= XFS_IFORK_MAXEXT(ip, whichfork);
}
/*
* Update the record referred to by cur to the value given by irec
* This either works (return 0) or gets an EFSCORRUPTED error.
*/
STATIC int
xfs_bmbt_update(
struct xfs_btree_cur *cur,
struct xfs_bmbt_irec *irec)
{
union xfs_btree_rec rec;
xfs_bmbt_disk_set_all(&rec.bmbt, irec);
return xfs_btree_update(cur, &rec);
}
/*
* Compute the worst-case number of indirect blocks that will be used
* for ip's delayed extent of length "len".
*/
STATIC xfs_filblks_t
xfs_bmap_worst_indlen(
xfs_inode_t *ip, /* incore inode pointer */
xfs_filblks_t len) /* delayed extent length */
{
int level; /* btree level number */
int maxrecs; /* maximum record count at this level */
xfs_mount_t *mp; /* mount structure */
xfs_filblks_t rval; /* return value */
mp = ip->i_mount;
maxrecs = mp->m_bmap_dmxr[0];
for (level = 0, rval = 0;
level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK);
level++) {
len += maxrecs - 1;
do_div(len, maxrecs);
rval += len;
if (len == 1)
return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
level - 1;
if (level == 0)
maxrecs = mp->m_bmap_dmxr[1];
}
return rval;
}
/*
* Calculate the default attribute fork offset for newly created inodes.
*/
uint
xfs_default_attroffset(
struct xfs_inode *ip)
{
if (ip->i_df.if_format == XFS_DINODE_FMT_DEV)
return roundup(sizeof(xfs_dev_t), 8);
return M_IGEO(ip->i_mount)->attr_fork_offset;
}
/*
* Helper routine to reset inode i_forkoff field when switching attribute fork
* from local to extent format - we reset it where possible to make space
* available for inline data fork extents.
xfs: xfs_bmap_add_attrfork_local is too generic When we are converting local data to an extent format as a result of adding an attribute, the type of data contained in the local fork determines the behaviour that needs to occur. xfs_bmap_add_attrfork_local() already handles the directory data case specially by using S_ISDIR() and calling out to xfs_dir2_sf_to_block(), but with verifiers we now need to handle each different type of metadata specially and different metadata formats require different verifiers (and eventually block header initialisation). There is only a single place that we add and attribute fork to the inode, but that is in the attribute code and it knows nothing about the specific contents of the data fork. It is only the case of local data that is the issue here, so adding code to hadnle this case in the attribute specific code is wrong. Hence we are really stuck trying to detect the data fork contents in xfs_bmap_add_attrfork_local() and performing the correct callout there. Luckily the current cases can be determined by S_IS* macros, and we can push the work off to data specific callouts, but each of those callouts does a lot of work in common with xfs_bmap_local_to_extents(). The only reason that this fails for symlinks right now is is that xfs_bmap_local_to_extents() assumes the data fork contains extent data, and so attaches a a bmap extent data verifier to the buffer and simply copies the data fork information straight into it. To fix this, allow us to pass a "formatting" callback into xfs_bmap_local_to_extents() which is responsible for setting the buffer type, initialising it and copying the data fork contents over to the new buffer. This allows callers to specify how they want to format the new buffer (which is necessary for the upcoming CRC enabled metadata blocks) and hence make xfs_bmap_local_to_extents() useful for any type of data fork content. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Mark Tinguely <tinguely@sgi.com> Signed-off-by: Ben Myers <bpm@sgi.com>
2013-02-11 12:58:13 +08:00
*/
STATIC void
xfs_bmap_forkoff_reset(
xfs_inode_t *ip,
int whichfork)
xfs: xfs_bmap_add_attrfork_local is too generic When we are converting local data to an extent format as a result of adding an attribute, the type of data contained in the local fork determines the behaviour that needs to occur. xfs_bmap_add_attrfork_local() already handles the directory data case specially by using S_ISDIR() and calling out to xfs_dir2_sf_to_block(), but with verifiers we now need to handle each different type of metadata specially and different metadata formats require different verifiers (and eventually block header initialisation). There is only a single place that we add and attribute fork to the inode, but that is in the attribute code and it knows nothing about the specific contents of the data fork. It is only the case of local data that is the issue here, so adding code to hadnle this case in the attribute specific code is wrong. Hence we are really stuck trying to detect the data fork contents in xfs_bmap_add_attrfork_local() and performing the correct callout there. Luckily the current cases can be determined by S_IS* macros, and we can push the work off to data specific callouts, but each of those callouts does a lot of work in common with xfs_bmap_local_to_extents(). The only reason that this fails for symlinks right now is is that xfs_bmap_local_to_extents() assumes the data fork contains extent data, and so attaches a a bmap extent data verifier to the buffer and simply copies the data fork information straight into it. To fix this, allow us to pass a "formatting" callback into xfs_bmap_local_to_extents() which is responsible for setting the buffer type, initialising it and copying the data fork contents over to the new buffer. This allows callers to specify how they want to format the new buffer (which is necessary for the upcoming CRC enabled metadata blocks) and hence make xfs_bmap_local_to_extents() useful for any type of data fork content. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Mark Tinguely <tinguely@sgi.com> Signed-off-by: Ben Myers <bpm@sgi.com>
2013-02-11 12:58:13 +08:00
{
if (whichfork == XFS_ATTR_FORK &&
ip->i_df.if_format != XFS_DINODE_FMT_DEV &&
ip->i_df.if_format != XFS_DINODE_FMT_BTREE) {
uint dfl_forkoff = xfs_default_attroffset(ip) >> 3;
if (dfl_forkoff > ip->i_forkoff)
ip->i_forkoff = dfl_forkoff;
}
xfs: xfs_bmap_add_attrfork_local is too generic When we are converting local data to an extent format as a result of adding an attribute, the type of data contained in the local fork determines the behaviour that needs to occur. xfs_bmap_add_attrfork_local() already handles the directory data case specially by using S_ISDIR() and calling out to xfs_dir2_sf_to_block(), but with verifiers we now need to handle each different type of metadata specially and different metadata formats require different verifiers (and eventually block header initialisation). There is only a single place that we add and attribute fork to the inode, but that is in the attribute code and it knows nothing about the specific contents of the data fork. It is only the case of local data that is the issue here, so adding code to hadnle this case in the attribute specific code is wrong. Hence we are really stuck trying to detect the data fork contents in xfs_bmap_add_attrfork_local() and performing the correct callout there. Luckily the current cases can be determined by S_IS* macros, and we can push the work off to data specific callouts, but each of those callouts does a lot of work in common with xfs_bmap_local_to_extents(). The only reason that this fails for symlinks right now is is that xfs_bmap_local_to_extents() assumes the data fork contains extent data, and so attaches a a bmap extent data verifier to the buffer and simply copies the data fork information straight into it. To fix this, allow us to pass a "formatting" callback into xfs_bmap_local_to_extents() which is responsible for setting the buffer type, initialising it and copying the data fork contents over to the new buffer. This allows callers to specify how they want to format the new buffer (which is necessary for the upcoming CRC enabled metadata blocks) and hence make xfs_bmap_local_to_extents() useful for any type of data fork content. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Mark Tinguely <tinguely@sgi.com> Signed-off-by: Ben Myers <bpm@sgi.com>
2013-02-11 12:58:13 +08:00
}
#ifdef DEBUG
STATIC struct xfs_buf *
xfs_bmap_get_bp(
struct xfs_btree_cur *cur,
xfs_fsblock_t bno)
{
struct xfs_log_item *lip;
int i;
if (!cur)
return NULL;
for (i = 0; i < cur->bc_maxlevels; i++) {
if (!cur->bc_levels[i].bp)
break;
if (xfs_buf_daddr(cur->bc_levels[i].bp) == bno)
return cur->bc_levels[i].bp;
}
/* Chase down all the log items to see if the bp is there */
list_for_each_entry(lip, &cur->bc_tp->t_items, li_trans) {
struct xfs_buf_log_item *bip = (struct xfs_buf_log_item *)lip;
if (bip->bli_item.li_type == XFS_LI_BUF &&
xfs_buf_daddr(bip->bli_buf) == bno)
return bip->bli_buf;
}
return NULL;
}
xfs: event tracing support Convert the old xfs tracing support that could only be used with the out of tree kdb and xfsidbg patches to use the generic event tracer. To use it make sure CONFIG_EVENT_TRACING is enabled and then enable all xfs trace channels by: echo 1 > /sys/kernel/debug/tracing/events/xfs/enable or alternatively enable single events by just doing the same in one event subdirectory, e.g. echo 1 > /sys/kernel/debug/tracing/events/xfs/xfs_ihold/enable or set more complex filters, etc. In Documentation/trace/events.txt all this is desctribed in more detail. To reads the events do a cat /sys/kernel/debug/tracing/trace Compared to the last posting this patch converts the tracing mostly to the one tracepoint per callsite model that other users of the new tracing facility also employ. This allows a very fine-grained control of the tracing, a cleaner output of the traces and also enables the perf tool to use each tracepoint as a virtual performance counter, allowing us to e.g. count how often certain workloads git various spots in XFS. Take a look at http://lwn.net/Articles/346470/ for some examples. Also the btree tracing isn't included at all yet, as it will require additional core tracing features not in mainline yet, I plan to deliver it later. And the really nice thing about this patch is that it actually removes many lines of code while adding this nice functionality: fs/xfs/Makefile | 8 fs/xfs/linux-2.6/xfs_acl.c | 1 fs/xfs/linux-2.6/xfs_aops.c | 52 - fs/xfs/linux-2.6/xfs_aops.h | 2 fs/xfs/linux-2.6/xfs_buf.c | 117 +-- fs/xfs/linux-2.6/xfs_buf.h | 33 fs/xfs/linux-2.6/xfs_fs_subr.c | 3 fs/xfs/linux-2.6/xfs_ioctl.c | 1 fs/xfs/linux-2.6/xfs_ioctl32.c | 1 fs/xfs/linux-2.6/xfs_iops.c | 1 fs/xfs/linux-2.6/xfs_linux.h | 1 fs/xfs/linux-2.6/xfs_lrw.c | 87 -- fs/xfs/linux-2.6/xfs_lrw.h | 45 - fs/xfs/linux-2.6/xfs_super.c | 104 --- fs/xfs/linux-2.6/xfs_super.h | 7 fs/xfs/linux-2.6/xfs_sync.c | 1 fs/xfs/linux-2.6/xfs_trace.c | 75 ++ fs/xfs/linux-2.6/xfs_trace.h | 1369 +++++++++++++++++++++++++++++++++++++++++ fs/xfs/linux-2.6/xfs_vnode.h | 4 fs/xfs/quota/xfs_dquot.c | 110 --- fs/xfs/quota/xfs_dquot.h | 21 fs/xfs/quota/xfs_qm.c | 40 - fs/xfs/quota/xfs_qm_syscalls.c | 4 fs/xfs/support/ktrace.c | 323 --------- fs/xfs/support/ktrace.h | 85 -- fs/xfs/xfs.h | 16 fs/xfs/xfs_ag.h | 14 fs/xfs/xfs_alloc.c | 230 +----- fs/xfs/xfs_alloc.h | 27 fs/xfs/xfs_alloc_btree.c | 1 fs/xfs/xfs_attr.c | 107 --- fs/xfs/xfs_attr.h | 10 fs/xfs/xfs_attr_leaf.c | 14 fs/xfs/xfs_attr_sf.h | 40 - fs/xfs/xfs_bmap.c | 507 +++------------ fs/xfs/xfs_bmap.h | 49 - fs/xfs/xfs_bmap_btree.c | 6 fs/xfs/xfs_btree.c | 5 fs/xfs/xfs_btree_trace.h | 17 fs/xfs/xfs_buf_item.c | 87 -- fs/xfs/xfs_buf_item.h | 20 fs/xfs/xfs_da_btree.c | 3 fs/xfs/xfs_da_btree.h | 7 fs/xfs/xfs_dfrag.c | 2 fs/xfs/xfs_dir2.c | 8 fs/xfs/xfs_dir2_block.c | 20 fs/xfs/xfs_dir2_leaf.c | 21 fs/xfs/xfs_dir2_node.c | 27 fs/xfs/xfs_dir2_sf.c | 26 fs/xfs/xfs_dir2_trace.c | 216 ------ fs/xfs/xfs_dir2_trace.h | 72 -- fs/xfs/xfs_filestream.c | 8 fs/xfs/xfs_fsops.c | 2 fs/xfs/xfs_iget.c | 111 --- fs/xfs/xfs_inode.c | 67 -- fs/xfs/xfs_inode.h | 76 -- fs/xfs/xfs_inode_item.c | 5 fs/xfs/xfs_iomap.c | 85 -- fs/xfs/xfs_iomap.h | 8 fs/xfs/xfs_log.c | 181 +---- fs/xfs/xfs_log_priv.h | 20 fs/xfs/xfs_log_recover.c | 1 fs/xfs/xfs_mount.c | 2 fs/xfs/xfs_quota.h | 8 fs/xfs/xfs_rename.c | 1 fs/xfs/xfs_rtalloc.c | 1 fs/xfs/xfs_rw.c | 3 fs/xfs/xfs_trans.h | 47 + fs/xfs/xfs_trans_buf.c | 62 - fs/xfs/xfs_vnodeops.c | 8 70 files changed, 2151 insertions(+), 2592 deletions(-) Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Alex Elder <aelder@sgi.com>
2009-12-15 07:14:59 +08:00
STATIC void
xfs_check_block(
struct xfs_btree_block *block,
xfs_mount_t *mp,
int root,
short sz)
{
int i, j, dmxr;
__be64 *pp, *thispa; /* pointer to block address */
xfs_bmbt_key_t *prevp, *keyp;
ASSERT(be16_to_cpu(block->bb_level) > 0);
prevp = NULL;
for( i = 1; i <= xfs_btree_get_numrecs(block); i++) {
dmxr = mp->m_bmap_dmxr[0];
keyp = XFS_BMBT_KEY_ADDR(mp, block, i);
xfs: event tracing support Convert the old xfs tracing support that could only be used with the out of tree kdb and xfsidbg patches to use the generic event tracer. To use it make sure CONFIG_EVENT_TRACING is enabled and then enable all xfs trace channels by: echo 1 > /sys/kernel/debug/tracing/events/xfs/enable or alternatively enable single events by just doing the same in one event subdirectory, e.g. echo 1 > /sys/kernel/debug/tracing/events/xfs/xfs_ihold/enable or set more complex filters, etc. In Documentation/trace/events.txt all this is desctribed in more detail. To reads the events do a cat /sys/kernel/debug/tracing/trace Compared to the last posting this patch converts the tracing mostly to the one tracepoint per callsite model that other users of the new tracing facility also employ. This allows a very fine-grained control of the tracing, a cleaner output of the traces and also enables the perf tool to use each tracepoint as a virtual performance counter, allowing us to e.g. count how often certain workloads git various spots in XFS. Take a look at http://lwn.net/Articles/346470/ for some examples. Also the btree tracing isn't included at all yet, as it will require additional core tracing features not in mainline yet, I plan to deliver it later. And the really nice thing about this patch is that it actually removes many lines of code while adding this nice functionality: fs/xfs/Makefile | 8 fs/xfs/linux-2.6/xfs_acl.c | 1 fs/xfs/linux-2.6/xfs_aops.c | 52 - fs/xfs/linux-2.6/xfs_aops.h | 2 fs/xfs/linux-2.6/xfs_buf.c | 117 +-- fs/xfs/linux-2.6/xfs_buf.h | 33 fs/xfs/linux-2.6/xfs_fs_subr.c | 3 fs/xfs/linux-2.6/xfs_ioctl.c | 1 fs/xfs/linux-2.6/xfs_ioctl32.c | 1 fs/xfs/linux-2.6/xfs_iops.c | 1 fs/xfs/linux-2.6/xfs_linux.h | 1 fs/xfs/linux-2.6/xfs_lrw.c | 87 -- fs/xfs/linux-2.6/xfs_lrw.h | 45 - fs/xfs/linux-2.6/xfs_super.c | 104 --- fs/xfs/linux-2.6/xfs_super.h | 7 fs/xfs/linux-2.6/xfs_sync.c | 1 fs/xfs/linux-2.6/xfs_trace.c | 75 ++ fs/xfs/linux-2.6/xfs_trace.h | 1369 +++++++++++++++++++++++++++++++++++++++++ fs/xfs/linux-2.6/xfs_vnode.h | 4 fs/xfs/quota/xfs_dquot.c | 110 --- fs/xfs/quota/xfs_dquot.h | 21 fs/xfs/quota/xfs_qm.c | 40 - fs/xfs/quota/xfs_qm_syscalls.c | 4 fs/xfs/support/ktrace.c | 323 --------- fs/xfs/support/ktrace.h | 85 -- fs/xfs/xfs.h | 16 fs/xfs/xfs_ag.h | 14 fs/xfs/xfs_alloc.c | 230 +----- fs/xfs/xfs_alloc.h | 27 fs/xfs/xfs_alloc_btree.c | 1 fs/xfs/xfs_attr.c | 107 --- fs/xfs/xfs_attr.h | 10 fs/xfs/xfs_attr_leaf.c | 14 fs/xfs/xfs_attr_sf.h | 40 - fs/xfs/xfs_bmap.c | 507 +++------------ fs/xfs/xfs_bmap.h | 49 - fs/xfs/xfs_bmap_btree.c | 6 fs/xfs/xfs_btree.c | 5 fs/xfs/xfs_btree_trace.h | 17 fs/xfs/xfs_buf_item.c | 87 -- fs/xfs/xfs_buf_item.h | 20 fs/xfs/xfs_da_btree.c | 3 fs/xfs/xfs_da_btree.h | 7 fs/xfs/xfs_dfrag.c | 2 fs/xfs/xfs_dir2.c | 8 fs/xfs/xfs_dir2_block.c | 20 fs/xfs/xfs_dir2_leaf.c | 21 fs/xfs/xfs_dir2_node.c | 27 fs/xfs/xfs_dir2_sf.c | 26 fs/xfs/xfs_dir2_trace.c | 216 ------ fs/xfs/xfs_dir2_trace.h | 72 -- fs/xfs/xfs_filestream.c | 8 fs/xfs/xfs_fsops.c | 2 fs/xfs/xfs_iget.c | 111 --- fs/xfs/xfs_inode.c | 67 -- fs/xfs/xfs_inode.h | 76 -- fs/xfs/xfs_inode_item.c | 5 fs/xfs/xfs_iomap.c | 85 -- fs/xfs/xfs_iomap.h | 8 fs/xfs/xfs_log.c | 181 +---- fs/xfs/xfs_log_priv.h | 20 fs/xfs/xfs_log_recover.c | 1 fs/xfs/xfs_mount.c | 2 fs/xfs/xfs_quota.h | 8 fs/xfs/xfs_rename.c | 1 fs/xfs/xfs_rtalloc.c | 1 fs/xfs/xfs_rw.c | 3 fs/xfs/xfs_trans.h | 47 + fs/xfs/xfs_trans_buf.c | 62 - fs/xfs/xfs_vnodeops.c | 8 70 files changed, 2151 insertions(+), 2592 deletions(-) Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Alex Elder <aelder@sgi.com>
2009-12-15 07:14:59 +08:00
if (prevp) {
ASSERT(be64_to_cpu(prevp->br_startoff) <
be64_to_cpu(keyp->br_startoff));
}
prevp = keyp;
/*
* Compare the block numbers to see if there are dups.
*/
if (root)
pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, i, sz);
else
pp = XFS_BMBT_PTR_ADDR(mp, block, i, dmxr);
xfs: event tracing support Convert the old xfs tracing support that could only be used with the out of tree kdb and xfsidbg patches to use the generic event tracer. To use it make sure CONFIG_EVENT_TRACING is enabled and then enable all xfs trace channels by: echo 1 > /sys/kernel/debug/tracing/events/xfs/enable or alternatively enable single events by just doing the same in one event subdirectory, e.g. echo 1 > /sys/kernel/debug/tracing/events/xfs/xfs_ihold/enable or set more complex filters, etc. In Documentation/trace/events.txt all this is desctribed in more detail. To reads the events do a cat /sys/kernel/debug/tracing/trace Compared to the last posting this patch converts the tracing mostly to the one tracepoint per callsite model that other users of the new tracing facility also employ. This allows a very fine-grained control of the tracing, a cleaner output of the traces and also enables the perf tool to use each tracepoint as a virtual performance counter, allowing us to e.g. count how often certain workloads git various spots in XFS. Take a look at http://lwn.net/Articles/346470/ for some examples. Also the btree tracing isn't included at all yet, as it will require additional core tracing features not in mainline yet, I plan to deliver it later. And the really nice thing about this patch is that it actually removes many lines of code while adding this nice functionality: fs/xfs/Makefile | 8 fs/xfs/linux-2.6/xfs_acl.c | 1 fs/xfs/linux-2.6/xfs_aops.c | 52 - fs/xfs/linux-2.6/xfs_aops.h | 2 fs/xfs/linux-2.6/xfs_buf.c | 117 +-- fs/xfs/linux-2.6/xfs_buf.h | 33 fs/xfs/linux-2.6/xfs_fs_subr.c | 3 fs/xfs/linux-2.6/xfs_ioctl.c | 1 fs/xfs/linux-2.6/xfs_ioctl32.c | 1 fs/xfs/linux-2.6/xfs_iops.c | 1 fs/xfs/linux-2.6/xfs_linux.h | 1 fs/xfs/linux-2.6/xfs_lrw.c | 87 -- fs/xfs/linux-2.6/xfs_lrw.h | 45 - fs/xfs/linux-2.6/xfs_super.c | 104 --- fs/xfs/linux-2.6/xfs_super.h | 7 fs/xfs/linux-2.6/xfs_sync.c | 1 fs/xfs/linux-2.6/xfs_trace.c | 75 ++ fs/xfs/linux-2.6/xfs_trace.h | 1369 +++++++++++++++++++++++++++++++++++++++++ fs/xfs/linux-2.6/xfs_vnode.h | 4 fs/xfs/quota/xfs_dquot.c | 110 --- fs/xfs/quota/xfs_dquot.h | 21 fs/xfs/quota/xfs_qm.c | 40 - fs/xfs/quota/xfs_qm_syscalls.c | 4 fs/xfs/support/ktrace.c | 323 --------- fs/xfs/support/ktrace.h | 85 -- fs/xfs/xfs.h | 16 fs/xfs/xfs_ag.h | 14 fs/xfs/xfs_alloc.c | 230 +----- fs/xfs/xfs_alloc.h | 27 fs/xfs/xfs_alloc_btree.c | 1 fs/xfs/xfs_attr.c | 107 --- fs/xfs/xfs_attr.h | 10 fs/xfs/xfs_attr_leaf.c | 14 fs/xfs/xfs_attr_sf.h | 40 - fs/xfs/xfs_bmap.c | 507 +++------------ fs/xfs/xfs_bmap.h | 49 - fs/xfs/xfs_bmap_btree.c | 6 fs/xfs/xfs_btree.c | 5 fs/xfs/xfs_btree_trace.h | 17 fs/xfs/xfs_buf_item.c | 87 -- fs/xfs/xfs_buf_item.h | 20 fs/xfs/xfs_da_btree.c | 3 fs/xfs/xfs_da_btree.h | 7 fs/xfs/xfs_dfrag.c | 2 fs/xfs/xfs_dir2.c | 8 fs/xfs/xfs_dir2_block.c | 20 fs/xfs/xfs_dir2_leaf.c | 21 fs/xfs/xfs_dir2_node.c | 27 fs/xfs/xfs_dir2_sf.c | 26 fs/xfs/xfs_dir2_trace.c | 216 ------ fs/xfs/xfs_dir2_trace.h | 72 -- fs/xfs/xfs_filestream.c | 8 fs/xfs/xfs_fsops.c | 2 fs/xfs/xfs_iget.c | 111 --- fs/xfs/xfs_inode.c | 67 -- fs/xfs/xfs_inode.h | 76 -- fs/xfs/xfs_inode_item.c | 5 fs/xfs/xfs_iomap.c | 85 -- fs/xfs/xfs_iomap.h | 8 fs/xfs/xfs_log.c | 181 +---- fs/xfs/xfs_log_priv.h | 20 fs/xfs/xfs_log_recover.c | 1 fs/xfs/xfs_mount.c | 2 fs/xfs/xfs_quota.h | 8 fs/xfs/xfs_rename.c | 1 fs/xfs/xfs_rtalloc.c | 1 fs/xfs/xfs_rw.c | 3 fs/xfs/xfs_trans.h | 47 + fs/xfs/xfs_trans_buf.c | 62 - fs/xfs/xfs_vnodeops.c | 8 70 files changed, 2151 insertions(+), 2592 deletions(-) Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Alex Elder <aelder@sgi.com>
2009-12-15 07:14:59 +08:00
for (j = i+1; j <= be16_to_cpu(block->bb_numrecs); j++) {
if (root)
thispa = XFS_BMAP_BROOT_PTR_ADDR(mp, block, j, sz);
else
thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr);
if (*thispa == *pp) {
xfs_warn(mp, "%s: thispa(%d) == pp(%d) %Ld",
__func__, j, i,
(unsigned long long)be64_to_cpu(*thispa));
xfs_err(mp, "%s: ptrs are equal in node\n",
__func__);
xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
}
}
}
}
/*
* Check that the extents for the inode ip are in the right order in all
* btree leaves. THis becomes prohibitively expensive for large extent count
* files, so don't bother with inodes that have more than 10,000 extents in
* them. The btree record ordering checks will still be done, so for such large
* bmapbt constructs that is going to catch most corruptions.
*/
STATIC void
xfs_bmap_check_leaf_extents(
struct xfs_btree_cur *cur, /* btree cursor or null */
xfs_inode_t *ip, /* incore inode pointer */
int whichfork) /* data or attr fork */
{
struct xfs_mount *mp = ip->i_mount;
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
struct xfs_btree_block *block; /* current btree block */
xfs_fsblock_t bno; /* block # of "block" */
struct xfs_buf *bp; /* buffer for "block" */
int error; /* error return value */
xfs_extnum_t i=0, j; /* index into the extents list */
int level; /* btree level, for checking */
__be64 *pp; /* pointer to block address */
xfs_bmbt_rec_t *ep; /* pointer to current extent */
xfs_bmbt_rec_t last = {0, 0}; /* last extent in prev block */
xfs_bmbt_rec_t *nextp; /* pointer to next extent */
int bp_release = 0;
if (ifp->if_format != XFS_DINODE_FMT_BTREE)
return;
/* skip large extent count inodes */
if (ip->i_df.if_nextents > 10000)
return;
bno = NULLFSBLOCK;
block = ifp->if_broot;
/*
* Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
*/
level = be16_to_cpu(block->bb_level);
ASSERT(level > 0);
xfs_check_block(block, mp, 1, ifp->if_broot_bytes);
pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
bno = be64_to_cpu(*pp);
ASSERT(bno != NULLFSBLOCK);
ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
/*
* Go down the tree until leaf level is reached, following the first
* pointer (leftmost) at each level.
*/
while (level-- > 0) {
/* See if buf is in cur first */
bp_release = 0;
bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
if (!bp) {
bp_release = 1;
error = xfs_btree_read_bufl(mp, NULL, bno, &bp,
XFS_BMAP_BTREE_REF,
&xfs_bmbt_buf_ops);
if (error)
goto error_norelse;
}
block = XFS_BUF_TO_BLOCK(bp);
if (level == 0)
break;
/*
* Check this block for basic sanity (increasing keys and
* no duplicate blocks).
*/
xfs: event tracing support Convert the old xfs tracing support that could only be used with the out of tree kdb and xfsidbg patches to use the generic event tracer. To use it make sure CONFIG_EVENT_TRACING is enabled and then enable all xfs trace channels by: echo 1 > /sys/kernel/debug/tracing/events/xfs/enable or alternatively enable single events by just doing the same in one event subdirectory, e.g. echo 1 > /sys/kernel/debug/tracing/events/xfs/xfs_ihold/enable or set more complex filters, etc. In Documentation/trace/events.txt all this is desctribed in more detail. To reads the events do a cat /sys/kernel/debug/tracing/trace Compared to the last posting this patch converts the tracing mostly to the one tracepoint per callsite model that other users of the new tracing facility also employ. This allows a very fine-grained control of the tracing, a cleaner output of the traces and also enables the perf tool to use each tracepoint as a virtual performance counter, allowing us to e.g. count how often certain workloads git various spots in XFS. Take a look at http://lwn.net/Articles/346470/ for some examples. Also the btree tracing isn't included at all yet, as it will require additional core tracing features not in mainline yet, I plan to deliver it later. And the really nice thing about this patch is that it actually removes many lines of code while adding this nice functionality: fs/xfs/Makefile | 8 fs/xfs/linux-2.6/xfs_acl.c | 1 fs/xfs/linux-2.6/xfs_aops.c | 52 - fs/xfs/linux-2.6/xfs_aops.h | 2 fs/xfs/linux-2.6/xfs_buf.c | 117 +-- fs/xfs/linux-2.6/xfs_buf.h | 33 fs/xfs/linux-2.6/xfs_fs_subr.c | 3 fs/xfs/linux-2.6/xfs_ioctl.c | 1 fs/xfs/linux-2.6/xfs_ioctl32.c | 1 fs/xfs/linux-2.6/xfs_iops.c | 1 fs/xfs/linux-2.6/xfs_linux.h | 1 fs/xfs/linux-2.6/xfs_lrw.c | 87 -- fs/xfs/linux-2.6/xfs_lrw.h | 45 - fs/xfs/linux-2.6/xfs_super.c | 104 --- fs/xfs/linux-2.6/xfs_super.h | 7 fs/xfs/linux-2.6/xfs_sync.c | 1 fs/xfs/linux-2.6/xfs_trace.c | 75 ++ fs/xfs/linux-2.6/xfs_trace.h | 1369 +++++++++++++++++++++++++++++++++++++++++ fs/xfs/linux-2.6/xfs_vnode.h | 4 fs/xfs/quota/xfs_dquot.c | 110 --- fs/xfs/quota/xfs_dquot.h | 21 fs/xfs/quota/xfs_qm.c | 40 - fs/xfs/quota/xfs_qm_syscalls.c | 4 fs/xfs/support/ktrace.c | 323 --------- fs/xfs/support/ktrace.h | 85 -- fs/xfs/xfs.h | 16 fs/xfs/xfs_ag.h | 14 fs/xfs/xfs_alloc.c | 230 +----- fs/xfs/xfs_alloc.h | 27 fs/xfs/xfs_alloc_btree.c | 1 fs/xfs/xfs_attr.c | 107 --- fs/xfs/xfs_attr.h | 10 fs/xfs/xfs_attr_leaf.c | 14 fs/xfs/xfs_attr_sf.h | 40 - fs/xfs/xfs_bmap.c | 507 +++------------ fs/xfs/xfs_bmap.h | 49 - fs/xfs/xfs_bmap_btree.c | 6 fs/xfs/xfs_btree.c | 5 fs/xfs/xfs_btree_trace.h | 17 fs/xfs/xfs_buf_item.c | 87 -- fs/xfs/xfs_buf_item.h | 20 fs/xfs/xfs_da_btree.c | 3 fs/xfs/xfs_da_btree.h | 7 fs/xfs/xfs_dfrag.c | 2 fs/xfs/xfs_dir2.c | 8 fs/xfs/xfs_dir2_block.c | 20 fs/xfs/xfs_dir2_leaf.c | 21 fs/xfs/xfs_dir2_node.c | 27 fs/xfs/xfs_dir2_sf.c | 26 fs/xfs/xfs_dir2_trace.c | 216 ------ fs/xfs/xfs_dir2_trace.h | 72 -- fs/xfs/xfs_filestream.c | 8 fs/xfs/xfs_fsops.c | 2 fs/xfs/xfs_iget.c | 111 --- fs/xfs/xfs_inode.c | 67 -- fs/xfs/xfs_inode.h | 76 -- fs/xfs/xfs_inode_item.c | 5 fs/xfs/xfs_iomap.c | 85 -- fs/xfs/xfs_iomap.h | 8 fs/xfs/xfs_log.c | 181 +---- fs/xfs/xfs_log_priv.h | 20 fs/xfs/xfs_log_recover.c | 1 fs/xfs/xfs_mount.c | 2 fs/xfs/xfs_quota.h | 8 fs/xfs/xfs_rename.c | 1 fs/xfs/xfs_rtalloc.c | 1 fs/xfs/xfs_rw.c | 3 fs/xfs/xfs_trans.h | 47 + fs/xfs/xfs_trans_buf.c | 62 - fs/xfs/xfs_vnodeops.c | 8 70 files changed, 2151 insertions(+), 2592 deletions(-) Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Alex Elder <aelder@sgi.com>
2009-12-15 07:14:59 +08:00
xfs_check_block(block, mp, 0, 0);
pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
bno = be64_to_cpu(*pp);
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, bno))) {
error = -EFSCORRUPTED;
goto error0;
}
if (bp_release) {
bp_release = 0;
xfs_trans_brelse(NULL, bp);
}
}
/*
* Here with bp and block set to the leftmost leaf node in the tree.
*/
i = 0;
/*
* Loop over all leaf nodes checking that all extents are in the right order.
*/
for (;;) {
xfs_fsblock_t nextbno;
xfs_extnum_t num_recs;
num_recs = xfs_btree_get_numrecs(block);
/*
* Read-ahead the next leaf block, if any.
*/
nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
/*
* Check all the extents to make sure they are OK.
* If we had a previous block, the last entry should
* conform with the first entry in this one.
*/
ep = XFS_BMBT_REC_ADDR(mp, block, 1);
if (i) {
ASSERT(xfs_bmbt_disk_get_startoff(&last) +
xfs_bmbt_disk_get_blockcount(&last) <=
xfs_bmbt_disk_get_startoff(ep));
}
for (j = 1; j < num_recs; j++) {
nextp = XFS_BMBT_REC_ADDR(mp, block, j + 1);
ASSERT(xfs_bmbt_disk_get_startoff(ep) +
xfs_bmbt_disk_get_blockcount(ep) <=
xfs_bmbt_disk_get_startoff(nextp));
ep = nextp;
}
last = *ep;
i += num_recs;
if (bp_release) {
bp_release = 0;
xfs_trans_brelse(NULL, bp);
}
bno = nextbno;
/*
* If we've reached the end, stop.
*/
if (bno == NULLFSBLOCK)
break;
bp_release = 0;
bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
if (!bp) {
bp_release = 1;
error = xfs_btree_read_bufl(mp, NULL, bno, &bp,
XFS_BMAP_BTREE_REF,
&xfs_bmbt_buf_ops);
if (error)
goto error_norelse;
}
block = XFS_BUF_TO_BLOCK(bp);
}
return;
error0:
xfs_warn(mp, "%s: at error0", __func__);
if (bp_release)
xfs_trans_brelse(NULL, bp);
error_norelse:
xfs_warn(mp, "%s: BAD after btree leaves for %d extents",
__func__, i);
xfs_err(mp, "%s: CORRUPTED BTREE OR SOMETHING", __func__);
xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
return;
}
/*
* Validate that the bmbt_irecs being returned from bmapi are valid
* given the caller's original parameters. Specifically check the
* ranges of the returned irecs to ensure that they only extend beyond
* the given parameters if the XFS_BMAPI_ENTIRE flag was set.
*/
STATIC void
xfs_bmap_validate_ret(
xfs_fileoff_t bno,
xfs_filblks_t len,
int flags,
xfs_bmbt_irec_t *mval,
int nmap,
int ret_nmap)
{
int i; /* index to map values */
ASSERT(ret_nmap <= nmap);
for (i = 0; i < ret_nmap; i++) {
ASSERT(mval[i].br_blockcount > 0);
if (!(flags & XFS_BMAPI_ENTIRE)) {
ASSERT(mval[i].br_startoff >= bno);
ASSERT(mval[i].br_blockcount <= len);
ASSERT(mval[i].br_startoff + mval[i].br_blockcount <=
bno + len);
} else {
ASSERT(mval[i].br_startoff < bno + len);
ASSERT(mval[i].br_startoff + mval[i].br_blockcount >
bno);
}
ASSERT(i == 0 ||
mval[i - 1].br_startoff + mval[i - 1].br_blockcount ==
mval[i].br_startoff);
ASSERT(mval[i].br_startblock != DELAYSTARTBLOCK &&
mval[i].br_startblock != HOLESTARTBLOCK);
ASSERT(mval[i].br_state == XFS_EXT_NORM ||
mval[i].br_state == XFS_EXT_UNWRITTEN);
}
}
#else
#define xfs_bmap_check_leaf_extents(cur, ip, whichfork) do { } while (0)
#define xfs_bmap_validate_ret(bno,len,flags,mval,onmap,nmap) do { } while (0)
#endif /* DEBUG */
/*
* Inode fork format manipulation functions
*/
/*
* Convert the inode format to extent format if it currently is in btree format,
* but the extent list is small enough that it fits into the extent format.
*
* Since the extents are already in-core, all we have to do is give up the space
* for the btree root and pitch the leaf block.
*/
STATIC int /* error */
xfs_bmap_btree_to_extents(
struct xfs_trans *tp, /* transaction pointer */
struct xfs_inode *ip, /* incore inode pointer */
struct xfs_btree_cur *cur, /* btree cursor */
int *logflagsp, /* inode logging flags */
int whichfork) /* data or attr fork */
{
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
struct xfs_mount *mp = ip->i_mount;
struct xfs_btree_block *rblock = ifp->if_broot;
struct xfs_btree_block *cblock;/* child btree block */
xfs_fsblock_t cbno; /* child block number */
struct xfs_buf *cbp; /* child block's buffer */
int error; /* error return value */
__be64 *pp; /* ptr to block address */
xfs: add owner field to extent allocation and freeing For the rmap btree to work, we have to feed the extent owner information to the the allocation and freeing functions. This information is what will end up in the rmap btree that tracks allocated extents. While we technically don't need the owner information when freeing extents, passing it allows us to validate that the extent we are removing from the rmap btree actually belonged to the owner we expected it to belong to. We also define a special set of owner values for internal metadata that would otherwise have no owner. This allows us to tell the difference between metadata owned by different per-ag btrees, as well as static fs metadata (e.g. AG headers) and internal journal blocks. There are also a couple of special cases we need to take care of - during EFI recovery, we don't actually know who the original owner was, so we need to pass a wildcard to indicate that we aren't checking the owner for validity. We also need special handling in growfs, as we "free" the space in the last AG when extending it, but because it's new space it has no actual owner... While touching the xfs_bmap_add_free() function, re-order the parameters to put the struct xfs_mount first. Extend the owner field to include both the owner type and some sort of index within the owner. The index field will be used to support reverse mappings when reflink is enabled. When we're freeing extents from an EFI, we don't have the owner information available (rmap updates have their own redo items). xfs_free_extent therefore doesn't need to do an rmap update. Make sure that the log replay code signals this correctly. This is based upon a patch originally from Dave Chinner. It has been extended to add more owner information with the intent of helping recovery operations when things go wrong (e.g. offset of user data block in a file). [dchinner: de-shout the xfs_rmap_*_owner helpers] [darrick: minor style fixes suggested by Christoph Hellwig] Signed-off-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-08-03 09:33:42 +08:00
struct xfs_owner_info oinfo;
/* check if we actually need the extent format first: */
if (!xfs_bmap_wants_extents(ip, whichfork))
return 0;
ASSERT(cur);
ASSERT(whichfork != XFS_COW_FORK);
ASSERT(ifp->if_format == XFS_DINODE_FMT_BTREE);
ASSERT(be16_to_cpu(rblock->bb_level) == 1);
ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1);
ASSERT(xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0) == 1);
pp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, ifp->if_broot_bytes);
cbno = be64_to_cpu(*pp);
#ifdef DEBUG
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(cur->bc_mp, !xfs_btree_check_lptr(cur, cbno, 1)))
return -EFSCORRUPTED;
#endif
error = xfs_btree_read_bufl(mp, tp, cbno, &cbp, XFS_BMAP_BTREE_REF,
&xfs_bmbt_buf_ops);
if (error)
return error;
cblock = XFS_BUF_TO_BLOCK(cbp);
if ((error = xfs_btree_check_block(cur, cblock, 0, cbp)))
return error;
xfs: add owner field to extent allocation and freeing For the rmap btree to work, we have to feed the extent owner information to the the allocation and freeing functions. This information is what will end up in the rmap btree that tracks allocated extents. While we technically don't need the owner information when freeing extents, passing it allows us to validate that the extent we are removing from the rmap btree actually belonged to the owner we expected it to belong to. We also define a special set of owner values for internal metadata that would otherwise have no owner. This allows us to tell the difference between metadata owned by different per-ag btrees, as well as static fs metadata (e.g. AG headers) and internal journal blocks. There are also a couple of special cases we need to take care of - during EFI recovery, we don't actually know who the original owner was, so we need to pass a wildcard to indicate that we aren't checking the owner for validity. We also need special handling in growfs, as we "free" the space in the last AG when extending it, but because it's new space it has no actual owner... While touching the xfs_bmap_add_free() function, re-order the parameters to put the struct xfs_mount first. Extend the owner field to include both the owner type and some sort of index within the owner. The index field will be used to support reverse mappings when reflink is enabled. When we're freeing extents from an EFI, we don't have the owner information available (rmap updates have their own redo items). xfs_free_extent therefore doesn't need to do an rmap update. Make sure that the log replay code signals this correctly. This is based upon a patch originally from Dave Chinner. It has been extended to add more owner information with the intent of helping recovery operations when things go wrong (e.g. offset of user data block in a file). [dchinner: de-shout the xfs_rmap_*_owner helpers] [darrick: minor style fixes suggested by Christoph Hellwig] Signed-off-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-08-03 09:33:42 +08:00
xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo);
ip->i_nblocks--;
xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
xfs_trans_binval(tp, cbp);
if (cur->bc_levels[0].bp == cbp)
cur->bc_levels[0].bp = NULL;
xfs_iroot_realloc(ip, -1, whichfork);
ASSERT(ifp->if_broot == NULL);
ifp->if_format = XFS_DINODE_FMT_EXTENTS;
*logflagsp |= XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
return 0;
}
xfs: event tracing support Convert the old xfs tracing support that could only be used with the out of tree kdb and xfsidbg patches to use the generic event tracer. To use it make sure CONFIG_EVENT_TRACING is enabled and then enable all xfs trace channels by: echo 1 > /sys/kernel/debug/tracing/events/xfs/enable or alternatively enable single events by just doing the same in one event subdirectory, e.g. echo 1 > /sys/kernel/debug/tracing/events/xfs/xfs_ihold/enable or set more complex filters, etc. In Documentation/trace/events.txt all this is desctribed in more detail. To reads the events do a cat /sys/kernel/debug/tracing/trace Compared to the last posting this patch converts the tracing mostly to the one tracepoint per callsite model that other users of the new tracing facility also employ. This allows a very fine-grained control of the tracing, a cleaner output of the traces and also enables the perf tool to use each tracepoint as a virtual performance counter, allowing us to e.g. count how often certain workloads git various spots in XFS. Take a look at http://lwn.net/Articles/346470/ for some examples. Also the btree tracing isn't included at all yet, as it will require additional core tracing features not in mainline yet, I plan to deliver it later. And the really nice thing about this patch is that it actually removes many lines of code while adding this nice functionality: fs/xfs/Makefile | 8 fs/xfs/linux-2.6/xfs_acl.c | 1 fs/xfs/linux-2.6/xfs_aops.c | 52 - fs/xfs/linux-2.6/xfs_aops.h | 2 fs/xfs/linux-2.6/xfs_buf.c | 117 +-- fs/xfs/linux-2.6/xfs_buf.h | 33 fs/xfs/linux-2.6/xfs_fs_subr.c | 3 fs/xfs/linux-2.6/xfs_ioctl.c | 1 fs/xfs/linux-2.6/xfs_ioctl32.c | 1 fs/xfs/linux-2.6/xfs_iops.c | 1 fs/xfs/linux-2.6/xfs_linux.h | 1 fs/xfs/linux-2.6/xfs_lrw.c | 87 -- fs/xfs/linux-2.6/xfs_lrw.h | 45 - fs/xfs/linux-2.6/xfs_super.c | 104 --- fs/xfs/linux-2.6/xfs_super.h | 7 fs/xfs/linux-2.6/xfs_sync.c | 1 fs/xfs/linux-2.6/xfs_trace.c | 75 ++ fs/xfs/linux-2.6/xfs_trace.h | 1369 +++++++++++++++++++++++++++++++++++++++++ fs/xfs/linux-2.6/xfs_vnode.h | 4 fs/xfs/quota/xfs_dquot.c | 110 --- fs/xfs/quota/xfs_dquot.h | 21 fs/xfs/quota/xfs_qm.c | 40 - fs/xfs/quota/xfs_qm_syscalls.c | 4 fs/xfs/support/ktrace.c | 323 --------- fs/xfs/support/ktrace.h | 85 -- fs/xfs/xfs.h | 16 fs/xfs/xfs_ag.h | 14 fs/xfs/xfs_alloc.c | 230 +----- fs/xfs/xfs_alloc.h | 27 fs/xfs/xfs_alloc_btree.c | 1 fs/xfs/xfs_attr.c | 107 --- fs/xfs/xfs_attr.h | 10 fs/xfs/xfs_attr_leaf.c | 14 fs/xfs/xfs_attr_sf.h | 40 - fs/xfs/xfs_bmap.c | 507 +++------------ fs/xfs/xfs_bmap.h | 49 - fs/xfs/xfs_bmap_btree.c | 6 fs/xfs/xfs_btree.c | 5 fs/xfs/xfs_btree_trace.h | 17 fs/xfs/xfs_buf_item.c | 87 -- fs/xfs/xfs_buf_item.h | 20 fs/xfs/xfs_da_btree.c | 3 fs/xfs/xfs_da_btree.h | 7 fs/xfs/xfs_dfrag.c | 2 fs/xfs/xfs_dir2.c | 8 fs/xfs/xfs_dir2_block.c | 20 fs/xfs/xfs_dir2_leaf.c | 21 fs/xfs/xfs_dir2_node.c | 27 fs/xfs/xfs_dir2_sf.c | 26 fs/xfs/xfs_dir2_trace.c | 216 ------ fs/xfs/xfs_dir2_trace.h | 72 -- fs/xfs/xfs_filestream.c | 8 fs/xfs/xfs_fsops.c | 2 fs/xfs/xfs_iget.c | 111 --- fs/xfs/xfs_inode.c | 67 -- fs/xfs/xfs_inode.h | 76 -- fs/xfs/xfs_inode_item.c | 5 fs/xfs/xfs_iomap.c | 85 -- fs/xfs/xfs_iomap.h | 8 fs/xfs/xfs_log.c | 181 +---- fs/xfs/xfs_log_priv.h | 20 fs/xfs/xfs_log_recover.c | 1 fs/xfs/xfs_mount.c | 2 fs/xfs/xfs_quota.h | 8 fs/xfs/xfs_rename.c | 1 fs/xfs/xfs_rtalloc.c | 1 fs/xfs/xfs_rw.c | 3 fs/xfs/xfs_trans.h | 47 + fs/xfs/xfs_trans_buf.c | 62 - fs/xfs/xfs_vnodeops.c | 8 70 files changed, 2151 insertions(+), 2592 deletions(-) Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Alex Elder <aelder@sgi.com>
2009-12-15 07:14:59 +08:00
/*
* Convert an extents-format file into a btree-format file.
* The new file will have a root block (in the inode) and a single child block.
*/
STATIC int /* error */
xfs_bmap_extents_to_btree(
struct xfs_trans *tp, /* transaction pointer */
struct xfs_inode *ip, /* incore inode pointer */
struct xfs_btree_cur **curp, /* cursor returned to caller */
int wasdel, /* converting a delayed alloc */
int *logflagsp, /* inode logging flags */
int whichfork) /* data or attr fork */
{
struct xfs_btree_block *ablock; /* allocated (child) bt block */
struct xfs_buf *abp; /* buffer for ablock */
struct xfs_alloc_arg args; /* allocation arguments */
struct xfs_bmbt_rec *arp; /* child record pointer */
struct xfs_btree_block *block; /* btree root block */
struct xfs_btree_cur *cur; /* bmap btree cursor */
int error; /* error return value */
struct xfs_ifork *ifp; /* inode fork pointer */
struct xfs_bmbt_key *kp; /* root block key pointer */
struct xfs_mount *mp; /* mount structure */
xfs_bmbt_ptr_t *pp; /* root block address pointer */
struct xfs_iext_cursor icur;
struct xfs_bmbt_irec rec;
xfs_extnum_t cnt = 0;
mp = ip->i_mount;
ASSERT(whichfork != XFS_COW_FORK);
ifp = XFS_IFORK_PTR(ip, whichfork);
ASSERT(ifp->if_format == XFS_DINODE_FMT_EXTENTS);
xfs: event tracing support Convert the old xfs tracing support that could only be used with the out of tree kdb and xfsidbg patches to use the generic event tracer. To use it make sure CONFIG_EVENT_TRACING is enabled and then enable all xfs trace channels by: echo 1 > /sys/kernel/debug/tracing/events/xfs/enable or alternatively enable single events by just doing the same in one event subdirectory, e.g. echo 1 > /sys/kernel/debug/tracing/events/xfs/xfs_ihold/enable or set more complex filters, etc. In Documentation/trace/events.txt all this is desctribed in more detail. To reads the events do a cat /sys/kernel/debug/tracing/trace Compared to the last posting this patch converts the tracing mostly to the one tracepoint per callsite model that other users of the new tracing facility also employ. This allows a very fine-grained control of the tracing, a cleaner output of the traces and also enables the perf tool to use each tracepoint as a virtual performance counter, allowing us to e.g. count how often certain workloads git various spots in XFS. Take a look at http://lwn.net/Articles/346470/ for some examples. Also the btree tracing isn't included at all yet, as it will require additional core tracing features not in mainline yet, I plan to deliver it later. And the really nice thing about this patch is that it actually removes many lines of code while adding this nice functionality: fs/xfs/Makefile | 8 fs/xfs/linux-2.6/xfs_acl.c | 1 fs/xfs/linux-2.6/xfs_aops.c | 52 - fs/xfs/linux-2.6/xfs_aops.h | 2 fs/xfs/linux-2.6/xfs_buf.c | 117 +-- fs/xfs/linux-2.6/xfs_buf.h | 33 fs/xfs/linux-2.6/xfs_fs_subr.c | 3 fs/xfs/linux-2.6/xfs_ioctl.c | 1 fs/xfs/linux-2.6/xfs_ioctl32.c | 1 fs/xfs/linux-2.6/xfs_iops.c | 1 fs/xfs/linux-2.6/xfs_linux.h | 1 fs/xfs/linux-2.6/xfs_lrw.c | 87 -- fs/xfs/linux-2.6/xfs_lrw.h | 45 - fs/xfs/linux-2.6/xfs_super.c | 104 --- fs/xfs/linux-2.6/xfs_super.h | 7 fs/xfs/linux-2.6/xfs_sync.c | 1 fs/xfs/linux-2.6/xfs_trace.c | 75 ++ fs/xfs/linux-2.6/xfs_trace.h | 1369 +++++++++++++++++++++++++++++++++++++++++ fs/xfs/linux-2.6/xfs_vnode.h | 4 fs/xfs/quota/xfs_dquot.c | 110 --- fs/xfs/quota/xfs_dquot.h | 21 fs/xfs/quota/xfs_qm.c | 40 - fs/xfs/quota/xfs_qm_syscalls.c | 4 fs/xfs/support/ktrace.c | 323 --------- fs/xfs/support/ktrace.h | 85 -- fs/xfs/xfs.h | 16 fs/xfs/xfs_ag.h | 14 fs/xfs/xfs_alloc.c | 230 +----- fs/xfs/xfs_alloc.h | 27 fs/xfs/xfs_alloc_btree.c | 1 fs/xfs/xfs_attr.c | 107 --- fs/xfs/xfs_attr.h | 10 fs/xfs/xfs_attr_leaf.c | 14 fs/xfs/xfs_attr_sf.h | 40 - fs/xfs/xfs_bmap.c | 507 +++------------ fs/xfs/xfs_bmap.h | 49 - fs/xfs/xfs_bmap_btree.c | 6 fs/xfs/xfs_btree.c | 5 fs/xfs/xfs_btree_trace.h | 17 fs/xfs/xfs_buf_item.c | 87 -- fs/xfs/xfs_buf_item.h | 20 fs/xfs/xfs_da_btree.c | 3 fs/xfs/xfs_da_btree.h | 7 fs/xfs/xfs_dfrag.c | 2 fs/xfs/xfs_dir2.c | 8 fs/xfs/xfs_dir2_block.c | 20 fs/xfs/xfs_dir2_leaf.c | 21 fs/xfs/xfs_dir2_node.c | 27 fs/xfs/xfs_dir2_sf.c | 26 fs/xfs/xfs_dir2_trace.c | 216 ------ fs/xfs/xfs_dir2_trace.h | 72 -- fs/xfs/xfs_filestream.c | 8 fs/xfs/xfs_fsops.c | 2 fs/xfs/xfs_iget.c | 111 --- fs/xfs/xfs_inode.c | 67 -- fs/xfs/xfs_inode.h | 76 -- fs/xfs/xfs_inode_item.c | 5 fs/xfs/xfs_iomap.c | 85 -- fs/xfs/xfs_iomap.h | 8 fs/xfs/xfs_log.c | 181 +---- fs/xfs/xfs_log_priv.h | 20 fs/xfs/xfs_log_recover.c | 1 fs/xfs/xfs_mount.c | 2 fs/xfs/xfs_quota.h | 8 fs/xfs/xfs_rename.c | 1 fs/xfs/xfs_rtalloc.c | 1 fs/xfs/xfs_rw.c | 3 fs/xfs/xfs_trans.h | 47 + fs/xfs/xfs_trans_buf.c | 62 - fs/xfs/xfs_vnodeops.c | 8 70 files changed, 2151 insertions(+), 2592 deletions(-) Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Alex Elder <aelder@sgi.com>
2009-12-15 07:14:59 +08:00
/*
xfs: fix error handling in xfs_bmap_extents_to_btree Commit 01239d77b9dd ("xfs: fix a null pointer dereference in xfs_bmap_extents_to_btree") attempted to fix a null pointer dreference when a fuzzing corruption of some kind was found. This fix was flawed, resulting in assert failures like: XFS: Assertion failed: ifp->if_broot == NULL, file: fs/xfs/libxfs/xfs_bmap.c, line: 715 ..... Call Trace: xfs_bmap_extents_to_btree+0x6b9/0x7b0 __xfs_bunmapi+0xae7/0xf00 ? xfs_log_reserve+0x1c8/0x290 xfs_reflink_remap_extent+0x20b/0x620 xfs_reflink_remap_blocks+0x7e/0x290 xfs_reflink_remap_range+0x311/0x530 vfs_dedupe_file_range_one+0xd7/0xe0 vfs_dedupe_file_range+0x15b/0x1a0 do_vfs_ioctl+0x267/0x6c0 The problem is that the error handling code now asserts that the inode fork is not in btree format before the error handling code undoes the modifications that put the fork back in extent format. Fix this by moving the assert back to after the xfs_iroot_realloc() call that returns the fork to extent format, and clean up the jump labels to be meaningful. Also, returning ENOSPC when xfs_btree_get_bufl() fails to instantiate the buffer that was allocated (the actual fix in the commit mentioned above) is incorrect. This is a fatal error - only an invalid block address or a filesystem shutdown can result in failing to get a buffer here. Hence change this to EFSCORRUPTED so that the higher layer knows this was a corruption related failure and should not treat it as an ENOSPC error. This should result in a shutdown (via cancelling a dirty transaction) which is necessary as we do not attempt to clean up the (invalid) block that we have already allocated. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
2018-10-01 06:11:07 +08:00
* Make space in the inode incore. This needs to be undone if we fail
* to expand the root.
*/
xfs_iroot_realloc(ip, 1, whichfork);
/*
* Fill in the root.
*/
block = ifp->if_broot;
xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL,
XFS_BTNUM_BMAP, 1, 1, ip->i_ino,
XFS_BTREE_LONG_PTRS);
/*
* Need a cursor. Can't allocate until bb_level is filled in.
*/
cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
cur->bc_ino.flags = wasdel ? XFS_BTCUR_BMBT_WASDEL : 0;
/*
* Convert to a btree with two levels, one record in root.
*/
ifp->if_format = XFS_DINODE_FMT_BTREE;
memset(&args, 0, sizeof(args));
args.tp = tp;
args.mp = mp;
xfs: add owner field to extent allocation and freeing For the rmap btree to work, we have to feed the extent owner information to the the allocation and freeing functions. This information is what will end up in the rmap btree that tracks allocated extents. While we technically don't need the owner information when freeing extents, passing it allows us to validate that the extent we are removing from the rmap btree actually belonged to the owner we expected it to belong to. We also define a special set of owner values for internal metadata that would otherwise have no owner. This allows us to tell the difference between metadata owned by different per-ag btrees, as well as static fs metadata (e.g. AG headers) and internal journal blocks. There are also a couple of special cases we need to take care of - during EFI recovery, we don't actually know who the original owner was, so we need to pass a wildcard to indicate that we aren't checking the owner for validity. We also need special handling in growfs, as we "free" the space in the last AG when extending it, but because it's new space it has no actual owner... While touching the xfs_bmap_add_free() function, re-order the parameters to put the struct xfs_mount first. Extend the owner field to include both the owner type and some sort of index within the owner. The index field will be used to support reverse mappings when reflink is enabled. When we're freeing extents from an EFI, we don't have the owner information available (rmap updates have their own redo items). xfs_free_extent therefore doesn't need to do an rmap update. Make sure that the log replay code signals this correctly. This is based upon a patch originally from Dave Chinner. It has been extended to add more owner information with the intent of helping recovery operations when things go wrong (e.g. offset of user data block in a file). [dchinner: de-shout the xfs_rmap_*_owner helpers] [darrick: minor style fixes suggested by Christoph Hellwig] Signed-off-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-08-03 09:33:42 +08:00
xfs_rmap_ino_bmbt_owner(&args.oinfo, ip->i_ino, whichfork);
if (tp->t_firstblock == NULLFSBLOCK) {
args.type = XFS_ALLOCTYPE_START_BNO;
args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino);
} else if (tp->t_flags & XFS_TRANS_LOWMODE) {
args.type = XFS_ALLOCTYPE_START_BNO;
args.fsbno = tp->t_firstblock;
} else {
args.type = XFS_ALLOCTYPE_NEAR_BNO;
args.fsbno = tp->t_firstblock;
}
args.minlen = args.maxlen = args.prod = 1;
args.wasdel = wasdel;
*logflagsp = 0;
xfs: fix error handling in xfs_bmap_extents_to_btree Commit 01239d77b9dd ("xfs: fix a null pointer dereference in xfs_bmap_extents_to_btree") attempted to fix a null pointer dreference when a fuzzing corruption of some kind was found. This fix was flawed, resulting in assert failures like: XFS: Assertion failed: ifp->if_broot == NULL, file: fs/xfs/libxfs/xfs_bmap.c, line: 715 ..... Call Trace: xfs_bmap_extents_to_btree+0x6b9/0x7b0 __xfs_bunmapi+0xae7/0xf00 ? xfs_log_reserve+0x1c8/0x290 xfs_reflink_remap_extent+0x20b/0x620 xfs_reflink_remap_blocks+0x7e/0x290 xfs_reflink_remap_range+0x311/0x530 vfs_dedupe_file_range_one+0xd7/0xe0 vfs_dedupe_file_range+0x15b/0x1a0 do_vfs_ioctl+0x267/0x6c0 The problem is that the error handling code now asserts that the inode fork is not in btree format before the error handling code undoes the modifications that put the fork back in extent format. Fix this by moving the assert back to after the xfs_iroot_realloc() call that returns the fork to extent format, and clean up the jump labels to be meaningful. Also, returning ENOSPC when xfs_btree_get_bufl() fails to instantiate the buffer that was allocated (the actual fix in the commit mentioned above) is incorrect. This is a fatal error - only an invalid block address or a filesystem shutdown can result in failing to get a buffer here. Hence change this to EFSCORRUPTED so that the higher layer knows this was a corruption related failure and should not treat it as an ENOSPC error. This should result in a shutdown (via cancelling a dirty transaction) which is necessary as we do not attempt to clean up the (invalid) block that we have already allocated. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
2018-10-01 06:11:07 +08:00
error = xfs_alloc_vextent(&args);
if (error)
goto out_root_realloc;
if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {
error = -ENOSPC;
xfs: fix error handling in xfs_bmap_extents_to_btree Commit 01239d77b9dd ("xfs: fix a null pointer dereference in xfs_bmap_extents_to_btree") attempted to fix a null pointer dreference when a fuzzing corruption of some kind was found. This fix was flawed, resulting in assert failures like: XFS: Assertion failed: ifp->if_broot == NULL, file: fs/xfs/libxfs/xfs_bmap.c, line: 715 ..... Call Trace: xfs_bmap_extents_to_btree+0x6b9/0x7b0 __xfs_bunmapi+0xae7/0xf00 ? xfs_log_reserve+0x1c8/0x290 xfs_reflink_remap_extent+0x20b/0x620 xfs_reflink_remap_blocks+0x7e/0x290 xfs_reflink_remap_range+0x311/0x530 vfs_dedupe_file_range_one+0xd7/0xe0 vfs_dedupe_file_range+0x15b/0x1a0 do_vfs_ioctl+0x267/0x6c0 The problem is that the error handling code now asserts that the inode fork is not in btree format before the error handling code undoes the modifications that put the fork back in extent format. Fix this by moving the assert back to after the xfs_iroot_realloc() call that returns the fork to extent format, and clean up the jump labels to be meaningful. Also, returning ENOSPC when xfs_btree_get_bufl() fails to instantiate the buffer that was allocated (the actual fix in the commit mentioned above) is incorrect. This is a fatal error - only an invalid block address or a filesystem shutdown can result in failing to get a buffer here. Hence change this to EFSCORRUPTED so that the higher layer knows this was a corruption related failure and should not treat it as an ENOSPC error. This should result in a shutdown (via cancelling a dirty transaction) which is necessary as we do not attempt to clean up the (invalid) block that we have already allocated. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
2018-10-01 06:11:07 +08:00
goto out_root_realloc;
}
xfs: fix error handling in xfs_bmap_extents_to_btree Commit 01239d77b9dd ("xfs: fix a null pointer dereference in xfs_bmap_extents_to_btree") attempted to fix a null pointer dreference when a fuzzing corruption of some kind was found. This fix was flawed, resulting in assert failures like: XFS: Assertion failed: ifp->if_broot == NULL, file: fs/xfs/libxfs/xfs_bmap.c, line: 715 ..... Call Trace: xfs_bmap_extents_to_btree+0x6b9/0x7b0 __xfs_bunmapi+0xae7/0xf00 ? xfs_log_reserve+0x1c8/0x290 xfs_reflink_remap_extent+0x20b/0x620 xfs_reflink_remap_blocks+0x7e/0x290 xfs_reflink_remap_range+0x311/0x530 vfs_dedupe_file_range_one+0xd7/0xe0 vfs_dedupe_file_range+0x15b/0x1a0 do_vfs_ioctl+0x267/0x6c0 The problem is that the error handling code now asserts that the inode fork is not in btree format before the error handling code undoes the modifications that put the fork back in extent format. Fix this by moving the assert back to after the xfs_iroot_realloc() call that returns the fork to extent format, and clean up the jump labels to be meaningful. Also, returning ENOSPC when xfs_btree_get_bufl() fails to instantiate the buffer that was allocated (the actual fix in the commit mentioned above) is incorrect. This is a fatal error - only an invalid block address or a filesystem shutdown can result in failing to get a buffer here. Hence change this to EFSCORRUPTED so that the higher layer knows this was a corruption related failure and should not treat it as an ENOSPC error. This should result in a shutdown (via cancelling a dirty transaction) which is necessary as we do not attempt to clean up the (invalid) block that we have already allocated. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
2018-10-01 06:11:07 +08:00
/*
* Allocation can't fail, the space was reserved.
*/
ASSERT(tp->t_firstblock == NULLFSBLOCK ||
args.agno >= XFS_FSB_TO_AGNO(mp, tp->t_firstblock));
tp->t_firstblock = args.fsbno;
cur->bc_ino.allocated++;
ip->i_nblocks++;
xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
error = xfs_trans_get_buf(tp, mp->m_ddev_targp,
XFS_FSB_TO_DADDR(mp, args.fsbno),
mp->m_bsize, 0, &abp);
if (error)
xfs: fix error handling in xfs_bmap_extents_to_btree Commit 01239d77b9dd ("xfs: fix a null pointer dereference in xfs_bmap_extents_to_btree") attempted to fix a null pointer dreference when a fuzzing corruption of some kind was found. This fix was flawed, resulting in assert failures like: XFS: Assertion failed: ifp->if_broot == NULL, file: fs/xfs/libxfs/xfs_bmap.c, line: 715 ..... Call Trace: xfs_bmap_extents_to_btree+0x6b9/0x7b0 __xfs_bunmapi+0xae7/0xf00 ? xfs_log_reserve+0x1c8/0x290 xfs_reflink_remap_extent+0x20b/0x620 xfs_reflink_remap_blocks+0x7e/0x290 xfs_reflink_remap_range+0x311/0x530 vfs_dedupe_file_range_one+0xd7/0xe0 vfs_dedupe_file_range+0x15b/0x1a0 do_vfs_ioctl+0x267/0x6c0 The problem is that the error handling code now asserts that the inode fork is not in btree format before the error handling code undoes the modifications that put the fork back in extent format. Fix this by moving the assert back to after the xfs_iroot_realloc() call that returns the fork to extent format, and clean up the jump labels to be meaningful. Also, returning ENOSPC when xfs_btree_get_bufl() fails to instantiate the buffer that was allocated (the actual fix in the commit mentioned above) is incorrect. This is a fatal error - only an invalid block address or a filesystem shutdown can result in failing to get a buffer here. Hence change this to EFSCORRUPTED so that the higher layer knows this was a corruption related failure and should not treat it as an ENOSPC error. This should result in a shutdown (via cancelling a dirty transaction) which is necessary as we do not attempt to clean up the (invalid) block that we have already allocated. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
2018-10-01 06:11:07 +08:00
goto out_unreserve_dquot;
/*
* Fill in the child block.
*/
abp->b_ops = &xfs_bmbt_buf_ops;
ablock = XFS_BUF_TO_BLOCK(abp);
xfs_btree_init_block_int(mp, ablock, xfs_buf_daddr(abp),
XFS_BTNUM_BMAP, 0, 0, ip->i_ino,
XFS_BTREE_LONG_PTRS);
for_each_xfs_iext(ifp, &icur, &rec) {
if (isnullstartblock(rec.br_startblock))
continue;
arp = XFS_BMBT_REC_ADDR(mp, ablock, 1 + cnt);
xfs_bmbt_disk_set_all(arp, &rec);
cnt++;
}
ASSERT(cnt == ifp->if_nextents);
xfs_btree_set_numrecs(ablock, cnt);
/*
* Fill in the root key and pointer.
*/
kp = XFS_BMBT_KEY_ADDR(mp, block, 1);
arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
kp->br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(arp));
pp = XFS_BMBT_PTR_ADDR(mp, block, 1, xfs_bmbt_get_maxrecs(cur,
be16_to_cpu(block->bb_level)));
*pp = cpu_to_be64(args.fsbno);
/*
* Do all this logging at the end so that
* the root is at the right level.
*/
xfs_btree_log_block(cur, abp, XFS_BB_ALL_BITS);
xfs_btree_log_recs(cur, abp, 1, be16_to_cpu(ablock->bb_numrecs));
ASSERT(*curp == NULL);
*curp = cur;
*logflagsp = XFS_ILOG_CORE | xfs_ilog_fbroot(whichfork);
return 0;
xfs: fix error handling in xfs_bmap_extents_to_btree Commit 01239d77b9dd ("xfs: fix a null pointer dereference in xfs_bmap_extents_to_btree") attempted to fix a null pointer dreference when a fuzzing corruption of some kind was found. This fix was flawed, resulting in assert failures like: XFS: Assertion failed: ifp->if_broot == NULL, file: fs/xfs/libxfs/xfs_bmap.c, line: 715 ..... Call Trace: xfs_bmap_extents_to_btree+0x6b9/0x7b0 __xfs_bunmapi+0xae7/0xf00 ? xfs_log_reserve+0x1c8/0x290 xfs_reflink_remap_extent+0x20b/0x620 xfs_reflink_remap_blocks+0x7e/0x290 xfs_reflink_remap_range+0x311/0x530 vfs_dedupe_file_range_one+0xd7/0xe0 vfs_dedupe_file_range+0x15b/0x1a0 do_vfs_ioctl+0x267/0x6c0 The problem is that the error handling code now asserts that the inode fork is not in btree format before the error handling code undoes the modifications that put the fork back in extent format. Fix this by moving the assert back to after the xfs_iroot_realloc() call that returns the fork to extent format, and clean up the jump labels to be meaningful. Also, returning ENOSPC when xfs_btree_get_bufl() fails to instantiate the buffer that was allocated (the actual fix in the commit mentioned above) is incorrect. This is a fatal error - only an invalid block address or a filesystem shutdown can result in failing to get a buffer here. Hence change this to EFSCORRUPTED so that the higher layer knows this was a corruption related failure and should not treat it as an ENOSPC error. This should result in a shutdown (via cancelling a dirty transaction) which is necessary as we do not attempt to clean up the (invalid) block that we have already allocated. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
2018-10-01 06:11:07 +08:00
out_unreserve_dquot:
xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
xfs: fix error handling in xfs_bmap_extents_to_btree Commit 01239d77b9dd ("xfs: fix a null pointer dereference in xfs_bmap_extents_to_btree") attempted to fix a null pointer dreference when a fuzzing corruption of some kind was found. This fix was flawed, resulting in assert failures like: XFS: Assertion failed: ifp->if_broot == NULL, file: fs/xfs/libxfs/xfs_bmap.c, line: 715 ..... Call Trace: xfs_bmap_extents_to_btree+0x6b9/0x7b0 __xfs_bunmapi+0xae7/0xf00 ? xfs_log_reserve+0x1c8/0x290 xfs_reflink_remap_extent+0x20b/0x620 xfs_reflink_remap_blocks+0x7e/0x290 xfs_reflink_remap_range+0x311/0x530 vfs_dedupe_file_range_one+0xd7/0xe0 vfs_dedupe_file_range+0x15b/0x1a0 do_vfs_ioctl+0x267/0x6c0 The problem is that the error handling code now asserts that the inode fork is not in btree format before the error handling code undoes the modifications that put the fork back in extent format. Fix this by moving the assert back to after the xfs_iroot_realloc() call that returns the fork to extent format, and clean up the jump labels to be meaningful. Also, returning ENOSPC when xfs_btree_get_bufl() fails to instantiate the buffer that was allocated (the actual fix in the commit mentioned above) is incorrect. This is a fatal error - only an invalid block address or a filesystem shutdown can result in failing to get a buffer here. Hence change this to EFSCORRUPTED so that the higher layer knows this was a corruption related failure and should not treat it as an ENOSPC error. This should result in a shutdown (via cancelling a dirty transaction) which is necessary as we do not attempt to clean up the (invalid) block that we have already allocated. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
2018-10-01 06:11:07 +08:00
out_root_realloc:
xfs_iroot_realloc(ip, -1, whichfork);
ifp->if_format = XFS_DINODE_FMT_EXTENTS;
xfs: fix error handling in xfs_bmap_extents_to_btree Commit 01239d77b9dd ("xfs: fix a null pointer dereference in xfs_bmap_extents_to_btree") attempted to fix a null pointer dreference when a fuzzing corruption of some kind was found. This fix was flawed, resulting in assert failures like: XFS: Assertion failed: ifp->if_broot == NULL, file: fs/xfs/libxfs/xfs_bmap.c, line: 715 ..... Call Trace: xfs_bmap_extents_to_btree+0x6b9/0x7b0 __xfs_bunmapi+0xae7/0xf00 ? xfs_log_reserve+0x1c8/0x290 xfs_reflink_remap_extent+0x20b/0x620 xfs_reflink_remap_blocks+0x7e/0x290 xfs_reflink_remap_range+0x311/0x530 vfs_dedupe_file_range_one+0xd7/0xe0 vfs_dedupe_file_range+0x15b/0x1a0 do_vfs_ioctl+0x267/0x6c0 The problem is that the error handling code now asserts that the inode fork is not in btree format before the error handling code undoes the modifications that put the fork back in extent format. Fix this by moving the assert back to after the xfs_iroot_realloc() call that returns the fork to extent format, and clean up the jump labels to be meaningful. Also, returning ENOSPC when xfs_btree_get_bufl() fails to instantiate the buffer that was allocated (the actual fix in the commit mentioned above) is incorrect. This is a fatal error - only an invalid block address or a filesystem shutdown can result in failing to get a buffer here. Hence change this to EFSCORRUPTED so that the higher layer knows this was a corruption related failure and should not treat it as an ENOSPC error. This should result in a shutdown (via cancelling a dirty transaction) which is necessary as we do not attempt to clean up the (invalid) block that we have already allocated. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
2018-10-01 06:11:07 +08:00
ASSERT(ifp->if_broot == NULL);
xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
return error;
}
/*
* Convert a local file to an extents file.
* This code is out of bounds for data forks of regular files,
* since the file data needs to get logged so things will stay consistent.
* (The bmap-level manipulations are ok, though).
*/
void
xfs_bmap_local_to_extents_empty(
struct xfs_trans *tp,
struct xfs_inode *ip,
int whichfork)
{
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
ASSERT(whichfork != XFS_COW_FORK);
ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL);
ASSERT(ifp->if_bytes == 0);
ASSERT(ifp->if_nextents == 0);
xfs_bmap_forkoff_reset(ip, whichfork);
xfs: use a b+tree for the in-core extent list Replace the current linear list and the indirection array for the in-core extent list with a b+tree to avoid the need for larger memory allocations for the indirection array when lots of extents are present. The current extent list implementations leads to heavy pressure on the memory allocator when modifying files with a high extent count, and can lead to high latencies because of that. The replacement is a b+tree with a few quirks. The leaf nodes directly store the extent record in two u64 values. The encoding is a little bit different from the existing in-core extent records so that the start offset and length which are required for lookups can be retreived with simple mask operations. The inner nodes store a 64-bit key containing the start offset in the first half of the node, and the pointers to the next lower level in the second half. In either case we walk the node from the beginninig to the end and do a linear search, as that is more efficient for the low number of cache lines touched during a search (2 for the inner nodes, 4 for the leaf nodes) than a binary search. We store termination markers (zero length for the leaf nodes, an otherwise impossible high bit for the inner nodes) to terminate the key list / records instead of storing a count to use the available cache lines as efficiently as possible. One quirk of the algorithm is that while we normally split a node half and half like usual btree implementations we just spill over entries added at the very end of the list to a new node on its own. This means we get a 100% fill grade for the common cases of bulk insertion when reading an inode into memory, and when only sequentially appending to a file. The downside is a slightly higher chance of splits on the first random insertions. Both insert and removal manually recurse into the lower levels, but the bulk deletion of the whole tree is still implemented as a recursive function call, although one limited by the overall depth and with very little stack usage in every iteration. For the first few extents we dynamically grow the list from a single extent to the next powers of two until we have a first full leaf block and that building the actual tree. The code started out based on the generic lib/btree.c code from Joern Engel based on earlier work from Peter Zijlstra, but has since been rewritten beyond recognition. Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
2017-11-04 01:34:46 +08:00
ifp->if_u1.if_root = NULL;
ifp->if_height = 0;
ifp->if_format = XFS_DINODE_FMT_EXTENTS;
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
}
STATIC int /* error */
xfs_bmap_local_to_extents(
xfs_trans_t *tp, /* transaction pointer */
xfs_inode_t *ip, /* incore inode pointer */
xfs_extlen_t total, /* total blocks needed by transaction */
int *logflagsp, /* inode logging flags */
int whichfork,
void (*init_fn)(struct xfs_trans *tp,
struct xfs_buf *bp,
struct xfs_inode *ip,
struct xfs_ifork *ifp))
{
int error = 0;
int flags; /* logging flags returned */
struct xfs_ifork *ifp; /* inode fork pointer */
xfs_alloc_arg_t args; /* allocation arguments */
struct xfs_buf *bp; /* buffer for extent block */
struct xfs_bmbt_irec rec;
struct xfs_iext_cursor icur;
xfs: event tracing support Convert the old xfs tracing support that could only be used with the out of tree kdb and xfsidbg patches to use the generic event tracer. To use it make sure CONFIG_EVENT_TRACING is enabled and then enable all xfs trace channels by: echo 1 > /sys/kernel/debug/tracing/events/xfs/enable or alternatively enable single events by just doing the same in one event subdirectory, e.g. echo 1 > /sys/kernel/debug/tracing/events/xfs/xfs_ihold/enable or set more complex filters, etc. In Documentation/trace/events.txt all this is desctribed in more detail. To reads the events do a cat /sys/kernel/debug/tracing/trace Compared to the last posting this patch converts the tracing mostly to the one tracepoint per callsite model that other users of the new tracing facility also employ. This allows a very fine-grained control of the tracing, a cleaner output of the traces and also enables the perf tool to use each tracepoint as a virtual performance counter, allowing us to e.g. count how often certain workloads git various spots in XFS. Take a look at http://lwn.net/Articles/346470/ for some examples. Also the btree tracing isn't included at all yet, as it will require additional core tracing features not in mainline yet, I plan to deliver it later. And the really nice thing about this patch is that it actually removes many lines of code while adding this nice functionality: fs/xfs/Makefile | 8 fs/xfs/linux-2.6/xfs_acl.c | 1 fs/xfs/linux-2.6/xfs_aops.c | 52 - fs/xfs/linux-2.6/xfs_aops.h | 2 fs/xfs/linux-2.6/xfs_buf.c | 117 +-- fs/xfs/linux-2.6/xfs_buf.h | 33 fs/xfs/linux-2.6/xfs_fs_subr.c | 3 fs/xfs/linux-2.6/xfs_ioctl.c | 1 fs/xfs/linux-2.6/xfs_ioctl32.c | 1 fs/xfs/linux-2.6/xfs_iops.c | 1 fs/xfs/linux-2.6/xfs_linux.h | 1 fs/xfs/linux-2.6/xfs_lrw.c | 87 -- fs/xfs/linux-2.6/xfs_lrw.h | 45 - fs/xfs/linux-2.6/xfs_super.c | 104 --- fs/xfs/linux-2.6/xfs_super.h | 7 fs/xfs/linux-2.6/xfs_sync.c | 1 fs/xfs/linux-2.6/xfs_trace.c | 75 ++ fs/xfs/linux-2.6/xfs_trace.h | 1369 +++++++++++++++++++++++++++++++++++++++++ fs/xfs/linux-2.6/xfs_vnode.h | 4 fs/xfs/quota/xfs_dquot.c | 110 --- fs/xfs/quota/xfs_dquot.h | 21 fs/xfs/quota/xfs_qm.c | 40 - fs/xfs/quota/xfs_qm_syscalls.c | 4 fs/xfs/support/ktrace.c | 323 --------- fs/xfs/support/ktrace.h | 85 -- fs/xfs/xfs.h | 16 fs/xfs/xfs_ag.h | 14 fs/xfs/xfs_alloc.c | 230 +----- fs/xfs/xfs_alloc.h | 27 fs/xfs/xfs_alloc_btree.c | 1 fs/xfs/xfs_attr.c | 107 --- fs/xfs/xfs_attr.h | 10 fs/xfs/xfs_attr_leaf.c | 14 fs/xfs/xfs_attr_sf.h | 40 - fs/xfs/xfs_bmap.c | 507 +++------------ fs/xfs/xfs_bmap.h | 49 - fs/xfs/xfs_bmap_btree.c | 6 fs/xfs/xfs_btree.c | 5 fs/xfs/xfs_btree_trace.h | 17 fs/xfs/xfs_buf_item.c | 87 -- fs/xfs/xfs_buf_item.h | 20 fs/xfs/xfs_da_btree.c | 3 fs/xfs/xfs_da_btree.h | 7 fs/xfs/xfs_dfrag.c | 2 fs/xfs/xfs_dir2.c | 8 fs/xfs/xfs_dir2_block.c | 20 fs/xfs/xfs_dir2_leaf.c | 21 fs/xfs/xfs_dir2_node.c | 27 fs/xfs/xfs_dir2_sf.c | 26 fs/xfs/xfs_dir2_trace.c | 216 ------ fs/xfs/xfs_dir2_trace.h | 72 -- fs/xfs/xfs_filestream.c | 8 fs/xfs/xfs_fsops.c | 2 fs/xfs/xfs_iget.c | 111 --- fs/xfs/xfs_inode.c | 67 -- fs/xfs/xfs_inode.h | 76 -- fs/xfs/xfs_inode_item.c | 5 fs/xfs/xfs_iomap.c | 85 -- fs/xfs/xfs_iomap.h | 8 fs/xfs/xfs_log.c | 181 +---- fs/xfs/xfs_log_priv.h | 20 fs/xfs/xfs_log_recover.c | 1 fs/xfs/xfs_mount.c | 2 fs/xfs/xfs_quota.h | 8 fs/xfs/xfs_rename.c | 1 fs/xfs/xfs_rtalloc.c | 1 fs/xfs/xfs_rw.c | 3 fs/xfs/xfs_trans.h | 47 + fs/xfs/xfs_trans_buf.c | 62 - fs/xfs/xfs_vnodeops.c | 8 70 files changed, 2151 insertions(+), 2592 deletions(-) Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Alex Elder <aelder@sgi.com>
2009-12-15 07:14:59 +08:00
/*
* We don't want to deal with the case of keeping inode data inline yet.
* So sending the data fork of a regular inode is invalid.
*/
ASSERT(!(S_ISREG(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK));
ifp = XFS_IFORK_PTR(ip, whichfork);
ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL);
if (!ifp->if_bytes) {
xfs_bmap_local_to_extents_empty(tp, ip, whichfork);
flags = XFS_ILOG_CORE;
goto done;
}
flags = 0;
error = 0;
memset(&args, 0, sizeof(args));
args.tp = tp;
args.mp = ip->i_mount;
xfs: add owner field to extent allocation and freeing For the rmap btree to work, we have to feed the extent owner information to the the allocation and freeing functions. This information is what will end up in the rmap btree that tracks allocated extents. While we technically don't need the owner information when freeing extents, passing it allows us to validate that the extent we are removing from the rmap btree actually belonged to the owner we expected it to belong to. We also define a special set of owner values for internal metadata that would otherwise have no owner. This allows us to tell the difference between metadata owned by different per-ag btrees, as well as static fs metadata (e.g. AG headers) and internal journal blocks. There are also a couple of special cases we need to take care of - during EFI recovery, we don't actually know who the original owner was, so we need to pass a wildcard to indicate that we aren't checking the owner for validity. We also need special handling in growfs, as we "free" the space in the last AG when extending it, but because it's new space it has no actual owner... While touching the xfs_bmap_add_free() function, re-order the parameters to put the struct xfs_mount first. Extend the owner field to include both the owner type and some sort of index within the owner. The index field will be used to support reverse mappings when reflink is enabled. When we're freeing extents from an EFI, we don't have the owner information available (rmap updates have their own redo items). xfs_free_extent therefore doesn't need to do an rmap update. Make sure that the log replay code signals this correctly. This is based upon a patch originally from Dave Chinner. It has been extended to add more owner information with the intent of helping recovery operations when things go wrong (e.g. offset of user data block in a file). [dchinner: de-shout the xfs_rmap_*_owner helpers] [darrick: minor style fixes suggested by Christoph Hellwig] Signed-off-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-08-03 09:33:42 +08:00
xfs_rmap_ino_owner(&args.oinfo, ip->i_ino, whichfork, 0);
/*
* Allocate a block. We know we need only one, since the
* file currently fits in an inode.
*/
if (tp->t_firstblock == NULLFSBLOCK) {
args.fsbno = XFS_INO_TO_FSB(args.mp, ip->i_ino);
args.type = XFS_ALLOCTYPE_START_BNO;
} else {
args.fsbno = tp->t_firstblock;
args.type = XFS_ALLOCTYPE_NEAR_BNO;
}
args.total = total;
args.minlen = args.maxlen = args.prod = 1;
error = xfs_alloc_vextent(&args);
if (error)
goto done;
/* Can't fail, the space was reserved. */
ASSERT(args.fsbno != NULLFSBLOCK);
ASSERT(args.len == 1);
tp->t_firstblock = args.fsbno;
error = xfs_trans_get_buf(tp, args.mp->m_ddev_targp,
XFS_FSB_TO_DADDR(args.mp, args.fsbno),
args.mp->m_bsize, 0, &bp);
if (error)
goto done;
/*
* Initialize the block, copy the data and log the remote buffer.
*
* The callout is responsible for logging because the remote format
* might differ from the local format and thus we don't know how much to
* log here. Note that init_fn must also set the buffer log item type
* correctly.
*/
init_fn(tp, bp, ip, ifp);
/* account for the change in fork size */
xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
xfs_bmap_local_to_extents_empty(tp, ip, whichfork);
flags |= XFS_ILOG_CORE;
xfs: use a b+tree for the in-core extent list Replace the current linear list and the indirection array for the in-core extent list with a b+tree to avoid the need for larger memory allocations for the indirection array when lots of extents are present. The current extent list implementations leads to heavy pressure on the memory allocator when modifying files with a high extent count, and can lead to high latencies because of that. The replacement is a b+tree with a few quirks. The leaf nodes directly store the extent record in two u64 values. The encoding is a little bit different from the existing in-core extent records so that the start offset and length which are required for lookups can be retreived with simple mask operations. The inner nodes store a 64-bit key containing the start offset in the first half of the node, and the pointers to the next lower level in the second half. In either case we walk the node from the beginninig to the end and do a linear search, as that is more efficient for the low number of cache lines touched during a search (2 for the inner nodes, 4 for the leaf nodes) than a binary search. We store termination markers (zero length for the leaf nodes, an otherwise impossible high bit for the inner nodes) to terminate the key list / records instead of storing a count to use the available cache lines as efficiently as possible. One quirk of the algorithm is that while we normally split a node half and half like usual btree implementations we just spill over entries added at the very end of the list to a new node on its own. This means we get a 100% fill grade for the common cases of bulk insertion when reading an inode into memory, and when only sequentially appending to a file. The downside is a slightly higher chance of splits on the first random insertions. Both insert and removal manually recurse into the lower levels, but the bulk deletion of the whole tree is still implemented as a recursive function call, although one limited by the overall depth and with very little stack usage in every iteration. For the first few extents we dynamically grow the list from a single extent to the next powers of two until we have a first full leaf block and that building the actual tree. The code started out based on the generic lib/btree.c code from Joern Engel based on earlier work from Peter Zijlstra, but has since been rewritten beyond recognition. Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
2017-11-04 01:34:46 +08:00
ifp->if_u1.if_root = NULL;
ifp->if_height = 0;
rec.br_startoff = 0;
rec.br_startblock = args.fsbno;
rec.br_blockcount = 1;
rec.br_state = XFS_EXT_NORM;
xfs_iext_first(ifp, &icur);
xfs_iext_insert(ip, &icur, &rec, 0);
ifp->if_nextents = 1;
ip->i_nblocks = 1;
xfs_trans_mod_dquot_byino(tp, ip,
XFS_TRANS_DQ_BCOUNT, 1L);
flags |= xfs_ilog_fext(whichfork);
done:
*logflagsp = flags;
return error;
}
/*
* Called from xfs_bmap_add_attrfork to handle btree format files.
*/
STATIC int /* error */
xfs_bmap_add_attrfork_btree(
xfs_trans_t *tp, /* transaction pointer */
xfs_inode_t *ip, /* incore inode pointer */
int *flags) /* inode logging flags */
{
struct xfs_btree_block *block = ip->i_df.if_broot;
struct xfs_btree_cur *cur; /* btree cursor */
int error; /* error return value */
xfs_mount_t *mp; /* file system mount struct */
int stat; /* newroot status */
mp = ip->i_mount;
if (XFS_BMAP_BMDR_SPACE(block) <= XFS_IFORK_DSIZE(ip))
*flags |= XFS_ILOG_DBROOT;
else {
cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK);
error = xfs_bmbt_lookup_first(cur, &stat);
if (error)
goto error0;
/* must be at least one entry */
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(mp, stat != 1)) {
error = -EFSCORRUPTED;
goto error0;
}
if ((error = xfs_btree_new_iroot(cur, flags, &stat)))
goto error0;
if (stat == 0) {
xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
return -ENOSPC;
}
cur->bc_ino.allocated = 0;
xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
}
return 0;
error0:
xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
return error;
}
/*
* Called from xfs_bmap_add_attrfork to handle extents format files.
*/
STATIC int /* error */
xfs_bmap_add_attrfork_extents(
struct xfs_trans *tp, /* transaction pointer */
struct xfs_inode *ip, /* incore inode pointer */
int *flags) /* inode logging flags */
{
struct xfs_btree_cur *cur; /* bmap btree cursor */
int error; /* error return value */
if (ip->i_df.if_nextents * sizeof(struct xfs_bmbt_rec) <=
XFS_IFORK_DSIZE(ip))
return 0;
cur = NULL;
error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0, flags,
XFS_DATA_FORK);
if (cur) {
cur->bc_ino.allocated = 0;
xfs_btree_del_cursor(cur, error);
}
return error;
}
/*
* Called from xfs_bmap_add_attrfork to handle local format files. Each
* different data fork content type needs a different callout to do the
* conversion. Some are basic and only require special block initialisation
* callouts for the data formating, others (directories) are so specialised they
* handle everything themselves.
*
* XXX (dgc): investigate whether directory conversion can use the generic
* formatting callout. It should be possible - it's just a very complex
* formatter.
*/
STATIC int /* error */
xfs_bmap_add_attrfork_local(
struct xfs_trans *tp, /* transaction pointer */
struct xfs_inode *ip, /* incore inode pointer */
int *flags) /* inode logging flags */
{
struct xfs_da_args dargs; /* args for dir/attr code */
if (ip->i_df.if_bytes <= XFS_IFORK_DSIZE(ip))
return 0;
if (S_ISDIR(VFS_I(ip)->i_mode)) {
memset(&dargs, 0, sizeof(dargs));
dargs.geo = ip->i_mount->m_dir_geo;
dargs.dp = ip;
dargs.total = dargs.geo->fsbcount;
dargs.whichfork = XFS_DATA_FORK;
dargs.trans = tp;
return xfs_dir2_sf_to_block(&dargs);
}
if (S_ISLNK(VFS_I(ip)->i_mode))
return xfs_bmap_local_to_extents(tp, ip, 1, flags,
XFS_DATA_FORK,
xfs_symlink_local_to_remote);
/* should only be called for types that support local format data */
ASSERT(0);
return -EFSCORRUPTED;
}
xfs: event tracing support Convert the old xfs tracing support that could only be used with the out of tree kdb and xfsidbg patches to use the generic event tracer. To use it make sure CONFIG_EVENT_TRACING is enabled and then enable all xfs trace channels by: echo 1 > /sys/kernel/debug/tracing/events/xfs/enable or alternatively enable single events by just doing the same in one event subdirectory, e.g. echo 1 > /sys/kernel/debug/tracing/events/xfs/xfs_ihold/enable or set more complex filters, etc. In Documentation/trace/events.txt all this is desctribed in more detail. To reads the events do a cat /sys/kernel/debug/tracing/trace Compared to the last posting this patch converts the tracing mostly to the one tracepoint per callsite model that other users of the new tracing facility also employ. This allows a very fine-grained control of the tracing, a cleaner output of the traces and also enables the perf tool to use each tracepoint as a virtual performance counter, allowing us to e.g. count how often certain workloads git various spots in XFS. Take a look at http://lwn.net/Articles/346470/ for some examples. Also the btree tracing isn't included at all yet, as it will require additional core tracing features not in mainline yet, I plan to deliver it later. And the really nice thing about this patch is that it actually removes many lines of code while adding this nice functionality: fs/xfs/Makefile | 8 fs/xfs/linux-2.6/xfs_acl.c | 1 fs/xfs/linux-2.6/xfs_aops.c | 52 - fs/xfs/linux-2.6/xfs_aops.h | 2 fs/xfs/linux-2.6/xfs_buf.c | 117 +-- fs/xfs/linux-2.6/xfs_buf.h | 33 fs/xfs/linux-2.6/xfs_fs_subr.c | 3 fs/xfs/linux-2.6/xfs_ioctl.c | 1 fs/xfs/linux-2.6/xfs_ioctl32.c | 1 fs/xfs/linux-2.6/xfs_iops.c | 1 fs/xfs/linux-2.6/xfs_linux.h | 1 fs/xfs/linux-2.6/xfs_lrw.c | 87 -- fs/xfs/linux-2.6/xfs_lrw.h | 45 - fs/xfs/linux-2.6/xfs_super.c | 104 --- fs/xfs/linux-2.6/xfs_super.h | 7 fs/xfs/linux-2.6/xfs_sync.c | 1 fs/xfs/linux-2.6/xfs_trace.c | 75 ++ fs/xfs/linux-2.6/xfs_trace.h | 1369 +++++++++++++++++++++++++++++++++++++++++ fs/xfs/linux-2.6/xfs_vnode.h | 4 fs/xfs/quota/xfs_dquot.c | 110 --- fs/xfs/quota/xfs_dquot.h | 21 fs/xfs/quota/xfs_qm.c | 40 - fs/xfs/quota/xfs_qm_syscalls.c | 4 fs/xfs/support/ktrace.c | 323 --------- fs/xfs/support/ktrace.h | 85 -- fs/xfs/xfs.h | 16 fs/xfs/xfs_ag.h | 14 fs/xfs/xfs_alloc.c | 230 +----- fs/xfs/xfs_alloc.h | 27 fs/xfs/xfs_alloc_btree.c | 1 fs/xfs/xfs_attr.c | 107 --- fs/xfs/xfs_attr.h | 10 fs/xfs/xfs_attr_leaf.c | 14 fs/xfs/xfs_attr_sf.h | 40 - fs/xfs/xfs_bmap.c | 507 +++------------ fs/xfs/xfs_bmap.h | 49 - fs/xfs/xfs_bmap_btree.c | 6 fs/xfs/xfs_btree.c | 5 fs/xfs/xfs_btree_trace.h | 17 fs/xfs/xfs_buf_item.c | 87 -- fs/xfs/xfs_buf_item.h | 20 fs/xfs/xfs_da_btree.c | 3 fs/xfs/xfs_da_btree.h | 7 fs/xfs/xfs_dfrag.c | 2 fs/xfs/xfs_dir2.c | 8 fs/xfs/xfs_dir2_block.c | 20 fs/xfs/xfs_dir2_leaf.c | 21 fs/xfs/xfs_dir2_node.c | 27 fs/xfs/xfs_dir2_sf.c | 26 fs/xfs/xfs_dir2_trace.c | 216 ------ fs/xfs/xfs_dir2_trace.h | 72 -- fs/xfs/xfs_filestream.c | 8 fs/xfs/xfs_fsops.c | 2 fs/xfs/xfs_iget.c | 111 --- fs/xfs/xfs_inode.c | 67 -- fs/xfs/xfs_inode.h | 76 -- fs/xfs/xfs_inode_item.c | 5 fs/xfs/xfs_iomap.c | 85 -- fs/xfs/xfs_iomap.h | 8 fs/xfs/xfs_log.c | 181 +---- fs/xfs/xfs_log_priv.h | 20 fs/xfs/xfs_log_recover.c | 1 fs/xfs/xfs_mount.c | 2 fs/xfs/xfs_quota.h | 8 fs/xfs/xfs_rename.c | 1 fs/xfs/xfs_rtalloc.c | 1 fs/xfs/xfs_rw.c | 3 fs/xfs/xfs_trans.h | 47 + fs/xfs/xfs_trans_buf.c | 62 - fs/xfs/xfs_vnodeops.c | 8 70 files changed, 2151 insertions(+), 2592 deletions(-) Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Alex Elder <aelder@sgi.com>
2009-12-15 07:14:59 +08:00
xfs: initialise attr fork on inode create When we allocate a new inode, we often need to add an attribute to the inode as part of the create. This can happen as a result of needing to add default ACLs or security labels before the inode is made visible to userspace. This is highly inefficient right now. We do the create transaction to allocate the inode, then we do an "add attr fork" transaction to modify the just created empty inode to set the inode fork offset to allow attributes to be stored, then we go and do the attribute creation. This means 3 transactions instead of 1 to allocate an inode, and this greatly increases the load on the CIL commit code, resulting in excessive contention on the CIL spin locks and performance degradation: 18.99% [kernel] [k] __pv_queued_spin_lock_slowpath 3.57% [kernel] [k] do_raw_spin_lock 2.51% [kernel] [k] __raw_callee_save___pv_queued_spin_unlock 2.48% [kernel] [k] memcpy 2.34% [kernel] [k] xfs_log_commit_cil The typical profile resulting from running fsmark on a selinux enabled filesytem is adds this overhead to the create path: - 15.30% xfs_init_security - 15.23% security_inode_init_security - 13.05% xfs_initxattrs - 12.94% xfs_attr_set - 6.75% xfs_bmap_add_attrfork - 5.51% xfs_trans_commit - 5.48% __xfs_trans_commit - 5.35% xfs_log_commit_cil - 3.86% _raw_spin_lock - do_raw_spin_lock __pv_queued_spin_lock_slowpath - 0.70% xfs_trans_alloc 0.52% xfs_trans_reserve - 5.41% xfs_attr_set_args - 5.39% xfs_attr_set_shortform.constprop.0 - 4.46% xfs_trans_commit - 4.46% __xfs_trans_commit - 4.33% xfs_log_commit_cil - 2.74% _raw_spin_lock - do_raw_spin_lock __pv_queued_spin_lock_slowpath 0.60% xfs_inode_item_format 0.90% xfs_attr_try_sf_addname - 1.99% selinux_inode_init_security - 1.02% security_sid_to_context_force - 1.00% security_sid_to_context_core - 0.92% sidtab_entry_to_string - 0.90% sidtab_sid2str_get 0.59% sidtab_sid2str_put.part.0 - 0.82% selinux_determine_inode_label - 0.77% security_transition_sid 0.70% security_compute_sid.part.0 And fsmark creation rate performance drops by ~25%. The key point to note here is that half the additional overhead comes from adding the attribute fork to the newly created inode. That's crazy, considering we can do this same thing at inode create time with a couple of lines of code and no extra overhead. So, if we know we are going to add an attribute immediately after creating the inode, let's just initialise the attribute fork inside the create transaction and chop that whole chunk of code out of the create fast path. This completely removes the performance drop caused by enabling SELinux, and the profile looks like: - 8.99% xfs_init_security - 9.00% security_inode_init_security - 6.43% xfs_initxattrs - 6.37% xfs_attr_set - 5.45% xfs_attr_set_args - 5.42% xfs_attr_set_shortform.constprop.0 - 4.51% xfs_trans_commit - 4.54% __xfs_trans_commit - 4.59% xfs_log_commit_cil - 2.67% _raw_spin_lock - 3.28% do_raw_spin_lock 3.08% __pv_queued_spin_lock_slowpath 0.66% xfs_inode_item_format - 0.90% xfs_attr_try_sf_addname - 0.60% xfs_trans_alloc - 2.35% selinux_inode_init_security - 1.25% security_sid_to_context_force - 1.21% security_sid_to_context_core - 1.19% sidtab_entry_to_string - 1.20% sidtab_sid2str_get - 0.86% sidtab_sid2str_put.part.0 - 0.62% _raw_spin_lock_irqsave - 0.77% do_raw_spin_lock __pv_queued_spin_lock_slowpath - 0.84% selinux_determine_inode_label - 0.83% security_transition_sid 0.86% security_compute_sid.part.0 Which indicates the XFS overhead of creating the selinux xattr has been halved. This doesn't fix the CIL lock contention problem, just means it's not a limiting factor for this workload. Lock contention in the security subsystems is going to be an issue soon, though... Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de> [djwong: fix compilation error when CONFIG_SECURITY=n] Reviewed-by: Darrick J. Wong <djwong@kernel.org> Signed-off-by: Darrick J. Wong <djwong@kernel.org> Reviewed-by: Gao Xiang <hsiangkao@redhat.com>
2021-03-23 00:52:03 +08:00
/*
* Set an inode attr fork offset based on the format of the data fork.
*/
static int
xfs_bmap_set_attrforkoff(
struct xfs_inode *ip,
int size,
int *version)
{
int default_size = xfs_default_attroffset(ip) >> 3;
switch (ip->i_df.if_format) {
case XFS_DINODE_FMT_DEV:
ip->i_forkoff = default_size;
break;
case XFS_DINODE_FMT_LOCAL:
case XFS_DINODE_FMT_EXTENTS:
case XFS_DINODE_FMT_BTREE:
ip->i_forkoff = xfs_attr_shortform_bytesfit(ip, size);
if (!ip->i_forkoff)
ip->i_forkoff = default_size;
else if (xfs_has_attr2(ip->i_mount) && version)
*version = 2;
break;
default:
ASSERT(0);
return -EINVAL;
}
return 0;
}
/*
* Convert inode from non-attributed to attributed.
* Must not be in a transaction, ip must not be locked.
*/
int /* error code */
xfs_bmap_add_attrfork(
xfs_inode_t *ip, /* incore inode pointer */
int size, /* space new attribute needs */
int rsvd) /* xact may use reserved blks */
{
xfs_mount_t *mp; /* mount structure */
xfs_trans_t *tp; /* transaction pointer */
int blks; /* space reservation */
int version = 1; /* superblock attr version */
int logflags; /* logging flags */
int error; /* error return value */
xfs: event tracing support Convert the old xfs tracing support that could only be used with the out of tree kdb and xfsidbg patches to use the generic event tracer. To use it make sure CONFIG_EVENT_TRACING is enabled and then enable all xfs trace channels by: echo 1 > /sys/kernel/debug/tracing/events/xfs/enable or alternatively enable single events by just doing the same in one event subdirectory, e.g. echo 1 > /sys/kernel/debug/tracing/events/xfs/xfs_ihold/enable or set more complex filters, etc. In Documentation/trace/events.txt all this is desctribed in more detail. To reads the events do a cat /sys/kernel/debug/tracing/trace Compared to the last posting this patch converts the tracing mostly to the one tracepoint per callsite model that other users of the new tracing facility also employ. This allows a very fine-grained control of the tracing, a cleaner output of the traces and also enables the perf tool to use each tracepoint as a virtual performance counter, allowing us to e.g. count how often certain workloads git various spots in XFS. Take a look at http://lwn.net/Articles/346470/ for some examples. Also the btree tracing isn't included at all yet, as it will require additional core tracing features not in mainline yet, I plan to deliver it later. And the really nice thing about this patch is that it actually removes many lines of code while adding this nice functionality: fs/xfs/Makefile | 8 fs/xfs/linux-2.6/xfs_acl.c | 1 fs/xfs/linux-2.6/xfs_aops.c | 52 - fs/xfs/linux-2.6/xfs_aops.h | 2 fs/xfs/linux-2.6/xfs_buf.c | 117 +-- fs/xfs/linux-2.6/xfs_buf.h | 33 fs/xfs/linux-2.6/xfs_fs_subr.c | 3 fs/xfs/linux-2.6/xfs_ioctl.c | 1 fs/xfs/linux-2.6/xfs_ioctl32.c | 1 fs/xfs/linux-2.6/xfs_iops.c | 1 fs/xfs/linux-2.6/xfs_linux.h | 1 fs/xfs/linux-2.6/xfs_lrw.c | 87 -- fs/xfs/linux-2.6/xfs_lrw.h | 45 - fs/xfs/linux-2.6/xfs_super.c | 104 --- fs/xfs/linux-2.6/xfs_super.h | 7 fs/xfs/linux-2.6/xfs_sync.c | 1 fs/xfs/linux-2.6/xfs_trace.c | 75 ++ fs/xfs/linux-2.6/xfs_trace.h | 1369 +++++++++++++++++++++++++++++++++++++++++ fs/xfs/linux-2.6/xfs_vnode.h | 4 fs/xfs/quota/xfs_dquot.c | 110 --- fs/xfs/quota/xfs_dquot.h | 21 fs/xfs/quota/xfs_qm.c | 40 - fs/xfs/quota/xfs_qm_syscalls.c | 4 fs/xfs/support/ktrace.c | 323 --------- fs/xfs/support/ktrace.h | 85 -- fs/xfs/xfs.h | 16 fs/xfs/xfs_ag.h | 14 fs/xfs/xfs_alloc.c | 230 +----- fs/xfs/xfs_alloc.h | 27 fs/xfs/xfs_alloc_btree.c | 1 fs/xfs/xfs_attr.c | 107 --- fs/xfs/xfs_attr.h | 10 fs/xfs/xfs_attr_leaf.c | 14 fs/xfs/xfs_attr_sf.h | 40 - fs/xfs/xfs_bmap.c | 507 +++------------ fs/xfs/xfs_bmap.h | 49 - fs/xfs/xfs_bmap_btree.c | 6 fs/xfs/xfs_btree.c | 5 fs/xfs/xfs_btree_trace.h | 17 fs/xfs/xfs_buf_item.c | 87 -- fs/xfs/xfs_buf_item.h | 20 fs/xfs/xfs_da_btree.c | 3 fs/xfs/xfs_da_btree.h | 7 fs/xfs/xfs_dfrag.c | 2 fs/xfs/xfs_dir2.c | 8 fs/xfs/xfs_dir2_block.c | 20 fs/xfs/xfs_dir2_leaf.c | 21 fs/xfs/xfs_dir2_node.c | 27 fs/xfs/xfs_dir2_sf.c | 26 fs/xfs/xfs_dir2_trace.c | 216 ------ fs/xfs/xfs_dir2_trace.h | 72 -- fs/xfs/xfs_filestream.c | 8 fs/xfs/xfs_fsops.c | 2 fs/xfs/xfs_iget.c | 111 --- fs/xfs/xfs_inode.c | 67 -- fs/xfs/xfs_inode.h | 76 -- fs/xfs/xfs_inode_item.c | 5 fs/xfs/xfs_iomap.c | 85 -- fs/xfs/xfs_iomap.h | 8 fs/xfs/xfs_log.c | 181 +---- fs/xfs/xfs_log_priv.h | 20 fs/xfs/xfs_log_recover.c | 1 fs/xfs/xfs_mount.c | 2 fs/xfs/xfs_quota.h | 8 fs/xfs/xfs_rename.c | 1 fs/xfs/xfs_rtalloc.c | 1 fs/xfs/xfs_rw.c | 3 fs/xfs/xfs_trans.h | 47 + fs/xfs/xfs_trans_buf.c | 62 - fs/xfs/xfs_vnodeops.c | 8 70 files changed, 2151 insertions(+), 2592 deletions(-) Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Alex Elder <aelder@sgi.com>
2009-12-15 07:14:59 +08:00
ASSERT(XFS_IFORK_Q(ip) == 0);
mp = ip->i_mount;
ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
blks = XFS_ADDAFORK_SPACE_RES(mp);
error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_addafork, blks, 0,
rsvd, &tp);
if (error)
return error;
if (XFS_IFORK_Q(ip))
goto trans_cancel;
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
error = xfs_bmap_set_attrforkoff(ip, size, &version);
if (error)
goto trans_cancel;
ASSERT(ip->i_afp == NULL);
xfs: initialise attr fork on inode create When we allocate a new inode, we often need to add an attribute to the inode as part of the create. This can happen as a result of needing to add default ACLs or security labels before the inode is made visible to userspace. This is highly inefficient right now. We do the create transaction to allocate the inode, then we do an "add attr fork" transaction to modify the just created empty inode to set the inode fork offset to allow attributes to be stored, then we go and do the attribute creation. This means 3 transactions instead of 1 to allocate an inode, and this greatly increases the load on the CIL commit code, resulting in excessive contention on the CIL spin locks and performance degradation: 18.99% [kernel] [k] __pv_queued_spin_lock_slowpath 3.57% [kernel] [k] do_raw_spin_lock 2.51% [kernel] [k] __raw_callee_save___pv_queued_spin_unlock 2.48% [kernel] [k] memcpy 2.34% [kernel] [k] xfs_log_commit_cil The typical profile resulting from running fsmark on a selinux enabled filesytem is adds this overhead to the create path: - 15.30% xfs_init_security - 15.23% security_inode_init_security - 13.05% xfs_initxattrs - 12.94% xfs_attr_set - 6.75% xfs_bmap_add_attrfork - 5.51% xfs_trans_commit - 5.48% __xfs_trans_commit - 5.35% xfs_log_commit_cil - 3.86% _raw_spin_lock - do_raw_spin_lock __pv_queued_spin_lock_slowpath - 0.70% xfs_trans_alloc 0.52% xfs_trans_reserve - 5.41% xfs_attr_set_args - 5.39% xfs_attr_set_shortform.constprop.0 - 4.46% xfs_trans_commit - 4.46% __xfs_trans_commit - 4.33% xfs_log_commit_cil - 2.74% _raw_spin_lock - do_raw_spin_lock __pv_queued_spin_lock_slowpath 0.60% xfs_inode_item_format 0.90% xfs_attr_try_sf_addname - 1.99% selinux_inode_init_security - 1.02% security_sid_to_context_force - 1.00% security_sid_to_context_core - 0.92% sidtab_entry_to_string - 0.90% sidtab_sid2str_get 0.59% sidtab_sid2str_put.part.0 - 0.82% selinux_determine_inode_label - 0.77% security_transition_sid 0.70% security_compute_sid.part.0 And fsmark creation rate performance drops by ~25%. The key point to note here is that half the additional overhead comes from adding the attribute fork to the newly created inode. That's crazy, considering we can do this same thing at inode create time with a couple of lines of code and no extra overhead. So, if we know we are going to add an attribute immediately after creating the inode, let's just initialise the attribute fork inside the create transaction and chop that whole chunk of code out of the create fast path. This completely removes the performance drop caused by enabling SELinux, and the profile looks like: - 8.99% xfs_init_security - 9.00% security_inode_init_security - 6.43% xfs_initxattrs - 6.37% xfs_attr_set - 5.45% xfs_attr_set_args - 5.42% xfs_attr_set_shortform.constprop.0 - 4.51% xfs_trans_commit - 4.54% __xfs_trans_commit - 4.59% xfs_log_commit_cil - 2.67% _raw_spin_lock - 3.28% do_raw_spin_lock 3.08% __pv_queued_spin_lock_slowpath 0.66% xfs_inode_item_format - 0.90% xfs_attr_try_sf_addname - 0.60% xfs_trans_alloc - 2.35% selinux_inode_init_security - 1.25% security_sid_to_context_force - 1.21% security_sid_to_context_core - 1.19% sidtab_entry_to_string - 1.20% sidtab_sid2str_get - 0.86% sidtab_sid2str_put.part.0 - 0.62% _raw_spin_lock_irqsave - 0.77% do_raw_spin_lock __pv_queued_spin_lock_slowpath - 0.84% selinux_determine_inode_label - 0.83% security_transition_sid 0.86% security_compute_sid.part.0 Which indicates the XFS overhead of creating the selinux xattr has been halved. This doesn't fix the CIL lock contention problem, just means it's not a limiting factor for this workload. Lock contention in the security subsystems is going to be an issue soon, though... Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de> [djwong: fix compilation error when CONFIG_SECURITY=n] Reviewed-by: Darrick J. Wong <djwong@kernel.org> Signed-off-by: Darrick J. Wong <djwong@kernel.org> Reviewed-by: Gao Xiang <hsiangkao@redhat.com>
2021-03-23 00:52:03 +08:00
ip->i_afp = xfs_ifork_alloc(XFS_DINODE_FMT_EXTENTS, 0);
logflags = 0;
switch (ip->i_df.if_format) {
case XFS_DINODE_FMT_LOCAL:
error = xfs_bmap_add_attrfork_local(tp, ip, &logflags);
break;
case XFS_DINODE_FMT_EXTENTS:
error = xfs_bmap_add_attrfork_extents(tp, ip, &logflags);
break;
case XFS_DINODE_FMT_BTREE:
error = xfs_bmap_add_attrfork_btree(tp, ip, &logflags);
break;
default:
error = 0;
break;
}
if (logflags)
xfs_trans_log_inode(tp, ip, logflags);
if (error)
goto trans_cancel;
if (!xfs_has_attr(mp) ||
(!xfs_has_attr2(mp) && version == 2)) {
bool log_sb = false;
spin_lock(&mp->m_sb_lock);
if (!xfs_has_attr(mp)) {
xfs_add_attr(mp);
log_sb = true;
}
if (!xfs_has_attr2(mp) && version == 2) {
xfs_add_attr2(mp);
log_sb = true;
}
xfs: remove bitfield based superblock updates When we log changes to the superblock, we first have to write them to the on-disk buffer, and then log that. Right now we have a complex bitfield based arrangement to only write the modified field to the buffer before we log it. This used to be necessary as a performance optimisation because we logged the superblock buffer in every extent or inode allocation or freeing, and so performance was extremely important. We haven't done this for years, however, ever since the lazy superblock counters pulled the superblock logging out of the transaction commit fast path. Hence we have a bunch of complexity that is not necessary that makes writing the in-core superblock to disk much more complex than it needs to be. We only need to log the superblock now during management operations (e.g. during mount, unmount or quota control operations) so it is not a performance critical path anymore. As such, remove the complex field based logging mechanism and replace it with a simple conversion function similar to what we use for all other on-disk structures. This means we always log the entirity of the superblock, but again because we rarely modify the superblock this is not an issue for log bandwidth or CPU time. Indeed, if we do log the superblock frequently, delayed logging will minimise the impact of this overhead. [Fixed gquota/pquota inode sharing regression noticed by bfoster.] Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Brian Foster <bfoster@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
2015-01-22 06:10:26 +08:00
spin_unlock(&mp->m_sb_lock);
if (log_sb)
xfs_log_sb(tp);
}
error = xfs_trans_commit(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
return error;
trans_cancel:
xfs_trans_cancel(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
return error;
}
/*
* Internal and external extent tree search functions.
*/
struct xfs_iread_state {
struct xfs_iext_cursor icur;
xfs_extnum_t loaded;
};
/* Stuff every bmbt record from this block into the incore extent map. */
static int
xfs_iread_bmbt_block(
struct xfs_btree_cur *cur,
int level,
void *priv)
{
struct xfs_iread_state *ir = priv;
struct xfs_mount *mp = cur->bc_mp;
struct xfs_inode *ip = cur->bc_ino.ip;
struct xfs_btree_block *block;
struct xfs_buf *bp;
struct xfs_bmbt_rec *frp;
xfs_extnum_t num_recs;
xfs_extnum_t j;
int whichfork = cur->bc_ino.whichfork;
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
block = xfs_btree_get_block(cur, level, &bp);
/* Abort if we find more records than nextents. */
num_recs = xfs_btree_get_numrecs(block);
if (unlikely(ir->loaded + num_recs > ifp->if_nextents)) {
xfs_warn(ip->i_mount, "corrupt dinode %llu, (btree extents).",
(unsigned long long)ip->i_ino);
xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, block,
sizeof(*block), __this_address);
return -EFSCORRUPTED;
}
/* Copy records into the incore cache. */
frp = XFS_BMBT_REC_ADDR(mp, block, 1);
for (j = 0; j < num_recs; j++, frp++, ir->loaded++) {
struct xfs_bmbt_irec new;
xfs_failaddr_t fa;
xfs_bmbt_disk_get_all(frp, &new);
fa = xfs_bmap_validate_extent(ip, whichfork, &new);
if (fa) {
xfs_inode_verifier_error(ip, -EFSCORRUPTED,
"xfs_iread_extents(2)", frp,
sizeof(*frp), fa);
return -EFSCORRUPTED;
}
xfs_iext_insert(ip, &ir->icur, &new,
xfs_bmap_fork_to_state(whichfork));
trace_xfs_read_extent(ip, &ir->icur,
xfs_bmap_fork_to_state(whichfork), _THIS_IP_);
xfs_iext_next(ifp, &ir->icur);
}
return 0;
}
/*
* Read in extents from a btree-format inode.
*/
int
xfs_iread_extents(
struct xfs_trans *tp,
struct xfs_inode *ip,
int whichfork)
{
struct xfs_iread_state ir;
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
struct xfs_mount *mp = ip->i_mount;
struct xfs_btree_cur *cur;
int error;
if (!xfs_need_iread_extents(ifp))
return 0;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
ir.loaded = 0;
xfs_iext_first(ifp, &ir.icur);
cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
error = xfs_btree_visit_blocks(cur, xfs_iread_bmbt_block,
XFS_BTREE_VISIT_RECORDS, &ir);
xfs_btree_del_cursor(cur, error);
if (error)
goto out;
if (XFS_IS_CORRUPT(mp, ir.loaded != ifp->if_nextents)) {
error = -EFSCORRUPTED;
goto out;
}
ASSERT(ir.loaded == xfs_iext_count(ifp));
return 0;
out:
xfs_iext_destroy(ifp);
return error;
}
/*
* Returns the relative block number of the first unused block(s) in the given
* fork with at least "len" logically contiguous blocks free. This is the
* lowest-address hole if the fork has holes, else the first block past the end
* of fork. Return 0 if the fork is currently local (in-inode).
*/
int /* error */
xfs_bmap_first_unused(
struct xfs_trans *tp, /* transaction pointer */
struct xfs_inode *ip, /* incore inode */
xfs_extlen_t len, /* size of hole to find */
xfs_fileoff_t *first_unused, /* unused block */
int whichfork) /* data or attr fork */
{
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
struct xfs_bmbt_irec got;
struct xfs_iext_cursor icur;
xfs_fileoff_t lastaddr = 0;
xfs_fileoff_t lowest, max;
int error;
if (ifp->if_format == XFS_DINODE_FMT_LOCAL) {
*first_unused = 0;
return 0;
}
ASSERT(xfs_ifork_has_extents(ifp));
error = xfs_iread_extents(tp, ip, whichfork);
if (error)
return error;
lowest = max = *first_unused;
for_each_xfs_iext(ifp, &icur, &got) {
/*
* See if the hole before this extent will work.
*/
if (got.br_startoff >= lowest + len &&
got.br_startoff - max >= len)
break;
lastaddr = got.br_startoff + got.br_blockcount;
max = XFS_FILEOFF_MAX(lastaddr, lowest);
}
*first_unused = max;
return 0;
}
/*
* Returns the file-relative block number of the last block - 1 before
* last_block (input value) in the file.
* This is not based on i_size, it is based on the extent records.
* Returns 0 for local files, as they do not have extent records.
*/
int /* error */
xfs_bmap_last_before(
struct xfs_trans *tp, /* transaction pointer */
struct xfs_inode *ip, /* incore inode */
xfs_fileoff_t *last_block, /* last block */
int whichfork) /* data or attr fork */
{
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
struct xfs_bmbt_irec got;
struct xfs_iext_cursor icur;
int error;
switch (ifp->if_format) {
case XFS_DINODE_FMT_LOCAL:
*last_block = 0;
return 0;
case XFS_DINODE_FMT_BTREE:
case XFS_DINODE_FMT_EXTENTS:
break;
default:
ASSERT(0);
return -EFSCORRUPTED;
}
error = xfs_iread_extents(tp, ip, whichfork);
if (error)
return error;
if (!xfs_iext_lookup_extent_before(ip, ifp, last_block, &icur, &got))
*last_block = 0;
return 0;
}
int
xfs_bmap_last_extent(
struct xfs_trans *tp,
struct xfs_inode *ip,
int whichfork,
struct xfs_bmbt_irec *rec,
int *is_empty)
{
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
struct xfs_iext_cursor icur;
int error;
error = xfs_iread_extents(tp, ip, whichfork);
if (error)
return error;
xfs_iext_last(ifp, &icur);
if (!xfs_iext_get_extent(ifp, &icur, rec))
*is_empty = 1;
else
*is_empty = 0;
return 0;
}
/*
* Check the last inode extent to determine whether this allocation will result
* in blocks being allocated at the end of the file. When we allocate new data
* blocks at the end of the file which do not start at the previous data block,
* we will try to align the new blocks at stripe unit boundaries.
*
* Returns 1 in bma->aeof if the file (fork) is empty as any new write will be
* at, or past the EOF.
*/
STATIC int
xfs_bmap_isaeof(
struct xfs_bmalloca *bma,
int whichfork)
{
struct xfs_bmbt_irec rec;
int is_empty;
int error;
bma->aeof = false;
error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec,
&is_empty);
if (error)
return error;
if (is_empty) {
bma->aeof = true;
return 0;
}
/*
* Check if we are allocation or past the last extent, or at least into
* the last delayed allocated extent.
*/
bma->aeof = bma->offset >= rec.br_startoff + rec.br_blockcount ||
(bma->offset >= rec.br_startoff &&
isnullstartblock(rec.br_startblock));
return 0;
}
/*
* Returns the file-relative block number of the first block past eof in
* the file. This is not based on i_size, it is based on the extent records.
* Returns 0 for local files, as they do not have extent records.
*/
int
xfs_bmap_last_offset(
struct xfs_inode *ip,
xfs_fileoff_t *last_block,
int whichfork)
{
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
struct xfs_bmbt_irec rec;
int is_empty;
int error;
*last_block = 0;
if (ifp->if_format == XFS_DINODE_FMT_LOCAL)
return 0;
if (XFS_IS_CORRUPT(ip->i_mount, !xfs_ifork_has_extents(ifp)))
return -EFSCORRUPTED;
error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, &is_empty);
if (error || is_empty)
return error;
*last_block = rec.br_startoff + rec.br_blockcount;
return 0;
}
/*
* Extent tree manipulation functions used during allocation.
*/
/*
* Convert a delayed allocation to a real allocation.
*/
STATIC int /* error */
xfs_bmap_add_extent_delay_real(
struct xfs_bmalloca *bma,
int whichfork)
{
struct xfs_mount *mp = bma->ip->i_mount;
struct xfs_ifork *ifp = XFS_IFORK_PTR(bma->ip, whichfork);
struct xfs_bmbt_irec *new = &bma->got;
int error; /* error return value */
int i; /* temp state */
xfs_fileoff_t new_endoff; /* end offset of new entry */
xfs_bmbt_irec_t r[3]; /* neighbor extent entries */
/* left is 0, right is 1, prev is 2 */
int rval=0; /* return value (logging flags) */
int state = xfs_bmap_fork_to_state(whichfork);
xfs_filblks_t da_new; /* new count del alloc blocks used */
xfs_filblks_t da_old; /* old count del alloc blocks used */
xfs_filblks_t temp=0; /* value for da_new calculations */
int tmp_rval; /* partial logging flags */
struct xfs_bmbt_irec old;
ASSERT(whichfork != XFS_ATTR_FORK);
ASSERT(!isnullstartblock(new->br_startblock));
ASSERT(!bma->cur ||
(bma->cur->bc_ino.flags & XFS_BTCUR_BMBT_WASDEL));
XFS_STATS_INC(mp, xs_add_exlist);
#define LEFT r[0]
#define RIGHT r[1]
#define PREV r[2]
/*
* Set up a bunch of variables to make the tests simpler.
*/
xfs_iext_get_extent(ifp, &bma->icur, &PREV);
new_endoff = new->br_startoff + new->br_blockcount;
ASSERT(isnullstartblock(PREV.br_startblock));
ASSERT(PREV.br_startoff <= new->br_startoff);
ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
da_old = startblockval(PREV.br_startblock);
da_new = 0;
/*
* Set flags determining what part of the previous delayed allocation
* extent is being replaced by a real allocation.
*/
if (PREV.br_startoff == new->br_startoff)
state |= BMAP_LEFT_FILLING;
if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
state |= BMAP_RIGHT_FILLING;
/*
* Check and set flags if this segment has a left neighbor.
* Don't set contiguous if the combined extent would be too large.
*/
if (xfs_iext_peek_prev_extent(ifp, &bma->icur, &LEFT)) {
state |= BMAP_LEFT_VALID;
if (isnullstartblock(LEFT.br_startblock))
state |= BMAP_LEFT_DELAY;
}
if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
LEFT.br_state == new->br_state &&
LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN)
state |= BMAP_LEFT_CONTIG;
/*
* Check and set flags if this segment has a right neighbor.
* Don't set contiguous if the combined extent would be too large.
* Also check for all-three-contiguous being too large.
*/
if (xfs_iext_peek_next_extent(ifp, &bma->icur, &RIGHT)) {
state |= BMAP_RIGHT_VALID;
if (isnullstartblock(RIGHT.br_startblock))
state |= BMAP_RIGHT_DELAY;
}
if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
new_endoff == RIGHT.br_startoff &&
new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
new->br_state == RIGHT.br_state &&
new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN &&
((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
BMAP_RIGHT_FILLING)) !=
(BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
BMAP_RIGHT_FILLING) ||
LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
<= MAXEXTLEN))
state |= BMAP_RIGHT_CONTIG;
error = 0;
/*
* Switch out based on the FILLING and CONTIG state bits.
*/
switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
/*
* Filling in all of a previously delayed allocation extent.
* The left and right neighbors are both contiguous with new.
*/
LEFT.br_blockcount += PREV.br_blockcount + RIGHT.br_blockcount;
xfs_iext_remove(bma->ip, &bma->icur, state);
xfs_iext_remove(bma->ip, &bma->icur, state);
xfs_iext_prev(ifp, &bma->icur);
xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);
ifp->if_nextents--;
if (bma->cur == NULL)
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
else {
rval = XFS_ILOG_CORE;
error = xfs_bmbt_lookup_eq(bma->cur, &RIGHT, &i);
if (error)
goto done;
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(mp, i != 1)) {
error = -EFSCORRUPTED;
goto done;
}
error = xfs_btree_delete(bma->cur, &i);
if (error)
goto done;
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(mp, i != 1)) {
error = -EFSCORRUPTED;
goto done;
}
error = xfs_btree_decrement(bma->cur, 0, &i);
if (error)
goto done;
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(mp, i != 1)) {
error = -EFSCORRUPTED;
goto done;
}
error = xfs_bmbt_update(bma->cur, &LEFT);
if (error)
goto done;
}
break;
case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
/*
* Filling in all of a previously delayed allocation extent.
* The left neighbor is contiguous, the right is not.
*/
old = LEFT;
LEFT.br_blockcount += PREV.br_blockcount;
xfs_iext_remove(bma->ip, &bma->icur, state);
xfs_iext_prev(ifp, &bma->icur);
xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);
if (bma->cur == NULL)
rval = XFS_ILOG_DEXT;
else {
rval = 0;
error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
if (error)
goto done;
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(mp, i != 1)) {
error = -EFSCORRUPTED;
goto done;
}
error = xfs_bmbt_update(bma->cur, &LEFT);
if (error)
goto done;
}
break;
case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
/*
* Filling in all of a previously delayed allocation extent.
xfs: delalloc -> unwritten COW fork allocation can go wrong Long saga. There have been days spent following this through dead end after dead end in multi-GB event traces. This morning, after writing a trace-cmd wrapper that enabled me to be more selective about XFS trace points, I discovered that I could get just enough essential tracepoints enabled that there was a 50:50 chance the fsx config would fail at ~115k ops. If it didn't fail at op 115547, I stopped fsx at op 115548 anyway. That gave me two traces - one where the problem manifested, and one where it didn't. After refining the traces to have the necessary information, I found that in the failing case there was a real extent in the COW fork compared to an unwritten extent in the working case. Walking back through the two traces to the point where the CWO fork extents actually diverged, I found that the bad case had an extra unwritten extent in it. This is likely because the bug it led me to had triggered multiple times in those 115k ops, leaving stray COW extents around. What I saw was a COW delalloc conversion to an unwritten extent (as they should always be through xfs_iomap_write_allocate()) resulted in a /written extent/: xfs_writepage: dev 259:0 ino 0x83 pgoff 0x17000 size 0x79a00 offset 0 length 0 xfs_iext_remove: dev 259:0 ino 0x83 state RC|LF|RF|COW cur 0xffff888247b899c0/2 offset 32 block 152 count 20 flag 1 caller xfs_bmap_add_extent_delay_real xfs_bmap_pre_update: dev 259:0 ino 0x83 state RC|LF|RF|COW cur 0xffff888247b899c0/1 offset 1 block 4503599627239429 count 31 flag 0 caller xfs_bmap_add_extent_delay_real xfs_bmap_post_update: dev 259:0 ino 0x83 state RC|LF|RF|COW cur 0xffff888247b899c0/1 offset 1 block 121 count 51 flag 0 caller xfs_bmap_add_ex Basically, Cow fork before: 0 1 32 52 +H+DDDDDDDDDDDD+UUUUUUUUUUU+ PREV RIGHT COW delalloc conversion allocates: 1 32 +uuuuuuuuuuuu+ NEW And the result according to the xfs_bmap_post_update trace was: 0 1 32 52 +H+wwwwwwwwwwwwwwwwwwwwwwww+ PREV Which is clearly wrong - it should be a merged unwritten extent, not an unwritten extent. That lead me to look at the LEFT_FILLING|RIGHT_FILLING|RIGHT_CONTIG case in xfs_bmap_add_extent_delay_real(), and sure enough, there's the bug. It takes the old delalloc extent (PREV) and adds the length of the RIGHT extent to it, takes the start block from NEW, removes the RIGHT extent and then updates PREV with the new extent. What it fails to do is update PREV.br_state. For delalloc, this is always XFS_EXT_NORM, while in this case we are converting the delayed allocation to unwritten, so it needs to be updated to XFS_EXT_UNWRITTEN. This LF|RF|RC case does not do this, and so the resultant extent is always written. And that's the bug I've been chasing for a week - a bmap btree bug, not a reflink/dedupe/copy_file_range bug, but a BMBT bug introduced with the recent in core extent tree scalability enhancements. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
2018-11-20 14:50:08 +08:00
* The right neighbor is contiguous, the left is not. Take care
* with delay -> unwritten extent allocation here because the
* delalloc record we are overwriting is always written.
*/
PREV.br_startblock = new->br_startblock;
PREV.br_blockcount += RIGHT.br_blockcount;
xfs: delalloc -> unwritten COW fork allocation can go wrong Long saga. There have been days spent following this through dead end after dead end in multi-GB event traces. This morning, after writing a trace-cmd wrapper that enabled me to be more selective about XFS trace points, I discovered that I could get just enough essential tracepoints enabled that there was a 50:50 chance the fsx config would fail at ~115k ops. If it didn't fail at op 115547, I stopped fsx at op 115548 anyway. That gave me two traces - one where the problem manifested, and one where it didn't. After refining the traces to have the necessary information, I found that in the failing case there was a real extent in the COW fork compared to an unwritten extent in the working case. Walking back through the two traces to the point where the CWO fork extents actually diverged, I found that the bad case had an extra unwritten extent in it. This is likely because the bug it led me to had triggered multiple times in those 115k ops, leaving stray COW extents around. What I saw was a COW delalloc conversion to an unwritten extent (as they should always be through xfs_iomap_write_allocate()) resulted in a /written extent/: xfs_writepage: dev 259:0 ino 0x83 pgoff 0x17000 size 0x79a00 offset 0 length 0 xfs_iext_remove: dev 259:0 ino 0x83 state RC|LF|RF|COW cur 0xffff888247b899c0/2 offset 32 block 152 count 20 flag 1 caller xfs_bmap_add_extent_delay_real xfs_bmap_pre_update: dev 259:0 ino 0x83 state RC|LF|RF|COW cur 0xffff888247b899c0/1 offset 1 block 4503599627239429 count 31 flag 0 caller xfs_bmap_add_extent_delay_real xfs_bmap_post_update: dev 259:0 ino 0x83 state RC|LF|RF|COW cur 0xffff888247b899c0/1 offset 1 block 121 count 51 flag 0 caller xfs_bmap_add_ex Basically, Cow fork before: 0 1 32 52 +H+DDDDDDDDDDDD+UUUUUUUUUUU+ PREV RIGHT COW delalloc conversion allocates: 1 32 +uuuuuuuuuuuu+ NEW And the result according to the xfs_bmap_post_update trace was: 0 1 32 52 +H+wwwwwwwwwwwwwwwwwwwwwwww+ PREV Which is clearly wrong - it should be a merged unwritten extent, not an unwritten extent. That lead me to look at the LEFT_FILLING|RIGHT_FILLING|RIGHT_CONTIG case in xfs_bmap_add_extent_delay_real(), and sure enough, there's the bug. It takes the old delalloc extent (PREV) and adds the length of the RIGHT extent to it, takes the start block from NEW, removes the RIGHT extent and then updates PREV with the new extent. What it fails to do is update PREV.br_state. For delalloc, this is always XFS_EXT_NORM, while in this case we are converting the delayed allocation to unwritten, so it needs to be updated to XFS_EXT_UNWRITTEN. This LF|RF|RC case does not do this, and so the resultant extent is always written. And that's the bug I've been chasing for a week - a bmap btree bug, not a reflink/dedupe/copy_file_range bug, but a BMBT bug introduced with the recent in core extent tree scalability enhancements. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
2018-11-20 14:50:08 +08:00
PREV.br_state = new->br_state;
xfs_iext_next(ifp, &bma->icur);
xfs_iext_remove(bma->ip, &bma->icur, state);
xfs_iext_prev(ifp, &bma->icur);
xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
if (bma->cur == NULL)
rval = XFS_ILOG_DEXT;
else {
rval = 0;
error = xfs_bmbt_lookup_eq(bma->cur, &RIGHT, &i);
if (error)
goto done;
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(mp, i != 1)) {
error = -EFSCORRUPTED;
goto done;
}
error = xfs_bmbt_update(bma->cur, &PREV);
if (error)
goto done;
}
break;
case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
/*
* Filling in all of a previously delayed allocation extent.
* Neither the left nor right neighbors are contiguous with
* the new one.
*/
PREV.br_startblock = new->br_startblock;
PREV.br_state = new->br_state;
xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
ifp->if_nextents++;
if (bma->cur == NULL)
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
else {
rval = XFS_ILOG_CORE;
error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
if (error)
goto done;
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(mp, i != 0)) {
error = -EFSCORRUPTED;
goto done;
}
error = xfs_btree_insert(bma->cur, &i);
if (error)
goto done;
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(mp, i != 1)) {
error = -EFSCORRUPTED;
goto done;
}
}
break;
case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
/*
* Filling in the first part of a previous delayed allocation.
* The left neighbor is contiguous.
*/
old = LEFT;
temp = PREV.br_blockcount - new->br_blockcount;
da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
startblockval(PREV.br_startblock));
LEFT.br_blockcount += new->br_blockcount;
PREV.br_blockcount = temp;
PREV.br_startoff += new->br_blockcount;
PREV.br_startblock = nullstartblock(da_new);
xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
xfs_iext_prev(ifp, &bma->icur);
xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);
if (bma->cur == NULL)
rval = XFS_ILOG_DEXT;
else {
rval = 0;
error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
if (error)
goto done;
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(mp, i != 1)) {
error = -EFSCORRUPTED;
goto done;
}
error = xfs_bmbt_update(bma->cur, &LEFT);
if (error)
goto done;
}
break;
case BMAP_LEFT_FILLING:
/*
* Filling in the first part of a previous delayed allocation.
* The left neighbor is not contiguous.
*/
xfs_iext_update_extent(bma->ip, state, &bma->icur, new);
ifp->if_nextents++;
if (bma->cur == NULL)
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
else {
rval = XFS_ILOG_CORE;
error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
if (error)
goto done;
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(mp, i != 0)) {
error = -EFSCORRUPTED;
goto done;
}
error = xfs_btree_insert(bma->cur, &i);
if (error)
goto done;
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(mp, i != 1)) {
error = -EFSCORRUPTED;
goto done;
}
}
if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
&bma->cur, 1, &tmp_rval, whichfork);
rval |= tmp_rval;
if (error)
goto done;
}
temp = PREV.br_blockcount - new->br_blockcount;
da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
startblockval(PREV.br_startblock) -
(bma->cur ? bma->cur->bc_ino.allocated : 0));
PREV.br_startoff = new_endoff;
PREV.br_blockcount = temp;
PREV.br_startblock = nullstartblock(da_new);
xfs_iext_next(ifp, &bma->icur);
xfs_iext_insert(bma->ip, &bma->icur, &PREV, state);
xfs_iext_prev(ifp, &bma->icur);
break;
case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
/*
* Filling in the last part of a previous delayed allocation.
* The right neighbor is contiguous with the new allocation.
*/
old = RIGHT;
RIGHT.br_startoff = new->br_startoff;
RIGHT.br_startblock = new->br_startblock;
RIGHT.br_blockcount += new->br_blockcount;
if (bma->cur == NULL)
rval = XFS_ILOG_DEXT;
else {
rval = 0;
error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
if (error)
goto done;
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(mp, i != 1)) {
error = -EFSCORRUPTED;
goto done;
}
error = xfs_bmbt_update(bma->cur, &RIGHT);
if (error)
goto done;
}
temp = PREV.br_blockcount - new->br_blockcount;
da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
startblockval(PREV.br_startblock));
PREV.br_blockcount = temp;
PREV.br_startblock = nullstartblock(da_new);
xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
xfs_iext_next(ifp, &bma->icur);
xfs_iext_update_extent(bma->ip, state, &bma->icur, &RIGHT);
break;
case BMAP_RIGHT_FILLING:
/*
* Filling in the last part of a previous delayed allocation.
* The right neighbor is not contiguous.
*/
xfs_iext_update_extent(bma->ip, state, &bma->icur, new);
ifp->if_nextents++;
if (bma->cur == NULL)
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
else {
rval = XFS_ILOG_CORE;
error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
if (error)
goto done;
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(mp, i != 0)) {
error = -EFSCORRUPTED;
goto done;
}
error = xfs_btree_insert(bma->cur, &i);
if (error)
goto done;
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(mp, i != 1)) {
error = -EFSCORRUPTED;
goto done;
}
}
if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
&bma->cur, 1, &tmp_rval, whichfork);
rval |= tmp_rval;
if (error)
goto done;
}
temp = PREV.br_blockcount - new->br_blockcount;
da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
startblockval(PREV.br_startblock) -
(bma->cur ? bma->cur->bc_ino.allocated : 0));
PREV.br_startblock = nullstartblock(da_new);
PREV.br_blockcount = temp;
xfs_iext_insert(bma->ip, &bma->icur, &PREV, state);
xfs_iext_next(ifp, &bma->icur);
break;
case 0:
/*
* Filling in the middle part of a previous delayed allocation.
* Contiguity is impossible here.
* This case is avoided almost all the time.
*
* We start with a delayed allocation:
*
* +ddddddddddddddddddddddddddddddddddddddddddddddddddddddd+
* PREV @ idx
*
* and we are allocating:
* +rrrrrrrrrrrrrrrrr+
* new
*
* and we set it up for insertion as:
* +ddddddddddddddddddd+rrrrrrrrrrrrrrrrr+ddddddddddddddddd+
* new
* PREV @ idx LEFT RIGHT
* inserted at idx + 1
*/
old = PREV;
/* LEFT is the new middle */
LEFT = *new;
/* RIGHT is the new right */
RIGHT.br_state = PREV.br_state;
RIGHT.br_startoff = new_endoff;
RIGHT.br_blockcount =
PREV.br_startoff + PREV.br_blockcount - new_endoff;
RIGHT.br_startblock =
nullstartblock(xfs_bmap_worst_indlen(bma->ip,
RIGHT.br_blockcount));
/* truncate PREV */
PREV.br_blockcount = new->br_startoff - PREV.br_startoff;
PREV.br_startblock =
nullstartblock(xfs_bmap_worst_indlen(bma->ip,
PREV.br_blockcount));
xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
xfs_iext_next(ifp, &bma->icur);
xfs_iext_insert(bma->ip, &bma->icur, &RIGHT, state);
xfs_iext_insert(bma->ip, &bma->icur, &LEFT, state);
ifp->if_nextents++;
if (bma->cur == NULL)
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
else {
rval = XFS_ILOG_CORE;
error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
if (error)
goto done;
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(mp, i != 0)) {
error = -EFSCORRUPTED;
goto done;
}
error = xfs_btree_insert(bma->cur, &i);
if (error)
goto done;
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(mp, i != 1)) {
error = -EFSCORRUPTED;
goto done;
}
}
if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
&bma->cur, 1, &tmp_rval, whichfork);
rval |= tmp_rval;
if (error)
goto done;
}
da_new = startblockval(PREV.br_startblock) +
startblockval(RIGHT.br_startblock);
break;
case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
case BMAP_LEFT_CONTIG:
case BMAP_RIGHT_CONTIG:
/*
* These cases are all impossible.
*/
ASSERT(0);
}
/* add reverse mapping unless caller opted out */
if (!(bma->flags & XFS_BMAPI_NORMAP))
xfs_rmap_map_extent(bma->tp, bma->ip, whichfork, new);
/* convert to a btree if necessary */
if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
int tmp_logflags; /* partial log flag return val */
ASSERT(bma->cur == NULL);
error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
&bma->cur, da_old > 0, &tmp_logflags,
whichfork);
bma->logflags |= tmp_logflags;
if (error)
goto done;
}
if (da_new != da_old)
xfs_mod_delalloc(mp, (int64_t)da_new - da_old);
if (bma->cur) {
da_new += bma->cur->bc_ino.allocated;
bma->cur->bc_ino.allocated = 0;
}
/* adjust for changes in reserved delayed indirect blocks */
if (da_new != da_old) {
ASSERT(state == 0 || da_new < da_old);
error = xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new),
false);
}
xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork);
done:
if (whichfork != XFS_COW_FORK)
bma->logflags |= rval;
return error;
#undef LEFT
#undef RIGHT
#undef PREV
}
/*
* Convert an unwritten allocation to a real allocation or vice versa.
*/
int /* error */
xfs_bmap_add_extent_unwritten_real(
struct xfs_trans *tp,
xfs_inode_t *ip, /* incore inode pointer */
int whichfork,
struct xfs_iext_cursor *icur,
struct xfs_btree_cur **curp, /* if *curp is null, not a btree */
xfs_bmbt_irec_t *new, /* new data to add to file extents */
int *logflagsp) /* inode logging flags */
{
struct xfs_btree_cur *cur; /* btree cursor */
int error; /* error return value */
int i; /* temp state */
struct xfs_ifork *ifp; /* inode fork pointer */
xfs_fileoff_t new_endoff; /* end offset of new entry */
xfs_bmbt_irec_t r[3]; /* neighbor extent entries */
/* left is 0, right is 1, prev is 2 */
int rval=0; /* return value (logging flags) */
int state = xfs_bmap_fork_to_state(whichfork);
struct xfs_mount *mp = ip->i_mount;
struct xfs_bmbt_irec old;
*logflagsp = 0;
cur = *curp;
ifp = XFS_IFORK_PTR(ip, whichfork);
ASSERT(!isnullstartblock(new->br_startblock));
XFS_STATS_INC(mp, xs_add_exlist);
#define LEFT r[0]
#define RIGHT r[1]
#define PREV r[2]
/*
* Set up a bunch of variables to make the tests simpler.
*/
error = 0;
xfs_iext_get_extent(ifp, icur, &PREV);
ASSERT(new->br_state != PREV.br_state);
new_endoff = new->br_startoff + new->br_blockcount;
ASSERT(PREV.br_startoff <= new->br_startoff);
ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
/*
* Set flags determining what part of the previous oldext allocation
* extent is being replaced by a newext allocation.
*/
if (PREV.br_startoff == new->br_startoff)
state |= BMAP_LEFT_FILLING;
if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
state |= BMAP_RIGHT_FILLING;
/*
* Check and set flags if this segment has a left neighbor.
* Don't set contiguous if the combined extent would be too large.
*/
if (xfs_iext_peek_prev_extent(ifp, icur, &LEFT)) {
state |= BMAP_LEFT_VALID;
if (isnullstartblock(LEFT.br_startblock))
state |= BMAP_LEFT_DELAY;
}
if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
LEFT.br_state == new->br_state &&
LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN)
state |= BMAP_LEFT_CONTIG;
/*
* Check and set flags if this segment has a right neighbor.
* Don't set contiguous if the combined extent would be too large.
* Also check for all-three-contiguous being too large.
*/
if (xfs_iext_peek_next_extent(ifp, icur, &RIGHT)) {
state |= BMAP_RIGHT_VALID;
if (isnullstartblock(RIGHT.br_startblock))
state |= BMAP_RIGHT_DELAY;
}
if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
new_endoff == RIGHT.br_startoff &&
new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
new->br_state == RIGHT.br_state &&
new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN &&
((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
BMAP_RIGHT_FILLING)) !=
(BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
BMAP_RIGHT_FILLING) ||
LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
<= MAXEXTLEN))
state |= BMAP_RIGHT_CONTIG;
/*
* Switch out based on the FILLING and CONTIG state bits.
*/
switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
/*
* Setting all of a previous oldext extent to newext.
* The left and right neighbors are both contiguous with new.
*/
LEFT.br_blockcount += PREV.br_blockcount + RIGHT.br_blockcount;
xfs_iext_remove(ip, icur, state);
xfs_iext_remove(ip, icur, state);
xfs_iext_prev(ifp, icur);
xfs_iext_update_extent(ip, state, icur, &LEFT);
ifp->if_nextents -= 2;
if (cur == NULL)
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
else {
rval = XFS_ILOG_CORE;
error = xfs_bmbt_lookup_eq(cur, &RIGHT, &i);
if (error)
goto done;
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(mp, i != 1)) {
error = -EFSCORRUPTED;
goto done;
}
if ((error = xfs_btree_delete(cur, &i)))
goto done;
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(mp, i != 1)) {
error = -EFSCORRUPTED;
goto done;
}
if ((error = xfs_btree_decrement(cur, 0, &i)))
goto done;
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(mp, i != 1)) {
error = -EFSCORRUPTED;
goto done;
}
if ((error = xfs_btree_delete(cur, &i)))
goto done;
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(mp, i != 1)) {
error = -EFSCORRUPTED;
goto done;
}
if ((error = xfs_btree_decrement(cur, 0, &i)))
goto done;
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(mp, i != 1)) {
error = -EFSCORRUPTED;
goto done;
}
error = xfs_bmbt_update(cur, &LEFT);
if (error)
goto done;
}
break;
case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
/*
* Setting all of a previous oldext extent to newext.
* The left neighbor is contiguous, the right is not.
*/
LEFT.br_blockcount += PREV.br_blockcount;
xfs_iext_remove(ip, icur, state);
xfs_iext_prev(ifp, icur);
xfs_iext_update_extent(ip, state, icur, &LEFT);
ifp->if_nextents--;
if (cur == NULL)
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
else {
rval = XFS_ILOG_CORE;
error = xfs_bmbt_lookup_eq(cur, &PREV, &i);
if (error)
goto done;
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(mp, i != 1)) {
error = -EFSCORRUPTED;
goto done;
}
if ((error = xfs_btree_delete(cur, &i)))
goto done;
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(mp, i != 1)) {
error = -EFSCORRUPTED;
goto done;
}
if ((error = xfs_btree_decrement(cur, 0, &i)))
goto done;
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(mp, i != 1)) {
error = -EFSCORRUPTED;
goto done;
}
error = xfs_bmbt_update(cur, &LEFT);
if (error)
goto done;
}
break;
case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
/*
* Setting all of a previous oldext extent to newext.
* The right neighbor is contiguous, the left is not.
*/
PREV.br_blockcount += RIGHT.br_blockcount;
PREV.br_state = new->br_state;
xfs_iext_next(ifp, icur);
xfs_iext_remove(ip, icur, state);
xfs_iext_prev(ifp, icur);
xfs_iext_update_extent(ip, state, icur, &PREV);
ifp->if_nextents--;
if (cur == NULL)
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
else {
rval = XFS_ILOG_CORE;
error = xfs_bmbt_lookup_eq(cur, &RIGHT, &i);
if (error)
goto done;
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(mp, i != 1)) {
error = -EFSCORRUPTED;
goto done;
}
if ((error = xfs_btree_delete(cur, &i)))
goto done;
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(mp, i != 1)) {
error = -EFSCORRUPTED;
goto done;
}
if ((error = xfs_btree_decrement(cur, 0, &i)))
goto done;
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(mp, i != 1)) {
error = -EFSCORRUPTED;
goto done;
}
error = xfs_bmbt_update(cur, &PREV);
if (error)
goto done;
}
break;
xfs: xfs_bmap_add_attrfork_local is too generic When we are converting local data to an extent format as a result of adding an attribute, the type of data contained in the local fork determines the behaviour that needs to occur. xfs_bmap_add_attrfork_local() already handles the directory data case specially by using S_ISDIR() and calling out to xfs_dir2_sf_to_block(), but with verifiers we now need to handle each different type of metadata specially and different metadata formats require different verifiers (and eventually block header initialisation). There is only a single place that we add and attribute fork to the inode, but that is in the attribute code and it knows nothing about the specific contents of the data fork. It is only the case of local data that is the issue here, so adding code to hadnle this case in the attribute specific code is wrong. Hence we are really stuck trying to detect the data fork contents in xfs_bmap_add_attrfork_local() and performing the correct callout there. Luckily the current cases can be determined by S_IS* macros, and we can push the work off to data specific callouts, but each of those callouts does a lot of work in common with xfs_bmap_local_to_extents(). The only reason that this fails for symlinks right now is is that xfs_bmap_local_to_extents() assumes the data fork contains extent data, and so attaches a a bmap extent data verifier to the buffer and simply copies the data fork information straight into it. To fix this, allow us to pass a "formatting" callback into xfs_bmap_local_to_extents() which is responsible for setting the buffer type, initialising it and copying the data fork contents over to the new buffer. This allows callers to specify how they want to format the new buffer (which is necessary for the upcoming CRC enabled metadata blocks) and hence make xfs_bmap_local_to_extents() useful for any type of data fork content. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Mark Tinguely <tinguely@sgi.com> Signed-off-by: Ben Myers <bpm@sgi.com>
2013-02-11 12:58:13 +08:00
case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
/*
* Setting all of a previous oldext extent to newext.
* Neither the left nor right neighbors are contiguous with
* the new one.
*/
PREV.br_state = new->br_state;
xfs_iext_update_extent(ip, state, icur, &PREV);
xfs: xfs_bmap_add_attrfork_local is too generic When we are converting local data to an extent format as a result of adding an attribute, the type of data contained in the local fork determines the behaviour that needs to occur. xfs_bmap_add_attrfork_local() already handles the directory data case specially by using S_ISDIR() and calling out to xfs_dir2_sf_to_block(), but with verifiers we now need to handle each different type of metadata specially and different metadata formats require different verifiers (and eventually block header initialisation). There is only a single place that we add and attribute fork to the inode, but that is in the attribute code and it knows nothing about the specific contents of the data fork. It is only the case of local data that is the issue here, so adding code to hadnle this case in the attribute specific code is wrong. Hence we are really stuck trying to detect the data fork contents in xfs_bmap_add_attrfork_local() and performing the correct callout there. Luckily the current cases can be determined by S_IS* macros, and we can push the work off to data specific callouts, but each of those callouts does a lot of work in common with xfs_bmap_local_to_extents(). The only reason that this fails for symlinks right now is is that xfs_bmap_local_to_extents() assumes the data fork contains extent data, and so attaches a a bmap extent data verifier to the buffer and simply copies the data fork information straight into it. To fix this, allow us to pass a "formatting" callback into xfs_bmap_local_to_extents() which is responsible for setting the buffer type, initialising it and copying the data fork contents over to the new buffer. This allows callers to specify how they want to format the new buffer (which is necessary for the upcoming CRC enabled metadata blocks) and hence make xfs_bmap_local_to_extents() useful for any type of data fork content. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Mark Tinguely <tinguely@sgi.com> Signed-off-by: Ben Myers <bpm@sgi.com>
2013-02-11 12:58:13 +08:00
if (cur == NULL)
rval = XFS_ILOG_DEXT;
else {
rval = 0;
error = xfs_bmbt_lookup_eq(cur, new, &i);
if (error)
goto done;
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(mp, i != 1)) {
error = -EFSCORRUPTED;
goto done;
}
error = xfs_bmbt_update(cur, &PREV);
if (error)
goto done;
}
break;
xfs: xfs_bmap_add_attrfork_local is too generic When we are converting local data to an extent format as a result of adding an attribute, the type of data contained in the local fork determines the behaviour that needs to occur. xfs_bmap_add_attrfork_local() already handles the directory data case specially by using S_ISDIR() and calling out to xfs_dir2_sf_to_block(), but with verifiers we now need to handle each different type of metadata specially and different metadata formats require different verifiers (and eventually block header initialisation). There is only a single place that we add and attribute fork to the inode, but that is in the attribute code and it knows nothing about the specific contents of the data fork. It is only the case of local data that is the issue here, so adding code to hadnle this case in the attribute specific code is wrong. Hence we are really stuck trying to detect the data fork contents in xfs_bmap_add_attrfork_local() and performing the correct callout there. Luckily the current cases can be determined by S_IS* macros, and we can push the work off to data specific callouts, but each of those callouts does a lot of work in common with xfs_bmap_local_to_extents(). The only reason that this fails for symlinks right now is is that xfs_bmap_local_to_extents() assumes the data fork contains extent data, and so attaches a a bmap extent data verifier to the buffer and simply copies the data fork information straight into it. To fix this, allow us to pass a "formatting" callback into xfs_bmap_local_to_extents() which is responsible for setting the buffer type, initialising it and copying the data fork contents over to the new buffer. This allows callers to specify how they want to format the new buffer (which is necessary for the upcoming CRC enabled metadata blocks) and hence make xfs_bmap_local_to_extents() useful for any type of data fork content. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Mark Tinguely <tinguely@sgi.com> Signed-off-by: Ben Myers <bpm@sgi.com>
2013-02-11 12:58:13 +08:00
case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
/*
* Setting the first part of a previous oldext extent to newext.
* The left neighbor is contiguous.
*/
LEFT.br_blockcount += new->br_blockcount;
old = PREV;
PREV.br_startoff += new->br_blockcount;
PREV.br_startblock += new->br_blockcount;
PREV.br_blockcount -= new->br_blockcount;
[XFS] 929045 567344 This mod introduces multi-level in-core file extent functionality, building upon the new layout introduced in mod xfs-linux:xfs-kern:207390a. The new multi-level extent allocations are only required for heavily fragmented files, so the old-style linear extent list is used on files until the extents reach a pre-determined size of 4k. 4k buffers are used because this is the system page size on Linux i386 and systems with larger page sizes don't seem to gain much, if anything, by using their native page size as the extent buffer size. Also, using 4k extent buffers everywhere provides a consistent interface for CXFS across different platforms. The 4k extent buffers are managed by an indirection array (xfs_ext_irec_t) which is basically just a pointer array with a bit of extra information to keep track of the number of extents in each buffer as well as the extent offset of each buffer. Major changes include: - Add multi-level in-core file extent functionality to the xfs_iext_ subroutines introduced in mod: xfs-linux:xfs-kern:207390a - Introduce 13 new subroutines which add functionality for multi-level in-core file extents: xfs_iext_add_indirect_multi() xfs_iext_remove_indirect() xfs_iext_realloc_indirect() xfs_iext_indirect_to_direct() xfs_iext_bno_to_irec() xfs_iext_idx_to_irec() xfs_iext_irec_init() xfs_iext_irec_new() xfs_iext_irec_remove() xfs_iext_irec_compact() xfs_iext_irec_compact_pages() xfs_iext_irec_compact_full() xfs_iext_irec_update_extoffs() SGI-PV: 928864 SGI-Modid: xfs-linux-melb:xfs-kern:207393a Signed-off-by: Mandy Kirkconnell <alkirkco@sgi.com> Signed-off-by: Nathan Scott <nathans@sgi.com>
2006-03-14 10:30:23 +08:00
xfs_iext_update_extent(ip, state, icur, &PREV);
xfs_iext_prev(ifp, icur);
xfs_iext_update_extent(ip, state, icur, &LEFT);
[XFS] There are a few problems with the new xfs_bmap_search_multi_extents() wrapper function that I introduced in mod xfs-linux:xfs-kern:207393a. The function was added as a wrapper around xfs_bmap_do_search_extents() to avoid breaking the top-of-tree CXFS interface. The idea of the function was basically to extract the target extent buffer (if muli- level extent allocation mode), then call xfs_bmap_do_search_extents() with either a pointer to the first extent in the target buffer or a pointer to the first extent in the file, depending on which extent mode was being used. However, in addition to locating the target extent record for block bno, xfs_bmap_do_search_extents() also sets four parameters needed by the caller: *lastx, *eofp, *gotp, *prevp. Passing only the target extent buffer to xfs_bmap_do_search_extents() causes *eofp to be set incorrectly if the extent is at the end of the target list but there are actually more extents in the next er_extbuf. Likewise, if the extent is the first one in the buffer but NOT the first in the file, *prevp is incorrectly set to NULL. Adding the needed functionality to xfs_bmap_search_multi_extents() to re-set any incorrectly set fields is redundant and makes the call to xfs_bmap_do_search_extents() not make much sense when multi-level extent allocation mode is being used. This mod basically extracts the two functional components from xfs_bmap_do_search_extents(), with the intent of obsoleting/removing xfs_bmap_do_search_extents() after the CXFS mult-level in-core extent changes are checked in. The two components are: 1) The binary search to locate the target extent record, and 2) Setting the four parameters needed by the caller (*lastx, *eofp, *gotp, *prevp). Component 1: I created a new function in xfs_inode.c called xfs_iext_bno_to_ext(), which executes the binary search to find the target extent record. xfs_bmap_search_multi_extents() has been modified to call xfs_iext_bno_to_ext() rather than xfs_bmap_do_search_extents(). Component 2: The parameter setting functionality has been added to xfs_bmap_search_multi_extents(), eliminating the need for xfs_bmap_do_search_extents(). These changes make the removal of xfs_bmap_do_search_extents() trival once the CXFS changes are in place. They also allow us to maintain the current XFS interface, using the new search function introduced in mod xfs-linux:xfs-kern:207393a. SGI-PV: 928864 SGI-Modid: xfs-linux-melb:xfs-kern:207866a Signed-off-by: Mandy Kirkconnell <alkirkco@sgi.com> Signed-off-by: Nathan Scott <nathans@sgi.com>
2006-03-17 14:25:04 +08:00
if (cur == NULL)
rval = XFS_ILOG_DEXT;
else {
rval = 0;
error = xfs_bmbt_lookup_eq(cur, &old, &i);
if (error)
goto done;
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(mp, i != 1)) {
error = -EFSCORRUPTED;
goto done;
}
error = xfs_bmbt_update(cur, &PREV);
if (error)
goto done;
error = xfs_btree_decrement(cur, 0, &i);
if (error)
goto done;
error = xfs_bmbt_update(cur, &LEFT);
if (error)
goto done;
[XFS] There are a few problems with the new xfs_bmap_search_multi_extents() wrapper function that I introduced in mod xfs-linux:xfs-kern:207393a. The function was added as a wrapper around xfs_bmap_do_search_extents() to avoid breaking the top-of-tree CXFS interface. The idea of the function was basically to extract the target extent buffer (if muli- level extent allocation mode), then call xfs_bmap_do_search_extents() with either a pointer to the first extent in the target buffer or a pointer to the first extent in the file, depending on which extent mode was being used. However, in addition to locating the target extent record for block bno, xfs_bmap_do_search_extents() also sets four parameters needed by the caller: *lastx, *eofp, *gotp, *prevp. Passing only the target extent buffer to xfs_bmap_do_search_extents() causes *eofp to be set incorrectly if the extent is at the end of the target list but there are actually more extents in the next er_extbuf. Likewise, if the extent is the first one in the buffer but NOT the first in the file, *prevp is incorrectly set to NULL. Adding the needed functionality to xfs_bmap_search_multi_extents() to re-set any incorrectly set fields is redundant and makes the call to xfs_bmap_do_search_extents() not make much sense when multi-level extent allocation mode is being used. This mod basically extracts the two functional components from xfs_bmap_do_search_extents(), with the intent of obsoleting/removing xfs_bmap_do_search_extents() after the CXFS mult-level in-core extent changes are checked in. The two components are: 1) The binary search to locate the target extent record, and 2) Setting the four parameters needed by the caller (*lastx, *eofp, *gotp, *prevp). Component 1: I created a new function in xfs_inode.c called xfs_iext_bno_to_ext(), which executes the binary search to find the target extent record. xfs_bmap_search_multi_extents() has been modified to call xfs_iext_bno_to_ext() rather than xfs_bmap_do_search_extents(). Component 2: The parameter setting functionality has been added to xfs_bmap_search_multi_extents(), eliminating the need for xfs_bmap_do_search_extents(). These changes make the removal of xfs_bmap_do_search_extents() trival once the CXFS changes are in place. They also allow us to maintain the current XFS interface, using the new search function introduced in mod xfs-linux:xfs-kern:207393a. SGI-PV: 928864 SGI-Modid: xfs-linux-melb:xfs-kern:207866a Signed-off-by: Mandy Kirkconnell <alkirkco@sgi.com> Signed-off-by: Nathan Scott <nathans@sgi.com>
2006-03-17 14:25:04 +08:00
}
break;
[XFS] 929045 567344 This mod introduces multi-level in-core file extent functionality, building upon the new layout introduced in mod xfs-linux:xfs-kern:207390a. The new multi-level extent allocations are only required for heavily fragmented files, so the old-style linear extent list is used on files until the extents reach a pre-determined size of 4k. 4k buffers are used because this is the system page size on Linux i386 and systems with larger page sizes don't seem to gain much, if anything, by using their native page size as the extent buffer size. Also, using 4k extent buffers everywhere provides a consistent interface for CXFS across different platforms. The 4k extent buffers are managed by an indirection array (xfs_ext_irec_t) which is basically just a pointer array with a bit of extra information to keep track of the number of extents in each buffer as well as the extent offset of each buffer. Major changes include: - Add multi-level in-core file extent functionality to the xfs_iext_ subroutines introduced in mod: xfs-linux:xfs-kern:207390a - Introduce 13 new subroutines which add functionality for multi-level in-core file extents: xfs_iext_add_indirect_multi() xfs_iext_remove_indirect() xfs_iext_realloc_indirect() xfs_iext_indirect_to_direct() xfs_iext_bno_to_irec() xfs_iext_idx_to_irec() xfs_iext_irec_init() xfs_iext_irec_new() xfs_iext_irec_remove() xfs_iext_irec_compact() xfs_iext_irec_compact_pages() xfs_iext_irec_compact_full() xfs_iext_irec_update_extoffs() SGI-PV: 928864 SGI-Modid: xfs-linux-melb:xfs-kern:207393a Signed-off-by: Mandy Kirkconnell <alkirkco@sgi.com> Signed-off-by: Nathan Scott <nathans@sgi.com>
2006-03-14 10:30:23 +08:00
case BMAP_LEFT_FILLING:
/*
* Setting the first part of a previous oldext extent to newext.
* The left neighbor is not contiguous.
*/
old = PREV;
PREV.br_startoff += new->br_blockcount;
PREV.br_startblock += new->br_blockcount;
PREV.br_blockcount -= new->br_blockcount;
xfs_iext_update_extent(ip, state, icur, &PREV);
xfs_iext_insert(ip, icur, new, state);
ifp->if_nextents++;
if (cur == NULL)
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
else {
rval = XFS_ILOG_CORE;
error = xfs_bmbt_lookup_eq(cur, &old, &i);
if (error)
goto done;
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(mp, i != 1)) {
error = -EFSCORRUPTED;
goto done;
}
error = xfs_bmbt_update(cur, &PREV);
if (error)
goto done;
cur->bc_rec.b = *new;
if ((error = xfs_btree_insert(cur, &i)))
goto done;
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(mp, i != 1)) {
error = -EFSCORRUPTED;
goto done;
}
}
break;
case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
/*
* Setting the last part of a previous oldext extent to newext.
* The right neighbor is contiguous with the new allocation.
*/
old = PREV;
PREV.br_blockcount -= new->br_blockcount;
RIGHT.br_startoff = new->br_startoff;
RIGHT.br_startblock = new->br_startblock;
RIGHT.br_blockcount += new->br_blockcount;
xfs_iext_update_extent(ip, state, icur, &PREV);
xfs_iext_next(ifp, icur);
xfs_iext_update_extent(ip, state, icur, &RIGHT);
if (cur == NULL)
rval = XFS_ILOG_DEXT;
else {
rval = 0;
error = xfs_bmbt_lookup_eq(cur, &old, &i);
if (error)
goto done;
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(mp, i != 1)) {
error = -EFSCORRUPTED;
goto done;
}
error = xfs_bmbt_update(cur, &PREV);
if (error)
goto done;
error = xfs_btree_increment(cur, 0, &i);
if (error)
goto done;
error = xfs_bmbt_update(cur, &RIGHT);
if (error)
goto done;
}
break;
case BMAP_RIGHT_FILLING:
/*
* Setting the last part of a previous oldext extent to newext.
* The right neighbor is not contiguous.
*/
old = PREV;
PREV.br_blockcount -= new->br_blockcount;
xfs_iext_update_extent(ip, state, icur, &PREV);
xfs_iext_next(ifp, icur);
xfs_iext_insert(ip, icur, new, state);
ifp->if_nextents++;
if (cur == NULL)
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
else {
rval = XFS_ILOG_CORE;
error = xfs_bmbt_lookup_eq(cur, &old, &i);
if (error)
goto done;
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(mp, i != 1)) {
error = -EFSCORRUPTED;
goto done;
}
error = xfs_bmbt_update(cur, &PREV);
if (error)
goto done;
error = xfs_bmbt_lookup_eq(cur, new, &i);
if (error)
goto done;
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(mp, i != 0)) {
error = -EFSCORRUPTED;
goto done;
}
if ((error = xfs_btree_insert(cur, &i)))
goto done;
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(mp, i != 1)) {
error = -EFSCORRUPTED;
goto done;
}
}
break;
case 0:
/*
* Setting the middle part of a previous oldext extent to
* newext. Contiguity is impossible here.
* One extent becomes three extents.
*/
old = PREV;
PREV.br_blockcount = new->br_startoff - PREV.br_startoff;
r[0] = *new;
r[1].br_startoff = new_endoff;
r[1].br_blockcount =
old.br_startoff + old.br_blockcount - new_endoff;
r[1].br_startblock = new->br_startblock + new->br_blockcount;
r[1].br_state = PREV.br_state;
xfs_iext_update_extent(ip, state, icur, &PREV);
xfs_iext_next(ifp, icur);
xfs_iext_insert(ip, icur, &r[1], state);
xfs_iext_insert(ip, icur, &r[0], state);
ifp->if_nextents += 2;
if (cur == NULL)
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
else {
rval = XFS_ILOG_CORE;
error = xfs_bmbt_lookup_eq(cur, &old, &i);
if (error)
goto done;
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(mp, i != 1)) {
error = -EFSCORRUPTED;
goto done;
}
/* new right extent - oldext */
error = xfs_bmbt_update(cur, &r[1]);
if (error)
goto done;
/* new left extent - oldext */
cur->bc_rec.b = PREV;
if ((error = xfs_btree_insert(cur, &i)))
goto done;
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(mp, i != 1)) {
error = -EFSCORRUPTED;
goto done;
}
/*
* Reset the cursor to the position of the new extent
* we are about to insert as we can't trust it after
* the previous insert.
*/
error = xfs_bmbt_lookup_eq(cur, new, &i);
if (error)
goto done;
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(mp, i != 0)) {
error = -EFSCORRUPTED;
goto done;
}
/* new middle extent - newext */
if ((error = xfs_btree_insert(cur, &i)))
goto done;
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(mp, i != 1)) {
error = -EFSCORRUPTED;
goto done;
}
}
break;
case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
case BMAP_LEFT_CONTIG:
case BMAP_RIGHT_CONTIG:
/*
* These cases are all impossible.
*/
ASSERT(0);
}
/* update reverse mappings */
xfs_rmap_convert_extent(mp, tp, ip, whichfork, new);
/* convert to a btree if necessary */
if (xfs_bmap_needs_btree(ip, whichfork)) {
int tmp_logflags; /* partial log flag return val */
ASSERT(cur == NULL);
error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0,
&tmp_logflags, whichfork);
*logflagsp |= tmp_logflags;
if (error)
goto done;
}
/* clear out the allocated field, done with it now in any case. */
if (cur) {
cur->bc_ino.allocated = 0;
*curp = cur;
}
xfs_bmap_check_leaf_extents(*curp, ip, whichfork);
done:
*logflagsp |= rval;
return error;
#undef LEFT
#undef RIGHT
#undef PREV
}
/*
* Convert a hole to a delayed allocation.
*/
STATIC void
xfs_bmap_add_extent_hole_delay(
xfs_inode_t *ip, /* incore inode pointer */
int whichfork,
struct xfs_iext_cursor *icur,
xfs_bmbt_irec_t *new) /* new data to add to file extents */
{
struct xfs_ifork *ifp; /* inode fork pointer */
xfs_bmbt_irec_t left; /* left neighbor extent entry */
xfs_filblks_t newlen=0; /* new indirect size */
xfs_filblks_t oldlen=0; /* old indirect size */
xfs_bmbt_irec_t right; /* right neighbor extent entry */
int state = xfs_bmap_fork_to_state(whichfork);
xfs_filblks_t temp; /* temp for indirect calculations */
ifp = XFS_IFORK_PTR(ip, whichfork);
ASSERT(isnullstartblock(new->br_startblock));
/*
* Check and set flags if this segment has a left neighbor
*/
if (xfs_iext_peek_prev_extent(ifp, icur, &left)) {
state |= BMAP_LEFT_VALID;
if (isnullstartblock(left.br_startblock))
state |= BMAP_LEFT_DELAY;
}
/*
* Check and set flags if the current (right) segment exists.
* If it doesn't exist, we're converting the hole at end-of-file.
*/
if (xfs_iext_get_extent(ifp, icur, &right)) {
state |= BMAP_RIGHT_VALID;
if (isnullstartblock(right.br_startblock))
state |= BMAP_RIGHT_DELAY;
}
/*
* Set contiguity flags on the left and right neighbors.
* Don't let extents get too large, even if the pieces are contiguous.
*/
if ((state & BMAP_LEFT_VALID) && (state & BMAP_LEFT_DELAY) &&
left.br_startoff + left.br_blockcount == new->br_startoff &&
left.br_blockcount + new->br_blockcount <= MAXEXTLEN)
state |= BMAP_LEFT_CONTIG;
if ((state & BMAP_RIGHT_VALID) && (state & BMAP_RIGHT_DELAY) &&
new->br_startoff + new->br_blockcount == right.br_startoff &&
new->br_blockcount + right.br_blockcount <= MAXEXTLEN &&
(!(state & BMAP_LEFT_CONTIG) ||
(left.br_blockcount + new->br_blockcount +
right.br_blockcount <= MAXEXTLEN)))
state |= BMAP_RIGHT_CONTIG;
/*
* Switch out based on the contiguity flags.
*/
switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
/*
* New allocation is contiguous with delayed allocations
* on the left and on the right.
* Merge all three into a single extent record.
*/
temp = left.br_blockcount + new->br_blockcount +
right.br_blockcount;
oldlen = startblockval(left.br_startblock) +
startblockval(new->br_startblock) +
startblockval(right.br_startblock);
xfs: handle indlen shortage on delalloc extent merge When a delalloc extent is created, it can be merged with pre-existing, contiguous, delalloc extents. When this occurs, xfs_bmap_add_extent_hole_delay() merges the extents along with the associated indirect block reservations. The expectation here is that the combined worst case indlen reservation is always less than or equal to the indlen reservation for the individual extents. This is not always the case, however, as existing extents can less than the expected indlen reservation if the extent was previously split due to a hole punch. If a new extent merges with such an extent, the total indlen requirement may be larger than the sum of the indlen reservations held by both extents. xfs_bmap_add_extent_hole_delay() assumes that the worst case indlen reservation is always available and assigns it to the merged extent without consideration for the indlen held by the pre-existing extent. As a result, the subsequent xfs_mod_fdblocks() call can attempt an unintentional allocation rather than a free (indicated by an ASSERT() failure). Further, if the allocation happens to fail in this context, the failure goes unhandled and creates a filesystem wide block accounting inconsistency. Fix xfs_bmap_add_extent_hole_delay() to function as designed. Cap the indlen reservation assigned to the merged extent to the sum of the indlen reservations held by each of the individual extents. Signed-off-by: Brian Foster <bfoster@redhat.com> Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
2017-02-14 14:48:18 +08:00
newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
oldlen);
left.br_startblock = nullstartblock(newlen);
left.br_blockcount = temp;
xfs_iext_remove(ip, icur, state);
xfs_iext_prev(ifp, icur);
xfs_iext_update_extent(ip, state, icur, &left);
break;
case BMAP_LEFT_CONTIG:
/*
* New allocation is contiguous with a delayed allocation
* on the left.
* Merge the new allocation with the left neighbor.
*/
temp = left.br_blockcount + new->br_blockcount;
oldlen = startblockval(left.br_startblock) +
startblockval(new->br_startblock);
xfs: handle indlen shortage on delalloc extent merge When a delalloc extent is created, it can be merged with pre-existing, contiguous, delalloc extents. When this occurs, xfs_bmap_add_extent_hole_delay() merges the extents along with the associated indirect block reservations. The expectation here is that the combined worst case indlen reservation is always less than or equal to the indlen reservation for the individual extents. This is not always the case, however, as existing extents can less than the expected indlen reservation if the extent was previously split due to a hole punch. If a new extent merges with such an extent, the total indlen requirement may be larger than the sum of the indlen reservations held by both extents. xfs_bmap_add_extent_hole_delay() assumes that the worst case indlen reservation is always available and assigns it to the merged extent without consideration for the indlen held by the pre-existing extent. As a result, the subsequent xfs_mod_fdblocks() call can attempt an unintentional allocation rather than a free (indicated by an ASSERT() failure). Further, if the allocation happens to fail in this context, the failure goes unhandled and creates a filesystem wide block accounting inconsistency. Fix xfs_bmap_add_extent_hole_delay() to function as designed. Cap the indlen reservation assigned to the merged extent to the sum of the indlen reservations held by each of the individual extents. Signed-off-by: Brian Foster <bfoster@redhat.com> Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
2017-02-14 14:48:18 +08:00
newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
oldlen);
left.br_blockcount = temp;
left.br_startblock = nullstartblock(newlen);
xfs_iext_prev(ifp, icur);
xfs_iext_update_extent(ip, state, icur, &left);
break;
case BMAP_RIGHT_CONTIG:
/*
* New allocation is contiguous with a delayed allocation
* on the right.
* Merge the new allocation with the right neighbor.
*/
temp = new->br_blockcount + right.br_blockcount;
oldlen = startblockval(new->br_startblock) +
startblockval(right.br_startblock);
xfs: handle indlen shortage on delalloc extent merge When a delalloc extent is created, it can be merged with pre-existing, contiguous, delalloc extents. When this occurs, xfs_bmap_add_extent_hole_delay() merges the extents along with the associated indirect block reservations. The expectation here is that the combined worst case indlen reservation is always less than or equal to the indlen reservation for the individual extents. This is not always the case, however, as existing extents can less than the expected indlen reservation if the extent was previously split due to a hole punch. If a new extent merges with such an extent, the total indlen requirement may be larger than the sum of the indlen reservations held by both extents. xfs_bmap_add_extent_hole_delay() assumes that the worst case indlen reservation is always available and assigns it to the merged extent without consideration for the indlen held by the pre-existing extent. As a result, the subsequent xfs_mod_fdblocks() call can attempt an unintentional allocation rather than a free (indicated by an ASSERT() failure). Further, if the allocation happens to fail in this context, the failure goes unhandled and creates a filesystem wide block accounting inconsistency. Fix xfs_bmap_add_extent_hole_delay() to function as designed. Cap the indlen reservation assigned to the merged extent to the sum of the indlen reservations held by each of the individual extents. Signed-off-by: Brian Foster <bfoster@redhat.com> Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
2017-02-14 14:48:18 +08:00
newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
oldlen);
right.br_startoff = new->br_startoff;
right.br_startblock = nullstartblock(newlen);
right.br_blockcount = temp;
xfs_iext_update_extent(ip, state, icur, &right);
break;
case 0:
/*
* New allocation is not contiguous with another
* delayed allocation.
* Insert a new entry.
*/
oldlen = newlen = 0;
xfs_iext_insert(ip, icur, new, state);
break;
}
if (oldlen != newlen) {
ASSERT(oldlen > newlen);
xfs_mod_fdblocks(ip->i_mount, (int64_t)(oldlen - newlen),
false);
/*
* Nothing to do for disk quota accounting here.
*/
xfs_mod_delalloc(ip->i_mount, (int64_t)newlen - oldlen);
}
}
/*
* Convert a hole to a real allocation.
*/
STATIC int /* error */
xfs_bmap_add_extent_hole_real(
struct xfs_trans *tp,
struct xfs_inode *ip,
int whichfork,
struct xfs_iext_cursor *icur,
struct xfs_btree_cur **curp,
struct xfs_bmbt_irec *new,
int *logflagsp,
int flags)
{
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
struct xfs_mount *mp = ip->i_mount;
struct xfs_btree_cur *cur = *curp;
int error; /* error return value */
int i; /* temp state */
xfs_bmbt_irec_t left; /* left neighbor extent entry */
xfs_bmbt_irec_t right; /* right neighbor extent entry */
int rval=0; /* return value (logging flags) */
int state = xfs_bmap_fork_to_state(whichfork);
struct xfs_bmbt_irec old;
ASSERT(!isnullstartblock(new->br_startblock));
ASSERT(!cur || !(cur->bc_ino.flags & XFS_BTCUR_BMBT_WASDEL));
XFS_STATS_INC(mp, xs_add_exlist);
/*
* Check and set flags if this segment has a left neighbor.
*/
if (xfs_iext_peek_prev_extent(ifp, icur, &left)) {
state |= BMAP_LEFT_VALID;
if (isnullstartblock(left.br_startblock))
state |= BMAP_LEFT_DELAY;
}
/*
* Check and set flags if this segment has a current value.
* Not true if we're inserting into the "hole" at eof.
*/
if (xfs_iext_get_extent(ifp, icur, &right)) {
state |= BMAP_RIGHT_VALID;
if (isnullstartblock(right.br_startblock))
state |= BMAP_RIGHT_DELAY;
}
/*
* We're inserting a real allocation between "left" and "right".
* Set the contiguity flags. Don't let extents get too large.
*/
if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
left.br_startoff + left.br_blockcount == new->br_startoff &&
left.br_startblock + left.br_blockcount == new->br_startblock &&
left.br_state == new->br_state &&
left.br_blockcount + new->br_blockcount <= MAXEXTLEN)
state |= BMAP_LEFT_CONTIG;
if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
new->br_startoff + new->br_blockcount == right.br_startoff &&
new->br_startblock + new->br_blockcount == right.br_startblock &&
new->br_state == right.br_state &&
new->br_blockcount + right.br_blockcount <= MAXEXTLEN &&
(!(state & BMAP_LEFT_CONTIG) ||
left.br_blockcount + new->br_blockcount +
right.br_blockcount <= MAXEXTLEN))
state |= BMAP_RIGHT_CONTIG;
error = 0;
/*
* Select which case we're in here, and implement it.
*/
switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
/*
* New allocation is contiguous with real allocations on the
* left and on the right.
* Merge all three into a single extent record.
*/
left.br_blockcount += new->br_blockcount + right.br_blockcount;
xfs_iext_remove(ip, icur, state);
xfs_iext_prev(ifp, icur);
xfs_iext_update_extent(ip, state, icur, &left);
ifp->if_nextents--;
if (cur == NULL) {
rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
} else {
rval = XFS_ILOG_CORE;
error = xfs_bmbt_lookup_eq(cur, &right, &i);
if (error)
goto done;
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(mp, i != 1)) {
error = -EFSCORRUPTED;
goto done;
}
error = xfs_btree_delete(cur, &i);
if (error)
goto done;
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(mp, i != 1)) {
error = -EFSCORRUPTED;
goto done;
}
error = xfs_btree_decrement(cur, 0, &i);
if (error)
goto done;
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(mp, i != 1)) {
error = -EFSCORRUPTED;
goto done;
}
error = xfs_bmbt_update(cur, &left);
if (error)
goto done;
}
break;
case BMAP_LEFT_CONTIG:
/*
* New allocation is contiguous with a real allocation
* on the left.
* Merge the new allocation with the left neighbor.
*/
old = left;
left.br_blockcount += new->br_blockcount;
xfs_iext_prev(ifp, icur);
xfs_iext_update_extent(ip, state, icur, &left);
if (cur == NULL) {
rval = xfs_ilog_fext(whichfork);
} else {
rval = 0;
error = xfs_bmbt_lookup_eq(cur, &old, &i);
if (error)
goto done;
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(mp, i != 1)) {
error = -EFSCORRUPTED;
goto done;
}
error = xfs_bmbt_update(cur, &left);
if (error)
goto done;
}
break;
case BMAP_RIGHT_CONTIG:
/*
* New allocation is contiguous with a real allocation
* on the right.
* Merge the new allocation with the right neighbor.
*/
old = right;
right.br_startoff = new->br_startoff;
right.br_startblock = new->br_startblock;
right.br_blockcount += new->br_blockcount;
xfs_iext_update_extent(ip, state, icur, &right);
if (cur == NULL) {
rval = xfs_ilog_fext(whichfork);
} else {
rval = 0;
error = xfs_bmbt_lookup_eq(cur, &old, &i);
if (error)
goto done;
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(mp, i != 1)) {
error = -EFSCORRUPTED;
goto done;
}
error = xfs_bmbt_update(cur, &right);
if (error)
goto done;
}
break;
case 0:
/*
* New allocation is not contiguous with another
* real allocation.
* Insert a new entry.
*/
xfs_iext_insert(ip, icur, new, state);
ifp->if_nextents++;
if (cur == NULL) {
rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
} else {
rval = XFS_ILOG_CORE;
error = xfs_bmbt_lookup_eq(cur, new, &i);
if (error)
goto done;
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(mp, i != 0)) {
error = -EFSCORRUPTED;
goto done;
}
error = xfs_btree_insert(cur, &i);
if (error)
goto done;
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(mp, i != 1)) {
error = -EFSCORRUPTED;
goto done;
}
}
break;
}
/* add reverse mapping unless caller opted out */
if (!(flags & XFS_BMAPI_NORMAP))
xfs_rmap_map_extent(tp, ip, whichfork, new);
/* convert to a btree if necessary */
if (xfs_bmap_needs_btree(ip, whichfork)) {
int tmp_logflags; /* partial log flag return val */
ASSERT(cur == NULL);
error = xfs_bmap_extents_to_btree(tp, ip, curp, 0,
&tmp_logflags, whichfork);
*logflagsp |= tmp_logflags;
cur = *curp;
if (error)
goto done;
}
/* clear out the allocated field, done with it now in any case. */
if (cur)
cur->bc_ino.allocated = 0;
xfs_bmap_check_leaf_extents(cur, ip, whichfork);
done:
*logflagsp |= rval;
return error;
}
/*
* Functions used in the extent read, allocate and remove paths
*/
/*
* Adjust the size of the new extent based on i_extsize and rt extsize.
*/
int
xfs_bmap_extsize_align(
xfs_mount_t *mp,
xfs_bmbt_irec_t *gotp, /* next extent pointer */
xfs_bmbt_irec_t *prevp, /* previous extent pointer */
xfs_extlen_t extsz, /* align to this extent size */
int rt, /* is this a realtime inode? */
int eof, /* is extent at end-of-file? */
int delay, /* creating delalloc extent? */
int convert, /* overwriting unwritten extent? */
xfs_fileoff_t *offp, /* in/out: aligned offset */
xfs_extlen_t *lenp) /* in/out: aligned length */
{
xfs_fileoff_t orig_off; /* original offset */
xfs_extlen_t orig_alen; /* original length */
xfs_fileoff_t orig_end; /* original off+len */
xfs_fileoff_t nexto; /* next file offset */
xfs_fileoff_t prevo; /* previous file offset */
xfs_fileoff_t align_off; /* temp for offset */
xfs_extlen_t align_alen; /* temp for length */
xfs_extlen_t temp; /* temp for calculations */
if (convert)
return 0;
orig_off = align_off = *offp;
orig_alen = align_alen = *lenp;
orig_end = orig_off + orig_alen;
/*
* If this request overlaps an existing extent, then don't
* attempt to perform any additional alignment.
*/
if (!delay && !eof &&
(orig_off >= gotp->br_startoff) &&
(orig_end <= gotp->br_startoff + gotp->br_blockcount)) {
return 0;
}
/*
* If the file offset is unaligned vs. the extent size
* we need to align it. This will be possible unless
* the file was previously written with a kernel that didn't
* perform this alignment, or if a truncate shot us in the
* foot.
*/
div_u64_rem(orig_off, extsz, &temp);
if (temp) {
align_alen += temp;
align_off -= temp;
}
xfs: extent size hints can round up extents past MAXEXTLEN This results in BMBT corruption, as seen by this test: # mkfs.xfs -f -d size=40051712b,agcount=4 /dev/vdc .... # mount /dev/vdc /mnt/scratch # xfs_io -ft -c "extsize 16m" -c "falloc 0 30g" -c "bmap -vp" /mnt/scratch/foo which results in this failure on a debug kernel: XFS: Assertion failed: (blockcount & xfs_mask64hi(64-BMBT_BLOCKCOUNT_BITLEN)) == 0, file: fs/xfs/libxfs/xfs_bmap_btree.c, line: 211 .... Call Trace: [<ffffffff814cf0ff>] xfs_bmbt_set_allf+0x8f/0x100 [<ffffffff814cf18d>] xfs_bmbt_set_all+0x1d/0x20 [<ffffffff814f2efe>] xfs_iext_insert+0x9e/0x120 [<ffffffff814c7956>] ? xfs_bmap_add_extent_hole_real+0x1c6/0xc70 [<ffffffff814c7956>] xfs_bmap_add_extent_hole_real+0x1c6/0xc70 [<ffffffff814caaab>] xfs_bmapi_write+0x72b/0xed0 [<ffffffff811c72ac>] ? kmem_cache_alloc+0x15c/0x170 [<ffffffff814fe070>] xfs_alloc_file_space+0x160/0x400 [<ffffffff81ddcc29>] ? down_write+0x29/0x60 [<ffffffff815063eb>] xfs_file_fallocate+0x29b/0x310 [<ffffffff811d2bc8>] ? __sb_start_write+0x58/0x120 [<ffffffff811e3e18>] ? do_vfs_ioctl+0x318/0x570 [<ffffffff811cd680>] vfs_fallocate+0x140/0x260 [<ffffffff811ce6f8>] SyS_fallocate+0x48/0x80 [<ffffffff81ddec09>] system_call_fastpath+0x12/0x17 The tracepoint that indicates the extent that triggered the assert failure is: xfs_iext_insert: idx 0 offset 0 block 16777224 count 2097152 flag 1 Clearly indicating that the extent length is greater than MAXEXTLEN, which is 2097151. A prior trace point shows the allocation was an exact size match and that a length greater than MAXEXTLEN was asked for: xfs_alloc_size_done: agno 1 agbno 8 minlen 2097152 maxlen 2097152 ^^^^^^^ ^^^^^^^ We don't see this problem with extent size hints through the IO path because we can't do single IOs large enough to trigger MAXEXTLEN allocation. fallocate(), OTOH, is not limited in it's allocation sizes and so needs help here. The issue is that the extent size hint alignment is rounding up the extent size past MAXEXTLEN, because xfs_bmapi_write() is not taking into account extent size hints when calculating the maximum extent length to allocate. xfs_bmapi_reserve_delalloc() is already doing this, but direct extent allocation is not. Unfortunately, the calculation in xfs_bmapi_reserve_delalloc() is wrong, and it works only because delayed allocation extents are not limited in size to MAXEXTLEN in the in-core extent tree. hence this calculation does not work for direct allocation, and the delalloc code needs fixing. This may, in fact be the underlying bug that occassionally causes transaction overruns in delayed allocation extent conversion, so now we know it's wrong we should fix it, too. Many thanks to Brian Foster for finding this problem during review of this patch. Hence the fix, after much code reading, is to allow xfs_bmap_extsize_align() to align partial extents when full alignment would extend the alignment past MAXEXTLEN. We can safely do this because all callers have higher layer allocation loops that already handle short allocations, and so will simply run another allocation to cover the remainder of the requested allocation range that we ignored during alignment. The advantage of this approach is that it also removes the need for callers to do anything other than limit their requests to MAXEXTLEN - they don't really need to be aware of extent size hints at all. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Brian Foster <bfoster@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
2015-05-29 05:40:06 +08:00
/* Same adjustment for the end of the requested area. */
temp = (align_alen % extsz);
if (temp)
align_alen += extsz - temp;
/*
xfs: extent size hints can round up extents past MAXEXTLEN This results in BMBT corruption, as seen by this test: # mkfs.xfs -f -d size=40051712b,agcount=4 /dev/vdc .... # mount /dev/vdc /mnt/scratch # xfs_io -ft -c "extsize 16m" -c "falloc 0 30g" -c "bmap -vp" /mnt/scratch/foo which results in this failure on a debug kernel: XFS: Assertion failed: (blockcount & xfs_mask64hi(64-BMBT_BLOCKCOUNT_BITLEN)) == 0, file: fs/xfs/libxfs/xfs_bmap_btree.c, line: 211 .... Call Trace: [<ffffffff814cf0ff>] xfs_bmbt_set_allf+0x8f/0x100 [<ffffffff814cf18d>] xfs_bmbt_set_all+0x1d/0x20 [<ffffffff814f2efe>] xfs_iext_insert+0x9e/0x120 [<ffffffff814c7956>] ? xfs_bmap_add_extent_hole_real+0x1c6/0xc70 [<ffffffff814c7956>] xfs_bmap_add_extent_hole_real+0x1c6/0xc70 [<ffffffff814caaab>] xfs_bmapi_write+0x72b/0xed0 [<ffffffff811c72ac>] ? kmem_cache_alloc+0x15c/0x170 [<ffffffff814fe070>] xfs_alloc_file_space+0x160/0x400 [<ffffffff81ddcc29>] ? down_write+0x29/0x60 [<ffffffff815063eb>] xfs_file_fallocate+0x29b/0x310 [<ffffffff811d2bc8>] ? __sb_start_write+0x58/0x120 [<ffffffff811e3e18>] ? do_vfs_ioctl+0x318/0x570 [<ffffffff811cd680>] vfs_fallocate+0x140/0x260 [<ffffffff811ce6f8>] SyS_fallocate+0x48/0x80 [<ffffffff81ddec09>] system_call_fastpath+0x12/0x17 The tracepoint that indicates the extent that triggered the assert failure is: xfs_iext_insert: idx 0 offset 0 block 16777224 count 2097152 flag 1 Clearly indicating that the extent length is greater than MAXEXTLEN, which is 2097151. A prior trace point shows the allocation was an exact size match and that a length greater than MAXEXTLEN was asked for: xfs_alloc_size_done: agno 1 agbno 8 minlen 2097152 maxlen 2097152 ^^^^^^^ ^^^^^^^ We don't see this problem with extent size hints through the IO path because we can't do single IOs large enough to trigger MAXEXTLEN allocation. fallocate(), OTOH, is not limited in it's allocation sizes and so needs help here. The issue is that the extent size hint alignment is rounding up the extent size past MAXEXTLEN, because xfs_bmapi_write() is not taking into account extent size hints when calculating the maximum extent length to allocate. xfs_bmapi_reserve_delalloc() is already doing this, but direct extent allocation is not. Unfortunately, the calculation in xfs_bmapi_reserve_delalloc() is wrong, and it works only because delayed allocation extents are not limited in size to MAXEXTLEN in the in-core extent tree. hence this calculation does not work for direct allocation, and the delalloc code needs fixing. This may, in fact be the underlying bug that occassionally causes transaction overruns in delayed allocation extent conversion, so now we know it's wrong we should fix it, too. Many thanks to Brian Foster for finding this problem during review of this patch. Hence the fix, after much code reading, is to allow xfs_bmap_extsize_align() to align partial extents when full alignment would extend the alignment past MAXEXTLEN. We can safely do this because all callers have higher layer allocation loops that already handle short allocations, and so will simply run another allocation to cover the remainder of the requested allocation range that we ignored during alignment. The advantage of this approach is that it also removes the need for callers to do anything other than limit their requests to MAXEXTLEN - they don't really need to be aware of extent size hints at all. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Brian Foster <bfoster@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
2015-05-29 05:40:06 +08:00
* For large extent hint sizes, the aligned extent might be larger than
* MAXEXTLEN. In that case, reduce the size by an extsz so that it pulls
* the length back under MAXEXTLEN. The outer allocation loops handle
* short allocation just fine, so it is safe to do this. We only want to
* do it when we are forced to, though, because it means more allocation
* operations are required.
*/
xfs: extent size hints can round up extents past MAXEXTLEN This results in BMBT corruption, as seen by this test: # mkfs.xfs -f -d size=40051712b,agcount=4 /dev/vdc .... # mount /dev/vdc /mnt/scratch # xfs_io -ft -c "extsize 16m" -c "falloc 0 30g" -c "bmap -vp" /mnt/scratch/foo which results in this failure on a debug kernel: XFS: Assertion failed: (blockcount & xfs_mask64hi(64-BMBT_BLOCKCOUNT_BITLEN)) == 0, file: fs/xfs/libxfs/xfs_bmap_btree.c, line: 211 .... Call Trace: [<ffffffff814cf0ff>] xfs_bmbt_set_allf+0x8f/0x100 [<ffffffff814cf18d>] xfs_bmbt_set_all+0x1d/0x20 [<ffffffff814f2efe>] xfs_iext_insert+0x9e/0x120 [<ffffffff814c7956>] ? xfs_bmap_add_extent_hole_real+0x1c6/0xc70 [<ffffffff814c7956>] xfs_bmap_add_extent_hole_real+0x1c6/0xc70 [<ffffffff814caaab>] xfs_bmapi_write+0x72b/0xed0 [<ffffffff811c72ac>] ? kmem_cache_alloc+0x15c/0x170 [<ffffffff814fe070>] xfs_alloc_file_space+0x160/0x400 [<ffffffff81ddcc29>] ? down_write+0x29/0x60 [<ffffffff815063eb>] xfs_file_fallocate+0x29b/0x310 [<ffffffff811d2bc8>] ? __sb_start_write+0x58/0x120 [<ffffffff811e3e18>] ? do_vfs_ioctl+0x318/0x570 [<ffffffff811cd680>] vfs_fallocate+0x140/0x260 [<ffffffff811ce6f8>] SyS_fallocate+0x48/0x80 [<ffffffff81ddec09>] system_call_fastpath+0x12/0x17 The tracepoint that indicates the extent that triggered the assert failure is: xfs_iext_insert: idx 0 offset 0 block 16777224 count 2097152 flag 1 Clearly indicating that the extent length is greater than MAXEXTLEN, which is 2097151. A prior trace point shows the allocation was an exact size match and that a length greater than MAXEXTLEN was asked for: xfs_alloc_size_done: agno 1 agbno 8 minlen 2097152 maxlen 2097152 ^^^^^^^ ^^^^^^^ We don't see this problem with extent size hints through the IO path because we can't do single IOs large enough to trigger MAXEXTLEN allocation. fallocate(), OTOH, is not limited in it's allocation sizes and so needs help here. The issue is that the extent size hint alignment is rounding up the extent size past MAXEXTLEN, because xfs_bmapi_write() is not taking into account extent size hints when calculating the maximum extent length to allocate. xfs_bmapi_reserve_delalloc() is already doing this, but direct extent allocation is not. Unfortunately, the calculation in xfs_bmapi_reserve_delalloc() is wrong, and it works only because delayed allocation extents are not limited in size to MAXEXTLEN in the in-core extent tree. hence this calculation does not work for direct allocation, and the delalloc code needs fixing. This may, in fact be the underlying bug that occassionally causes transaction overruns in delayed allocation extent conversion, so now we know it's wrong we should fix it, too. Many thanks to Brian Foster for finding this problem during review of this patch. Hence the fix, after much code reading, is to allow xfs_bmap_extsize_align() to align partial extents when full alignment would extend the alignment past MAXEXTLEN. We can safely do this because all callers have higher layer allocation loops that already handle short allocations, and so will simply run another allocation to cover the remainder of the requested allocation range that we ignored during alignment. The advantage of this approach is that it also removes the need for callers to do anything other than limit their requests to MAXEXTLEN - they don't really need to be aware of extent size hints at all. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Brian Foster <bfoster@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
2015-05-29 05:40:06 +08:00
while (align_alen > MAXEXTLEN)
align_alen -= extsz;
ASSERT(align_alen <= MAXEXTLEN);
/*
* If the previous block overlaps with this proposed allocation
* then move the start forward without adjusting the length.
*/
if (prevp->br_startoff != NULLFILEOFF) {
if (prevp->br_startblock == HOLESTARTBLOCK)
prevo = prevp->br_startoff;
else
prevo = prevp->br_startoff + prevp->br_blockcount;
} else
prevo = 0;
if (align_off != orig_off && align_off < prevo)
align_off = prevo;
/*
* If the next block overlaps with this proposed allocation
* then move the start back without adjusting the length,
* but not before offset 0.
* This may of course make the start overlap previous block,
* and if we hit the offset 0 limit then the next block
* can still overlap too.
*/
if (!eof && gotp->br_startoff != NULLFILEOFF) {
if ((delay && gotp->br_startblock == HOLESTARTBLOCK) ||
(!delay && gotp->br_startblock == DELAYSTARTBLOCK))
nexto = gotp->br_startoff + gotp->br_blockcount;
else
nexto = gotp->br_startoff;
} else
nexto = NULLFILEOFF;
if (!eof &&
align_off + align_alen != orig_end &&
align_off + align_alen > nexto)
align_off = nexto > align_alen ? nexto - align_alen : 0;
/*
* If we're now overlapping the next or previous extent that
* means we can't fit an extsz piece in this hole. Just move
* the start forward to the first valid spot and set
* the length so we hit the end.
*/
if (align_off != orig_off && align_off < prevo)
align_off = prevo;
if (align_off + align_alen != orig_end &&
align_off + align_alen > nexto &&
nexto != NULLFILEOFF) {
ASSERT(nexto > prevo);
align_alen = nexto - align_off;
}
/*
* If realtime, and the result isn't a multiple of the realtime
* extent size we need to remove blocks until it is.
*/
if (rt && (temp = (align_alen % mp->m_sb.sb_rextsize))) {
/*
* We're not covering the original request, or
* we won't be able to once we fix the length.
*/
if (orig_off < align_off ||
orig_end > align_off + align_alen ||
align_alen - temp < orig_alen)
return -EINVAL;
/*
* Try to fix it by moving the start up.
*/
if (align_off + temp <= orig_off) {
align_alen -= temp;
align_off += temp;
}
/*
* Try to fix it by moving the end in.
*/
else if (align_off + align_alen - temp >= orig_end)
align_alen -= temp;
/*
* Set the start to the minimum then trim the length.
*/
else {
align_alen -= orig_off - align_off;
align_off = orig_off;
align_alen -= align_alen % mp->m_sb.sb_rextsize;
}
/*
* Result doesn't cover the request, fail it.
*/
if (orig_off < align_off || orig_end > align_off + align_alen)
return -EINVAL;
} else {
ASSERT(orig_off >= align_off);
xfs: extent size hints can round up extents past MAXEXTLEN This results in BMBT corruption, as seen by this test: # mkfs.xfs -f -d size=40051712b,agcount=4 /dev/vdc .... # mount /dev/vdc /mnt/scratch # xfs_io -ft -c "extsize 16m" -c "falloc 0 30g" -c "bmap -vp" /mnt/scratch/foo which results in this failure on a debug kernel: XFS: Assertion failed: (blockcount & xfs_mask64hi(64-BMBT_BLOCKCOUNT_BITLEN)) == 0, file: fs/xfs/libxfs/xfs_bmap_btree.c, line: 211 .... Call Trace: [<ffffffff814cf0ff>] xfs_bmbt_set_allf+0x8f/0x100 [<ffffffff814cf18d>] xfs_bmbt_set_all+0x1d/0x20 [<ffffffff814f2efe>] xfs_iext_insert+0x9e/0x120 [<ffffffff814c7956>] ? xfs_bmap_add_extent_hole_real+0x1c6/0xc70 [<ffffffff814c7956>] xfs_bmap_add_extent_hole_real+0x1c6/0xc70 [<ffffffff814caaab>] xfs_bmapi_write+0x72b/0xed0 [<ffffffff811c72ac>] ? kmem_cache_alloc+0x15c/0x170 [<ffffffff814fe070>] xfs_alloc_file_space+0x160/0x400 [<ffffffff81ddcc29>] ? down_write+0x29/0x60 [<ffffffff815063eb>] xfs_file_fallocate+0x29b/0x310 [<ffffffff811d2bc8>] ? __sb_start_write+0x58/0x120 [<ffffffff811e3e18>] ? do_vfs_ioctl+0x318/0x570 [<ffffffff811cd680>] vfs_fallocate+0x140/0x260 [<ffffffff811ce6f8>] SyS_fallocate+0x48/0x80 [<ffffffff81ddec09>] system_call_fastpath+0x12/0x17 The tracepoint that indicates the extent that triggered the assert failure is: xfs_iext_insert: idx 0 offset 0 block 16777224 count 2097152 flag 1 Clearly indicating that the extent length is greater than MAXEXTLEN, which is 2097151. A prior trace point shows the allocation was an exact size match and that a length greater than MAXEXTLEN was asked for: xfs_alloc_size_done: agno 1 agbno 8 minlen 2097152 maxlen 2097152 ^^^^^^^ ^^^^^^^ We don't see this problem with extent size hints through the IO path because we can't do single IOs large enough to trigger MAXEXTLEN allocation. fallocate(), OTOH, is not limited in it's allocation sizes and so needs help here. The issue is that the extent size hint alignment is rounding up the extent size past MAXEXTLEN, because xfs_bmapi_write() is not taking into account extent size hints when calculating the maximum extent length to allocate. xfs_bmapi_reserve_delalloc() is already doing this, but direct extent allocation is not. Unfortunately, the calculation in xfs_bmapi_reserve_delalloc() is wrong, and it works only because delayed allocation extents are not limited in size to MAXEXTLEN in the in-core extent tree. hence this calculation does not work for direct allocation, and the delalloc code needs fixing. This may, in fact be the underlying bug that occassionally causes transaction overruns in delayed allocation extent conversion, so now we know it's wrong we should fix it, too. Many thanks to Brian Foster for finding this problem during review of this patch. Hence the fix, after much code reading, is to allow xfs_bmap_extsize_align() to align partial extents when full alignment would extend the alignment past MAXEXTLEN. We can safely do this because all callers have higher layer allocation loops that already handle short allocations, and so will simply run another allocation to cover the remainder of the requested allocation range that we ignored during alignment. The advantage of this approach is that it also removes the need for callers to do anything other than limit their requests to MAXEXTLEN - they don't really need to be aware of extent size hints at all. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Brian Foster <bfoster@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
2015-05-29 05:40:06 +08:00
/* see MAXEXTLEN handling above */
ASSERT(orig_end <= align_off + align_alen ||
align_alen + extsz > MAXEXTLEN);
}
xfs: event tracing support Convert the old xfs tracing support that could only be used with the out of tree kdb and xfsidbg patches to use the generic event tracer. To use it make sure CONFIG_EVENT_TRACING is enabled and then enable all xfs trace channels by: echo 1 > /sys/kernel/debug/tracing/events/xfs/enable or alternatively enable single events by just doing the same in one event subdirectory, e.g. echo 1 > /sys/kernel/debug/tracing/events/xfs/xfs_ihold/enable or set more complex filters, etc. In Documentation/trace/events.txt all this is desctribed in more detail. To reads the events do a cat /sys/kernel/debug/tracing/trace Compared to the last posting this patch converts the tracing mostly to the one tracepoint per callsite model that other users of the new tracing facility also employ. This allows a very fine-grained control of the tracing, a cleaner output of the traces and also enables the perf tool to use each tracepoint as a virtual performance counter, allowing us to e.g. count how often certain workloads git various spots in XFS. Take a look at http://lwn.net/Articles/346470/ for some examples. Also the btree tracing isn't included at all yet, as it will require additional core tracing features not in mainline yet, I plan to deliver it later. And the really nice thing about this patch is that it actually removes many lines of code while adding this nice functionality: fs/xfs/Makefile | 8 fs/xfs/linux-2.6/xfs_acl.c | 1 fs/xfs/linux-2.6/xfs_aops.c | 52 - fs/xfs/linux-2.6/xfs_aops.h | 2 fs/xfs/linux-2.6/xfs_buf.c | 117 +-- fs/xfs/linux-2.6/xfs_buf.h | 33 fs/xfs/linux-2.6/xfs_fs_subr.c | 3 fs/xfs/linux-2.6/xfs_ioctl.c | 1 fs/xfs/linux-2.6/xfs_ioctl32.c | 1 fs/xfs/linux-2.6/xfs_iops.c | 1 fs/xfs/linux-2.6/xfs_linux.h | 1 fs/xfs/linux-2.6/xfs_lrw.c | 87 -- fs/xfs/linux-2.6/xfs_lrw.h | 45 - fs/xfs/linux-2.6/xfs_super.c | 104 --- fs/xfs/linux-2.6/xfs_super.h | 7 fs/xfs/linux-2.6/xfs_sync.c | 1 fs/xfs/linux-2.6/xfs_trace.c | 75 ++ fs/xfs/linux-2.6/xfs_trace.h | 1369 +++++++++++++++++++++++++++++++++++++++++ fs/xfs/linux-2.6/xfs_vnode.h | 4 fs/xfs/quota/xfs_dquot.c | 110 --- fs/xfs/quota/xfs_dquot.h | 21 fs/xfs/quota/xfs_qm.c | 40 - fs/xfs/quota/xfs_qm_syscalls.c | 4 fs/xfs/support/ktrace.c | 323 --------- fs/xfs/support/ktrace.h | 85 -- fs/xfs/xfs.h | 16 fs/xfs/xfs_ag.h | 14 fs/xfs/xfs_alloc.c | 230 +----- fs/xfs/xfs_alloc.h | 27 fs/xfs/xfs_alloc_btree.c | 1 fs/xfs/xfs_attr.c | 107 --- fs/xfs/xfs_attr.h | 10 fs/xfs/xfs_attr_leaf.c | 14 fs/xfs/xfs_attr_sf.h | 40 - fs/xfs/xfs_bmap.c | 507 +++------------ fs/xfs/xfs_bmap.h | 49 - fs/xfs/xfs_bmap_btree.c | 6 fs/xfs/xfs_btree.c | 5 fs/xfs/xfs_btree_trace.h | 17 fs/xfs/xfs_buf_item.c | 87 -- fs/xfs/xfs_buf_item.h | 20 fs/xfs/xfs_da_btree.c | 3 fs/xfs/xfs_da_btree.h | 7 fs/xfs/xfs_dfrag.c | 2 fs/xfs/xfs_dir2.c | 8 fs/xfs/xfs_dir2_block.c | 20 fs/xfs/xfs_dir2_leaf.c | 21 fs/xfs/xfs_dir2_node.c | 27 fs/xfs/xfs_dir2_sf.c | 26 fs/xfs/xfs_dir2_trace.c | 216 ------ fs/xfs/xfs_dir2_trace.h | 72 -- fs/xfs/xfs_filestream.c | 8 fs/xfs/xfs_fsops.c | 2 fs/xfs/xfs_iget.c | 111 --- fs/xfs/xfs_inode.c | 67 -- fs/xfs/xfs_inode.h | 76 -- fs/xfs/xfs_inode_item.c | 5 fs/xfs/xfs_iomap.c | 85 -- fs/xfs/xfs_iomap.h | 8 fs/xfs/xfs_log.c | 181 +---- fs/xfs/xfs_log_priv.h | 20 fs/xfs/xfs_log_recover.c | 1 fs/xfs/xfs_mount.c | 2 fs/xfs/xfs_quota.h | 8 fs/xfs/xfs_rename.c | 1 fs/xfs/xfs_rtalloc.c | 1 fs/xfs/xfs_rw.c | 3 fs/xfs/xfs_trans.h | 47 + fs/xfs/xfs_trans_buf.c | 62 - fs/xfs/xfs_vnodeops.c | 8 70 files changed, 2151 insertions(+), 2592 deletions(-) Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Alex Elder <aelder@sgi.com>
2009-12-15 07:14:59 +08:00
#ifdef DEBUG
if (!eof && gotp->br_startoff != NULLFILEOFF)
ASSERT(align_off + align_alen <= gotp->br_startoff);
if (prevp->br_startoff != NULLFILEOFF)
ASSERT(align_off >= prevp->br_startoff + prevp->br_blockcount);
#endif
*lenp = align_alen;
*offp = align_off;
return 0;
}
#define XFS_ALLOC_GAP_UNITS 4
void
xfs_bmap_adjacent(
struct xfs_bmalloca *ap) /* bmap alloc argument struct */
{
xfs_fsblock_t adjust; /* adjustment to block numbers */
xfs_agnumber_t fb_agno; /* ag number of ap->firstblock */
xfs_mount_t *mp; /* mount point structure */
int nullfb; /* true if ap->firstblock isn't set */
int rt; /* true if inode is realtime */
#define ISVALID(x,y) \
(rt ? \
(x) < mp->m_sb.sb_rblocks : \
XFS_FSB_TO_AGNO(mp, x) == XFS_FSB_TO_AGNO(mp, y) && \
XFS_FSB_TO_AGNO(mp, x) < mp->m_sb.sb_agcount && \
XFS_FSB_TO_AGBNO(mp, x) < mp->m_sb.sb_agblocks)
mp = ap->ip->i_mount;
nullfb = ap->tp->t_firstblock == NULLFSBLOCK;
xfs: remote attribute blocks aren't really userdata When adding a new remote attribute, we write the attribute to the new extent before the allocation transaction is committed. This means we cannot reuse busy extents as that violates crash consistency semantics. Hence we currently treat remote attribute extent allocation like userdata because it has the same overwrite ordering constraints as userdata. Unfortunately, this also allows the allocator to incorrectly apply extent size hints to the remote attribute extent allocation. This results in interesting failures, such as transaction block reservation overruns and in-memory inode attribute fork corruption. To fix this, we need to separate the busy extent reuse configuration from the userdata configuration. This changes the definition of XFS_BMAPI_METADATA slightly - it now means that allocation is metadata and reuse of busy extents is acceptible due to the metadata ordering semantics of the journal. If this flag is not set, it means the allocation is that has unordered data writeback, and hence busy extent reuse is not allowed. It no longer implies the allocation is for user data, just that the data write will not be strictly ordered. This matches the semantics for both user data and remote attribute block allocation. As such, This patch changes the "userdata" field to a "datatype" field, and adds a "no busy reuse" flag to the field. When we detect an unordered data extent allocation, we immediately set the no reuse flag. We then set the "user data" flags based on the inode fork we are allocating the extent to. Hence we only set userdata flags on data fork allocations now and consider attribute fork remote extents to be an unordered metadata extent. The result is that remote attribute extents now have the expected allocation semantics, and the data fork allocation behaviour is completely unchanged. It should be noted that there may be other ways to fix this (e.g. use ordered metadata buffers for the remote attribute extent data write) but they are more invasive and difficult to validate both from a design and implementation POV. Hence this patch takes the simple, obvious route to fixing the problem... Reported-and-tested-by: Ross Zwisler <ross.zwisler@linux.intel.com> Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-09-26 06:21:28 +08:00
rt = XFS_IS_REALTIME_INODE(ap->ip) &&
(ap->datatype & XFS_ALLOC_USERDATA);
fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp,
ap->tp->t_firstblock);
/*
* If allocating at eof, and there's a previous real block,
* try to use its last block as our starting point.
*/
if (ap->eof && ap->prev.br_startoff != NULLFILEOFF &&
!isnullstartblock(ap->prev.br_startblock) &&
ISVALID(ap->prev.br_startblock + ap->prev.br_blockcount,
ap->prev.br_startblock)) {
ap->blkno = ap->prev.br_startblock + ap->prev.br_blockcount;
/*
* Adjust for the gap between prevp and us.
*/
adjust = ap->offset -
(ap->prev.br_startoff + ap->prev.br_blockcount);
if (adjust &&
ISVALID(ap->blkno + adjust, ap->prev.br_startblock))
ap->blkno += adjust;
}
/*
* If not at eof, then compare the two neighbor blocks.
* Figure out whether either one gives us a good starting point,
* and pick the better one.
*/
else if (!ap->eof) {
xfs_fsblock_t gotbno; /* right side block number */
xfs_fsblock_t gotdiff=0; /* right side difference */
xfs_fsblock_t prevbno; /* left side block number */
xfs_fsblock_t prevdiff=0; /* left side difference */
/*
* If there's a previous (left) block, select a requested
* start block based on it.
*/
if (ap->prev.br_startoff != NULLFILEOFF &&
!isnullstartblock(ap->prev.br_startblock) &&
(prevbno = ap->prev.br_startblock +
ap->prev.br_blockcount) &&
ISVALID(prevbno, ap->prev.br_startblock)) {
/*
* Calculate gap to end of previous block.
*/
adjust = prevdiff = ap->offset -
(ap->prev.br_startoff +
ap->prev.br_blockcount);
/*
* Figure the startblock based on the previous block's
* end and the gap size.
* Heuristic!
* If the gap is large relative to the piece we're
* allocating, or using it gives us an invalid block
* number, then just use the end of the previous block.
*/
if (prevdiff <= XFS_ALLOC_GAP_UNITS * ap->length &&
ISVALID(prevbno + prevdiff,
ap->prev.br_startblock))
prevbno += adjust;
else
prevdiff += adjust;
/*
* If the firstblock forbids it, can't use it,
* must use default.
*/
if (!rt && !nullfb &&
XFS_FSB_TO_AGNO(mp, prevbno) != fb_agno)
prevbno = NULLFSBLOCK;
}
/*
* No previous block or can't follow it, just default.
*/
else
prevbno = NULLFSBLOCK;
/*
* If there's a following (right) block, select a requested
* start block based on it.
*/
if (!isnullstartblock(ap->got.br_startblock)) {
/*
* Calculate gap to start of next block.
*/
adjust = gotdiff = ap->got.br_startoff - ap->offset;
/*
* Figure the startblock based on the next block's
* start and the gap size.
*/
gotbno = ap->got.br_startblock;
/*
* Heuristic!
* If the gap is large relative to the piece we're
* allocating, or using it gives us an invalid block
* number, then just use the start of the next block
* offset by our length.
*/
if (gotdiff <= XFS_ALLOC_GAP_UNITS * ap->length &&
ISVALID(gotbno - gotdiff, gotbno))
gotbno -= adjust;
else if (ISVALID(gotbno - ap->length, gotbno)) {
gotbno -= ap->length;
gotdiff += adjust - ap->length;
} else
gotdiff += adjust;
/*
* If the firstblock forbids it, can't use it,
* must use default.
*/
if (!rt && !nullfb &&
XFS_FSB_TO_AGNO(mp, gotbno) != fb_agno)
gotbno = NULLFSBLOCK;
}
/*
* No next block, just default.
*/
else
gotbno = NULLFSBLOCK;
/*
* If both valid, pick the better one, else the only good
* one, else ap->blkno is already set (to 0 or the inode block).
*/
if (prevbno != NULLFSBLOCK && gotbno != NULLFSBLOCK)
ap->blkno = prevdiff <= gotdiff ? prevbno : gotbno;
else if (prevbno != NULLFSBLOCK)
ap->blkno = prevbno;
else if (gotbno != NULLFSBLOCK)
ap->blkno = gotbno;
}
#undef ISVALID
}
static int
xfs_bmap_longest_free_extent(
struct xfs_trans *tp,
xfs_agnumber_t ag,
xfs_extlen_t *blen,
int *notinit)
{
struct xfs_mount *mp = tp->t_mountp;
struct xfs_perag *pag;
xfs_extlen_t longest;
int error = 0;
pag = xfs_perag_get(mp, ag);
if (!pag->pagf_init) {
error = xfs_alloc_pagf_init(mp, tp, ag, XFS_ALLOC_FLAG_TRYLOCK);
if (error) {
/* Couldn't lock the AGF, so skip this AG. */
if (error == -EAGAIN) {
*notinit = 1;
error = 0;
}
goto out;
}
}
longest = xfs_alloc_longest_free_extent(pag,
xfs: set up per-AG free space reservations One unfortunate quirk of the reference count and reverse mapping btrees -- they can expand in size when blocks are written to *other* allocation groups if, say, one large extent becomes a lot of tiny extents. Since we don't want to start throwing errors in the middle of CoWing, we need to reserve some blocks to handle future expansion. The transaction block reservation counters aren't sufficient here because we have to have a reserve of blocks in every AG, not just somewhere in the filesystem. Therefore, create two per-AG block reservation pools. One feeds the AGFL so that rmapbt expansion always succeeds, and the other feeds all other metadata so that refcountbt expansion never fails. Use the count of how many reserved blocks we need to have on hand to create a virtual reservation in the AG. Through selective clamping of the maximum length of allocation requests and of the length of the longest free extent, we can make it look like there's less free space in the AG unless the reservation owner is asking for blocks. In other words, play some accounting tricks in-core to make sure that we always have blocks available. On the plus side, there's nothing to clean up if we crash, which is contrast to the strategy that the rough draft used (actually removing extents from the freespace btrees). Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-09-19 08:30:52 +08:00
xfs_alloc_min_freelist(mp, pag),
xfs_ag_resv_needed(pag, XFS_AG_RESV_NONE));
if (*blen < longest)
*blen = longest;
out:
xfs_perag_put(pag);
return error;
}
static void
xfs_bmap_select_minlen(
struct xfs_bmalloca *ap,
struct xfs_alloc_arg *args,
xfs_extlen_t *blen,
int notinit)
{
if (notinit || *blen < ap->minlen) {
/*
* Since we did a BUF_TRYLOCK above, it is possible that
* there is space for this request.
*/
args->minlen = ap->minlen;
} else if (*blen < args->maxlen) {
/*
* If the best seen length is less than the request length,
* use the best as the minimum.
*/
args->minlen = *blen;
} else {
/*
* Otherwise we've seen an extent as big as maxlen, use that
* as the minimum.
*/
args->minlen = args->maxlen;
}
}
STATIC int
xfs_bmap_btalloc_nullfb(
struct xfs_bmalloca *ap,
struct xfs_alloc_arg *args,
xfs_extlen_t *blen)
{
struct xfs_mount *mp = ap->ip->i_mount;
xfs_agnumber_t ag, startag;
int notinit = 0;
int error;
args->type = XFS_ALLOCTYPE_START_BNO;
args->total = ap->total;
startag = ag = XFS_FSB_TO_AGNO(mp, args->fsbno);
if (startag == NULLAGNUMBER)
startag = ag = 0;
while (*blen < args->maxlen) {
error = xfs_bmap_longest_free_extent(args->tp, ag, blen,
&notinit);
if (error)
return error;
if (++ag == mp->m_sb.sb_agcount)
ag = 0;
if (ag == startag)
break;
}
xfs_bmap_select_minlen(ap, args, blen, notinit);
return 0;
}
STATIC int
xfs_bmap_btalloc_filestreams(
struct xfs_bmalloca *ap,
struct xfs_alloc_arg *args,
xfs_extlen_t *blen)
{
struct xfs_mount *mp = ap->ip->i_mount;
xfs_agnumber_t ag;
int notinit = 0;
int error;
args->type = XFS_ALLOCTYPE_NEAR_BNO;
args->total = ap->total;
ag = XFS_FSB_TO_AGNO(mp, args->fsbno);
if (ag == NULLAGNUMBER)
ag = 0;
error = xfs_bmap_longest_free_extent(args->tp, ag, blen, &notinit);
if (error)
return error;
if (*blen < args->maxlen) {
error = xfs_filestream_new_ag(ap, &ag);
if (error)
return error;
error = xfs_bmap_longest_free_extent(args->tp, ag, blen,
&notinit);
if (error)
return error;
}
xfs_bmap_select_minlen(ap, args, blen, notinit);
/*
* Set the failure fallback case to look in the selected AG as stream
* may have moved.
*/
ap->blkno = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0);
return 0;
}
/* Update all inode and quota accounting for the allocation we just did. */
static void
xfs_bmap_btalloc_accounting(
struct xfs_bmalloca *ap,
struct xfs_alloc_arg *args)
{
if (ap->flags & XFS_BMAPI_COWFORK) {
/*
* COW fork blocks are in-core only and thus are treated as
* in-core quota reservation (like delalloc blocks) even when
* converted to real blocks. The quota reservation is not
* accounted to disk until blocks are remapped to the data
* fork. So if these blocks were previously delalloc, we
* already have quota reservation and there's nothing to do
* yet.
*/
if (ap->wasdel) {
xfs_mod_delalloc(ap->ip->i_mount, -(int64_t)args->len);
return;
}
/*
* Otherwise, we've allocated blocks in a hole. The transaction
* has acquired in-core quota reservation for this extent.
* Rather than account these as real blocks, however, we reduce
* the transaction quota reservation based on the allocation.
* This essentially transfers the transaction quota reservation
* to that of a delalloc extent.
*/
ap->ip->i_delayed_blks += args->len;
xfs_trans_mod_dquot_byino(ap->tp, ap->ip, XFS_TRANS_DQ_RES_BLKS,
-(long)args->len);
return;
}
/* data/attr fork only */
ap->ip->i_nblocks += args->len;
xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
if (ap->wasdel) {
ap->ip->i_delayed_blks -= args->len;
xfs_mod_delalloc(ap->ip->i_mount, -(int64_t)args->len);
}
xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT : XFS_TRANS_DQ_BCOUNT,
args->len);
}
static int
xfs_bmap_compute_alignments(
struct xfs_bmalloca *ap,
struct xfs_alloc_arg *args)
{
struct xfs_mount *mp = args->mp;
xfs_extlen_t align = 0; /* minimum allocation alignment */
int stripe_align = 0;
/* stripe alignment for allocation is determined by mount parameters */
if (mp->m_swidth && xfs_has_swalloc(mp))
stripe_align = mp->m_swidth;
else if (mp->m_dalign)
stripe_align = mp->m_dalign;
if (ap->flags & XFS_BMAPI_COWFORK)
align = xfs_get_cowextsz_hint(ap->ip);
else if (ap->datatype & XFS_ALLOC_USERDATA)
align = xfs_get_extsz_hint(ap->ip);
if (align) {
if (xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, align, 0,
ap->eof, 0, ap->conv, &ap->offset,
&ap->length))
ASSERT(0);
ASSERT(ap->length);
}
/* apply extent size hints if obtained earlier */
if (align) {
args->prod = align;
div_u64_rem(ap->offset, args->prod, &args->mod);
if (args->mod)
args->mod = args->prod - args->mod;
} else if (mp->m_sb.sb_blocksize >= PAGE_SIZE) {
args->prod = 1;
args->mod = 0;
} else {
args->prod = PAGE_SIZE >> mp->m_sb.sb_blocklog;
div_u64_rem(ap->offset, args->prod, &args->mod);
if (args->mod)
args->mod = args->prod - args->mod;
}
return stripe_align;
}
static void
xfs_bmap_process_allocated_extent(
struct xfs_bmalloca *ap,
struct xfs_alloc_arg *args,
xfs_fileoff_t orig_offset,
xfs_extlen_t orig_length)
{
int nullfb;
nullfb = ap->tp->t_firstblock == NULLFSBLOCK;
/*
* check the allocation happened at the same or higher AG than
* the first block that was allocated.
*/
ASSERT(nullfb ||
XFS_FSB_TO_AGNO(args->mp, ap->tp->t_firstblock) <=
XFS_FSB_TO_AGNO(args->mp, args->fsbno));
ap->blkno = args->fsbno;
if (nullfb)
ap->tp->t_firstblock = args->fsbno;
ap->length = args->len;
/*
* If the extent size hint is active, we tried to round the
* caller's allocation request offset down to extsz and the
* length up to another extsz boundary. If we found a free
* extent we mapped it in starting at this new offset. If the
* newly mapped space isn't long enough to cover any of the
* range of offsets that was originally requested, move the
* mapping up so that we can fill as much of the caller's
* original request as possible. Free space is apparently
* very fragmented so we're unlikely to be able to satisfy the
* hints anyway.
*/
if (ap->length <= orig_length)
ap->offset = orig_offset;
else if (ap->offset + ap->length < orig_offset + orig_length)
ap->offset = orig_offset + orig_length - ap->length;
xfs_bmap_btalloc_accounting(ap, args);
}
#ifdef DEBUG
static int
xfs_bmap_exact_minlen_extent_alloc(
struct xfs_bmalloca *ap)
{
struct xfs_mount *mp = ap->ip->i_mount;
struct xfs_alloc_arg args = { .tp = ap->tp, .mp = mp };
xfs_fileoff_t orig_offset;
xfs_extlen_t orig_length;
int error;
ASSERT(ap->length);
if (ap->minlen != 1) {
ap->blkno = NULLFSBLOCK;
ap->length = 0;
return 0;
}
xfs: don't screw up direct writes when freesp is fragmented xfs_bmap_btalloc is given a range of file offset blocks that must be allocated to some data/attr/cow fork. If the fork has an extent size hint associated with it, the request will be enlarged on both ends to try to satisfy the alignment hint. If free space is fragmentated, sometimes we can allocate some blocks but not enough to fulfill any of the requested range. Since bmapi_allocate always trims the new extent mapping to match the originally requested range, this results in bmapi_write returning zero and no mapping. The consequences of this vary -- buffered writes will simply re-call bmapi_write until it can satisfy at least one block from the original request. Direct IO overwrites notice nmaps == 0 and return -ENOSPC through the dio mechanism out to userspace with the weird result that writes fail even when we have enough space because the ENOSPC return overrides any partial write status. For direct CoW writes the situation was disastrous because nobody notices us returning an invalid zero-length wrong-offset mapping to iomap and the write goes off into space. Therefore, if free space is so fragmented that we managed to allocate some space but not enough to map into even a single block of the original allocation request range, we should break the alignment hint in order to guarantee at least some forward progress for the direct write. If we return a short allocation to iomap_apply it'll call back about the remaining blocks. Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2018-01-20 09:47:36 +08:00
orig_offset = ap->offset;
orig_length = ap->length;
args.alloc_minlen_only = 1;
xfs_bmap_compute_alignments(ap, &args);
if (ap->tp->t_firstblock == NULLFSBLOCK) {
/*
* Unlike the longest extent available in an AG, we don't track
* the length of an AG's shortest extent.
* XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT is a debug only knob and
* hence we can afford to start traversing from the 0th AG since
* we need not be concerned about a drop in performance in
* "debug only" code paths.
*/
ap->blkno = XFS_AGB_TO_FSB(mp, 0, 0);
} else {
ap->blkno = ap->tp->t_firstblock;
}
args.fsbno = ap->blkno;
args.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;
args.type = XFS_ALLOCTYPE_FIRST_AG;
xfs: Initialize xfs_alloc_arg->total correctly when allocating minlen extents xfs/538 can cause the following call trace to be printed when executing on a multi-block directory configuration, WARNING: CPU: 1 PID: 2578 at fs/xfs/libxfs/xfs_bmap.c:717 xfs_bmap_extents_to_btree+0x520/0x5d0 Call Trace: ? xfs_buf_rele+0x4f/0x450 xfs_bmap_add_extent_hole_real+0x747/0x960 xfs_bmapi_allocate+0x39a/0x440 xfs_bmapi_write+0x507/0x9e0 xfs_da_grow_inode_int+0x1cd/0x330 ? up+0x12/0x60 xfs_dir2_grow_inode+0x62/0x110 ? xfs_trans_log_inode+0x234/0x2d0 xfs_dir2_sf_to_block+0x103/0x940 ? xfs_dir2_sf_check+0x8c/0x210 ? xfs_da_compname+0x19/0x30 ? xfs_dir2_sf_lookup+0xd0/0x3d0 xfs_dir2_sf_addname+0x10d/0x910 xfs_dir_createname+0x1ad/0x210 xfs_create+0x404/0x620 xfs_generic_create+0x24c/0x320 path_openat+0xda6/0x1030 do_filp_open+0x88/0x130 ? kmem_cache_alloc+0x50/0x210 ? __cond_resched+0x16/0x40 ? kmem_cache_alloc+0x50/0x210 do_sys_openat2+0x97/0x150 __x64_sys_creat+0x49/0x70 do_syscall_64+0x33/0x40 entry_SYSCALL_64_after_hwframe+0x44/0xae This occurs because xfs_bmap_exact_minlen_extent_alloc() initializes xfs_alloc_arg->total to xfs_bmalloca->minlen. In the context of xfs_bmap_exact_minlen_extent_alloc(), xfs_bmalloca->minlen has a value of 1 and hence the space allocator could choose an AG which has less than xfs_bmalloca->total number of free blocks available. As the transaction proceeds, one of the future space allocation requests could fail due to non-availability of free blocks in the AG that was originally chosen. This commit fixes the bug by assigning xfs_alloc_arg->total to the value of xfs_bmalloca->total. Fixes: 301519674699 ("xfs: Introduce error injection to allocate only minlen size extents for files") Signed-off-by: Chandan Babu R <chandanrlinux@gmail.com> Reviewed-by: Darrick J. Wong <djwong@kernel.org> Signed-off-by: Darrick J. Wong <djwong@kernel.org> Reviewed-by: Christoph Hellwig <hch@lst.de>
2021-03-26 02:55:10 +08:00
args.minlen = args.maxlen = ap->minlen;
args.total = ap->total;
args.alignment = 1;
args.minalignslop = 0;
args.minleft = ap->minleft;
args.wasdel = ap->wasdel;
args.resv = XFS_AG_RESV_NONE;
args.datatype = ap->datatype;
error = xfs_alloc_vextent(&args);
if (error)
return error;
if (args.fsbno != NULLFSBLOCK) {
xfs_bmap_process_allocated_extent(ap, &args, orig_offset,
orig_length);
} else {
ap->blkno = NULLFSBLOCK;
ap->length = 0;
}
return 0;
}
#else
#define xfs_bmap_exact_minlen_extent_alloc(bma) (-EFSCORRUPTED)
#endif
STATIC int
xfs_bmap_btalloc(
struct xfs_bmalloca *ap)
{
struct xfs_mount *mp = ap->ip->i_mount;
struct xfs_alloc_arg args = { .tp = ap->tp, .mp = mp };
xfs_alloctype_t atype = 0;
xfs_agnumber_t fb_agno; /* ag number of ap->firstblock */
xfs_agnumber_t ag;
xfs_fileoff_t orig_offset;
xfs_extlen_t orig_length;
xfs_extlen_t blen;
xfs_extlen_t nextminlen = 0;
int nullfb; /* true if ap->firstblock isn't set */
int isaligned;
int tryagain;
int error;
int stripe_align;
ASSERT(ap->length);
orig_offset = ap->offset;
orig_length = ap->length;
stripe_align = xfs_bmap_compute_alignments(ap, &args);
nullfb = ap->tp->t_firstblock == NULLFSBLOCK;
fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp,
ap->tp->t_firstblock);
if (nullfb) {
if ((ap->datatype & XFS_ALLOC_USERDATA) &&
xfs: remote attribute blocks aren't really userdata When adding a new remote attribute, we write the attribute to the new extent before the allocation transaction is committed. This means we cannot reuse busy extents as that violates crash consistency semantics. Hence we currently treat remote attribute extent allocation like userdata because it has the same overwrite ordering constraints as userdata. Unfortunately, this also allows the allocator to incorrectly apply extent size hints to the remote attribute extent allocation. This results in interesting failures, such as transaction block reservation overruns and in-memory inode attribute fork corruption. To fix this, we need to separate the busy extent reuse configuration from the userdata configuration. This changes the definition of XFS_BMAPI_METADATA slightly - it now means that allocation is metadata and reuse of busy extents is acceptible due to the metadata ordering semantics of the journal. If this flag is not set, it means the allocation is that has unordered data writeback, and hence busy extent reuse is not allowed. It no longer implies the allocation is for user data, just that the data write will not be strictly ordered. This matches the semantics for both user data and remote attribute block allocation. As such, This patch changes the "userdata" field to a "datatype" field, and adds a "no busy reuse" flag to the field. When we detect an unordered data extent allocation, we immediately set the no reuse flag. We then set the "user data" flags based on the inode fork we are allocating the extent to. Hence we only set userdata flags on data fork allocations now and consider attribute fork remote extents to be an unordered metadata extent. The result is that remote attribute extents now have the expected allocation semantics, and the data fork allocation behaviour is completely unchanged. It should be noted that there may be other ways to fix this (e.g. use ordered metadata buffers for the remote attribute extent data write) but they are more invasive and difficult to validate both from a design and implementation POV. Hence this patch takes the simple, obvious route to fixing the problem... Reported-and-tested-by: Ross Zwisler <ross.zwisler@linux.intel.com> Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-09-26 06:21:28 +08:00
xfs_inode_is_filestream(ap->ip)) {
ag = xfs_filestream_lookup_ag(ap->ip);
ag = (ag != NULLAGNUMBER) ? ag : 0;
ap->blkno = XFS_AGB_TO_FSB(mp, ag, 0);
} else {
ap->blkno = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
}
} else
ap->blkno = ap->tp->t_firstblock;
xfs_bmap_adjacent(ap);
/*
* If allowed, use ap->blkno; otherwise must use firstblock since
* it's in the right allocation group.
*/
if (nullfb || XFS_FSB_TO_AGNO(mp, ap->blkno) == fb_agno)
;
else
ap->blkno = ap->tp->t_firstblock;
/*
* Normal allocation, done through xfs_alloc_vextent.
*/
tryagain = isaligned = 0;
args.fsbno = ap->blkno;
args.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;
/* Trim the allocation back to the maximum an AG can fit. */
args.maxlen = min(ap->length, mp->m_ag_max_usable);
blen = 0;
if (nullfb) {
/*
* Search for an allocation group with a single extent large
* enough for the request. If one isn't found, then adjust
* the minimum allocation size to the largest space found.
*/
if ((ap->datatype & XFS_ALLOC_USERDATA) &&
xfs: remote attribute blocks aren't really userdata When adding a new remote attribute, we write the attribute to the new extent before the allocation transaction is committed. This means we cannot reuse busy extents as that violates crash consistency semantics. Hence we currently treat remote attribute extent allocation like userdata because it has the same overwrite ordering constraints as userdata. Unfortunately, this also allows the allocator to incorrectly apply extent size hints to the remote attribute extent allocation. This results in interesting failures, such as transaction block reservation overruns and in-memory inode attribute fork corruption. To fix this, we need to separate the busy extent reuse configuration from the userdata configuration. This changes the definition of XFS_BMAPI_METADATA slightly - it now means that allocation is metadata and reuse of busy extents is acceptible due to the metadata ordering semantics of the journal. If this flag is not set, it means the allocation is that has unordered data writeback, and hence busy extent reuse is not allowed. It no longer implies the allocation is for user data, just that the data write will not be strictly ordered. This matches the semantics for both user data and remote attribute block allocation. As such, This patch changes the "userdata" field to a "datatype" field, and adds a "no busy reuse" flag to the field. When we detect an unordered data extent allocation, we immediately set the no reuse flag. We then set the "user data" flags based on the inode fork we are allocating the extent to. Hence we only set userdata flags on data fork allocations now and consider attribute fork remote extents to be an unordered metadata extent. The result is that remote attribute extents now have the expected allocation semantics, and the data fork allocation behaviour is completely unchanged. It should be noted that there may be other ways to fix this (e.g. use ordered metadata buffers for the remote attribute extent data write) but they are more invasive and difficult to validate both from a design and implementation POV. Hence this patch takes the simple, obvious route to fixing the problem... Reported-and-tested-by: Ross Zwisler <ross.zwisler@linux.intel.com> Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-09-26 06:21:28 +08:00
xfs_inode_is_filestream(ap->ip))
error = xfs_bmap_btalloc_filestreams(ap, &args, &blen);
else
error = xfs_bmap_btalloc_nullfb(ap, &args, &blen);
if (error)
return error;
} else if (ap->tp->t_flags & XFS_TRANS_LOWMODE) {
if (xfs_inode_is_filestream(ap->ip))
args.type = XFS_ALLOCTYPE_FIRST_AG;
else
args.type = XFS_ALLOCTYPE_START_BNO;
args.total = args.minlen = ap->minlen;
} else {
args.type = XFS_ALLOCTYPE_NEAR_BNO;
args.total = ap->total;
args.minlen = ap->minlen;
}
/*
* If we are not low on available data blocks, and the underlying
* logical volume manager is a stripe, and the file offset is zero then
* try to allocate data blocks on stripe unit boundary. NOTE: ap->aeof
* is only set if the allocation length is >= the stripe unit and the
* allocation offset is at the end of file.
*/
if (!(ap->tp->t_flags & XFS_TRANS_LOWMODE) && ap->aeof) {
if (!ap->offset) {
args.alignment = stripe_align;
atype = args.type;
isaligned = 1;
/*
* Adjust minlen to try and preserve alignment if we
* can't guarantee an aligned maxlen extent.
*/
if (blen > args.alignment &&
blen <= args.maxlen + args.alignment)
args.minlen = blen - args.alignment;
args.minalignslop = 0;
} else {
/*
* First try an exact bno allocation.
* If it fails then do a near or start bno
* allocation with alignment turned on.
*/
atype = args.type;
tryagain = 1;
args.type = XFS_ALLOCTYPE_THIS_BNO;
args.alignment = 1;
/*
* Compute the minlen+alignment for the
* next case. Set slop so that the value
* of minlen+alignment+slop doesn't go up
* between the calls.
*/
if (blen > stripe_align && blen <= args.maxlen)
nextminlen = blen - stripe_align;
else
nextminlen = args.minlen;
if (nextminlen + stripe_align > args.minlen + 1)
args.minalignslop =
nextminlen + stripe_align -
args.minlen - 1;
else
args.minalignslop = 0;
}
} else {
args.alignment = 1;
args.minalignslop = 0;
}
args.minleft = ap->minleft;
args.wasdel = ap->wasdel;
xfs: set up per-AG free space reservations One unfortunate quirk of the reference count and reverse mapping btrees -- they can expand in size when blocks are written to *other* allocation groups if, say, one large extent becomes a lot of tiny extents. Since we don't want to start throwing errors in the middle of CoWing, we need to reserve some blocks to handle future expansion. The transaction block reservation counters aren't sufficient here because we have to have a reserve of blocks in every AG, not just somewhere in the filesystem. Therefore, create two per-AG block reservation pools. One feeds the AGFL so that rmapbt expansion always succeeds, and the other feeds all other metadata so that refcountbt expansion never fails. Use the count of how many reserved blocks we need to have on hand to create a virtual reservation in the AG. Through selective clamping of the maximum length of allocation requests and of the length of the longest free extent, we can make it look like there's less free space in the AG unless the reservation owner is asking for blocks. In other words, play some accounting tricks in-core to make sure that we always have blocks available. On the plus side, there's nothing to clean up if we crash, which is contrast to the strategy that the rough draft used (actually removing extents from the freespace btrees). Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-09-19 08:30:52 +08:00
args.resv = XFS_AG_RESV_NONE;
xfs: remote attribute blocks aren't really userdata When adding a new remote attribute, we write the attribute to the new extent before the allocation transaction is committed. This means we cannot reuse busy extents as that violates crash consistency semantics. Hence we currently treat remote attribute extent allocation like userdata because it has the same overwrite ordering constraints as userdata. Unfortunately, this also allows the allocator to incorrectly apply extent size hints to the remote attribute extent allocation. This results in interesting failures, such as transaction block reservation overruns and in-memory inode attribute fork corruption. To fix this, we need to separate the busy extent reuse configuration from the userdata configuration. This changes the definition of XFS_BMAPI_METADATA slightly - it now means that allocation is metadata and reuse of busy extents is acceptible due to the metadata ordering semantics of the journal. If this flag is not set, it means the allocation is that has unordered data writeback, and hence busy extent reuse is not allowed. It no longer implies the allocation is for user data, just that the data write will not be strictly ordered. This matches the semantics for both user data and remote attribute block allocation. As such, This patch changes the "userdata" field to a "datatype" field, and adds a "no busy reuse" flag to the field. When we detect an unordered data extent allocation, we immediately set the no reuse flag. We then set the "user data" flags based on the inode fork we are allocating the extent to. Hence we only set userdata flags on data fork allocations now and consider attribute fork remote extents to be an unordered metadata extent. The result is that remote attribute extents now have the expected allocation semantics, and the data fork allocation behaviour is completely unchanged. It should be noted that there may be other ways to fix this (e.g. use ordered metadata buffers for the remote attribute extent data write) but they are more invasive and difficult to validate both from a design and implementation POV. Hence this patch takes the simple, obvious route to fixing the problem... Reported-and-tested-by: Ross Zwisler <ross.zwisler@linux.intel.com> Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-09-26 06:21:28 +08:00
args.datatype = ap->datatype;
error = xfs_alloc_vextent(&args);
if (error)
return error;
if (tryagain && args.fsbno == NULLFSBLOCK) {
/*
* Exact allocation failed. Now try with alignment
* turned on.
*/
args.type = atype;
args.fsbno = ap->blkno;
args.alignment = stripe_align;
args.minlen = nextminlen;
args.minalignslop = 0;
isaligned = 1;
if ((error = xfs_alloc_vextent(&args)))
return error;
}
if (isaligned && args.fsbno == NULLFSBLOCK) {
/*
* allocation failed, so turn off alignment and
* try again.
*/
args.type = atype;
args.fsbno = ap->blkno;
args.alignment = 0;
if ((error = xfs_alloc_vextent(&args)))
return error;
}
if (args.fsbno == NULLFSBLOCK && nullfb &&
args.minlen > ap->minlen) {
args.minlen = ap->minlen;
args.type = XFS_ALLOCTYPE_START_BNO;
args.fsbno = ap->blkno;
if ((error = xfs_alloc_vextent(&args)))
return error;
}
if (args.fsbno == NULLFSBLOCK && nullfb) {
args.fsbno = 0;
args.type = XFS_ALLOCTYPE_FIRST_AG;
args.total = ap->minlen;
if ((error = xfs_alloc_vextent(&args)))
return error;
ap->tp->t_flags |= XFS_TRANS_LOWMODE;
}
if (args.fsbno != NULLFSBLOCK) {
xfs_bmap_process_allocated_extent(ap, &args, orig_offset,
orig_length);
} else {
ap->blkno = NULLFSBLOCK;
ap->length = 0;
}
return 0;
}
/* Trim extent to fit a logical block range. */
void
xfs_trim_extent(
struct xfs_bmbt_irec *irec,
xfs_fileoff_t bno,
xfs_filblks_t len)
{
xfs_fileoff_t distance;
xfs_fileoff_t end = bno + len;
if (irec->br_startoff + irec->br_blockcount <= bno ||
irec->br_startoff >= end) {
irec->br_blockcount = 0;
return;
}
if (irec->br_startoff < bno) {
distance = bno - irec->br_startoff;
if (isnullstartblock(irec->br_startblock))
irec->br_startblock = DELAYSTARTBLOCK;
if (irec->br_startblock != DELAYSTARTBLOCK &&
irec->br_startblock != HOLESTARTBLOCK)
irec->br_startblock += distance;
irec->br_startoff += distance;
irec->br_blockcount -= distance;
}
if (end < irec->br_startoff + irec->br_blockcount) {
distance = irec->br_startoff + irec->br_blockcount - end;
irec->br_blockcount -= distance;
}
}
/*
* Trim the returned map to the required bounds
*/
STATIC void
xfs_bmapi_trim_map(
struct xfs_bmbt_irec *mval,
struct xfs_bmbt_irec *got,
xfs_fileoff_t *bno,
xfs_filblks_t len,
xfs_fileoff_t obno,
xfs_fileoff_t end,
int n,
int flags)
{
if ((flags & XFS_BMAPI_ENTIRE) ||
got->br_startoff + got->br_blockcount <= obno) {
*mval = *got;
if (isnullstartblock(got->br_startblock))
mval->br_startblock = DELAYSTARTBLOCK;
return;
}
if (obno > *bno)
*bno = obno;
ASSERT((*bno >= obno) || (n == 0));
ASSERT(*bno < end);
mval->br_startoff = *bno;
if (isnullstartblock(got->br_startblock))
mval->br_startblock = DELAYSTARTBLOCK;
else
mval->br_startblock = got->br_startblock +
(*bno - got->br_startoff);
/*
* Return the minimum of what we got and what we asked for for
* the length. We can use the len variable here because it is
* modified below and we could have been there before coming
* here if the first part of the allocation didn't overlap what
* was asked for.
*/
mval->br_blockcount = XFS_FILBLKS_MIN(end - *bno,
got->br_blockcount - (*bno - got->br_startoff));
mval->br_state = got->br_state;
ASSERT(mval->br_blockcount <= len);
return;
}
/*
* Update and validate the extent map to return
*/
STATIC void
xfs_bmapi_update_map(
struct xfs_bmbt_irec **map,
xfs_fileoff_t *bno,
xfs_filblks_t *len,
xfs_fileoff_t obno,
xfs_fileoff_t end,
int *n,
int flags)
{
xfs_bmbt_irec_t *mval = *map;
ASSERT((flags & XFS_BMAPI_ENTIRE) ||
((mval->br_startoff + mval->br_blockcount) <= end));
ASSERT((flags & XFS_BMAPI_ENTIRE) || (mval->br_blockcount <= *len) ||
(mval->br_startoff < obno));
*bno = mval->br_startoff + mval->br_blockcount;
*len = end - *bno;
if (*n > 0 && mval->br_startoff == mval[-1].br_startoff) {
/* update previous map with new information */
ASSERT(mval->br_startblock == mval[-1].br_startblock);
ASSERT(mval->br_blockcount > mval[-1].br_blockcount);
ASSERT(mval->br_state == mval[-1].br_state);
mval[-1].br_blockcount = mval->br_blockcount;
mval[-1].br_state = mval->br_state;
} else if (*n > 0 && mval->br_startblock != DELAYSTARTBLOCK &&
mval[-1].br_startblock != DELAYSTARTBLOCK &&
mval[-1].br_startblock != HOLESTARTBLOCK &&
mval->br_startblock == mval[-1].br_startblock +
mval[-1].br_blockcount &&
mval[-1].br_state == mval->br_state) {
ASSERT(mval->br_startoff ==
mval[-1].br_startoff + mval[-1].br_blockcount);
mval[-1].br_blockcount += mval->br_blockcount;
} else if (*n > 0 &&
mval->br_startblock == DELAYSTARTBLOCK &&
mval[-1].br_startblock == DELAYSTARTBLOCK &&
mval->br_startoff ==
mval[-1].br_startoff + mval[-1].br_blockcount) {
mval[-1].br_blockcount += mval->br_blockcount;
mval[-1].br_state = mval->br_state;
} else if (!((*n == 0) &&
((mval->br_startoff + mval->br_blockcount) <=
obno))) {
mval++;
(*n)++;
}
*map = mval;
}
/*
* Map file blocks to filesystem blocks without allocation.
*/
int
xfs_bmapi_read(
struct xfs_inode *ip,
xfs_fileoff_t bno,
xfs_filblks_t len,
struct xfs_bmbt_irec *mval,
int *nmap,
int flags)
{
struct xfs_mount *mp = ip->i_mount;
int whichfork = xfs_bmapi_whichfork(flags);
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
struct xfs_bmbt_irec got;
xfs_fileoff_t obno;
xfs_fileoff_t end;
struct xfs_iext_cursor icur;
int error;
bool eof = false;
int n = 0;
ASSERT(*nmap >= 1);
ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK | XFS_BMAPI_ENTIRE)));
ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED|XFS_ILOCK_EXCL));
if (WARN_ON_ONCE(!ifp))
return -EFSCORRUPTED;
if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT))
return -EFSCORRUPTED;
if (xfs_is_shutdown(mp))
return -EIO;
XFS_STATS_INC(mp, xs_blk_mapr);
error = xfs_iread_extents(NULL, ip, whichfork);
if (error)
return error;
if (!xfs_iext_lookup_extent(ip, ifp, bno, &icur, &got))
eof = true;
end = bno + len;
obno = bno;
while (bno < end && n < *nmap) {
/* Reading past eof, act as though there's a hole up to end. */
if (eof)
got.br_startoff = end;
if (got.br_startoff > bno) {
/* Reading in a hole. */
mval->br_startoff = bno;
mval->br_startblock = HOLESTARTBLOCK;
mval->br_blockcount =
XFS_FILBLKS_MIN(len, got.br_startoff - bno);
mval->br_state = XFS_EXT_NORM;
bno += mval->br_blockcount;
len -= mval->br_blockcount;
mval++;
n++;
continue;
}
/* set up the extent map to return. */
xfs_bmapi_trim_map(mval, &got, &bno, len, obno, end, n, flags);
xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);
/* If we're done, stop now. */
if (bno >= end || n >= *nmap)
break;
/* Else go on to the next record. */
if (!xfs_iext_next_extent(ifp, &icur, &got))
eof = true;
}
*nmap = n;
return 0;
}
xfs: use iomap new flag for newly allocated delalloc blocks Commit fa7f138 ("xfs: clear delalloc and cache on buffered write failure") fixed one regression in the iomap error handling code and exposed another. The fundamental problem is that if a buffered write is a rewrite of preexisting delalloc blocks and the write fails, the failure handling code can punch out preexisting blocks with valid file data. This was reproduced directly by sub-block writes in the LTP kernel/syscalls/write/write03 test. A first 100 byte write allocates a single block in a file. A subsequent 100 byte write fails and punches out the block, including the data successfully written by the previous write. To address this problem, update the ->iomap_begin() handler to distinguish newly allocated delalloc blocks from preexisting delalloc blocks via the IOMAP_F_NEW flag. Use this flag in the ->iomap_end() handler to decide when a failed or short write should punch out delalloc blocks. This introduces the subtle requirement that ->iomap_begin() should never combine newly allocated delalloc blocks with existing blocks in the resulting iomap descriptor. This can occur when a new delalloc reservation merges with a neighboring extent that is part of the current write, for example. Therefore, drop the post-allocation extent lookup from xfs_bmapi_reserve_delalloc() and just return the record inserted into the fork. This ensures only new blocks are returned and thus that preexisting delalloc blocks are always handled as "found" blocks and not punched out on a failed rewrite. Reported-by: Xiong Zhou <xzhou@redhat.com> Signed-off-by: Brian Foster <bfoster@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
2017-03-09 01:58:08 +08:00
/*
* Add a delayed allocation extent to an inode. Blocks are reserved from the
* global pool and the extent inserted into the inode in-core extent tree.
*
* On entry, got refers to the first extent beyond the offset of the extent to
* allocate or eof is specified if no such extent exists. On return, got refers
* to the extent record that was inserted to the inode fork.
*
* Note that the allocated extent may have been merged with contiguous extents
* during insertion into the inode fork. Thus, got does not reflect the current
* state of the inode fork on return. If necessary, the caller can use lastx to
* look up the updated record in the inode fork.
*/
int
xfs_bmapi_reserve_delalloc(
struct xfs_inode *ip,
int whichfork,
xfs: track preallocation separately in xfs_bmapi_reserve_delalloc() Speculative preallocation is currently processed entirely by the callers of xfs_bmapi_reserve_delalloc(). The caller determines how much preallocation to include, adjusts the extent length and passes down the resulting request. While this works fine for post-eof speculative preallocation, it is not as reliable for COW fork preallocation. COW fork preallocation is implemented via the cowextszhint, which aligns the start offset as well as the length of the extent. Further, it is difficult for the caller to accurately identify when preallocation occurs because the returned extent could have been merged with neighboring extents in the fork. To simplify this situation and facilitate further COW fork preallocation enhancements, update xfs_bmapi_reserve_delalloc() to take a separate preallocation parameter to incorporate into the allocation request. The preallocation blocks value is tacked onto the end of the request and adjusted to accommodate neighboring extents and extent size limits. Since xfs_bmapi_reserve_delalloc() now knows precisely how much preallocation was included in the allocation, it can also tag the inodes appropriately to support preallocation reclaim. Note that xfs_bmapi_reserve_delalloc() callers are not yet updated to use the preallocation mechanism. This patch should not change behavior outside of correctly tagging reflink inodes when start offset preallocation occurs (which the caller does not handle correctly). Signed-off-by: Brian Foster <bfoster@redhat.com> Reviewed-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-11-28 11:57:42 +08:00
xfs_fileoff_t off,
xfs_filblks_t len,
xfs: track preallocation separately in xfs_bmapi_reserve_delalloc() Speculative preallocation is currently processed entirely by the callers of xfs_bmapi_reserve_delalloc(). The caller determines how much preallocation to include, adjusts the extent length and passes down the resulting request. While this works fine for post-eof speculative preallocation, it is not as reliable for COW fork preallocation. COW fork preallocation is implemented via the cowextszhint, which aligns the start offset as well as the length of the extent. Further, it is difficult for the caller to accurately identify when preallocation occurs because the returned extent could have been merged with neighboring extents in the fork. To simplify this situation and facilitate further COW fork preallocation enhancements, update xfs_bmapi_reserve_delalloc() to take a separate preallocation parameter to incorporate into the allocation request. The preallocation blocks value is tacked onto the end of the request and adjusted to accommodate neighboring extents and extent size limits. Since xfs_bmapi_reserve_delalloc() now knows precisely how much preallocation was included in the allocation, it can also tag the inodes appropriately to support preallocation reclaim. Note that xfs_bmapi_reserve_delalloc() callers are not yet updated to use the preallocation mechanism. This patch should not change behavior outside of correctly tagging reflink inodes when start offset preallocation occurs (which the caller does not handle correctly). Signed-off-by: Brian Foster <bfoster@redhat.com> Reviewed-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-11-28 11:57:42 +08:00
xfs_filblks_t prealloc,
struct xfs_bmbt_irec *got,
struct xfs_iext_cursor *icur,
int eof)
{
struct xfs_mount *mp = ip->i_mount;
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
xfs_extlen_t alen;
xfs_extlen_t indlen;
int error;
xfs: track preallocation separately in xfs_bmapi_reserve_delalloc() Speculative preallocation is currently processed entirely by the callers of xfs_bmapi_reserve_delalloc(). The caller determines how much preallocation to include, adjusts the extent length and passes down the resulting request. While this works fine for post-eof speculative preallocation, it is not as reliable for COW fork preallocation. COW fork preallocation is implemented via the cowextszhint, which aligns the start offset as well as the length of the extent. Further, it is difficult for the caller to accurately identify when preallocation occurs because the returned extent could have been merged with neighboring extents in the fork. To simplify this situation and facilitate further COW fork preallocation enhancements, update xfs_bmapi_reserve_delalloc() to take a separate preallocation parameter to incorporate into the allocation request. The preallocation blocks value is tacked onto the end of the request and adjusted to accommodate neighboring extents and extent size limits. Since xfs_bmapi_reserve_delalloc() now knows precisely how much preallocation was included in the allocation, it can also tag the inodes appropriately to support preallocation reclaim. Note that xfs_bmapi_reserve_delalloc() callers are not yet updated to use the preallocation mechanism. This patch should not change behavior outside of correctly tagging reflink inodes when start offset preallocation occurs (which the caller does not handle correctly). Signed-off-by: Brian Foster <bfoster@redhat.com> Reviewed-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-11-28 11:57:42 +08:00
xfs_fileoff_t aoff = off;
xfs: track preallocation separately in xfs_bmapi_reserve_delalloc() Speculative preallocation is currently processed entirely by the callers of xfs_bmapi_reserve_delalloc(). The caller determines how much preallocation to include, adjusts the extent length and passes down the resulting request. While this works fine for post-eof speculative preallocation, it is not as reliable for COW fork preallocation. COW fork preallocation is implemented via the cowextszhint, which aligns the start offset as well as the length of the extent. Further, it is difficult for the caller to accurately identify when preallocation occurs because the returned extent could have been merged with neighboring extents in the fork. To simplify this situation and facilitate further COW fork preallocation enhancements, update xfs_bmapi_reserve_delalloc() to take a separate preallocation parameter to incorporate into the allocation request. The preallocation blocks value is tacked onto the end of the request and adjusted to accommodate neighboring extents and extent size limits. Since xfs_bmapi_reserve_delalloc() now knows precisely how much preallocation was included in the allocation, it can also tag the inodes appropriately to support preallocation reclaim. Note that xfs_bmapi_reserve_delalloc() callers are not yet updated to use the preallocation mechanism. This patch should not change behavior outside of correctly tagging reflink inodes when start offset preallocation occurs (which the caller does not handle correctly). Signed-off-by: Brian Foster <bfoster@redhat.com> Reviewed-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-11-28 11:57:42 +08:00
/*
* Cap the alloc length. Keep track of prealloc so we know whether to
* tag the inode before we return.
*/
alen = XFS_FILBLKS_MIN(len + prealloc, MAXEXTLEN);
if (!eof)
alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff);
xfs: track preallocation separately in xfs_bmapi_reserve_delalloc() Speculative preallocation is currently processed entirely by the callers of xfs_bmapi_reserve_delalloc(). The caller determines how much preallocation to include, adjusts the extent length and passes down the resulting request. While this works fine for post-eof speculative preallocation, it is not as reliable for COW fork preallocation. COW fork preallocation is implemented via the cowextszhint, which aligns the start offset as well as the length of the extent. Further, it is difficult for the caller to accurately identify when preallocation occurs because the returned extent could have been merged with neighboring extents in the fork. To simplify this situation and facilitate further COW fork preallocation enhancements, update xfs_bmapi_reserve_delalloc() to take a separate preallocation parameter to incorporate into the allocation request. The preallocation blocks value is tacked onto the end of the request and adjusted to accommodate neighboring extents and extent size limits. Since xfs_bmapi_reserve_delalloc() now knows precisely how much preallocation was included in the allocation, it can also tag the inodes appropriately to support preallocation reclaim. Note that xfs_bmapi_reserve_delalloc() callers are not yet updated to use the preallocation mechanism. This patch should not change behavior outside of correctly tagging reflink inodes when start offset preallocation occurs (which the caller does not handle correctly). Signed-off-by: Brian Foster <bfoster@redhat.com> Reviewed-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-11-28 11:57:42 +08:00
if (prealloc && alen >= len)
prealloc = alen - len;
/* Figure out the extent size, adjust alen */
if (whichfork == XFS_COW_FORK) {
struct xfs_bmbt_irec prev;
xfs_extlen_t extsz = xfs_get_cowextsz_hint(ip);
if (!xfs_iext_peek_prev_extent(ifp, icur, &prev))
prev.br_startoff = NULLFILEOFF;
error = xfs_bmap_extsize_align(mp, got, &prev, extsz, 0, eof,
1, 0, &aoff, &alen);
ASSERT(!error);
}
/*
* Make a transaction-less quota reservation for delayed allocation
* blocks. This number gets adjusted later. We return if we haven't
* allocated blocks already inside this loop.
*/
error = xfs_quota_reserve_blkres(ip, alen);
if (error)
return error;
/*
* Split changing sb for alen and indlen since they could be coming
* from different places.
*/
indlen = (xfs_extlen_t)xfs_bmap_worst_indlen(ip, alen);
ASSERT(indlen > 0);
error = xfs_mod_fdblocks(mp, -((int64_t)alen), false);
if (error)
goto out_unreserve_quota;
error = xfs_mod_fdblocks(mp, -((int64_t)indlen), false);
if (error)
goto out_unreserve_blocks;
ip->i_delayed_blks += alen;
xfs_mod_delalloc(ip->i_mount, alen + indlen);
got->br_startoff = aoff;
got->br_startblock = nullstartblock(indlen);
got->br_blockcount = alen;
got->br_state = XFS_EXT_NORM;
xfs_bmap_add_extent_hole_delay(ip, whichfork, icur, got);
xfs: track preallocation separately in xfs_bmapi_reserve_delalloc() Speculative preallocation is currently processed entirely by the callers of xfs_bmapi_reserve_delalloc(). The caller determines how much preallocation to include, adjusts the extent length and passes down the resulting request. While this works fine for post-eof speculative preallocation, it is not as reliable for COW fork preallocation. COW fork preallocation is implemented via the cowextszhint, which aligns the start offset as well as the length of the extent. Further, it is difficult for the caller to accurately identify when preallocation occurs because the returned extent could have been merged with neighboring extents in the fork. To simplify this situation and facilitate further COW fork preallocation enhancements, update xfs_bmapi_reserve_delalloc() to take a separate preallocation parameter to incorporate into the allocation request. The preallocation blocks value is tacked onto the end of the request and adjusted to accommodate neighboring extents and extent size limits. Since xfs_bmapi_reserve_delalloc() now knows precisely how much preallocation was included in the allocation, it can also tag the inodes appropriately to support preallocation reclaim. Note that xfs_bmapi_reserve_delalloc() callers are not yet updated to use the preallocation mechanism. This patch should not change behavior outside of correctly tagging reflink inodes when start offset preallocation occurs (which the caller does not handle correctly). Signed-off-by: Brian Foster <bfoster@redhat.com> Reviewed-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-11-28 11:57:42 +08:00
/*
* Tag the inode if blocks were preallocated. Note that COW fork
* preallocation can occur at the start or end of the extent, even when
* prealloc == 0, so we must also check the aligned offset and length.
*/
if (whichfork == XFS_DATA_FORK && prealloc)
xfs_inode_set_eofblocks_tag(ip);
if (whichfork == XFS_COW_FORK && (prealloc || aoff < off || alen > len))
xfs_inode_set_cowblocks_tag(ip);
return 0;
out_unreserve_blocks:
xfs_mod_fdblocks(mp, alen, false);
out_unreserve_quota:
if (XFS_IS_QUOTA_ON(mp))
xfs_quota_unreserve_blkres(ip, alen);
return error;
}
static int
xfs_bmap_alloc_userdata(
struct xfs_bmalloca *bma)
{
struct xfs_mount *mp = bma->ip->i_mount;
int whichfork = xfs_bmapi_whichfork(bma->flags);
int error;
/*
* Set the data type being allocated. For the data fork, the first data
* in the file is treated differently to all other allocations. For the
* attribute fork, we only need to ensure the allocated range is not on
* the busy list.
*/
bma->datatype = XFS_ALLOC_NOBUSY;
if (whichfork == XFS_DATA_FORK) {
bma->datatype |= XFS_ALLOC_USERDATA;
if (bma->offset == 0)
bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA;
if (mp->m_dalign && bma->length >= mp->m_dalign) {
error = xfs_bmap_isaeof(bma, whichfork);
if (error)
return error;
}
if (XFS_IS_REALTIME_INODE(bma->ip))
return xfs_bmap_rtalloc(bma);
}
if (unlikely(XFS_TEST_ERROR(false, mp,
XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT)))
return xfs_bmap_exact_minlen_extent_alloc(bma);
return xfs_bmap_btalloc(bma);
}
xfs: refine the allocation stack switch The allocation stack switch at xfs_bmapi_allocate() has served it's purpose, but is no longer a sufficient solution to the stack usage problem we have in the XFS allocation path. Whilst the kernel stack size is now 16k, that is not a valid reason for undoing all our "keep stack usage down" modifications. What it does allow us to do is have the freedom to refine and perfect the modifications knowing that if we get it wrong it won't blow up in our faces - we have a safety net now. This is important because we still have the issue of older kernels having smaller stacks and that they are still supported and are demonstrating a wide range of different stack overflows. Red Hat has several open bugs for allocation based stack overflows from directory modifications and direct IO block allocation and these problems still need to be solved. If we can solve them upstream, then distro's won't need to bake their own unique solutions. To that end, I've observed that every allocation based stack overflow report has had a specific characteristic - it has happened during or directly after a bmap btree block split. That event requires a new block to be allocated to the tree, and so we effectively stack one allocation stack on top of another, and that's when we get into trouble. A further observation is that bmap btree block splits are much rarer than writeback allocation - over a range of different workloads I've observed the ratio of bmap btree inserts to splits ranges from 100:1 (xfstests run) to 10000:1 (local VM image server with sparse files that range in the hundreds of thousands to millions of extents). Either way, bmap btree split events are much, much rarer than allocation events. Finally, we have to move the kswapd state to the allocation workqueue work when allocation is done on behalf of kswapd. This is proving to cause significant perturbation in performance under memory pressure and appears to be generating allocation deadlock warnings under some workloads, so avoiding the use of a workqueue for the majority of kswapd writeback allocation will minimise the impact of such behaviour. Hence it makes sense to move the stack switch to xfs_btree_split() and only do it for bmap btree splits. Stack switches during allocation will be much rarer, so there won't be significant performacne overhead caused by switching stacks. The worse case stack from all allocation paths will be split, not just writeback. And the majority of memory allocations will be done in the correct context (e.g. kswapd) without causing additional latency, and so we simplify the memory reclaim interactions between processes, workqueues and kswapd. The worst stack I've been able to generate with this patch in place is 5600 bytes deep. It's very revealing because we exit XFS at: 37) 1768 64 kmem_cache_alloc+0x13b/0x170 about 1800 bytes of stack consumed, and the remaining 3800 bytes (and 36 functions) is memory reclaim, swap and the IO stack. And this occurs in the inode allocation from an open(O_CREAT) syscall, not writeback. The amount of stack being used is much less than I've previously be able to generate - fs_mark testing has been able to generate stack usage of around 7k without too much trouble; with this patch it's only just getting to 5.5k. This is primarily because the metadata allocation paths (e.g. directory blocks) are no longer causing double splits on the same stack, and hence now stack tracing is showing swapping being the worst stack consumer rather than XFS. Performance of fs_mark inode create workloads is unchanged. Performance of fs_mark async fsync workloads is consistently good with context switches reduced by around 150,000/s (30%). Performance of dbench, streaming IO and postmark is unchanged. Allocation deadlock warnings have not been seen on the workloads that generated them since adding this patch. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Brian Foster <bfoster@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
2014-07-15 05:08:24 +08:00
static int
xfs_bmapi_allocate(
struct xfs_bmalloca *bma)
{
struct xfs_mount *mp = bma->ip->i_mount;
int whichfork = xfs_bmapi_whichfork(bma->flags);
struct xfs_ifork *ifp = XFS_IFORK_PTR(bma->ip, whichfork);
int tmp_logflags = 0;
int error;
ASSERT(bma->length > 0);
/*
* For the wasdelay case, we could also just allocate the stuff asked
* for in this bmap call but that wouldn't be as good.
*/
if (bma->wasdel) {
bma->length = (xfs_extlen_t)bma->got.br_blockcount;
bma->offset = bma->got.br_startoff;
if (!xfs_iext_peek_prev_extent(ifp, &bma->icur, &bma->prev))
bma->prev.br_startoff = NULLFILEOFF;
} else {
bma->length = XFS_FILBLKS_MIN(bma->length, MAXEXTLEN);
if (!bma->eof)
bma->length = XFS_FILBLKS_MIN(bma->length,
bma->got.br_startoff - bma->offset);
}
if (bma->flags & XFS_BMAPI_CONTIG)
bma->minlen = bma->length;
else
bma->minlen = 1;
if (bma->flags & XFS_BMAPI_METADATA) {
if (unlikely(XFS_TEST_ERROR(false, mp,
XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT)))
error = xfs_bmap_exact_minlen_extent_alloc(bma);
else
error = xfs_bmap_btalloc(bma);
} else {
error = xfs_bmap_alloc_userdata(bma);
}
if (error || bma->blkno == NULLFSBLOCK)
return error;
if (bma->flags & XFS_BMAPI_ZERO) {
error = xfs_zero_extent(bma->ip, bma->blkno, bma->length);
if (error)
return error;
}
if (ifp->if_format == XFS_DINODE_FMT_BTREE && !bma->cur)
bma->cur = xfs_bmbt_init_cursor(mp, bma->tp, bma->ip, whichfork);
/*
* Bump the number of extents we've allocated
* in this call.
*/
bma->nallocs++;
if (bma->cur)
bma->cur->bc_ino.flags =
bma->wasdel ? XFS_BTCUR_BMBT_WASDEL : 0;
bma->got.br_startoff = bma->offset;
bma->got.br_startblock = bma->blkno;
bma->got.br_blockcount = bma->length;
bma->got.br_state = XFS_EXT_NORM;
if (bma->flags & XFS_BMAPI_PREALLOC)
bma->got.br_state = XFS_EXT_UNWRITTEN;
if (bma->wasdel)
error = xfs_bmap_add_extent_delay_real(bma, whichfork);
else
error = xfs_bmap_add_extent_hole_real(bma->tp, bma->ip,
whichfork, &bma->icur, &bma->cur, &bma->got,
&bma->logflags, bma->flags);
bma->logflags |= tmp_logflags;
if (error)
return error;
/*
* Update our extent pointer, given that xfs_bmap_add_extent_delay_real
* or xfs_bmap_add_extent_hole_real might have merged it into one of
* the neighbouring ones.
*/
xfs_iext_get_extent(ifp, &bma->icur, &bma->got);
ASSERT(bma->got.br_startoff <= bma->offset);
ASSERT(bma->got.br_startoff + bma->got.br_blockcount >=
bma->offset + bma->length);
ASSERT(bma->got.br_state == XFS_EXT_NORM ||
bma->got.br_state == XFS_EXT_UNWRITTEN);
return 0;
}
STATIC int
xfs_bmapi_convert_unwritten(
struct xfs_bmalloca *bma,
struct xfs_bmbt_irec *mval,
xfs_filblks_t len,
int flags)
{
int whichfork = xfs_bmapi_whichfork(flags);
struct xfs_ifork *ifp = XFS_IFORK_PTR(bma->ip, whichfork);
int tmp_logflags = 0;
int error;
/* check if we need to do unwritten->real conversion */
if (mval->br_state == XFS_EXT_UNWRITTEN &&
(flags & XFS_BMAPI_PREALLOC))
return 0;
/* check if we need to do real->unwritten conversion */
if (mval->br_state == XFS_EXT_NORM &&
(flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT)) !=
(XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT))
return 0;
/*
* Modify (by adding) the state flag, if writing.
*/
ASSERT(mval->br_blockcount <= len);
if (ifp->if_format == XFS_DINODE_FMT_BTREE && !bma->cur) {
bma->cur = xfs_bmbt_init_cursor(bma->ip->i_mount, bma->tp,
bma->ip, whichfork);
}
mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN)
? XFS_EXT_NORM : XFS_EXT_UNWRITTEN;
/*
* Before insertion into the bmbt, zero the range being converted
* if required.
*/
if (flags & XFS_BMAPI_ZERO) {
error = xfs_zero_extent(bma->ip, mval->br_startblock,
mval->br_blockcount);
if (error)
return error;
}
error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, whichfork,
&bma->icur, &bma->cur, mval, &tmp_logflags);
xfs: always log the inode on unwritten extent conversion The fsync() requirements for crash consistency on XFS are to flush file data and force any in-core inode updates to the log. We currently check whether the inode is pinned to identify whether the log needs to be forced, since a non-zero pin count generally represents an inode that has transactions awaiting a flush to the on-disk log. This is not sufficient in all cases, however. Reports of xfstests test generic/311 failures on ppc64/s390x hosts have identified failures to fsync outstanding inode modifications due to the inode not being pinned at the time of the fsync. This occurs because certain bmap updates can complete by logging bmapbt buffers but without ever dirtying (and thus pinning) the core inode. The following is a specific incarnation of this problem: $ mount $dev /mnt -o noatime,nobarrier $ for i in $(seq 0 2 31); do \ xfs_io -f -c "falloc $((i * 32768)) 32k" -c fsync /mnt/file; \ done $ xfs_io -c "pwrite -S 0 80k 16k" -c fsync -c "pwrite 76k 4k" -c fsync /mnt/file; \ hexdump /mnt/file; \ ./xfstests-dev/src/godown /mnt ... 0000000 0000 0000 0000 0000 0000 0000 0000 0000 * 0013000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 0014000 0000 0000 0000 0000 0000 0000 0000 0000 * 00f8000 $ umount /mnt; mount ... $ hexdump /mnt/file 0000000 0000 0000 0000 0000 0000 0000 0000 0000 * 00f8000 In short, the unwritten extent conversion for the last write is lost despite the fact that an fsync executed before the filesystem was shutdown. Note that this is impossible to reproduce on v5 supers due to unconditional time callbacks for di_changecount and highly difficult to reproduce on CONFIG_HZ=1000 kernels due to those same callbacks frequently updating cmtime prior to the bmap update. CONFIG_HZ=100 reduces timer granularity enough to increase the odds that time updates are skipped and allows this to reproduce within a handful of attempts. To deal with this problem, unconditionally log the core in the unwritten extent conversion path. Fix up logflags after the extent conversion to keep the extent update code consistent with the other extent update helpers. This fixup is not necessary for the other (hole, delay) extent helpers because they execute in the block allocation codepath, which already logs the inode for other reasons (e.g., for di_nblocks). Signed-off-by: Brian Foster <bfoster@redhat.com> Reviewed-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
2015-06-01 05:15:23 +08:00
/*
* Log the inode core unconditionally in the unwritten extent conversion
* path because the conversion might not have done so (e.g., if the
* extent count hasn't changed). We need to make sure the inode is dirty
* in the transaction for the sake of fsync(), even if nothing has
* changed, because fsync() will not force the log for this transaction
* unless it sees the inode pinned.
*
* Note: If we're only converting cow fork extents, there aren't
* any on-disk updates to make, so we don't need to log anything.
xfs: always log the inode on unwritten extent conversion The fsync() requirements for crash consistency on XFS are to flush file data and force any in-core inode updates to the log. We currently check whether the inode is pinned to identify whether the log needs to be forced, since a non-zero pin count generally represents an inode that has transactions awaiting a flush to the on-disk log. This is not sufficient in all cases, however. Reports of xfstests test generic/311 failures on ppc64/s390x hosts have identified failures to fsync outstanding inode modifications due to the inode not being pinned at the time of the fsync. This occurs because certain bmap updates can complete by logging bmapbt buffers but without ever dirtying (and thus pinning) the core inode. The following is a specific incarnation of this problem: $ mount $dev /mnt -o noatime,nobarrier $ for i in $(seq 0 2 31); do \ xfs_io -f -c "falloc $((i * 32768)) 32k" -c fsync /mnt/file; \ done $ xfs_io -c "pwrite -S 0 80k 16k" -c fsync -c "pwrite 76k 4k" -c fsync /mnt/file; \ hexdump /mnt/file; \ ./xfstests-dev/src/godown /mnt ... 0000000 0000 0000 0000 0000 0000 0000 0000 0000 * 0013000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 0014000 0000 0000 0000 0000 0000 0000 0000 0000 * 00f8000 $ umount /mnt; mount ... $ hexdump /mnt/file 0000000 0000 0000 0000 0000 0000 0000 0000 0000 * 00f8000 In short, the unwritten extent conversion for the last write is lost despite the fact that an fsync executed before the filesystem was shutdown. Note that this is impossible to reproduce on v5 supers due to unconditional time callbacks for di_changecount and highly difficult to reproduce on CONFIG_HZ=1000 kernels due to those same callbacks frequently updating cmtime prior to the bmap update. CONFIG_HZ=100 reduces timer granularity enough to increase the odds that time updates are skipped and allows this to reproduce within a handful of attempts. To deal with this problem, unconditionally log the core in the unwritten extent conversion path. Fix up logflags after the extent conversion to keep the extent update code consistent with the other extent update helpers. This fixup is not necessary for the other (hole, delay) extent helpers because they execute in the block allocation codepath, which already logs the inode for other reasons (e.g., for di_nblocks). Signed-off-by: Brian Foster <bfoster@redhat.com> Reviewed-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
2015-06-01 05:15:23 +08:00
*/
if (whichfork != XFS_COW_FORK)
bma->logflags |= tmp_logflags | XFS_ILOG_CORE;
if (error)
return error;
/*
* Update our extent pointer, given that
* xfs_bmap_add_extent_unwritten_real might have merged it into one
* of the neighbouring ones.
*/
xfs_iext_get_extent(ifp, &bma->icur, &bma->got);
/*
* We may have combined previously unwritten space with written space,
* so generate another request.
*/
if (mval->br_blockcount < len)
return -EAGAIN;
return 0;
}
static inline xfs_extlen_t
xfs_bmapi_minleft(
struct xfs_trans *tp,
struct xfs_inode *ip,
int fork)
{
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, fork);
if (tp && tp->t_firstblock != NULLFSBLOCK)
return 0;
if (ifp->if_format != XFS_DINODE_FMT_BTREE)
return 1;
return be16_to_cpu(ifp->if_broot->bb_level) + 1;
}
/*
* Log whatever the flags say, even if error. Otherwise we might miss detecting
* a case where the data is changed, there's an error, and it's not logged so we
* don't shutdown when we should. Don't bother logging extents/btree changes if
* we converted to the other format.
*/
static void
xfs_bmapi_finish(
struct xfs_bmalloca *bma,
int whichfork,
int error)
{
struct xfs_ifork *ifp = XFS_IFORK_PTR(bma->ip, whichfork);
if ((bma->logflags & xfs_ilog_fext(whichfork)) &&
ifp->if_format != XFS_DINODE_FMT_EXTENTS)
bma->logflags &= ~xfs_ilog_fext(whichfork);
else if ((bma->logflags & xfs_ilog_fbroot(whichfork)) &&
ifp->if_format != XFS_DINODE_FMT_BTREE)
bma->logflags &= ~xfs_ilog_fbroot(whichfork);
if (bma->logflags)
xfs_trans_log_inode(bma->tp, bma->ip, bma->logflags);
if (bma->cur)
xfs_btree_del_cursor(bma->cur, error);
}
/*
* Map file blocks to filesystem blocks, and allocate blocks or convert the
* extent state if necessary. Details behaviour is controlled by the flags
* parameter. Only allocates blocks from a single allocation group, to avoid
* locking problems.
*/
int
xfs_bmapi_write(
struct xfs_trans *tp, /* transaction pointer */
struct xfs_inode *ip, /* incore inode */
xfs_fileoff_t bno, /* starting file offs. mapped */
xfs_filblks_t len, /* length to map in file */
int flags, /* XFS_BMAPI_... */
xfs_extlen_t total, /* total blocks needed */
struct xfs_bmbt_irec *mval, /* output: map values */
int *nmap) /* i/o: mval size/count */
{
struct xfs_bmalloca bma = {
.tp = tp,
.ip = ip,
.total = total,
};
struct xfs_mount *mp = ip->i_mount;
int whichfork = xfs_bmapi_whichfork(flags);
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
xfs_fileoff_t end; /* end of mapped file region */
bool eof = false; /* after the end of extents */
int error; /* error return */
int n; /* current extent index */
xfs_fileoff_t obno; /* old block number (offset) */
#ifdef DEBUG
xfs_fileoff_t orig_bno; /* original block number value */
int orig_flags; /* original flags arg value */
xfs_filblks_t orig_len; /* original value of len arg */
struct xfs_bmbt_irec *orig_mval; /* original value of mval */
int orig_nmap; /* original value of *nmap */
orig_bno = bno;
orig_len = len;
orig_flags = flags;
orig_mval = mval;
orig_nmap = *nmap;
#endif
ASSERT(*nmap >= 1);
ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);
ASSERT(tp != NULL);
ASSERT(len > 0);
ASSERT(ifp->if_format != XFS_DINODE_FMT_LOCAL);
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
ASSERT(!(flags & XFS_BMAPI_REMAP));
/* zeroing is for currently only for data extents, not metadata */
ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) !=
(XFS_BMAPI_METADATA | XFS_BMAPI_ZERO));
/*
* we can allocate unwritten extents or pre-zero allocated blocks,
* but it makes no sense to do both at once. This would result in
* zeroing the unwritten extent twice, but it still being an
* unwritten extent....
*/
ASSERT((flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO)) !=
(XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO));
if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
return -EFSCORRUPTED;
}
if (xfs_is_shutdown(mp))
return -EIO;
XFS_STATS_INC(mp, xs_blk_mapw);
error = xfs_iread_extents(tp, ip, whichfork);
if (error)
goto error0;
if (!xfs_iext_lookup_extent(ip, ifp, bno, &bma.icur, &bma.got))
eof = true;
if (!xfs_iext_peek_prev_extent(ifp, &bma.icur, &bma.prev))
bma.prev.br_startoff = NULLFILEOFF;
bma.minleft = xfs_bmapi_minleft(tp, ip, whichfork);
n = 0;
end = bno + len;
obno = bno;
while (bno < end && n < *nmap) {
bool need_alloc = false, wasdelay = false;
/* in hole or beyond EOF? */
if (eof || bma.got.br_startoff > bno) {
/*
* CoW fork conversions should /never/ hit EOF or
* holes. There should always be something for us
* to work on.
*/
ASSERT(!((flags & XFS_BMAPI_CONVERT) &&
(flags & XFS_BMAPI_COWFORK)));
need_alloc = true;
} else if (isnullstartblock(bma.got.br_startblock)) {
wasdelay = true;
}
/*
* First, deal with the hole before the allocated space
* that we found, if any.
*/
if (need_alloc || wasdelay) {
bma.eof = eof;
bma.conv = !!(flags & XFS_BMAPI_CONVERT);
bma.wasdel = wasdelay;
bma.offset = bno;
bma.flags = flags;
/*
* There's a 32/64 bit type mismatch between the
* allocation length request (which can be 64 bits in
* length) and the bma length request, which is
* xfs_extlen_t and therefore 32 bits. Hence we have to
* check for 32-bit overflows and handle them here.
*/
if (len > (xfs_filblks_t)MAXEXTLEN)
bma.length = MAXEXTLEN;
else
bma.length = len;
ASSERT(len > 0);
ASSERT(bma.length > 0);
error = xfs_bmapi_allocate(&bma);
if (error)
goto error0;
if (bma.blkno == NULLFSBLOCK)
break;
/*
* If this is a CoW allocation, record the data in
* the refcount btree for orphan recovery.
*/
if (whichfork == XFS_COW_FORK)
xfs_refcount_alloc_cow_extent(tp, bma.blkno,
bma.length);
}
/* Deal with the allocated space we found. */
xfs_bmapi_trim_map(mval, &bma.got, &bno, len, obno,
end, n, flags);
/* Execute unwritten extent conversion if necessary */
error = xfs_bmapi_convert_unwritten(&bma, mval, len, flags);
if (error == -EAGAIN)
continue;
if (error)
goto error0;
/* update the extent map to return */
xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);
/*
* If we're done, stop now. Stop when we've allocated
* XFS_BMAP_MAX_NMAP extents no matter what. Otherwise
* the transaction may get too big.
*/
if (bno >= end || n >= *nmap || bma.nallocs >= *nmap)
break;
/* Else go on to the next record. */
bma.prev = bma.got;
if (!xfs_iext_next_extent(ifp, &bma.icur, &bma.got))
eof = true;
}
*nmap = n;
error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, &bma.logflags,
whichfork);
if (error)
goto error0;
ASSERT(ifp->if_format != XFS_DINODE_FMT_BTREE ||
ifp->if_nextents > XFS_IFORK_MAXEXT(ip, whichfork));
xfs_bmapi_finish(&bma, whichfork, 0);
xfs_bmap_validate_ret(orig_bno, orig_len, orig_flags, orig_mval,
orig_nmap, *nmap);
return 0;
error0:
xfs_bmapi_finish(&bma, whichfork, error);
return error;
}
/*
* Convert an existing delalloc extent to real blocks based on file offset. This
* attempts to allocate the entire delalloc extent and may require multiple
* invocations to allocate the target offset if a large enough physical extent
* is not available.
*/
int
xfs_bmapi_convert_delalloc(
struct xfs_inode *ip,
int whichfork,
xfs_off_t offset,
struct iomap *iomap,
unsigned int *seq)
{
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
struct xfs_mount *mp = ip->i_mount;
xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
struct xfs_bmalloca bma = { NULL };
uint16_t flags = 0;
struct xfs_trans *tp;
int error;
if (whichfork == XFS_COW_FORK)
flags |= IOMAP_F_SHARED;
/*
* Space for the extent and indirect blocks was reserved when the
* delalloc extent was created so there's no need to do so here.
*/
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0,
XFS_TRANS_RESERVE, &tp);
if (error)
return error;
xfs_ilock(ip, XFS_ILOCK_EXCL);
error = xfs_iext_count_may_overflow(ip, whichfork,
XFS_IEXT_ADD_NOSPLIT_CNT);
if (error)
goto out_trans_cancel;
xfs_trans_ijoin(tp, ip, 0);
if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &bma.icur, &bma.got) ||
bma.got.br_startoff > offset_fsb) {
/*
* No extent found in the range we are trying to convert. This
* should only happen for the COW fork, where another thread
* might have moved the extent to the data fork in the meantime.
*/
WARN_ON_ONCE(whichfork != XFS_COW_FORK);
error = -EAGAIN;
goto out_trans_cancel;
}
/*
* If we find a real extent here we raced with another thread converting
* the extent. Just return the real extent at this offset.
*/
if (!isnullstartblock(bma.got.br_startblock)) {
xfs_bmbt_to_iomap(ip, iomap, &bma.got, flags);
*seq = READ_ONCE(ifp->if_seq);
goto out_trans_cancel;
}
bma.tp = tp;
bma.ip = ip;
bma.wasdel = true;
bma.offset = bma.got.br_startoff;
bma.length = max_t(xfs_filblks_t, bma.got.br_blockcount, MAXEXTLEN);
bma.minleft = xfs_bmapi_minleft(tp, ip, whichfork);
/*
* When we're converting the delalloc reservations backing dirty pages
* in the page cache, we must be careful about how we create the new
* extents:
*
* New CoW fork extents are created unwritten, turned into real extents
* when we're about to write the data to disk, and mapped into the data
* fork after the write finishes. End of story.
*
* New data fork extents must be mapped in as unwritten and converted
* to real extents after the write succeeds to avoid exposing stale
* disk contents if we crash.
*/
bma.flags = XFS_BMAPI_PREALLOC;
if (whichfork == XFS_COW_FORK)
bma.flags |= XFS_BMAPI_COWFORK;
if (!xfs_iext_peek_prev_extent(ifp, &bma.icur, &bma.prev))
bma.prev.br_startoff = NULLFILEOFF;
error = xfs_bmapi_allocate(&bma);
if (error)
goto out_finish;
error = -ENOSPC;
if (WARN_ON_ONCE(bma.blkno == NULLFSBLOCK))
goto out_finish;
error = -EFSCORRUPTED;
if (WARN_ON_ONCE(!xfs_valid_startblock(ip, bma.got.br_startblock)))
goto out_finish;
XFS_STATS_ADD(mp, xs_xstrat_bytes, XFS_FSB_TO_B(mp, bma.length));
XFS_STATS_INC(mp, xs_xstrat_quick);
ASSERT(!isnullstartblock(bma.got.br_startblock));
xfs_bmbt_to_iomap(ip, iomap, &bma.got, flags);
*seq = READ_ONCE(ifp->if_seq);
if (whichfork == XFS_COW_FORK)
xfs_refcount_alloc_cow_extent(tp, bma.blkno, bma.length);
error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, &bma.logflags,
whichfork);
if (error)
goto out_finish;
xfs_bmapi_finish(&bma, whichfork, 0);
error = xfs_trans_commit(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
return error;
out_finish:
xfs_bmapi_finish(&bma, whichfork, error);
out_trans_cancel:
xfs_trans_cancel(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
return error;
}
int
xfs_bmapi_remap(
struct xfs_trans *tp,
struct xfs_inode *ip,
xfs_fileoff_t bno,
xfs_filblks_t len,
xfs_fsblock_t startblock,
int flags)
{
struct xfs_mount *mp = ip->i_mount;
struct xfs_ifork *ifp;
struct xfs_btree_cur *cur = NULL;
struct xfs_bmbt_irec got;
struct xfs_iext_cursor icur;
int whichfork = xfs_bmapi_whichfork(flags);
int logflags = 0, error;
ifp = XFS_IFORK_PTR(ip, whichfork);
ASSERT(len > 0);
ASSERT(len <= (xfs_filblks_t)MAXEXTLEN);
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC |
XFS_BMAPI_NORMAP)));
ASSERT((flags & (XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC)) !=
(XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC));
if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
return -EFSCORRUPTED;
}
if (xfs_is_shutdown(mp))
return -EIO;
error = xfs_iread_extents(tp, ip, whichfork);
if (error)
return error;
if (xfs_iext_lookup_extent(ip, ifp, bno, &icur, &got)) {
/* make sure we only reflink into a hole. */
ASSERT(got.br_startoff > bno);
ASSERT(got.br_startoff - bno >= len);
}
ip->i_nblocks += len;
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
cur->bc_ino.flags = 0;
}
got.br_startoff = bno;
got.br_startblock = startblock;
got.br_blockcount = len;
if (flags & XFS_BMAPI_PREALLOC)
got.br_state = XFS_EXT_UNWRITTEN;
else
got.br_state = XFS_EXT_NORM;
error = xfs_bmap_add_extent_hole_real(tp, ip, whichfork, &icur,
&cur, &got, &logflags, flags);
if (error)
goto error0;
error = xfs_bmap_btree_to_extents(tp, ip, cur, &logflags, whichfork);
error0:
if (ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS)
logflags &= ~XFS_ILOG_DEXT;
else if (ip->i_df.if_format != XFS_DINODE_FMT_BTREE)
logflags &= ~XFS_ILOG_DBROOT;
if (logflags)
xfs_trans_log_inode(tp, ip, logflags);
if (cur)
xfs_btree_del_cursor(cur, error);
return error;
}
/*
* When a delalloc extent is split (e.g., due to a hole punch), the original
* indlen reservation must be shared across the two new extents that are left
* behind.
*
* Given the original reservation and the worst case indlen for the two new
* extents (as calculated by xfs_bmap_worst_indlen()), split the original
xfs: borrow indirect blocks from freed extent when available xfs_bmap_del_extent() handles extent removal from the in-core and on-disk extent lists. When removing a delalloc range, it updates the indirect block reservation appropriately based on the removal. It currently enforces that the new indirect block reservation is less than or equal to the original. This is normally the case in all situations except for in certain cases when the removed range creates a hole in a single delalloc extent, thus splitting a single delalloc extent in two. It is possible with small enough extents to split an indlen==1 extent into two such slightly smaller extents. This leaves one extent with 0 indirect blocks and leads to assert failures in other areas (e.g., xfs_bunmapi() if the extent happens to be removed). Update the indlen distribution code to steal blocks from the deleted extent, if necessary, to satisfy the worst case total indirect reservation for the new extents. This is safe as the caller does not update the fdblocks counters until the extent is removed. Blocks stolen in this manner simply remain accounted as allocated, having ownership transferred from the data extent to an indirect reservation. As a precaution, fall back to the original reservation algorithm if the new indlen requirement is not met and warn if we end up with extents without any reservation at all to detect this more easily in the future. Signed-off-by: Brian Foster <bfoster@redhat.com> Reviewed-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-03-15 08:42:47 +08:00
* reservation fairly across the two new extents. If necessary, steal available
* blocks from a deleted extent to make up a reservation deficiency (e.g., if
* ores == 1). The number of stolen blocks is returned. The availability and
* subsequent accounting of stolen blocks is the responsibility of the caller.
*/
xfs: borrow indirect blocks from freed extent when available xfs_bmap_del_extent() handles extent removal from the in-core and on-disk extent lists. When removing a delalloc range, it updates the indirect block reservation appropriately based on the removal. It currently enforces that the new indirect block reservation is less than or equal to the original. This is normally the case in all situations except for in certain cases when the removed range creates a hole in a single delalloc extent, thus splitting a single delalloc extent in two. It is possible with small enough extents to split an indlen==1 extent into two such slightly smaller extents. This leaves one extent with 0 indirect blocks and leads to assert failures in other areas (e.g., xfs_bunmapi() if the extent happens to be removed). Update the indlen distribution code to steal blocks from the deleted extent, if necessary, to satisfy the worst case total indirect reservation for the new extents. This is safe as the caller does not update the fdblocks counters until the extent is removed. Blocks stolen in this manner simply remain accounted as allocated, having ownership transferred from the data extent to an indirect reservation. As a precaution, fall back to the original reservation algorithm if the new indlen requirement is not met and warn if we end up with extents without any reservation at all to detect this more easily in the future. Signed-off-by: Brian Foster <bfoster@redhat.com> Reviewed-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-03-15 08:42:47 +08:00
static xfs_filblks_t
xfs_bmap_split_indlen(
xfs_filblks_t ores, /* original res. */
xfs_filblks_t *indlen1, /* ext1 worst indlen */
xfs: borrow indirect blocks from freed extent when available xfs_bmap_del_extent() handles extent removal from the in-core and on-disk extent lists. When removing a delalloc range, it updates the indirect block reservation appropriately based on the removal. It currently enforces that the new indirect block reservation is less than or equal to the original. This is normally the case in all situations except for in certain cases when the removed range creates a hole in a single delalloc extent, thus splitting a single delalloc extent in two. It is possible with small enough extents to split an indlen==1 extent into two such slightly smaller extents. This leaves one extent with 0 indirect blocks and leads to assert failures in other areas (e.g., xfs_bunmapi() if the extent happens to be removed). Update the indlen distribution code to steal blocks from the deleted extent, if necessary, to satisfy the worst case total indirect reservation for the new extents. This is safe as the caller does not update the fdblocks counters until the extent is removed. Blocks stolen in this manner simply remain accounted as allocated, having ownership transferred from the data extent to an indirect reservation. As a precaution, fall back to the original reservation algorithm if the new indlen requirement is not met and warn if we end up with extents without any reservation at all to detect this more easily in the future. Signed-off-by: Brian Foster <bfoster@redhat.com> Reviewed-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-03-15 08:42:47 +08:00
xfs_filblks_t *indlen2, /* ext2 worst indlen */
xfs_filblks_t avail) /* stealable blocks */
{
xfs_filblks_t len1 = *indlen1;
xfs_filblks_t len2 = *indlen2;
xfs_filblks_t nres = len1 + len2; /* new total res. */
xfs: borrow indirect blocks from freed extent when available xfs_bmap_del_extent() handles extent removal from the in-core and on-disk extent lists. When removing a delalloc range, it updates the indirect block reservation appropriately based on the removal. It currently enforces that the new indirect block reservation is less than or equal to the original. This is normally the case in all situations except for in certain cases when the removed range creates a hole in a single delalloc extent, thus splitting a single delalloc extent in two. It is possible with small enough extents to split an indlen==1 extent into two such slightly smaller extents. This leaves one extent with 0 indirect blocks and leads to assert failures in other areas (e.g., xfs_bunmapi() if the extent happens to be removed). Update the indlen distribution code to steal blocks from the deleted extent, if necessary, to satisfy the worst case total indirect reservation for the new extents. This is safe as the caller does not update the fdblocks counters until the extent is removed. Blocks stolen in this manner simply remain accounted as allocated, having ownership transferred from the data extent to an indirect reservation. As a precaution, fall back to the original reservation algorithm if the new indlen requirement is not met and warn if we end up with extents without any reservation at all to detect this more easily in the future. Signed-off-by: Brian Foster <bfoster@redhat.com> Reviewed-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-03-15 08:42:47 +08:00
xfs_filblks_t stolen = 0;
xfs_filblks_t resfactor;
xfs: borrow indirect blocks from freed extent when available xfs_bmap_del_extent() handles extent removal from the in-core and on-disk extent lists. When removing a delalloc range, it updates the indirect block reservation appropriately based on the removal. It currently enforces that the new indirect block reservation is less than or equal to the original. This is normally the case in all situations except for in certain cases when the removed range creates a hole in a single delalloc extent, thus splitting a single delalloc extent in two. It is possible with small enough extents to split an indlen==1 extent into two such slightly smaller extents. This leaves one extent with 0 indirect blocks and leads to assert failures in other areas (e.g., xfs_bunmapi() if the extent happens to be removed). Update the indlen distribution code to steal blocks from the deleted extent, if necessary, to satisfy the worst case total indirect reservation for the new extents. This is safe as the caller does not update the fdblocks counters until the extent is removed. Blocks stolen in this manner simply remain accounted as allocated, having ownership transferred from the data extent to an indirect reservation. As a precaution, fall back to the original reservation algorithm if the new indlen requirement is not met and warn if we end up with extents without any reservation at all to detect this more easily in the future. Signed-off-by: Brian Foster <bfoster@redhat.com> Reviewed-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-03-15 08:42:47 +08:00
/*
* Steal as many blocks as we can to try and satisfy the worst case
* indlen for both new extents.
*/
if (ores < nres && avail)
stolen = XFS_FILBLKS_MIN(nres - ores, avail);
ores += stolen;
/* nothing else to do if we've satisfied the new reservation */
if (ores >= nres)
return stolen;
/*
* We can't meet the total required reservation for the two extents.
* Calculate the percent of the overall shortage between both extents
* and apply this percentage to each of the requested indlen values.
* This distributes the shortage fairly and reduces the chances that one
* of the two extents is left with nothing when extents are repeatedly
* split.
*/
resfactor = (ores * 100);
do_div(resfactor, nres);
len1 *= resfactor;
do_div(len1, 100);
len2 *= resfactor;
do_div(len2, 100);
ASSERT(len1 + len2 <= ores);
ASSERT(len1 < *indlen1 && len2 < *indlen2);
/*
* Hand out the remainder to each extent. If one of the two reservations
* is zero, we want to make sure that one gets a block first. The loop
* below starts with len1, so hand len2 a block right off the bat if it
* is zero.
*/
ores -= (len1 + len2);
ASSERT((*indlen1 - len1) + (*indlen2 - len2) >= ores);
if (ores && !len2 && *indlen2) {
len2++;
ores--;
}
while (ores) {
if (len1 < *indlen1) {
len1++;
ores--;
}
if (!ores)
break;
if (len2 < *indlen2) {
len2++;
ores--;
}
}
*indlen1 = len1;
*indlen2 = len2;
xfs: borrow indirect blocks from freed extent when available xfs_bmap_del_extent() handles extent removal from the in-core and on-disk extent lists. When removing a delalloc range, it updates the indirect block reservation appropriately based on the removal. It currently enforces that the new indirect block reservation is less than or equal to the original. This is normally the case in all situations except for in certain cases when the removed range creates a hole in a single delalloc extent, thus splitting a single delalloc extent in two. It is possible with small enough extents to split an indlen==1 extent into two such slightly smaller extents. This leaves one extent with 0 indirect blocks and leads to assert failures in other areas (e.g., xfs_bunmapi() if the extent happens to be removed). Update the indlen distribution code to steal blocks from the deleted extent, if necessary, to satisfy the worst case total indirect reservation for the new extents. This is safe as the caller does not update the fdblocks counters until the extent is removed. Blocks stolen in this manner simply remain accounted as allocated, having ownership transferred from the data extent to an indirect reservation. As a precaution, fall back to the original reservation algorithm if the new indlen requirement is not met and warn if we end up with extents without any reservation at all to detect this more easily in the future. Signed-off-by: Brian Foster <bfoster@redhat.com> Reviewed-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-03-15 08:42:47 +08:00
return stolen;
}
int
xfs_bmap_del_extent_delay(
struct xfs_inode *ip,
int whichfork,
struct xfs_iext_cursor *icur,
struct xfs_bmbt_irec *got,
struct xfs_bmbt_irec *del)
{
struct xfs_mount *mp = ip->i_mount;
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
struct xfs_bmbt_irec new;
int64_t da_old, da_new, da_diff = 0;
xfs_fileoff_t del_endoff, got_endoff;
xfs_filblks_t got_indlen, new_indlen, stolen;
int state = xfs_bmap_fork_to_state(whichfork);
int error = 0;
bool isrt;
XFS_STATS_INC(mp, xs_del_exlist);
isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
del_endoff = del->br_startoff + del->br_blockcount;
got_endoff = got->br_startoff + got->br_blockcount;
da_old = startblockval(got->br_startblock);
da_new = 0;
ASSERT(del->br_blockcount > 0);
ASSERT(got->br_startoff <= del->br_startoff);
ASSERT(got_endoff >= del_endoff);
if (isrt) {
uint64_t rtexts = XFS_FSB_TO_B(mp, del->br_blockcount);
do_div(rtexts, mp->m_sb.sb_rextsize);
xfs_mod_frextents(mp, rtexts);
}
/*
* Update the inode delalloc counter now and wait to update the
* sb counters as we might have to borrow some blocks for the
* indirect block accounting.
*/
ASSERT(!isrt);
error = xfs_quota_unreserve_blkres(ip, del->br_blockcount);
if (error)
return error;
ip->i_delayed_blks -= del->br_blockcount;
if (got->br_startoff == del->br_startoff)
state |= BMAP_LEFT_FILLING;
if (got_endoff == del_endoff)
state |= BMAP_RIGHT_FILLING;
switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
/*
* Matches the whole extent. Delete the entry.
*/
xfs_iext_remove(ip, icur, state);
xfs_iext_prev(ifp, icur);
break;
case BMAP_LEFT_FILLING:
/*
* Deleting the first part of the extent.
*/
got->br_startoff = del_endoff;
got->br_blockcount -= del->br_blockcount;
da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip,
got->br_blockcount), da_old);
got->br_startblock = nullstartblock((int)da_new);
xfs_iext_update_extent(ip, state, icur, got);
break;
case BMAP_RIGHT_FILLING:
/*
* Deleting the last part of the extent.
*/
got->br_blockcount = got->br_blockcount - del->br_blockcount;
da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip,
got->br_blockcount), da_old);
got->br_startblock = nullstartblock((int)da_new);
xfs_iext_update_extent(ip, state, icur, got);
break;
case 0:
/*
* Deleting the middle of the extent.
*
* Distribute the original indlen reservation across the two new
* extents. Steal blocks from the deleted extent if necessary.
* Stealing blocks simply fudges the fdblocks accounting below.
* Warn if either of the new indlen reservations is zero as this
* can lead to delalloc problems.
*/
got->br_blockcount = del->br_startoff - got->br_startoff;
got_indlen = xfs_bmap_worst_indlen(ip, got->br_blockcount);
new.br_blockcount = got_endoff - del_endoff;
new_indlen = xfs_bmap_worst_indlen(ip, new.br_blockcount);
WARN_ON_ONCE(!got_indlen || !new_indlen);
stolen = xfs_bmap_split_indlen(da_old, &got_indlen, &new_indlen,
del->br_blockcount);
got->br_startblock = nullstartblock((int)got_indlen);
new.br_startoff = del_endoff;
new.br_state = got->br_state;
new.br_startblock = nullstartblock((int)new_indlen);
xfs_iext_update_extent(ip, state, icur, got);
xfs_iext_next(ifp, icur);
xfs_iext_insert(ip, icur, &new, state);
da_new = got_indlen + new_indlen - stolen;
del->br_blockcount -= stolen;
break;
}
ASSERT(da_old >= da_new);
da_diff = da_old - da_new;
if (!isrt)
da_diff += del->br_blockcount;
if (da_diff) {
xfs_mod_fdblocks(mp, da_diff, false);
xfs_mod_delalloc(mp, -da_diff);
}
return error;
}
void
xfs_bmap_del_extent_cow(
struct xfs_inode *ip,
struct xfs_iext_cursor *icur,
struct xfs_bmbt_irec *got,
struct xfs_bmbt_irec *del)
{
struct xfs_mount *mp = ip->i_mount;
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
struct xfs_bmbt_irec new;
xfs_fileoff_t del_endoff, got_endoff;
int state = BMAP_COWFORK;
XFS_STATS_INC(mp, xs_del_exlist);
del_endoff = del->br_startoff + del->br_blockcount;
got_endoff = got->br_startoff + got->br_blockcount;
ASSERT(del->br_blockcount > 0);
ASSERT(got->br_startoff <= del->br_startoff);
ASSERT(got_endoff >= del_endoff);
ASSERT(!isnullstartblock(got->br_startblock));
if (got->br_startoff == del->br_startoff)
state |= BMAP_LEFT_FILLING;
if (got_endoff == del_endoff)
state |= BMAP_RIGHT_FILLING;
switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
/*
* Matches the whole extent. Delete the entry.
*/
xfs_iext_remove(ip, icur, state);
xfs_iext_prev(ifp, icur);
break;
case BMAP_LEFT_FILLING:
/*
* Deleting the first part of the extent.
*/
got->br_startoff = del_endoff;
got->br_blockcount -= del->br_blockcount;
got->br_startblock = del->br_startblock + del->br_blockcount;
xfs_iext_update_extent(ip, state, icur, got);
break;
case BMAP_RIGHT_FILLING:
/*
* Deleting the last part of the extent.
*/
got->br_blockcount -= del->br_blockcount;
xfs_iext_update_extent(ip, state, icur, got);
break;
case 0:
/*
* Deleting the middle of the extent.
*/
got->br_blockcount = del->br_startoff - got->br_startoff;
new.br_startoff = del_endoff;
new.br_blockcount = got_endoff - del_endoff;
new.br_state = got->br_state;
new.br_startblock = del->br_startblock + del->br_blockcount;
xfs_iext_update_extent(ip, state, icur, got);
xfs_iext_next(ifp, icur);
xfs_iext_insert(ip, icur, &new, state);
break;
}
ip->i_delayed_blks -= del->br_blockcount;
}
/*
* Called by xfs_bmapi to update file extent records and the btree
* after removing space.
*/
STATIC int /* error */
xfs_bmap_del_extent_real(
xfs_inode_t *ip, /* incore inode pointer */
xfs_trans_t *tp, /* current transaction pointer */
struct xfs_iext_cursor *icur,
struct xfs_btree_cur *cur, /* if null, not a btree */
xfs_bmbt_irec_t *del, /* data to remove from extents */
int *logflagsp, /* inode logging flags */
int whichfork, /* data or attr fork */
int bflags) /* bmapi flags */
{
xfs_fsblock_t del_endblock=0; /* first block past del */
xfs_fileoff_t del_endoff; /* first offset past del */
int do_fx; /* free extent at end of routine */
int error; /* error return value */
int flags = 0;/* inode logging flags */
struct xfs_bmbt_irec got; /* current extent entry */
xfs_fileoff_t got_endoff; /* first offset past got */
int i; /* temp state */
struct xfs_ifork *ifp; /* inode fork pointer */
xfs_mount_t *mp; /* mount structure */
xfs_filblks_t nblks; /* quota/sb block count */
xfs_bmbt_irec_t new; /* new record to be inserted */
/* REFERENCED */
uint qfield; /* quota field to update */
int state = xfs_bmap_fork_to_state(whichfork);
struct xfs_bmbt_irec old;
mp = ip->i_mount;
XFS_STATS_INC(mp, xs_del_exlist);
ifp = XFS_IFORK_PTR(ip, whichfork);
ASSERT(del->br_blockcount > 0);
xfs_iext_get_extent(ifp, icur, &got);
ASSERT(got.br_startoff <= del->br_startoff);
del_endoff = del->br_startoff + del->br_blockcount;
got_endoff = got.br_startoff + got.br_blockcount;
ASSERT(got_endoff >= del_endoff);
ASSERT(!isnullstartblock(got.br_startblock));
qfield = 0;
error = 0;
/*
* If it's the case where the directory code is running with no block
* reservation, and the deleted block is in the middle of its extent,
* and the resulting insert of an extent would cause transformation to
* btree format, then reject it. The calling code will then swap blocks
* around instead. We have to do this now, rather than waiting for the
* conversion to btree format, since the transaction will be dirty then.
*/
if (tp->t_blk_res == 0 &&
ifp->if_format == XFS_DINODE_FMT_EXTENTS &&
ifp->if_nextents >= XFS_IFORK_MAXEXT(ip, whichfork) &&
del->br_startoff > got.br_startoff && del_endoff < got_endoff)
return -ENOSPC;
flags = XFS_ILOG_CORE;
if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) {
xfs_filblks_t len;
xfs_extlen_t mod;
len = div_u64_rem(del->br_blockcount, mp->m_sb.sb_rextsize,
&mod);
ASSERT(mod == 0);
if (!(bflags & XFS_BMAPI_REMAP)) {
xfs_fsblock_t bno;
bno = div_u64_rem(del->br_startblock,
mp->m_sb.sb_rextsize, &mod);
ASSERT(mod == 0);
error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len);
if (error)
goto done;
}
do_fx = 0;
nblks = len * mp->m_sb.sb_rextsize;
qfield = XFS_TRANS_DQ_RTBCOUNT;
} else {
do_fx = 1;
nblks = del->br_blockcount;
qfield = XFS_TRANS_DQ_BCOUNT;
}
del_endblock = del->br_startblock + del->br_blockcount;
if (cur) {
error = xfs_bmbt_lookup_eq(cur, &got, &i);
if (error)
goto done;
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(mp, i != 1)) {
error = -EFSCORRUPTED;
goto done;
}
}
xfs: add owner field to extent allocation and freeing For the rmap btree to work, we have to feed the extent owner information to the the allocation and freeing functions. This information is what will end up in the rmap btree that tracks allocated extents. While we technically don't need the owner information when freeing extents, passing it allows us to validate that the extent we are removing from the rmap btree actually belonged to the owner we expected it to belong to. We also define a special set of owner values for internal metadata that would otherwise have no owner. This allows us to tell the difference between metadata owned by different per-ag btrees, as well as static fs metadata (e.g. AG headers) and internal journal blocks. There are also a couple of special cases we need to take care of - during EFI recovery, we don't actually know who the original owner was, so we need to pass a wildcard to indicate that we aren't checking the owner for validity. We also need special handling in growfs, as we "free" the space in the last AG when extending it, but because it's new space it has no actual owner... While touching the xfs_bmap_add_free() function, re-order the parameters to put the struct xfs_mount first. Extend the owner field to include both the owner type and some sort of index within the owner. The index field will be used to support reverse mappings when reflink is enabled. When we're freeing extents from an EFI, we don't have the owner information available (rmap updates have their own redo items). xfs_free_extent therefore doesn't need to do an rmap update. Make sure that the log replay code signals this correctly. This is based upon a patch originally from Dave Chinner. It has been extended to add more owner information with the intent of helping recovery operations when things go wrong (e.g. offset of user data block in a file). [dchinner: de-shout the xfs_rmap_*_owner helpers] [darrick: minor style fixes suggested by Christoph Hellwig] Signed-off-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
2016-08-03 09:33:42 +08:00
if (got.br_startoff == del->br_startoff)
state |= BMAP_LEFT_FILLING;
if (got_endoff == del_endoff)
state |= BMAP_RIGHT_FILLING;
switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
/*
* Matches the whole extent. Delete the entry.
*/
xfs_iext_remove(ip, icur, state);
xfs_iext_prev(ifp, icur);
ifp->if_nextents--;
flags |= XFS_ILOG_CORE;
if (!cur) {
flags |= xfs_ilog_fext(whichfork);
break;
}
if ((error = xfs_btree_delete(cur, &i)))
goto done;
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(mp, i != 1)) {
error = -EFSCORRUPTED;
goto done;
}
break;
case BMAP_LEFT_FILLING:
/*
* Deleting the first part of the extent.
*/
got.br_startoff = del_endoff;
got.br_startblock = del_endblock;
got.br_blockcount -= del->br_blockcount;
xfs_iext_update_extent(ip, state, icur, &got);
if (!cur) {
flags |= xfs_ilog_fext(whichfork);
break;
}
error = xfs_bmbt_update(cur, &got);
if (error)
goto done;
break;
case BMAP_RIGHT_FILLING:
/*
* Deleting the last part of the extent.
*/
got.br_blockcount -= del->br_blockcount;
xfs_iext_update_extent(ip, state, icur, &got);
if (!cur) {
flags |= xfs_ilog_fext(whichfork);
break;
}
error = xfs_bmbt_update(cur, &got);
if (error)
goto done;
break;
case 0:
/*
* Deleting the middle of the extent.
*/
/*
* For directories, -ENOSPC is returned since a directory entry
* remove operation must not fail due to low extent count
* availability. -ENOSPC will be handled by higher layers of XFS
* by letting the corresponding empty Data/Free blocks to linger
* until a future remove operation. Dabtree blocks would be
* swapped with the last block in the leaf space and then the
* new last block will be unmapped.
xfs: Check for extent overflow when renaming dir entries A rename operation is essentially a directory entry remove operation from the perspective of parent directory (i.e. src_dp) of rename's source. Hence the only place where we check for extent count overflow for src_dp is in xfs_bmap_del_extent_real(). xfs_bmap_del_extent_real() returns -ENOSPC when it detects a possible extent count overflow and in response, the higher layers of directory handling code do the following: 1. Data/Free blocks: XFS lets these blocks linger until a future remove operation removes them. 2. Dabtree blocks: XFS swaps the blocks with the last block in the Leaf space and unmaps the last block. For target_dp, there are two cases depending on whether the destination directory entry exists or not. When destination directory entry does not exist (i.e. target_ip == NULL), extent count overflow check is performed only when transaction has a non-zero sized space reservation associated with it. With a zero-sized space reservation, XFS allows a rename operation to continue only when the directory has sufficient free space in its data/leaf/free space blocks to hold the new entry. When destination directory entry exists (i.e. target_ip != NULL), all we need to do is change the inode number associated with the already existing entry. Hence there is no need to perform an extent count overflow check. Signed-off-by: Chandan Babu R <chandanrlinux@gmail.com> Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
2021-01-23 08:48:13 +08:00
*
* The above logic also applies to the source directory entry of
* a rename operation.
*/
error = xfs_iext_count_may_overflow(ip, whichfork, 1);
if (error) {
ASSERT(S_ISDIR(VFS_I(ip)->i_mode) &&
whichfork == XFS_DATA_FORK);
error = -ENOSPC;
goto done;
}
old = got;
got.br_blockcount = del->br_startoff - got.br_startoff;
xfs_iext_update_extent(ip, state, icur, &got);
new.br_startoff = del_endoff;
new.br_blockcount = got_endoff - del_endoff;
new.br_state = got.br_state;
new.br_startblock = del_endblock;
flags |= XFS_ILOG_CORE;
if (cur) {
error = xfs_bmbt_update(cur, &got);
if (error)
goto done;
error = xfs_btree_increment(cur, 0, &i);
if (error)
goto done;
cur->bc_rec.b = new;
error = xfs_btree_insert(cur, &i);
if (error && error != -ENOSPC)
goto done;
/*
* If get no-space back from btree insert, it tried a
* split, and we have a zero block reservation. Fix up
* our state and return the error.
*/
if (error == -ENOSPC) {
/*
* Reset the cursor, don't trust it after any
* insert operation.
*/
error = xfs_bmbt_lookup_eq(cur, &got, &i);
if (error)
goto done;
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(mp, i != 1)) {
error = -EFSCORRUPTED;
goto done;
}
/*
* Update the btree record back
* to the original value.
*/
error = xfs_bmbt_update(cur, &old);
if (error)
goto done;
/*
* Reset the extent record back
* to the original value.
*/
xfs_iext_update_extent(ip, state, icur, &old);
flags = 0;
error = -ENOSPC;
goto done;
}
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(mp, i != 1)) {
error = -EFSCORRUPTED;
goto done;
}
} else
flags |= xfs_ilog_fext(whichfork);
ifp->if_nextents++;
xfs_iext_next(ifp, icur);
xfs_iext_insert(ip, icur, &new, state);
break;
}
/* remove reverse mapping */
xfs_rmap_unmap_extent(tp, ip, whichfork, del);
/*
* If we need to, add to list of extents to delete.
*/
if (do_fx && !(bflags & XFS_BMAPI_REMAP)) {
if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) {
xfs_refcount_decrease_extent(tp, del);
} else {
__xfs_free_extent_later(tp, del->br_startblock,
del->br_blockcount, NULL,
(bflags & XFS_BMAPI_NODISCARD) ||
del->br_state == XFS_EXT_UNWRITTEN);
}
}
/*
* Adjust inode # blocks in the file.
*/
if (nblks)
ip->i_nblocks -= nblks;
/*
* Adjust quota data.
*/
if (qfield && !(bflags & XFS_BMAPI_REMAP))
xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks);
done:
*logflagsp = flags;
return error;
}
/*
* Unmap (remove) blocks from a file.
* If nexts is nonzero then the number of extents to remove is limited to
* that value. If not all extents in the block range can be removed then
* *done is set.
*/
int /* error */
__xfs_bunmapi(
struct xfs_trans *tp, /* transaction pointer */
struct xfs_inode *ip, /* incore inode */
xfs_fileoff_t start, /* first file offset deleted */
xfs_filblks_t *rlen, /* i/o: amount remaining */
int flags, /* misc flags */
xfs_extnum_t nexts) /* number of extents max */
{
struct xfs_btree_cur *cur; /* bmap btree cursor */
struct xfs_bmbt_irec del; /* extent being deleted */
int error; /* error return value */
xfs_extnum_t extno; /* extent number in list */
struct xfs_bmbt_irec got; /* current extent record */
struct xfs_ifork *ifp; /* inode fork pointer */
int isrt; /* freeing in rt area */
int logflags; /* transaction logging flags */
xfs_extlen_t mod; /* rt extent offset */
struct xfs_mount *mp = ip->i_mount;
int tmp_logflags; /* partial logging flags */
int wasdel; /* was a delayed alloc extent */
int whichfork; /* data or attribute fork */
xfs_fsblock_t sum;
xfs_filblks_t len = *rlen; /* length to unmap in file */
xfs_fileoff_t max_len;
xfs_fileoff_t end;
struct xfs_iext_cursor icur;
bool done = false;
trace_xfs_bunmap(ip, start, len, flags, _RET_IP_);
whichfork = xfs_bmapi_whichfork(flags);
ASSERT(whichfork != XFS_COW_FORK);
ifp = XFS_IFORK_PTR(ip, whichfork);
if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)))
return -EFSCORRUPTED;
if (xfs_is_shutdown(mp))
return -EIO;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
ASSERT(len > 0);
ASSERT(nexts >= 0);
/*
* Guesstimate how many blocks we can unmap without running the risk of
* blowing out the transaction with a mix of EFIs and reflink
* adjustments.
*/
if (tp && xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK)
max_len = min(len, xfs_refcount_max_unmap(tp->t_log_res));
else
max_len = len;
error = xfs_iread_extents(tp, ip, whichfork);
if (error)
return error;
if (xfs_iext_count(ifp) == 0) {
*rlen = 0;
return 0;
}
XFS_STATS_INC(mp, xs_blk_unmap);
isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
end = start + len;
if (!xfs_iext_lookup_extent_before(ip, ifp, &end, &icur, &got)) {
*rlen = 0;
return 0;
}
end--;
logflags = 0;
if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
ASSERT(ifp->if_format == XFS_DINODE_FMT_BTREE);
cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
cur->bc_ino.flags = 0;
} else
cur = NULL;
if (isrt) {
/*
* Synchronize by locking the bitmap inode.
*/
xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL|XFS_ILOCK_RTBITMAP);
xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL);
xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL|XFS_ILOCK_RTSUM);
xfs_trans_ijoin(tp, mp->m_rsumip, XFS_ILOCK_EXCL);
}
extno = 0;
while (end != (xfs_fileoff_t)-1 && end >= start &&
(nexts == 0 || extno < nexts) && max_len > 0) {
/*
* Is the found extent after a hole in which end lives?
* Just back up to the previous extent, if so.
*/
if (got.br_startoff > end &&
!xfs_iext_prev_extent(ifp, &icur, &got)) {
done = true;
break;
}
/*
* Is the last block of this extent before the range
* we're supposed to delete? If so, we're done.
*/
end = XFS_FILEOFF_MIN(end,
got.br_startoff + got.br_blockcount - 1);
if (end < start)
break;
/*
* Then deal with the (possibly delayed) allocated space
* we found.
*/
del = got;
wasdel = isnullstartblock(del.br_startblock);
if (got.br_startoff < start) {
del.br_startoff = start;
del.br_blockcount -= start - got.br_startoff;
if (!wasdel)
del.br_startblock += start - got.br_startoff;
}
if (del.br_startoff + del.br_blockcount > end + 1)
del.br_blockcount = end + 1 - del.br_startoff;
/* How much can we safely unmap? */
if (max_len < del.br_blockcount) {
del.br_startoff += del.br_blockcount - max_len;
if (!wasdel)
del.br_startblock += del.br_blockcount - max_len;
del.br_blockcount = max_len;
}
if (!isrt)
goto delete;
sum = del.br_startblock + del.br_blockcount;
div_u64_rem(sum, mp->m_sb.sb_rextsize, &mod);
if (mod) {
/*
* Realtime extent not lined up at the end.
* The extent could have been split into written
* and unwritten pieces, or we could just be
* unmapping part of it. But we can't really
* get rid of part of a realtime extent.
*/
if (del.br_state == XFS_EXT_UNWRITTEN) {
/*
* This piece is unwritten, or we're not
* using unwritten extents. Skip over it.
*/
ASSERT(end >= mod);
end -= mod > del.br_blockcount ?
del.br_blockcount : mod;
if (end < got.br_startoff &&
!xfs_iext_prev_extent(ifp, &icur, &got)) {
done = true;
break;
}
continue;
}
/*
* It's written, turn it unwritten.
* This is better than zeroing it.
*/
ASSERT(del.br_state == XFS_EXT_NORM);
ASSERT(tp->t_blk_res > 0);
/*
* If this spans a realtime extent boundary,
* chop it back to the start of the one we end at.
*/
if (del.br_blockcount > mod) {
del.br_startoff += del.br_blockcount - mod;
del.br_startblock += del.br_blockcount - mod;
del.br_blockcount = mod;
}
del.br_state = XFS_EXT_UNWRITTEN;
error = xfs_bmap_add_extent_unwritten_real(tp, ip,
whichfork, &icur, &cur, &del,
&logflags);
if (error)
goto error0;
goto nodelete;
}
div_u64_rem(del.br_startblock, mp->m_sb.sb_rextsize, &mod);
if (mod) {
xfs_extlen_t off = mp->m_sb.sb_rextsize - mod;
/*
* Realtime extent is lined up at the end but not
* at the front. We'll get rid of full extents if
* we can.
*/
if (del.br_blockcount > off) {
del.br_blockcount -= off;
del.br_startoff += off;
del.br_startblock += off;
} else if (del.br_startoff == start &&
(del.br_state == XFS_EXT_UNWRITTEN ||
tp->t_blk_res == 0)) {
/*
* Can't make it unwritten. There isn't
* a full extent here so just skip it.
*/
ASSERT(end >= del.br_blockcount);
end -= del.br_blockcount;
if (got.br_startoff > end &&
!xfs_iext_prev_extent(ifp, &icur, &got)) {
done = true;
break;
}
continue;
} else if (del.br_state == XFS_EXT_UNWRITTEN) {
struct xfs_bmbt_irec prev;
xfs_fileoff_t unwrite_start;
/*
* This one is already unwritten.
* It must have a written left neighbor.
* Unwrite the killed part of that one and
* try again.
*/
if (!xfs_iext_prev_extent(ifp, &icur, &prev))
ASSERT(0);
ASSERT(prev.br_state == XFS_EXT_NORM);
ASSERT(!isnullstartblock(prev.br_startblock));
ASSERT(del.br_startblock ==
prev.br_startblock + prev.br_blockcount);
unwrite_start = max3(start,
del.br_startoff - mod,
prev.br_startoff);
mod = unwrite_start - prev.br_startoff;
prev.br_startoff = unwrite_start;
prev.br_startblock += mod;
prev.br_blockcount -= mod;
prev.br_state = XFS_EXT_UNWRITTEN;
error = xfs_bmap_add_extent_unwritten_real(tp,
ip, whichfork, &icur, &cur,
&prev, &logflags);
if (error)
goto error0;
goto nodelete;
} else {
ASSERT(del.br_state == XFS_EXT_NORM);
del.br_state = XFS_EXT_UNWRITTEN;
error = xfs_bmap_add_extent_unwritten_real(tp,
ip, whichfork, &icur, &cur,
&del, &logflags);
if (error)
goto error0;
goto nodelete;
}
}
delete:
if (wasdel) {
error = xfs_bmap_del_extent_delay(ip, whichfork, &icur,
&got, &del);
} else {
error = xfs_bmap_del_extent_real(ip, tp, &icur, cur,
&del, &tmp_logflags, whichfork,
flags);
logflags |= tmp_logflags;
}
if (error)
goto error0;
max_len -= del.br_blockcount;
end = del.br_startoff - 1;
nodelete:
/*
* If not done go on to the next (previous) record.
*/
if (end != (xfs_fileoff_t)-1 && end >= start) {
if (!xfs_iext_get_extent(ifp, &icur, &got) ||
(got.br_startoff > end &&
!xfs_iext_prev_extent(ifp, &icur, &got))) {
done = true;
break;
}
extno++;
}
}
if (done || end == (xfs_fileoff_t)-1 || end < start)
*rlen = 0;
else
*rlen = end - start + 1;
/*
* Convert to a btree if necessary.
*/
if (xfs_bmap_needs_btree(ip, whichfork)) {
ASSERT(cur == NULL);
error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0,
&tmp_logflags, whichfork);
logflags |= tmp_logflags;
} else {
error = xfs_bmap_btree_to_extents(tp, ip, cur, &logflags,
whichfork);
}
error0:
/*
* Log everything. Do this after conversion, there's no point in
* logging the extent records if we've converted to btree format.
*/
if ((logflags & xfs_ilog_fext(whichfork)) &&
ifp->if_format != XFS_DINODE_FMT_EXTENTS)
logflags &= ~xfs_ilog_fext(whichfork);
else if ((logflags & xfs_ilog_fbroot(whichfork)) &&
ifp->if_format != XFS_DINODE_FMT_BTREE)
logflags &= ~xfs_ilog_fbroot(whichfork);
/*
* Log inode even in the error case, if the transaction
* is dirty we'll need to shut down the filesystem.
*/
if (logflags)
xfs_trans_log_inode(tp, ip, logflags);
if (cur) {
if (!error)
cur->bc_ino.allocated = 0;
xfs_btree_del_cursor(cur, error);
}
return error;
}
/* Unmap a range of a file. */
int
xfs_bunmapi(
xfs_trans_t *tp,
struct xfs_inode *ip,
xfs_fileoff_t bno,
xfs_filblks_t len,
int flags,
xfs_extnum_t nexts,
int *done)
{
int error;
error = __xfs_bunmapi(tp, ip, bno, &len, flags, nexts);
*done = (len == 0);
return error;
}
/*
* Determine whether an extent shift can be accomplished by a merge with the
* extent that precedes the target hole of the shift.
*/
STATIC bool
xfs_bmse_can_merge(
struct xfs_bmbt_irec *left, /* preceding extent */
struct xfs_bmbt_irec *got, /* current extent to shift */
xfs_fileoff_t shift) /* shift fsb */
{
xfs_fileoff_t startoff;
startoff = got->br_startoff - shift;
/*
* The extent, once shifted, must be adjacent in-file and on-disk with
* the preceding extent.
*/
if ((left->br_startoff + left->br_blockcount != startoff) ||
(left->br_startblock + left->br_blockcount != got->br_startblock) ||
(left->br_state != got->br_state) ||
(left->br_blockcount + got->br_blockcount > MAXEXTLEN))
return false;
return true;
}
/*
* A bmap extent shift adjusts the file offset of an extent to fill a preceding
* hole in the file. If an extent shift would result in the extent being fully
* adjacent to the extent that currently precedes the hole, we can merge with
* the preceding extent rather than do the shift.
*
* This function assumes the caller has verified a shift-by-merge is possible
* with the provided extents via xfs_bmse_can_merge().
*/
STATIC int
xfs_bmse_merge(
struct xfs_trans *tp,
struct xfs_inode *ip,
int whichfork,
xfs_fileoff_t shift, /* shift fsb */
struct xfs_iext_cursor *icur,
struct xfs_bmbt_irec *got, /* extent to shift */
struct xfs_bmbt_irec *left, /* preceding extent */
struct xfs_btree_cur *cur,
int *logflags) /* output */
{
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
struct xfs_bmbt_irec new;
xfs_filblks_t blockcount;
int error, i;
struct xfs_mount *mp = ip->i_mount;
blockcount = left->br_blockcount + got->br_blockcount;
ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
ASSERT(xfs_bmse_can_merge(left, got, shift));
new = *left;
new.br_blockcount = blockcount;
/*
* Update the on-disk extent count, the btree if necessary and log the
* inode.
*/
ifp->if_nextents--;
*logflags |= XFS_ILOG_CORE;
if (!cur) {
*logflags |= XFS_ILOG_DEXT;
goto done;
}
/* lookup and remove the extent to merge */
error = xfs_bmbt_lookup_eq(cur, got, &i);
if (error)
return error;
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(mp, i != 1))
return -EFSCORRUPTED;
error = xfs_btree_delete(cur, &i);
if (error)
return error;
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(mp, i != 1))
return -EFSCORRUPTED;
/* lookup and update size of the previous extent */
error = xfs_bmbt_lookup_eq(cur, left, &i);
if (error)
return error;
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(mp, i != 1))
return -EFSCORRUPTED;
error = xfs_bmbt_update(cur, &new);
if (error)
return error;
xfs: convert inode to extent format after extent merge due to shift The collapse range operation can merge extents if two newly adjacent extents are physically contiguous. If the extent count is reduced on a btree format inode, a change to extent format might be necessary. This format change currently occurs as a side effect of the file size update after extents have been shifted for the collapse. This codepath ultimately calls xfs_bunmapi(), which happens to check for and execute the format conversion even if there were no blocks removed from the mapping. While this ultimately puts the inode into the correct state, the fact the format conversion occurs in a separate transaction from the change that called for it is a problem. If an extent shift transaction commits and the filesystem happens to crash before the format conversion, the inode fork is left in a corrupted state after log recovery. The inode fork verifier fails and xfs_repair ultimately nukes the inode. This problem was originally reproduced by generic/388. Similar to how the insert range extent split code handles extent to btree conversion, update the collapse range extent merge code to handle btree to extent format conversion in the same transaction that merges the extents. This ensures that the inode fork format remains consistent if the filesystem happens to crash in the middle of a collapse range operation that changes the inode fork format. Signed-off-by: Brian Foster <bfoster@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
2019-09-19 08:50:45 +08:00
/* change to extent format if required after extent removal */
error = xfs_bmap_btree_to_extents(tp, ip, cur, logflags, whichfork);
if (error)
return error;
done:
xfs_iext_remove(ip, icur, 0);
xfs_iext_prev(ifp, icur);
xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), icur,
&new);
/* update reverse mapping. rmap functions merge the rmaps for us */
xfs_rmap_unmap_extent(tp, ip, whichfork, got);
memcpy(&new, got, sizeof(new));
new.br_startoff = left->br_startoff + left->br_blockcount;
xfs_rmap_map_extent(tp, ip, whichfork, &new);
return 0;
}
static int
xfs_bmap_shift_update_extent(
struct xfs_trans *tp,
struct xfs_inode *ip,
int whichfork,
struct xfs_iext_cursor *icur,
struct xfs_bmbt_irec *got,
struct xfs_btree_cur *cur,
int *logflags,
xfs_fileoff_t startoff)
{
struct xfs_mount *mp = ip->i_mount;
struct xfs_bmbt_irec prev = *got;
int error, i;
*logflags |= XFS_ILOG_CORE;
got->br_startoff = startoff;
if (cur) {
error = xfs_bmbt_lookup_eq(cur, &prev, &i);
if (error)
return error;
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(mp, i != 1))
return -EFSCORRUPTED;
error = xfs_bmbt_update(cur, got);
if (error)
return error;
} else {
*logflags |= XFS_ILOG_DEXT;
}
xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), icur,
got);
/* update reverse mapping */
xfs_rmap_unmap_extent(tp, ip, whichfork, &prev);
xfs_rmap_map_extent(tp, ip, whichfork, got);
return 0;
}
int
xfs_bmap_collapse_extents(
struct xfs_trans *tp,
struct xfs_inode *ip,
xfs_fileoff_t *next_fsb,
xfs_fileoff_t offset_shift_fsb,
bool *done)
{
int whichfork = XFS_DATA_FORK;
struct xfs_mount *mp = ip->i_mount;
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
struct xfs_btree_cur *cur = NULL;
struct xfs_bmbt_irec got, prev;
struct xfs_iext_cursor icur;
xfs_fileoff_t new_startoff;
int error = 0;
int logflags = 0;
if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
return -EFSCORRUPTED;
}
if (xfs_is_shutdown(mp))
return -EIO;
ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL));
error = xfs_iread_extents(tp, ip, whichfork);
if (error)
return error;
if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
cur->bc_ino.flags = 0;
}
if (!xfs_iext_lookup_extent(ip, ifp, *next_fsb, &icur, &got)) {
*done = true;
goto del_cursor;
}
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(mp, isnullstartblock(got.br_startblock))) {
error = -EFSCORRUPTED;
goto del_cursor;
}
new_startoff = got.br_startoff - offset_shift_fsb;
if (xfs_iext_peek_prev_extent(ifp, &icur, &prev)) {
if (new_startoff < prev.br_startoff + prev.br_blockcount) {
error = -EINVAL;
goto del_cursor;
}
if (xfs_bmse_can_merge(&prev, &got, offset_shift_fsb)) {
error = xfs_bmse_merge(tp, ip, whichfork,
offset_shift_fsb, &icur, &got, &prev,
cur, &logflags);
if (error)
goto del_cursor;
goto done;
}
} else {
if (got.br_startoff < offset_shift_fsb) {
error = -EINVAL;
goto del_cursor;
}
}
error = xfs_bmap_shift_update_extent(tp, ip, whichfork, &icur, &got,
cur, &logflags, new_startoff);
if (error)
goto del_cursor;
done:
if (!xfs_iext_next_extent(ifp, &icur, &got)) {
*done = true;
goto del_cursor;
}
*next_fsb = got.br_startoff;
del_cursor:
if (cur)
xfs_btree_del_cursor(cur, error);
if (logflags)
xfs_trans_log_inode(tp, ip, logflags);
return error;
}
/* Make sure we won't be right-shifting an extent past the maximum bound. */
int
xfs_bmap_can_insert_extents(
struct xfs_inode *ip,
xfs_fileoff_t off,
xfs_fileoff_t shift)
{
struct xfs_bmbt_irec got;
int is_empty;
int error = 0;
ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
if (xfs_is_shutdown(ip->i_mount))
return -EIO;
xfs_ilock(ip, XFS_ILOCK_EXCL);
error = xfs_bmap_last_extent(NULL, ip, XFS_DATA_FORK, &got, &is_empty);
if (!error && !is_empty && got.br_startoff >= off &&
((got.br_startoff + shift) & BMBT_STARTOFF_MASK) < got.br_startoff)
error = -EINVAL;
xfs_iunlock(ip, XFS_ILOCK_EXCL);
return error;
}
int
xfs_bmap_insert_extents(
struct xfs_trans *tp,
struct xfs_inode *ip,
xfs_fileoff_t *next_fsb,
xfs_fileoff_t offset_shift_fsb,
bool *done,
xfs_fileoff_t stop_fsb)
{
int whichfork = XFS_DATA_FORK;
struct xfs_mount *mp = ip->i_mount;
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
struct xfs_btree_cur *cur = NULL;
struct xfs_bmbt_irec got, next;
struct xfs_iext_cursor icur;
xfs_fileoff_t new_startoff;
int error = 0;
int logflags = 0;
if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
return -EFSCORRUPTED;
}
if (xfs_is_shutdown(mp))
return -EIO;
ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL));
error = xfs_iread_extents(tp, ip, whichfork);
if (error)
return error;
if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
cur->bc_ino.flags = 0;
}
if (*next_fsb == NULLFSBLOCK) {
xfs_iext_last(ifp, &icur);
if (!xfs_iext_get_extent(ifp, &icur, &got) ||
stop_fsb > got.br_startoff) {
*done = true;
goto del_cursor;
}
} else {
if (!xfs_iext_lookup_extent(ip, ifp, *next_fsb, &icur, &got)) {
*done = true;
goto del_cursor;
}
}
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(mp, isnullstartblock(got.br_startblock))) {
error = -EFSCORRUPTED;
goto del_cursor;
}
xfs: stabilize insert range start boundary to avoid COW writeback race generic/522 (fsx) occasionally fails with a file corruption due to an insert range operation. The primary characteristic of the corruption is a misplaced insert range operation that differs from the requested target offset. The reason for this behavior is a race between the extent shift sequence of an insert range and a COW writeback completion that causes a front merge with the first extent in the shift. The shift preparation function flushes and unmaps from the target offset of the operation to the end of the file to ensure no modifications can be made and page cache is invalidated before file data is shifted. An insert range operation then splits the extent at the target offset, if necessary, and begins to shift the start offset of each extent starting from the end of the file to the start offset. The shift sequence operates at extent level and so depends on the preparation sequence to guarantee no changes can be made to the target range during the shift. If the block immediately prior to the target offset was dirty and shared, however, it can undergo writeback and move from the COW fork to the data fork at any point during the shift. If the block is contiguous with the block at the start offset of the insert range, it can front merge and alter the start offset of the extent. Once the shift sequence reaches the target offset, it shifts based on the latest start offset and silently changes the target offset of the operation and corrupts the file. To address this problem, update the shift preparation code to stabilize the start boundary along with the full range of the insert. Also update the existing corruption check to fail if any extent is shifted with a start offset behind the target offset of the insert range. This prevents insert from racing with COW writeback completion and fails loudly in the event of an unexpected extent shift. Signed-off-by: Brian Foster <bfoster@redhat.com> Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
2019-12-12 05:18:38 +08:00
if (XFS_IS_CORRUPT(mp, stop_fsb > got.br_startoff)) {
error = -EFSCORRUPTED;
goto del_cursor;
}
new_startoff = got.br_startoff + offset_shift_fsb;
if (xfs_iext_peek_next_extent(ifp, &icur, &next)) {
if (new_startoff + got.br_blockcount > next.br_startoff) {
error = -EINVAL;
goto del_cursor;
}
/*
* Unlike a left shift (which involves a hole punch), a right
* shift does not modify extent neighbors in any way. We should
* never find mergeable extents in this scenario. Check anyways
* and warn if we encounter two extents that could be one.
*/
if (xfs_bmse_can_merge(&got, &next, offset_shift_fsb))
WARN_ON_ONCE(1);
}
error = xfs_bmap_shift_update_extent(tp, ip, whichfork, &icur, &got,
cur, &logflags, new_startoff);
if (error)
goto del_cursor;
if (!xfs_iext_prev_extent(ifp, &icur, &got) ||
stop_fsb >= got.br_startoff + got.br_blockcount) {
*done = true;
goto del_cursor;
}
*next_fsb = got.br_startoff;
del_cursor:
if (cur)
xfs_btree_del_cursor(cur, error);
if (logflags)
xfs_trans_log_inode(tp, ip, logflags);
return error;
}
/*
* Splits an extent into two extents at split_fsb block such that it is the
* first block of the current_ext. @ext is a target extent to be split.
* @split_fsb is a block where the extents is split. If split_fsb lies in a
* hole or the first block of extents, just return 0.
*/
int
xfs_bmap_split_extent(
struct xfs_trans *tp,
struct xfs_inode *ip,
xfs_fileoff_t split_fsb)
{
int whichfork = XFS_DATA_FORK;
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
struct xfs_btree_cur *cur = NULL;
struct xfs_bmbt_irec got;
struct xfs_bmbt_irec new; /* split extent */
struct xfs_mount *mp = ip->i_mount;
xfs_fsblock_t gotblkcnt; /* new block count for got */
struct xfs_iext_cursor icur;
int error = 0;
int logflags = 0;
int i = 0;
if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
return -EFSCORRUPTED;
}
if (xfs_is_shutdown(mp))
return -EIO;
/* Read in all the extents */
error = xfs_iread_extents(tp, ip, whichfork);
if (error)
return error;
/*
* If there are not extents, or split_fsb lies in a hole we are done.
*/
if (!xfs_iext_lookup_extent(ip, ifp, split_fsb, &icur, &got) ||
got.br_startoff >= split_fsb)
return 0;
gotblkcnt = split_fsb - got.br_startoff;
new.br_startoff = split_fsb;
new.br_startblock = got.br_startblock + gotblkcnt;
new.br_blockcount = got.br_blockcount - gotblkcnt;
new.br_state = got.br_state;
if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
cur->bc_ino.flags = 0;
error = xfs_bmbt_lookup_eq(cur, &got, &i);
if (error)
goto del_cursor;
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(mp, i != 1)) {
error = -EFSCORRUPTED;
goto del_cursor;
}
}
got.br_blockcount = gotblkcnt;
xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), &icur,
&got);
logflags = XFS_ILOG_CORE;
if (cur) {
error = xfs_bmbt_update(cur, &got);
if (error)
goto del_cursor;
} else
logflags |= XFS_ILOG_DEXT;
/* Add new extent */
xfs_iext_next(ifp, &icur);
xfs_iext_insert(ip, &icur, &new, 0);
ifp->if_nextents++;
if (cur) {
error = xfs_bmbt_lookup_eq(cur, &new, &i);
if (error)
goto del_cursor;
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(mp, i != 0)) {
error = -EFSCORRUPTED;
goto del_cursor;
}
error = xfs_btree_insert(cur, &i);
if (error)
goto del_cursor;
xfs: kill the XFS_WANT_CORRUPT_* macros The XFS_WANT_CORRUPT_* macros conceal subtle side effects such as the creation of local variables and redirections of the code flow. This is pretty ugly, so replace them with explicit XFS_IS_CORRUPT tests that remove both of those ugly points. The change was performed with the following coccinelle script: @@ expression mp, test; identifier label; @@ - XFS_WANT_CORRUPTED_GOTO(mp, test, label); + if (XFS_IS_CORRUPT(mp, !test)) { error = -EFSCORRUPTED; goto label; } @@ expression mp, test; @@ - XFS_WANT_CORRUPTED_RETURN(mp, test); + if (XFS_IS_CORRUPT(mp, !test)) return -EFSCORRUPTED; @@ expression mp, lval, rval; @@ - XFS_IS_CORRUPT(mp, !(lval == rval)) + XFS_IS_CORRUPT(mp, lval != rval) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 && e2)) + XFS_IS_CORRUPT(mp, !e1 || !e2) @@ expression e1, e2; @@ - !(e1 == e2) + e1 != e2 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 && e3 == e4) || e5 != e6 + e1 != e2 || e3 != e4 || e5 != e6 @@ expression e1, e2, e3, e4, e5, e6; @@ - !(e1 == e2 || (e3 <= e4 && e5 <= e6)) + e1 != e2 && (e3 > e4 || e5 > e6) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2)) + XFS_IS_CORRUPT(mp, e1 > e2) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 < e2)) + XFS_IS_CORRUPT(mp, e1 >= e2) @@ expression mp, e1; @@ - XFS_IS_CORRUPT(mp, !!e1) + XFS_IS_CORRUPT(mp, e1) @@ expression mp, e1, e2; @@ - XFS_IS_CORRUPT(mp, !(e1 || e2)) + XFS_IS_CORRUPT(mp, !e1 && !e2) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 == e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 != e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 <= e2) || !(e3 >= e4)) + XFS_IS_CORRUPT(mp, e1 > e2 || e3 < e4) @@ expression mp, e1, e2, e3, e4; @@ - XFS_IS_CORRUPT(mp, !(e1 == e2) && !(e3 <= e4)) + XFS_IS_CORRUPT(mp, e1 != e2 && e3 > e4) Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
2019-11-12 04:52:18 +08:00
if (XFS_IS_CORRUPT(mp, i != 1)) {
error = -EFSCORRUPTED;
goto del_cursor;
}
}
/*
* Convert to a btree if necessary.
*/
if (xfs_bmap_needs_btree(ip, whichfork)) {
int tmp_logflags; /* partial log flag return val */
ASSERT(cur == NULL);
error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0,
&tmp_logflags, whichfork);
logflags |= tmp_logflags;
}
del_cursor:
if (cur) {
cur->bc_ino.allocated = 0;
xfs_btree_del_cursor(cur, error);
}
if (logflags)
xfs_trans_log_inode(tp, ip, logflags);
return error;
}
/* Deferred mapping is only for real extents in the data fork. */
static bool
xfs_bmap_is_update_needed(
struct xfs_bmbt_irec *bmap)
{
return bmap->br_startblock != HOLESTARTBLOCK &&
bmap->br_startblock != DELAYSTARTBLOCK;
}
/* Record a bmap intent. */
static int
__xfs_bmap_add(
struct xfs_trans *tp,
enum xfs_bmap_intent_type type,
struct xfs_inode *ip,
int whichfork,
struct xfs_bmbt_irec *bmap)
{
struct xfs_bmap_intent *bi;
trace_xfs_bmap_defer(tp->t_mountp,
XFS_FSB_TO_AGNO(tp->t_mountp, bmap->br_startblock),
type,
XFS_FSB_TO_AGBNO(tp->t_mountp, bmap->br_startblock),
ip->i_ino, whichfork,
bmap->br_startoff,
bmap->br_blockcount,
bmap->br_state);
bi = kmem_cache_alloc(xfs_bmap_intent_cache, GFP_NOFS | __GFP_NOFAIL);
INIT_LIST_HEAD(&bi->bi_list);
bi->bi_type = type;
bi->bi_owner = ip;
bi->bi_whichfork = whichfork;
bi->bi_bmap = *bmap;
xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_BMAP, &bi->bi_list);
return 0;
}
/* Map an extent into a file. */
void
xfs_bmap_map_extent(
struct xfs_trans *tp,
struct xfs_inode *ip,
struct xfs_bmbt_irec *PREV)
{
if (!xfs_bmap_is_update_needed(PREV))
return;
__xfs_bmap_add(tp, XFS_BMAP_MAP, ip, XFS_DATA_FORK, PREV);
}
/* Unmap an extent out of a file. */
void
xfs_bmap_unmap_extent(
struct xfs_trans *tp,
struct xfs_inode *ip,
struct xfs_bmbt_irec *PREV)
{
if (!xfs_bmap_is_update_needed(PREV))
return;
__xfs_bmap_add(tp, XFS_BMAP_UNMAP, ip, XFS_DATA_FORK, PREV);
}
/*
* Process one of the deferred bmap operations. We pass back the
* btree cursor to maintain our lock on the bmapbt between calls.
*/
int
xfs_bmap_finish_one(
struct xfs_trans *tp,
struct xfs_inode *ip,
enum xfs_bmap_intent_type type,
int whichfork,
xfs_fileoff_t startoff,
xfs_fsblock_t startblock,
xfs_filblks_t *blockcount,
xfs_exntst_t state)
{
int error = 0;
ASSERT(tp->t_firstblock == NULLFSBLOCK);
trace_xfs_bmap_deferred(tp->t_mountp,
XFS_FSB_TO_AGNO(tp->t_mountp, startblock), type,
XFS_FSB_TO_AGBNO(tp->t_mountp, startblock),
ip->i_ino, whichfork, startoff, *blockcount, state);
if (WARN_ON_ONCE(whichfork != XFS_DATA_FORK))
return -EFSCORRUPTED;
if (XFS_TEST_ERROR(false, tp->t_mountp,
XFS_ERRTAG_BMAP_FINISH_ONE))
return -EIO;
switch (type) {
case XFS_BMAP_MAP:
error = xfs_bmapi_remap(tp, ip, startoff, *blockcount,
startblock, 0);
*blockcount = 0;
break;
case XFS_BMAP_UNMAP:
error = __xfs_bunmapi(tp, ip, startoff, blockcount,
XFS_BMAPI_REMAP, 1);
break;
default:
ASSERT(0);
error = -EFSCORRUPTED;
}
return error;
}
/* Check that an inode's extent does not have invalid flags or bad ranges. */
xfs_failaddr_t
xfs_bmap_validate_extent(
struct xfs_inode *ip,
int whichfork,
struct xfs_bmbt_irec *irec)
{
struct xfs_mount *mp = ip->i_mount;
if (!xfs_verify_fileext(mp, irec->br_startoff, irec->br_blockcount))
return __this_address;
if (XFS_IS_REALTIME_INODE(ip) && whichfork == XFS_DATA_FORK) {
if (!xfs_verify_rtext(mp, irec->br_startblock,
irec->br_blockcount))
return __this_address;
} else {
if (!xfs_verify_fsbext(mp, irec->br_startblock,
irec->br_blockcount))
return __this_address;
}
if (irec->br_state != XFS_EXT_NORM && whichfork != XFS_DATA_FORK)
return __this_address;
return NULL;
}
int __init
xfs_bmap_intent_init_cache(void)
{
xfs_bmap_intent_cache = kmem_cache_create("xfs_bmap_intent",
sizeof(struct xfs_bmap_intent),
0, 0, NULL);
return xfs_bmap_intent_cache != NULL ? 0 : -ENOMEM;
}
void
xfs_bmap_intent_destroy_cache(void)
{
kmem_cache_destroy(xfs_bmap_intent_cache);
xfs_bmap_intent_cache = NULL;
}