xfs: bugfixes for 3.7-rc5
- fix for large transactions spanning multiple iclog buffers - zero the allocation_args structure on the stack before using it to determine whether to use a worker for allocation - move allocation stack switch to xfs_bmapi_allocate in order to prevent deadlock on AGF buffers - growfs no longer reads in garbage for new secondary superblocks - silence a build warning - ensure that invalid buffers never get written to disk while on free list - don't vmap inode cluster buffers during free - fix buffer shutdown reference count mismatch - fix reading of wrapped log data -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.10 (GNU/Linux) iQIcBAABAgAGBQJQm/NqAAoJENaLyazVq6ZOhkgP/2wcLIfJBfdfFV/MBEYYNqdB ufgsl351L6d9xX9WdQNIB3ely7sbZhsh+9uZM1i7LuySXuYC1JC7OvOBByV0UV9W /WWIzonFC+8W0X9EsetUb2Ru+4C40RAodZMRwsdBQQXhSS3A4GoUbv4t/Ty3bEk/ 1v27diGjCKQYBongHhFVjaNjtuXIebPG+yIegkZJseKnd7zRZPo2AgULIxtWPcIT xkH3WP3fVU/zF6ESjF4cGqGMtKaw+a2nk8mfMqHNRIvehBQHZ7CRnBwgVMtQk6fr 0J62GvAFDA10bwz/toUd/8TJNewJKjBhEwfkITsO3EHw8SBp0KvrvrGvfDVew4Wr oOb0+YkI+jtFZkz3WpZqd+Kgryeyxh6j1OBQ09FVLe3SQ2ZKcDVKFOkaPjEqqNuF TUIwenWK4I5zmWeWGkLUJTzzY/ModVKUGTft57HUTyD318H7xFu2fBYj10jVE8dl fl4u8A3ifCXcancROEeCX7AXkY3SFrkvYqCWDWqNDzhyr1WsjEJmkf6F45makaD0 cPBsKYUi3P3pVvbcdHdGEgCWx5X1TzOyhX8pggWHbBRfc8frg9IIfMX4fl5f66vP fmxRHwP+4co1PQdu++LqJ92clz2ja2DYRQzKVJPNQkhRfHOFDEQ8DlxPFrCb8iK0 HHHlp4v+Gyme6YhgD317 =+24f -----END PGP SIGNATURE----- Merge tag 'for-linus-v3.7-rc5' of git://oss.sgi.com/xfs/xfs Pull xfs bugfixes from Ben Myers: - fix for large transactions spanning multiple iclog buffers - zero the allocation_args structure on the stack before using it to determine whether to use a worker for allocation - move allocation stack switch to xfs_bmapi_allocate in order to prevent deadlock on AGF buffers - growfs no longer reads in garbage for new secondary superblocks - silence a build warning - ensure that invalid buffers never get written to disk while on free list - don't vmap inode cluster buffers during free - fix 
buffer shutdown reference count mismatch - fix reading of wrapped log data * tag 'for-linus-v3.7-rc5' of git://oss.sgi.com/xfs/xfs: xfs: fix reading of wrapped log data xfs: fix buffer shudown reference count mismatch xfs: don't vmap inode cluster buffers during free xfs: invalidate allocbt blocks moved to the free list xfs: silence uninitialised f.file warning. xfs: growfs: don't read garbage for new secondary superblocks xfs: move allocation stack switch up to xfs_bmapi_allocate xfs: introduce XFS_BMAPI_STACK_SWITCH xfs: zero allocation_args on the kernel stack xfs: only update the last_sync_lsn when a transaction completes
This commit is contained in:
commit
a601e63717
|
@ -1866,6 +1866,7 @@ xfs_alloc_fix_freelist(
|
|||
/*
|
||||
* Initialize the args structure.
|
||||
*/
|
||||
memset(&targs, 0, sizeof(targs));
|
||||
targs.tp = tp;
|
||||
targs.mp = mp;
|
||||
targs.agbp = agbp;
|
||||
|
@ -2207,7 +2208,7 @@ xfs_alloc_read_agf(
|
|||
* group or loop over the allocation groups to find the result.
|
||||
*/
|
||||
int /* error */
|
||||
__xfs_alloc_vextent(
|
||||
xfs_alloc_vextent(
|
||||
xfs_alloc_arg_t *args) /* allocation argument structure */
|
||||
{
|
||||
xfs_agblock_t agsize; /* allocation group size */
|
||||
|
@ -2417,46 +2418,6 @@ error0:
|
|||
return error;
|
||||
}
|
||||
|
||||
static void
|
||||
xfs_alloc_vextent_worker(
|
||||
struct work_struct *work)
|
||||
{
|
||||
struct xfs_alloc_arg *args = container_of(work,
|
||||
struct xfs_alloc_arg, work);
|
||||
unsigned long pflags;
|
||||
|
||||
/* we are in a transaction context here */
|
||||
current_set_flags_nested(&pflags, PF_FSTRANS);
|
||||
|
||||
args->result = __xfs_alloc_vextent(args);
|
||||
complete(args->done);
|
||||
|
||||
current_restore_flags_nested(&pflags, PF_FSTRANS);
|
||||
}
|
||||
|
||||
/*
|
||||
* Data allocation requests often come in with little stack to work on. Push
|
||||
* them off to a worker thread so there is lots of stack to use. Metadata
|
||||
* requests, OTOH, are generally from low stack usage paths, so avoid the
|
||||
* context switch overhead here.
|
||||
*/
|
||||
int
|
||||
xfs_alloc_vextent(
|
||||
struct xfs_alloc_arg *args)
|
||||
{
|
||||
DECLARE_COMPLETION_ONSTACK(done);
|
||||
|
||||
if (!args->userdata)
|
||||
return __xfs_alloc_vextent(args);
|
||||
|
||||
|
||||
args->done = &done;
|
||||
INIT_WORK_ONSTACK(&args->work, xfs_alloc_vextent_worker);
|
||||
queue_work(xfs_alloc_wq, &args->work);
|
||||
wait_for_completion(&done);
|
||||
return args->result;
|
||||
}
|
||||
|
||||
/*
|
||||
* Free an extent.
|
||||
* Just break up the extent address and hand off to xfs_free_ag_extent
|
||||
|
|
|
@ -120,9 +120,6 @@ typedef struct xfs_alloc_arg {
|
|||
char isfl; /* set if is freelist blocks - !acctg */
|
||||
char userdata; /* set if this is user data */
|
||||
xfs_fsblock_t firstblock; /* io first block allocated */
|
||||
struct completion *done;
|
||||
struct work_struct work;
|
||||
int result;
|
||||
} xfs_alloc_arg_t;
|
||||
|
||||
/*
|
||||
|
|
|
@ -121,6 +121,8 @@ xfs_allocbt_free_block(
|
|||
xfs_extent_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1,
|
||||
XFS_EXTENT_BUSY_SKIP_DISCARD);
|
||||
xfs_trans_agbtree_delta(cur->bc_tp, -1);
|
||||
|
||||
xfs_trans_binval(cur->bc_tp, bp);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -2437,6 +2437,7 @@ xfs_bmap_btalloc(
|
|||
* Normal allocation, done through xfs_alloc_vextent.
|
||||
*/
|
||||
tryagain = isaligned = 0;
|
||||
memset(&args, 0, sizeof(args));
|
||||
args.tp = ap->tp;
|
||||
args.mp = mp;
|
||||
args.fsbno = ap->blkno;
|
||||
|
@ -3082,6 +3083,7 @@ xfs_bmap_extents_to_btree(
|
|||
* Convert to a btree with two levels, one record in root.
|
||||
*/
|
||||
XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_BTREE);
|
||||
memset(&args, 0, sizeof(args));
|
||||
args.tp = tp;
|
||||
args.mp = mp;
|
||||
args.firstblock = *firstblock;
|
||||
|
@ -3237,6 +3239,7 @@ xfs_bmap_local_to_extents(
|
|||
xfs_buf_t *bp; /* buffer for extent block */
|
||||
xfs_bmbt_rec_host_t *ep;/* extent record pointer */
|
||||
|
||||
memset(&args, 0, sizeof(args));
|
||||
args.tp = tp;
|
||||
args.mp = ip->i_mount;
|
||||
args.firstblock = *firstblock;
|
||||
|
@ -4616,12 +4619,11 @@ xfs_bmapi_delay(
|
|||
|
||||
|
||||
STATIC int
|
||||
xfs_bmapi_allocate(
|
||||
struct xfs_bmalloca *bma,
|
||||
int flags)
|
||||
__xfs_bmapi_allocate(
|
||||
struct xfs_bmalloca *bma)
|
||||
{
|
||||
struct xfs_mount *mp = bma->ip->i_mount;
|
||||
int whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
|
||||
int whichfork = (bma->flags & XFS_BMAPI_ATTRFORK) ?
|
||||
XFS_ATTR_FORK : XFS_DATA_FORK;
|
||||
struct xfs_ifork *ifp = XFS_IFORK_PTR(bma->ip, whichfork);
|
||||
int tmp_logflags = 0;
|
||||
|
@ -4654,24 +4656,27 @@ xfs_bmapi_allocate(
|
|||
* Indicate if this is the first user data in the file, or just any
|
||||
* user data.
|
||||
*/
|
||||
if (!(flags & XFS_BMAPI_METADATA)) {
|
||||
if (!(bma->flags & XFS_BMAPI_METADATA)) {
|
||||
bma->userdata = (bma->offset == 0) ?
|
||||
XFS_ALLOC_INITIAL_USER_DATA : XFS_ALLOC_USERDATA;
|
||||
}
|
||||
|
||||
bma->minlen = (flags & XFS_BMAPI_CONTIG) ? bma->length : 1;
|
||||
bma->minlen = (bma->flags & XFS_BMAPI_CONTIG) ? bma->length : 1;
|
||||
|
||||
/*
|
||||
* Only want to do the alignment at the eof if it is userdata and
|
||||
* allocation length is larger than a stripe unit.
|
||||
*/
|
||||
if (mp->m_dalign && bma->length >= mp->m_dalign &&
|
||||
!(flags & XFS_BMAPI_METADATA) && whichfork == XFS_DATA_FORK) {
|
||||
!(bma->flags & XFS_BMAPI_METADATA) && whichfork == XFS_DATA_FORK) {
|
||||
error = xfs_bmap_isaeof(bma, whichfork);
|
||||
if (error)
|
||||
return error;
|
||||
}
|
||||
|
||||
if (bma->flags & XFS_BMAPI_STACK_SWITCH)
|
||||
bma->stack_switch = 1;
|
||||
|
||||
error = xfs_bmap_alloc(bma);
|
||||
if (error)
|
||||
return error;
|
||||
|
@ -4706,7 +4711,7 @@ xfs_bmapi_allocate(
|
|||
* A wasdelay extent has been initialized, so shouldn't be flagged
|
||||
* as unwritten.
|
||||
*/
|
||||
if (!bma->wasdel && (flags & XFS_BMAPI_PREALLOC) &&
|
||||
if (!bma->wasdel && (bma->flags & XFS_BMAPI_PREALLOC) &&
|
||||
xfs_sb_version_hasextflgbit(&mp->m_sb))
|
||||
bma->got.br_state = XFS_EXT_UNWRITTEN;
|
||||
|
||||
|
@ -4734,6 +4739,45 @@ xfs_bmapi_allocate(
|
|||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
xfs_bmapi_allocate_worker(
|
||||
struct work_struct *work)
|
||||
{
|
||||
struct xfs_bmalloca *args = container_of(work,
|
||||
struct xfs_bmalloca, work);
|
||||
unsigned long pflags;
|
||||
|
||||
/* we are in a transaction context here */
|
||||
current_set_flags_nested(&pflags, PF_FSTRANS);
|
||||
|
||||
args->result = __xfs_bmapi_allocate(args);
|
||||
complete(args->done);
|
||||
|
||||
current_restore_flags_nested(&pflags, PF_FSTRANS);
|
||||
}
|
||||
|
||||
/*
|
||||
* Some allocation requests often come in with little stack to work on. Push
|
||||
* them off to a worker thread so there is lots of stack to use. Otherwise just
|
||||
* call directly to avoid the context switch overhead here.
|
||||
*/
|
||||
int
|
||||
xfs_bmapi_allocate(
|
||||
struct xfs_bmalloca *args)
|
||||
{
|
||||
DECLARE_COMPLETION_ONSTACK(done);
|
||||
|
||||
if (!args->stack_switch)
|
||||
return __xfs_bmapi_allocate(args);
|
||||
|
||||
|
||||
args->done = &done;
|
||||
INIT_WORK_ONSTACK(&args->work, xfs_bmapi_allocate_worker);
|
||||
queue_work(xfs_alloc_wq, &args->work);
|
||||
wait_for_completion(&done);
|
||||
return args->result;
|
||||
}
|
||||
|
||||
STATIC int
|
||||
xfs_bmapi_convert_unwritten(
|
||||
struct xfs_bmalloca *bma,
|
||||
|
@ -4919,6 +4963,7 @@ xfs_bmapi_write(
|
|||
bma.conv = !!(flags & XFS_BMAPI_CONVERT);
|
||||
bma.wasdel = wasdelay;
|
||||
bma.offset = bno;
|
||||
bma.flags = flags;
|
||||
|
||||
/*
|
||||
* There's a 32/64 bit type mismatch between the
|
||||
|
@ -4934,7 +4979,7 @@ xfs_bmapi_write(
|
|||
|
||||
ASSERT(len > 0);
|
||||
ASSERT(bma.length > 0);
|
||||
error = xfs_bmapi_allocate(&bma, flags);
|
||||
error = xfs_bmapi_allocate(&bma);
|
||||
if (error)
|
||||
goto error0;
|
||||
if (bma.blkno == NULLFSBLOCK)
|
||||
|
|
|
@ -77,6 +77,7 @@ typedef struct xfs_bmap_free
|
|||
* from written to unwritten, otherwise convert from unwritten to written.
|
||||
*/
|
||||
#define XFS_BMAPI_CONVERT 0x040
|
||||
#define XFS_BMAPI_STACK_SWITCH 0x080
|
||||
|
||||
#define XFS_BMAPI_FLAGS \
|
||||
{ XFS_BMAPI_ENTIRE, "ENTIRE" }, \
|
||||
|
@ -85,7 +86,8 @@ typedef struct xfs_bmap_free
|
|||
{ XFS_BMAPI_PREALLOC, "PREALLOC" }, \
|
||||
{ XFS_BMAPI_IGSTATE, "IGSTATE" }, \
|
||||
{ XFS_BMAPI_CONTIG, "CONTIG" }, \
|
||||
{ XFS_BMAPI_CONVERT, "CONVERT" }
|
||||
{ XFS_BMAPI_CONVERT, "CONVERT" }, \
|
||||
{ XFS_BMAPI_STACK_SWITCH, "STACK_SWITCH" }
|
||||
|
||||
|
||||
static inline int xfs_bmapi_aflag(int w)
|
||||
|
@ -133,6 +135,11 @@ typedef struct xfs_bmalloca {
|
|||
char userdata;/* set if is user data */
|
||||
char aeof; /* allocated space at eof */
|
||||
char conv; /* overwriting unwritten extents */
|
||||
char stack_switch;
|
||||
int flags;
|
||||
struct completion *done;
|
||||
struct work_struct work;
|
||||
int result;
|
||||
} xfs_bmalloca_t;
|
||||
|
||||
/*
|
||||
|
|
|
@ -526,7 +526,25 @@ xfs_buf_item_unpin(
|
|||
}
|
||||
xfs_buf_relse(bp);
|
||||
} else if (freed && remove) {
|
||||
/*
|
||||
* There are currently two references to the buffer - the active
|
||||
* LRU reference and the buf log item. What we are about to do
|
||||
* here - simulate a failed IO completion - requires 3
|
||||
* references.
|
||||
*
|
||||
* The LRU reference is removed by the xfs_buf_stale() call. The
|
||||
* buf item reference is removed by the xfs_buf_iodone()
|
||||
* callback that is run by xfs_buf_do_callbacks() during ioend
|
||||
* processing (via the bp->b_iodone callback), and then finally
|
||||
* the ioend processing will drop the IO reference if the buffer
|
||||
* is marked XBF_ASYNC.
|
||||
*
|
||||
* Hence we need to take an additional reference here so that IO
|
||||
* completion processing doesn't free the buffer prematurely.
|
||||
*/
|
||||
xfs_buf_lock(bp);
|
||||
xfs_buf_hold(bp);
|
||||
bp->b_flags |= XBF_ASYNC;
|
||||
xfs_buf_ioerror(bp, EIO);
|
||||
XFS_BUF_UNDONE(bp);
|
||||
xfs_buf_stale(bp);
|
||||
|
|
|
@ -399,9 +399,26 @@ xfs_growfs_data_private(
|
|||
|
||||
/* update secondary superblocks. */
|
||||
for (agno = 1; agno < nagcount; agno++) {
|
||||
error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
|
||||
error = 0;
|
||||
/*
|
||||
* new secondary superblocks need to be zeroed, not read from
|
||||
* disk as the contents of the new area we are growing into is
|
||||
* completely unknown.
|
||||
*/
|
||||
if (agno < oagcount) {
|
||||
error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
|
||||
XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)),
|
||||
XFS_FSS_TO_BB(mp, 1), 0, &bp);
|
||||
} else {
|
||||
bp = xfs_trans_get_buf(NULL, mp->m_ddev_targp,
|
||||
XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)),
|
||||
XFS_FSS_TO_BB(mp, 1), 0);
|
||||
if (bp)
|
||||
xfs_buf_zero(bp, 0, BBTOB(bp->b_length));
|
||||
else
|
||||
error = ENOMEM;
|
||||
}
|
||||
|
||||
if (error) {
|
||||
xfs_warn(mp,
|
||||
"error %d reading secondary superblock for ag %d",
|
||||
|
@ -423,7 +440,7 @@ xfs_growfs_data_private(
|
|||
break; /* no point in continuing */
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
return error;
|
||||
|
||||
error0:
|
||||
xfs_trans_cancel(tp, XFS_TRANS_ABORT);
|
||||
|
|
|
@ -250,6 +250,7 @@ xfs_ialloc_ag_alloc(
|
|||
/* boundary */
|
||||
struct xfs_perag *pag;
|
||||
|
||||
memset(&args, 0, sizeof(args));
|
||||
args.tp = tp;
|
||||
args.mp = tp->t_mountp;
|
||||
|
||||
|
|
|
@ -1509,7 +1509,8 @@ xfs_ifree_cluster(
|
|||
* to mark all the active inodes on the buffer stale.
|
||||
*/
|
||||
bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
|
||||
mp->m_bsize * blks_per_cluster, 0);
|
||||
mp->m_bsize * blks_per_cluster,
|
||||
XBF_UNMAPPED);
|
||||
|
||||
if (!bp)
|
||||
return ENOMEM;
|
||||
|
|
|
@ -70,7 +70,7 @@ xfs_find_handle(
|
|||
int hsize;
|
||||
xfs_handle_t handle;
|
||||
struct inode *inode;
|
||||
struct fd f;
|
||||
struct fd f = {0};
|
||||
struct path path;
|
||||
int error;
|
||||
struct xfs_inode *ip;
|
||||
|
|
|
@ -584,7 +584,9 @@ xfs_iomap_write_allocate(
|
|||
* pointer that the caller gave to us.
|
||||
*/
|
||||
error = xfs_bmapi_write(tp, ip, map_start_fsb,
|
||||
count_fsb, 0, &first_block, 1,
|
||||
count_fsb,
|
||||
XFS_BMAPI_STACK_SWITCH,
|
||||
&first_block, 1,
|
||||
imap, &nimaps, &free_list);
|
||||
if (error)
|
||||
goto trans_cancel;
|
||||
|
|
|
@ -2387,14 +2387,27 @@ xlog_state_do_callback(
|
|||
|
||||
|
||||
/*
|
||||
* update the last_sync_lsn before we drop the
|
||||
* Completion of an iclog IO does not imply that
|
||||
* a transaction has completed, as transactions
|
||||
* can be large enough to span many iclogs. We
|
||||
* cannot change the tail of the log half way
|
||||
* through a transaction as this may be the only
|
||||
* transaction in the log and moving the tail to
|
||||
* point to the middle of it will prevent
|
||||
* recovery from finding the start of the
|
||||
* transaction. Hence we should only update the
|
||||
* last_sync_lsn if this iclog contains
|
||||
* transaction completion callbacks on it.
|
||||
*
|
||||
* We have to do this before we drop the
|
||||
* icloglock to ensure we are the only one that
|
||||
* can update it.
|
||||
*/
|
||||
ASSERT(XFS_LSN_CMP(atomic64_read(&log->l_last_sync_lsn),
|
||||
be64_to_cpu(iclog->ic_header.h_lsn)) <= 0);
|
||||
atomic64_set(&log->l_last_sync_lsn,
|
||||
be64_to_cpu(iclog->ic_header.h_lsn));
|
||||
if (iclog->ic_callback)
|
||||
atomic64_set(&log->l_last_sync_lsn,
|
||||
be64_to_cpu(iclog->ic_header.h_lsn));
|
||||
|
||||
} else
|
||||
ioerrors++;
|
||||
|
|
|
@ -3541,7 +3541,7 @@ xlog_do_recovery_pass(
|
|||
* - order is important.
|
||||
*/
|
||||
error = xlog_bread_offset(log, 0,
|
||||
bblks - split_bblks, hbp,
|
||||
bblks - split_bblks, dbp,
|
||||
offset + BBTOB(split_bblks));
|
||||
if (error)
|
||||
goto bread_err2;
|
||||
|
|
Loading…
Reference in New Issue