xfs: bugfixes for 3.7-rc5

- fix for large transactions spanning multiple iclog buffers
 - zero the allocation_args structure on the stack before using it
   to determine whether to use a worker for allocation
 - move allocation stack switch to xfs_bmapi_allocate in order
   to prevent deadlock on AGF buffers
 - growfs no longer reads in garbage for new secondary superblocks
 - silence a build warning
 - ensure that invalid buffers never get written to disk while on
   free list
 - don't vmap inode cluster buffers during free
 - fix buffer shutdown reference count mismatch
 - fix reading of wrapped log data
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1.4.10 (GNU/Linux)
 
 iQIcBAABAgAGBQJQm/NqAAoJENaLyazVq6ZOhkgP/2wcLIfJBfdfFV/MBEYYNqdB
 ufgsl351L6d9xX9WdQNIB3ely7sbZhsh+9uZM1i7LuySXuYC1JC7OvOBByV0UV9W
 /WWIzonFC+8W0X9EsetUb2Ru+4C40RAodZMRwsdBQQXhSS3A4GoUbv4t/Ty3bEk/
 1v27diGjCKQYBongHhFVjaNjtuXIebPG+yIegkZJseKnd7zRZPo2AgULIxtWPcIT
 xkH3WP3fVU/zF6ESjF4cGqGMtKaw+a2nk8mfMqHNRIvehBQHZ7CRnBwgVMtQk6fr
 0J62GvAFDA10bwz/toUd/8TJNewJKjBhEwfkITsO3EHw8SBp0KvrvrGvfDVew4Wr
 oOb0+YkI+jtFZkz3WpZqd+Kgryeyxh6j1OBQ09FVLe3SQ2ZKcDVKFOkaPjEqqNuF
 TUIwenWK4I5zmWeWGkLUJTzzY/ModVKUGTft57HUTyD318H7xFu2fBYj10jVE8dl
 fl4u8A3ifCXcancROEeCX7AXkY3SFrkvYqCWDWqNDzhyr1WsjEJmkf6F45makaD0
 cPBsKYUi3P3pVvbcdHdGEgCWx5X1TzOyhX8pggWHbBRfc8frg9IIfMX4fl5f66vP
 fmxRHwP+4co1PQdu++LqJ92clz2ja2DYRQzKVJPNQkhRfHOFDEQ8DlxPFrCb8iK0
 HHHlp4v+Gyme6YhgD317
 =+24f
 -----END PGP SIGNATURE-----

Merge tag 'for-linus-v3.7-rc5' of git://oss.sgi.com/xfs/xfs

Pull xfs bugfixes from Ben Myers:

 - fix for large transactions spanning multiple iclog buffers

 - zero the allocation_args structure on the stack before using it to
   determine whether to use a worker for allocation

 - move allocation stack switch to xfs_bmapi_allocate in order to
   prevent deadlock on AGF buffers

 - growfs no longer reads in garbage for new secondary superblocks

 - silence a build warning

 - ensure that invalid buffers never get written to disk while on free
   list

 - don't vmap inode cluster buffers during free

 - fix buffer shutdown reference count mismatch

 - fix reading of wrapped log data

* tag 'for-linus-v3.7-rc5' of git://oss.sgi.com/xfs/xfs:
  xfs: fix reading of wrapped log data
  xfs: fix buffer shutdown reference count mismatch
  xfs: don't vmap inode cluster buffers during free
  xfs: invalidate allocbt blocks moved to the free list
  xfs: silence uninitialised f.file warning.
  xfs: growfs: don't read garbage for new secondary superblocks
  xfs: move allocation stack switch up to xfs_bmapi_allocate
  xfs: introduce XFS_BMAPI_STACK_SWITCH
  xfs: zero allocation_args on the kernel stack
  xfs: only update the last_sync_lsn when a transaction completes
This commit is contained in:
Linus Torvalds 2012-11-09 06:42:51 +01:00
commit a601e63717
13 changed files with 127 additions and 63 deletions

View File

@ -1866,6 +1866,7 @@ xfs_alloc_fix_freelist(
/* /*
* Initialize the args structure. * Initialize the args structure.
*/ */
memset(&targs, 0, sizeof(targs));
targs.tp = tp; targs.tp = tp;
targs.mp = mp; targs.mp = mp;
targs.agbp = agbp; targs.agbp = agbp;
@ -2207,7 +2208,7 @@ xfs_alloc_read_agf(
* group or loop over the allocation groups to find the result. * group or loop over the allocation groups to find the result.
*/ */
int /* error */ int /* error */
__xfs_alloc_vextent( xfs_alloc_vextent(
xfs_alloc_arg_t *args) /* allocation argument structure */ xfs_alloc_arg_t *args) /* allocation argument structure */
{ {
xfs_agblock_t agsize; /* allocation group size */ xfs_agblock_t agsize; /* allocation group size */
@ -2417,46 +2418,6 @@ error0:
return error; return error;
} }
/*
 * Work item handler for xfs_alloc_vextent(): recover the allocation argument
 * structure that embeds @work, run __xfs_alloc_vextent() on it, store the
 * return value in ->result and signal ->done.
 */
static void
xfs_alloc_vextent_worker(
	struct work_struct	*work)
{
	struct xfs_alloc_arg	*alloc_args;
	unsigned long		pflags;

	alloc_args = container_of(work, struct xfs_alloc_arg, work);

	/* we are in a transaction context here */
	current_set_flags_nested(&pflags, PF_FSTRANS);

	alloc_args->result = __xfs_alloc_vextent(alloc_args);
	complete(alloc_args->done);

	/* only the local pflags is touched after the completion is signalled */
	current_restore_flags_nested(&pflags, PF_FSTRANS);
}
/*
 * Data allocation requests often arrive with very little stack left, so they
 * are handed to a worker thread that has plenty of stack available. Metadata
 * requests generally come from low stack usage paths and are therefore
 * serviced directly, avoiding the context switch overhead.
 *
 * Returns the value produced by __xfs_alloc_vextent(), either directly or via
 * args->result when the work was run by the worker.
 *
 * NOTE(review): INIT_WORK_ONSTACK() is not paired with
 * destroy_work_on_stack() here - confirm whether that is required.
 */
int
xfs_alloc_vextent(
	struct xfs_alloc_arg	*args)
{
	DECLARE_COMPLETION_ONSTACK(done);

	if (args->userdata) {
		/* user data: run on the workqueue and wait for the result */
		args->done = &done;
		INIT_WORK_ONSTACK(&args->work, xfs_alloc_vextent_worker);
		queue_work(xfs_alloc_wq, &args->work);
		wait_for_completion(&done);
		return args->result;
	}

	/* not user data: allocate in the caller's context */
	return __xfs_alloc_vextent(args);
}
/* /*
* Free an extent. * Free an extent.
* Just break up the extent address and hand off to xfs_free_ag_extent * Just break up the extent address and hand off to xfs_free_ag_extent

View File

@ -120,9 +120,6 @@ typedef struct xfs_alloc_arg {
char isfl; /* set if is freelist blocks - !acctg */ char isfl; /* set if is freelist blocks - !acctg */
char userdata; /* set if this is user data */ char userdata; /* set if this is user data */
xfs_fsblock_t firstblock; /* io first block allocated */ xfs_fsblock_t firstblock; /* io first block allocated */
struct completion *done;
struct work_struct work;
int result;
} xfs_alloc_arg_t; } xfs_alloc_arg_t;
/* /*

View File

@ -121,6 +121,8 @@ xfs_allocbt_free_block(
xfs_extent_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1, xfs_extent_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1,
XFS_EXTENT_BUSY_SKIP_DISCARD); XFS_EXTENT_BUSY_SKIP_DISCARD);
xfs_trans_agbtree_delta(cur->bc_tp, -1); xfs_trans_agbtree_delta(cur->bc_tp, -1);
xfs_trans_binval(cur->bc_tp, bp);
return 0; return 0;
} }

View File

@ -2437,6 +2437,7 @@ xfs_bmap_btalloc(
* Normal allocation, done through xfs_alloc_vextent. * Normal allocation, done through xfs_alloc_vextent.
*/ */
tryagain = isaligned = 0; tryagain = isaligned = 0;
memset(&args, 0, sizeof(args));
args.tp = ap->tp; args.tp = ap->tp;
args.mp = mp; args.mp = mp;
args.fsbno = ap->blkno; args.fsbno = ap->blkno;
@ -3082,6 +3083,7 @@ xfs_bmap_extents_to_btree(
* Convert to a btree with two levels, one record in root. * Convert to a btree with two levels, one record in root.
*/ */
XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_BTREE); XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_BTREE);
memset(&args, 0, sizeof(args));
args.tp = tp; args.tp = tp;
args.mp = mp; args.mp = mp;
args.firstblock = *firstblock; args.firstblock = *firstblock;
@ -3237,6 +3239,7 @@ xfs_bmap_local_to_extents(
xfs_buf_t *bp; /* buffer for extent block */ xfs_buf_t *bp; /* buffer for extent block */
xfs_bmbt_rec_host_t *ep;/* extent record pointer */ xfs_bmbt_rec_host_t *ep;/* extent record pointer */
memset(&args, 0, sizeof(args));
args.tp = tp; args.tp = tp;
args.mp = ip->i_mount; args.mp = ip->i_mount;
args.firstblock = *firstblock; args.firstblock = *firstblock;
@ -4616,12 +4619,11 @@ xfs_bmapi_delay(
STATIC int STATIC int
xfs_bmapi_allocate( __xfs_bmapi_allocate(
struct xfs_bmalloca *bma, struct xfs_bmalloca *bma)
int flags)
{ {
struct xfs_mount *mp = bma->ip->i_mount; struct xfs_mount *mp = bma->ip->i_mount;
int whichfork = (flags & XFS_BMAPI_ATTRFORK) ? int whichfork = (bma->flags & XFS_BMAPI_ATTRFORK) ?
XFS_ATTR_FORK : XFS_DATA_FORK; XFS_ATTR_FORK : XFS_DATA_FORK;
struct xfs_ifork *ifp = XFS_IFORK_PTR(bma->ip, whichfork); struct xfs_ifork *ifp = XFS_IFORK_PTR(bma->ip, whichfork);
int tmp_logflags = 0; int tmp_logflags = 0;
@ -4654,24 +4656,27 @@ xfs_bmapi_allocate(
* Indicate if this is the first user data in the file, or just any * Indicate if this is the first user data in the file, or just any
* user data. * user data.
*/ */
if (!(flags & XFS_BMAPI_METADATA)) { if (!(bma->flags & XFS_BMAPI_METADATA)) {
bma->userdata = (bma->offset == 0) ? bma->userdata = (bma->offset == 0) ?
XFS_ALLOC_INITIAL_USER_DATA : XFS_ALLOC_USERDATA; XFS_ALLOC_INITIAL_USER_DATA : XFS_ALLOC_USERDATA;
} }
bma->minlen = (flags & XFS_BMAPI_CONTIG) ? bma->length : 1; bma->minlen = (bma->flags & XFS_BMAPI_CONTIG) ? bma->length : 1;
/* /*
* Only want to do the alignment at the eof if it is userdata and * Only want to do the alignment at the eof if it is userdata and
* allocation length is larger than a stripe unit. * allocation length is larger than a stripe unit.
*/ */
if (mp->m_dalign && bma->length >= mp->m_dalign && if (mp->m_dalign && bma->length >= mp->m_dalign &&
!(flags & XFS_BMAPI_METADATA) && whichfork == XFS_DATA_FORK) { !(bma->flags & XFS_BMAPI_METADATA) && whichfork == XFS_DATA_FORK) {
error = xfs_bmap_isaeof(bma, whichfork); error = xfs_bmap_isaeof(bma, whichfork);
if (error) if (error)
return error; return error;
} }
if (bma->flags & XFS_BMAPI_STACK_SWITCH)
bma->stack_switch = 1;
error = xfs_bmap_alloc(bma); error = xfs_bmap_alloc(bma);
if (error) if (error)
return error; return error;
@ -4706,7 +4711,7 @@ xfs_bmapi_allocate(
* A wasdelay extent has been initialized, so shouldn't be flagged * A wasdelay extent has been initialized, so shouldn't be flagged
* as unwritten. * as unwritten.
*/ */
if (!bma->wasdel && (flags & XFS_BMAPI_PREALLOC) && if (!bma->wasdel && (bma->flags & XFS_BMAPI_PREALLOC) &&
xfs_sb_version_hasextflgbit(&mp->m_sb)) xfs_sb_version_hasextflgbit(&mp->m_sb))
bma->got.br_state = XFS_EXT_UNWRITTEN; bma->got.br_state = XFS_EXT_UNWRITTEN;
@ -4734,6 +4739,45 @@ xfs_bmapi_allocate(
return 0; return 0;
} }
/*
 * Work item handler for xfs_bmapi_allocate(): recover the xfs_bmalloca that
 * embeds @work, run __xfs_bmapi_allocate() on it, store the return value in
 * ->result and signal ->done.
 */
static void
xfs_bmapi_allocate_worker(
	struct work_struct	*work)
{
	struct xfs_bmalloca	*bma;
	unsigned long		pflags;

	bma = container_of(work, struct xfs_bmalloca, work);

	/* we are in a transaction context here */
	current_set_flags_nested(&pflags, PF_FSTRANS);

	bma->result = __xfs_bmapi_allocate(bma);
	complete(bma->done);

	/* only the local pflags is touched after the completion is signalled */
	current_restore_flags_nested(&pflags, PF_FSTRANS);
}
/*
 * Some allocation requests arrive with very little stack left. When the
 * caller has set ->stack_switch, hand the allocation to a worker thread that
 * has plenty of stack and wait for it to finish; otherwise call
 * __xfs_bmapi_allocate() directly to avoid the context switch overhead.
 *
 * Returns the value produced by __xfs_bmapi_allocate(), either directly or
 * via args->result when the work was run by the worker.
 *
 * NOTE(review): INIT_WORK_ONSTACK() is not paired with
 * destroy_work_on_stack() here - confirm whether that is required.
 */
int
xfs_bmapi_allocate(
	struct xfs_bmalloca	*args)
{
	DECLARE_COMPLETION_ONSTACK(done);

	if (args->stack_switch) {
		/* run on the workqueue and block until the worker completes */
		args->done = &done;
		INIT_WORK_ONSTACK(&args->work, xfs_bmapi_allocate_worker);
		queue_work(xfs_alloc_wq, &args->work);
		wait_for_completion(&done);
		return args->result;
	}

	/* no stack switch requested: allocate in the caller's context */
	return __xfs_bmapi_allocate(args);
}
STATIC int STATIC int
xfs_bmapi_convert_unwritten( xfs_bmapi_convert_unwritten(
struct xfs_bmalloca *bma, struct xfs_bmalloca *bma,
@ -4919,6 +4963,7 @@ xfs_bmapi_write(
bma.conv = !!(flags & XFS_BMAPI_CONVERT); bma.conv = !!(flags & XFS_BMAPI_CONVERT);
bma.wasdel = wasdelay; bma.wasdel = wasdelay;
bma.offset = bno; bma.offset = bno;
bma.flags = flags;
/* /*
* There's a 32/64 bit type mismatch between the * There's a 32/64 bit type mismatch between the
@ -4934,7 +4979,7 @@ xfs_bmapi_write(
ASSERT(len > 0); ASSERT(len > 0);
ASSERT(bma.length > 0); ASSERT(bma.length > 0);
error = xfs_bmapi_allocate(&bma, flags); error = xfs_bmapi_allocate(&bma);
if (error) if (error)
goto error0; goto error0;
if (bma.blkno == NULLFSBLOCK) if (bma.blkno == NULLFSBLOCK)

View File

@ -77,6 +77,7 @@ typedef struct xfs_bmap_free
* from written to unwritten, otherwise convert from unwritten to written. * from written to unwritten, otherwise convert from unwritten to written.
*/ */
#define XFS_BMAPI_CONVERT 0x040 #define XFS_BMAPI_CONVERT 0x040
#define XFS_BMAPI_STACK_SWITCH 0x080
#define XFS_BMAPI_FLAGS \ #define XFS_BMAPI_FLAGS \
{ XFS_BMAPI_ENTIRE, "ENTIRE" }, \ { XFS_BMAPI_ENTIRE, "ENTIRE" }, \
@ -85,7 +86,8 @@ typedef struct xfs_bmap_free
{ XFS_BMAPI_PREALLOC, "PREALLOC" }, \ { XFS_BMAPI_PREALLOC, "PREALLOC" }, \
{ XFS_BMAPI_IGSTATE, "IGSTATE" }, \ { XFS_BMAPI_IGSTATE, "IGSTATE" }, \
{ XFS_BMAPI_CONTIG, "CONTIG" }, \ { XFS_BMAPI_CONTIG, "CONTIG" }, \
{ XFS_BMAPI_CONVERT, "CONVERT" } { XFS_BMAPI_CONVERT, "CONVERT" }, \
{ XFS_BMAPI_STACK_SWITCH, "STACK_SWITCH" }
static inline int xfs_bmapi_aflag(int w) static inline int xfs_bmapi_aflag(int w)
@ -133,6 +135,11 @@ typedef struct xfs_bmalloca {
char userdata;/* set if is user data */ char userdata;/* set if is user data */
char aeof; /* allocated space at eof */ char aeof; /* allocated space at eof */
char conv; /* overwriting unwritten extents */ char conv; /* overwriting unwritten extents */
char stack_switch;
int flags;
struct completion *done;
struct work_struct work;
int result;
} xfs_bmalloca_t; } xfs_bmalloca_t;
/* /*

View File

@ -526,7 +526,25 @@ xfs_buf_item_unpin(
} }
xfs_buf_relse(bp); xfs_buf_relse(bp);
} else if (freed && remove) { } else if (freed && remove) {
/*
* There are currently two references to the buffer - the active
* LRU reference and the buf log item. What we are about to do
* here - simulate a failed IO completion - requires 3
* references.
*
* The LRU reference is removed by the xfs_buf_stale() call. The
* buf item reference is removed by the xfs_buf_iodone()
* callback that is run by xfs_buf_do_callbacks() during ioend
* processing (via the bp->b_iodone callback), and then finally
* the ioend processing will drop the IO reference if the buffer
* is marked XBF_ASYNC.
*
* Hence we need to take an additional reference here so that IO
* completion processing doesn't free the buffer prematurely.
*/
xfs_buf_lock(bp); xfs_buf_lock(bp);
xfs_buf_hold(bp);
bp->b_flags |= XBF_ASYNC;
xfs_buf_ioerror(bp, EIO); xfs_buf_ioerror(bp, EIO);
XFS_BUF_UNDONE(bp); XFS_BUF_UNDONE(bp);
xfs_buf_stale(bp); xfs_buf_stale(bp);

View File

@ -399,9 +399,26 @@ xfs_growfs_data_private(
/* update secondary superblocks. */ /* update secondary superblocks. */
for (agno = 1; agno < nagcount; agno++) { for (agno = 1; agno < nagcount; agno++) {
error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, error = 0;
/*
* new secondary superblocks need to be zeroed, not read from
* disk as the contents of the new area we are growing into is
* completely unknown.
*/
if (agno < oagcount) {
error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)), XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)),
XFS_FSS_TO_BB(mp, 1), 0, &bp); XFS_FSS_TO_BB(mp, 1), 0, &bp);
} else {
bp = xfs_trans_get_buf(NULL, mp->m_ddev_targp,
XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)),
XFS_FSS_TO_BB(mp, 1), 0);
if (bp)
xfs_buf_zero(bp, 0, BBTOB(bp->b_length));
else
error = ENOMEM;
}
if (error) { if (error) {
xfs_warn(mp, xfs_warn(mp,
"error %d reading secondary superblock for ag %d", "error %d reading secondary superblock for ag %d",
@ -423,7 +440,7 @@ xfs_growfs_data_private(
break; /* no point in continuing */ break; /* no point in continuing */
} }
} }
return 0; return error;
error0: error0:
xfs_trans_cancel(tp, XFS_TRANS_ABORT); xfs_trans_cancel(tp, XFS_TRANS_ABORT);

View File

@ -250,6 +250,7 @@ xfs_ialloc_ag_alloc(
/* boundary */ /* boundary */
struct xfs_perag *pag; struct xfs_perag *pag;
memset(&args, 0, sizeof(args));
args.tp = tp; args.tp = tp;
args.mp = tp->t_mountp; args.mp = tp->t_mountp;

View File

@ -1509,7 +1509,8 @@ xfs_ifree_cluster(
* to mark all the active inodes on the buffer stale. * to mark all the active inodes on the buffer stale.
*/ */
bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno, bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
mp->m_bsize * blks_per_cluster, 0); mp->m_bsize * blks_per_cluster,
XBF_UNMAPPED);
if (!bp) if (!bp)
return ENOMEM; return ENOMEM;

View File

@ -70,7 +70,7 @@ xfs_find_handle(
int hsize; int hsize;
xfs_handle_t handle; xfs_handle_t handle;
struct inode *inode; struct inode *inode;
struct fd f; struct fd f = {0};
struct path path; struct path path;
int error; int error;
struct xfs_inode *ip; struct xfs_inode *ip;

View File

@ -584,7 +584,9 @@ xfs_iomap_write_allocate(
* pointer that the caller gave to us. * pointer that the caller gave to us.
*/ */
error = xfs_bmapi_write(tp, ip, map_start_fsb, error = xfs_bmapi_write(tp, ip, map_start_fsb,
count_fsb, 0, &first_block, 1, count_fsb,
XFS_BMAPI_STACK_SWITCH,
&first_block, 1,
imap, &nimaps, &free_list); imap, &nimaps, &free_list);
if (error) if (error)
goto trans_cancel; goto trans_cancel;

View File

@ -2387,14 +2387,27 @@ xlog_state_do_callback(
/* /*
* update the last_sync_lsn before we drop the * Completion of a iclog IO does not imply that
* a transaction has completed, as transactions
* can be large enough to span many iclogs. We
* cannot change the tail of the log half way
* through a transaction as this may be the only
* transaction in the log and moving the tail to
* point to the middle of it will prevent
* recovery from finding the start of the
* transaction. Hence we should only update the
* last_sync_lsn if this iclog contains
* transaction completion callbacks on it.
*
* We have to do this before we drop the
* icloglock to ensure we are the only one that * icloglock to ensure we are the only one that
* can update it. * can update it.
*/ */
ASSERT(XFS_LSN_CMP(atomic64_read(&log->l_last_sync_lsn), ASSERT(XFS_LSN_CMP(atomic64_read(&log->l_last_sync_lsn),
be64_to_cpu(iclog->ic_header.h_lsn)) <= 0); be64_to_cpu(iclog->ic_header.h_lsn)) <= 0);
atomic64_set(&log->l_last_sync_lsn, if (iclog->ic_callback)
be64_to_cpu(iclog->ic_header.h_lsn)); atomic64_set(&log->l_last_sync_lsn,
be64_to_cpu(iclog->ic_header.h_lsn));
} else } else
ioerrors++; ioerrors++;

View File

@ -3541,7 +3541,7 @@ xlog_do_recovery_pass(
* - order is important. * - order is important.
*/ */
error = xlog_bread_offset(log, 0, error = xlog_bread_offset(log, 0,
bblks - split_bblks, hbp, bblks - split_bblks, dbp,
offset + BBTOB(split_bblks)); offset + BBTOB(split_bblks));
if (error) if (error)
goto bread_err2; goto bread_err2;