Merge branch '2.6.36-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/xfsdev
* '2.6.36-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/xfsdev:
  xfs: do not discard page cache data on EAGAIN
  xfs: don't do memory allocation under the CIL context lock
  xfs: Reduce log force overhead for delayed logging
  xfs: dummy transactions should not dirty VFS state
  xfs: ensure f_ffree returned by statfs() is non-negative
  xfs: handle negative wbc->nr_to_write during sync writeback
  writeback: write_cache_pages doesn't terminate at nr_to_write <= 0
  xfs: fix untrusted inode number lookup
  xfs: ensure we mark all inodes in a freed cluster XFS_ISTALE
  xfs: unlock items before allowing the CIL to commit
This commit is contained in:
commit
871eae4891
|
@ -852,8 +852,8 @@ xfs_convert_page(
|
|||
SetPageUptodate(page);
|
||||
|
||||
if (count) {
|
||||
wbc->nr_to_write--;
|
||||
if (wbc->nr_to_write <= 0)
|
||||
if (--wbc->nr_to_write <= 0 &&
|
||||
wbc->sync_mode == WB_SYNC_NONE)
|
||||
done = 1;
|
||||
}
|
||||
xfs_start_page_writeback(page, !page_dirty, count);
|
||||
|
@ -1068,7 +1068,7 @@ xfs_vm_writepage(
|
|||
* by themselves.
|
||||
*/
|
||||
if ((current->flags & (PF_MEMALLOC|PF_KSWAPD)) == PF_MEMALLOC)
|
||||
goto out_fail;
|
||||
goto redirty;
|
||||
|
||||
/*
|
||||
* We need a transaction if there are delalloc or unwritten buffers
|
||||
|
@ -1080,7 +1080,7 @@ xfs_vm_writepage(
|
|||
*/
|
||||
xfs_count_page_state(page, &delalloc, &unwritten);
|
||||
if ((current->flags & PF_FSTRANS) && (delalloc || unwritten))
|
||||
goto out_fail;
|
||||
goto redirty;
|
||||
|
||||
/* Is this page beyond the end of the file? */
|
||||
offset = i_size_read(inode);
|
||||
|
@ -1245,12 +1245,15 @@ error:
|
|||
if (iohead)
|
||||
xfs_cancel_ioend(iohead);
|
||||
|
||||
if (err == -EAGAIN)
|
||||
goto redirty;
|
||||
|
||||
xfs_aops_discard_page(page);
|
||||
ClearPageUptodate(page);
|
||||
unlock_page(page);
|
||||
return err;
|
||||
|
||||
out_fail:
|
||||
redirty:
|
||||
redirty_page_for_writepage(wbc, page);
|
||||
unlock_page(page);
|
||||
return 0;
|
||||
|
|
|
@ -1226,6 +1226,7 @@ xfs_fs_statfs(
|
|||
struct xfs_inode *ip = XFS_I(dentry->d_inode);
|
||||
__uint64_t fakeinos, id;
|
||||
xfs_extlen_t lsize;
|
||||
__int64_t ffree;
|
||||
|
||||
statp->f_type = XFS_SB_MAGIC;
|
||||
statp->f_namelen = MAXNAMELEN - 1;
|
||||
|
@ -1249,7 +1250,11 @@ xfs_fs_statfs(
|
|||
statp->f_files = min_t(typeof(statp->f_files),
|
||||
statp->f_files,
|
||||
mp->m_maxicount);
|
||||
statp->f_ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree);
|
||||
|
||||
/* make sure statp->f_ffree does not underflow */
|
||||
ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree);
|
||||
statp->f_ffree = max_t(__int64_t, ffree, 0);
|
||||
|
||||
spin_unlock(&mp->m_sb_lock);
|
||||
|
||||
if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) ||
|
||||
|
@ -1402,7 +1407,7 @@ xfs_fs_freeze(
|
|||
|
||||
xfs_save_resvblks(mp);
|
||||
xfs_quiesce_attr(mp);
|
||||
return -xfs_fs_log_dummy(mp);
|
||||
return -xfs_fs_log_dummy(mp, SYNC_WAIT);
|
||||
}
|
||||
|
||||
STATIC int
|
||||
|
|
|
@ -34,6 +34,7 @@
|
|||
#include "xfs_inode_item.h"
|
||||
#include "xfs_quota.h"
|
||||
#include "xfs_trace.h"
|
||||
#include "xfs_fsops.h"
|
||||
|
||||
#include <linux/kthread.h>
|
||||
#include <linux/freezer.h>
|
||||
|
@ -340,38 +341,6 @@ xfs_sync_attr(
|
|||
XFS_ICI_NO_TAG, 0, NULL);
|
||||
}
|
||||
|
||||
STATIC int
|
||||
xfs_commit_dummy_trans(
|
||||
struct xfs_mount *mp,
|
||||
uint flags)
|
||||
{
|
||||
struct xfs_inode *ip = mp->m_rootip;
|
||||
struct xfs_trans *tp;
|
||||
int error;
|
||||
|
||||
/*
|
||||
* Put a dummy transaction in the log to tell recovery
|
||||
* that all others are OK.
|
||||
*/
|
||||
tp = xfs_trans_alloc(mp, XFS_TRANS_DUMMY1);
|
||||
error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
|
||||
if (error) {
|
||||
xfs_trans_cancel(tp, 0);
|
||||
return error;
|
||||
}
|
||||
|
||||
xfs_ilock(ip, XFS_ILOCK_EXCL);
|
||||
|
||||
xfs_trans_ijoin(tp, ip);
|
||||
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
|
||||
error = xfs_trans_commit(tp, 0);
|
||||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
|
||||
/* the log force ensures this transaction is pushed to disk */
|
||||
xfs_log_force(mp, (flags & SYNC_WAIT) ? XFS_LOG_SYNC : 0);
|
||||
return error;
|
||||
}
|
||||
|
||||
STATIC int
|
||||
xfs_sync_fsdata(
|
||||
struct xfs_mount *mp)
|
||||
|
@ -432,7 +401,7 @@ xfs_quiesce_data(
|
|||
|
||||
/* mark the log as covered if needed */
|
||||
if (xfs_log_need_covered(mp))
|
||||
error2 = xfs_commit_dummy_trans(mp, SYNC_WAIT);
|
||||
error2 = xfs_fs_log_dummy(mp, SYNC_WAIT);
|
||||
|
||||
/* flush data-only devices */
|
||||
if (mp->m_rtdev_targp)
|
||||
|
@ -563,7 +532,7 @@ xfs_flush_inodes(
|
|||
/*
|
||||
* Every sync period we need to unpin all items, reclaim inodes and sync
|
||||
* disk quotas. We might need to cover the log to indicate that the
|
||||
* filesystem is idle.
|
||||
* filesystem is idle and not frozen.
|
||||
*/
|
||||
STATIC void
|
||||
xfs_sync_worker(
|
||||
|
@ -577,8 +546,9 @@ xfs_sync_worker(
|
|||
xfs_reclaim_inodes(mp, 0);
|
||||
/* dgc: errors ignored here */
|
||||
error = xfs_qm_sync(mp, SYNC_TRYLOCK);
|
||||
if (xfs_log_need_covered(mp))
|
||||
error = xfs_commit_dummy_trans(mp, 0);
|
||||
if (mp->m_super->s_frozen == SB_UNFROZEN &&
|
||||
xfs_log_need_covered(mp))
|
||||
error = xfs_fs_log_dummy(mp, 0);
|
||||
}
|
||||
mp->m_sync_seq++;
|
||||
wake_up(&mp->m_wait_single_sync_task);
|
||||
|
|
|
@ -604,31 +604,36 @@ out:
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Dump a transaction into the log that contains no real change. This is needed
|
||||
* to be able to make the log dirty or stamp the current tail LSN into the log
|
||||
* during the covering operation.
|
||||
*
|
||||
* We cannot use an inode here for this - that will push dirty state back up
|
||||
* into the VFS and then periodic inode flushing will prevent log covering from
|
||||
* making progress. Hence we log a field in the superblock instead.
|
||||
*/
|
||||
int
|
||||
xfs_fs_log_dummy(
|
||||
xfs_mount_t *mp)
|
||||
xfs_mount_t *mp,
|
||||
int flags)
|
||||
{
|
||||
xfs_trans_t *tp;
|
||||
xfs_inode_t *ip;
|
||||
int error;
|
||||
|
||||
tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1, KM_SLEEP);
|
||||
error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
|
||||
error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
|
||||
XFS_DEFAULT_LOG_COUNT);
|
||||
if (error) {
|
||||
xfs_trans_cancel(tp, 0);
|
||||
return error;
|
||||
}
|
||||
|
||||
ip = mp->m_rootip;
|
||||
xfs_ilock(ip, XFS_ILOCK_EXCL);
|
||||
|
||||
xfs_trans_ijoin(tp, ip);
|
||||
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
|
||||
xfs_trans_set_sync(tp);
|
||||
error = xfs_trans_commit(tp, 0);
|
||||
|
||||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
return error;
|
||||
/* log the UUID because it is an unchanging field */
|
||||
xfs_mod_sb(tp, XFS_SB_UUID);
|
||||
if (flags & SYNC_WAIT)
|
||||
xfs_trans_set_sync(tp);
|
||||
return xfs_trans_commit(tp, 0);
|
||||
}
|
||||
|
||||
int
|
||||
|
|
|
@ -25,6 +25,6 @@ extern int xfs_fs_counts(xfs_mount_t *mp, xfs_fsop_counts_t *cnt);
|
|||
extern int xfs_reserve_blocks(xfs_mount_t *mp, __uint64_t *inval,
|
||||
xfs_fsop_resblks_t *outval);
|
||||
extern int xfs_fs_goingdown(xfs_mount_t *mp, __uint32_t inflags);
|
||||
extern int xfs_fs_log_dummy(xfs_mount_t *mp);
|
||||
extern int xfs_fs_log_dummy(xfs_mount_t *mp, int flags);
|
||||
|
||||
#endif /* __XFS_FSOPS_H__ */
|
||||
|
|
|
@ -1213,7 +1213,6 @@ xfs_imap_lookup(
|
|||
struct xfs_inobt_rec_incore rec;
|
||||
struct xfs_btree_cur *cur;
|
||||
struct xfs_buf *agbp;
|
||||
xfs_agino_t startino;
|
||||
int error;
|
||||
int i;
|
||||
|
||||
|
@ -1227,13 +1226,13 @@ xfs_imap_lookup(
|
|||
}
|
||||
|
||||
/*
|
||||
* derive and lookup the exact inode record for the given agino. If the
|
||||
* record cannot be found, then it's an invalid inode number and we
|
||||
* should abort.
|
||||
* Lookup the inode record for the given agino. If the record cannot be
|
||||
* found, then it's an invalid inode number and we should abort. Once
|
||||
* we have a record, we need to ensure it contains the inode number
|
||||
* we are looking up.
|
||||
*/
|
||||
cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
|
||||
startino = agino & ~(XFS_IALLOC_INODES(mp) - 1);
|
||||
error = xfs_inobt_lookup(cur, startino, XFS_LOOKUP_EQ, &i);
|
||||
error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i);
|
||||
if (!error) {
|
||||
if (i)
|
||||
error = xfs_inobt_get_rec(cur, &rec, &i);
|
||||
|
@ -1246,6 +1245,11 @@ xfs_imap_lookup(
|
|||
if (error)
|
||||
return error;
|
||||
|
||||
/* check that the returned record contains the required inode */
|
||||
if (rec.ir_startino > agino ||
|
||||
rec.ir_startino + XFS_IALLOC_INODES(mp) <= agino)
|
||||
return EINVAL;
|
||||
|
||||
/* for untrusted inodes check it is allocated first */
|
||||
if ((flags & XFS_IGET_UNTRUSTED) &&
|
||||
(rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino)))
|
||||
|
|
|
@ -1914,6 +1914,11 @@ xfs_iunlink_remove(
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* A big issue when freeing the inode cluster is that we _cannot_ skip any
|
||||
* inodes that are in memory - they all must be marked stale and attached to
|
||||
* the cluster buffer.
|
||||
*/
|
||||
STATIC void
|
||||
xfs_ifree_cluster(
|
||||
xfs_inode_t *free_ip,
|
||||
|
@ -1945,8 +1950,6 @@ xfs_ifree_cluster(
|
|||
}
|
||||
|
||||
for (j = 0; j < nbufs; j++, inum += ninodes) {
|
||||
int found = 0;
|
||||
|
||||
blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum),
|
||||
XFS_INO_TO_AGBNO(mp, inum));
|
||||
|
||||
|
@ -1965,7 +1968,9 @@ xfs_ifree_cluster(
|
|||
/*
|
||||
* Walk the inodes already attached to the buffer and mark them
|
||||
* stale. These will all have the flush locks held, so an
|
||||
* in-memory inode walk can't lock them.
|
||||
* in-memory inode walk can't lock them. By marking them all
|
||||
* stale first, we will not attempt to lock them in the loop
|
||||
* below as the XFS_ISTALE flag will be set.
|
||||
*/
|
||||
lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
|
||||
while (lip) {
|
||||
|
@ -1977,11 +1982,11 @@ xfs_ifree_cluster(
|
|||
&iip->ili_flush_lsn,
|
||||
&iip->ili_item.li_lsn);
|
||||
xfs_iflags_set(iip->ili_inode, XFS_ISTALE);
|
||||
found++;
|
||||
}
|
||||
lip = lip->li_bio_list;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* For each inode in memory attempt to add it to the inode
|
||||
* buffer and set it up for being staled on buffer IO
|
||||
|
@ -1993,6 +1998,7 @@ xfs_ifree_cluster(
|
|||
* even trying to lock them.
|
||||
*/
|
||||
for (i = 0; i < ninodes; i++) {
|
||||
retry:
|
||||
read_lock(&pag->pag_ici_lock);
|
||||
ip = radix_tree_lookup(&pag->pag_ici_root,
|
||||
XFS_INO_TO_AGINO(mp, (inum + i)));
|
||||
|
@ -2003,38 +2009,36 @@ xfs_ifree_cluster(
|
|||
continue;
|
||||
}
|
||||
|
||||
/* don't try to lock/unlock the current inode */
|
||||
/*
|
||||
* Don't try to lock/unlock the current inode, but we
|
||||
* _cannot_ skip the other inodes that we did not find
|
||||
* in the list attached to the buffer and are not
|
||||
* already marked stale. If we can't lock it, back off
|
||||
* and retry.
|
||||
*/
|
||||
if (ip != free_ip &&
|
||||
!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
|
||||
read_unlock(&pag->pag_ici_lock);
|
||||
continue;
|
||||
delay(1);
|
||||
goto retry;
|
||||
}
|
||||
read_unlock(&pag->pag_ici_lock);
|
||||
|
||||
if (!xfs_iflock_nowait(ip)) {
|
||||
if (ip != free_ip)
|
||||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
continue;
|
||||
}
|
||||
|
||||
xfs_iflock(ip);
|
||||
xfs_iflags_set(ip, XFS_ISTALE);
|
||||
if (xfs_inode_clean(ip)) {
|
||||
ASSERT(ip != free_ip);
|
||||
xfs_ifunlock(ip);
|
||||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* we don't need to attach clean inodes or those only
|
||||
* with unlogged changes (which we throw away, anyway).
|
||||
*/
|
||||
iip = ip->i_itemp;
|
||||
if (!iip) {
|
||||
/* inode with unlogged changes only */
|
||||
if (!iip || xfs_inode_clean(ip)) {
|
||||
ASSERT(ip != free_ip);
|
||||
ip->i_update_core = 0;
|
||||
xfs_ifunlock(ip);
|
||||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
continue;
|
||||
}
|
||||
found++;
|
||||
|
||||
iip->ili_last_fields = iip->ili_format.ilf_fields;
|
||||
iip->ili_format.ilf_fields = 0;
|
||||
|
@ -2049,8 +2053,7 @@ xfs_ifree_cluster(
|
|||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
}
|
||||
|
||||
if (found)
|
||||
xfs_trans_stale_inode_buf(tp, bp);
|
||||
xfs_trans_stale_inode_buf(tp, bp);
|
||||
xfs_trans_binval(tp, bp);
|
||||
}
|
||||
|
||||
|
|
|
@ -3015,7 +3015,8 @@ _xfs_log_force(
|
|||
|
||||
XFS_STATS_INC(xs_log_force);
|
||||
|
||||
xlog_cil_push(log, 1);
|
||||
if (log->l_cilp)
|
||||
xlog_cil_force(log);
|
||||
|
||||
spin_lock(&log->l_icloglock);
|
||||
|
||||
|
@ -3167,7 +3168,7 @@ _xfs_log_force_lsn(
|
|||
XFS_STATS_INC(xs_log_force);
|
||||
|
||||
if (log->l_cilp) {
|
||||
lsn = xlog_cil_push_lsn(log, lsn);
|
||||
lsn = xlog_cil_force_lsn(log, lsn);
|
||||
if (lsn == NULLCOMMITLSN)
|
||||
return 0;
|
||||
}
|
||||
|
@ -3724,7 +3725,7 @@ xfs_log_force_umount(
|
|||
* call below.
|
||||
*/
|
||||
if (!logerror && (mp->m_flags & XFS_MOUNT_DELAYLOG))
|
||||
xlog_cil_push(log, 1);
|
||||
xlog_cil_force(log);
|
||||
|
||||
/*
|
||||
* We must hold both the GRANT lock and the LOG lock,
|
||||
|
|
|
@ -68,6 +68,7 @@ xlog_cil_init(
|
|||
ctx->sequence = 1;
|
||||
ctx->cil = cil;
|
||||
cil->xc_ctx = ctx;
|
||||
cil->xc_current_sequence = ctx->sequence;
|
||||
|
||||
cil->xc_log = log;
|
||||
log->l_cilp = cil;
|
||||
|
@ -269,15 +270,10 @@ xlog_cil_insert(
|
|||
static void
|
||||
xlog_cil_format_items(
|
||||
struct log *log,
|
||||
struct xfs_log_vec *log_vector,
|
||||
struct xlog_ticket *ticket,
|
||||
xfs_lsn_t *start_lsn)
|
||||
struct xfs_log_vec *log_vector)
|
||||
{
|
||||
struct xfs_log_vec *lv;
|
||||
|
||||
if (start_lsn)
|
||||
*start_lsn = log->l_cilp->xc_ctx->sequence;
|
||||
|
||||
ASSERT(log_vector);
|
||||
for (lv = log_vector; lv; lv = lv->lv_next) {
|
||||
void *ptr;
|
||||
|
@ -301,11 +297,26 @@ xlog_cil_format_items(
|
|||
ptr += vec->i_len;
|
||||
}
|
||||
ASSERT(ptr == lv->lv_buf + lv->lv_buf_len);
|
||||
|
||||
xlog_cil_insert(log, ticket, lv->lv_item, lv);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
xlog_cil_insert_items(
|
||||
struct log *log,
|
||||
struct xfs_log_vec *log_vector,
|
||||
struct xlog_ticket *ticket,
|
||||
xfs_lsn_t *start_lsn)
|
||||
{
|
||||
struct xfs_log_vec *lv;
|
||||
|
||||
if (start_lsn)
|
||||
*start_lsn = log->l_cilp->xc_ctx->sequence;
|
||||
|
||||
ASSERT(log_vector);
|
||||
for (lv = log_vector; lv; lv = lv->lv_next)
|
||||
xlog_cil_insert(log, ticket, lv->lv_item, lv);
|
||||
}
|
||||
|
||||
static void
|
||||
xlog_cil_free_logvec(
|
||||
struct xfs_log_vec *log_vector)
|
||||
|
@ -320,80 +331,6 @@ xlog_cil_free_logvec(
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Commit a transaction with the given vector to the Committed Item List.
|
||||
*
|
||||
* To do this, we need to format the item, pin it in memory if required and
|
||||
* account for the space used by the transaction. Once we have done that we
|
||||
* need to release the unused reservation for the transaction, attach the
|
||||
* transaction to the checkpoint context so we carry the busy extents through
|
||||
* to checkpoint completion, and then unlock all the items in the transaction.
|
||||
*
|
||||
* For more specific information about the order of operations in
|
||||
* xfs_log_commit_cil() please refer to the comments in
|
||||
* xfs_trans_commit_iclog().
|
||||
*
|
||||
* Called with the context lock already held in read mode to lock out
|
||||
* background commit, returns without it held once background commits are
|
||||
* allowed again.
|
||||
*/
|
||||
int
|
||||
xfs_log_commit_cil(
|
||||
struct xfs_mount *mp,
|
||||
struct xfs_trans *tp,
|
||||
struct xfs_log_vec *log_vector,
|
||||
xfs_lsn_t *commit_lsn,
|
||||
int flags)
|
||||
{
|
||||
struct log *log = mp->m_log;
|
||||
int log_flags = 0;
|
||||
int push = 0;
|
||||
|
||||
if (flags & XFS_TRANS_RELEASE_LOG_RES)
|
||||
log_flags = XFS_LOG_REL_PERM_RESERV;
|
||||
|
||||
if (XLOG_FORCED_SHUTDOWN(log)) {
|
||||
xlog_cil_free_logvec(log_vector);
|
||||
return XFS_ERROR(EIO);
|
||||
}
|
||||
|
||||
/* lock out background commit */
|
||||
down_read(&log->l_cilp->xc_ctx_lock);
|
||||
xlog_cil_format_items(log, log_vector, tp->t_ticket, commit_lsn);
|
||||
|
||||
/* check we didn't blow the reservation */
|
||||
if (tp->t_ticket->t_curr_res < 0)
|
||||
xlog_print_tic_res(log->l_mp, tp->t_ticket);
|
||||
|
||||
/* attach the transaction to the CIL if it has any busy extents */
|
||||
if (!list_empty(&tp->t_busy)) {
|
||||
spin_lock(&log->l_cilp->xc_cil_lock);
|
||||
list_splice_init(&tp->t_busy,
|
||||
&log->l_cilp->xc_ctx->busy_extents);
|
||||
spin_unlock(&log->l_cilp->xc_cil_lock);
|
||||
}
|
||||
|
||||
tp->t_commit_lsn = *commit_lsn;
|
||||
xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
|
||||
xfs_trans_unreserve_and_mod_sb(tp);
|
||||
|
||||
/* check for background commit before unlock */
|
||||
if (log->l_cilp->xc_ctx->space_used > XLOG_CIL_SPACE_LIMIT(log))
|
||||
push = 1;
|
||||
up_read(&log->l_cilp->xc_ctx_lock);
|
||||
|
||||
/*
|
||||
* We need to push CIL every so often so we don't cache more than we
|
||||
* can fit in the log. The limit really is that a checkpoint can't be
|
||||
* more than half the log (the current checkpoint is not allowed to
|
||||
* overwrite the previous checkpoint), but commit latency and memory
|
||||
* usage limit this to a smaller size in most cases.
|
||||
*/
|
||||
if (push)
|
||||
xlog_cil_push(log, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Mark all items committed and clear busy extents. We free the log vector
|
||||
* chains in a separate pass so that we unpin the log items as quickly as
|
||||
|
@ -427,13 +364,23 @@ xlog_cil_committed(
|
|||
}
|
||||
|
||||
/*
|
||||
* Push the Committed Item List to the log. If the push_now flag is not set,
|
||||
* then it is a background flush and so we can chose to ignore it.
|
||||
* Push the Committed Item List to the log. If @push_seq flag is zero, then it
|
||||
* is a background flush and so we can choose to ignore it. Otherwise, if the
|
||||
* current sequence is the same as @push_seq we need to do a flush. If
|
||||
* @push_seq is less than the current sequence, then it has already been
|
||||
* flushed and we don't need to do anything - the caller will wait for it to
|
||||
* complete if necessary.
|
||||
*
|
||||
* @push_seq is a value rather than a flag because that allows us to do an
|
||||
* unlocked check of the sequence number for a match. Hence we can allow log
|
||||
* forces to run racily and not issue pushes for the same sequence twice. If we
|
||||
* get a race between multiple pushes for the same sequence they will block on
|
||||
* the first one and then abort, hence avoiding needless pushes.
|
||||
*/
|
||||
int
|
||||
STATIC int
|
||||
xlog_cil_push(
|
||||
struct log *log,
|
||||
int push_now)
|
||||
xfs_lsn_t push_seq)
|
||||
{
|
||||
struct xfs_cil *cil = log->l_cilp;
|
||||
struct xfs_log_vec *lv;
|
||||
|
@ -453,12 +400,14 @@ xlog_cil_push(
|
|||
if (!cil)
|
||||
return 0;
|
||||
|
||||
ASSERT(!push_seq || push_seq <= cil->xc_ctx->sequence);
|
||||
|
||||
new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS);
|
||||
new_ctx->ticket = xlog_cil_ticket_alloc(log);
|
||||
|
||||
/* lock out transaction commit, but don't block on background push */
|
||||
if (!down_write_trylock(&cil->xc_ctx_lock)) {
|
||||
if (!push_now)
|
||||
if (!push_seq)
|
||||
goto out_free_ticket;
|
||||
down_write(&cil->xc_ctx_lock);
|
||||
}
|
||||
|
@ -469,7 +418,11 @@ xlog_cil_push(
|
|||
goto out_skip;
|
||||
|
||||
/* check for spurious background flush */
|
||||
if (!push_now && cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log))
|
||||
if (!push_seq && cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log))
|
||||
goto out_skip;
|
||||
|
||||
/* check for a previously pushed sequence */
|
||||
if (push_seq < cil->xc_ctx->sequence)
|
||||
goto out_skip;
|
||||
|
||||
/*
|
||||
|
@ -514,6 +467,13 @@ xlog_cil_push(
|
|||
new_ctx->cil = cil;
|
||||
cil->xc_ctx = new_ctx;
|
||||
|
||||
/*
|
||||
* mirror the new sequence into the cil structure so that we can do
|
||||
* unlocked checks against the current sequence in log forces without
|
||||
* risking dereferencing a freed context pointer.
|
||||
*/
|
||||
cil->xc_current_sequence = new_ctx->sequence;
|
||||
|
||||
/*
|
||||
* The switch is now done, so we can drop the context lock and move out
|
||||
* of a shared context. We can't just go straight to the commit record,
|
||||
|
@ -625,6 +585,102 @@ out_abort:
|
|||
return XFS_ERROR(EIO);
|
||||
}
|
||||
|
||||
/*
|
||||
* Commit a transaction with the given vector to the Committed Item List.
|
||||
*
|
||||
* To do this, we need to format the item, pin it in memory if required and
|
||||
* account for the space used by the transaction. Once we have done that we
|
||||
* need to release the unused reservation for the transaction, attach the
|
||||
* transaction to the checkpoint context so we carry the busy extents through
|
||||
* to checkpoint completion, and then unlock all the items in the transaction.
|
||||
*
|
||||
* For more specific information about the order of operations in
|
||||
* xfs_log_commit_cil() please refer to the comments in
|
||||
* xfs_trans_commit_iclog().
|
||||
*
|
||||
* Called with the context lock already held in read mode to lock out
|
||||
* background commit, returns without it held once background commits are
|
||||
* allowed again.
|
||||
*/
|
||||
int
|
||||
xfs_log_commit_cil(
|
||||
struct xfs_mount *mp,
|
||||
struct xfs_trans *tp,
|
||||
struct xfs_log_vec *log_vector,
|
||||
xfs_lsn_t *commit_lsn,
|
||||
int flags)
|
||||
{
|
||||
struct log *log = mp->m_log;
|
||||
int log_flags = 0;
|
||||
int push = 0;
|
||||
|
||||
if (flags & XFS_TRANS_RELEASE_LOG_RES)
|
||||
log_flags = XFS_LOG_REL_PERM_RESERV;
|
||||
|
||||
if (XLOG_FORCED_SHUTDOWN(log)) {
|
||||
xlog_cil_free_logvec(log_vector);
|
||||
return XFS_ERROR(EIO);
|
||||
}
|
||||
|
||||
/*
|
||||
* do all the hard work of formatting items (including memory
|
||||
* allocation) outside the CIL context lock. This prevents stalling CIL
|
||||
* pushes when we are low on memory and a transaction commit spends a
|
||||
* lot of time in memory reclaim.
|
||||
*/
|
||||
xlog_cil_format_items(log, log_vector);
|
||||
|
||||
/* lock out background commit */
|
||||
down_read(&log->l_cilp->xc_ctx_lock);
|
||||
xlog_cil_insert_items(log, log_vector, tp->t_ticket, commit_lsn);
|
||||
|
||||
/* check we didn't blow the reservation */
|
||||
if (tp->t_ticket->t_curr_res < 0)
|
||||
xlog_print_tic_res(log->l_mp, tp->t_ticket);
|
||||
|
||||
/* attach the transaction to the CIL if it has any busy extents */
|
||||
if (!list_empty(&tp->t_busy)) {
|
||||
spin_lock(&log->l_cilp->xc_cil_lock);
|
||||
list_splice_init(&tp->t_busy,
|
||||
&log->l_cilp->xc_ctx->busy_extents);
|
||||
spin_unlock(&log->l_cilp->xc_cil_lock);
|
||||
}
|
||||
|
||||
tp->t_commit_lsn = *commit_lsn;
|
||||
xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
|
||||
xfs_trans_unreserve_and_mod_sb(tp);
|
||||
|
||||
/*
|
||||
* Once all the items of the transaction have been copied to the CIL,
|
||||
* the items can be unlocked and freed.
|
||||
*
|
||||
* This needs to be done before we drop the CIL context lock because we
|
||||
* have to update state in the log items and unlock them before they go
|
||||
* to disk. If we don't, then the CIL checkpoint can race with us and
|
||||
* we can run checkpoint completion before we've updated and unlocked
|
||||
* the log items. This affects (at least) processing of stale buffers,
|
||||
* inodes and EFIs.
|
||||
*/
|
||||
xfs_trans_free_items(tp, *commit_lsn, 0);
|
||||
|
||||
/* check for background commit before unlock */
|
||||
if (log->l_cilp->xc_ctx->space_used > XLOG_CIL_SPACE_LIMIT(log))
|
||||
push = 1;
|
||||
|
||||
up_read(&log->l_cilp->xc_ctx_lock);
|
||||
|
||||
/*
|
||||
* We need to push CIL every so often so we don't cache more than we
|
||||
* can fit in the log. The limit really is that a checkpoint can't be
|
||||
* more than half the log (the current checkpoint is not allowed to
|
||||
* overwrite the previous checkpoint), but commit latency and memory
|
||||
* usage limit this to a smaller size in most cases.
|
||||
*/
|
||||
if (push)
|
||||
xlog_cil_push(log, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Conditionally push the CIL based on the sequence passed in.
|
||||
*
|
||||
|
@ -639,39 +695,34 @@ out_abort:
|
|||
* commit lsn is there. It'll be empty, so this is broken for now.
|
||||
*/
|
||||
xfs_lsn_t
|
||||
xlog_cil_push_lsn(
|
||||
xlog_cil_force_lsn(
|
||||
struct log *log,
|
||||
xfs_lsn_t push_seq)
|
||||
xfs_lsn_t sequence)
|
||||
{
|
||||
struct xfs_cil *cil = log->l_cilp;
|
||||
struct xfs_cil_ctx *ctx;
|
||||
xfs_lsn_t commit_lsn = NULLCOMMITLSN;
|
||||
|
||||
restart:
|
||||
down_write(&cil->xc_ctx_lock);
|
||||
ASSERT(push_seq <= cil->xc_ctx->sequence);
|
||||
ASSERT(sequence <= cil->xc_current_sequence);
|
||||
|
||||
/* check to see if we need to force out the current context */
|
||||
if (push_seq == cil->xc_ctx->sequence) {
|
||||
up_write(&cil->xc_ctx_lock);
|
||||
xlog_cil_push(log, 1);
|
||||
goto restart;
|
||||
}
|
||||
/*
|
||||
* check to see if we need to force out the current context.
|
||||
* xlog_cil_push() handles racing pushes for the same sequence,
|
||||
* so no need to deal with it here.
|
||||
*/
|
||||
if (sequence == cil->xc_current_sequence)
|
||||
xlog_cil_push(log, sequence);
|
||||
|
||||
/*
|
||||
* See if we can find a previous sequence still committing.
|
||||
* We can drop the flush lock as soon as we have the cil lock
|
||||
* because we are now only comparing contexts protected by
|
||||
* the cil lock.
|
||||
*
|
||||
* We need to wait for all previous sequence commits to complete
|
||||
* before allowing the force of push_seq to go ahead. Hence block
|
||||
* on commits for those as well.
|
||||
*/
|
||||
restart:
|
||||
spin_lock(&cil->xc_cil_lock);
|
||||
up_write(&cil->xc_ctx_lock);
|
||||
list_for_each_entry(ctx, &cil->xc_committing, committing) {
|
||||
if (ctx->sequence > push_seq)
|
||||
if (ctx->sequence > sequence)
|
||||
continue;
|
||||
if (!ctx->commit_lsn) {
|
||||
/*
|
||||
|
@ -681,7 +732,7 @@ restart:
|
|||
sv_wait(&cil->xc_commit_wait, 0, &cil->xc_cil_lock, 0);
|
||||
goto restart;
|
||||
}
|
||||
if (ctx->sequence != push_seq)
|
||||
if (ctx->sequence != sequence)
|
||||
continue;
|
||||
/* found it! */
|
||||
commit_lsn = ctx->commit_lsn;
|
||||
|
|
|
@ -422,6 +422,7 @@ struct xfs_cil {
|
|||
struct rw_semaphore xc_ctx_lock;
|
||||
struct list_head xc_committing;
|
||||
sv_t xc_commit_wait;
|
||||
xfs_lsn_t xc_current_sequence;
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -562,8 +563,16 @@ int xlog_cil_init(struct log *log);
|
|||
void xlog_cil_init_post_recovery(struct log *log);
|
||||
void xlog_cil_destroy(struct log *log);
|
||||
|
||||
int xlog_cil_push(struct log *log, int push_now);
|
||||
xfs_lsn_t xlog_cil_push_lsn(struct log *log, xfs_lsn_t push_sequence);
|
||||
/*
|
||||
* CIL force routines
|
||||
*/
|
||||
xfs_lsn_t xlog_cil_force_lsn(struct log *log, xfs_lsn_t sequence);
|
||||
|
||||
static inline void
|
||||
xlog_cil_force(struct log *log)
|
||||
{
|
||||
xlog_cil_force_lsn(log, log->l_cilp->xc_current_sequence);
|
||||
}
|
||||
|
||||
/*
|
||||
* Unmount record type is used as a pseudo transaction type for the ticket.
|
||||
|
|
|
@ -1167,7 +1167,7 @@ xfs_trans_del_item(
|
|||
* Unlock all of the items of a transaction and free all the descriptors
|
||||
* of that transaction.
|
||||
*/
|
||||
STATIC void
|
||||
void
|
||||
xfs_trans_free_items(
|
||||
struct xfs_trans *tp,
|
||||
xfs_lsn_t commit_lsn,
|
||||
|
@ -1653,9 +1653,6 @@ xfs_trans_commit_cil(
|
|||
return error;
|
||||
|
||||
current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
|
||||
|
||||
/* xfs_trans_free_items() unlocks them first */
|
||||
xfs_trans_free_items(tp, *commit_lsn, 0);
|
||||
xfs_trans_free(tp);
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -25,7 +25,8 @@ struct xfs_trans;
|
|||
|
||||
void xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *);
|
||||
void xfs_trans_del_item(struct xfs_log_item *);
|
||||
|
||||
void xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn,
|
||||
int flags);
|
||||
void xfs_trans_item_committed(struct xfs_log_item *lip,
|
||||
xfs_lsn_t commit_lsn, int aborted);
|
||||
void xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp);
|
||||
|
|
|
@ -985,22 +985,16 @@ continue_unlock:
|
|||
}
|
||||
}
|
||||
|
||||
if (wbc->nr_to_write > 0) {
|
||||
if (--wbc->nr_to_write == 0 &&
|
||||
wbc->sync_mode == WB_SYNC_NONE) {
|
||||
/*
|
||||
* We stop writing back only if we are
|
||||
* not doing integrity sync. In case of
|
||||
* integrity sync we have to keep going
|
||||
* because someone may be concurrently
|
||||
* dirtying pages, and we might have
|
||||
* synced a lot of newly appeared dirty
|
||||
* pages, but have not synced all of the
|
||||
* old dirty pages.
|
||||
*/
|
||||
done = 1;
|
||||
break;
|
||||
}
|
||||
/*
|
||||
* We stop writing back only if we are not doing
|
||||
* integrity sync. In case of integrity sync we have to
|
||||
* keep going until we have written all the pages
|
||||
* we tagged for writeback prior to entering this loop.
|
||||
*/
|
||||
if (--wbc->nr_to_write <= 0 &&
|
||||
wbc->sync_mode == WB_SYNC_NONE) {
|
||||
done = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
pagevec_release(&pvec);
|
||||
|
|
Loading…
Reference in New Issue