Merge branch 'for-linus' of git://oss.sgi.com/xfs/xfs

Two bugfixes in XFS for 3.3: one fix passes KM_SLEEP to kmem_realloc()
instead of 0, and the other resolves a possible deadlock in XFS quotas.

* 'for-linus' of git://oss.sgi.com/xfs/xfs:
  xfs: use a normal shrinker for the dquot freelist
  xfs: pass KM_SLEEP flag to kmem_realloc() in xlog_recover_add_to_cont_trans()
This commit is contained in:
Linus Torvalds 2012-02-13 14:19:45 -08:00
commit 19be13cfe3
7 changed files with 145 additions and 286 deletions

View File

@ -110,10 +110,4 @@ kmem_zone_destroy(kmem_zone_t *zone)
extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast); extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast);
extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast); extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast);
/*
 * Tell the caller whether a cache shake may run under the given allocation
 * mask.  Evaluates to non-zero only when gfp_mask has both __GFP_WAIT and
 * __GFP_FS set; a mask missing either flag yields 0.
 */
static inline int
kmem_shake_allow(gfp_t gfp_mask)
{
	if (!(gfp_mask & __GFP_WAIT))
		return 0;
	if (!(gfp_mask & __GFP_FS))
		return 0;
	return 1;
}
#endif /* __XFS_SUPPORT_KMEM_H__ */ #endif /* __XFS_SUPPORT_KMEM_H__ */

View File

@ -62,82 +62,6 @@ int xfs_dqerror_mod = 33;
static struct lock_class_key xfs_dquot_other_class; static struct lock_class_key xfs_dquot_other_class;
/*
 * Obtain an incore dquot for (mp, id, type) and bring it to its initial
 * state.  The memory comes from xfs_qm_dqalloc_incore(), which reports
 * whether it handed back a freshly allocated dquot or one reclaimed from
 * the freelist; the two cases need different amounts of setup.
 *
 * Within q_core only d_id is filled in here.  The intent is that the rest
 * of q_core is populated when the on-disk dquot is read in.  The log item
 * is initialized later as well.
 */
STATIC xfs_dquot_t *
xfs_qm_dqinit(
	xfs_mount_t	*mp,
	xfs_dqid_t	id,
	uint		type)
{
	xfs_dquot_t	*dqp;
	boolean_t	is_new;

	is_new = xfs_qm_dqalloc_incore(&dqp);

	/* Identity fields are set the same way for new and reclaimed dquots. */
	dqp->q_mount = mp;
	dqp->dq_flags = type;
	dqp->q_core.d_id = cpu_to_be32(id);

	if (!is_new) {
		/*
		 * Reclaimed dquot: only q_core was cleared on the reclaim
		 * path, so every other piece of per-dquot state has to be
		 * put back to its starting value by hand.
		 */
		dqp->q_nrefs = 0;
		atomic_set(&dqp->q_pincount, 0);

		INIT_LIST_HEAD(&dqp->q_mplist);
		INIT_LIST_HEAD(&dqp->q_hashlist);
		dqp->q_hash = NULL;

		dqp->q_blkno = 0;
		dqp->q_bufoffset = 0;
		dqp->q_fileoffset = 0;

		dqp->q_transp = NULL;
		dqp->q_gdquot = NULL;

		dqp->q_res_bcount = 0;
		dqp->q_res_icount = 0;
		dqp->q_res_rtbcount = 0;

		ASSERT(list_empty(&dqp->q_freelist));

		trace_xfs_dqreuse(dqp);
	} else {
		/*
		 * Brand new dquot: set up the freelist linkage, the lock and
		 * the wait queue.  None of this is redone for a reclaimed
		 * dquot.
		 */
		INIT_LIST_HEAD(&dqp->q_freelist);
		mutex_init(&dqp->q_qlock);
		init_waitqueue_head(&dqp->q_pinwait);

		/*
		 * The flush completion is used as a counting completion, so
		 * complete it once up front: that lets a single user take it
		 * without blocking.
		 */
		init_completion(&dqp->q_flush);
		complete(&dqp->q_flush);

		trace_xfs_dqinit(dqp);
	}

	/*
	 * In both cases, give non-user quotas their own lock class so that
	 * lockdep knows we can hold the lock of one dquot of each kind at
	 * the same time.
	 */
	if (!(type & XFS_DQ_USER))
		lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class);

	return dqp;
}
/* /*
* This is called to free all the memory associated with a dquot * This is called to free all the memory associated with a dquot
*/ */
@ -567,7 +491,32 @@ xfs_qm_dqread(
int error; int error;
int cancelflags = 0; int cancelflags = 0;
dqp = xfs_qm_dqinit(mp, id, type);
dqp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP);
dqp->dq_flags = type;
dqp->q_core.d_id = cpu_to_be32(id);
dqp->q_mount = mp;
INIT_LIST_HEAD(&dqp->q_freelist);
mutex_init(&dqp->q_qlock);
init_waitqueue_head(&dqp->q_pinwait);
/*
* Because we want to use a counting completion, complete
* the flush completion once to allow a single access to
* the flush completion without blocking.
*/
init_completion(&dqp->q_flush);
complete(&dqp->q_flush);
/*
* Make sure group quotas have a different lock class than user
* quotas.
*/
if (!(type & XFS_DQ_USER))
lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class);
atomic_inc(&xfs_Gqm->qm_totaldquots);
trace_xfs_dqread(dqp); trace_xfs_dqread(dqp);

View File

@ -1489,7 +1489,7 @@ xlog_recover_add_to_cont_trans(
old_ptr = item->ri_buf[item->ri_cnt-1].i_addr; old_ptr = item->ri_buf[item->ri_cnt-1].i_addr;
old_len = item->ri_buf[item->ri_cnt-1].i_len; old_len = item->ri_buf[item->ri_cnt-1].i_len;
ptr = kmem_realloc(old_ptr, len+old_len, old_len, 0u); ptr = kmem_realloc(old_ptr, len+old_len, old_len, KM_SLEEP);
memcpy(&ptr[old_len], dp, len); /* d, s, l */ memcpy(&ptr[old_len], dp, len); /* d, s, l */
item->ri_buf[item->ri_cnt-1].i_len += len; item->ri_buf[item->ri_cnt-1].i_len += len;
item->ri_buf[item->ri_cnt-1].i_addr = ptr; item->ri_buf[item->ri_cnt-1].i_addr = ptr;

View File

@ -50,7 +50,6 @@
*/ */
struct mutex xfs_Gqm_lock; struct mutex xfs_Gqm_lock;
struct xfs_qm *xfs_Gqm; struct xfs_qm *xfs_Gqm;
uint ndquot;
kmem_zone_t *qm_dqzone; kmem_zone_t *qm_dqzone;
kmem_zone_t *qm_dqtrxzone; kmem_zone_t *qm_dqtrxzone;
@ -93,7 +92,6 @@ xfs_Gqm_init(void)
goto out_free_udqhash; goto out_free_udqhash;
hsize /= sizeof(xfs_dqhash_t); hsize /= sizeof(xfs_dqhash_t);
ndquot = hsize << 8;
xqm = kmem_zalloc(sizeof(xfs_qm_t), KM_SLEEP); xqm = kmem_zalloc(sizeof(xfs_qm_t), KM_SLEEP);
xqm->qm_dqhashmask = hsize - 1; xqm->qm_dqhashmask = hsize - 1;
@ -137,7 +135,6 @@ xfs_Gqm_init(void)
xqm->qm_dqtrxzone = qm_dqtrxzone; xqm->qm_dqtrxzone = qm_dqtrxzone;
atomic_set(&xqm->qm_totaldquots, 0); atomic_set(&xqm->qm_totaldquots, 0);
xqm->qm_dqfree_ratio = XFS_QM_DQFREE_RATIO;
xqm->qm_nrefs = 0; xqm->qm_nrefs = 0;
return xqm; return xqm;
@ -1600,216 +1597,150 @@ xfs_qm_init_quotainos(
return 0; return 0;
} }
STATIC void
xfs_qm_dqfree_one(
/* struct xfs_dquot *dqp)
* Pop the least recently used dquot off the freelist and recycle it.
*/
STATIC struct xfs_dquot *
xfs_qm_dqreclaim_one(void)
{ {
struct xfs_dquot *dqp; struct xfs_mount *mp = dqp->q_mount;
int restarts = 0; struct xfs_quotainfo *qi = mp->m_quotainfo;
mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); mutex_lock(&dqp->q_hash->qh_lock);
restart: list_del_init(&dqp->q_hashlist);
list_for_each_entry(dqp, &xfs_Gqm->qm_dqfrlist, q_freelist) { dqp->q_hash->qh_version++;
struct xfs_mount *mp = dqp->q_mount; mutex_unlock(&dqp->q_hash->qh_lock);
if (!xfs_dqlock_nowait(dqp)) mutex_lock(&qi->qi_dqlist_lock);
continue; list_del_init(&dqp->q_mplist);
qi->qi_dquots--;
qi->qi_dqreclaims++;
mutex_unlock(&qi->qi_dqlist_lock);
/* xfs_qm_dqdestroy(dqp);
* This dquot has already been grabbed by dqlookup. }
* Remove it from the freelist and try again.
*/
if (dqp->q_nrefs) {
trace_xfs_dqreclaim_want(dqp);
XQM_STATS_INC(xqmstats.xs_qm_dqwants);
list_del_init(&dqp->q_freelist); STATIC void
xfs_Gqm->qm_dqfrlist_cnt--; xfs_qm_dqreclaim_one(
restarts++; struct xfs_dquot *dqp,
goto dqunlock; struct list_head *dispose_list)
} {
struct xfs_mount *mp = dqp->q_mount;
int error;
ASSERT(dqp->q_hash); if (!xfs_dqlock_nowait(dqp))
ASSERT(!list_empty(&dqp->q_mplist)); goto out_busy;
/* /*
* Try to grab the flush lock. If this dquot is in the process * This dquot has acquired a reference in the meantime remove it from
* of getting flushed to disk, we don't want to reclaim it. * the freelist and try again.
*/ */
if (!xfs_dqflock_nowait(dqp)) if (dqp->q_nrefs) {
goto dqunlock;
/*
* We have the flush lock so we know that this is not in the
* process of being flushed. So, if this is dirty, flush it
* DELWRI so that we don't get a freelist infested with
* dirty dquots.
*/
if (XFS_DQ_IS_DIRTY(dqp)) {
int error;
trace_xfs_dqreclaim_dirty(dqp);
/*
* We flush it delayed write, so don't bother
* releasing the freelist lock.
*/
error = xfs_qm_dqflush(dqp, SYNC_TRYLOCK);
if (error) {
xfs_warn(mp, "%s: dquot %p flush failed",
__func__, dqp);
}
goto dqunlock;
}
xfs_dqfunlock(dqp);
/*
* Prevent lookup now that we are going to reclaim the dquot.
* Once XFS_DQ_FREEING is set lookup won't touch the dquot,
* thus we can drop the lock now.
*/
dqp->dq_flags |= XFS_DQ_FREEING;
xfs_dqunlock(dqp); xfs_dqunlock(dqp);
mutex_lock(&dqp->q_hash->qh_lock); trace_xfs_dqreclaim_want(dqp);
list_del_init(&dqp->q_hashlist); XQM_STATS_INC(xqmstats.xs_qm_dqwants);
dqp->q_hash->qh_version++;
mutex_unlock(&dqp->q_hash->qh_lock);
mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);
list_del_init(&dqp->q_mplist);
mp->m_quotainfo->qi_dquots--;
mp->m_quotainfo->qi_dqreclaims++;
mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
ASSERT(dqp->q_nrefs == 0);
list_del_init(&dqp->q_freelist); list_del_init(&dqp->q_freelist);
xfs_Gqm->qm_dqfrlist_cnt--; xfs_Gqm->qm_dqfrlist_cnt--;
return;
mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
return dqp;
dqunlock:
xfs_dqunlock(dqp);
if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
break;
goto restart;
} }
mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); ASSERT(dqp->q_hash);
return NULL; ASSERT(!list_empty(&dqp->q_mplist));
}
/* /*
* Traverse the freelist of dquots and attempt to reclaim a maximum of * Try to grab the flush lock. If this dquot is in the process of
* 'howmany' dquots. This operation races with dqlookup(), and attempts to * getting flushed to disk, we don't want to reclaim it.
* favor the lookup function ... */
*/ if (!xfs_dqflock_nowait(dqp))
STATIC int goto out_busy;
xfs_qm_shake_freelist(
int howmany)
{
int nreclaimed = 0;
xfs_dquot_t *dqp;
if (howmany <= 0) /*
return 0; * We have the flush lock so we know that this is not in the
* process of being flushed. So, if this is dirty, flush it
* DELWRI so that we don't get a freelist infested with
* dirty dquots.
*/
if (XFS_DQ_IS_DIRTY(dqp)) {
trace_xfs_dqreclaim_dirty(dqp);
while (nreclaimed < howmany) { /*
dqp = xfs_qm_dqreclaim_one(); * We flush it delayed write, so don't bother releasing the
if (!dqp) * freelist lock.
return nreclaimed; */
xfs_qm_dqdestroy(dqp); error = xfs_qm_dqflush(dqp, 0);
nreclaimed++; if (error) {
xfs_warn(mp, "%s: dquot %p flush failed",
__func__, dqp);
}
/*
* Give the dquot another try on the freelist, as the
* flushing will take some time.
*/
goto out_busy;
} }
return nreclaimed; xfs_dqfunlock(dqp);
/*
* Prevent lookups now that we are past the point of no return.
*/
dqp->dq_flags |= XFS_DQ_FREEING;
xfs_dqunlock(dqp);
ASSERT(dqp->q_nrefs == 0);
list_move_tail(&dqp->q_freelist, dispose_list);
xfs_Gqm->qm_dqfrlist_cnt--;
trace_xfs_dqreclaim_done(dqp);
XQM_STATS_INC(xqmstats.xs_qm_dqreclaims);
return;
out_busy:
xfs_dqunlock(dqp);
/*
* Move the dquot to the tail of the list so that we don't spin on it.
*/
list_move_tail(&dqp->q_freelist, &xfs_Gqm->qm_dqfrlist);
trace_xfs_dqreclaim_busy(dqp);
XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses);
} }
/*
* The kmem_shake interface is invoked when memory is running low.
*/
/* ARGSUSED */
STATIC int STATIC int
xfs_qm_shake( xfs_qm_shake(
struct shrinker *shrink, struct shrinker *shrink,
struct shrink_control *sc) struct shrink_control *sc)
{ {
int ndqused, nfree, n; int nr_to_scan = sc->nr_to_scan;
gfp_t gfp_mask = sc->gfp_mask; LIST_HEAD (dispose_list);
struct xfs_dquot *dqp;
if (!kmem_shake_allow(gfp_mask)) if ((sc->gfp_mask & (__GFP_FS|__GFP_WAIT)) != (__GFP_FS|__GFP_WAIT))
return 0;
if (!xfs_Gqm)
return 0; return 0;
if (!nr_to_scan)
goto out;
nfree = xfs_Gqm->qm_dqfrlist_cnt; /* free dquots */ mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
/* incore dquots in all f/s's */ while (!list_empty(&xfs_Gqm->qm_dqfrlist)) {
ndqused = atomic_read(&xfs_Gqm->qm_totaldquots) - nfree; if (nr_to_scan-- <= 0)
break;
ASSERT(ndqused >= 0); dqp = list_first_entry(&xfs_Gqm->qm_dqfrlist, struct xfs_dquot,
q_freelist);
if (nfree <= ndqused && nfree < ndquot) xfs_qm_dqreclaim_one(dqp, &dispose_list);
return 0;
ndqused *= xfs_Gqm->qm_dqfree_ratio; /* target # of free dquots */
n = nfree - ndqused - ndquot; /* # over target */
return xfs_qm_shake_freelist(MAX(nfree, n));
}
/*------------------------------------------------------------------*/
/*
* Return a new incore dquot. Depending on the number of
* dquots in the system, we either allocate a new one on the kernel heap,
* or reclaim a free one.
* Return value is B_TRUE if we allocated a new dquot, B_FALSE if we managed
* to reclaim an existing one from the freelist.
*/
boolean_t
xfs_qm_dqalloc_incore(
xfs_dquot_t **O_dqpp)
{
xfs_dquot_t *dqp;
/*
* Check against high water mark to see if we want to pop
* a nincompoop dquot off the freelist.
*/
if (atomic_read(&xfs_Gqm->qm_totaldquots) >= ndquot) {
/*
* Try to recycle a dquot from the freelist.
*/
if ((dqp = xfs_qm_dqreclaim_one())) {
XQM_STATS_INC(xqmstats.xs_qm_dqreclaims);
/*
* Just zero the core here. The rest will get
* reinitialized by caller. XXX we shouldn't even
* do this zero ...
*/
memset(&dqp->q_core, 0, sizeof(dqp->q_core));
*O_dqpp = dqp;
return B_FALSE;
}
XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses);
} }
mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
/* while (!list_empty(&dispose_list)) {
* Allocate a brand new dquot on the kernel heap and return it dqp = list_first_entry(&dispose_list, struct xfs_dquot,
* to the caller to initialize. q_freelist);
*/ list_del_init(&dqp->q_freelist);
ASSERT(xfs_Gqm->qm_dqzone != NULL); xfs_qm_dqfree_one(dqp);
*O_dqpp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP); }
atomic_inc(&xfs_Gqm->qm_totaldquots); out:
return (xfs_Gqm->qm_dqfrlist_cnt / 100) * sysctl_vfs_cache_pressure;
return B_TRUE;
} }
/* /*
* Start a transaction and write the incore superblock changes to * Start a transaction and write the incore superblock changes to
* disk. flags parameter indicates which fields have changed. * disk. flags parameter indicates which fields have changed.

View File

@ -26,23 +26,11 @@
struct xfs_qm; struct xfs_qm;
struct xfs_inode; struct xfs_inode;
extern uint ndquot;
extern struct mutex xfs_Gqm_lock; extern struct mutex xfs_Gqm_lock;
extern struct xfs_qm *xfs_Gqm; extern struct xfs_qm *xfs_Gqm;
extern kmem_zone_t *qm_dqzone; extern kmem_zone_t *qm_dqzone;
extern kmem_zone_t *qm_dqtrxzone; extern kmem_zone_t *qm_dqtrxzone;
/*
* Ditto, for xfs_qm_dqreclaim_one.
*/
#define XFS_QM_RECLAIM_MAX_RESTARTS 4
/*
* Ideal ratio of free to in use dquots. Quota manager makes an attempt
* to keep this balance.
*/
#define XFS_QM_DQFREE_RATIO 2
/* /*
* Dquot hashtable constants/threshold values. * Dquot hashtable constants/threshold values.
*/ */
@ -74,7 +62,6 @@ typedef struct xfs_qm {
int qm_dqfrlist_cnt; int qm_dqfrlist_cnt;
atomic_t qm_totaldquots; /* total incore dquots */ atomic_t qm_totaldquots; /* total incore dquots */
uint qm_nrefs; /* file systems with quota on */ uint qm_nrefs; /* file systems with quota on */
int qm_dqfree_ratio;/* ratio of free to inuse dquots */
kmem_zone_t *qm_dqzone; /* dquot mem-alloc zone */ kmem_zone_t *qm_dqzone; /* dquot mem-alloc zone */
kmem_zone_t *qm_dqtrxzone; /* t_dqinfo of transactions */ kmem_zone_t *qm_dqtrxzone; /* t_dqinfo of transactions */
} xfs_qm_t; } xfs_qm_t;
@ -143,7 +130,6 @@ extern int xfs_qm_quotacheck(xfs_mount_t *);
extern int xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t); extern int xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t);
/* dquot stuff */ /* dquot stuff */
extern boolean_t xfs_qm_dqalloc_incore(xfs_dquot_t **);
extern int xfs_qm_dqpurge_all(xfs_mount_t *, uint); extern int xfs_qm_dqpurge_all(xfs_mount_t *, uint);
extern void xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint); extern void xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint);

View File

@ -42,9 +42,9 @@ static int xqm_proc_show(struct seq_file *m, void *v)
{ {
/* maximum; incore; ratio free to inuse; freelist */ /* maximum; incore; ratio free to inuse; freelist */
seq_printf(m, "%d\t%d\t%d\t%u\n", seq_printf(m, "%d\t%d\t%d\t%u\n",
ndquot, 0,
xfs_Gqm? atomic_read(&xfs_Gqm->qm_totaldquots) : 0, xfs_Gqm? atomic_read(&xfs_Gqm->qm_totaldquots) : 0,
xfs_Gqm? xfs_Gqm->qm_dqfree_ratio : 0, 0,
xfs_Gqm? xfs_Gqm->qm_dqfrlist_cnt : 0); xfs_Gqm? xfs_Gqm->qm_dqfrlist_cnt : 0);
return 0; return 0;
} }

View File

@ -733,11 +733,10 @@ DEFINE_EVENT(xfs_dquot_class, name, \
DEFINE_DQUOT_EVENT(xfs_dqadjust); DEFINE_DQUOT_EVENT(xfs_dqadjust);
DEFINE_DQUOT_EVENT(xfs_dqreclaim_want); DEFINE_DQUOT_EVENT(xfs_dqreclaim_want);
DEFINE_DQUOT_EVENT(xfs_dqreclaim_dirty); DEFINE_DQUOT_EVENT(xfs_dqreclaim_dirty);
DEFINE_DQUOT_EVENT(xfs_dqreclaim_unlink); DEFINE_DQUOT_EVENT(xfs_dqreclaim_busy);
DEFINE_DQUOT_EVENT(xfs_dqreclaim_done);
DEFINE_DQUOT_EVENT(xfs_dqattach_found); DEFINE_DQUOT_EVENT(xfs_dqattach_found);
DEFINE_DQUOT_EVENT(xfs_dqattach_get); DEFINE_DQUOT_EVENT(xfs_dqattach_get);
DEFINE_DQUOT_EVENT(xfs_dqinit);
DEFINE_DQUOT_EVENT(xfs_dqreuse);
DEFINE_DQUOT_EVENT(xfs_dqalloc); DEFINE_DQUOT_EVENT(xfs_dqalloc);
DEFINE_DQUOT_EVENT(xfs_dqtobp_read); DEFINE_DQUOT_EVENT(xfs_dqtobp_read);
DEFINE_DQUOT_EVENT(xfs_dqread); DEFINE_DQUOT_EVENT(xfs_dqread);