staging/lustre: Revert "LU-2139 osc: Track and limit "unstable" pages"
This seems to be causing multiple issues: LU-3274, LU-3277 [The original commit is folded into the large Lustre patch. So we don't have an exact commit to revert for the kernel client -- Peng Tao] Signed-off-by: Peng Tao <tao.peng@emc.com> Signed-off-by: Andreas Dilger <andreas.dilger@intel.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
This commit is contained in:
parent
50dc198aa8
commit
c52f69c578
|
@ -422,10 +422,8 @@ struct lov_stripe_md *ccc_inode_lsm_get(struct inode *inode);
|
|||
void ccc_inode_lsm_put(struct inode *inode, struct lov_stripe_md *lsm);
|
||||
|
||||
/**
|
||||
* Data structure managing a client's cached pages. A count of
|
||||
* "unstable" pages is maintained, and an LRU of clean pages is
|
||||
* maintained. "unstable" pages are pages pinned by the ptlrpc
|
||||
* layer for recovery purposes.
|
||||
* Data structure managing a client's cached clean pages. An LRU of
|
||||
* pages is maintained, along with other statistics.
|
||||
*/
|
||||
struct cl_client_cache {
|
||||
atomic_t ccc_users; /* # of users (OSCs) of this data */
|
||||
|
@ -434,8 +432,6 @@ struct cl_client_cache {
|
|||
atomic_t ccc_lru_left; /* # of LRU entries available */
|
||||
unsigned long ccc_lru_max; /* Max # of LRU entries possible */
|
||||
unsigned int ccc_lru_shrinkers; /* # of threads reclaiming */
|
||||
atomic_t ccc_unstable_nr; /* # of unstable pages pinned */
|
||||
wait_queue_head_t ccc_unstable_waitq; /* Signaled on BRW commit */
|
||||
};
|
||||
|
||||
#endif /*LCLIENT_H */
|
||||
|
|
|
@ -1838,9 +1838,7 @@ struct ptlrpc_request {
|
|||
rq_no_retry_einprogress:1,
|
||||
/* allow the req to be sent if the import is in recovery
|
||||
* status */
|
||||
rq_allow_replay:1,
|
||||
/* bulk request, sent to server, but uncommitted */
|
||||
rq_unstable:1;
|
||||
rq_allow_replay:1;
|
||||
|
||||
unsigned int rq_nr_resend;
|
||||
|
||||
|
|
|
@ -678,7 +678,7 @@ struct lov_obd {
|
|||
proc_dir_entry_t *lov_pool_proc_entry;
|
||||
enum lustre_sec_part lov_sp_me;
|
||||
|
||||
/* Cached LRU and unstable data from upper layer */
|
||||
/* Cached LRU pages from upper layer */
|
||||
void *lov_cache;
|
||||
|
||||
struct rw_semaphore lov_notify_lock;
|
||||
|
|
|
@ -67,7 +67,6 @@ extern int at_early_margin;
|
|||
extern int at_extra;
|
||||
extern unsigned int obd_sync_filter;
|
||||
extern unsigned int obd_max_dirty_pages;
|
||||
extern atomic_t obd_unstable_pages;
|
||||
extern atomic_t obd_dirty_pages;
|
||||
extern atomic_t obd_dirty_transit_pages;
|
||||
extern unsigned int obd_alloc_fail_rate;
|
||||
|
|
|
@ -474,10 +474,6 @@ struct ll_sb_info {
|
|||
|
||||
struct lprocfs_stats *ll_stats; /* lprocfs stats counter */
|
||||
|
||||
/* Used to track "unstable" pages on a client, and maintain a
|
||||
* LRU list of clean pages. An "unstable" page is defined as
|
||||
* any page which is sent to a server as part of a bulk request,
|
||||
* but is uncommitted to stable storage. */
|
||||
struct cl_client_cache ll_cache;
|
||||
|
||||
struct lprocfs_stats *ll_ra_stats;
|
||||
|
|
|
@ -99,16 +99,13 @@ static struct ll_sb_info *ll_init_sbi(void)
|
|||
lru_page_max = (pages / 4) * 3;
|
||||
}
|
||||
|
||||
/* initialize ll_cache data */
|
||||
/* initialize lru data */
|
||||
atomic_set(&sbi->ll_cache.ccc_users, 0);
|
||||
sbi->ll_cache.ccc_lru_max = lru_page_max;
|
||||
atomic_set(&sbi->ll_cache.ccc_lru_left, lru_page_max);
|
||||
spin_lock_init(&sbi->ll_cache.ccc_lru_lock);
|
||||
INIT_LIST_HEAD(&sbi->ll_cache.ccc_lru);
|
||||
|
||||
atomic_set(&sbi->ll_cache.ccc_unstable_nr, 0);
|
||||
init_waitqueue_head(&sbi->ll_cache.ccc_unstable_waitq);
|
||||
|
||||
sbi->ll_ra_info.ra_max_pages_per_file = min(pages / 32,
|
||||
SBI_DEFAULT_READAHEAD_MAX);
|
||||
sbi->ll_ra_info.ra_max_pages = sbi->ll_ra_info.ra_max_pages_per_file;
|
||||
|
@ -1074,7 +1071,7 @@ void ll_put_super(struct super_block *sb)
|
|||
struct lustre_sb_info *lsi = s2lsi(sb);
|
||||
struct ll_sb_info *sbi = ll_s2sbi(sb);
|
||||
char *profilenm = get_profile_name(sb);
|
||||
int ccc_count, next, force = 1, rc = 0;
|
||||
int next, force = 1;
|
||||
ENTRY;
|
||||
|
||||
CDEBUG(D_VFSTRACE, "VFS Op: sb %p - %s\n", sb, profilenm);
|
||||
|
@ -1090,19 +1087,6 @@ void ll_put_super(struct super_block *sb)
|
|||
force = obd->obd_force;
|
||||
}
|
||||
|
||||
/* Wait for unstable pages to be committed to stable storage */
|
||||
if (force == 0) {
|
||||
struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL);
|
||||
rc = l_wait_event(sbi->ll_cache.ccc_unstable_waitq,
|
||||
atomic_read(&sbi->ll_cache.ccc_unstable_nr) == 0,
|
||||
&lwi);
|
||||
}
|
||||
|
||||
ccc_count = atomic_read(&sbi->ll_cache.ccc_unstable_nr);
|
||||
if (force == 0 && rc != -EINTR)
|
||||
LASSERTF(ccc_count == 0, "count: %i\n", ccc_count);
|
||||
|
||||
|
||||
/* We need to set force before the lov_disconnect in
|
||||
lustre_common_put_super, since l_d cleans up osc's as well. */
|
||||
if (force) {
|
||||
|
|
|
@ -725,23 +725,6 @@ static int ll_sbi_flags_seq_show(struct seq_file *m, void *v)
|
|||
}
|
||||
LPROC_SEQ_FOPS_RO(ll_sbi_flags);
|
||||
|
||||
static int ll_unstable_stats_seq_show(struct seq_file *m, void *v)
|
||||
{
|
||||
struct super_block *sb = m->private;
|
||||
struct ll_sb_info *sbi = ll_s2sbi(sb);
|
||||
struct cl_client_cache *cache = &sbi->ll_cache;
|
||||
int pages, mb, rc;
|
||||
|
||||
pages = atomic_read(&cache->ccc_unstable_nr);
|
||||
mb = (pages * PAGE_CACHE_SIZE) >> 20;
|
||||
|
||||
rc = seq_printf(m, "unstable_pages: %8d\n"
|
||||
"unstable_mb: %8d\n", pages, mb);
|
||||
|
||||
return rc;
|
||||
}
|
||||
LPROC_SEQ_FOPS_RO(ll_unstable_stats);
|
||||
|
||||
static struct lprocfs_vars lprocfs_llite_obd_vars[] = {
|
||||
{ "uuid", &ll_sb_uuid_fops, 0, 0 },
|
||||
//{ "mntpt_path", ll_rd_path, 0, 0 },
|
||||
|
@ -770,7 +753,6 @@ static struct lprocfs_vars lprocfs_llite_obd_vars[] = {
|
|||
{ "lazystatfs", &ll_lazystatfs_fops, 0 },
|
||||
{ "max_easize", &ll_maxea_size_fops, 0, 0 },
|
||||
{ "sbi_flags", &ll_sbi_flags_fops, 0, 0 },
|
||||
{ "unstable_stats", &ll_unstable_stats_fops, 0, 0},
|
||||
{ 0 }
|
||||
};
|
||||
|
||||
|
|
|
@ -57,7 +57,7 @@
|
|||
#include <lprocfs_status.h>
|
||||
#include <lustre_param.h>
|
||||
#include <cl_object.h>
|
||||
#include <lclient.h>
|
||||
#include <lclient.h> /* for cl_client_lru */
|
||||
#include <lustre/ll_fiemap.h>
|
||||
#include <lustre_log.h>
|
||||
#include <lustre_fid.h>
|
||||
|
|
|
@ -68,8 +68,6 @@ unsigned int obd_dump_on_eviction;
|
|||
EXPORT_SYMBOL(obd_dump_on_eviction);
|
||||
unsigned int obd_max_dirty_pages = 256;
|
||||
EXPORT_SYMBOL(obd_max_dirty_pages);
|
||||
atomic_t obd_unstable_pages;
|
||||
EXPORT_SYMBOL(obd_unstable_pages);
|
||||
atomic_t obd_dirty_pages;
|
||||
EXPORT_SYMBOL(obd_dirty_pages);
|
||||
unsigned int obd_timeout = OBD_TIMEOUT_DEFAULT; /* seconds */
|
||||
|
|
|
@ -1310,12 +1310,10 @@ static int osc_completion(const struct lu_env *env, struct osc_async_page *oap,
|
|||
#define OSC_DUMP_GRANT(cli, fmt, args...) do { \
|
||||
struct client_obd *__tmp = (cli); \
|
||||
CDEBUG(D_CACHE, "%s: { dirty: %ld/%ld dirty_pages: %d/%d " \
|
||||
"unstable_pages: %d/%d dropped: %ld avail: %ld, " \
|
||||
"reserved: %ld, flight: %d } " fmt, \
|
||||
"dropped: %ld avail: %ld, reserved: %ld, flight: %d } " fmt, \
|
||||
__tmp->cl_import->imp_obd->obd_name, \
|
||||
__tmp->cl_dirty, __tmp->cl_dirty_max, \
|
||||
atomic_read(&obd_dirty_pages), obd_max_dirty_pages, \
|
||||
atomic_read(&obd_unstable_pages), obd_max_dirty_pages, \
|
||||
__tmp->cl_lost_grant, __tmp->cl_avail_grant, \
|
||||
__tmp->cl_reserved_grant, __tmp->cl_w_in_flight, ##args); \
|
||||
} while (0)
|
||||
|
@ -1465,8 +1463,7 @@ static int osc_enter_cache_try(struct client_obd *cli,
|
|||
return 0;
|
||||
|
||||
if (cli->cl_dirty + PAGE_CACHE_SIZE <= cli->cl_dirty_max &&
|
||||
atomic_read(&obd_unstable_pages) + 1 +
|
||||
atomic_read(&obd_dirty_pages) <= obd_max_dirty_pages) {
|
||||
atomic_read(&obd_dirty_pages) + 1 <= obd_max_dirty_pages) {
|
||||
osc_consume_write_grant(cli, &oap->oap_brw_page);
|
||||
if (transient) {
|
||||
cli->cl_dirty_transit += PAGE_CACHE_SIZE;
|
||||
|
@ -1579,9 +1576,9 @@ void osc_wake_cache_waiters(struct client_obd *cli)
|
|||
|
||||
ocw->ocw_rc = -EDQUOT;
|
||||
/* we can't dirty more */
|
||||
if (cli->cl_dirty + PAGE_CACHE_SIZE > cli->cl_dirty_max ||
|
||||
atomic_read(&obd_unstable_pages) + 1 +
|
||||
atomic_read(&obd_dirty_pages) > obd_max_dirty_pages) {
|
||||
if ((cli->cl_dirty + PAGE_CACHE_SIZE > cli->cl_dirty_max) ||
|
||||
(atomic_read(&obd_dirty_pages) + 1 >
|
||||
obd_max_dirty_pages)) {
|
||||
CDEBUG(D_CACHE, "no dirty room: dirty: %ld "
|
||||
"osc max %ld, sys max %d\n", cli->cl_dirty,
|
||||
cli->cl_dirty_max, obd_max_dirty_pages);
|
||||
|
@ -1749,84 +1746,6 @@ static void osc_process_ar(struct osc_async_rc *ar, __u64 xid,
|
|||
ar->ar_force_sync = 0;
|
||||
}
|
||||
|
||||
/* Performs "unstable" page accounting. This function balances the
|
||||
* increment operations performed in osc_inc_unstable_pages. It is
|
||||
* registered as the RPC request callback, and is executed when the
|
||||
* bulk RPC is committed on the server. Thus at this point, the pages
|
||||
* involved in the bulk transfer are no longer considered unstable. */
|
||||
void osc_dec_unstable_pages(struct ptlrpc_request *req)
|
||||
{
|
||||
struct ptlrpc_bulk_desc *desc = req->rq_bulk;
|
||||
struct client_obd *cli = &req->rq_import->imp_obd->u.cli;
|
||||
obd_count page_count = desc->bd_iov_count;
|
||||
int i;
|
||||
|
||||
/* No unstable page tracking */
|
||||
if (cli->cl_cache == NULL)
|
||||
return;
|
||||
|
||||
LASSERT(page_count >= 0);
|
||||
|
||||
for (i = 0; i < page_count; i++)
|
||||
dec_zone_page_state(desc->bd_iov[i].kiov_page, NR_UNSTABLE_NFS);
|
||||
|
||||
atomic_sub(page_count, &cli->cl_cache->ccc_unstable_nr);
|
||||
LASSERT(atomic_read(&cli->cl_cache->ccc_unstable_nr) >= 0);
|
||||
|
||||
atomic_sub(page_count, &obd_unstable_pages);
|
||||
LASSERT(atomic_read(&obd_unstable_pages) >= 0);
|
||||
|
||||
spin_lock(&req->rq_lock);
|
||||
req->rq_committed = 1;
|
||||
req->rq_unstable = 0;
|
||||
spin_unlock(&req->rq_lock);
|
||||
|
||||
wake_up_all(&cli->cl_cache->ccc_unstable_waitq);
|
||||
}
|
||||
|
||||
/* "unstable" page accounting. See: osc_dec_unstable_pages. */
|
||||
void osc_inc_unstable_pages(struct ptlrpc_request *req)
|
||||
{
|
||||
struct ptlrpc_bulk_desc *desc = req->rq_bulk;
|
||||
struct client_obd *cli = &req->rq_import->imp_obd->u.cli;
|
||||
obd_count page_count = desc->bd_iov_count;
|
||||
int i;
|
||||
|
||||
/* No unstable page tracking */
|
||||
if (cli->cl_cache == NULL)
|
||||
return;
|
||||
|
||||
LASSERT(page_count >= 0);
|
||||
|
||||
for (i = 0; i < page_count; i++)
|
||||
inc_zone_page_state(desc->bd_iov[i].kiov_page, NR_UNSTABLE_NFS);
|
||||
|
||||
LASSERT(atomic_read(&cli->cl_cache->ccc_unstable_nr) >= 0);
|
||||
atomic_add(page_count, &cli->cl_cache->ccc_unstable_nr);
|
||||
|
||||
LASSERT(atomic_read(&obd_unstable_pages) >= 0);
|
||||
atomic_add(page_count, &obd_unstable_pages);
|
||||
|
||||
spin_lock(&req->rq_lock);
|
||||
|
||||
/* If the request has already been committed (i.e. brw_commit
|
||||
* called via rq_commit_cb), we need to undo the unstable page
|
||||
* increments we just performed because rq_commit_cb wont be
|
||||
* called again. Otherwise, just set the commit callback so the
|
||||
* unstable page accounting is properly updated when the request
|
||||
* is committed */
|
||||
if (req->rq_committed) {
|
||||
/* Drop lock before calling osc_dec_unstable_pages */
|
||||
spin_unlock(&req->rq_lock);
|
||||
osc_dec_unstable_pages(req);
|
||||
spin_lock(&req->rq_lock);
|
||||
} else {
|
||||
req->rq_unstable = 1;
|
||||
req->rq_commit_cb = osc_dec_unstable_pages;
|
||||
}
|
||||
|
||||
spin_unlock(&req->rq_lock);
|
||||
}
|
||||
|
||||
/* this must be called holding the loi list lock to give coverage to exit_cache,
|
||||
* async_flag maintenance, and oap_request */
|
||||
|
@ -1839,9 +1758,6 @@ static void osc_ap_completion(const struct lu_env *env, struct client_obd *cli,
|
|||
|
||||
ENTRY;
|
||||
if (oap->oap_request != NULL) {
|
||||
if (rc == 0)
|
||||
osc_inc_unstable_pages(oap->oap_request);
|
||||
|
||||
xid = ptlrpc_req_xid(oap->oap_request);
|
||||
ptlrpc_req_finished(oap->oap_request);
|
||||
oap->oap_request = NULL;
|
||||
|
|
|
@ -205,6 +205,4 @@ int osc_quotacheck(struct obd_device *unused, struct obd_export *exp,
|
|||
struct obd_quotactl *oqctl);
|
||||
int osc_quota_poll_check(struct obd_export *exp, struct if_quotacheck *qchk);
|
||||
|
||||
void osc_inc_unstable_pages(struct ptlrpc_request *req);
|
||||
void osc_dec_unstable_pages(struct ptlrpc_request *req);
|
||||
#endif /* OSC_INTERNAL_H */
|
||||
|
|
|
@ -826,16 +826,13 @@ static void osc_announce_cached(struct client_obd *cli, struct obdo *oa,
|
|||
CERROR("dirty %lu - %lu > dirty_max %lu\n",
|
||||
cli->cl_dirty, cli->cl_dirty_transit, cli->cl_dirty_max);
|
||||
oa->o_undirty = 0;
|
||||
} else if (unlikely(atomic_read(&obd_unstable_pages) +
|
||||
atomic_read(&obd_dirty_pages) -
|
||||
} else if (unlikely(atomic_read(&obd_dirty_pages) -
|
||||
atomic_read(&obd_dirty_transit_pages) >
|
||||
(long)(obd_max_dirty_pages + 1))) {
|
||||
/* The atomic_read() allowing the atomic_inc() are
|
||||
* not covered by a lock thus they may safely race and trip
|
||||
* this CERROR() unless we add in a small fudge factor (+1). */
|
||||
CERROR("%s: dirty %d + %d - %d > system dirty_max %d\n",
|
||||
cli->cl_import->imp_obd->obd_name,
|
||||
atomic_read(&obd_unstable_pages),
|
||||
CERROR("dirty %d - %d > system dirty_max %d\n",
|
||||
atomic_read(&obd_dirty_pages),
|
||||
atomic_read(&obd_dirty_transit_pages),
|
||||
obd_max_dirty_pages);
|
||||
|
@ -1743,7 +1740,6 @@ static int osc_brw_redo_request(struct ptlrpc_request *request,
|
|||
aa->aa_resends++;
|
||||
new_req->rq_interpret_reply = request->rq_interpret_reply;
|
||||
new_req->rq_async_args = request->rq_async_args;
|
||||
new_req->rq_commit_cb = request->rq_commit_cb;
|
||||
/* cap resend delay to the current request timeout, this is similar to
|
||||
* what ptlrpc does (see after_reply()) */
|
||||
if (aa->aa_resends > new_req->rq_timeout)
|
||||
|
@ -2037,20 +2033,6 @@ static int brw_interpret(const struct lu_env *env,
|
|||
RETURN(rc);
|
||||
}
|
||||
|
||||
static void brw_commit(struct ptlrpc_request *req)
|
||||
{
|
||||
spin_lock(&req->rq_lock);
|
||||
/* If osc_inc_unstable_pages (via osc_extent_finish) races with
|
||||
* this called via the rq_commit_cb, I need to ensure
|
||||
* osc_dec_unstable_pages is still called. Otherwise unstable
|
||||
* pages may be leaked. */
|
||||
if (req->rq_unstable)
|
||||
osc_dec_unstable_pages(req);
|
||||
else
|
||||
req->rq_committed = 1;
|
||||
spin_unlock(&req->rq_lock);
|
||||
}
|
||||
|
||||
/**
|
||||
* Build an RPC by the list of extent @ext_list. The caller must ensure
|
||||
* that the total pages in this list are NOT over max pages per RPC.
|
||||
|
@ -2156,7 +2138,6 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
|
|||
GOTO(out, rc);
|
||||
}
|
||||
|
||||
req->rq_commit_cb = brw_commit;
|
||||
req->rq_interpret_reply = brw_interpret;
|
||||
|
||||
if (mem_tight != 0)
|
||||
|
|
Loading…
Reference in New Issue