OpenCloudOS-Kernel/fs/nfsd/nfs4state.c

4195 lines
109 KiB
C
Raw Normal View History

/*
* linux/fs/nfsd/nfs4state.c
*
* Copyright (c) 2001 The Regents of the University of Michigan.
* All rights reserved.
*
* Kendrick Smith <kmsmith@umich.edu>
* Andy Adamson <kandros@umich.edu>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#include <linux/param.h>
#include <linux/major.h>
#include <linux/slab.h>
#include <linux/sunrpc/svc.h>
#include <linux/nfsd/nfsd.h>
#include <linux/nfsd/cache.h>
#include <linux/file.h>
#include <linux/mount.h>
#include <linux/workqueue.h>
#include <linux/smp_lock.h>
#include <linux/kthread.h>
#include <linux/nfs4.h>
#include <linux/nfsd/state.h>
#include <linux/nfsd/xdr4.h>
#include <linux/namei.h>
#include <linux/swap.h>
#include <linux/mutex.h>
#include <linux/lockd/bind.h>
#include <linux/module.h>
#include <linux/sunrpc/svcauth_gss.h>
#define NFSDDBG_FACILITY NFSDDBG_PROC
/* Globals */
static time_t lease_time = 90; /* default lease time */
static time_t user_lease_time = 90;
static time_t boot_time;
static u32 current_ownerid = 1;
static u32 current_fileid = 1;
static u32 current_delegid = 1;
static u32 nfs4_init;
static stateid_t zerostateid; /* bits all 0 */
static stateid_t onestateid; /* bits all 1 */
nfsd41: create_session operation Implement the create_session operation confoming to http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion1-26 Look up the client id (generated by the server on exchange_id, given by the client on create_session). If neither a confirmed or unconfirmed client is found then the client id is stale If a confirmed cilent is found (i.e. we already received create_session for it) then compare the sequence id to determine if it's a replay or possibly a mis-ordered rpc. If the seqid is in order, update the confirmed client seqid and procedd with updating the session parameters. If an unconfirmed client_id is found then verify the creds and seqid. If both match move the client id to confirmed state and proceed with processing the create_session. Currently, we do not support persistent sessions, and RDMA. alloc_init_session generates a new sessionid and creates a session structure. NFSD_PAGES_PER_SLOT is used for the max response cached calculation, and for the counting of DRC pages using the hard limits set in struct srv_serv. A note on NFSD_PAGES_PER_SLOT: Other patches in this series allow for NFSD_PAGES_PER_SLOT + 1 pages to be cached in a DRC slot when the response size is less than NFSD_PAGES_PER_SLOT * PAGE_SIZE but xdr_buf pages are used. e.g. a READDIR operation will encode a small amount of data in the xdr_buf head, and then the READDIR in the xdr_buf pages. So, the hard limit calculation use of pages by a session is underestimated by the number of cached operations using the xdr_buf pages. Yet another patch caches no pages for the solo sequence operation, or any compound where cache_this is False. So the hard limit calculation use of pages by a session is overestimated by the number of these operations in the cache. TODO: improve resource pre-allocation and negotiate session parameters accordingly. Respect and possibly adjust backchannel attributes. Signed-off-by: Marc Eshel <eshel@almaden.ibm.com> Signed-off-by: Dean Hildebrand <dhildeb@us.ibm.com> [nfsd41: remove headerpadsz from channel attributes] Our client and server only support a headerpadsz of 0. [nfsd41: use DRC limits in fore channel init] [nfsd41: do not change CREATE_SESSION back channel attrs] Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [use sessionid_lock spin lock] [nfsd41: use bool inuse for slot state] Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfsd41 remove sl_session from alloc_init_session] Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [simplify nfsd4_encode_create_session error handling] [nfsd41: fix comment style in init_forechannel_attrs] [nfsd41: allocate struct nfsd4_session and slot table in one piece] [nfsd41: no need to INIT_LIST_HEAD in alloc_init_session just prior to list_add] Signed-off-by: Benny Halevy <bhalevy@panasas.com> Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
2009-04-03 13:28:28 +08:00
static u64 current_sessionid = 1;
#define ZERO_STATEID(stateid) (!memcmp((stateid), &zerostateid, sizeof(stateid_t)))
#define ONE_STATEID(stateid) (!memcmp((stateid), &onestateid, sizeof(stateid_t)))
/* forward declarations */
static struct nfs4_stateid * find_stateid(stateid_t *stid, int flags);
static struct nfs4_delegation * find_delegation_stateid(struct inode *ino, stateid_t *stid);
static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery";
static void nfs4_set_recdir(char *recdir);
/* Locking: */
/* Currently used for almost all code touching nfsv4 state: */
static DEFINE_MUTEX(client_mutex);
/*
* Currently used for the del_recall_lru and file hash table. In an
* effort to decrease the scope of the client_mutex, this spinlock may
* eventually cover more:
*/
static DEFINE_SPINLOCK(recall_lock);
static struct kmem_cache *stateowner_slab = NULL;
static struct kmem_cache *file_slab = NULL;
static struct kmem_cache *stateid_slab = NULL;
static struct kmem_cache *deleg_slab = NULL;
void
nfs4_lock_state(void)
{
mutex_lock(&client_mutex);
}
void
nfs4_unlock_state(void)
{
mutex_unlock(&client_mutex);
}
static inline u32
opaque_hashval(const void *ptr, int nbytes)
{
unsigned char *cptr = (unsigned char *) ptr;
u32 x = 0;
while (nbytes--) {
x *= 37;
x += *cptr++;
}
return x;
}
static struct list_head del_recall_lru;
static inline void
put_nfs4_file(struct nfs4_file *fi)
{
if (atomic_dec_and_lock(&fi->fi_ref, &recall_lock)) {
list_del(&fi->fi_hash);
spin_unlock(&recall_lock);
iput(fi->fi_inode);
kmem_cache_free(file_slab, fi);
}
}
static inline void
get_nfs4_file(struct nfs4_file *fi)
{
atomic_inc(&fi->fi_ref);
}
static int num_delegations;
unsigned int max_delegations;
/*
* Open owner state (share locks)
*/
/* hash tables for nfs4_stateowner */
#define OWNER_HASH_BITS 8
#define OWNER_HASH_SIZE (1 << OWNER_HASH_BITS)
#define OWNER_HASH_MASK (OWNER_HASH_SIZE - 1)
#define ownerid_hashval(id) \
((id) & OWNER_HASH_MASK)
#define ownerstr_hashval(clientid, ownername) \
(((clientid) + opaque_hashval((ownername.data), (ownername.len))) & OWNER_HASH_MASK)
static struct list_head ownerid_hashtbl[OWNER_HASH_SIZE];
static struct list_head ownerstr_hashtbl[OWNER_HASH_SIZE];
/* hash table for nfs4_file */
#define FILE_HASH_BITS 8
#define FILE_HASH_SIZE (1 << FILE_HASH_BITS)
#define FILE_HASH_MASK (FILE_HASH_SIZE - 1)
/* hash table for (open)nfs4_stateid */
#define STATEID_HASH_BITS 10
#define STATEID_HASH_SIZE (1 << STATEID_HASH_BITS)
#define STATEID_HASH_MASK (STATEID_HASH_SIZE - 1)
#define file_hashval(x) \
hash_ptr(x, FILE_HASH_BITS)
#define stateid_hashval(owner_id, file_id) \
(((owner_id) + (file_id)) & STATEID_HASH_MASK)
static struct list_head file_hashtbl[FILE_HASH_SIZE];
static struct list_head stateid_hashtbl[STATEID_HASH_SIZE];
static struct nfs4_delegation *
alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_fh *current_fh, u32 type)
{
struct nfs4_delegation *dp;
struct nfs4_file *fp = stp->st_file;
struct nfs4_callback *cb = &stp->st_stateowner->so_client->cl_callback;
dprintk("NFSD alloc_init_deleg\n");
if (fp->fi_had_conflict)
return NULL;
if (num_delegations > max_delegations)
return NULL;
dp = kmem_cache_alloc(deleg_slab, GFP_KERNEL);
if (dp == NULL)
return dp;
num_delegations++;
INIT_LIST_HEAD(&dp->dl_perfile);
INIT_LIST_HEAD(&dp->dl_perclnt);
INIT_LIST_HEAD(&dp->dl_recall_lru);
dp->dl_client = clp;
get_nfs4_file(fp);
dp->dl_file = fp;
dp->dl_flock = NULL;
get_file(stp->st_vfs_file);
dp->dl_vfs_file = stp->st_vfs_file;
dp->dl_type = type;
dp->dl_recall.cbr_dp = NULL;
dp->dl_recall.cbr_ident = cb->cb_ident;
dp->dl_recall.cbr_trunc = 0;
dp->dl_stateid.si_boot = boot_time;
dp->dl_stateid.si_stateownerid = current_delegid++;
dp->dl_stateid.si_fileid = 0;
dp->dl_stateid.si_generation = 0;
fh_copy_shallow(&dp->dl_fh, &current_fh->fh_handle);
dp->dl_time = 0;
atomic_set(&dp->dl_count, 1);
list_add(&dp->dl_perfile, &fp->fi_delegations);
list_add(&dp->dl_perclnt, &clp->cl_delegations);
return dp;
}
void
nfs4_put_delegation(struct nfs4_delegation *dp)
{
if (atomic_dec_and_test(&dp->dl_count)) {
dprintk("NFSD: freeing dp %p\n",dp);
put_nfs4_file(dp->dl_file);
kmem_cache_free(deleg_slab, dp);
num_delegations--;
}
}
/* Remove the associated file_lock first, then remove the delegation.
* lease_modify() is called to remove the FS_LEASE file_lock from
* the i_flock list, eventually calling nfsd's lock_manager
* fl_release_callback.
*/
static void
nfs4_close_delegation(struct nfs4_delegation *dp)
{
struct file *filp = dp->dl_vfs_file;
dprintk("NFSD: close_delegation dp %p\n",dp);
dp->dl_vfs_file = NULL;
/* The following nfsd_close may not actually close the file,
* but we want to remove the lease in any case. */
if (dp->dl_flock)
vfs_setlease(filp, F_UNLCK, &dp->dl_flock);
nfsd_close(filp);
}
/* Called under the state lock. */
static void
unhash_delegation(struct nfs4_delegation *dp)
{
list_del_init(&dp->dl_perfile);
list_del_init(&dp->dl_perclnt);
spin_lock(&recall_lock);
list_del_init(&dp->dl_recall_lru);
spin_unlock(&recall_lock);
nfs4_close_delegation(dp);
nfs4_put_delegation(dp);
}
/*
* SETCLIENTID state
*/
/* Hash tables for nfs4_clientid state */
#define CLIENT_HASH_BITS 4
#define CLIENT_HASH_SIZE (1 << CLIENT_HASH_BITS)
#define CLIENT_HASH_MASK (CLIENT_HASH_SIZE - 1)
#define clientid_hashval(id) \
((id) & CLIENT_HASH_MASK)
#define clientstr_hashval(name) \
(opaque_hashval((name), 8) & CLIENT_HASH_MASK)
/*
* reclaim_str_hashtbl[] holds known client info from previous reset/reboot
* used in reboot/reset lease grace period processing
*
* conf_id_hashtbl[], and conf_str_hashtbl[] hold confirmed
* setclientid_confirmed info.
*
* unconf_str_hastbl[] and unconf_id_hashtbl[] hold unconfirmed
* setclientid info.
*
* client_lru holds client queue ordered by nfs4_client.cl_time
* for lease renewal.
*
* close_lru holds (open) stateowner queue ordered by nfs4_stateowner.so_time
* for last close replay.
*/
static struct list_head reclaim_str_hashtbl[CLIENT_HASH_SIZE];
static int reclaim_str_hashtbl_size = 0;
static struct list_head conf_id_hashtbl[CLIENT_HASH_SIZE];
static struct list_head conf_str_hashtbl[CLIENT_HASH_SIZE];
static struct list_head unconf_str_hashtbl[CLIENT_HASH_SIZE];
static struct list_head unconf_id_hashtbl[CLIENT_HASH_SIZE];
static struct list_head client_lru;
static struct list_head close_lru;
static void unhash_generic_stateid(struct nfs4_stateid *stp)
{
list_del(&stp->st_hash);
list_del(&stp->st_perfile);
list_del(&stp->st_perstateowner);
}
static void free_generic_stateid(struct nfs4_stateid *stp)
{
put_nfs4_file(stp->st_file);
kmem_cache_free(stateid_slab, stp);
}
static void release_lock_stateid(struct nfs4_stateid *stp)
{
unhash_generic_stateid(stp);
locks_remove_posix(stp->st_vfs_file, (fl_owner_t)stp->st_stateowner);
free_generic_stateid(stp);
}
static void unhash_lockowner(struct nfs4_stateowner *sop)
{
struct nfs4_stateid *stp;
list_del(&sop->so_idhash);
list_del(&sop->so_strhash);
list_del(&sop->so_perstateid);
while (!list_empty(&sop->so_stateids)) {
stp = list_first_entry(&sop->so_stateids,
struct nfs4_stateid, st_perstateowner);
release_lock_stateid(stp);
}
}
static void release_lockowner(struct nfs4_stateowner *sop)
{
unhash_lockowner(sop);
nfs4_put_stateowner(sop);
}
static void
release_stateid_lockowners(struct nfs4_stateid *open_stp)
{
struct nfs4_stateowner *lock_sop;
while (!list_empty(&open_stp->st_lockowners)) {
lock_sop = list_entry(open_stp->st_lockowners.next,
struct nfs4_stateowner, so_perstateid);
/* list_del(&open_stp->st_lockowners); */
BUG_ON(lock_sop->so_is_open_owner);
release_lockowner(lock_sop);
}
}
static void release_open_stateid(struct nfs4_stateid *stp)
{
unhash_generic_stateid(stp);
release_stateid_lockowners(stp);
nfsd_close(stp->st_vfs_file);
free_generic_stateid(stp);
}
static void unhash_openowner(struct nfs4_stateowner *sop)
{
struct nfs4_stateid *stp;
list_del(&sop->so_idhash);
list_del(&sop->so_strhash);
list_del(&sop->so_perclient);
list_del(&sop->so_perstateid); /* XXX: necessary? */
while (!list_empty(&sop->so_stateids)) {
stp = list_first_entry(&sop->so_stateids,
struct nfs4_stateid, st_perstateowner);
release_open_stateid(stp);
}
}
static void release_openowner(struct nfs4_stateowner *sop)
{
unhash_openowner(sop);
list_del(&sop->so_close_lru);
nfs4_put_stateowner(sop);
}
static DEFINE_SPINLOCK(sessionid_lock);
#define SESSION_HASH_SIZE 512
static struct list_head sessionid_hashtbl[SESSION_HASH_SIZE];
static inline int
hash_sessionid(struct nfs4_sessionid *sessionid)
{
struct nfsd4_sessionid *sid = (struct nfsd4_sessionid *)sessionid;
return sid->sequence % SESSION_HASH_SIZE;
}
static inline void
dump_sessionid(const char *fn, struct nfs4_sessionid *sessionid)
{
u32 *ptr = (u32 *)(&sessionid->data[0]);
dprintk("%s: %u:%u:%u:%u\n", fn, ptr[0], ptr[1], ptr[2], ptr[3]);
}
nfsd41: create_session operation Implement the create_session operation confoming to http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion1-26 Look up the client id (generated by the server on exchange_id, given by the client on create_session). If neither a confirmed or unconfirmed client is found then the client id is stale If a confirmed cilent is found (i.e. we already received create_session for it) then compare the sequence id to determine if it's a replay or possibly a mis-ordered rpc. If the seqid is in order, update the confirmed client seqid and procedd with updating the session parameters. If an unconfirmed client_id is found then verify the creds and seqid. If both match move the client id to confirmed state and proceed with processing the create_session. Currently, we do not support persistent sessions, and RDMA. alloc_init_session generates a new sessionid and creates a session structure. NFSD_PAGES_PER_SLOT is used for the max response cached calculation, and for the counting of DRC pages using the hard limits set in struct srv_serv. A note on NFSD_PAGES_PER_SLOT: Other patches in this series allow for NFSD_PAGES_PER_SLOT + 1 pages to be cached in a DRC slot when the response size is less than NFSD_PAGES_PER_SLOT * PAGE_SIZE but xdr_buf pages are used. e.g. a READDIR operation will encode a small amount of data in the xdr_buf head, and then the READDIR in the xdr_buf pages. So, the hard limit calculation use of pages by a session is underestimated by the number of cached operations using the xdr_buf pages. Yet another patch caches no pages for the solo sequence operation, or any compound where cache_this is False. So the hard limit calculation use of pages by a session is overestimated by the number of these operations in the cache. TODO: improve resource pre-allocation and negotiate session parameters accordingly. Respect and possibly adjust backchannel attributes. Signed-off-by: Marc Eshel <eshel@almaden.ibm.com> Signed-off-by: Dean Hildebrand <dhildeb@us.ibm.com> [nfsd41: remove headerpadsz from channel attributes] Our client and server only support a headerpadsz of 0. [nfsd41: use DRC limits in fore channel init] [nfsd41: do not change CREATE_SESSION back channel attrs] Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [use sessionid_lock spin lock] [nfsd41: use bool inuse for slot state] Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfsd41 remove sl_session from alloc_init_session] Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [simplify nfsd4_encode_create_session error handling] [nfsd41: fix comment style in init_forechannel_attrs] [nfsd41: allocate struct nfsd4_session and slot table in one piece] [nfsd41: no need to INIT_LIST_HEAD in alloc_init_session just prior to list_add] Signed-off-by: Benny Halevy <bhalevy@panasas.com> Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
2009-04-03 13:28:28 +08:00
static void
gen_sessionid(struct nfsd4_session *ses)
{
struct nfs4_client *clp = ses->se_client;
struct nfsd4_sessionid *sid;
sid = (struct nfsd4_sessionid *)ses->se_sessionid.data;
sid->clientid = clp->cl_clientid;
sid->sequence = current_sessionid++;
sid->reserved = 0;
}
/*
* Give the client the number of slots it requests bound by
* NFSD_MAX_SLOTS_PER_SESSION and by sv_drc_max_pages.
*
* If we run out of pages (sv_drc_pages_used == sv_drc_max_pages) we
* should (up to a point) re-negotiate active sessions and reduce their
* slot usage to make rooom for new connections. For now we just fail the
* create session.
*/
static int set_forechannel_maxreqs(struct nfsd4_channel_attrs *fchan)
{
int status = 0, np = fchan->maxreqs * NFSD_PAGES_PER_SLOT;
spin_lock(&nfsd_serv->sv_lock);
if (np + nfsd_serv->sv_drc_pages_used > nfsd_serv->sv_drc_max_pages)
np = nfsd_serv->sv_drc_max_pages - nfsd_serv->sv_drc_pages_used;
nfsd_serv->sv_drc_pages_used += np;
spin_unlock(&nfsd_serv->sv_lock);
if (np <= 0) {
status = nfserr_resource;
fchan->maxreqs = 0;
} else
fchan->maxreqs = np / NFSD_PAGES_PER_SLOT;
return status;
}
/*
* fchan holds the client values on input, and the server values on output
*/
static int init_forechannel_attrs(struct svc_rqst *rqstp,
struct nfsd4_session *session,
struct nfsd4_channel_attrs *fchan)
{
int status = 0;
__u32 maxcount = svc_max_payload(rqstp);
/* headerpadsz set to zero in encode routine */
/* Use the client's max request and max response size if possible */
if (fchan->maxreq_sz > maxcount)
fchan->maxreq_sz = maxcount;
session->se_fmaxreq_sz = fchan->maxreq_sz;
if (fchan->maxresp_sz > maxcount)
fchan->maxresp_sz = maxcount;
session->se_fmaxresp_sz = fchan->maxresp_sz;
/* Set the max response cached size our default which is
* a multiple of PAGE_SIZE and small */
session->se_fmaxresp_cached = NFSD_PAGES_PER_SLOT * PAGE_SIZE;
fchan->maxresp_cached = session->se_fmaxresp_cached;
/* Use the client's maxops if possible */
if (fchan->maxops > NFSD_MAX_OPS_PER_COMPOUND)
fchan->maxops = NFSD_MAX_OPS_PER_COMPOUND;
session->se_fmaxops = fchan->maxops;
/* try to use the client requested number of slots */
if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION)
fchan->maxreqs = NFSD_MAX_SLOTS_PER_SESSION;
/* FIXME: Error means no more DRC pages so the server should
* recover pages from existing sessions. For now fail session
* creation.
*/
status = set_forechannel_maxreqs(fchan);
session->se_fnumslots = fchan->maxreqs;
return status;
}
static int
alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp,
struct nfsd4_create_session *cses)
{
struct nfsd4_session *new, tmp;
int idx, status = nfserr_resource, slotsize;
memset(&tmp, 0, sizeof(tmp));
/* FIXME: For now, we just accept the client back channel attributes. */
status = init_forechannel_attrs(rqstp, &tmp, &cses->fore_channel);
if (status)
goto out;
/* allocate struct nfsd4_session and slot table in one piece */
slotsize = tmp.se_fnumslots * sizeof(struct nfsd4_slot);
new = kzalloc(sizeof(*new) + slotsize, GFP_KERNEL);
if (!new)
goto out;
memcpy(new, &tmp, sizeof(*new));
new->se_client = clp;
gen_sessionid(new);
idx = hash_sessionid(&new->se_sessionid);
memcpy(clp->cl_sessionid.data, new->se_sessionid.data,
NFS4_MAX_SESSIONID_LEN);
new->se_flags = cses->flags;
kref_init(&new->se_ref);
spin_lock(&sessionid_lock);
list_add(&new->se_hash, &sessionid_hashtbl[idx]);
list_add(&new->se_perclnt, &clp->cl_sessions);
spin_unlock(&sessionid_lock);
status = nfs_ok;
out:
return status;
}
/* caller must hold sessionid_lock */
static struct nfsd4_session *
find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid)
{
struct nfsd4_session *elem;
int idx;
dump_sessionid(__func__, sessionid);
idx = hash_sessionid(sessionid);
dprintk("%s: idx is %d\n", __func__, idx);
/* Search in the appropriate list */
list_for_each_entry(elem, &sessionid_hashtbl[idx], se_hash) {
dump_sessionid("list traversal", &elem->se_sessionid);
if (!memcmp(elem->se_sessionid.data, sessionid->data,
NFS4_MAX_SESSIONID_LEN)) {
return elem;
}
}
dprintk("%s: session not found\n", __func__);
return NULL;
}
/* caller must hold sessionid_lock */
static void
unhash_session(struct nfsd4_session *ses)
{
list_del(&ses->se_hash);
list_del(&ses->se_perclnt);
}
static void
release_session(struct nfsd4_session *ses)
{
spin_lock(&sessionid_lock);
unhash_session(ses);
spin_unlock(&sessionid_lock);
nfsd4_put_session(ses);
}
static void nfsd4_release_respages(struct page **respages, short resused);
void
free_session(struct kref *kref)
{
struct nfsd4_session *ses;
int i;
ses = container_of(kref, struct nfsd4_session, se_ref);
for (i = 0; i < ses->se_fnumslots; i++) {
struct nfsd4_cache_entry *e = &ses->se_slots[i].sl_cache_entry;
nfsd4_release_respages(e->ce_respages, e->ce_resused);
}
kfree(ses->se_slots);
kfree(ses);
}
static inline void
renew_client(struct nfs4_client *clp)
{
/*
* Move client to the end to the LRU list.
*/
dprintk("renewing client (clientid %08x/%08x)\n",
clp->cl_clientid.cl_boot,
clp->cl_clientid.cl_id);
list_move_tail(&clp->cl_lru, &client_lru);
clp->cl_time = get_seconds();
}
/* SETCLIENTID and SETCLIENTID_CONFIRM Helper functions */
static int
STALE_CLIENTID(clientid_t *clid)
{
if (clid->cl_boot == boot_time)
return 0;
dprintk("NFSD stale clientid (%08x/%08x) boot_time %08lx\n",
clid->cl_boot, clid->cl_id, boot_time);
return 1;
}
/*
* XXX Should we use a slab cache ?
* This type of memory management is somewhat inefficient, but we use it
* anyway since SETCLIENTID is not a common operation.
*/
static struct nfs4_client *alloc_client(struct xdr_netobj name)
{
struct nfs4_client *clp;
clp = kzalloc(sizeof(struct nfs4_client), GFP_KERNEL);
if (clp == NULL)
return NULL;
clp->cl_name.data = kmalloc(name.len, GFP_KERNEL);
if (clp->cl_name.data == NULL) {
kfree(clp);
return NULL;
}
memcpy(clp->cl_name.data, name.data, name.len);
clp->cl_name.len = name.len;
return clp;
}
static void
shutdown_callback_client(struct nfs4_client *clp)
{
struct rpc_clnt *clnt = clp->cl_callback.cb_client;
if (clnt) {
/*
* Callback threads take a reference on the client, so there
* should be no outstanding callbacks at this point.
*/
clp->cl_callback.cb_client = NULL;
rpc_shutdown_client(clnt);
}
}
static inline void
free_client(struct nfs4_client *clp)
{
shutdown_callback_client(clp);
nfsd4_release_respages(clp->cl_slot.sl_cache_entry.ce_respages,
clp->cl_slot.sl_cache_entry.ce_resused);
if (clp->cl_cred.cr_group_info)
put_group_info(clp->cl_cred.cr_group_info);
kfree(clp->cl_principal);
kfree(clp->cl_name.data);
kfree(clp);
}
void
put_nfs4_client(struct nfs4_client *clp)
{
if (atomic_dec_and_test(&clp->cl_count))
free_client(clp);
}
static void
expire_client(struct nfs4_client *clp)
{
struct nfs4_stateowner *sop;
struct nfs4_delegation *dp;
struct list_head reaplist;
dprintk("NFSD: expire_client cl_count %d\n",
atomic_read(&clp->cl_count));
INIT_LIST_HEAD(&reaplist);
spin_lock(&recall_lock);
while (!list_empty(&clp->cl_delegations)) {
dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt);
dprintk("NFSD: expire client. dp %p, fp %p\n", dp,
dp->dl_flock);
list_del_init(&dp->dl_perclnt);
list_move(&dp->dl_recall_lru, &reaplist);
}
spin_unlock(&recall_lock);
while (!list_empty(&reaplist)) {
dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru);
list_del_init(&dp->dl_recall_lru);
unhash_delegation(dp);
}
list_del(&clp->cl_idhash);
list_del(&clp->cl_strhash);
list_del(&clp->cl_lru);
while (!list_empty(&clp->cl_openowners)) {
sop = list_entry(clp->cl_openowners.next, struct nfs4_stateowner, so_perclient);
release_openowner(sop);
}
while (!list_empty(&clp->cl_sessions)) {
struct nfsd4_session *ses;
ses = list_entry(clp->cl_sessions.next, struct nfsd4_session,
se_perclnt);
release_session(ses);
}
put_nfs4_client(clp);
}
static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir)
{
struct nfs4_client *clp;
clp = alloc_client(name);
if (clp == NULL)
return NULL;
memcpy(clp->cl_recdir, recdir, HEXDIR_LEN);
atomic_set(&clp->cl_count, 1);
atomic_set(&clp->cl_callback.cb_set, 0);
INIT_LIST_HEAD(&clp->cl_idhash);
INIT_LIST_HEAD(&clp->cl_strhash);
INIT_LIST_HEAD(&clp->cl_openowners);
INIT_LIST_HEAD(&clp->cl_delegations);
INIT_LIST_HEAD(&clp->cl_sessions);
INIT_LIST_HEAD(&clp->cl_lru);
return clp;
}
static void copy_verf(struct nfs4_client *target, nfs4_verifier *source)
{
memcpy(target->cl_verifier.data, source->data,
sizeof(target->cl_verifier.data));
}
static void copy_clid(struct nfs4_client *target, struct nfs4_client *source)
{
target->cl_clientid.cl_boot = source->cl_clientid.cl_boot;
target->cl_clientid.cl_id = source->cl_clientid.cl_id;
}
static void copy_cred(struct svc_cred *target, struct svc_cred *source)
{
target->cr_uid = source->cr_uid;
target->cr_gid = source->cr_gid;
target->cr_group_info = source->cr_group_info;
get_group_info(target->cr_group_info);
}
static int same_name(const char *n1, const char *n2)
{
return 0 == memcmp(n1, n2, HEXDIR_LEN);
}
static int
same_verf(nfs4_verifier *v1, nfs4_verifier *v2)
{
return 0 == memcmp(v1->data, v2->data, sizeof(v1->data));
}
static int
same_clid(clientid_t *cl1, clientid_t *cl2)
{
return (cl1->cl_boot == cl2->cl_boot) && (cl1->cl_id == cl2->cl_id);
}
/* XXX what about NGROUP */
static int
same_creds(struct svc_cred *cr1, struct svc_cred *cr2)
{
return cr1->cr_uid == cr2->cr_uid;
}
static void gen_clid(struct nfs4_client *clp)
{
static u32 current_clientid = 1;
clp->cl_clientid.cl_boot = boot_time;
clp->cl_clientid.cl_id = current_clientid++;
}
static void gen_confirm(struct nfs4_client *clp)
{
static u32 i;
u32 *p;
p = (u32 *)clp->cl_confirm.data;
*p++ = get_seconds();
*p++ = i++;
}
static int check_name(struct xdr_netobj name)
{
if (name.len == 0)
return 0;
if (name.len > NFS4_OPAQUE_LIMIT) {
dprintk("NFSD: check_name: name too long(%d)!\n", name.len);
return 0;
}
return 1;
}
static void
add_to_unconfirmed(struct nfs4_client *clp, unsigned int strhashval)
{
unsigned int idhashval;
list_add(&clp->cl_strhash, &unconf_str_hashtbl[strhashval]);
idhashval = clientid_hashval(clp->cl_clientid.cl_id);
list_add(&clp->cl_idhash, &unconf_id_hashtbl[idhashval]);
list_add_tail(&clp->cl_lru, &client_lru);
clp->cl_time = get_seconds();
}
static void
move_to_confirmed(struct nfs4_client *clp)
{
unsigned int idhashval = clientid_hashval(clp->cl_clientid.cl_id);
unsigned int strhashval;
dprintk("NFSD: move_to_confirm nfs4_client %p\n", clp);
list_del_init(&clp->cl_strhash);
list_move(&clp->cl_idhash, &conf_id_hashtbl[idhashval]);
strhashval = clientstr_hashval(clp->cl_recdir);
list_add(&clp->cl_strhash, &conf_str_hashtbl[strhashval]);
renew_client(clp);
}
static struct nfs4_client *
find_confirmed_client(clientid_t *clid)
{
struct nfs4_client *clp;
unsigned int idhashval = clientid_hashval(clid->cl_id);
list_for_each_entry(clp, &conf_id_hashtbl[idhashval], cl_idhash) {
if (same_clid(&clp->cl_clientid, clid))
return clp;
}
return NULL;
}
static struct nfs4_client *
find_unconfirmed_client(clientid_t *clid)
{
struct nfs4_client *clp;
unsigned int idhashval = clientid_hashval(clid->cl_id);
list_for_each_entry(clp, &unconf_id_hashtbl[idhashval], cl_idhash) {
if (same_clid(&clp->cl_clientid, clid))
return clp;
}
return NULL;
}
/*
* Return 1 iff clp's clientid establishment method matches the use_exchange_id
* parameter. Matching is based on the fact the at least one of the
* EXCHGID4_FLAG_USE_{NON_PNFS,PNFS_MDS,PNFS_DS} flags must be set for v4.1
*
* FIXME: we need to unify the clientid namespaces for nfsv4.x
* and correctly deal with client upgrade/downgrade in EXCHANGE_ID
* and SET_CLIENTID{,_CONFIRM}
*/
static inline int
match_clientid_establishment(struct nfs4_client *clp, bool use_exchange_id)
{
bool has_exchange_flags = (clp->cl_exchange_flags != 0);
return use_exchange_id == has_exchange_flags;
}
static struct nfs4_client *
find_confirmed_client_by_str(const char *dname, unsigned int hashval,
bool use_exchange_id)
{
struct nfs4_client *clp;
list_for_each_entry(clp, &conf_str_hashtbl[hashval], cl_strhash) {
if (same_name(clp->cl_recdir, dname) &&
match_clientid_establishment(clp, use_exchange_id))
return clp;
}
return NULL;
}
static struct nfs4_client *
find_unconfirmed_client_by_str(const char *dname, unsigned int hashval,
bool use_exchange_id)
{
struct nfs4_client *clp;
list_for_each_entry(clp, &unconf_str_hashtbl[hashval], cl_strhash) {
if (same_name(clp->cl_recdir, dname) &&
match_clientid_establishment(clp, use_exchange_id))
return clp;
}
return NULL;
}
/* a helper function for parse_callback */
static int
parse_octet(unsigned int *lenp, char **addrp)
{
unsigned int len = *lenp;
char *p = *addrp;
int n = -1;
char c;
for (;;) {
if (!len)
break;
len--;
c = *p++;
if (c == '.')
break;
if ((c < '0') || (c > '9')) {
n = -1;
break;
}
if (n < 0)
n = 0;
n = (n * 10) + (c - '0');
if (n > 255) {
n = -1;
break;
}
}
*lenp = len;
*addrp = p;
return n;
}
/* parse and set the setclientid ipv4 callback address */
static int
parse_ipv4(unsigned int addr_len, char *addr_val, unsigned int *cbaddrp, unsigned short *cbportp)
{
int temp = 0;
u32 cbaddr = 0;
u16 cbport = 0;
u32 addrlen = addr_len;
char *addr = addr_val;
int i, shift;
/* ipaddress */
shift = 24;
for(i = 4; i > 0 ; i--) {
if ((temp = parse_octet(&addrlen, &addr)) < 0) {
return 0;
}
cbaddr |= (temp << shift);
if (shift > 0)
shift -= 8;
}
*cbaddrp = cbaddr;
/* port */
shift = 8;
for(i = 2; i > 0 ; i--) {
if ((temp = parse_octet(&addrlen, &addr)) < 0) {
return 0;
}
cbport |= (temp << shift);
if (shift > 0)
shift -= 8;
}
*cbportp = cbport;
return 1;
}
static void
gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se)
{
struct nfs4_callback *cb = &clp->cl_callback;
/* Currently, we only support tcp for the callback channel */
if ((se->se_callback_netid_len != 3) || memcmp((char *)se->se_callback_netid_val, "tcp", 3))
goto out_err;
if ( !(parse_ipv4(se->se_callback_addr_len, se->se_callback_addr_val,
&cb->cb_addr, &cb->cb_port)))
goto out_err;
cb->cb_prog = se->se_callback_prog;
cb->cb_ident = se->se_callback_ident;
return;
out_err:
dprintk(KERN_INFO "NFSD: this client (clientid %08x/%08x) "
"will not receive delegations\n",
clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
return;
}
void
nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp)
{
struct nfsd4_compoundres *resp = rqstp->rq_resp;
resp->cstate.statp = statp;
}
/*
* Dereference the result pages.
*/
static void
nfsd4_release_respages(struct page **respages, short resused)
{
int i;
dprintk("--> %s\n", __func__);
for (i = 0; i < resused; i++) {
if (!respages[i])
continue;
put_page(respages[i]);
respages[i] = NULL;
}
}
static void
nfsd4_copy_pages(struct page **topages, struct page **frompages, short count)
{
int i;
for (i = 0; i < count; i++) {
topages[i] = frompages[i];
if (!topages[i])
continue;
get_page(topages[i]);
}
}
/*
* Cache the reply pages up to NFSD_PAGES_PER_SLOT + 1, clearing the previous
* pages. We add a page to NFSD_PAGES_PER_SLOT for the case where the total
* length of the XDR response is less than se_fmaxresp_cached
* (NFSD_PAGES_PER_SLOT * PAGE_SIZE) but the xdr_buf pages is used for a
* of the reply (e.g. readdir).
*
* Store the base and length of the rq_req.head[0] page
* of the NFSv4.1 data, just past the rpc header.
*/
void
nfsd4_store_cache_entry(struct nfsd4_compoundres *resp)
{
struct nfsd4_cache_entry *entry = &resp->cstate.slot->sl_cache_entry;
struct svc_rqst *rqstp = resp->rqstp;
struct nfsd4_compoundargs *args = rqstp->rq_argp;
struct nfsd4_op *op = &args->ops[resp->opcnt];
struct kvec *resv = &rqstp->rq_res.head[0];
dprintk("--> %s entry %p\n", __func__, entry);
/* Don't cache a failed OP_SEQUENCE. */
if (resp->opcnt == 1 && op->opnum == OP_SEQUENCE && resp->cstate.status)
return;
nfsd4_release_respages(entry->ce_respages, entry->ce_resused);
entry->ce_opcnt = resp->opcnt;
entry->ce_status = resp->cstate.status;
/*
* Don't need a page to cache just the sequence operation - the slot
* does this for us!
*/
if (nfsd4_not_cached(resp)) {
entry->ce_resused = 0;
entry->ce_rpchdrlen = 0;
dprintk("%s Just cache SEQUENCE. ce_cachethis %d\n", __func__,
resp->cstate.slot->sl_cache_entry.ce_cachethis);
return;
}
entry->ce_resused = rqstp->rq_resused;
if (entry->ce_resused > NFSD_PAGES_PER_SLOT + 1)
entry->ce_resused = NFSD_PAGES_PER_SLOT + 1;
nfsd4_copy_pages(entry->ce_respages, rqstp->rq_respages,
entry->ce_resused);
entry->ce_datav.iov_base = resp->cstate.statp;
entry->ce_datav.iov_len = resv->iov_len - ((char *)resp->cstate.statp -
(char *)page_address(rqstp->rq_respages[0]));
/* Current request rpc header length*/
entry->ce_rpchdrlen = (char *)resp->cstate.statp -
(char *)page_address(rqstp->rq_respages[0]);
}
/*
* We keep the rpc header, but take the nfs reply from the replycache.
*/
static int
nfsd41_copy_replay_data(struct nfsd4_compoundres *resp,
struct nfsd4_cache_entry *entry)
{
struct svc_rqst *rqstp = resp->rqstp;
struct kvec *resv = &resp->rqstp->rq_res.head[0];
int len;
/* Current request rpc header length*/
len = (char *)resp->cstate.statp -
(char *)page_address(rqstp->rq_respages[0]);
if (entry->ce_datav.iov_len + len > PAGE_SIZE) {
dprintk("%s v41 cached reply too large (%Zd).\n", __func__,
entry->ce_datav.iov_len);
return 0;
}
/* copy the cached reply nfsd data past the current rpc header */
memcpy((char *)resv->iov_base + len, entry->ce_datav.iov_base,
entry->ce_datav.iov_len);
resv->iov_len = len + entry->ce_datav.iov_len;
return 1;
}
/*
* Keep the first page of the replay. Copy the NFSv4.1 data from the first
* cached page. Replace any futher replay pages from the cache.
*/
__be32
nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
struct nfsd4_sequence *seq)
{
struct nfsd4_cache_entry *entry = &resp->cstate.slot->sl_cache_entry;
__be32 status;
dprintk("--> %s entry %p\n", __func__, entry);
/*
* If this is just the sequence operation, we did not keep
* a page in the cache entry because we can just use the
* slot info stored in struct nfsd4_sequence that was checked
* against the slot in nfsd4_sequence().
*
* This occurs when seq->cachethis is FALSE, or when the client
* session inactivity timer fires and a solo sequence operation
* is sent (lease renewal).
*/
if (seq && nfsd4_not_cached(resp)) {
seq->maxslots = resp->cstate.session->se_fnumslots;
return nfs_ok;
}
if (!nfsd41_copy_replay_data(resp, entry)) {
/*
* Not enough room to use the replay rpc header, send the
* cached header. Release all the allocated result pages.
*/
svc_free_res_pages(resp->rqstp);
nfsd4_copy_pages(resp->rqstp->rq_respages, entry->ce_respages,
entry->ce_resused);
} else {
/* Release all but the first allocated result page */
resp->rqstp->rq_resused--;
svc_free_res_pages(resp->rqstp);
nfsd4_copy_pages(&resp->rqstp->rq_respages[1],
&entry->ce_respages[1],
entry->ce_resused - 1);
}
resp->rqstp->rq_resused = entry->ce_resused;
resp->opcnt = entry->ce_opcnt;
resp->cstate.iovlen = entry->ce_datav.iov_len + entry->ce_rpchdrlen;
status = entry->ce_status;
return status;
}
nfsd41: exchange_id operation Implement the exchange_id operation confoming to http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion1-28 Based on the client provided name, hash a client id. If a confirmed one is found, compare the op's creds and verifier. If the creds match and the verifier is different then expire the old client (client re-incarnated), otherwise, if both match, assume it's a replay and ignore it. If an unconfirmed client is found, then copy the new creds and verifer if need update, otherwise assume replay. The client is moved to a confirmed state on create_session. In the nfs41 branch set the exchange_id flags to EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_SUPP_MOVED_REFER (pNFS is not supported, Referrals are supported, Migration is not.). Address various scenarios from section 18.35 of the spec: 1. Check for EXCHGID4_FLAG_UPD_CONFIRMED_REC_A and set EXCHGID4_FLAG_CONFIRMED_R as appropriate. 2. Return error codes per 18.35.4 scenarios. 3. Update client records or generate new client ids depending on scenario. Note: 18.35.4 case 3 probably still needs revisiting. The handling seems not quite right. Signed-off-by: Benny Halevy <bhalevy@panasas.com> Signed-off-by: Andy Adamosn <andros@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfsd41: use utsname for major_id (and copy to server_scope)] [nfsd41: fix handling of various exchange id scenarios] Signed-off-by: Mike Sager <sager@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfsd41: reverse use of EXCHGID4_INVAL_FLAG_MASK_A] [simplify nfsd4_encode_exchange_id error handling] [nfsd41: embed an xdr_netobj in nfsd4_exchange_id] [nfsd41: return nfserr_serverfault for spa_how == SP4_MACH_CRED] Signed-off-by: Benny Halevy <bhalevy@panasas.com> Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
2009-04-03 13:28:01 +08:00
/*
* Set the exchange_id flags returned by the server.
*/
static void
nfsd4_set_ex_flags(struct nfs4_client *new, struct nfsd4_exchange_id *clid)
{
/* pNFS is not supported */
new->cl_exchange_flags |= EXCHGID4_FLAG_USE_NON_PNFS;
/* Referrals are supported, Migration is not. */
new->cl_exchange_flags |= EXCHGID4_FLAG_SUPP_MOVED_REFER;
/* set the wire flags to return to client. */
clid->flags = new->cl_exchange_flags;
}
__be32
nfsd4_exchange_id(struct svc_rqst *rqstp,
struct nfsd4_compound_state *cstate,
struct nfsd4_exchange_id *exid)
{
nfsd41: exchange_id operation Implement the exchange_id operation confoming to http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion1-28 Based on the client provided name, hash a client id. If a confirmed one is found, compare the op's creds and verifier. If the creds match and the verifier is different then expire the old client (client re-incarnated), otherwise, if both match, assume it's a replay and ignore it. If an unconfirmed client is found, then copy the new creds and verifer if need update, otherwise assume replay. The client is moved to a confirmed state on create_session. In the nfs41 branch set the exchange_id flags to EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_SUPP_MOVED_REFER (pNFS is not supported, Referrals are supported, Migration is not.). Address various scenarios from section 18.35 of the spec: 1. Check for EXCHGID4_FLAG_UPD_CONFIRMED_REC_A and set EXCHGID4_FLAG_CONFIRMED_R as appropriate. 2. Return error codes per 18.35.4 scenarios. 3. Update client records or generate new client ids depending on scenario. Note: 18.35.4 case 3 probably still needs revisiting. The handling seems not quite right. Signed-off-by: Benny Halevy <bhalevy@panasas.com> Signed-off-by: Andy Adamosn <andros@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfsd41: use utsname for major_id (and copy to server_scope)] [nfsd41: fix handling of various exchange id scenarios] Signed-off-by: Mike Sager <sager@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfsd41: reverse use of EXCHGID4_INVAL_FLAG_MASK_A] [simplify nfsd4_encode_exchange_id error handling] [nfsd41: embed an xdr_netobj in nfsd4_exchange_id] [nfsd41: return nfserr_serverfault for spa_how == SP4_MACH_CRED] Signed-off-by: Benny Halevy <bhalevy@panasas.com> Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
2009-04-03 13:28:01 +08:00
struct nfs4_client *unconf, *conf, *new;
int status;
unsigned int strhashval;
char dname[HEXDIR_LEN];
nfs4_verifier verf = exid->verifier;
u32 ip_addr = svc_addr_in(rqstp)->sin_addr.s_addr;
dprintk("%s rqstp=%p exid=%p clname.len=%u clname.data=%p "
" ip_addr=%u flags %x, spa_how %d\n",
__func__, rqstp, exid, exid->clname.len, exid->clname.data,
ip_addr, exid->flags, exid->spa_how);
if (!check_name(exid->clname) || (exid->flags & ~EXCHGID4_FLAG_MASK_A))
return nfserr_inval;
/* Currently only support SP4_NONE */
switch (exid->spa_how) {
case SP4_NONE:
break;
case SP4_SSV:
return nfserr_encr_alg_unsupp;
default:
BUG(); /* checked by xdr code */
case SP4_MACH_CRED:
return nfserr_serverfault; /* no excuse :-/ */
}
status = nfs4_make_rec_clidname(dname, &exid->clname);
if (status)
goto error;
strhashval = clientstr_hashval(dname);
nfs4_lock_state();
status = nfs_ok;
conf = find_confirmed_client_by_str(dname, strhashval, true);
nfsd41: exchange_id operation Implement the exchange_id operation confoming to http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion1-28 Based on the client provided name, hash a client id. If a confirmed one is found, compare the op's creds and verifier. If the creds match and the verifier is different then expire the old client (client re-incarnated), otherwise, if both match, assume it's a replay and ignore it. If an unconfirmed client is found, then copy the new creds and verifer if need update, otherwise assume replay. The client is moved to a confirmed state on create_session. In the nfs41 branch set the exchange_id flags to EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_SUPP_MOVED_REFER (pNFS is not supported, Referrals are supported, Migration is not.). Address various scenarios from section 18.35 of the spec: 1. Check for EXCHGID4_FLAG_UPD_CONFIRMED_REC_A and set EXCHGID4_FLAG_CONFIRMED_R as appropriate. 2. Return error codes per 18.35.4 scenarios. 3. Update client records or generate new client ids depending on scenario. Note: 18.35.4 case 3 probably still needs revisiting. The handling seems not quite right. Signed-off-by: Benny Halevy <bhalevy@panasas.com> Signed-off-by: Andy Adamosn <andros@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfsd41: use utsname for major_id (and copy to server_scope)] [nfsd41: fix handling of various exchange id scenarios] Signed-off-by: Mike Sager <sager@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfsd41: reverse use of EXCHGID4_INVAL_FLAG_MASK_A] [simplify nfsd4_encode_exchange_id error handling] [nfsd41: embed an xdr_netobj in nfsd4_exchange_id] [nfsd41: return nfserr_serverfault for spa_how == SP4_MACH_CRED] Signed-off-by: Benny Halevy <bhalevy@panasas.com> Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
2009-04-03 13:28:01 +08:00
if (conf) {
if (!same_verf(&verf, &conf->cl_verifier)) {
/* 18.35.4 case 8 */
if (exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A) {
status = nfserr_not_same;
goto out;
}
/* Client reboot: destroy old state */
expire_client(conf);
goto out_new;
}
if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) {
/* 18.35.4 case 9 */
if (exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A) {
status = nfserr_perm;
goto out;
}
expire_client(conf);
goto out_new;
}
if (ip_addr != conf->cl_addr &&
!(exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A)) {
/* Client collision. 18.35.4 case 3 */
status = nfserr_clid_inuse;
goto out;
}
/*
* Set bit when the owner id and verifier map to an already
* confirmed client id (18.35.3).
*/
exid->flags |= EXCHGID4_FLAG_CONFIRMED_R;
/*
* Falling into 18.35.4 case 2, possible router replay.
* Leave confirmed record intact and return same result.
*/
copy_verf(conf, &verf);
new = conf;
goto out_copy;
} else {
/* 18.35.4 case 7 */
if (exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A) {
status = nfserr_noent;
goto out;
}
}
unconf = find_unconfirmed_client_by_str(dname, strhashval, true);
nfsd41: exchange_id operation Implement the exchange_id operation confoming to http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion1-28 Based on the client provided name, hash a client id. If a confirmed one is found, compare the op's creds and verifier. If the creds match and the verifier is different then expire the old client (client re-incarnated), otherwise, if both match, assume it's a replay and ignore it. If an unconfirmed client is found, then copy the new creds and verifer if need update, otherwise assume replay. The client is moved to a confirmed state on create_session. In the nfs41 branch set the exchange_id flags to EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_SUPP_MOVED_REFER (pNFS is not supported, Referrals are supported, Migration is not.). Address various scenarios from section 18.35 of the spec: 1. Check for EXCHGID4_FLAG_UPD_CONFIRMED_REC_A and set EXCHGID4_FLAG_CONFIRMED_R as appropriate. 2. Return error codes per 18.35.4 scenarios. 3. Update client records or generate new client ids depending on scenario. Note: 18.35.4 case 3 probably still needs revisiting. The handling seems not quite right. Signed-off-by: Benny Halevy <bhalevy@panasas.com> Signed-off-by: Andy Adamosn <andros@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfsd41: use utsname for major_id (and copy to server_scope)] [nfsd41: fix handling of various exchange id scenarios] Signed-off-by: Mike Sager <sager@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfsd41: reverse use of EXCHGID4_INVAL_FLAG_MASK_A] [simplify nfsd4_encode_exchange_id error handling] [nfsd41: embed an xdr_netobj in nfsd4_exchange_id] [nfsd41: return nfserr_serverfault for spa_how == SP4_MACH_CRED] Signed-off-by: Benny Halevy <bhalevy@panasas.com> Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
2009-04-03 13:28:01 +08:00
if (unconf) {
/*
* Possible retry or client restart. Per 18.35.4 case 4,
* a new unconfirmed record should be generated regardless
* of whether any properties have changed.
*/
expire_client(unconf);
}
out_new:
/* Normal case */
new = create_client(exid->clname, dname);
if (new == NULL) {
status = nfserr_resource;
goto out;
}
copy_verf(new, &verf);
copy_cred(&new->cl_cred, &rqstp->rq_cred);
new->cl_addr = ip_addr;
gen_clid(new);
gen_confirm(new);
add_to_unconfirmed(new, strhashval);
out_copy:
exid->clientid.cl_boot = new->cl_clientid.cl_boot;
exid->clientid.cl_id = new->cl_clientid.cl_id;
new->cl_slot.sl_seqid = 0;
exid->seqid = 1;
nfsd41: exchange_id operation Implement the exchange_id operation confoming to http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion1-28 Based on the client provided name, hash a client id. If a confirmed one is found, compare the op's creds and verifier. If the creds match and the verifier is different then expire the old client (client re-incarnated), otherwise, if both match, assume it's a replay and ignore it. If an unconfirmed client is found, then copy the new creds and verifer if need update, otherwise assume replay. The client is moved to a confirmed state on create_session. In the nfs41 branch set the exchange_id flags to EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_SUPP_MOVED_REFER (pNFS is not supported, Referrals are supported, Migration is not.). Address various scenarios from section 18.35 of the spec: 1. Check for EXCHGID4_FLAG_UPD_CONFIRMED_REC_A and set EXCHGID4_FLAG_CONFIRMED_R as appropriate. 2. Return error codes per 18.35.4 scenarios. 3. Update client records or generate new client ids depending on scenario. Note: 18.35.4 case 3 probably still needs revisiting. The handling seems not quite right. Signed-off-by: Benny Halevy <bhalevy@panasas.com> Signed-off-by: Andy Adamosn <andros@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfsd41: use utsname for major_id (and copy to server_scope)] [nfsd41: fix handling of various exchange id scenarios] Signed-off-by: Mike Sager <sager@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfsd41: reverse use of EXCHGID4_INVAL_FLAG_MASK_A] [simplify nfsd4_encode_exchange_id error handling] [nfsd41: embed an xdr_netobj in nfsd4_exchange_id] [nfsd41: return nfserr_serverfault for spa_how == SP4_MACH_CRED] Signed-off-by: Benny Halevy <bhalevy@panasas.com> Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
2009-04-03 13:28:01 +08:00
nfsd4_set_ex_flags(new, exid);
dprintk("nfsd4_exchange_id seqid %d flags %x\n",
new->cl_slot.sl_seqid, new->cl_exchange_flags);
nfsd41: exchange_id operation Implement the exchange_id operation confoming to http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion1-28 Based on the client provided name, hash a client id. If a confirmed one is found, compare the op's creds and verifier. If the creds match and the verifier is different then expire the old client (client re-incarnated), otherwise, if both match, assume it's a replay and ignore it. If an unconfirmed client is found, then copy the new creds and verifer if need update, otherwise assume replay. The client is moved to a confirmed state on create_session. In the nfs41 branch set the exchange_id flags to EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_SUPP_MOVED_REFER (pNFS is not supported, Referrals are supported, Migration is not.). Address various scenarios from section 18.35 of the spec: 1. Check for EXCHGID4_FLAG_UPD_CONFIRMED_REC_A and set EXCHGID4_FLAG_CONFIRMED_R as appropriate. 2. Return error codes per 18.35.4 scenarios. 3. Update client records or generate new client ids depending on scenario. Note: 18.35.4 case 3 probably still needs revisiting. The handling seems not quite right. Signed-off-by: Benny Halevy <bhalevy@panasas.com> Signed-off-by: Andy Adamosn <andros@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfsd41: use utsname for major_id (and copy to server_scope)] [nfsd41: fix handling of various exchange id scenarios] Signed-off-by: Mike Sager <sager@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfsd41: reverse use of EXCHGID4_INVAL_FLAG_MASK_A] [simplify nfsd4_encode_exchange_id error handling] [nfsd41: embed an xdr_netobj in nfsd4_exchange_id] [nfsd41: return nfserr_serverfault for spa_how == SP4_MACH_CRED] Signed-off-by: Benny Halevy <bhalevy@panasas.com> Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
2009-04-03 13:28:01 +08:00
status = nfs_ok;
out:
nfs4_unlock_state();
error:
dprintk("nfsd4_exchange_id returns %d\n", ntohl(status));
return status;
}
static int
check_slot_seqid(u32 seqid, struct nfsd4_slot *slot)
{
dprintk("%s enter. seqid %d slot->sl_seqid %d\n", __func__, seqid,
slot->sl_seqid);
/* The slot is in use, and no response has been sent. */
if (slot->sl_inuse) {
if (seqid == slot->sl_seqid)
return nfserr_jukebox;
else
return nfserr_seq_misordered;
}
/* Normal */
if (likely(seqid == slot->sl_seqid + 1))
return nfs_ok;
/* Replay */
if (seqid == slot->sl_seqid)
return nfserr_replay_cache;
/* Wraparound */
if (seqid == 1 && (slot->sl_seqid + 1) == 0)
return nfs_ok;
/* Misordered replay or misordered new request */
return nfserr_seq_misordered;
}
__be32
nfsd4_create_session(struct svc_rqst *rqstp,
struct nfsd4_compound_state *cstate,
struct nfsd4_create_session *cr_ses)
{
nfsd41: create_session operation Implement the create_session operation confoming to http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion1-26 Look up the client id (generated by the server on exchange_id, given by the client on create_session). If neither a confirmed or unconfirmed client is found then the client id is stale If a confirmed cilent is found (i.e. we already received create_session for it) then compare the sequence id to determine if it's a replay or possibly a mis-ordered rpc. If the seqid is in order, update the confirmed client seqid and procedd with updating the session parameters. If an unconfirmed client_id is found then verify the creds and seqid. If both match move the client id to confirmed state and proceed with processing the create_session. Currently, we do not support persistent sessions, and RDMA. alloc_init_session generates a new sessionid and creates a session structure. NFSD_PAGES_PER_SLOT is used for the max response cached calculation, and for the counting of DRC pages using the hard limits set in struct srv_serv. A note on NFSD_PAGES_PER_SLOT: Other patches in this series allow for NFSD_PAGES_PER_SLOT + 1 pages to be cached in a DRC slot when the response size is less than NFSD_PAGES_PER_SLOT * PAGE_SIZE but xdr_buf pages are used. e.g. a READDIR operation will encode a small amount of data in the xdr_buf head, and then the READDIR in the xdr_buf pages. So, the hard limit calculation use of pages by a session is underestimated by the number of cached operations using the xdr_buf pages. Yet another patch caches no pages for the solo sequence operation, or any compound where cache_this is False. So the hard limit calculation use of pages by a session is overestimated by the number of these operations in the cache. TODO: improve resource pre-allocation and negotiate session parameters accordingly. Respect and possibly adjust backchannel attributes. Signed-off-by: Marc Eshel <eshel@almaden.ibm.com> Signed-off-by: Dean Hildebrand <dhildeb@us.ibm.com> [nfsd41: remove headerpadsz from channel attributes] Our client and server only support a headerpadsz of 0. [nfsd41: use DRC limits in fore channel init] [nfsd41: do not change CREATE_SESSION back channel attrs] Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [use sessionid_lock spin lock] [nfsd41: use bool inuse for slot state] Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfsd41 remove sl_session from alloc_init_session] Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [simplify nfsd4_encode_create_session error handling] [nfsd41: fix comment style in init_forechannel_attrs] [nfsd41: allocate struct nfsd4_session and slot table in one piece] [nfsd41: no need to INIT_LIST_HEAD in alloc_init_session just prior to list_add] Signed-off-by: Benny Halevy <bhalevy@panasas.com> Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
2009-04-03 13:28:28 +08:00
u32 ip_addr = svc_addr_in(rqstp)->sin_addr.s_addr;
struct nfsd4_compoundres *resp = rqstp->rq_resp;
nfsd41: create_session operation Implement the create_session operation confoming to http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion1-26 Look up the client id (generated by the server on exchange_id, given by the client on create_session). If neither a confirmed or unconfirmed client is found then the client id is stale If a confirmed cilent is found (i.e. we already received create_session for it) then compare the sequence id to determine if it's a replay or possibly a mis-ordered rpc. If the seqid is in order, update the confirmed client seqid and procedd with updating the session parameters. If an unconfirmed client_id is found then verify the creds and seqid. If both match move the client id to confirmed state and proceed with processing the create_session. Currently, we do not support persistent sessions, and RDMA. alloc_init_session generates a new sessionid and creates a session structure. NFSD_PAGES_PER_SLOT is used for the max response cached calculation, and for the counting of DRC pages using the hard limits set in struct srv_serv. A note on NFSD_PAGES_PER_SLOT: Other patches in this series allow for NFSD_PAGES_PER_SLOT + 1 pages to be cached in a DRC slot when the response size is less than NFSD_PAGES_PER_SLOT * PAGE_SIZE but xdr_buf pages are used. e.g. a READDIR operation will encode a small amount of data in the xdr_buf head, and then the READDIR in the xdr_buf pages. So, the hard limit calculation use of pages by a session is underestimated by the number of cached operations using the xdr_buf pages. Yet another patch caches no pages for the solo sequence operation, or any compound where cache_this is False. So the hard limit calculation use of pages by a session is overestimated by the number of these operations in the cache. TODO: improve resource pre-allocation and negotiate session parameters accordingly. Respect and possibly adjust backchannel attributes. Signed-off-by: Marc Eshel <eshel@almaden.ibm.com> Signed-off-by: Dean Hildebrand <dhildeb@us.ibm.com> [nfsd41: remove headerpadsz from channel attributes] Our client and server only support a headerpadsz of 0. [nfsd41: use DRC limits in fore channel init] [nfsd41: do not change CREATE_SESSION back channel attrs] Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [use sessionid_lock spin lock] [nfsd41: use bool inuse for slot state] Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfsd41 remove sl_session from alloc_init_session] Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [simplify nfsd4_encode_create_session error handling] [nfsd41: fix comment style in init_forechannel_attrs] [nfsd41: allocate struct nfsd4_session and slot table in one piece] [nfsd41: no need to INIT_LIST_HEAD in alloc_init_session just prior to list_add] Signed-off-by: Benny Halevy <bhalevy@panasas.com> Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
2009-04-03 13:28:28 +08:00
struct nfs4_client *conf, *unconf;
struct nfsd4_slot *slot = NULL;
nfsd41: create_session operation Implement the create_session operation confoming to http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion1-26 Look up the client id (generated by the server on exchange_id, given by the client on create_session). If neither a confirmed or unconfirmed client is found then the client id is stale If a confirmed cilent is found (i.e. we already received create_session for it) then compare the sequence id to determine if it's a replay or possibly a mis-ordered rpc. If the seqid is in order, update the confirmed client seqid and procedd with updating the session parameters. If an unconfirmed client_id is found then verify the creds and seqid. If both match move the client id to confirmed state and proceed with processing the create_session. Currently, we do not support persistent sessions, and RDMA. alloc_init_session generates a new sessionid and creates a session structure. NFSD_PAGES_PER_SLOT is used for the max response cached calculation, and for the counting of DRC pages using the hard limits set in struct srv_serv. A note on NFSD_PAGES_PER_SLOT: Other patches in this series allow for NFSD_PAGES_PER_SLOT + 1 pages to be cached in a DRC slot when the response size is less than NFSD_PAGES_PER_SLOT * PAGE_SIZE but xdr_buf pages are used. e.g. a READDIR operation will encode a small amount of data in the xdr_buf head, and then the READDIR in the xdr_buf pages. So, the hard limit calculation use of pages by a session is underestimated by the number of cached operations using the xdr_buf pages. Yet another patch caches no pages for the solo sequence operation, or any compound where cache_this is False. So the hard limit calculation use of pages by a session is overestimated by the number of these operations in the cache. TODO: improve resource pre-allocation and negotiate session parameters accordingly. Respect and possibly adjust backchannel attributes. Signed-off-by: Marc Eshel <eshel@almaden.ibm.com> Signed-off-by: Dean Hildebrand <dhildeb@us.ibm.com> [nfsd41: remove headerpadsz from channel attributes] Our client and server only support a headerpadsz of 0. [nfsd41: use DRC limits in fore channel init] [nfsd41: do not change CREATE_SESSION back channel attrs] Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [use sessionid_lock spin lock] [nfsd41: use bool inuse for slot state] Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfsd41 remove sl_session from alloc_init_session] Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [simplify nfsd4_encode_create_session error handling] [nfsd41: fix comment style in init_forechannel_attrs] [nfsd41: allocate struct nfsd4_session and slot table in one piece] [nfsd41: no need to INIT_LIST_HEAD in alloc_init_session just prior to list_add] Signed-off-by: Benny Halevy <bhalevy@panasas.com> Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
2009-04-03 13:28:28 +08:00
int status = 0;
nfs4_lock_state();
unconf = find_unconfirmed_client(&cr_ses->clientid);
conf = find_confirmed_client(&cr_ses->clientid);
if (conf) {
slot = &conf->cl_slot;
status = check_slot_seqid(cr_ses->seqid, slot);
if (status == nfserr_replay_cache) {
nfsd41: create_session operation Implement the create_session operation confoming to http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion1-26 Look up the client id (generated by the server on exchange_id, given by the client on create_session). If neither a confirmed or unconfirmed client is found then the client id is stale If a confirmed cilent is found (i.e. we already received create_session for it) then compare the sequence id to determine if it's a replay or possibly a mis-ordered rpc. If the seqid is in order, update the confirmed client seqid and procedd with updating the session parameters. If an unconfirmed client_id is found then verify the creds and seqid. If both match move the client id to confirmed state and proceed with processing the create_session. Currently, we do not support persistent sessions, and RDMA. alloc_init_session generates a new sessionid and creates a session structure. NFSD_PAGES_PER_SLOT is used for the max response cached calculation, and for the counting of DRC pages using the hard limits set in struct srv_serv. A note on NFSD_PAGES_PER_SLOT: Other patches in this series allow for NFSD_PAGES_PER_SLOT + 1 pages to be cached in a DRC slot when the response size is less than NFSD_PAGES_PER_SLOT * PAGE_SIZE but xdr_buf pages are used. e.g. a READDIR operation will encode a small amount of data in the xdr_buf head, and then the READDIR in the xdr_buf pages. So, the hard limit calculation use of pages by a session is underestimated by the number of cached operations using the xdr_buf pages. Yet another patch caches no pages for the solo sequence operation, or any compound where cache_this is False. So the hard limit calculation use of pages by a session is overestimated by the number of these operations in the cache. TODO: improve resource pre-allocation and negotiate session parameters accordingly. Respect and possibly adjust backchannel attributes. Signed-off-by: Marc Eshel <eshel@almaden.ibm.com> Signed-off-by: Dean Hildebrand <dhildeb@us.ibm.com> [nfsd41: remove headerpadsz from channel attributes] Our client and server only support a headerpadsz of 0. [nfsd41: use DRC limits in fore channel init] [nfsd41: do not change CREATE_SESSION back channel attrs] Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [use sessionid_lock spin lock] [nfsd41: use bool inuse for slot state] Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfsd41 remove sl_session from alloc_init_session] Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [simplify nfsd4_encode_create_session error handling] [nfsd41: fix comment style in init_forechannel_attrs] [nfsd41: allocate struct nfsd4_session and slot table in one piece] [nfsd41: no need to INIT_LIST_HEAD in alloc_init_session just prior to list_add] Signed-off-by: Benny Halevy <bhalevy@panasas.com> Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
2009-04-03 13:28:28 +08:00
dprintk("Got a create_session replay! seqid= %d\n",
slot->sl_seqid);
cstate->slot = slot;
cstate->status = status;
/* Return the cached reply status */
status = nfsd4_replay_cache_entry(resp, NULL);
goto out;
} else if (cr_ses->seqid != conf->cl_slot.sl_seqid + 1) {
nfsd41: create_session operation Implement the create_session operation confoming to http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion1-26 Look up the client id (generated by the server on exchange_id, given by the client on create_session). If neither a confirmed or unconfirmed client is found then the client id is stale If a confirmed cilent is found (i.e. we already received create_session for it) then compare the sequence id to determine if it's a replay or possibly a mis-ordered rpc. If the seqid is in order, update the confirmed client seqid and procedd with updating the session parameters. If an unconfirmed client_id is found then verify the creds and seqid. If both match move the client id to confirmed state and proceed with processing the create_session. Currently, we do not support persistent sessions, and RDMA. alloc_init_session generates a new sessionid and creates a session structure. NFSD_PAGES_PER_SLOT is used for the max response cached calculation, and for the counting of DRC pages using the hard limits set in struct srv_serv. A note on NFSD_PAGES_PER_SLOT: Other patches in this series allow for NFSD_PAGES_PER_SLOT + 1 pages to be cached in a DRC slot when the response size is less than NFSD_PAGES_PER_SLOT * PAGE_SIZE but xdr_buf pages are used. e.g. a READDIR operation will encode a small amount of data in the xdr_buf head, and then the READDIR in the xdr_buf pages. So, the hard limit calculation use of pages by a session is underestimated by the number of cached operations using the xdr_buf pages. Yet another patch caches no pages for the solo sequence operation, or any compound where cache_this is False. So the hard limit calculation use of pages by a session is overestimated by the number of these operations in the cache. TODO: improve resource pre-allocation and negotiate session parameters accordingly. Respect and possibly adjust backchannel attributes. Signed-off-by: Marc Eshel <eshel@almaden.ibm.com> Signed-off-by: Dean Hildebrand <dhildeb@us.ibm.com> [nfsd41: remove headerpadsz from channel attributes] Our client and server only support a headerpadsz of 0. [nfsd41: use DRC limits in fore channel init] [nfsd41: do not change CREATE_SESSION back channel attrs] Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [use sessionid_lock spin lock] [nfsd41: use bool inuse for slot state] Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfsd41 remove sl_session from alloc_init_session] Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [simplify nfsd4_encode_create_session error handling] [nfsd41: fix comment style in init_forechannel_attrs] [nfsd41: allocate struct nfsd4_session and slot table in one piece] [nfsd41: no need to INIT_LIST_HEAD in alloc_init_session just prior to list_add] Signed-off-by: Benny Halevy <bhalevy@panasas.com> Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
2009-04-03 13:28:28 +08:00
status = nfserr_seq_misordered;
dprintk("Sequence misordered!\n");
dprintk("Expected seqid= %d but got seqid= %d\n",
slot->sl_seqid, cr_ses->seqid);
nfsd41: create_session operation Implement the create_session operation confoming to http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion1-26 Look up the client id (generated by the server on exchange_id, given by the client on create_session). If neither a confirmed or unconfirmed client is found then the client id is stale If a confirmed cilent is found (i.e. we already received create_session for it) then compare the sequence id to determine if it's a replay or possibly a mis-ordered rpc. If the seqid is in order, update the confirmed client seqid and procedd with updating the session parameters. If an unconfirmed client_id is found then verify the creds and seqid. If both match move the client id to confirmed state and proceed with processing the create_session. Currently, we do not support persistent sessions, and RDMA. alloc_init_session generates a new sessionid and creates a session structure. NFSD_PAGES_PER_SLOT is used for the max response cached calculation, and for the counting of DRC pages using the hard limits set in struct srv_serv. A note on NFSD_PAGES_PER_SLOT: Other patches in this series allow for NFSD_PAGES_PER_SLOT + 1 pages to be cached in a DRC slot when the response size is less than NFSD_PAGES_PER_SLOT * PAGE_SIZE but xdr_buf pages are used. e.g. a READDIR operation will encode a small amount of data in the xdr_buf head, and then the READDIR in the xdr_buf pages. So, the hard limit calculation use of pages by a session is underestimated by the number of cached operations using the xdr_buf pages. Yet another patch caches no pages for the solo sequence operation, or any compound where cache_this is False. So the hard limit calculation use of pages by a session is overestimated by the number of these operations in the cache. TODO: improve resource pre-allocation and negotiate session parameters accordingly. Respect and possibly adjust backchannel attributes. Signed-off-by: Marc Eshel <eshel@almaden.ibm.com> Signed-off-by: Dean Hildebrand <dhildeb@us.ibm.com> [nfsd41: remove headerpadsz from channel attributes] Our client and server only support a headerpadsz of 0. [nfsd41: use DRC limits in fore channel init] [nfsd41: do not change CREATE_SESSION back channel attrs] Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [use sessionid_lock spin lock] [nfsd41: use bool inuse for slot state] Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfsd41 remove sl_session from alloc_init_session] Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [simplify nfsd4_encode_create_session error handling] [nfsd41: fix comment style in init_forechannel_attrs] [nfsd41: allocate struct nfsd4_session and slot table in one piece] [nfsd41: no need to INIT_LIST_HEAD in alloc_init_session just prior to list_add] Signed-off-by: Benny Halevy <bhalevy@panasas.com> Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
2009-04-03 13:28:28 +08:00
goto out;
}
conf->cl_slot.sl_seqid++;
nfsd41: create_session operation Implement the create_session operation confoming to http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion1-26 Look up the client id (generated by the server on exchange_id, given by the client on create_session). If neither a confirmed or unconfirmed client is found then the client id is stale If a confirmed cilent is found (i.e. we already received create_session for it) then compare the sequence id to determine if it's a replay or possibly a mis-ordered rpc. If the seqid is in order, update the confirmed client seqid and procedd with updating the session parameters. If an unconfirmed client_id is found then verify the creds and seqid. If both match move the client id to confirmed state and proceed with processing the create_session. Currently, we do not support persistent sessions, and RDMA. alloc_init_session generates a new sessionid and creates a session structure. NFSD_PAGES_PER_SLOT is used for the max response cached calculation, and for the counting of DRC pages using the hard limits set in struct srv_serv. A note on NFSD_PAGES_PER_SLOT: Other patches in this series allow for NFSD_PAGES_PER_SLOT + 1 pages to be cached in a DRC slot when the response size is less than NFSD_PAGES_PER_SLOT * PAGE_SIZE but xdr_buf pages are used. e.g. a READDIR operation will encode a small amount of data in the xdr_buf head, and then the READDIR in the xdr_buf pages. So, the hard limit calculation use of pages by a session is underestimated by the number of cached operations using the xdr_buf pages. Yet another patch caches no pages for the solo sequence operation, or any compound where cache_this is False. So the hard limit calculation use of pages by a session is overestimated by the number of these operations in the cache. TODO: improve resource pre-allocation and negotiate session parameters accordingly. Respect and possibly adjust backchannel attributes. Signed-off-by: Marc Eshel <eshel@almaden.ibm.com> Signed-off-by: Dean Hildebrand <dhildeb@us.ibm.com> [nfsd41: remove headerpadsz from channel attributes] Our client and server only support a headerpadsz of 0. [nfsd41: use DRC limits in fore channel init] [nfsd41: do not change CREATE_SESSION back channel attrs] Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [use sessionid_lock spin lock] [nfsd41: use bool inuse for slot state] Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfsd41 remove sl_session from alloc_init_session] Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [simplify nfsd4_encode_create_session error handling] [nfsd41: fix comment style in init_forechannel_attrs] [nfsd41: allocate struct nfsd4_session and slot table in one piece] [nfsd41: no need to INIT_LIST_HEAD in alloc_init_session just prior to list_add] Signed-off-by: Benny Halevy <bhalevy@panasas.com> Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
2009-04-03 13:28:28 +08:00
} else if (unconf) {
if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) ||
(ip_addr != unconf->cl_addr)) {
status = nfserr_clid_inuse;
goto out;
}
slot = &unconf->cl_slot;
status = check_slot_seqid(cr_ses->seqid, slot);
if (status) {
/* an unconfirmed replay returns misordered */
nfsd41: create_session operation Implement the create_session operation confoming to http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion1-26 Look up the client id (generated by the server on exchange_id, given by the client on create_session). If neither a confirmed or unconfirmed client is found then the client id is stale If a confirmed cilent is found (i.e. we already received create_session for it) then compare the sequence id to determine if it's a replay or possibly a mis-ordered rpc. If the seqid is in order, update the confirmed client seqid and procedd with updating the session parameters. If an unconfirmed client_id is found then verify the creds and seqid. If both match move the client id to confirmed state and proceed with processing the create_session. Currently, we do not support persistent sessions, and RDMA. alloc_init_session generates a new sessionid and creates a session structure. NFSD_PAGES_PER_SLOT is used for the max response cached calculation, and for the counting of DRC pages using the hard limits set in struct srv_serv. A note on NFSD_PAGES_PER_SLOT: Other patches in this series allow for NFSD_PAGES_PER_SLOT + 1 pages to be cached in a DRC slot when the response size is less than NFSD_PAGES_PER_SLOT * PAGE_SIZE but xdr_buf pages are used. e.g. a READDIR operation will encode a small amount of data in the xdr_buf head, and then the READDIR in the xdr_buf pages. So, the hard limit calculation use of pages by a session is underestimated by the number of cached operations using the xdr_buf pages. Yet another patch caches no pages for the solo sequence operation, or any compound where cache_this is False. So the hard limit calculation use of pages by a session is overestimated by the number of these operations in the cache. TODO: improve resource pre-allocation and negotiate session parameters accordingly. Respect and possibly adjust backchannel attributes. Signed-off-by: Marc Eshel <eshel@almaden.ibm.com> Signed-off-by: Dean Hildebrand <dhildeb@us.ibm.com> [nfsd41: remove headerpadsz from channel attributes] Our client and server only support a headerpadsz of 0. [nfsd41: use DRC limits in fore channel init] [nfsd41: do not change CREATE_SESSION back channel attrs] Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [use sessionid_lock spin lock] [nfsd41: use bool inuse for slot state] Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfsd41 remove sl_session from alloc_init_session] Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [simplify nfsd4_encode_create_session error handling] [nfsd41: fix comment style in init_forechannel_attrs] [nfsd41: allocate struct nfsd4_session and slot table in one piece] [nfsd41: no need to INIT_LIST_HEAD in alloc_init_session just prior to list_add] Signed-off-by: Benny Halevy <bhalevy@panasas.com> Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
2009-04-03 13:28:28 +08:00
status = nfserr_seq_misordered;
goto out;
}
slot->sl_seqid++; /* from 0 to 1 */
nfsd41: create_session operation Implement the create_session operation confoming to http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion1-26 Look up the client id (generated by the server on exchange_id, given by the client on create_session). If neither a confirmed or unconfirmed client is found then the client id is stale If a confirmed cilent is found (i.e. we already received create_session for it) then compare the sequence id to determine if it's a replay or possibly a mis-ordered rpc. If the seqid is in order, update the confirmed client seqid and procedd with updating the session parameters. If an unconfirmed client_id is found then verify the creds and seqid. If both match move the client id to confirmed state and proceed with processing the create_session. Currently, we do not support persistent sessions, and RDMA. alloc_init_session generates a new sessionid and creates a session structure. NFSD_PAGES_PER_SLOT is used for the max response cached calculation, and for the counting of DRC pages using the hard limits set in struct srv_serv. A note on NFSD_PAGES_PER_SLOT: Other patches in this series allow for NFSD_PAGES_PER_SLOT + 1 pages to be cached in a DRC slot when the response size is less than NFSD_PAGES_PER_SLOT * PAGE_SIZE but xdr_buf pages are used. e.g. a READDIR operation will encode a small amount of data in the xdr_buf head, and then the READDIR in the xdr_buf pages. So, the hard limit calculation use of pages by a session is underestimated by the number of cached operations using the xdr_buf pages. Yet another patch caches no pages for the solo sequence operation, or any compound where cache_this is False. So the hard limit calculation use of pages by a session is overestimated by the number of these operations in the cache. TODO: improve resource pre-allocation and negotiate session parameters accordingly. Respect and possibly adjust backchannel attributes. Signed-off-by: Marc Eshel <eshel@almaden.ibm.com> Signed-off-by: Dean Hildebrand <dhildeb@us.ibm.com> [nfsd41: remove headerpadsz from channel attributes] Our client and server only support a headerpadsz of 0. [nfsd41: use DRC limits in fore channel init] [nfsd41: do not change CREATE_SESSION back channel attrs] Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [use sessionid_lock spin lock] [nfsd41: use bool inuse for slot state] Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfsd41 remove sl_session from alloc_init_session] Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [simplify nfsd4_encode_create_session error handling] [nfsd41: fix comment style in init_forechannel_attrs] [nfsd41: allocate struct nfsd4_session and slot table in one piece] [nfsd41: no need to INIT_LIST_HEAD in alloc_init_session just prior to list_add] Signed-off-by: Benny Halevy <bhalevy@panasas.com> Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
2009-04-03 13:28:28 +08:00
move_to_confirmed(unconf);
/*
* We do not support RDMA or persistent sessions
*/
cr_ses->flags &= ~SESSION4_PERSIST;
cr_ses->flags &= ~SESSION4_RDMA;
conf = unconf;
} else {
status = nfserr_stale_clientid;
goto out;
}
status = alloc_init_session(rqstp, conf, cr_ses);
if (status)
goto out;
memcpy(cr_ses->sessionid.data, conf->cl_sessionid.data,
NFS4_MAX_SESSIONID_LEN);
cr_ses->seqid = slot->sl_seqid;
nfsd41: create_session operation Implement the create_session operation confoming to http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion1-26 Look up the client id (generated by the server on exchange_id, given by the client on create_session). If neither a confirmed or unconfirmed client is found then the client id is stale If a confirmed cilent is found (i.e. we already received create_session for it) then compare the sequence id to determine if it's a replay or possibly a mis-ordered rpc. If the seqid is in order, update the confirmed client seqid and procedd with updating the session parameters. If an unconfirmed client_id is found then verify the creds and seqid. If both match move the client id to confirmed state and proceed with processing the create_session. Currently, we do not support persistent sessions, and RDMA. alloc_init_session generates a new sessionid and creates a session structure. NFSD_PAGES_PER_SLOT is used for the max response cached calculation, and for the counting of DRC pages using the hard limits set in struct srv_serv. A note on NFSD_PAGES_PER_SLOT: Other patches in this series allow for NFSD_PAGES_PER_SLOT + 1 pages to be cached in a DRC slot when the response size is less than NFSD_PAGES_PER_SLOT * PAGE_SIZE but xdr_buf pages are used. e.g. a READDIR operation will encode a small amount of data in the xdr_buf head, and then the READDIR in the xdr_buf pages. So, the hard limit calculation use of pages by a session is underestimated by the number of cached operations using the xdr_buf pages. Yet another patch caches no pages for the solo sequence operation, or any compound where cache_this is False. So the hard limit calculation use of pages by a session is overestimated by the number of these operations in the cache. TODO: improve resource pre-allocation and negotiate session parameters accordingly. Respect and possibly adjust backchannel attributes. Signed-off-by: Marc Eshel <eshel@almaden.ibm.com> Signed-off-by: Dean Hildebrand <dhildeb@us.ibm.com> [nfsd41: remove headerpadsz from channel attributes] Our client and server only support a headerpadsz of 0. [nfsd41: use DRC limits in fore channel init] [nfsd41: do not change CREATE_SESSION back channel attrs] Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [use sessionid_lock spin lock] [nfsd41: use bool inuse for slot state] Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfsd41 remove sl_session from alloc_init_session] Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [simplify nfsd4_encode_create_session error handling] [nfsd41: fix comment style in init_forechannel_attrs] [nfsd41: allocate struct nfsd4_session and slot table in one piece] [nfsd41: no need to INIT_LIST_HEAD in alloc_init_session just prior to list_add] Signed-off-by: Benny Halevy <bhalevy@panasas.com> Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
2009-04-03 13:28:28 +08:00
slot->sl_inuse = true;
cstate->slot = slot;
/* Ensure a page is used for the cache */
slot->sl_cache_entry.ce_cachethis = 1;
nfsd41: create_session operation Implement the create_session operation confoming to http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion1-26 Look up the client id (generated by the server on exchange_id, given by the client on create_session). If neither a confirmed or unconfirmed client is found then the client id is stale If a confirmed cilent is found (i.e. we already received create_session for it) then compare the sequence id to determine if it's a replay or possibly a mis-ordered rpc. If the seqid is in order, update the confirmed client seqid and procedd with updating the session parameters. If an unconfirmed client_id is found then verify the creds and seqid. If both match move the client id to confirmed state and proceed with processing the create_session. Currently, we do not support persistent sessions, and RDMA. alloc_init_session generates a new sessionid and creates a session structure. NFSD_PAGES_PER_SLOT is used for the max response cached calculation, and for the counting of DRC pages using the hard limits set in struct srv_serv. A note on NFSD_PAGES_PER_SLOT: Other patches in this series allow for NFSD_PAGES_PER_SLOT + 1 pages to be cached in a DRC slot when the response size is less than NFSD_PAGES_PER_SLOT * PAGE_SIZE but xdr_buf pages are used. e.g. a READDIR operation will encode a small amount of data in the xdr_buf head, and then the READDIR in the xdr_buf pages. So, the hard limit calculation use of pages by a session is underestimated by the number of cached operations using the xdr_buf pages. Yet another patch caches no pages for the solo sequence operation, or any compound where cache_this is False. So the hard limit calculation use of pages by a session is overestimated by the number of these operations in the cache. TODO: improve resource pre-allocation and negotiate session parameters accordingly. Respect and possibly adjust backchannel attributes. Signed-off-by: Marc Eshel <eshel@almaden.ibm.com> Signed-off-by: Dean Hildebrand <dhildeb@us.ibm.com> [nfsd41: remove headerpadsz from channel attributes] Our client and server only support a headerpadsz of 0. [nfsd41: use DRC limits in fore channel init] [nfsd41: do not change CREATE_SESSION back channel attrs] Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [use sessionid_lock spin lock] [nfsd41: use bool inuse for slot state] Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfsd41 remove sl_session from alloc_init_session] Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [simplify nfsd4_encode_create_session error handling] [nfsd41: fix comment style in init_forechannel_attrs] [nfsd41: allocate struct nfsd4_session and slot table in one piece] [nfsd41: no need to INIT_LIST_HEAD in alloc_init_session just prior to list_add] Signed-off-by: Benny Halevy <bhalevy@panasas.com> Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
2009-04-03 13:28:28 +08:00
out:
nfs4_unlock_state();
dprintk("%s returns %d\n", __func__, ntohl(status));
return status;
}
__be32
nfsd4_destroy_session(struct svc_rqst *r,
struct nfsd4_compound_state *cstate,
struct nfsd4_destroy_session *sessionid)
{
struct nfsd4_session *ses;
u32 status = nfserr_badsession;
/* Notes:
* - The confirmed nfs4_client->cl_sessionid holds destroyed sessinid
* - Should we return nfserr_back_chan_busy if waiting for
* callbacks on to-be-destroyed session?
* - Do we need to clear any callback info from previous session?
*/
dump_sessionid(__func__, &sessionid->sessionid);
spin_lock(&sessionid_lock);
ses = find_in_sessionid_hashtbl(&sessionid->sessionid);
if (!ses) {
spin_unlock(&sessionid_lock);
goto out;
}
unhash_session(ses);
spin_unlock(&sessionid_lock);
/* wait for callbacks */
shutdown_callback_client(ses->se_client);
nfsd4_put_session(ses);
status = nfs_ok;
out:
dprintk("%s returns %d\n", __func__, ntohl(status));
return status;
}
__be32
nfsd4_sequence(struct svc_rqst *rqstp,
struct nfsd4_compound_state *cstate,
struct nfsd4_sequence *seq)
{
struct nfsd4_compoundres *resp = rqstp->rq_resp;
struct nfsd4_session *session;
struct nfsd4_slot *slot;
int status;
if (resp->opcnt != 1)
return nfserr_sequence_pos;
spin_lock(&sessionid_lock);
status = nfserr_badsession;
session = find_in_sessionid_hashtbl(&seq->sessionid);
if (!session)
goto out;
status = nfserr_badslot;
if (seq->slotid >= session->se_fnumslots)
goto out;
slot = &session->se_slots[seq->slotid];
dprintk("%s: slotid %d\n", __func__, seq->slotid);
status = check_slot_seqid(seq->seqid, slot);
if (status == nfserr_replay_cache) {
cstate->slot = slot;
cstate->session = session;
/* Return the cached reply status and set cstate->status
* for nfsd4_svc_encode_compoundres processing */
status = nfsd4_replay_cache_entry(resp, seq);
cstate->status = nfserr_replay_cache;
goto replay_cache;
}
if (status)
goto out;
/* Success! bump slot seqid */
slot->sl_inuse = true;
slot->sl_seqid = seq->seqid;
slot->sl_cache_entry.ce_cachethis = seq->cachethis;
/* Always set the cache entry cachethis for solo sequence */
if (nfsd4_is_solo_sequence(resp))
slot->sl_cache_entry.ce_cachethis = 1;
cstate->slot = slot;
cstate->session = session;
replay_cache:
/* Renew the clientid on success and on replay.
* Hold a session reference until done processing the compound:
* nfsd4_put_session called only if the cstate slot is set.
*/
renew_client(session->se_client);
nfsd4_get_session(session);
out:
spin_unlock(&sessionid_lock);
dprintk("%s: return %d\n", __func__, ntohl(status));
return status;
}
__be32
nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_setclientid *setclid)
{
struct sockaddr_in *sin = svc_addr_in(rqstp);
struct xdr_netobj clname = {
.len = setclid->se_namelen,
.data = setclid->se_name,
};
nfs4_verifier clverifier = setclid->se_verf;
unsigned int strhashval;
struct nfs4_client *conf, *unconf, *new;
__be32 status;
char *princ;
char dname[HEXDIR_LEN];
if (!check_name(clname))
return nfserr_inval;
status = nfs4_make_rec_clidname(dname, &clname);
if (status)
return status;
/*
* XXX The Duplicate Request Cache (DRC) has been checked (??)
* We get here on a DRC miss.
*/
strhashval = clientstr_hashval(dname);
nfs4_lock_state();
conf = find_confirmed_client_by_str(dname, strhashval, false);
if (conf) {
/* RFC 3530 14.2.33 CASE 0: */
status = nfserr_clid_inuse;
if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) {
dprintk("NFSD: setclientid: string in use by client"
" at %pI4\n", &conf->cl_addr);
goto out;
}
}
/*
* section 14.2.33 of RFC 3530 (under the heading "IMPLEMENTATION")
* has a description of SETCLIENTID request processing consisting
* of 5 bullet points, labeled as CASE0 - CASE4 below.
*/
unconf = find_unconfirmed_client_by_str(dname, strhashval, false);
status = nfserr_resource;
if (!conf) {
/*
* RFC 3530 14.2.33 CASE 4:
* placed first, because it is the normal case
*/
if (unconf)
expire_client(unconf);
new = create_client(clname, dname);
if (new == NULL)
goto out;
gen_clid(new);
} else if (same_verf(&conf->cl_verifier, &clverifier)) {
/*
* RFC 3530 14.2.33 CASE 1:
* probable callback update
*/
if (unconf) {
/* Note this is removing unconfirmed {*x***},
* which is stronger than RFC recommended {vxc**}.
* This has the advantage that there is at most
* one {*x***} in either list at any time.
*/
expire_client(unconf);
}
new = create_client(clname, dname);
if (new == NULL)
goto out;
copy_clid(new, conf);
} else if (!unconf) {
/*
* RFC 3530 14.2.33 CASE 2:
* probable client reboot; state will be removed if
* confirmed.
*/
new = create_client(clname, dname);
if (new == NULL)
goto out;
gen_clid(new);
} else {
/*
* RFC 3530 14.2.33 CASE 3:
* probable client reboot; state will be removed if
* confirmed.
*/
expire_client(unconf);
new = create_client(clname, dname);
if (new == NULL)
goto out;
gen_clid(new);
}
copy_verf(new, &clverifier);
new->cl_addr = sin->sin_addr.s_addr;
new->cl_flavor = rqstp->rq_flavor;
princ = svc_gss_principal(rqstp);
if (princ) {
new->cl_principal = kstrdup(princ, GFP_KERNEL);
if (new->cl_principal == NULL) {
free_client(new);
goto out;
}
}
copy_cred(&new->cl_cred, &rqstp->rq_cred);
gen_confirm(new);
gen_callback(new, setclid);
add_to_unconfirmed(new, strhashval);
setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot;
setclid->se_clientid.cl_id = new->cl_clientid.cl_id;
memcpy(setclid->se_confirm.data, new->cl_confirm.data, sizeof(setclid->se_confirm.data));
status = nfs_ok;
out:
nfs4_unlock_state();
return status;
}
/*
* Section 14.2.34 of RFC 3530 (under the heading "IMPLEMENTATION") has
* a description of SETCLIENTID_CONFIRM request processing consisting of 4
* bullets, labeled as CASE1 - CASE4 below.
*/
__be32
nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
struct nfsd4_compound_state *cstate,
struct nfsd4_setclientid_confirm *setclientid_confirm)
{
struct sockaddr_in *sin = svc_addr_in(rqstp);
struct nfs4_client *conf, *unconf;
nfs4_verifier confirm = setclientid_confirm->sc_confirm;
clientid_t * clid = &setclientid_confirm->sc_clientid;
__be32 status;
if (STALE_CLIENTID(clid))
return nfserr_stale_clientid;
/*
* XXX The Duplicate Request Cache (DRC) has been checked (??)
* We get here on a DRC miss.
*/
nfs4_lock_state();
conf = find_confirmed_client(clid);
unconf = find_unconfirmed_client(clid);
status = nfserr_clid_inuse;
if (conf && conf->cl_addr != sin->sin_addr.s_addr)
goto out;
if (unconf && unconf->cl_addr != sin->sin_addr.s_addr)
goto out;
/*
* section 14.2.34 of RFC 3530 has a description of
* SETCLIENTID_CONFIRM request processing consisting
* of 4 bullet points, labeled as CASE1 - CASE4 below.
*/
if (conf && unconf && same_verf(&confirm, &unconf->cl_confirm)) {
/*
* RFC 3530 14.2.34 CASE 1:
* callback update
*/
if (!same_creds(&conf->cl_cred, &unconf->cl_cred))
status = nfserr_clid_inuse;
else {
/* XXX: We just turn off callbacks until we can handle
* change request correctly. */
atomic_set(&conf->cl_callback.cb_set, 0);
gen_confirm(conf);
nfsd4_remove_clid_dir(unconf);
expire_client(unconf);
status = nfs_ok;
}
} else if (conf && !unconf) {
/*
* RFC 3530 14.2.34 CASE 2:
* probable retransmitted request; play it safe and
* do nothing.
*/
if (!same_creds(&conf->cl_cred, &rqstp->rq_cred))
status = nfserr_clid_inuse;
else
status = nfs_ok;
} else if (!conf && unconf
&& same_verf(&unconf->cl_confirm, &confirm)) {
/*
* RFC 3530 14.2.34 CASE 3:
* Normal case; new or rebooted client:
*/
if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred)) {
status = nfserr_clid_inuse;
} else {
unsigned int hash =
clientstr_hashval(unconf->cl_recdir);
conf = find_confirmed_client_by_str(unconf->cl_recdir,
hash, false);
if (conf) {
nfsd4_remove_clid_dir(conf);
expire_client(conf);
}
move_to_confirmed(unconf);
conf = unconf;
nfsd4_probe_callback(conf);
status = nfs_ok;
}
} else if ((!conf || (conf && !same_verf(&conf->cl_confirm, &confirm)))
&& (!unconf || (unconf && !same_verf(&unconf->cl_confirm,
&confirm)))) {
/*
* RFC 3530 14.2.34 CASE 4:
* Client probably hasn't noticed that we rebooted yet.
*/
status = nfserr_stale_clientid;
} else {
/* check that we have hit one of the cases...*/
status = nfserr_clid_inuse;
}
out:
nfs4_unlock_state();
return status;
}
/* OPEN Share state helper functions */
static inline struct nfs4_file *
alloc_init_file(struct inode *ino)
{
struct nfs4_file *fp;
unsigned int hashval = file_hashval(ino);
fp = kmem_cache_alloc(file_slab, GFP_KERNEL);
if (fp) {
atomic_set(&fp->fi_ref, 1);
INIT_LIST_HEAD(&fp->fi_hash);
INIT_LIST_HEAD(&fp->fi_stateids);
INIT_LIST_HEAD(&fp->fi_delegations);
spin_lock(&recall_lock);
list_add(&fp->fi_hash, &file_hashtbl[hashval]);
spin_unlock(&recall_lock);
fp->fi_inode = igrab(ino);
fp->fi_id = current_fileid++;
fp->fi_had_conflict = false;
return fp;
}
return NULL;
}
static void
nfsd4_free_slab(struct kmem_cache **slab)
{
if (*slab == NULL)
return;
kmem_cache_destroy(*slab);
*slab = NULL;
}
void
nfsd4_free_slabs(void)
{
nfsd4_free_slab(&stateowner_slab);
nfsd4_free_slab(&file_slab);
nfsd4_free_slab(&stateid_slab);
nfsd4_free_slab(&deleg_slab);
}
static int
nfsd4_init_slabs(void)
{
stateowner_slab = kmem_cache_create("nfsd4_stateowners",
sizeof(struct nfs4_stateowner), 0, 0, NULL);
if (stateowner_slab == NULL)
goto out_nomem;
file_slab = kmem_cache_create("nfsd4_files",
sizeof(struct nfs4_file), 0, 0, NULL);
if (file_slab == NULL)
goto out_nomem;
stateid_slab = kmem_cache_create("nfsd4_stateids",
sizeof(struct nfs4_stateid), 0, 0, NULL);
if (stateid_slab == NULL)
goto out_nomem;
deleg_slab = kmem_cache_create("nfsd4_delegations",
sizeof(struct nfs4_delegation), 0, 0, NULL);
if (deleg_slab == NULL)
goto out_nomem;
return 0;
out_nomem:
nfsd4_free_slabs();
dprintk("nfsd4: out of memory while initializing nfsv4\n");
return -ENOMEM;
}
void
nfs4_free_stateowner(struct kref *kref)
{
struct nfs4_stateowner *sop =
container_of(kref, struct nfs4_stateowner, so_ref);
kfree(sop->so_owner.data);
kmem_cache_free(stateowner_slab, sop);
}
static inline struct nfs4_stateowner *
alloc_stateowner(struct xdr_netobj *owner)
{
struct nfs4_stateowner *sop;
if ((sop = kmem_cache_alloc(stateowner_slab, GFP_KERNEL))) {
if ((sop->so_owner.data = kmalloc(owner->len, GFP_KERNEL))) {
memcpy(sop->so_owner.data, owner->data, owner->len);
sop->so_owner.len = owner->len;
kref_init(&sop->so_ref);
return sop;
}
kmem_cache_free(stateowner_slab, sop);
}
return NULL;
}
static struct nfs4_stateowner *
alloc_init_open_stateowner(unsigned int strhashval, struct nfs4_client *clp, struct nfsd4_open *open) {
struct nfs4_stateowner *sop;
struct nfs4_replay *rp;
unsigned int idhashval;
if (!(sop = alloc_stateowner(&open->op_owner)))
return NULL;
idhashval = ownerid_hashval(current_ownerid);
INIT_LIST_HEAD(&sop->so_idhash);
INIT_LIST_HEAD(&sop->so_strhash);
INIT_LIST_HEAD(&sop->so_perclient);
INIT_LIST_HEAD(&sop->so_stateids);
INIT_LIST_HEAD(&sop->so_perstateid); /* not used */
INIT_LIST_HEAD(&sop->so_close_lru);
sop->so_time = 0;
list_add(&sop->so_idhash, &ownerid_hashtbl[idhashval]);
list_add(&sop->so_strhash, &ownerstr_hashtbl[strhashval]);
list_add(&sop->so_perclient, &clp->cl_openowners);
sop->so_is_open_owner = 1;
sop->so_id = current_ownerid++;
sop->so_client = clp;
sop->so_seqid = open->op_seqid;
sop->so_confirmed = 0;
rp = &sop->so_replay;
rp->rp_status = nfserr_serverfault;
rp->rp_buflen = 0;
rp->rp_buf = rp->rp_ibuf;
return sop;
}
static inline void
init_stateid(struct nfs4_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open) {
struct nfs4_stateowner *sop = open->op_stateowner;
unsigned int hashval = stateid_hashval(sop->so_id, fp->fi_id);
INIT_LIST_HEAD(&stp->st_hash);
INIT_LIST_HEAD(&stp->st_perstateowner);
INIT_LIST_HEAD(&stp->st_lockowners);
INIT_LIST_HEAD(&stp->st_perfile);
list_add(&stp->st_hash, &stateid_hashtbl[hashval]);
list_add(&stp->st_perstateowner, &sop->so_stateids);
list_add(&stp->st_perfile, &fp->fi_stateids);
stp->st_stateowner = sop;
get_nfs4_file(fp);
stp->st_file = fp;
stp->st_stateid.si_boot = boot_time;
stp->st_stateid.si_stateownerid = sop->so_id;
stp->st_stateid.si_fileid = fp->fi_id;
stp->st_stateid.si_generation = 0;
stp->st_access_bmap = 0;
stp->st_deny_bmap = 0;
__set_bit(open->op_share_access & ~NFS4_SHARE_WANT_MASK,
&stp->st_access_bmap);
__set_bit(open->op_share_deny, &stp->st_deny_bmap);
stp->st_openstp = NULL;
}
static void
move_to_close_lru(struct nfs4_stateowner *sop)
{
dprintk("NFSD: move_to_close_lru nfs4_stateowner %p\n", sop);
list_move_tail(&sop->so_close_lru, &close_lru);
sop->so_time = get_seconds();
}
static int
same_owner_str(struct nfs4_stateowner *sop, struct xdr_netobj *owner,
clientid_t *clid)
{
return (sop->so_owner.len == owner->len) &&
0 == memcmp(sop->so_owner.data, owner->data, owner->len) &&
(sop->so_client->cl_clientid.cl_id == clid->cl_id);
}
static struct nfs4_stateowner *
find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open)
{
struct nfs4_stateowner *so = NULL;
list_for_each_entry(so, &ownerstr_hashtbl[hashval], so_strhash) {
if (same_owner_str(so, &open->op_owner, &open->op_clientid))
return so;
}
return NULL;
}
/* search file_hashtbl[] for file */
static struct nfs4_file *
find_file(struct inode *ino)
{
unsigned int hashval = file_hashval(ino);
struct nfs4_file *fp;
spin_lock(&recall_lock);
list_for_each_entry(fp, &file_hashtbl[hashval], fi_hash) {
if (fp->fi_inode == ino) {
get_nfs4_file(fp);
spin_unlock(&recall_lock);
return fp;
}
}
spin_unlock(&recall_lock);
return NULL;
}
static inline int access_valid(u32 x, u32 minorversion)
{
if ((x & NFS4_SHARE_ACCESS_MASK) < NFS4_SHARE_ACCESS_READ)
return 0;
if ((x & NFS4_SHARE_ACCESS_MASK) > NFS4_SHARE_ACCESS_BOTH)
return 0;
x &= ~NFS4_SHARE_ACCESS_MASK;
if (minorversion && x) {
if ((x & NFS4_SHARE_WANT_MASK) > NFS4_SHARE_WANT_CANCEL)
return 0;
if ((x & NFS4_SHARE_WHEN_MASK) > NFS4_SHARE_PUSH_DELEG_WHEN_UNCONTENDED)
return 0;
x &= ~(NFS4_SHARE_WANT_MASK | NFS4_SHARE_WHEN_MASK);
}
if (x)
return 0;
return 1;
}
static inline int deny_valid(u32 x)
{
/* Note: unlike access bits, deny bits may be zero. */
return x <= NFS4_SHARE_DENY_BOTH;
}
/*
* We store the NONE, READ, WRITE, and BOTH bits separately in the
* st_{access,deny}_bmap field of the stateid, in order to track not
* only what share bits are currently in force, but also what
* combinations of share bits previous opens have used. This allows us
* to enforce the recommendation of rfc 3530 14.2.19 that the server
* return an error if the client attempt to downgrade to a combination
* of share bits not explicable by closing some of its previous opens.
*
* XXX: This enforcement is actually incomplete, since we don't keep
* track of access/deny bit combinations; so, e.g., we allow:
*
* OPEN allow read, deny write
* OPEN allow both, deny none
* DOWNGRADE allow read, deny none
*
* which we should reject.
*/
static void
set_access(unsigned int *access, unsigned long bmap) {
int i;
*access = 0;
for (i = 1; i < 4; i++) {
if (test_bit(i, &bmap))
*access |= i;
}
}
static void
set_deny(unsigned int *deny, unsigned long bmap) {
int i;
*deny = 0;
for (i = 0; i < 4; i++) {
if (test_bit(i, &bmap))
*deny |= i ;
}
}
static int
test_share(struct nfs4_stateid *stp, struct nfsd4_open *open) {
unsigned int access, deny;
set_access(&access, stp->st_access_bmap);
set_deny(&deny, stp->st_deny_bmap);
if ((access & open->op_share_deny) || (deny & open->op_share_access))
return 0;
return 1;
}
/*
* Called to check deny when READ with all zero stateid or
* WRITE with all zero or all one stateid
*/
static __be32
nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
{
struct inode *ino = current_fh->fh_dentry->d_inode;
struct nfs4_file *fp;
struct nfs4_stateid *stp;
__be32 ret;
dprintk("NFSD: nfs4_share_conflict\n");
fp = find_file(ino);
if (!fp)
return nfs_ok;
ret = nfserr_locked;
/* Search for conflicting share reservations */
list_for_each_entry(stp, &fp->fi_stateids, st_perfile) {
if (test_bit(deny_type, &stp->st_deny_bmap) ||
test_bit(NFS4_SHARE_DENY_BOTH, &stp->st_deny_bmap))
goto out;
}
ret = nfs_ok;
out:
put_nfs4_file(fp);
return ret;
}
static inline void
nfs4_file_downgrade(struct file *filp, unsigned int share_access)
{
if (share_access & NFS4_SHARE_ACCESS_WRITE) {
drop_file_write_access(filp);
filp->f_mode = (filp->f_mode | FMODE_READ) & ~FMODE_WRITE;
}
}
/*
* Recall a delegation
*/
static int
do_recall(void *__dp)
{
struct nfs4_delegation *dp = __dp;
dp->dl_file->fi_had_conflict = true;
nfsd4_cb_recall(dp);
return 0;
}
/*
* Spawn a thread to perform a recall on the delegation represented
* by the lease (file_lock)
*
* Called from break_lease() with lock_kernel() held.
* Note: we assume break_lease will only call this *once* for any given
* lease.
*/
static
void nfsd_break_deleg_cb(struct file_lock *fl)
{
struct nfs4_delegation *dp= (struct nfs4_delegation *)fl->fl_owner;
struct task_struct *t;
dprintk("NFSD nfsd_break_deleg_cb: dp %p fl %p\n",dp,fl);
if (!dp)
return;
/* We're assuming the state code never drops its reference
* without first removing the lease. Since we're in this lease
* callback (and since the lease code is serialized by the kernel
* lock) we know the server hasn't removed the lease yet, we know
* it's safe to take a reference: */
atomic_inc(&dp->dl_count);
atomic_inc(&dp->dl_client->cl_count);
spin_lock(&recall_lock);
list_add_tail(&dp->dl_recall_lru, &del_recall_lru);
spin_unlock(&recall_lock);
/* only place dl_time is set. protected by lock_kernel*/
dp->dl_time = get_seconds();
/*
* We don't want the locks code to timeout the lease for us;
* we'll remove it ourself if the delegation isn't returned
* in time.
*/
fl->fl_break_time = 0;
t = kthread_run(do_recall, dp, "%s", "nfs4_cb_recall");
if (IS_ERR(t)) {
struct nfs4_client *clp = dp->dl_client;
printk(KERN_INFO "NFSD: Callback thread failed for "
"for client (clientid %08x/%08x)\n",
clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
put_nfs4_client(dp->dl_client);
nfs4_put_delegation(dp);
}
}
/*
* The file_lock is being reapd.
*
* Called by locks_free_lock() with lock_kernel() held.
*/
static
void nfsd_release_deleg_cb(struct file_lock *fl)
{
struct nfs4_delegation *dp = (struct nfs4_delegation *)fl->fl_owner;
dprintk("NFSD nfsd_release_deleg_cb: fl %p dp %p dl_count %d\n", fl,dp, atomic_read(&dp->dl_count));
if (!(fl->fl_flags & FL_LEASE) || !dp)
return;
dp->dl_flock = NULL;
}
/*
* Set the delegation file_lock back pointer.
*
* Called from setlease() with lock_kernel() held.
*/
static
void nfsd_copy_lock_deleg_cb(struct file_lock *new, struct file_lock *fl)
{
struct nfs4_delegation *dp = (struct nfs4_delegation *)new->fl_owner;
dprintk("NFSD: nfsd_copy_lock_deleg_cb: new fl %p dp %p\n", new, dp);
if (!dp)
return;
dp->dl_flock = new;
}
/*
* Called from setlease() with lock_kernel() held
*/
static
int nfsd_same_client_deleg_cb(struct file_lock *onlist, struct file_lock *try)
{
struct nfs4_delegation *onlistd =
(struct nfs4_delegation *)onlist->fl_owner;
struct nfs4_delegation *tryd =
(struct nfs4_delegation *)try->fl_owner;
if (onlist->fl_lmops != try->fl_lmops)
return 0;
return onlistd->dl_client == tryd->dl_client;
}
static
int nfsd_change_deleg_cb(struct file_lock **onlist, int arg)
{
if (arg & F_UNLCK)
return lease_modify(onlist, arg);
else
return -EAGAIN;
}
static struct lock_manager_operations nfsd_lease_mng_ops = {
.fl_break = nfsd_break_deleg_cb,
.fl_release_private = nfsd_release_deleg_cb,
.fl_copy_lock = nfsd_copy_lock_deleg_cb,
.fl_mylease = nfsd_same_client_deleg_cb,
.fl_change = nfsd_change_deleg_cb,
};
__be32
nfsd4_process_open1(struct nfsd4_compound_state *cstate,
struct nfsd4_open *open)
{
clientid_t *clientid = &open->op_clientid;
struct nfs4_client *clp = NULL;
unsigned int strhashval;
struct nfs4_stateowner *sop = NULL;
if (!check_name(open->op_owner))
return nfserr_inval;
if (STALE_CLIENTID(&open->op_clientid))
return nfserr_stale_clientid;
strhashval = ownerstr_hashval(clientid->cl_id, open->op_owner);
sop = find_openstateowner_str(strhashval, open);
open->op_stateowner = sop;
if (!sop) {
/* Make sure the client's lease hasn't expired. */
clp = find_confirmed_client(clientid);
if (clp == NULL)
return nfserr_expired;
goto renew;
}
/* When sessions are used, skip open sequenceid processing */
if (nfsd4_has_session(cstate))
goto renew;
if (!sop->so_confirmed) {
/* Replace unconfirmed owners without checking for replay. */
clp = sop->so_client;
release_openowner(sop);
open->op_stateowner = NULL;
goto renew;
}
if (open->op_seqid == sop->so_seqid - 1) {
if (sop->so_replay.rp_buflen)
return nfserr_replay_me;
/* The original OPEN failed so spectacularly
* that we don't even have replay data saved!
* Therefore, we have no choice but to continue
* processing this OPEN; presumably, we'll
* fail again for the same reason.
*/
dprintk("nfsd4_process_open1: replay with no replay cache\n");
goto renew;
}
if (open->op_seqid != sop->so_seqid)
return nfserr_bad_seqid;
renew:
if (open->op_stateowner == NULL) {
sop = alloc_init_open_stateowner(strhashval, clp, open);
if (sop == NULL)
return nfserr_resource;
open->op_stateowner = sop;
}
list_del_init(&sop->so_close_lru);
renew_client(sop->so_client);
return nfs_ok;
}
static inline __be32
nfs4_check_delegmode(struct nfs4_delegation *dp, int flags)
{
if ((flags & WR_STATE) && (dp->dl_type == NFS4_OPEN_DELEGATE_READ))
return nfserr_openmode;
else
return nfs_ok;
}
static struct nfs4_delegation *
find_delegation_file(struct nfs4_file *fp, stateid_t *stid)
{
struct nfs4_delegation *dp;
list_for_each_entry(dp, &fp->fi_delegations, dl_perfile) {
if (dp->dl_stateid.si_stateownerid == stid->si_stateownerid)
return dp;
}
return NULL;
}
static __be32
nfs4_check_deleg(struct nfs4_file *fp, struct nfsd4_open *open,
struct nfs4_delegation **dp)
{
int flags;
__be32 status = nfserr_bad_stateid;
*dp = find_delegation_file(fp, &open->op_delegate_stateid);
if (*dp == NULL)
goto out;
flags = open->op_share_access == NFS4_SHARE_ACCESS_READ ?
RD_STATE : WR_STATE;
status = nfs4_check_delegmode(*dp, flags);
if (status)
*dp = NULL;
out:
if (open->op_claim_type != NFS4_OPEN_CLAIM_DELEGATE_CUR)
return nfs_ok;
if (status)
return status;
open->op_stateowner->so_confirmed = 1;
return nfs_ok;
}
static __be32
nfs4_check_open(struct nfs4_file *fp, struct nfsd4_open *open, struct nfs4_stateid **stpp)
{
struct nfs4_stateid *local;
__be32 status = nfserr_share_denied;
struct nfs4_stateowner *sop = open->op_stateowner;
list_for_each_entry(local, &fp->fi_stateids, st_perfile) {
/* ignore lock owners */
if (local->st_stateowner->so_is_open_owner == 0)
continue;
/* remember if we have seen this open owner */
if (local->st_stateowner == sop)
*stpp = local;
/* check for conflicting share reservations */
if (!test_share(local, open))
goto out;
}
status = 0;
out:
return status;
}
static inline struct nfs4_stateid *
nfs4_alloc_stateid(void)
{
return kmem_cache_alloc(stateid_slab, GFP_KERNEL);
}
static __be32
nfs4_new_open(struct svc_rqst *rqstp, struct nfs4_stateid **stpp,
struct nfs4_delegation *dp,
struct svc_fh *cur_fh, int flags)
{
struct nfs4_stateid *stp;
stp = nfs4_alloc_stateid();
if (stp == NULL)
return nfserr_resource;
if (dp) {
get_file(dp->dl_vfs_file);
stp->st_vfs_file = dp->dl_vfs_file;
} else {
__be32 status;
status = nfsd_open(rqstp, cur_fh, S_IFREG, flags,
&stp->st_vfs_file);
if (status) {
if (status == nfserr_dropit)
status = nfserr_jukebox;
kmem_cache_free(stateid_slab, stp);
return status;
}
}
*stpp = stp;
return 0;
}
static inline __be32
nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh,
struct nfsd4_open *open)
{
struct iattr iattr = {
.ia_valid = ATTR_SIZE,
.ia_size = 0,
};
if (!open->op_truncate)
return 0;
if (!(open->op_share_access & NFS4_SHARE_ACCESS_WRITE))
return nfserr_inval;
return nfsd_setattr(rqstp, fh, &iattr, 0, (time_t)0);
}
static __be32
nfs4_upgrade_open(struct svc_rqst *rqstp, struct svc_fh *cur_fh, struct nfs4_stateid *stp, struct nfsd4_open *open)
{
struct file *filp = stp->st_vfs_file;
struct inode *inode = filp->f_path.dentry->d_inode;
unsigned int share_access, new_writer;
__be32 status;
set_access(&share_access, stp->st_access_bmap);
new_writer = (~share_access) & open->op_share_access
& NFS4_SHARE_ACCESS_WRITE;
if (new_writer) {
int err = get_write_access(inode);
if (err)
return nfserrno(err);
nfsd: take file and mnt write in nfs4_upgrade_open testing with newpynfs revealed this warning: Jul 3 07:32:50 buml kernel: writeable file with no mnt_want_write() Jul 3 07:32:50 buml kernel: ------------[ cut here ]------------ Jul 3 07:32:50 buml kernel: WARNING: at /usr0/export/dev/bhalevy/git/linux-pnfs-bh-nfs41/include/linux/fs.h:855 drop_file_write_access+0x6b/0x7e() Jul 3 07:32:50 buml kernel: Modules linked in: nfsd auth_rpcgss exportfs nfs lockd nfs_acl sunrpc Jul 3 07:32:50 buml kernel: Call Trace: Jul 3 07:32:50 buml kernel: 6eaadc88: [<6002f471>] warn_on_slowpath+0x54/0x8e Jul 3 07:32:50 buml kernel: 6eaadcc8: [<601b790d>] printk+0xa0/0x793 Jul 3 07:32:50 buml kernel: 6eaadd38: [<601b6205>] __mutex_lock_slowpath+0x1db/0x1ea Jul 3 07:32:50 buml kernel: 6eaadd68: [<7107d4d5>] nfs4_preprocess_seqid_op+0x2a6/0x31c [nfsd] Jul 3 07:32:50 buml kernel: 6eaadda8: [<60078dc9>] drop_file_write_access+0x6b/0x7e Jul 3 07:32:50 buml kernel: 6eaaddc8: [<710804e4>] nfsd4_open_downgrade+0x114/0x1de [nfsd] Jul 3 07:32:50 buml kernel: 6eaade08: [<71076215>] nfsd4_proc_compound+0x1ba/0x2dc [nfsd] Jul 3 07:32:50 buml kernel: 6eaade48: [<71068221>] nfsd_dispatch+0xe5/0x1c2 [nfsd] Jul 3 07:32:50 buml kernel: 6eaade88: [<71312f81>] svc_process+0x3fd/0x714 [sunrpc] Jul 3 07:32:50 buml kernel: 6eaadea8: [<60039a81>] kernel_sigprocmask+0xf3/0x100 Jul 3 07:32:50 buml kernel: 6eaadee8: [<7106874b>] nfsd+0x182/0x29b [nfsd] Jul 3 07:32:50 buml kernel: 6eaadf48: [<60021cc9>] run_kernel_thread+0x41/0x4a Jul 3 07:32:50 buml kernel: 6eaadf58: [<710685c9>] nfsd+0x0/0x29b [nfsd] Jul 3 07:32:50 buml kernel: 6eaadf98: [<60021cb0>] run_kernel_thread+0x28/0x4a Jul 3 07:32:50 buml kernel: 6eaadfc8: [<60013829>] new_thread_handler+0x72/0x9c Jul 3 07:32:50 buml kernel: Jul 3 07:32:50 buml kernel: ---[ end trace 2426dd7cb2fba3bf ]--- Bruce Fields suggested this (Thanks!): maybe we need to be doing a mnt_want_write on open_upgrade and mnt_put_write on downgrade? This patch adds a call to mnt_want_write and file_take_write (which is doing the actual work). The counter-calls mnt_drop_write a file_release_write are now being properly called by drop_file_write_access in the exact path printed by the warning above. Signed-off-by: Benny Halevy <bhalevy@panasas.com> Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
2008-07-04 20:38:41 +08:00
err = mnt_want_write(cur_fh->fh_export->ex_path.mnt);
if (err)
return nfserrno(err);
file_take_write(filp);
}
status = nfsd4_truncate(rqstp, cur_fh, open);
if (status) {
if (new_writer)
put_write_access(inode);
return status;
}
/* remember the open */
filp->f_mode |= open->op_share_access;
__set_bit(open->op_share_access, &stp->st_access_bmap);
__set_bit(open->op_share_deny, &stp->st_deny_bmap);
return nfs_ok;
}
static void
nfs4_set_claim_prev(struct nfsd4_open *open)
{
open->op_stateowner->so_confirmed = 1;
open->op_stateowner->so_client->cl_firststate = 1;
}
/*
* Attempt to hand out a delegation.
*/
static void
nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_stateid *stp)
{
struct nfs4_delegation *dp;
struct nfs4_stateowner *sop = stp->st_stateowner;
struct nfs4_callback *cb = &sop->so_client->cl_callback;
struct file_lock fl, *flp = &fl;
int status, flag = 0;
flag = NFS4_OPEN_DELEGATE_NONE;
open->op_recall = 0;
switch (open->op_claim_type) {
case NFS4_OPEN_CLAIM_PREVIOUS:
if (!atomic_read(&cb->cb_set))
open->op_recall = 1;
flag = open->op_delegate_type;
if (flag == NFS4_OPEN_DELEGATE_NONE)
goto out;
break;
case NFS4_OPEN_CLAIM_NULL:
/* Let's not give out any delegations till everyone's
* had the chance to reclaim theirs.... */
if (locks_in_grace())
goto out;
if (!atomic_read(&cb->cb_set) || !sop->so_confirmed)
goto out;
if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
flag = NFS4_OPEN_DELEGATE_WRITE;
else
flag = NFS4_OPEN_DELEGATE_READ;
break;
default:
goto out;
}
dp = alloc_init_deleg(sop->so_client, stp, fh, flag);
if (dp == NULL) {
flag = NFS4_OPEN_DELEGATE_NONE;
goto out;
}
locks_init_lock(&fl);
fl.fl_lmops = &nfsd_lease_mng_ops;
fl.fl_flags = FL_LEASE;
fl.fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK;
fl.fl_end = OFFSET_MAX;
fl.fl_owner = (fl_owner_t)dp;
fl.fl_file = stp->st_vfs_file;
fl.fl_pid = current->tgid;
/* vfs_setlease checks to see if delegation should be handed out.
* the lock_manager callbacks fl_mylease and fl_change are used
*/
if ((status = vfs_setlease(stp->st_vfs_file, fl.fl_type, &flp))) {
dprintk("NFSD: setlease failed [%d], no delegation\n", status);
unhash_delegation(dp);
flag = NFS4_OPEN_DELEGATE_NONE;
goto out;
}
memcpy(&open->op_delegate_stateid, &dp->dl_stateid, sizeof(dp->dl_stateid));
dprintk("NFSD: delegation stateid=(%08x/%08x/%08x/%08x)\n\n",
dp->dl_stateid.si_boot,
dp->dl_stateid.si_stateownerid,
dp->dl_stateid.si_fileid,
dp->dl_stateid.si_generation);
out:
if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS
&& flag == NFS4_OPEN_DELEGATE_NONE
&& open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE)
dprintk("NFSD: WARNING: refusing delegation reclaim\n");
open->op_delegate_type = flag;
}
/*
* called with nfs4_lock_state() held.
*/
__be32
nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open)
{
struct nfsd4_compoundres *resp = rqstp->rq_resp;
struct nfs4_file *fp = NULL;
struct inode *ino = current_fh->fh_dentry->d_inode;
struct nfs4_stateid *stp = NULL;
struct nfs4_delegation *dp = NULL;
__be32 status;
status = nfserr_inval;
if (!access_valid(open->op_share_access, resp->cstate.minorversion)
|| !deny_valid(open->op_share_deny))
goto out;
/*
* Lookup file; if found, lookup stateid and check open request,
* and check for delegations in the process of being recalled.
* If not found, create the nfs4_file struct
*/
fp = find_file(ino);
if (fp) {
if ((status = nfs4_check_open(fp, open, &stp)))
goto out;
status = nfs4_check_deleg(fp, open, &dp);
if (status)
goto out;
} else {
status = nfserr_bad_stateid;
if (open->op_claim_type == NFS4_OPEN_CLAIM_DELEGATE_CUR)
goto out;
status = nfserr_resource;
fp = alloc_init_file(ino);
if (fp == NULL)
goto out;
}
/*
* OPEN the file, or upgrade an existing OPEN.
* If truncate fails, the OPEN fails.
*/
if (stp) {
/* Stateid was found, this is an OPEN upgrade */
status = nfs4_upgrade_open(rqstp, current_fh, stp, open);
if (status)
goto out;
update_stateid(&stp->st_stateid);
} else {
/* Stateid was not found, this is a new OPEN */
int flags = 0;
if (open->op_share_access & NFS4_SHARE_ACCESS_READ)
flags |= NFSD_MAY_READ;
if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
flags |= NFSD_MAY_WRITE;
status = nfs4_new_open(rqstp, &stp, dp, current_fh, flags);
if (status)
goto out;
init_stateid(stp, fp, open);
status = nfsd4_truncate(rqstp, current_fh, open);
if (status) {
release_open_stateid(stp);
goto out;
}
if (nfsd4_has_session(&resp->cstate))
update_stateid(&stp->st_stateid);
}
memcpy(&open->op_stateid, &stp->st_stateid, sizeof(stateid_t));
if (nfsd4_has_session(&resp->cstate))
open->op_stateowner->so_confirmed = 1;
/*
* Attempt to hand out a delegation. No error return, because the
* OPEN succeeds even if we fail.
*/
nfs4_open_delegation(current_fh, open, stp);
status = nfs_ok;
dprintk("nfs4_process_open2: stateid=(%08x/%08x/%08x/%08x)\n",
stp->st_stateid.si_boot, stp->st_stateid.si_stateownerid,
stp->st_stateid.si_fileid, stp->st_stateid.si_generation);
out:
if (fp)
put_nfs4_file(fp);
if (status == 0 && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS)
nfs4_set_claim_prev(open);
/*
* To finish the open response, we just need to set the rflags.
*/
open->op_rflags = NFS4_OPEN_RESULT_LOCKTYPE_POSIX;
if (!open->op_stateowner->so_confirmed &&
!nfsd4_has_session(&resp->cstate))
open->op_rflags |= NFS4_OPEN_RESULT_CONFIRM;
return status;
}
__be32
nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
clientid_t *clid)
{
struct nfs4_client *clp;
__be32 status;
nfs4_lock_state();
dprintk("process_renew(%08x/%08x): starting\n",
clid->cl_boot, clid->cl_id);
status = nfserr_stale_clientid;
if (STALE_CLIENTID(clid))
goto out;
clp = find_confirmed_client(clid);
status = nfserr_expired;
if (clp == NULL) {
/* We assume the client took too long to RENEW. */
dprintk("nfsd4_renew: clientid not found!\n");
goto out;
}
renew_client(clp);
status = nfserr_cb_path_down;
if (!list_empty(&clp->cl_delegations)
&& !atomic_read(&clp->cl_callback.cb_set))
goto out;
status = nfs_ok;
out:
nfs4_unlock_state();
return status;
}
struct lock_manager nfsd4_manager = {
};
static void
nfsd4_end_grace(void)
{
dprintk("NFSD: end of grace period\n");
nfsd4_recdir_purge_old();
locks_end_grace(&nfsd4_manager);
}
static time_t
nfs4_laundromat(void)
{
struct nfs4_client *clp;
struct nfs4_stateowner *sop;
struct nfs4_delegation *dp;
struct list_head *pos, *next, reaplist;
time_t cutoff = get_seconds() - NFSD_LEASE_TIME;
time_t t, clientid_val = NFSD_LEASE_TIME;
time_t u, test_val = NFSD_LEASE_TIME;
nfs4_lock_state();
dprintk("NFSD: laundromat service - starting\n");
if (locks_in_grace())
nfsd4_end_grace();
list_for_each_safe(pos, next, &client_lru) {
clp = list_entry(pos, struct nfs4_client, cl_lru);
if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) {
t = clp->cl_time - cutoff;
if (clientid_val > t)
clientid_val = t;
break;
}
dprintk("NFSD: purging unused client (clientid %08x)\n",
clp->cl_clientid.cl_id);
nfsd4_remove_clid_dir(clp);
expire_client(clp);
}
INIT_LIST_HEAD(&reaplist);
spin_lock(&recall_lock);
list_for_each_safe(pos, next, &del_recall_lru) {
dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
if (time_after((unsigned long)dp->dl_time, (unsigned long)cutoff)) {
u = dp->dl_time - cutoff;
if (test_val > u)
test_val = u;
break;
}
dprintk("NFSD: purging unused delegation dp %p, fp %p\n",
dp, dp->dl_flock);
list_move(&dp->dl_recall_lru, &reaplist);
}
spin_unlock(&recall_lock);
list_for_each_safe(pos, next, &reaplist) {
dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
list_del_init(&dp->dl_recall_lru);
unhash_delegation(dp);
}
test_val = NFSD_LEASE_TIME;
list_for_each_safe(pos, next, &close_lru) {
sop = list_entry(pos, struct nfs4_stateowner, so_close_lru);
if (time_after((unsigned long)sop->so_time, (unsigned long)cutoff)) {
u = sop->so_time - cutoff;
if (test_val > u)
test_val = u;
break;
}
dprintk("NFSD: purging unused open stateowner (so_id %d)\n",
sop->so_id);
release_openowner(sop);
}
if (clientid_val < NFSD_LAUNDROMAT_MINTIMEOUT)
clientid_val = NFSD_LAUNDROMAT_MINTIMEOUT;
nfs4_unlock_state();
return clientid_val;
}
static struct workqueue_struct *laundry_wq;
static void laundromat_main(struct work_struct *);
static DECLARE_DELAYED_WORK(laundromat_work, laundromat_main);
static void
laundromat_main(struct work_struct *not_used)
{
time_t t;
t = nfs4_laundromat();
dprintk("NFSD: laundromat_main - sleeping for %ld seconds\n", t);
queue_delayed_work(laundry_wq, &laundromat_work, t*HZ);
}
static struct nfs4_stateowner *
search_close_lru(u32 st_id, int flags)
{
struct nfs4_stateowner *local = NULL;
if (flags & CLOSE_STATE) {
list_for_each_entry(local, &close_lru, so_close_lru) {
if (local->so_id == st_id)
return local;
}
}
return NULL;
}
static inline int
nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stateid *stp)
{
return fhp->fh_dentry->d_inode != stp->st_vfs_file->f_path.dentry->d_inode;
}
static int
STALE_STATEID(stateid_t *stateid)
{
if (stateid->si_boot == boot_time)
return 0;
dprintk("NFSD: stale stateid (%08x/%08x/%08x/%08x)!\n",
stateid->si_boot, stateid->si_stateownerid, stateid->si_fileid,
stateid->si_generation);
return 1;
}
static inline int
access_permit_read(unsigned long access_bmap)
{
return test_bit(NFS4_SHARE_ACCESS_READ, &access_bmap) ||
test_bit(NFS4_SHARE_ACCESS_BOTH, &access_bmap) ||
test_bit(NFS4_SHARE_ACCESS_WRITE, &access_bmap);
}
static inline int
access_permit_write(unsigned long access_bmap)
{
return test_bit(NFS4_SHARE_ACCESS_WRITE, &access_bmap) ||
test_bit(NFS4_SHARE_ACCESS_BOTH, &access_bmap);
}
static
__be32 nfs4_check_openmode(struct nfs4_stateid *stp, int flags)
{
__be32 status = nfserr_openmode;
if ((flags & WR_STATE) && (!access_permit_write(stp->st_access_bmap)))
goto out;
if ((flags & RD_STATE) && (!access_permit_read(stp->st_access_bmap)))
goto out;
status = nfs_ok;
out:
return status;
}
static inline __be32
check_special_stateids(svc_fh *current_fh, stateid_t *stateid, int flags)
{
if (ONE_STATEID(stateid) && (flags & RD_STATE))
return nfs_ok;
else if (locks_in_grace()) {
/* Answer in remaining cases depends on existance of
* conflicting state; so we must wait out the grace period. */
return nfserr_grace;
} else if (flags & WR_STATE)
return nfs4_share_conflict(current_fh,
NFS4_SHARE_DENY_WRITE);
else /* (flags & RD_STATE) && ZERO_STATEID(stateid) */
return nfs4_share_conflict(current_fh,
NFS4_SHARE_DENY_READ);
}
/*
* Allow READ/WRITE during grace period on recovered state only for files
* that are not able to provide mandatory locking.
*/
static inline int
grace_disallows_io(struct inode *inode)
{
return locks_in_grace() && mandatory_lock(inode);
}
static int check_stateid_generation(stateid_t *in, stateid_t *ref, int flags)
{
/*
* When sessions are used the stateid generation number is ignored
* when it is zero.
*/
if ((flags & HAS_SESSION) && in->si_generation == 0)
goto out;
/* If the client sends us a stateid from the future, it's buggy: */
if (in->si_generation > ref->si_generation)
return nfserr_bad_stateid;
/*
* The following, however, can happen. For example, if the
* client sends an open and some IO at the same time, the open
* may bump si_generation while the IO is still in flight.
* Thanks to hard links and renames, the client never knows what
* file an open will affect. So it could avoid that situation
* only by serializing all opens and IO from the same open
* owner. To recover from the old_stateid error, the client
* will just have to retry the IO:
*/
if (in->si_generation < ref->si_generation)
return nfserr_old_stateid;
out:
return nfs_ok;
}
static int is_delegation_stateid(stateid_t *stateid)
{
return stateid->si_fileid == 0;
}
/*
* Checks for stateid operations
*/
__be32
nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
stateid_t *stateid, int flags, struct file **filpp)
{
struct nfs4_stateid *stp = NULL;
struct nfs4_delegation *dp = NULL;
struct svc_fh *current_fh = &cstate->current_fh;
struct inode *ino = current_fh->fh_dentry->d_inode;
__be32 status;
if (filpp)
*filpp = NULL;
if (grace_disallows_io(ino))
return nfserr_grace;
if (nfsd4_has_session(cstate))
flags |= HAS_SESSION;
if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
return check_special_stateids(current_fh, stateid, flags);
status = nfserr_stale_stateid;
if (STALE_STATEID(stateid))
goto out;
status = nfserr_bad_stateid;
if (is_delegation_stateid(stateid)) {
dp = find_delegation_stateid(ino, stateid);
if (!dp)
goto out;
status = check_stateid_generation(stateid, &dp->dl_stateid,
flags);
if (status)
goto out;
status = nfs4_check_delegmode(dp, flags);
if (status)
goto out;
renew_client(dp->dl_client);
if (filpp)
*filpp = dp->dl_vfs_file;
} else { /* open or lock stateid */
stp = find_stateid(stateid, flags);
if (!stp)
goto out;
if (nfs4_check_fh(current_fh, stp))
goto out;
if (!stp->st_stateowner->so_confirmed)
goto out;
status = check_stateid_generation(stateid, &stp->st_stateid,
flags);
if (status)
goto out;
status = nfs4_check_openmode(stp, flags);
if (status)
goto out;
renew_client(stp->st_stateowner->so_client);
if (filpp)
*filpp = stp->st_vfs_file;
}
status = nfs_ok;
out:
return status;
}
static inline int
setlkflg (int type)
{
return (type == NFS4_READW_LT || type == NFS4_READ_LT) ?
RD_STATE : WR_STATE;
}
/*
* Checks for sequence id mutating operations.
*/
static __be32
nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
stateid_t *stateid, int flags,
struct nfs4_stateowner **sopp,
struct nfs4_stateid **stpp, struct nfsd4_lock *lock)
{
struct nfs4_stateid *stp;
struct nfs4_stateowner *sop;
struct svc_fh *current_fh = &cstate->current_fh;
__be32 status;
dprintk("NFSD: preprocess_seqid_op: seqid=%d "
"stateid = (%08x/%08x/%08x/%08x)\n", seqid,
stateid->si_boot, stateid->si_stateownerid, stateid->si_fileid,
stateid->si_generation);
*stpp = NULL;
*sopp = NULL;
if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) {
dprintk("NFSD: preprocess_seqid_op: magic stateid!\n");
return nfserr_bad_stateid;
}
if (STALE_STATEID(stateid))
return nfserr_stale_stateid;
if (nfsd4_has_session(cstate))
flags |= HAS_SESSION;
/*
* We return BAD_STATEID if filehandle doesn't match stateid,
* the confirmed flag is incorrecly set, or the generation
* number is incorrect.
*/
stp = find_stateid(stateid, flags);
if (stp == NULL) {
/*
* Also, we should make sure this isn't just the result of
* a replayed close:
*/
sop = search_close_lru(stateid->si_stateownerid, flags);
if (sop == NULL)
return nfserr_bad_stateid;
*sopp = sop;
goto check_replay;
}
*stpp = stp;
*sopp = sop = stp->st_stateowner;
if (lock) {
clientid_t *lockclid = &lock->v.new.clientid;
struct nfs4_client *clp = sop->so_client;
int lkflg = 0;
__be32 status;
lkflg = setlkflg(lock->lk_type);
if (lock->lk_is_new) {
if (!sop->so_is_open_owner)
return nfserr_bad_stateid;
if (!(flags & HAS_SESSION) &&
!same_clid(&clp->cl_clientid, lockclid))
return nfserr_bad_stateid;
/* stp is the open stateid */
status = nfs4_check_openmode(stp, lkflg);
if (status)
return status;
} else {
/* stp is the lock stateid */
status = nfs4_check_openmode(stp->st_openstp, lkflg);
if (status)
return status;
}
}
if (nfs4_check_fh(current_fh, stp)) {
dprintk("NFSD: preprocess_seqid_op: fh-stateid mismatch!\n");
return nfserr_bad_stateid;
}
/*
* We now validate the seqid and stateid generation numbers.
* For the moment, we ignore the possibility of
* generation number wraparound.
*/
if (!(flags & HAS_SESSION) && seqid != sop->so_seqid)
goto check_replay;
if (sop->so_confirmed && flags & CONFIRM) {
dprintk("NFSD: preprocess_seqid_op: expected"
" unconfirmed stateowner!\n");
return nfserr_bad_stateid;
}
if (!sop->so_confirmed && !(flags & CONFIRM)) {
dprintk("NFSD: preprocess_seqid_op: stateowner not"
" confirmed yet!\n");
return nfserr_bad_stateid;
}
status = check_stateid_generation(stateid, &stp->st_stateid, flags);
if (status)
return status;
renew_client(sop->so_client);
return nfs_ok;
check_replay:
[PATCH] nfsd4: fix open_reclaim seqid The sequence number we store in the sequence id is the last one we received from the client. So on the next operation we'll check that the client gives us the next higher number. We increment sequence id's at the last moment, in encode, so that we're sure of knowing the right error return. (The decision to increment the sequence id depends on the exact error returned.) However on the *first* use of a sequence number, if we set the sequence number to the one received from the client and then let the increment happen on encode, we'll be left with a sequence number one to high. For that reason, ENCODE_SEQID_OP_TAIL only increments the sequence id on *confirmed* stateowners. This creates a problem for open reclaims, which are confirmed on first use. Therefore the open reclaim code, as a special exception, *decrements* the sequence id, cancelling out the undesired increment on encode. But this prevents the sequence id from ever being incremented in the case where multiple reclaims are sent with the same openowner. Yuch! We could add another exception to the open reclaim code, decrementing the sequence id only if this is the first use of the open owner. But it's simpler by far to modify the meaning of the op_seqid field: instead of representing the previous value sent by the client, we take op_seqid, after encoding, to represent the *next* sequence id that we expect from the client. This eliminates the need for special-case handling of the first use of a stateowner. Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu> Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-07-08 08:59:19 +08:00
if (seqid == sop->so_seqid - 1) {
dprintk("NFSD: preprocess_seqid_op: retransmission?\n");
/* indicate replay to calling function */
return nfserr_replay_me;
}
dprintk("NFSD: preprocess_seqid_op: bad seqid (expected %d, got %d)\n",
sop->so_seqid, seqid);
*sopp = NULL;
return nfserr_bad_seqid;
}
__be32
nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_open_confirm *oc)
{
__be32 status;
struct nfs4_stateowner *sop;
struct nfs4_stateid *stp;
dprintk("NFSD: nfsd4_open_confirm on file %.*s\n",
(int)cstate->current_fh.fh_dentry->d_name.len,
cstate->current_fh.fh_dentry->d_name.name);
status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0);
if (status)
return status;
nfs4_lock_state();
if ((status = nfs4_preprocess_seqid_op(cstate,
oc->oc_seqid, &oc->oc_req_stateid,
CONFIRM | OPEN_STATE,
&oc->oc_stateowner, &stp, NULL)))
goto out;
sop = oc->oc_stateowner;
sop->so_confirmed = 1;
update_stateid(&stp->st_stateid);
memcpy(&oc->oc_resp_stateid, &stp->st_stateid, sizeof(stateid_t));
dprintk("NFSD: nfsd4_open_confirm: success, seqid=%d "
"stateid=(%08x/%08x/%08x/%08x)\n", oc->oc_seqid,
stp->st_stateid.si_boot,
stp->st_stateid.si_stateownerid,
stp->st_stateid.si_fileid,
stp->st_stateid.si_generation);
nfsd4_create_clid_dir(sop->so_client);
out:
if (oc->oc_stateowner) {
nfs4_get_stateowner(oc->oc_stateowner);
cstate->replay_owner = oc->oc_stateowner;
}
nfs4_unlock_state();
return status;
}
/*
* unset all bits in union bitmap (bmap) that
* do not exist in share (from successful OPEN_DOWNGRADE)
*/
static void
reset_union_bmap_access(unsigned long access, unsigned long *bmap)
{
int i;
for (i = 1; i < 4; i++) {
if ((i & access) != i)
__clear_bit(i, bmap);
}
}
static void
reset_union_bmap_deny(unsigned long deny, unsigned long *bmap)
{
int i;
for (i = 0; i < 4; i++) {
if ((i & deny) != i)
__clear_bit(i, bmap);
}
}
__be32
nfsd4_open_downgrade(struct svc_rqst *rqstp,
struct nfsd4_compound_state *cstate,
struct nfsd4_open_downgrade *od)
{
__be32 status;
struct nfs4_stateid *stp;
unsigned int share_access;
dprintk("NFSD: nfsd4_open_downgrade on file %.*s\n",
(int)cstate->current_fh.fh_dentry->d_name.len,
cstate->current_fh.fh_dentry->d_name.name);
if (!access_valid(od->od_share_access, cstate->minorversion)
|| !deny_valid(od->od_share_deny))
return nfserr_inval;
nfs4_lock_state();
if ((status = nfs4_preprocess_seqid_op(cstate,
od->od_seqid,
&od->od_stateid,
OPEN_STATE,
&od->od_stateowner, &stp, NULL)))
goto out;
status = nfserr_inval;
if (!test_bit(od->od_share_access, &stp->st_access_bmap)) {
dprintk("NFSD:access not a subset current bitmap: 0x%lx, input access=%08x\n",
stp->st_access_bmap, od->od_share_access);
goto out;
}
if (!test_bit(od->od_share_deny, &stp->st_deny_bmap)) {
dprintk("NFSD:deny not a subset current bitmap: 0x%lx, input deny=%08x\n",
stp->st_deny_bmap, od->od_share_deny);
goto out;
}
set_access(&share_access, stp->st_access_bmap);
nfs4_file_downgrade(stp->st_vfs_file,
share_access & ~od->od_share_access);
reset_union_bmap_access(od->od_share_access, &stp->st_access_bmap);
reset_union_bmap_deny(od->od_share_deny, &stp->st_deny_bmap);
update_stateid(&stp->st_stateid);
memcpy(&od->od_stateid, &stp->st_stateid, sizeof(stateid_t));
status = nfs_ok;
out:
if (od->od_stateowner) {
nfs4_get_stateowner(od->od_stateowner);
cstate->replay_owner = od->od_stateowner;
}
nfs4_unlock_state();
return status;
}
/*
* nfs4_unlock_state() called after encode
*/
__be32
nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_close *close)
{
__be32 status;
struct nfs4_stateid *stp;
dprintk("NFSD: nfsd4_close on file %.*s\n",
(int)cstate->current_fh.fh_dentry->d_name.len,
cstate->current_fh.fh_dentry->d_name.name);
nfs4_lock_state();
/* check close_lru for replay */
if ((status = nfs4_preprocess_seqid_op(cstate,
close->cl_seqid,
&close->cl_stateid,
OPEN_STATE | CLOSE_STATE,
&close->cl_stateowner, &stp, NULL)))
goto out;
status = nfs_ok;
update_stateid(&stp->st_stateid);
memcpy(&close->cl_stateid, &stp->st_stateid, sizeof(stateid_t));
/* release_stateid() calls nfsd_close() if needed */
release_open_stateid(stp);
/* place unused nfs4_stateowners on so_close_lru list to be
* released by the laundromat service after the lease period
* to enable us to handle CLOSE replay
*/
if (list_empty(&close->cl_stateowner->so_stateids))
move_to_close_lru(close->cl_stateowner);
out:
if (close->cl_stateowner) {
nfs4_get_stateowner(close->cl_stateowner);
cstate->replay_owner = close->cl_stateowner;
}
nfs4_unlock_state();
return status;
}
__be32
nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_delegreturn *dr)
{
struct nfs4_delegation *dp;
stateid_t *stateid = &dr->dr_stateid;
struct inode *inode;
__be32 status;
int flags = 0;
if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0)))
return status;
inode = cstate->current_fh.fh_dentry->d_inode;
if (nfsd4_has_session(cstate))
flags |= HAS_SESSION;
nfs4_lock_state();
status = nfserr_bad_stateid;
if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
goto out;
status = nfserr_stale_stateid;
if (STALE_STATEID(stateid))
goto out;
status = nfserr_bad_stateid;
if (!is_delegation_stateid(stateid))
goto out;
dp = find_delegation_stateid(inode, stateid);
if (!dp)
goto out;
status = check_stateid_generation(stateid, &dp->dl_stateid, flags);
if (status)
goto out;
renew_client(dp->dl_client);
unhash_delegation(dp);
out:
nfs4_unlock_state();
return status;
}
/*
* Lock owner state (byte-range locks)
*/
#define LOFF_OVERFLOW(start, len) ((u64)(len) > ~(u64)(start))
#define LOCK_HASH_BITS 8
#define LOCK_HASH_SIZE (1 << LOCK_HASH_BITS)
#define LOCK_HASH_MASK (LOCK_HASH_SIZE - 1)
static inline u64
end_offset(u64 start, u64 len)
{
u64 end;
end = start + len;
return end >= start ? end: NFS4_MAX_UINT64;
}
/* last octet in a range */
static inline u64
last_byte_offset(u64 start, u64 len)
{
u64 end;
BUG_ON(!len);
end = start + len;
return end > start ? end - 1: NFS4_MAX_UINT64;
}
#define lockownerid_hashval(id) \
((id) & LOCK_HASH_MASK)
static inline unsigned int
lock_ownerstr_hashval(struct inode *inode, u32 cl_id,
struct xdr_netobj *ownername)
{
return (file_hashval(inode) + cl_id
+ opaque_hashval(ownername->data, ownername->len))
& LOCK_HASH_MASK;
}
static struct list_head lock_ownerid_hashtbl[LOCK_HASH_SIZE];
static struct list_head lock_ownerstr_hashtbl[LOCK_HASH_SIZE];
static struct list_head lockstateid_hashtbl[STATEID_HASH_SIZE];
static struct nfs4_stateid *
find_stateid(stateid_t *stid, int flags)
{
struct nfs4_stateid *local;
u32 st_id = stid->si_stateownerid;
u32 f_id = stid->si_fileid;
unsigned int hashval;
dprintk("NFSD: find_stateid flags 0x%x\n",flags);
if (flags & (LOCK_STATE | RD_STATE | WR_STATE)) {
hashval = stateid_hashval(st_id, f_id);
list_for_each_entry(local, &lockstateid_hashtbl[hashval], st_hash) {
if ((local->st_stateid.si_stateownerid == st_id) &&
(local->st_stateid.si_fileid == f_id))
return local;
}
}
if (flags & (OPEN_STATE | RD_STATE | WR_STATE)) {
hashval = stateid_hashval(st_id, f_id);
list_for_each_entry(local, &stateid_hashtbl[hashval], st_hash) {
if ((local->st_stateid.si_stateownerid == st_id) &&
(local->st_stateid.si_fileid == f_id))
return local;
}
}
return NULL;
}
static struct nfs4_delegation *
find_delegation_stateid(struct inode *ino, stateid_t *stid)
{
struct nfs4_file *fp;
struct nfs4_delegation *dl;
dprintk("NFSD:find_delegation_stateid stateid=(%08x/%08x/%08x/%08x)\n",
stid->si_boot, stid->si_stateownerid,
stid->si_fileid, stid->si_generation);
fp = find_file(ino);
if (!fp)
return NULL;
dl = find_delegation_file(fp, stid);
put_nfs4_file(fp);
return dl;
}
/*
* TODO: Linux file offsets are _signed_ 64-bit quantities, which means that
* we can't properly handle lock requests that go beyond the (2^63 - 1)-th
* byte, because of sign extension problems. Since NFSv4 calls for 64-bit
* locking, this prevents us from being completely protocol-compliant. The
* real solution to this problem is to start using unsigned file offsets in
* the VFS, but this is a very deep change!
*/
static inline void
nfs4_transform_lock_offset(struct file_lock *lock)
{
if (lock->fl_start < 0)
lock->fl_start = OFFSET_MAX;
if (lock->fl_end < 0)
lock->fl_end = OFFSET_MAX;
}
/* Hack!: For now, we're defining this just so we can use a pointer to it
* as a unique cookie to identify our (NFSv4's) posix locks. */
static struct lock_manager_operations nfsd_posix_mng_ops = {
};
static inline void
nfs4_set_lock_denied(struct file_lock *fl, struct nfsd4_lock_denied *deny)
{
struct nfs4_stateowner *sop;
unsigned int hval;
if (fl->fl_lmops == &nfsd_posix_mng_ops) {
sop = (struct nfs4_stateowner *) fl->fl_owner;
hval = lockownerid_hashval(sop->so_id);
kref_get(&sop->so_ref);
deny->ld_sop = sop;
deny->ld_clientid = sop->so_client->cl_clientid;
} else {
deny->ld_sop = NULL;
deny->ld_clientid.cl_boot = 0;
deny->ld_clientid.cl_id = 0;
}
deny->ld_start = fl->fl_start;
deny->ld_length = NFS4_MAX_UINT64;
if (fl->fl_end != NFS4_MAX_UINT64)
deny->ld_length = fl->fl_end - fl->fl_start + 1;
deny->ld_type = NFS4_READ_LT;
if (fl->fl_type != F_RDLCK)
deny->ld_type = NFS4_WRITE_LT;
}
static struct nfs4_stateowner *
find_lockstateowner_str(struct inode *inode, clientid_t *clid,
struct xdr_netobj *owner)
{
unsigned int hashval = lock_ownerstr_hashval(inode, clid->cl_id, owner);
struct nfs4_stateowner *op;
list_for_each_entry(op, &lock_ownerstr_hashtbl[hashval], so_strhash) {
if (same_owner_str(op, owner, clid))
return op;
}
return NULL;
}
/*
* Alloc a lock owner structure.
* Called in nfsd4_lock - therefore, OPEN and OPEN_CONFIRM (if needed) has
* occured.
*
* strhashval = lock_ownerstr_hashval
*/
static struct nfs4_stateowner *
alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, struct nfs4_stateid *open_stp, struct nfsd4_lock *lock) {
struct nfs4_stateowner *sop;
struct nfs4_replay *rp;
unsigned int idhashval;
if (!(sop = alloc_stateowner(&lock->lk_new_owner)))
return NULL;
idhashval = lockownerid_hashval(current_ownerid);
INIT_LIST_HEAD(&sop->so_idhash);
INIT_LIST_HEAD(&sop->so_strhash);
INIT_LIST_HEAD(&sop->so_perclient);
INIT_LIST_HEAD(&sop->so_stateids);
INIT_LIST_HEAD(&sop->so_perstateid);
INIT_LIST_HEAD(&sop->so_close_lru); /* not used */
sop->so_time = 0;
list_add(&sop->so_idhash, &lock_ownerid_hashtbl[idhashval]);
list_add(&sop->so_strhash, &lock_ownerstr_hashtbl[strhashval]);
list_add(&sop->so_perstateid, &open_stp->st_lockowners);
sop->so_is_open_owner = 0;
sop->so_id = current_ownerid++;
sop->so_client = clp;
/* It is the openowner seqid that will be incremented in encode in the
* case of new lockowners; so increment the lock seqid manually: */
sop->so_seqid = lock->lk_new_lock_seqid + 1;
sop->so_confirmed = 1;
rp = &sop->so_replay;
rp->rp_status = nfserr_serverfault;
rp->rp_buflen = 0;
rp->rp_buf = rp->rp_ibuf;
return sop;
}
static struct nfs4_stateid *
alloc_init_lock_stateid(struct nfs4_stateowner *sop, struct nfs4_file *fp, struct nfs4_stateid *open_stp)
{
struct nfs4_stateid *stp;
unsigned int hashval = stateid_hashval(sop->so_id, fp->fi_id);
stp = nfs4_alloc_stateid();
if (stp == NULL)
goto out;
INIT_LIST_HEAD(&stp->st_hash);
INIT_LIST_HEAD(&stp->st_perfile);
INIT_LIST_HEAD(&stp->st_perstateowner);
INIT_LIST_HEAD(&stp->st_lockowners); /* not used */
list_add(&stp->st_hash, &lockstateid_hashtbl[hashval]);
list_add(&stp->st_perfile, &fp->fi_stateids);
list_add(&stp->st_perstateowner, &sop->so_stateids);
stp->st_stateowner = sop;
get_nfs4_file(fp);
stp->st_file = fp;
stp->st_stateid.si_boot = boot_time;
stp->st_stateid.si_stateownerid = sop->so_id;
stp->st_stateid.si_fileid = fp->fi_id;
stp->st_stateid.si_generation = 0;
stp->st_vfs_file = open_stp->st_vfs_file; /* FIXME refcount?? */
stp->st_access_bmap = open_stp->st_access_bmap;
stp->st_deny_bmap = open_stp->st_deny_bmap;
stp->st_openstp = open_stp;
out:
return stp;
}
static int
check_lock_length(u64 offset, u64 length)
{
return ((length == 0) || ((length != NFS4_MAX_UINT64) &&
LOFF_OVERFLOW(offset, length)));
}
/*
* LOCK operation
*/
__be32
nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_lock *lock)
{
struct nfs4_stateowner *open_sop = NULL;
struct nfs4_stateowner *lock_sop = NULL;
struct nfs4_stateid *lock_stp;
struct file *filp;
struct file_lock file_lock;
struct file_lock conflock;
__be32 status = 0;
unsigned int strhashval;
unsigned int cmd;
int err;
dprintk("NFSD: nfsd4_lock: start=%Ld length=%Ld\n",
(long long) lock->lk_offset,
(long long) lock->lk_length);
if (check_lock_length(lock->lk_offset, lock->lk_length))
return nfserr_inval;
if ((status = fh_verify(rqstp, &cstate->current_fh,
S_IFREG, NFSD_MAY_LOCK))) {
dprintk("NFSD: nfsd4_lock: permission denied!\n");
return status;
}
nfs4_lock_state();
if (lock->lk_is_new) {
/*
* Client indicates that this is a new lockowner.
* Use open owner and open stateid to create lock owner and
* lock stateid.
*/
struct nfs4_stateid *open_stp = NULL;
struct nfs4_file *fp;
status = nfserr_stale_clientid;
if (!nfsd4_has_session(cstate) &&
STALE_CLIENTID(&lock->lk_new_clientid))
goto out;
/* validate and update open stateid and open seqid */
status = nfs4_preprocess_seqid_op(cstate,
lock->lk_new_open_seqid,
&lock->lk_new_open_stateid,
OPEN_STATE,
&lock->lk_replay_owner, &open_stp,
lock);
if (status)
goto out;
open_sop = lock->lk_replay_owner;
/* create lockowner and lock stateid */
fp = open_stp->st_file;
strhashval = lock_ownerstr_hashval(fp->fi_inode,
open_sop->so_client->cl_clientid.cl_id,
&lock->v.new.owner);
/* XXX: Do we need to check for duplicate stateowners on
* the same file, or should they just be allowed (and
* create new stateids)? */
status = nfserr_resource;
lock_sop = alloc_init_lock_stateowner(strhashval,
open_sop->so_client, open_stp, lock);
if (lock_sop == NULL)
goto out;
lock_stp = alloc_init_lock_stateid(lock_sop, fp, open_stp);
if (lock_stp == NULL)
goto out;
} else {
/* lock (lock owner + lock stateid) already exists */
status = nfs4_preprocess_seqid_op(cstate,
lock->lk_old_lock_seqid,
&lock->lk_old_lock_stateid,
LOCK_STATE,
&lock->lk_replay_owner, &lock_stp, lock);
if (status)
goto out;
lock_sop = lock->lk_replay_owner;
}
/* lock->lk_replay_owner and lock_stp have been created or found */
filp = lock_stp->st_vfs_file;
status = nfserr_grace;
if (locks_in_grace() && !lock->lk_reclaim)
goto out;
status = nfserr_no_grace;
if (!locks_in_grace() && lock->lk_reclaim)
goto out;
locks_init_lock(&file_lock);
switch (lock->lk_type) {
case NFS4_READ_LT:
case NFS4_READW_LT:
file_lock.fl_type = F_RDLCK;
cmd = F_SETLK;
break;
case NFS4_WRITE_LT:
case NFS4_WRITEW_LT:
file_lock.fl_type = F_WRLCK;
cmd = F_SETLK;
break;
default:
status = nfserr_inval;
goto out;
}
file_lock.fl_owner = (fl_owner_t)lock_sop;
file_lock.fl_pid = current->tgid;
file_lock.fl_file = filp;
file_lock.fl_flags = FL_POSIX;
file_lock.fl_lmops = &nfsd_posix_mng_ops;
file_lock.fl_start = lock->lk_offset;
file_lock.fl_end = last_byte_offset(lock->lk_offset, lock->lk_length);
nfs4_transform_lock_offset(&file_lock);
/*
* Try to lock the file in the VFS.
* Note: locks.c uses the BKL to protect the inode's lock list.
*/
err = vfs_lock_file(filp, cmd, &file_lock, &conflock);
switch (-err) {
case 0: /* success! */
update_stateid(&lock_stp->st_stateid);
memcpy(&lock->lk_resp_stateid, &lock_stp->st_stateid,
sizeof(stateid_t));
status = 0;
break;
case (EAGAIN): /* conflock holds conflicting lock */
status = nfserr_denied;
dprintk("NFSD: nfsd4_lock: conflicting lock found!\n");
nfs4_set_lock_denied(&conflock, &lock->lk_denied);
break;
case (EDEADLK):
status = nfserr_deadlock;
break;
default:
dprintk("NFSD: nfsd4_lock: vfs_lock_file() failed! status %d\n",err);
status = nfserr_resource;
break;
}
out:
if (status && lock->lk_is_new && lock_sop)
release_lockowner(lock_sop);
if (lock->lk_replay_owner) {
nfs4_get_stateowner(lock->lk_replay_owner);
cstate->replay_owner = lock->lk_replay_owner;
}
nfs4_unlock_state();
return status;
}
/*
* The NFSv4 spec allows a client to do a LOCKT without holding an OPEN,
* so we do a temporary open here just to get an open file to pass to
* vfs_test_lock. (Arguably perhaps test_lock should be done with an
* inode operation.)
*/
static int nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file_lock *lock)
{
struct file *file;
int err;
err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
if (err)
return err;
err = vfs_test_lock(file, lock);
nfsd_close(file);
return err;
}
/*
* LOCKT operation
*/
__be32
nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_lockt *lockt)
{
struct inode *inode;
struct file_lock file_lock;
int error;
__be32 status;
if (locks_in_grace())
return nfserr_grace;
if (check_lock_length(lockt->lt_offset, lockt->lt_length))
return nfserr_inval;
lockt->lt_stateowner = NULL;
nfs4_lock_state();
status = nfserr_stale_clientid;
if (!nfsd4_has_session(cstate) && STALE_CLIENTID(&lockt->lt_clientid))
goto out;
if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0))) {
dprintk("NFSD: nfsd4_lockt: fh_verify() failed!\n");
if (status == nfserr_symlink)
status = nfserr_inval;
goto out;
}
inode = cstate->current_fh.fh_dentry->d_inode;
locks_init_lock(&file_lock);
switch (lockt->lt_type) {
case NFS4_READ_LT:
case NFS4_READW_LT:
file_lock.fl_type = F_RDLCK;
break;
case NFS4_WRITE_LT:
case NFS4_WRITEW_LT:
file_lock.fl_type = F_WRLCK;
break;
default:
dprintk("NFSD: nfs4_lockt: bad lock type!\n");
status = nfserr_inval;
goto out;
}
lockt->lt_stateowner = find_lockstateowner_str(inode,
&lockt->lt_clientid, &lockt->lt_owner);
if (lockt->lt_stateowner)
file_lock.fl_owner = (fl_owner_t)lockt->lt_stateowner;
file_lock.fl_pid = current->tgid;
file_lock.fl_flags = FL_POSIX;
file_lock.fl_start = lockt->lt_offset;
file_lock.fl_end = last_byte_offset(lockt->lt_offset, lockt->lt_length);
nfs4_transform_lock_offset(&file_lock);
status = nfs_ok;
error = nfsd_test_lock(rqstp, &cstate->current_fh, &file_lock);
if (error) {
status = nfserrno(error);
goto out;
}
if (file_lock.fl_type != F_UNLCK) {
status = nfserr_denied;
nfs4_set_lock_denied(&file_lock, &lockt->lt_denied);
}
out:
nfs4_unlock_state();
return status;
}
__be32
nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_locku *locku)
{
struct nfs4_stateid *stp;
struct file *filp = NULL;
struct file_lock file_lock;
__be32 status;
int err;
dprintk("NFSD: nfsd4_locku: start=%Ld length=%Ld\n",
(long long) locku->lu_offset,
(long long) locku->lu_length);
if (check_lock_length(locku->lu_offset, locku->lu_length))
return nfserr_inval;
nfs4_lock_state();
if ((status = nfs4_preprocess_seqid_op(cstate,
locku->lu_seqid,
&locku->lu_stateid,
LOCK_STATE,
&locku->lu_stateowner, &stp, NULL)))
goto out;
filp = stp->st_vfs_file;
BUG_ON(!filp);
locks_init_lock(&file_lock);
file_lock.fl_type = F_UNLCK;
file_lock.fl_owner = (fl_owner_t) locku->lu_stateowner;
file_lock.fl_pid = current->tgid;
file_lock.fl_file = filp;
file_lock.fl_flags = FL_POSIX;
file_lock.fl_lmops = &nfsd_posix_mng_ops;
file_lock.fl_start = locku->lu_offset;
file_lock.fl_end = last_byte_offset(locku->lu_offset, locku->lu_length);
nfs4_transform_lock_offset(&file_lock);
/*
* Try to unlock the file in the VFS.
*/
err = vfs_lock_file(filp, F_SETLK, &file_lock, NULL);
if (err) {
dprintk("NFSD: nfs4_locku: vfs_lock_file failed!\n");
goto out_nfserr;
}
/*
* OK, unlock succeeded; the only thing left to do is update the stateid.
*/
update_stateid(&stp->st_stateid);
memcpy(&locku->lu_stateid, &stp->st_stateid, sizeof(stateid_t));
out:
if (locku->lu_stateowner) {
nfs4_get_stateowner(locku->lu_stateowner);
cstate->replay_owner = locku->lu_stateowner;
}
nfs4_unlock_state();
return status;
out_nfserr:
status = nfserrno(err);
goto out;
}
/*
* returns
* 1: locks held by lockowner
* 0: no locks held by lockowner
*/
static int
check_for_locks(struct file *filp, struct nfs4_stateowner *lowner)
{
struct file_lock **flpp;
struct inode *inode = filp->f_path.dentry->d_inode;
int status = 0;
lock_kernel();
for (flpp = &inode->i_flock; *flpp != NULL; flpp = &(*flpp)->fl_next) {
if ((*flpp)->fl_owner == (fl_owner_t)lowner) {
status = 1;
goto out;
}
}
out:
unlock_kernel();
return status;
}
__be32
nfsd4_release_lockowner(struct svc_rqst *rqstp,
struct nfsd4_compound_state *cstate,
struct nfsd4_release_lockowner *rlockowner)
{
clientid_t *clid = &rlockowner->rl_clientid;
struct nfs4_stateowner *sop;
struct nfs4_stateid *stp;
struct xdr_netobj *owner = &rlockowner->rl_owner;
struct list_head matches;
int i;
__be32 status;
dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n",
clid->cl_boot, clid->cl_id);
/* XXX check for lease expiration */
status = nfserr_stale_clientid;
if (STALE_CLIENTID(clid))
return status;
nfs4_lock_state();
status = nfserr_locks_held;
/* XXX: we're doing a linear search through all the lockowners.
* Yipes! For now we'll just hope clients aren't really using
* release_lockowner much, but eventually we have to fix these
* data structures. */
INIT_LIST_HEAD(&matches);
for (i = 0; i < LOCK_HASH_SIZE; i++) {
list_for_each_entry(sop, &lock_ownerid_hashtbl[i], so_idhash) {
if (!same_owner_str(sop, owner, clid))
continue;
list_for_each_entry(stp, &sop->so_stateids,
st_perstateowner) {
if (check_for_locks(stp->st_vfs_file, sop))
goto out;
/* Note: so_perclient unused for lockowners,
* so it's OK to fool with here. */
list_add(&sop->so_perclient, &matches);
}
}
}
/* Clients probably won't expect us to return with some (but not all)
* of the lockowner state released; so don't release any until all
* have been checked. */
status = nfs_ok;
while (!list_empty(&matches)) {
sop = list_entry(matches.next, struct nfs4_stateowner,
so_perclient);
/* unhash_stateowner deletes so_perclient only
* for openowners. */
list_del(&sop->so_perclient);
release_lockowner(sop);
}
out:
nfs4_unlock_state();
return status;
}
static inline struct nfs4_client_reclaim *
alloc_reclaim(void)
{
return kmalloc(sizeof(struct nfs4_client_reclaim), GFP_KERNEL);
}
int
nfs4_has_reclaimed_state(const char *name, bool use_exchange_id)
{
unsigned int strhashval = clientstr_hashval(name);
struct nfs4_client *clp;
clp = find_confirmed_client_by_str(name, strhashval, use_exchange_id);
return clp ? 1 : 0;
}
/*
* failure => all reset bets are off, nfserr_no_grace...
*/
int
nfs4_client_to_reclaim(const char *name)
{
unsigned int strhashval;
struct nfs4_client_reclaim *crp = NULL;
dprintk("NFSD nfs4_client_to_reclaim NAME: %.*s\n", HEXDIR_LEN, name);
crp = alloc_reclaim();
if (!crp)
return 0;
strhashval = clientstr_hashval(name);
INIT_LIST_HEAD(&crp->cr_strhash);
list_add(&crp->cr_strhash, &reclaim_str_hashtbl[strhashval]);
memcpy(crp->cr_recdir, name, HEXDIR_LEN);
reclaim_str_hashtbl_size++;
return 1;
}
static void
nfs4_release_reclaim(void)
{
struct nfs4_client_reclaim *crp = NULL;
int i;
for (i = 0; i < CLIENT_HASH_SIZE; i++) {
while (!list_empty(&reclaim_str_hashtbl[i])) {
crp = list_entry(reclaim_str_hashtbl[i].next,
struct nfs4_client_reclaim, cr_strhash);
list_del(&crp->cr_strhash);
kfree(crp);
reclaim_str_hashtbl_size--;
}
}
BUG_ON(reclaim_str_hashtbl_size);
}
/*
* called from OPEN, CLAIM_PREVIOUS with a new clientid. */
static struct nfs4_client_reclaim *
nfs4_find_reclaim_client(clientid_t *clid)
{
unsigned int strhashval;
struct nfs4_client *clp;
struct nfs4_client_reclaim *crp = NULL;
/* find clientid in conf_id_hashtbl */
clp = find_confirmed_client(clid);
if (clp == NULL)
return NULL;
dprintk("NFSD: nfs4_find_reclaim_client for %.*s with recdir %s\n",
clp->cl_name.len, clp->cl_name.data,
clp->cl_recdir);
/* find clp->cl_name in reclaim_str_hashtbl */
strhashval = clientstr_hashval(clp->cl_recdir);
list_for_each_entry(crp, &reclaim_str_hashtbl[strhashval], cr_strhash) {
if (same_name(crp->cr_recdir, clp->cl_recdir)) {
return crp;
}
}
return NULL;
}
/*
* Called from OPEN. Look for clientid in reclaim list.
*/
__be32
nfs4_check_open_reclaim(clientid_t *clid)
{
return nfs4_find_reclaim_client(clid) ? nfs_ok : nfserr_reclaim_bad;
}
/* initialization to perform at module load time: */
int
nfs4_state_init(void)
{
int i, status;
status = nfsd4_init_slabs();
if (status)
return status;
for (i = 0; i < CLIENT_HASH_SIZE; i++) {
INIT_LIST_HEAD(&conf_id_hashtbl[i]);
INIT_LIST_HEAD(&conf_str_hashtbl[i]);
INIT_LIST_HEAD(&unconf_str_hashtbl[i]);
INIT_LIST_HEAD(&unconf_id_hashtbl[i]);
}
for (i = 0; i < SESSION_HASH_SIZE; i++)
INIT_LIST_HEAD(&sessionid_hashtbl[i]);
for (i = 0; i < FILE_HASH_SIZE; i++) {
INIT_LIST_HEAD(&file_hashtbl[i]);
}
for (i = 0; i < OWNER_HASH_SIZE; i++) {
INIT_LIST_HEAD(&ownerstr_hashtbl[i]);
INIT_LIST_HEAD(&ownerid_hashtbl[i]);
}
for (i = 0; i < STATEID_HASH_SIZE; i++) {
INIT_LIST_HEAD(&stateid_hashtbl[i]);
INIT_LIST_HEAD(&lockstateid_hashtbl[i]);
}
for (i = 0; i < LOCK_HASH_SIZE; i++) {
INIT_LIST_HEAD(&lock_ownerid_hashtbl[i]);
INIT_LIST_HEAD(&lock_ownerstr_hashtbl[i]);
}
memset(&onestateid, ~0, sizeof(stateid_t));
INIT_LIST_HEAD(&close_lru);
INIT_LIST_HEAD(&client_lru);
INIT_LIST_HEAD(&del_recall_lru);
for (i = 0; i < CLIENT_HASH_SIZE; i++)
INIT_LIST_HEAD(&reclaim_str_hashtbl[i]);
reclaim_str_hashtbl_size = 0;
return 0;
}
static void
nfsd4_load_reboot_recovery_data(void)
{
int status;
nfs4_lock_state();
nfsd4_init_recdir(user_recovery_dirname);
status = nfsd4_recdir_load();
nfs4_unlock_state();
if (status)
printk("NFSD: Failure reading reboot recovery data\n");
}
unsigned long
get_nfs4_grace_period(void)
{
return max(user_lease_time, lease_time) * HZ;
}
/*
* Since the lifetime of a delegation isn't limited to that of an open, a
* client may quite reasonably hang on to a delegation as long as it has
* the inode cached. This becomes an obvious problem the first time a
* client's inode cache approaches the size of the server's total memory.
*
* For now we avoid this problem by imposing a hard limit on the number
* of delegations, which varies according to the server's memory size.
*/
static void
set_max_delegations(void)
{
/*
* Allow at most 4 delegations per megabyte of RAM. Quick
* estimates suggest that in the worst case (where every delegation
* is for a different inode), a delegation could take about 1.5K,
* giving a worst case usage of about 6% of memory.
*/
max_delegations = nr_free_buffer_pages() >> (20 - 2 - PAGE_SHIFT);
}
/* initialization to perform when the nfsd service is started: */
static void
__nfs4_state_start(void)
{
unsigned long grace_time;
boot_time = get_seconds();
grace_time = get_nfs4_grace_period();
lease_time = user_lease_time;
locks_start_grace(&nfsd4_manager);
printk(KERN_INFO "NFSD: starting %ld-second grace period\n",
grace_time/HZ);
laundry_wq = create_singlethread_workqueue("nfsd4");
queue_delayed_work(laundry_wq, &laundromat_work, grace_time);
set_max_delegations();
}
void
nfs4_state_start(void)
{
if (nfs4_init)
return;
nfsd4_load_reboot_recovery_data();
__nfs4_state_start();
nfs4_init = 1;
return;
}
time_t
nfs4_lease_time(void)
{
return lease_time;
}
static void
__nfs4_state_shutdown(void)
{
int i;
struct nfs4_client *clp = NULL;
struct nfs4_delegation *dp = NULL;
struct list_head *pos, *next, reaplist;
for (i = 0; i < CLIENT_HASH_SIZE; i++) {
while (!list_empty(&conf_id_hashtbl[i])) {
clp = list_entry(conf_id_hashtbl[i].next, struct nfs4_client, cl_idhash);
expire_client(clp);
}
while (!list_empty(&unconf_str_hashtbl[i])) {
clp = list_entry(unconf_str_hashtbl[i].next, struct nfs4_client, cl_strhash);
expire_client(clp);
}
}
INIT_LIST_HEAD(&reaplist);
spin_lock(&recall_lock);
list_for_each_safe(pos, next, &del_recall_lru) {
dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
list_move(&dp->dl_recall_lru, &reaplist);
}
spin_unlock(&recall_lock);
list_for_each_safe(pos, next, &reaplist) {
dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
list_del_init(&dp->dl_recall_lru);
unhash_delegation(dp);
}
nfsd4_shutdown_recdir();
nfs4_init = 0;
}
void
nfs4_state_shutdown(void)
{
cancel_rearming_delayed_workqueue(laundry_wq, &laundromat_work);
destroy_workqueue(laundry_wq);
locks_end_grace(&nfsd4_manager);
nfs4_lock_state();
nfs4_release_reclaim();
__nfs4_state_shutdown();
nfs4_unlock_state();
}
/*
* user_recovery_dirname is protected by the nfsd_mutex since it's only
* accessed when nfsd is starting.
*/
static void
nfs4_set_recdir(char *recdir)
{
strcpy(user_recovery_dirname, recdir);
}
/*
* Change the NFSv4 recovery directory to recdir.
*/
int
nfs4_reset_recoverydir(char *recdir)
{
int status;
struct path path;
status = kern_path(recdir, LOOKUP_FOLLOW, &path);
if (status)
return status;
status = -ENOTDIR;
if (S_ISDIR(path.dentry->d_inode->i_mode)) {
nfs4_set_recdir(recdir);
status = 0;
}
path_put(&path);
return status;
}
char *
nfs4_recoverydir(void)
{
return user_recovery_dirname;
}
/*
* Called when leasetime is changed.
*
* The only way the protocol gives us to handle on-the-fly lease changes is to
* simulate a reboot. Instead of doing that, we just wait till the next time
* we start to register any changes in lease time. If the administrator
* really wants to change the lease time *now*, they can go ahead and bring
* nfsd down and then back up again after changing the lease time.
*
* user_lease_time is protected by nfsd_mutex since it's only really accessed
* when nfsd is starting
*/
void
nfs4_reset_lease(time_t leasetime)
{
user_lease_time = leasetime;
}