OpenCloudOS-Kernel/net/sunrpc/clnt.c

2479 lines
59 KiB
C
Raw Normal View History

/*
* linux/net/sunrpc/clnt.c
*
* This file contains the high-level RPC interface.
* It is modeled as a finite state machine to support both synchronous
* and asynchronous requests.
*
* - RPC header generation and argument serialization.
* - Credential refresh.
* - TCP connect handling.
* - Retry of operation when it is suspected the operation failed because
* of uid squashing on the server, or when the credentials were stale
* and need to be refreshed, or when a packet was damaged in transit.
* This may be have to be moved to the VFS layer.
*
* Copyright (C) 1992,1993 Rick Sladkey <jrs@world.std.com>
* Copyright (C) 1995,1996 Olaf Kirch <okir@monad.swb.de>
*/
#include <linux/module.h>
#include <linux/types.h>
SUNRPC: Display some debugging information as text rather than numbers In rpc_show_tasks(), display the program name, version number, procedure name and tk_action as human-readable variable-length text fields rather than columnar numbers. Doing the symbol lookup here helps in cases where we have actual debugging output from a kernel log, but don't have access to the kernel image or RPC module that generated the output. Sample output: -pid- flgs status -client- --rqstp- -timeout ---ops-- 5608 0001 -11 eeb42690 f6d93710 0 f8fa1764 nfsv3 WRITE a:call_transmit_status q:none 5609 0001 -11 eeb42690 f6d937e0 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5610 0001 -11 eeb42690 f6d93230 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5611 0001 -11 eeb42690 f6d93300 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5612 0001 -11 eeb42690 f6d93090 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5613 0001 -11 eeb42690 f6d933d0 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5614 0001 -11 eeb42690 f6d93cc0 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5615 0001 -11 eeb42690 f6d93a50 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5616 0001 -11 eeb42690 f6d93640 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5617 0001 -11 eeb42690 f6d93b20 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5618 0001 -11 eeb42690 f6d93160 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending Signed-off-by: Chuck Lever <chuck.lever@oracle.com> Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
2008-05-22 05:09:41 +08:00
#include <linux/kallsyms.h>
#include <linux/mm.h>
#include <linux/namei.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/rcupdate.h>
#include <linux/utsname.h>
#include <linux/workqueue.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/un.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/addr.h>
#include <linux/sunrpc/rpc_pipe_fs.h>
#include <linux/sunrpc/metrics.h>
nfs41: Add backchannel processing support to RPC state machine Adds rpc_run_bc_task() which is called by the NFS callback service to process backchannel requests. It performs similar work to rpc_run_task() though "schedules" the backchannel task to be executed starting at the call_trasmit state in the RPC state machine. It also introduces some miscellaneous updates to the argument validation, call_transmit, and transport cleanup functions to take into account that there are now forechannel and backchannel tasks. Backchannel requests do not carry an RPC message structure, since the payload has already been XDR encoded using the existing NFSv4 callback mechanism. Introduce a new transmit state for the client to reply on to backchannel requests. This new state simply reserves the transport and issues the reply. In case of a connection related error, disconnects the transport and drops the reply. It requires the forechannel to re-establish the connection and the server to retransmit the request, as stated in NFSv4.1 section 2.9.2 "Client and Server Transport Behavior". Note: There is no need to loop attempting to reserve the transport. If EAGAIN is returned by xprt_prepare_transmit(), return with tk_status == 0, setting tk_action to call_bc_transmit. rpc_execute() will invoke it again after the task is taken off the sleep queue. [nfs41: rpc_run_bc_task() need not be exported outside RPC module] [nfs41: New call_bc_transmit RPC state] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfs41: Backchannel: No need to loop in call_bc_transmit()] Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [rpc_count_iostats incorrectly exits early] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [Convert rpc_reply_expected() to inline function] [Remove unnecessary BUG_ON()] [Rename variable] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com>
2009-04-01 21:23:03 +08:00
#include <linux/sunrpc/bc_xprt.h>
#include <trace/events/sunrpc.h>
nfs41: Add backchannel processing support to RPC state machine Adds rpc_run_bc_task() which is called by the NFS callback service to process backchannel requests. It performs similar work to rpc_run_task() though "schedules" the backchannel task to be executed starting at the call_trasmit state in the RPC state machine. It also introduces some miscellaneous updates to the argument validation, call_transmit, and transport cleanup functions to take into account that there are now forechannel and backchannel tasks. Backchannel requests do not carry an RPC message structure, since the payload has already been XDR encoded using the existing NFSv4 callback mechanism. Introduce a new transmit state for the client to reply on to backchannel requests. This new state simply reserves the transport and issues the reply. In case of a connection related error, disconnects the transport and drops the reply. It requires the forechannel to re-establish the connection and the server to retransmit the request, as stated in NFSv4.1 section 2.9.2 "Client and Server Transport Behavior". Note: There is no need to loop attempting to reserve the transport. If EAGAIN is returned by xprt_prepare_transmit(), return with tk_status == 0, setting tk_action to call_bc_transmit. rpc_execute() will invoke it again after the task is taken off the sleep queue. [nfs41: rpc_run_bc_task() need not be exported outside RPC module] [nfs41: New call_bc_transmit RPC state] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfs41: Backchannel: No need to loop in call_bc_transmit()] Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [rpc_count_iostats incorrectly exits early] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [Convert rpc_reply_expected() to inline function] [Remove unnecessary BUG_ON()] [Rename variable] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com>
2009-04-01 21:23:03 +08:00
#include "sunrpc.h"
#include "netns.h"
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY RPCDBG_CALL
#endif
#define dprint_status(t) \
dprintk("RPC: %5u %s (status %d)\n", t->tk_pid, \
__func__, t->tk_status)
/*
* All RPC clients are linked into this list
*/
static DECLARE_WAIT_QUEUE_HEAD(destroy_wait);
static void call_start(struct rpc_task *task);
static void call_reserve(struct rpc_task *task);
static void call_reserveresult(struct rpc_task *task);
static void call_allocate(struct rpc_task *task);
static void call_decode(struct rpc_task *task);
static void call_bind(struct rpc_task *task);
static void call_bind_status(struct rpc_task *task);
static void call_transmit(struct rpc_task *task);
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
nfs41: Add backchannel processing support to RPC state machine Adds rpc_run_bc_task() which is called by the NFS callback service to process backchannel requests. It performs similar work to rpc_run_task() though "schedules" the backchannel task to be executed starting at the call_trasmit state in the RPC state machine. It also introduces some miscellaneous updates to the argument validation, call_transmit, and transport cleanup functions to take into account that there are now forechannel and backchannel tasks. Backchannel requests do not carry an RPC message structure, since the payload has already been XDR encoded using the existing NFSv4 callback mechanism. Introduce a new transmit state for the client to reply on to backchannel requests. This new state simply reserves the transport and issues the reply. In case of a connection related error, disconnects the transport and drops the reply. It requires the forechannel to re-establish the connection and the server to retransmit the request, as stated in NFSv4.1 section 2.9.2 "Client and Server Transport Behavior". Note: There is no need to loop attempting to reserve the transport. If EAGAIN is returned by xprt_prepare_transmit(), return with tk_status == 0, setting tk_action to call_bc_transmit. rpc_execute() will invoke it again after the task is taken off the sleep queue. [nfs41: rpc_run_bc_task() need not be exported outside RPC module] [nfs41: New call_bc_transmit RPC state] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfs41: Backchannel: No need to loop in call_bc_transmit()] Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [rpc_count_iostats incorrectly exits early] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [Convert rpc_reply_expected() to inline function] [Remove unnecessary BUG_ON()] [Rename variable] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com>
2009-04-01 21:23:03 +08:00
static void call_bc_transmit(struct rpc_task *task);
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
static void call_status(struct rpc_task *task);
static void call_transmit_status(struct rpc_task *task);
static void call_refresh(struct rpc_task *task);
static void call_refreshresult(struct rpc_task *task);
static void call_timeout(struct rpc_task *task);
static void call_connect(struct rpc_task *task);
static void call_connect_status(struct rpc_task *task);
static __be32 *rpc_encode_header(struct rpc_task *task);
static __be32 *rpc_verify_header(struct rpc_task *task);
static int rpc_ping(struct rpc_clnt *clnt);
static void rpc_register_client(struct rpc_clnt *clnt)
{
struct net *net = rpc_net_ns(clnt);
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
spin_lock(&sn->rpc_client_lock);
list_add(&clnt->cl_clients, &sn->all_clients);
spin_unlock(&sn->rpc_client_lock);
}
static void rpc_unregister_client(struct rpc_clnt *clnt)
{
struct net *net = rpc_net_ns(clnt);
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
spin_lock(&sn->rpc_client_lock);
list_del(&clnt->cl_clients);
spin_unlock(&sn->rpc_client_lock);
}
static void __rpc_clnt_remove_pipedir(struct rpc_clnt *clnt)
{
rpc_remove_client_dir(clnt);
}
static void rpc_clnt_remove_pipedir(struct rpc_clnt *clnt)
{
struct net *net = rpc_net_ns(clnt);
struct super_block *pipefs_sb;
pipefs_sb = rpc_get_sb_net(net);
if (pipefs_sb) {
__rpc_clnt_remove_pipedir(clnt);
rpc_put_sb_net(net);
}
}
static struct dentry *rpc_setup_pipedir_sb(struct super_block *sb,
struct rpc_clnt *clnt)
{
static uint32_t clntid;
const char *dir_name = clnt->cl_program->pipe_dir_name;
char name[15];
struct dentry *dir, *dentry;
dir = rpc_d_lookup_sb(sb, dir_name);
if (dir == NULL) {
pr_info("RPC: pipefs directory doesn't exist: %s\n", dir_name);
return dir;
}
for (;;) {
snprintf(name, sizeof(name), "clnt%x", (unsigned int)clntid++);
name[sizeof(name) - 1] = '\0';
dentry = rpc_create_client_dir(dir, name, clnt);
if (!IS_ERR(dentry))
break;
if (dentry == ERR_PTR(-EEXIST))
continue;
printk(KERN_INFO "RPC: Couldn't create pipefs entry"
" %s/%s, error %ld\n",
dir_name, name, PTR_ERR(dentry));
break;
}
dput(dir);
return dentry;
}
static int
rpc_setup_pipedir(struct super_block *pipefs_sb, struct rpc_clnt *clnt)
{
struct dentry *dentry;
if (clnt->cl_program->pipe_dir_name != NULL) {
dentry = rpc_setup_pipedir_sb(pipefs_sb, clnt);
if (IS_ERR(dentry))
return PTR_ERR(dentry);
}
return 0;
}
static int rpc_clnt_skip_event(struct rpc_clnt *clnt, unsigned long event)
{
if (clnt->cl_program->pipe_dir_name == NULL)
return 1;
switch (event) {
case RPC_PIPEFS_MOUNT:
if (clnt->cl_pipedir_objects.pdh_dentry != NULL)
return 1;
if (atomic_read(&clnt->cl_count) == 0)
return 1;
break;
case RPC_PIPEFS_UMOUNT:
if (clnt->cl_pipedir_objects.pdh_dentry == NULL)
return 1;
break;
}
return 0;
}
static int __rpc_clnt_handle_event(struct rpc_clnt *clnt, unsigned long event,
struct super_block *sb)
{
struct dentry *dentry;
int err = 0;
switch (event) {
case RPC_PIPEFS_MOUNT:
dentry = rpc_setup_pipedir_sb(sb, clnt);
if (!dentry)
return -ENOENT;
if (IS_ERR(dentry))
return PTR_ERR(dentry);
break;
case RPC_PIPEFS_UMOUNT:
__rpc_clnt_remove_pipedir(clnt);
break;
default:
printk(KERN_ERR "%s: unknown event: %ld\n", __func__, event);
return -ENOTSUPP;
}
return err;
}
static int __rpc_pipefs_event(struct rpc_clnt *clnt, unsigned long event,
struct super_block *sb)
{
int error = 0;
for (;; clnt = clnt->cl_parent) {
if (!rpc_clnt_skip_event(clnt, event))
error = __rpc_clnt_handle_event(clnt, event, sb);
if (error || clnt == clnt->cl_parent)
break;
}
return error;
}
static struct rpc_clnt *rpc_get_client_for_event(struct net *net, int event)
{
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
struct rpc_clnt *clnt;
spin_lock(&sn->rpc_client_lock);
list_for_each_entry(clnt, &sn->all_clients, cl_clients) {
if (rpc_clnt_skip_event(clnt, event))
continue;
spin_unlock(&sn->rpc_client_lock);
return clnt;
}
spin_unlock(&sn->rpc_client_lock);
return NULL;
}
static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event,
void *ptr)
{
struct super_block *sb = ptr;
struct rpc_clnt *clnt;
int error = 0;
while ((clnt = rpc_get_client_for_event(sb->s_fs_info, event))) {
error = __rpc_pipefs_event(clnt, event, sb);
if (error)
break;
}
return error;
}
static struct notifier_block rpc_clients_block = {
.notifier_call = rpc_pipefs_event,
.priority = SUNRPC_PIPEFS_RPC_PRIO,
};
int rpc_clients_notifier_register(void)
{
return rpc_pipefs_notifier_register(&rpc_clients_block);
}
void rpc_clients_notifier_unregister(void)
{
return rpc_pipefs_notifier_unregister(&rpc_clients_block);
}
static struct rpc_xprt *rpc_clnt_set_transport(struct rpc_clnt *clnt,
struct rpc_xprt *xprt,
const struct rpc_timeout *timeout)
{
struct rpc_xprt *old;
spin_lock(&clnt->cl_lock);
old = rcu_dereference_protected(clnt->cl_xprt,
lockdep_is_held(&clnt->cl_lock));
if (!xprt_bound(xprt))
clnt->cl_autobind = 1;
clnt->cl_timeout = timeout;
rcu_assign_pointer(clnt->cl_xprt, xprt);
spin_unlock(&clnt->cl_lock);
return old;
}
static void rpc_clnt_set_nodename(struct rpc_clnt *clnt, const char *nodename)
{
clnt->cl_nodelen = strlcpy(clnt->cl_nodename,
nodename, sizeof(clnt->cl_nodename));
}
static int rpc_client_register(struct rpc_clnt *clnt,
rpc_authflavor_t pseudoflavor,
const char *client_name)
{
struct rpc_auth_create_args auth_args = {
.pseudoflavor = pseudoflavor,
.target_name = client_name,
};
struct rpc_auth *auth;
struct net *net = rpc_net_ns(clnt);
struct super_block *pipefs_sb;
int err;
rpc_clnt_debugfs_register(clnt);
pipefs_sb = rpc_get_sb_net(net);
if (pipefs_sb) {
err = rpc_setup_pipedir(pipefs_sb, clnt);
if (err)
goto out;
}
rpc_register_client(clnt);
if (pipefs_sb)
rpc_put_sb_net(net);
auth = rpcauth_create(&auth_args, clnt);
if (IS_ERR(auth)) {
dprintk("RPC: Couldn't create auth handle (flavor %u)\n",
pseudoflavor);
err = PTR_ERR(auth);
goto err_auth;
}
return 0;
err_auth:
pipefs_sb = rpc_get_sb_net(net);
rpc_unregister_client(clnt);
__rpc_clnt_remove_pipedir(clnt);
out:
if (pipefs_sb)
rpc_put_sb_net(net);
rpc_clnt_debugfs_unregister(clnt);
return err;
}
static DEFINE_IDA(rpc_clids);
static int rpc_alloc_clid(struct rpc_clnt *clnt)
{
int clid;
clid = ida_simple_get(&rpc_clids, 0, 0, GFP_KERNEL);
if (clid < 0)
return clid;
clnt->cl_clid = clid;
return 0;
}
static void rpc_free_clid(struct rpc_clnt *clnt)
{
ida_simple_remove(&rpc_clids, clnt->cl_clid);
}
static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args,
struct rpc_xprt *xprt,
struct rpc_clnt *parent)
{
const struct rpc_program *program = args->program;
const struct rpc_version *version;
struct rpc_clnt *clnt = NULL;
const struct rpc_timeout *timeout;
const char *nodename = args->nodename;
int err;
/* sanity check the name before trying to print it */
dprintk("RPC: creating %s client for %s (xprt %p)\n",
program->name, args->servername, xprt);
err = rpciod_up();
if (err)
goto out_no_rpciod;
err = -EINVAL;
if (args->version >= program->nrvers)
goto out_err;
version = program->version[args->version];
if (version == NULL)
goto out_err;
err = -ENOMEM;
clnt = kzalloc(sizeof(*clnt), GFP_KERNEL);
if (!clnt)
goto out_err;
clnt->cl_parent = parent ? : clnt;
err = rpc_alloc_clid(clnt);
if (err)
goto out_no_clid;
clnt->cl_procinfo = version->procs;
clnt->cl_maxproc = version->nrprocs;
clnt->cl_prog = args->prognumber ? : program->number;
clnt->cl_vers = version->number;
clnt->cl_stats = program->stats;
clnt->cl_metrics = rpc_alloc_iostats(clnt);
rpc_init_pipe_dir_head(&clnt->cl_pipedir_objects);
err = -ENOMEM;
if (clnt->cl_metrics == NULL)
goto out_no_stats;
clnt->cl_program = program;
INIT_LIST_HEAD(&clnt->cl_tasks);
spin_lock_init(&clnt->cl_lock);
timeout = xprt->timeout;
if (args->timeout != NULL) {
memcpy(&clnt->cl_timeout_default, args->timeout,
sizeof(clnt->cl_timeout_default));
timeout = &clnt->cl_timeout_default;
}
rpc_clnt_set_transport(clnt, xprt, timeout);
clnt->cl_rtt = &clnt->cl_rtt_default;
rpc_init_rtt(&clnt->cl_rtt_default, clnt->cl_timeout->to_initval);
atomic_set(&clnt->cl_count, 1);
if (nodename == NULL)
nodename = utsname()->nodename;
/* save the nodename */
rpc_clnt_set_nodename(clnt, nodename);
err = rpc_client_register(clnt, args->authflavor, args->client_name);
if (err)
goto out_no_path;
if (parent)
atomic_inc(&parent->cl_count);
return clnt;
out_no_path:
rpc_free_iostats(clnt->cl_metrics);
out_no_stats:
rpc_free_clid(clnt);
out_no_clid:
kfree(clnt);
out_err:
rpciod_down();
out_no_rpciod:
xprt_put(xprt);
return ERR_PTR(err);
}
struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args,
struct rpc_xprt *xprt)
{
struct rpc_clnt *clnt = NULL;
clnt = rpc_new_client(args, xprt, NULL);
if (IS_ERR(clnt))
return clnt;
if (!(args->flags & RPC_CLNT_CREATE_NOPING)) {
int err = rpc_ping(clnt);
if (err != 0) {
rpc_shutdown_client(clnt);
return ERR_PTR(err);
}
}
clnt->cl_softrtry = 1;
if (args->flags & RPC_CLNT_CREATE_HARDRTRY)
clnt->cl_softrtry = 0;
if (args->flags & RPC_CLNT_CREATE_AUTOBIND)
clnt->cl_autobind = 1;
if (args->flags & RPC_CLNT_CREATE_NO_RETRANS_TIMEOUT)
clnt->cl_noretranstimeo = 1;
if (args->flags & RPC_CLNT_CREATE_DISCRTRY)
clnt->cl_discrtry = 1;
if (!(args->flags & RPC_CLNT_CREATE_QUIET))
clnt->cl_chatty = 1;
return clnt;
}
EXPORT_SYMBOL_GPL(rpc_create_xprt);
/**
* rpc_create - create an RPC client and transport with one call
* @args: rpc_clnt create argument structure
*
* Creates and initializes an RPC transport and an RPC client.
*
* It can ping the server in order to determine if it is up, and to see if
* it supports this program and version. RPC_CLNT_CREATE_NOPING disables
* this behavior so asynchronous tasks can also use rpc_create.
*/
struct rpc_clnt *rpc_create(struct rpc_create_args *args)
{
struct rpc_xprt *xprt;
struct xprt_create xprtargs = {
.net = args->net,
.ident = args->protocol,
.srcaddr = args->saddress,
.dstaddr = args->address,
.addrlen = args->addrsize,
.servername = args->servername,
.bc_xprt = args->bc_xprt,
};
char servername[48];
if (args->flags & RPC_CLNT_CREATE_INFINITE_SLOTS)
xprtargs.flags |= XPRT_CREATE_INFINITE_SLOTS;
if (args->flags & RPC_CLNT_CREATE_NO_IDLE_TIMEOUT)
xprtargs.flags |= XPRT_CREATE_NO_IDLE_TIMEOUT;
/*
* If the caller chooses not to specify a hostname, whip
* up a string representation of the passed-in address.
*/
if (xprtargs.servername == NULL) {
struct sockaddr_un *sun =
(struct sockaddr_un *)args->address;
struct sockaddr_in *sin =
(struct sockaddr_in *)args->address;
struct sockaddr_in6 *sin6 =
(struct sockaddr_in6 *)args->address;
servername[0] = '\0';
switch (args->address->sa_family) {
case AF_LOCAL:
snprintf(servername, sizeof(servername), "%s",
sun->sun_path);
break;
case AF_INET:
snprintf(servername, sizeof(servername), "%pI4",
&sin->sin_addr.s_addr);
break;
case AF_INET6:
snprintf(servername, sizeof(servername), "%pI6",
&sin6->sin6_addr);
break;
default:
/* caller wants default server name, but
* address family isn't recognized. */
return ERR_PTR(-EINVAL);
}
xprtargs.servername = servername;
}
xprt = xprt_create_transport(&xprtargs);
if (IS_ERR(xprt))
return (struct rpc_clnt *)xprt;
/*
* By default, kernel RPC client connects from a reserved port.
* CAP_NET_BIND_SERVICE will not be set for unprivileged requesters,
* but it is always enabled for rpciod, which handles the connect
* operation.
*/
xprt->resvport = 1;
if (args->flags & RPC_CLNT_CREATE_NONPRIVPORT)
xprt->resvport = 0;
return rpc_create_xprt(args, xprt);
}
EXPORT_SYMBOL_GPL(rpc_create);
/*
* This function clones the RPC client structure. It allows us to share the
* same transport while varying parameters such as the authentication
* flavour.
*/
static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args,
struct rpc_clnt *clnt)
{
struct rpc_xprt *xprt;
struct rpc_clnt *new;
int err;
err = -ENOMEM;
rcu_read_lock();
xprt = xprt_get(rcu_dereference(clnt->cl_xprt));
rcu_read_unlock();
if (xprt == NULL)
goto out_err;
args->servername = xprt->servername;
args->nodename = clnt->cl_nodename;
new = rpc_new_client(args, xprt, clnt);
if (IS_ERR(new)) {
err = PTR_ERR(new);
goto out_err;
}
/* Turn off autobind on clones */
new->cl_autobind = 0;
new->cl_softrtry = clnt->cl_softrtry;
new->cl_noretranstimeo = clnt->cl_noretranstimeo;
new->cl_discrtry = clnt->cl_discrtry;
new->cl_chatty = clnt->cl_chatty;
return new;
out_err:
dprintk("RPC: %s: returned error %d\n", __func__, err);
return ERR_PTR(err);
}
/**
* rpc_clone_client - Clone an RPC client structure
*
* @clnt: RPC client whose parameters are copied
*
* Returns a fresh RPC client or an ERR_PTR.
*/
struct rpc_clnt *rpc_clone_client(struct rpc_clnt *clnt)
{
struct rpc_create_args args = {
.program = clnt->cl_program,
.prognumber = clnt->cl_prog,
.version = clnt->cl_vers,
.authflavor = clnt->cl_auth->au_flavor,
};
return __rpc_clone_client(&args, clnt);
}
EXPORT_SYMBOL_GPL(rpc_clone_client);
/**
* rpc_clone_client_set_auth - Clone an RPC client structure and set its auth
*
* @clnt: RPC client whose parameters are copied
* @flavor: security flavor for new client
*
* Returns a fresh RPC client or an ERR_PTR.
*/
struct rpc_clnt *
rpc_clone_client_set_auth(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
{
struct rpc_create_args args = {
.program = clnt->cl_program,
.prognumber = clnt->cl_prog,
.version = clnt->cl_vers,
.authflavor = flavor,
};
return __rpc_clone_client(&args, clnt);
}
EXPORT_SYMBOL_GPL(rpc_clone_client_set_auth);
/**
* rpc_switch_client_transport: switch the RPC transport on the fly
* @clnt: pointer to a struct rpc_clnt
* @args: pointer to the new transport arguments
* @timeout: pointer to the new timeout parameters
*
* This function allows the caller to switch the RPC transport for the
* rpc_clnt structure 'clnt' to allow it to connect to a mirrored NFS
* server, for instance. It assumes that the caller has ensured that
* there are no active RPC tasks by using some form of locking.
*
* Returns zero if "clnt" is now using the new xprt. Otherwise a
* negative errno is returned, and "clnt" continues to use the old
* xprt.
*/
int rpc_switch_client_transport(struct rpc_clnt *clnt,
struct xprt_create *args,
const struct rpc_timeout *timeout)
{
const struct rpc_timeout *old_timeo;
rpc_authflavor_t pseudoflavor;
struct rpc_xprt *xprt, *old;
struct rpc_clnt *parent;
int err;
xprt = xprt_create_transport(args);
if (IS_ERR(xprt)) {
dprintk("RPC: failed to create new xprt for clnt %p\n",
clnt);
return PTR_ERR(xprt);
}
pseudoflavor = clnt->cl_auth->au_flavor;
old_timeo = clnt->cl_timeout;
old = rpc_clnt_set_transport(clnt, xprt, timeout);
rpc_unregister_client(clnt);
__rpc_clnt_remove_pipedir(clnt);
rpc_clnt_debugfs_unregister(clnt);
/*
* A new transport was created. "clnt" therefore
* becomes the root of a new cl_parent tree. clnt's
* children, if it has any, still point to the old xprt.
*/
parent = clnt->cl_parent;
clnt->cl_parent = clnt;
/*
* The old rpc_auth cache cannot be re-used. GSS
* contexts in particular are between a single
* client and server.
*/
err = rpc_client_register(clnt, pseudoflavor, NULL);
if (err)
goto out_revert;
synchronize_rcu();
if (parent != clnt)
rpc_release_client(parent);
xprt_put(old);
dprintk("RPC: replaced xprt for clnt %p\n", clnt);
return 0;
out_revert:
rpc_clnt_set_transport(clnt, old, old_timeo);
clnt->cl_parent = parent;
rpc_client_register(clnt, pseudoflavor, NULL);
xprt_put(xprt);
dprintk("RPC: failed to switch xprt for clnt %p\n", clnt);
return err;
}
EXPORT_SYMBOL_GPL(rpc_switch_client_transport);
/*
* Kill all tasks for the given client.
* XXX: kill their descendants as well?
*/
void rpc_killall_tasks(struct rpc_clnt *clnt)
{
struct rpc_task *rovr;
if (list_empty(&clnt->cl_tasks))
return;
dprintk("RPC: killing all tasks for client %p\n", clnt);
/*
* Spin lock all_tasks to prevent changes...
*/
spin_lock(&clnt->cl_lock);
list_for_each_entry(rovr, &clnt->cl_tasks, tk_task) {
if (!RPC_IS_ACTIVATED(rovr))
continue;
if (!(rovr->tk_flags & RPC_TASK_KILLED)) {
rovr->tk_flags |= RPC_TASK_KILLED;
rpc_exit(rovr, -EIO);
if (RPC_IS_QUEUED(rovr))
rpc_wake_up_queued_task(rovr->tk_waitqueue,
rovr);
}
}
spin_unlock(&clnt->cl_lock);
}
EXPORT_SYMBOL_GPL(rpc_killall_tasks);
/*
* Properly shut down an RPC client, terminating all outstanding
* requests.
*/
void rpc_shutdown_client(struct rpc_clnt *clnt)
{
might_sleep();
dprintk_rcu("RPC: shutting down %s client for %s\n",
clnt->cl_program->name,
rcu_dereference(clnt->cl_xprt)->servername);
while (!list_empty(&clnt->cl_tasks)) {
rpc_killall_tasks(clnt);
wait_event_timeout(destroy_wait,
list_empty(&clnt->cl_tasks), 1*HZ);
}
rpc_release_client(clnt);
}
EXPORT_SYMBOL_GPL(rpc_shutdown_client);
/*
* Free an RPC client
*/
static struct rpc_clnt *
rpc_free_client(struct rpc_clnt *clnt)
{
struct rpc_clnt *parent = NULL;
dprintk_rcu("RPC: destroying %s client for %s\n",
clnt->cl_program->name,
rcu_dereference(clnt->cl_xprt)->servername);
if (clnt->cl_parent != clnt)
parent = clnt->cl_parent;
rpc_clnt_debugfs_unregister(clnt);
rpc_clnt_remove_pipedir(clnt);
rpc_unregister_client(clnt);
rpc_free_iostats(clnt->cl_metrics);
clnt->cl_metrics = NULL;
xprt_put(rcu_dereference_raw(clnt->cl_xprt));
rpciod_down();
rpc_free_clid(clnt);
kfree(clnt);
return parent;
}
/*
* Free an RPC client
*/
static struct rpc_clnt *
rpc_free_auth(struct rpc_clnt *clnt)
{
if (clnt->cl_auth == NULL)
return rpc_free_client(clnt);
/*
* Note: RPCSEC_GSS may need to send NULL RPC calls in order to
* release remaining GSS contexts. This mechanism ensures
* that it can do so safely.
*/
atomic_inc(&clnt->cl_count);
rpcauth_release(clnt->cl_auth);
clnt->cl_auth = NULL;
if (atomic_dec_and_test(&clnt->cl_count))
return rpc_free_client(clnt);
return NULL;
}
/*
* Release reference to the RPC client
*/
void
rpc_release_client(struct rpc_clnt *clnt)
{
dprintk("RPC: rpc_release_client(%p)\n", clnt);
do {
if (list_empty(&clnt->cl_tasks))
wake_up(&destroy_wait);
if (!atomic_dec_and_test(&clnt->cl_count))
break;
clnt = rpc_free_auth(clnt);
} while (clnt != NULL);
}
EXPORT_SYMBOL_GPL(rpc_release_client);
/**
* rpc_bind_new_program - bind a new RPC program to an existing client
* @old: old rpc_client
* @program: rpc program to set
* @vers: rpc program version
*
* Clones the rpc client and sets up a new RPC program. This is mainly
* of use for enabling different RPC programs to share the same transport.
* The Sun NFSv2/v3 ACL protocol can do this.
*/
struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old,
const struct rpc_program *program,
u32 vers)
{
struct rpc_create_args args = {
.program = program,
.prognumber = program->number,
.version = vers,
.authflavor = old->cl_auth->au_flavor,
};
struct rpc_clnt *clnt;
int err;
clnt = __rpc_clone_client(&args, old);
if (IS_ERR(clnt))
goto out;
err = rpc_ping(clnt);
if (err != 0) {
rpc_shutdown_client(clnt);
clnt = ERR_PTR(err);
}
out:
return clnt;
}
EXPORT_SYMBOL_GPL(rpc_bind_new_program);
void rpc_task_release_client(struct rpc_task *task)
{
struct rpc_clnt *clnt = task->tk_client;
if (clnt != NULL) {
/* Remove from client task list */
spin_lock(&clnt->cl_lock);
list_del(&task->tk_task);
spin_unlock(&clnt->cl_lock);
task->tk_client = NULL;
rpc_release_client(clnt);
}
}
static
void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt)
{
if (clnt != NULL) {
rpc_task_release_client(task);
task->tk_client = clnt;
atomic_inc(&clnt->cl_count);
if (clnt->cl_softrtry)
task->tk_flags |= RPC_TASK_SOFT;
if (clnt->cl_noretranstimeo)
task->tk_flags |= RPC_TASK_NO_RETRANS_TIMEOUT;
if (sk_memalloc_socks()) {
struct rpc_xprt *xprt;
rcu_read_lock();
xprt = rcu_dereference(clnt->cl_xprt);
if (xprt->swapper)
task->tk_flags |= RPC_TASK_SWAPPER;
rcu_read_unlock();
}
/* Add to the client's list of all tasks */
spin_lock(&clnt->cl_lock);
list_add_tail(&task->tk_task, &clnt->cl_tasks);
spin_unlock(&clnt->cl_lock);
}
}
void rpc_task_reset_client(struct rpc_task *task, struct rpc_clnt *clnt)
{
rpc_task_release_client(task);
rpc_task_set_client(task, clnt);
}
EXPORT_SYMBOL_GPL(rpc_task_reset_client);
static void
rpc_task_set_rpc_message(struct rpc_task *task, const struct rpc_message *msg)
{
if (msg != NULL) {
task->tk_msg.rpc_proc = msg->rpc_proc;
task->tk_msg.rpc_argp = msg->rpc_argp;
task->tk_msg.rpc_resp = msg->rpc_resp;
if (msg->rpc_cred != NULL)
task->tk_msg.rpc_cred = get_rpccred(msg->rpc_cred);
}
}
/*
* Default callback for async RPC calls
*/
static void
rpc_default_callback(struct rpc_task *task, void *data)
{
}
static const struct rpc_call_ops rpc_default_ops = {
.rpc_call_done = rpc_default_callback,
};
/**
* rpc_run_task - Allocate a new RPC task, then run rpc_execute against it
* @task_setup_data: pointer to task initialisation data
*/
struct rpc_task *rpc_run_task(const struct rpc_task_setup *task_setup_data)
{
struct rpc_task *task;
task = rpc_new_task(task_setup_data);
if (IS_ERR(task))
goto out;
rpc_task_set_client(task, task_setup_data->rpc_client);
rpc_task_set_rpc_message(task, task_setup_data->rpc_message);
if (task->tk_action == NULL)
rpc_call_start(task);
atomic_inc(&task->tk_count);
rpc_execute(task);
out:
return task;
}
EXPORT_SYMBOL_GPL(rpc_run_task);
/**
* rpc_call_sync - Perform a synchronous RPC call
* @clnt: pointer to RPC client
* @msg: RPC call parameters
* @flags: RPC call flags
*/
int rpc_call_sync(struct rpc_clnt *clnt, const struct rpc_message *msg, int flags)
{
struct rpc_task *task;
struct rpc_task_setup task_setup_data = {
.rpc_client = clnt,
.rpc_message = msg,
.callback_ops = &rpc_default_ops,
.flags = flags,
};
int status;
WARN_ON_ONCE(flags & RPC_TASK_ASYNC);
if (flags & RPC_TASK_ASYNC) {
rpc_release_calldata(task_setup_data.callback_ops,
task_setup_data.callback_data);
return -EINVAL;
}
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return PTR_ERR(task);
status = task->tk_status;
rpc_put_task(task);
return status;
}
EXPORT_SYMBOL_GPL(rpc_call_sync);
/**
* rpc_call_async - Perform an asynchronous RPC call
* @clnt: pointer to RPC client
* @msg: RPC call parameters
* @flags: RPC call flags
* @tk_ops: RPC call ops
* @data: user call data
*/
int
rpc_call_async(struct rpc_clnt *clnt, const struct rpc_message *msg, int flags,
const struct rpc_call_ops *tk_ops, void *data)
{
struct rpc_task *task;
struct rpc_task_setup task_setup_data = {
.rpc_client = clnt,
.rpc_message = msg,
.callback_ops = tk_ops,
.callback_data = data,
.flags = flags|RPC_TASK_ASYNC,
};
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return PTR_ERR(task);
rpc_put_task(task);
return 0;
}
EXPORT_SYMBOL_GPL(rpc_call_async);
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
nfs41: Add backchannel processing support to RPC state machine Adds rpc_run_bc_task() which is called by the NFS callback service to process backchannel requests. It performs similar work to rpc_run_task() though "schedules" the backchannel task to be executed starting at the call_trasmit state in the RPC state machine. It also introduces some miscellaneous updates to the argument validation, call_transmit, and transport cleanup functions to take into account that there are now forechannel and backchannel tasks. Backchannel requests do not carry an RPC message structure, since the payload has already been XDR encoded using the existing NFSv4 callback mechanism. Introduce a new transmit state for the client to reply on to backchannel requests. This new state simply reserves the transport and issues the reply. In case of a connection related error, disconnects the transport and drops the reply. It requires the forechannel to re-establish the connection and the server to retransmit the request, as stated in NFSv4.1 section 2.9.2 "Client and Server Transport Behavior". Note: There is no need to loop attempting to reserve the transport. If EAGAIN is returned by xprt_prepare_transmit(), return with tk_status == 0, setting tk_action to call_bc_transmit. rpc_execute() will invoke it again after the task is taken off the sleep queue. [nfs41: rpc_run_bc_task() need not be exported outside RPC module] [nfs41: New call_bc_transmit RPC state] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfs41: Backchannel: No need to loop in call_bc_transmit()] Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [rpc_count_iostats incorrectly exits early] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [Convert rpc_reply_expected() to inline function] [Remove unnecessary BUG_ON()] [Rename variable] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com>
2009-04-01 21:23:03 +08:00
/**
* rpc_run_bc_task - Allocate a new RPC task for backchannel use, then run
* rpc_execute against it
* @req: RPC request
* @tk_ops: RPC call ops
nfs41: Add backchannel processing support to RPC state machine Adds rpc_run_bc_task() which is called by the NFS callback service to process backchannel requests. It performs similar work to rpc_run_task() though "schedules" the backchannel task to be executed starting at the call_trasmit state in the RPC state machine. It also introduces some miscellaneous updates to the argument validation, call_transmit, and transport cleanup functions to take into account that there are now forechannel and backchannel tasks. Backchannel requests do not carry an RPC message structure, since the payload has already been XDR encoded using the existing NFSv4 callback mechanism. Introduce a new transmit state for the client to reply on to backchannel requests. This new state simply reserves the transport and issues the reply. In case of a connection related error, disconnects the transport and drops the reply. It requires the forechannel to re-establish the connection and the server to retransmit the request, as stated in NFSv4.1 section 2.9.2 "Client and Server Transport Behavior". Note: There is no need to loop attempting to reserve the transport. If EAGAIN is returned by xprt_prepare_transmit(), return with tk_status == 0, setting tk_action to call_bc_transmit. rpc_execute() will invoke it again after the task is taken off the sleep queue. [nfs41: rpc_run_bc_task() need not be exported outside RPC module] [nfs41: New call_bc_transmit RPC state] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfs41: Backchannel: No need to loop in call_bc_transmit()] Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [rpc_count_iostats incorrectly exits early] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [Convert rpc_reply_expected() to inline function] [Remove unnecessary BUG_ON()] [Rename variable] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com>
2009-04-01 21:23:03 +08:00
*/
struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req,
const struct rpc_call_ops *tk_ops)
nfs41: Add backchannel processing support to RPC state machine Adds rpc_run_bc_task() which is called by the NFS callback service to process backchannel requests. It performs similar work to rpc_run_task() though "schedules" the backchannel task to be executed starting at the call_trasmit state in the RPC state machine. It also introduces some miscellaneous updates to the argument validation, call_transmit, and transport cleanup functions to take into account that there are now forechannel and backchannel tasks. Backchannel requests do not carry an RPC message structure, since the payload has already been XDR encoded using the existing NFSv4 callback mechanism. Introduce a new transmit state for the client to reply on to backchannel requests. This new state simply reserves the transport and issues the reply. In case of a connection related error, disconnects the transport and drops the reply. It requires the forechannel to re-establish the connection and the server to retransmit the request, as stated in NFSv4.1 section 2.9.2 "Client and Server Transport Behavior". Note: There is no need to loop attempting to reserve the transport. If EAGAIN is returned by xprt_prepare_transmit(), return with tk_status == 0, setting tk_action to call_bc_transmit. rpc_execute() will invoke it again after the task is taken off the sleep queue. [nfs41: rpc_run_bc_task() need not be exported outside RPC module] [nfs41: New call_bc_transmit RPC state] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfs41: Backchannel: No need to loop in call_bc_transmit()] Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [rpc_count_iostats incorrectly exits early] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [Convert rpc_reply_expected() to inline function] [Remove unnecessary BUG_ON()] [Rename variable] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com>
2009-04-01 21:23:03 +08:00
{
struct rpc_task *task;
struct xdr_buf *xbufp = &req->rq_snd_buf;
struct rpc_task_setup task_setup_data = {
.callback_ops = tk_ops,
};
dprintk("RPC: rpc_run_bc_task req= %p\n", req);
/*
* Create an rpc_task to send the data
*/
task = rpc_new_task(&task_setup_data);
if (IS_ERR(task)) {
nfs41: Add backchannel processing support to RPC state machine Adds rpc_run_bc_task() which is called by the NFS callback service to process backchannel requests. It performs similar work to rpc_run_task() though "schedules" the backchannel task to be executed starting at the call_trasmit state in the RPC state machine. It also introduces some miscellaneous updates to the argument validation, call_transmit, and transport cleanup functions to take into account that there are now forechannel and backchannel tasks. Backchannel requests do not carry an RPC message structure, since the payload has already been XDR encoded using the existing NFSv4 callback mechanism. Introduce a new transmit state for the client to reply on to backchannel requests. This new state simply reserves the transport and issues the reply. In case of a connection related error, disconnects the transport and drops the reply. It requires the forechannel to re-establish the connection and the server to retransmit the request, as stated in NFSv4.1 section 2.9.2 "Client and Server Transport Behavior". Note: There is no need to loop attempting to reserve the transport. If EAGAIN is returned by xprt_prepare_transmit(), return with tk_status == 0, setting tk_action to call_bc_transmit. rpc_execute() will invoke it again after the task is taken off the sleep queue. [nfs41: rpc_run_bc_task() need not be exported outside RPC module] [nfs41: New call_bc_transmit RPC state] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfs41: Backchannel: No need to loop in call_bc_transmit()] Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [rpc_count_iostats incorrectly exits early] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [Convert rpc_reply_expected() to inline function] [Remove unnecessary BUG_ON()] [Rename variable] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com>
2009-04-01 21:23:03 +08:00
xprt_free_bc_request(req);
goto out;
}
task->tk_rqstp = req;
/*
* Set up the xdr_buf length.
* This also indicates that the buffer is XDR encoded already.
*/
xbufp->len = xbufp->head[0].iov_len + xbufp->page_len +
xbufp->tail[0].iov_len;
task->tk_action = call_bc_transmit;
atomic_inc(&task->tk_count);
WARN_ON_ONCE(atomic_read(&task->tk_count) != 2);
nfs41: Add backchannel processing support to RPC state machine Adds rpc_run_bc_task() which is called by the NFS callback service to process backchannel requests. It performs similar work to rpc_run_task() though "schedules" the backchannel task to be executed starting at the call_trasmit state in the RPC state machine. It also introduces some miscellaneous updates to the argument validation, call_transmit, and transport cleanup functions to take into account that there are now forechannel and backchannel tasks. Backchannel requests do not carry an RPC message structure, since the payload has already been XDR encoded using the existing NFSv4 callback mechanism. Introduce a new transmit state for the client to reply on to backchannel requests. This new state simply reserves the transport and issues the reply. In case of a connection related error, disconnects the transport and drops the reply. It requires the forechannel to re-establish the connection and the server to retransmit the request, as stated in NFSv4.1 section 2.9.2 "Client and Server Transport Behavior". Note: There is no need to loop attempting to reserve the transport. If EAGAIN is returned by xprt_prepare_transmit(), return with tk_status == 0, setting tk_action to call_bc_transmit. rpc_execute() will invoke it again after the task is taken off the sleep queue. [nfs41: rpc_run_bc_task() need not be exported outside RPC module] [nfs41: New call_bc_transmit RPC state] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfs41: Backchannel: No need to loop in call_bc_transmit()] Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [rpc_count_iostats incorrectly exits early] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [Convert rpc_reply_expected() to inline function] [Remove unnecessary BUG_ON()] [Rename variable] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com>
2009-04-01 21:23:03 +08:00
rpc_execute(task);
out:
dprintk("RPC: rpc_run_bc_task: task= %p\n", task);
return task;
}
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
nfs41: Add backchannel processing support to RPC state machine Adds rpc_run_bc_task() which is called by the NFS callback service to process backchannel requests. It performs similar work to rpc_run_task() though "schedules" the backchannel task to be executed starting at the call_trasmit state in the RPC state machine. It also introduces some miscellaneous updates to the argument validation, call_transmit, and transport cleanup functions to take into account that there are now forechannel and backchannel tasks. Backchannel requests do not carry an RPC message structure, since the payload has already been XDR encoded using the existing NFSv4 callback mechanism. Introduce a new transmit state for the client to reply on to backchannel requests. This new state simply reserves the transport and issues the reply. In case of a connection related error, disconnects the transport and drops the reply. It requires the forechannel to re-establish the connection and the server to retransmit the request, as stated in NFSv4.1 section 2.9.2 "Client and Server Transport Behavior". Note: There is no need to loop attempting to reserve the transport. If EAGAIN is returned by xprt_prepare_transmit(), return with tk_status == 0, setting tk_action to call_bc_transmit. rpc_execute() will invoke it again after the task is taken off the sleep queue. [nfs41: rpc_run_bc_task() need not be exported outside RPC module] [nfs41: New call_bc_transmit RPC state] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfs41: Backchannel: No need to loop in call_bc_transmit()] Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [rpc_count_iostats incorrectly exits early] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [Convert rpc_reply_expected() to inline function] [Remove unnecessary BUG_ON()] [Rename variable] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com>
2009-04-01 21:23:03 +08:00
void
rpc_call_start(struct rpc_task *task)
{
task->tk_action = call_start;
}
EXPORT_SYMBOL_GPL(rpc_call_start);
/**
* rpc_peeraddr - extract remote peer address from clnt's xprt
* @clnt: RPC client structure
* @buf: target buffer
* @bufsize: length of target buffer
*
* Returns the number of bytes that are actually in the stored address.
*/
size_t rpc_peeraddr(struct rpc_clnt *clnt, struct sockaddr *buf, size_t bufsize)
{
size_t bytes;
struct rpc_xprt *xprt;
rcu_read_lock();
xprt = rcu_dereference(clnt->cl_xprt);
bytes = xprt->addrlen;
if (bytes > bufsize)
bytes = bufsize;
memcpy(buf, &xprt->addr, bytes);
rcu_read_unlock();
return bytes;
}
EXPORT_SYMBOL_GPL(rpc_peeraddr);
/**
* rpc_peeraddr2str - return remote peer address in printable format
* @clnt: RPC client structure
* @format: address format
*
* NB: the lifetime of the memory referenced by the returned pointer is
* the same as the rpc_xprt itself. As long as the caller uses this
* pointer, it must hold the RCU read lock.
*/
const char *rpc_peeraddr2str(struct rpc_clnt *clnt,
enum rpc_display_format_t format)
{
struct rpc_xprt *xprt;
xprt = rcu_dereference(clnt->cl_xprt);
if (xprt->address_strings[format] != NULL)
return xprt->address_strings[format];
else
return "unprintable";
}
EXPORT_SYMBOL_GPL(rpc_peeraddr2str);
static const struct sockaddr_in rpc_inaddr_loopback = {
.sin_family = AF_INET,
.sin_addr.s_addr = htonl(INADDR_ANY),
};
static const struct sockaddr_in6 rpc_in6addr_loopback = {
.sin6_family = AF_INET6,
.sin6_addr = IN6ADDR_ANY_INIT,
};
/*
* Try a getsockname() on a connected datagram socket. Using a
* connected datagram socket prevents leaving a socket in TIME_WAIT.
* This conserves the ephemeral port number space.
*
* Returns zero and fills in "buf" if successful; otherwise, a
* negative errno is returned.
*/
static int rpc_sockname(struct net *net, struct sockaddr *sap, size_t salen,
struct sockaddr *buf, int buflen)
{
struct socket *sock;
int err;
err = __sock_create(net, sap->sa_family,
SOCK_DGRAM, IPPROTO_UDP, &sock, 1);
if (err < 0) {
dprintk("RPC: can't create UDP socket (%d)\n", err);
goto out;
}
switch (sap->sa_family) {
case AF_INET:
err = kernel_bind(sock,
(struct sockaddr *)&rpc_inaddr_loopback,
sizeof(rpc_inaddr_loopback));
break;
case AF_INET6:
err = kernel_bind(sock,
(struct sockaddr *)&rpc_in6addr_loopback,
sizeof(rpc_in6addr_loopback));
break;
default:
err = -EAFNOSUPPORT;
goto out;
}
if (err < 0) {
dprintk("RPC: can't bind UDP socket (%d)\n", err);
goto out_release;
}
err = kernel_connect(sock, sap, salen, 0);
if (err < 0) {
dprintk("RPC: can't connect UDP socket (%d)\n", err);
goto out_release;
}
err = kernel_getsockname(sock, buf, &buflen);
if (err < 0) {
dprintk("RPC: getsockname failed (%d)\n", err);
goto out_release;
}
err = 0;
if (buf->sa_family == AF_INET6) {
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)buf;
sin6->sin6_scope_id = 0;
}
dprintk("RPC: %s succeeded\n", __func__);
out_release:
sock_release(sock);
out:
return err;
}
/*
* Scraping a connected socket failed, so we don't have a useable
* local address. Fallback: generate an address that will prevent
* the server from calling us back.
*
* Returns zero and fills in "buf" if successful; otherwise, a
* negative errno is returned.
*/
static int rpc_anyaddr(int family, struct sockaddr *buf, size_t buflen)
{
switch (family) {
case AF_INET:
if (buflen < sizeof(rpc_inaddr_loopback))
return -EINVAL;
memcpy(buf, &rpc_inaddr_loopback,
sizeof(rpc_inaddr_loopback));
break;
case AF_INET6:
if (buflen < sizeof(rpc_in6addr_loopback))
return -EINVAL;
memcpy(buf, &rpc_in6addr_loopback,
sizeof(rpc_in6addr_loopback));
default:
dprintk("RPC: %s: address family not supported\n",
__func__);
return -EAFNOSUPPORT;
}
dprintk("RPC: %s: succeeded\n", __func__);
return 0;
}
/**
* rpc_localaddr - discover local endpoint address for an RPC client
* @clnt: RPC client structure
* @buf: target buffer
* @buflen: size of target buffer, in bytes
*
* Returns zero and fills in "buf" and "buflen" if successful;
* otherwise, a negative errno is returned.
*
* This works even if the underlying transport is not currently connected,
* or if the upper layer never previously provided a source address.
*
* The result of this function call is transient: multiple calls in
* succession may give different results, depending on how local
* networking configuration changes over time.
*/
int rpc_localaddr(struct rpc_clnt *clnt, struct sockaddr *buf, size_t buflen)
{
struct sockaddr_storage address;
struct sockaddr *sap = (struct sockaddr *)&address;
struct rpc_xprt *xprt;
struct net *net;
size_t salen;
int err;
rcu_read_lock();
xprt = rcu_dereference(clnt->cl_xprt);
salen = xprt->addrlen;
memcpy(sap, &xprt->addr, salen);
net = get_net(xprt->xprt_net);
rcu_read_unlock();
rpc_set_port(sap, 0);
err = rpc_sockname(net, sap, salen, buf, buflen);
put_net(net);
if (err != 0)
/* Couldn't discover local address, return ANYADDR */
return rpc_anyaddr(sap->sa_family, buf, buflen);
return 0;
}
EXPORT_SYMBOL_GPL(rpc_localaddr);
void
rpc_setbufsize(struct rpc_clnt *clnt, unsigned int sndsize, unsigned int rcvsize)
{
struct rpc_xprt *xprt;
rcu_read_lock();
xprt = rcu_dereference(clnt->cl_xprt);
if (xprt->ops->set_buffer_size)
xprt->ops->set_buffer_size(xprt, sndsize, rcvsize);
rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(rpc_setbufsize);
/**
* rpc_protocol - Get transport protocol number for an RPC client
* @clnt: RPC client to query
*
*/
int rpc_protocol(struct rpc_clnt *clnt)
{
int protocol;
rcu_read_lock();
protocol = rcu_dereference(clnt->cl_xprt)->prot;
rcu_read_unlock();
return protocol;
}
EXPORT_SYMBOL_GPL(rpc_protocol);
/**
* rpc_net_ns - Get the network namespace for this RPC client
* @clnt: RPC client to query
*
*/
struct net *rpc_net_ns(struct rpc_clnt *clnt)
{
struct net *ret;
rcu_read_lock();
ret = rcu_dereference(clnt->cl_xprt)->xprt_net;
rcu_read_unlock();
return ret;
}
EXPORT_SYMBOL_GPL(rpc_net_ns);
/**
* rpc_max_payload - Get maximum payload size for a transport, in bytes
* @clnt: RPC client to query
*
* For stream transports, this is one RPC record fragment (see RFC
* 1831), as we don't support multi-record requests yet. For datagram
* transports, this is the size of an IP packet minus the IP, UDP, and
* RPC header sizes.
*/
size_t rpc_max_payload(struct rpc_clnt *clnt)
{
size_t ret;
rcu_read_lock();
ret = rcu_dereference(clnt->cl_xprt)->max_payload;
rcu_read_unlock();
return ret;
}
EXPORT_SYMBOL_GPL(rpc_max_payload);
/**
* rpc_get_timeout - Get timeout for transport in units of HZ
* @clnt: RPC client to query
*/
unsigned long rpc_get_timeout(struct rpc_clnt *clnt)
{
unsigned long ret;
rcu_read_lock();
ret = rcu_dereference(clnt->cl_xprt)->timeout->to_initval;
rcu_read_unlock();
return ret;
}
EXPORT_SYMBOL_GPL(rpc_get_timeout);
/**
* rpc_force_rebind - force transport to check that remote port is unchanged
* @clnt: client to rebind
*
*/
void rpc_force_rebind(struct rpc_clnt *clnt)
{
if (clnt->cl_autobind) {
rcu_read_lock();
xprt_clear_bound(rcu_dereference(clnt->cl_xprt));
rcu_read_unlock();
}
}
EXPORT_SYMBOL_GPL(rpc_force_rebind);
/*
* Restart an (async) RPC call from the call_prepare state.
* Usually called from within the exit handler.
*/
int
rpc_restart_call_prepare(struct rpc_task *task)
{
if (RPC_ASSASSINATED(task))
return 0;
task->tk_action = call_start;
task->tk_status = 0;
if (task->tk_ops->rpc_call_prepare != NULL)
task->tk_action = rpc_prepare_task;
return 1;
}
EXPORT_SYMBOL_GPL(rpc_restart_call_prepare);
/*
* Restart an (async) RPC call. Usually called from within the
* exit handler.
*/
int
rpc_restart_call(struct rpc_task *task)
{
if (RPC_ASSASSINATED(task))
return 0;
task->tk_action = call_start;
task->tk_status = 0;
return 1;
}
EXPORT_SYMBOL_GPL(rpc_restart_call);
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
const char
*rpc_proc_name(const struct rpc_task *task)
{
const struct rpc_procinfo *proc = task->tk_msg.rpc_proc;
if (proc) {
if (proc->p_name)
return proc->p_name;
else
return "NULL";
} else
return "no proc";
}
#endif
/*
* 0. Initial state
*
* Other FSM states can be visited zero or more times, but
* this state is visited exactly once for each RPC.
*/
static void
call_start(struct rpc_task *task)
{
struct rpc_clnt *clnt = task->tk_client;
dprintk("RPC: %5u call_start %s%d proc %s (%s)\n", task->tk_pid,
clnt->cl_program->name, clnt->cl_vers,
rpc_proc_name(task),
(RPC_IS_ASYNC(task) ? "async" : "sync"));
/* Increment call count */
task->tk_msg.rpc_proc->p_count++;
clnt->cl_stats->rpccnt++;
task->tk_action = call_reserve;
}
/*
* 1. Reserve an RPC call slot
*/
static void
call_reserve(struct rpc_task *task)
{
dprint_status(task);
task->tk_status = 0;
task->tk_action = call_reserveresult;
xprt_reserve(task);
}
static void call_retry_reserve(struct rpc_task *task);
/*
* 1b. Grok the result of xprt_reserve()
*/
static void
call_reserveresult(struct rpc_task *task)
{
int status = task->tk_status;
dprint_status(task);
/*
* After a call to xprt_reserve(), we must have either
* a request slot or else an error status.
*/
task->tk_status = 0;
if (status >= 0) {
if (task->tk_rqstp) {
task->tk_action = call_refresh;
return;
}
printk(KERN_ERR "%s: status=%d, but no request slot, exiting\n",
__func__, status);
rpc_exit(task, -EIO);
return;
}
/*
* Even though there was an error, we may have acquired
* a request slot somehow. Make sure not to leak it.
*/
if (task->tk_rqstp) {
printk(KERN_ERR "%s: status=%d, request allocated anyway\n",
__func__, status);
xprt_release(task);
}
switch (status) {
case -ENOMEM:
rpc_delay(task, HZ >> 2);
case -EAGAIN: /* woken up; retry */
task->tk_action = call_retry_reserve;
return;
case -EIO: /* probably a shutdown */
break;
default:
printk(KERN_ERR "%s: unrecognized error %d, exiting\n",
__func__, status);
break;
}
rpc_exit(task, status);
}
/*
* 1c. Retry reserving an RPC call slot
*/
static void
call_retry_reserve(struct rpc_task *task)
{
dprint_status(task);
task->tk_status = 0;
task->tk_action = call_reserveresult;
xprt_retry_reserve(task);
}
/*
* 2. Bind and/or refresh the credentials
*/
static void
call_refresh(struct rpc_task *task)
{
dprint_status(task);
task->tk_action = call_refreshresult;
task->tk_status = 0;
task->tk_client->cl_stats->rpcauthrefresh++;
rpcauth_refreshcred(task);
}
/*
* 2a. Process the results of a credential refresh
*/
static void
call_refreshresult(struct rpc_task *task)
{
int status = task->tk_status;
dprint_status(task);
task->tk_status = 0;
task->tk_action = call_refresh;
switch (status) {
case 0:
sunrpc: Fix infinite loop in RPC state machine When a task enters call_refreshresult with status 0 from call_refresh and !rpcauth_uptodatecred(task) it enters call_refresh again with no rate-limiting or max number of retries. Instead of trying forever, make use of the retry path that other errors use. This only seems to be possible when the crrefresh callback is gss_refresh_null, which only happens when destroying the context. To reproduce: 1) mount with sec=krb5 (or sec=sys with krb5 negotiated for non FSID specific operations). 2) reboot - the client will be stuck and will need to be hard rebooted BUG: soft lockup - CPU#0 stuck for 22s! [kworker/0:2:46] Modules linked in: rpcsec_gss_krb5 nfsv4 nfs fscache ppdev crc32c_intel aesni_intel aes_x86_64 glue_helper lrw gf128mul ablk_helper cryptd serio_raw i2c_piix4 i2c_core e1000 parport_pc parport shpchp nfsd auth_rpcgss oid_registry exportfs nfs_acl lockd sunrpc autofs4 mptspi scsi_transport_spi mptscsih mptbase ata_generic floppy irq event stamp: 195724 hardirqs last enabled at (195723): [<ffffffff814a925c>] restore_args+0x0/0x30 hardirqs last disabled at (195724): [<ffffffff814b0a6a>] apic_timer_interrupt+0x6a/0x80 softirqs last enabled at (195722): [<ffffffff8103f583>] __do_softirq+0x1df/0x276 softirqs last disabled at (195717): [<ffffffff8103f852>] irq_exit+0x53/0x9a CPU: 0 PID: 46 Comm: kworker/0:2 Not tainted 3.13.0-rc3-branch-dros_testing+ #4 Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 07/31/2013 Workqueue: rpciod rpc_async_schedule [sunrpc] task: ffff8800799c4260 ti: ffff880079002000 task.ti: ffff880079002000 RIP: 0010:[<ffffffffa0064fd4>] [<ffffffffa0064fd4>] __rpc_execute+0x8a/0x362 [sunrpc] RSP: 0018:ffff880079003d18 EFLAGS: 00000246 RAX: 0000000000000005 RBX: 0000000000000007 RCX: 0000000000000007 RDX: 0000000000000007 RSI: ffff88007aecbae8 RDI: ffff8800783d8900 RBP: ffff880079003d78 R08: ffff88006e30e9f8 R09: ffffffffa005a3d7 R10: ffff88006e30e7b0 R11: ffff8800783d8900 R12: ffffffffa006675e R13: ffff880079003ce8 R14: ffff88006e30e7b0 R15: ffff8800783d8900 FS: 0000000000000000(0000) GS:ffff88007f200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f3072333000 CR3: 0000000001a0b000 CR4: 00000000001407f0 Stack: ffff880079003d98 0000000000000246 0000000000000000 ffff88007a9a4830 ffff880000000000 ffffffff81073f47 ffff88007f212b00 ffff8800799c4260 ffff8800783d8988 ffff88007f212b00 ffffe8ffff604800 0000000000000000 Call Trace: [<ffffffff81073f47>] ? trace_hardirqs_on_caller+0x145/0x1a1 [<ffffffffa00652d3>] rpc_async_schedule+0x27/0x32 [sunrpc] [<ffffffff81052974>] process_one_work+0x211/0x3a5 [<ffffffff810528d5>] ? process_one_work+0x172/0x3a5 [<ffffffff81052eeb>] worker_thread+0x134/0x202 [<ffffffff81052db7>] ? rescuer_thread+0x280/0x280 [<ffffffff81052db7>] ? rescuer_thread+0x280/0x280 [<ffffffff810584a0>] kthread+0xc9/0xd1 [<ffffffff810583d7>] ? __kthread_parkme+0x61/0x61 [<ffffffff814afd6c>] ret_from_fork+0x7c/0xb0 [<ffffffff810583d7>] ? __kthread_parkme+0x61/0x61 Code: e8 87 63 fd e0 c6 05 10 dd 01 00 01 48 8b 43 70 4c 8d 6b 70 45 31 e4 a8 02 0f 85 d5 02 00 00 4c 8b 7b 48 48 c7 43 48 00 00 00 00 <4c> 8b 4b 50 4d 85 ff 75 0c 4d 85 c9 4d 89 cf 0f 84 32 01 00 00 And the output of "rpcdebug -m rpc -s all": RPC: 61 call_refresh (status 0) RPC: 61 call_refresh (status 0) RPC: 61 refreshing RPCSEC_GSS cred ffff88007a413cf0 RPC: 61 refreshing RPCSEC_GSS cred ffff88007a413cf0 RPC: 61 call_refreshresult (status 0) RPC: 61 refreshing RPCSEC_GSS cred ffff88007a413cf0 RPC: 61 call_refreshresult (status 0) RPC: 61 refreshing RPCSEC_GSS cred ffff88007a413cf0 RPC: 61 call_refresh (status 0) RPC: 61 call_refreshresult (status 0) RPC: 61 call_refresh (status 0) RPC: 61 call_refresh (status 0) RPC: 61 refreshing RPCSEC_GSS cred ffff88007a413cf0 RPC: 61 call_refreshresult (status 0) RPC: 61 call_refresh (status 0) RPC: 61 refreshing RPCSEC_GSS cred ffff88007a413cf0 RPC: 61 call_refresh (status 0) RPC: 61 refreshing RPCSEC_GSS cred ffff88007a413cf0 RPC: 61 refreshing RPCSEC_GSS cred ffff88007a413cf0 RPC: 61 call_refreshresult (status 0) RPC: 61 call_refresh (status 0) RPC: 61 call_refresh (status 0) RPC: 61 call_refresh (status 0) RPC: 61 call_refresh (status 0) RPC: 61 call_refreshresult (status 0) RPC: 61 refreshing RPCSEC_GSS cred ffff88007a413cf0 Signed-off-by: Weston Andros Adamson <dros@netapp.com> Cc: stable@vger.kernel.org # 2.6.37+ Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
2013-12-18 01:16:11 +08:00
if (rpcauth_uptodatecred(task)) {
task->tk_action = call_allocate;
sunrpc: Fix infinite loop in RPC state machine When a task enters call_refreshresult with status 0 from call_refresh and !rpcauth_uptodatecred(task) it enters call_refresh again with no rate-limiting or max number of retries. Instead of trying forever, make use of the retry path that other errors use. This only seems to be possible when the crrefresh callback is gss_refresh_null, which only happens when destroying the context. To reproduce: 1) mount with sec=krb5 (or sec=sys with krb5 negotiated for non FSID specific operations). 2) reboot - the client will be stuck and will need to be hard rebooted BUG: soft lockup - CPU#0 stuck for 22s! [kworker/0:2:46] Modules linked in: rpcsec_gss_krb5 nfsv4 nfs fscache ppdev crc32c_intel aesni_intel aes_x86_64 glue_helper lrw gf128mul ablk_helper cryptd serio_raw i2c_piix4 i2c_core e1000 parport_pc parport shpchp nfsd auth_rpcgss oid_registry exportfs nfs_acl lockd sunrpc autofs4 mptspi scsi_transport_spi mptscsih mptbase ata_generic floppy irq event stamp: 195724 hardirqs last enabled at (195723): [<ffffffff814a925c>] restore_args+0x0/0x30 hardirqs last disabled at (195724): [<ffffffff814b0a6a>] apic_timer_interrupt+0x6a/0x80 softirqs last enabled at (195722): [<ffffffff8103f583>] __do_softirq+0x1df/0x276 softirqs last disabled at (195717): [<ffffffff8103f852>] irq_exit+0x53/0x9a CPU: 0 PID: 46 Comm: kworker/0:2 Not tainted 3.13.0-rc3-branch-dros_testing+ #4 Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 07/31/2013 Workqueue: rpciod rpc_async_schedule [sunrpc] task: ffff8800799c4260 ti: ffff880079002000 task.ti: ffff880079002000 RIP: 0010:[<ffffffffa0064fd4>] [<ffffffffa0064fd4>] __rpc_execute+0x8a/0x362 [sunrpc] RSP: 0018:ffff880079003d18 EFLAGS: 00000246 RAX: 0000000000000005 RBX: 0000000000000007 RCX: 0000000000000007 RDX: 0000000000000007 RSI: ffff88007aecbae8 RDI: ffff8800783d8900 RBP: ffff880079003d78 R08: ffff88006e30e9f8 R09: ffffffffa005a3d7 R10: ffff88006e30e7b0 R11: ffff8800783d8900 R12: ffffffffa006675e R13: ffff880079003ce8 R14: ffff88006e30e7b0 R15: ffff8800783d8900 FS: 0000000000000000(0000) GS:ffff88007f200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f3072333000 CR3: 0000000001a0b000 CR4: 00000000001407f0 Stack: ffff880079003d98 0000000000000246 0000000000000000 ffff88007a9a4830 ffff880000000000 ffffffff81073f47 ffff88007f212b00 ffff8800799c4260 ffff8800783d8988 ffff88007f212b00 ffffe8ffff604800 0000000000000000 Call Trace: [<ffffffff81073f47>] ? trace_hardirqs_on_caller+0x145/0x1a1 [<ffffffffa00652d3>] rpc_async_schedule+0x27/0x32 [sunrpc] [<ffffffff81052974>] process_one_work+0x211/0x3a5 [<ffffffff810528d5>] ? process_one_work+0x172/0x3a5 [<ffffffff81052eeb>] worker_thread+0x134/0x202 [<ffffffff81052db7>] ? rescuer_thread+0x280/0x280 [<ffffffff81052db7>] ? rescuer_thread+0x280/0x280 [<ffffffff810584a0>] kthread+0xc9/0xd1 [<ffffffff810583d7>] ? __kthread_parkme+0x61/0x61 [<ffffffff814afd6c>] ret_from_fork+0x7c/0xb0 [<ffffffff810583d7>] ? __kthread_parkme+0x61/0x61 Code: e8 87 63 fd e0 c6 05 10 dd 01 00 01 48 8b 43 70 4c 8d 6b 70 45 31 e4 a8 02 0f 85 d5 02 00 00 4c 8b 7b 48 48 c7 43 48 00 00 00 00 <4c> 8b 4b 50 4d 85 ff 75 0c 4d 85 c9 4d 89 cf 0f 84 32 01 00 00 And the output of "rpcdebug -m rpc -s all": RPC: 61 call_refresh (status 0) RPC: 61 call_refresh (status 0) RPC: 61 refreshing RPCSEC_GSS cred ffff88007a413cf0 RPC: 61 refreshing RPCSEC_GSS cred ffff88007a413cf0 RPC: 61 call_refreshresult (status 0) RPC: 61 refreshing RPCSEC_GSS cred ffff88007a413cf0 RPC: 61 call_refreshresult (status 0) RPC: 61 refreshing RPCSEC_GSS cred ffff88007a413cf0 RPC: 61 call_refresh (status 0) RPC: 61 call_refreshresult (status 0) RPC: 61 call_refresh (status 0) RPC: 61 call_refresh (status 0) RPC: 61 refreshing RPCSEC_GSS cred ffff88007a413cf0 RPC: 61 call_refreshresult (status 0) RPC: 61 call_refresh (status 0) RPC: 61 refreshing RPCSEC_GSS cred ffff88007a413cf0 RPC: 61 call_refresh (status 0) RPC: 61 refreshing RPCSEC_GSS cred ffff88007a413cf0 RPC: 61 refreshing RPCSEC_GSS cred ffff88007a413cf0 RPC: 61 call_refreshresult (status 0) RPC: 61 call_refresh (status 0) RPC: 61 call_refresh (status 0) RPC: 61 call_refresh (status 0) RPC: 61 call_refresh (status 0) RPC: 61 call_refreshresult (status 0) RPC: 61 refreshing RPCSEC_GSS cred ffff88007a413cf0 Signed-off-by: Weston Andros Adamson <dros@netapp.com> Cc: stable@vger.kernel.org # 2.6.37+ Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
2013-12-18 01:16:11 +08:00
return;
}
/* Use rate-limiting and a max number of retries if refresh
* had status 0 but failed to update the cred.
*/
case -ETIMEDOUT:
rpc_delay(task, 3*HZ);
case -EAGAIN:
status = -EACCES;
case -EKEYEXPIRED:
if (!task->tk_cred_retry)
break;
task->tk_cred_retry--;
dprintk("RPC: %5u %s: retry refresh creds\n",
task->tk_pid, __func__);
return;
}
dprintk("RPC: %5u %s: refresh creds failed with error %d\n",
task->tk_pid, __func__, status);
rpc_exit(task, status);
}
/*
* 2b. Allocate the buffer. For details, see sched.c:rpc_malloc.
* (Note: buffer memory is freed in xprt_release).
*/
static void
call_allocate(struct rpc_task *task)
{
unsigned int slack = task->tk_rqstp->rq_cred->cr_auth->au_cslack;
struct rpc_rqst *req = task->tk_rqstp;
struct rpc_xprt *xprt = req->rq_xprt;
struct rpc_procinfo *proc = task->tk_msg.rpc_proc;
dprint_status(task);
task->tk_status = 0;
task->tk_action = call_bind;
if (req->rq_buffer)
return;
if (proc->p_proc != 0) {
BUG_ON(proc->p_arglen == 0);
if (proc->p_decode != NULL)
BUG_ON(proc->p_replen == 0);
}
/*
* Calculate the size (in quads) of the RPC call
* and reply headers, and convert both values
* to byte sizes.
*/
req->rq_callsize = RPC_CALLHDRSIZE + (slack << 1) + proc->p_arglen;
req->rq_callsize <<= 2;
req->rq_rcvsize = RPC_REPHDRSIZE + slack + proc->p_replen;
req->rq_rcvsize <<= 2;
req->rq_buffer = xprt->ops->buf_alloc(task,
req->rq_callsize + req->rq_rcvsize);
if (req->rq_buffer != NULL)
return;
dprintk("RPC: %5u rpc_buffer allocation failed\n", task->tk_pid);
if (RPC_IS_ASYNC(task) || !fatal_signal_pending(current)) {
task->tk_action = call_allocate;
rpc_delay(task, HZ>>4);
return;
}
rpc_exit(task, -ERESTARTSYS);
}
static inline int
rpc_task_need_encode(struct rpc_task *task)
{
return task->tk_rqstp->rq_snd_buf.len == 0;
}
static inline void
rpc_task_force_reencode(struct rpc_task *task)
{
task->tk_rqstp->rq_snd_buf.len = 0;
task->tk_rqstp->rq_bytes_sent = 0;
}
static inline void
rpc_xdr_buf_init(struct xdr_buf *buf, void *start, size_t len)
{
buf->head[0].iov_base = start;
buf->head[0].iov_len = len;
buf->tail[0].iov_len = 0;
buf->page_len = 0;
buf->flags = 0;
buf->len = 0;
buf->buflen = len;
}
/*
* 3. Encode arguments of an RPC call
*/
static void
rpc_xdr_encode(struct rpc_task *task)
{
struct rpc_rqst *req = task->tk_rqstp;
kxdreproc_t encode;
__be32 *p;
dprint_status(task);
rpc_xdr_buf_init(&req->rq_snd_buf,
req->rq_buffer,
req->rq_callsize);
rpc_xdr_buf_init(&req->rq_rcv_buf,
(char *)req->rq_buffer + req->rq_callsize,
req->rq_rcvsize);
p = rpc_encode_header(task);
if (p == NULL) {
printk(KERN_INFO "RPC: couldn't encode RPC header, exit EIO\n");
rpc_exit(task, -EIO);
return;
}
encode = task->tk_msg.rpc_proc->p_encode;
if (encode == NULL)
return;
task->tk_status = rpcauth_wrap_req(task, encode, req, p,
task->tk_msg.rpc_argp);
}
/*
* 4. Get the server port number if not yet set
*/
static void
call_bind(struct rpc_task *task)
{
struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt;
dprint_status(task);
task->tk_action = call_connect;
if (!xprt_bound(xprt)) {
task->tk_action = call_bind_status;
task->tk_timeout = xprt->bind_timeout;
xprt->ops->rpcbind(task);
}
}
/*
* 4a. Sort out bind result
*/
static void
call_bind_status(struct rpc_task *task)
{
int status = -EIO;
if (task->tk_status >= 0) {
dprint_status(task);
task->tk_status = 0;
task->tk_action = call_connect;
return;
}
trace_rpc_bind_status(task);
switch (task->tk_status) {
case -ENOMEM:
dprintk("RPC: %5u rpcbind out of memory\n", task->tk_pid);
rpc_delay(task, HZ >> 2);
goto retry_timeout;
case -EACCES:
dprintk("RPC: %5u remote rpcbind: RPC program/version "
"unavailable\n", task->tk_pid);
/* fail immediately if this is an RPC ping */
if (task->tk_msg.rpc_proc->p_proc == 0) {
status = -EOPNOTSUPP;
break;
}
if (task->tk_rebind_retry == 0)
break;
task->tk_rebind_retry--;
rpc_delay(task, 3*HZ);
goto retry_timeout;
case -ETIMEDOUT:
dprintk("RPC: %5u rpcbind request timed out\n",
task->tk_pid);
goto retry_timeout;
case -EPFNOSUPPORT:
/* server doesn't support any rpcbind version we know of */
dprintk("RPC: %5u unrecognized remote rpcbind service\n",
task->tk_pid);
break;
case -EPROTONOSUPPORT:
dprintk("RPC: %5u remote rpcbind version unavailable, retrying\n",
task->tk_pid);
goto retry_timeout;
case -ECONNREFUSED: /* connection problems */
case -ECONNRESET:
case -ECONNABORTED:
case -ENOTCONN:
case -EHOSTDOWN:
case -EHOSTUNREACH:
case -ENETUNREACH:
case -ENOBUFS:
case -EPIPE:
dprintk("RPC: %5u remote rpcbind unreachable: %d\n",
task->tk_pid, task->tk_status);
if (!RPC_IS_SOFTCONN(task)) {
rpc_delay(task, 5*HZ);
goto retry_timeout;
}
status = task->tk_status;
break;
default:
dprintk("RPC: %5u unrecognized rpcbind error (%d)\n",
task->tk_pid, -task->tk_status);
}
rpc_exit(task, status);
return;
retry_timeout:
task->tk_status = 0;
task->tk_action = call_timeout;
}
/*
* 4b. Connect to the RPC server
*/
static void
call_connect(struct rpc_task *task)
{
struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt;
dprintk("RPC: %5u call_connect xprt %p %s connected\n",
task->tk_pid, xprt,
(xprt_connected(xprt) ? "is" : "is not"));
task->tk_action = call_transmit;
if (!xprt_connected(xprt)) {
task->tk_action = call_connect_status;
if (task->tk_status < 0)
return;
if (task->tk_flags & RPC_TASK_NOCONNECT) {
rpc_exit(task, -ENOTCONN);
return;
}
xprt_connect(task);
}
}
/*
* 4c. Sort out connect result
*/
static void
call_connect_status(struct rpc_task *task)
{
struct rpc_clnt *clnt = task->tk_client;
int status = task->tk_status;
dprint_status(task);
trace_rpc_connect_status(task, status);
task->tk_status = 0;
switch (status) {
case -ECONNREFUSED:
case -ECONNRESET:
case -ECONNABORTED:
case -ENETUNREACH:
case -EHOSTUNREACH:
case -EADDRINUSE:
case -ENOBUFS:
case -EPIPE:
if (RPC_IS_SOFTCONN(task))
break;
/* retry with existing socket, after a delay */
rpc_delay(task, 3*HZ);
case -EAGAIN:
/* Check for timeouts before looping back to call_bind */
case -ETIMEDOUT:
task->tk_action = call_timeout;
return;
case 0:
clnt->cl_stats->netreconn++;
task->tk_action = call_transmit;
return;
}
rpc_exit(task, status);
}
/*
* 5. Transmit the RPC request, and wait for reply
*/
static void
call_transmit(struct rpc_task *task)
{
int is_retrans = RPC_WAS_SENT(task);
dprint_status(task);
task->tk_action = call_status;
if (task->tk_status < 0)
return;
if (!xprt_prepare_transmit(task))
return;
task->tk_action = call_transmit_status;
/* Encode here so that rpcsec_gss can use correct sequence number. */
if (rpc_task_need_encode(task)) {
rpc_xdr_encode(task);
/* Did the encode result in an error condition? */
if (task->tk_status != 0) {
/* Was the error nonfatal? */
if (task->tk_status == -EAGAIN)
rpc_delay(task, HZ >> 4);
else
rpc_exit(task, task->tk_status);
return;
}
}
xprt_transmit(task);
if (task->tk_status < 0)
return;
if (is_retrans)
task->tk_client->cl_stats->rpcretrans++;
/*
* On success, ensure that we call xprt_end_transmit() before sleeping
* in order to allow access to the socket to other RPC requests.
*/
call_transmit_status(task);
nfs41: Add backchannel processing support to RPC state machine Adds rpc_run_bc_task() which is called by the NFS callback service to process backchannel requests. It performs similar work to rpc_run_task() though "schedules" the backchannel task to be executed starting at the call_trasmit state in the RPC state machine. It also introduces some miscellaneous updates to the argument validation, call_transmit, and transport cleanup functions to take into account that there are now forechannel and backchannel tasks. Backchannel requests do not carry an RPC message structure, since the payload has already been XDR encoded using the existing NFSv4 callback mechanism. Introduce a new transmit state for the client to reply on to backchannel requests. This new state simply reserves the transport and issues the reply. In case of a connection related error, disconnects the transport and drops the reply. It requires the forechannel to re-establish the connection and the server to retransmit the request, as stated in NFSv4.1 section 2.9.2 "Client and Server Transport Behavior". Note: There is no need to loop attempting to reserve the transport. If EAGAIN is returned by xprt_prepare_transmit(), return with tk_status == 0, setting tk_action to call_bc_transmit. rpc_execute() will invoke it again after the task is taken off the sleep queue. [nfs41: rpc_run_bc_task() need not be exported outside RPC module] [nfs41: New call_bc_transmit RPC state] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfs41: Backchannel: No need to loop in call_bc_transmit()] Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [rpc_count_iostats incorrectly exits early] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [Convert rpc_reply_expected() to inline function] [Remove unnecessary BUG_ON()] [Rename variable] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com>
2009-04-01 21:23:03 +08:00
if (rpc_reply_expected(task))
return;
task->tk_action = rpc_exit_task;
rpc_wake_up_queued_task(&task->tk_rqstp->rq_xprt->pending, task);
}
/*
* 5a. Handle cleanup after a transmission
*/
static void
call_transmit_status(struct rpc_task *task)
{
task->tk_action = call_status;
/*
* Common case: success. Force the compiler to put this
* test first.
*/
if (task->tk_status == 0) {
xprt_end_transmit(task);
rpc_task_force_reencode(task);
return;
}
switch (task->tk_status) {
case -EAGAIN:
break;
default:
dprint_status(task);
xprt_end_transmit(task);
rpc_task_force_reencode(task);
break;
/*
* Special cases: if we've been waiting on the
* socket's write_space() callback, or if the
* socket just returned a connection error,
* then hold onto the transport lock.
*/
case -ECONNREFUSED:
case -EHOSTDOWN:
case -EHOSTUNREACH:
case -ENETUNREACH:
rpc: Add -EPERM processing for xs_udp_send_request() If an iptables drop rule is added for an nfs server, the client can end up in a softlockup. Because of the way that xs_sendpages() is structured, the -EPERM is ignored since the prior bits of the packet may have been successfully queued and thus xs_sendpages() returns a non-zero value. Then, xs_udp_send_request() thinks that because some bits were queued it should return -EAGAIN. We then try the request again and again, resulting in cpu spinning. Reproducer: 1) open a file on the nfs server '/nfs/foo' (mounted using udp) 2) iptables -A OUTPUT -d <nfs server ip> -j DROP 3) write to /nfs/foo 4) close /nfs/foo 5) iptables -D OUTPUT -d <nfs server ip> -j DROP The softlockup occurs in step 4 above. The previous patch, allows xs_sendpages() to return both a sent count and any error values that may have occurred. Thus, if we get an -EPERM, return that to the higher level code. With this patch in place we can successfully abort the above sequence and avoid the softlockup. I also tried the above test case on an nfs mount on tcp and although the system does not softlockup, I still ended up with the 'hung_task' firing after 120 seconds, due to the i/o being stuck. The tcp case appears a bit harder to fix, since -EPERM appears to get ignored much lower down in the stack and does not propogate up to xs_sendpages(). This case is not quite as insidious as the softlockup and it is not addressed here. Reported-by: Yigong Lou <ylou@akamai.com> Signed-off-by: Jason Baron <jbaron@akamai.com> Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
2014-09-25 02:08:04 +08:00
case -EPERM:
if (RPC_IS_SOFTCONN(task)) {
xprt_end_transmit(task);
rpc_exit(task, task->tk_status);
break;
}
case -ECONNRESET:
case -ECONNABORTED:
case -EADDRINUSE:
case -ENOTCONN:
case -ENOBUFS:
case -EPIPE:
rpc_task_force_reencode(task);
}
}
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
nfs41: Add backchannel processing support to RPC state machine Adds rpc_run_bc_task() which is called by the NFS callback service to process backchannel requests. It performs similar work to rpc_run_task() though "schedules" the backchannel task to be executed starting at the call_trasmit state in the RPC state machine. It also introduces some miscellaneous updates to the argument validation, call_transmit, and transport cleanup functions to take into account that there are now forechannel and backchannel tasks. Backchannel requests do not carry an RPC message structure, since the payload has already been XDR encoded using the existing NFSv4 callback mechanism. Introduce a new transmit state for the client to reply on to backchannel requests. This new state simply reserves the transport and issues the reply. In case of a connection related error, disconnects the transport and drops the reply. It requires the forechannel to re-establish the connection and the server to retransmit the request, as stated in NFSv4.1 section 2.9.2 "Client and Server Transport Behavior". Note: There is no need to loop attempting to reserve the transport. If EAGAIN is returned by xprt_prepare_transmit(), return with tk_status == 0, setting tk_action to call_bc_transmit. rpc_execute() will invoke it again after the task is taken off the sleep queue. [nfs41: rpc_run_bc_task() need not be exported outside RPC module] [nfs41: New call_bc_transmit RPC state] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfs41: Backchannel: No need to loop in call_bc_transmit()] Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [rpc_count_iostats incorrectly exits early] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [Convert rpc_reply_expected() to inline function] [Remove unnecessary BUG_ON()] [Rename variable] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com>
2009-04-01 21:23:03 +08:00
/*
* 5b. Send the backchannel RPC reply. On error, drop the reply. In
* addition, disconnect on connectivity errors.
*/
static void
call_bc_transmit(struct rpc_task *task)
{
struct rpc_rqst *req = task->tk_rqstp;
if (!xprt_prepare_transmit(task)) {
nfs41: Add backchannel processing support to RPC state machine Adds rpc_run_bc_task() which is called by the NFS callback service to process backchannel requests. It performs similar work to rpc_run_task() though "schedules" the backchannel task to be executed starting at the call_trasmit state in the RPC state machine. It also introduces some miscellaneous updates to the argument validation, call_transmit, and transport cleanup functions to take into account that there are now forechannel and backchannel tasks. Backchannel requests do not carry an RPC message structure, since the payload has already been XDR encoded using the existing NFSv4 callback mechanism. Introduce a new transmit state for the client to reply on to backchannel requests. This new state simply reserves the transport and issues the reply. In case of a connection related error, disconnects the transport and drops the reply. It requires the forechannel to re-establish the connection and the server to retransmit the request, as stated in NFSv4.1 section 2.9.2 "Client and Server Transport Behavior". Note: There is no need to loop attempting to reserve the transport. If EAGAIN is returned by xprt_prepare_transmit(), return with tk_status == 0, setting tk_action to call_bc_transmit. rpc_execute() will invoke it again after the task is taken off the sleep queue. [nfs41: rpc_run_bc_task() need not be exported outside RPC module] [nfs41: New call_bc_transmit RPC state] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfs41: Backchannel: No need to loop in call_bc_transmit()] Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [rpc_count_iostats incorrectly exits early] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [Convert rpc_reply_expected() to inline function] [Remove unnecessary BUG_ON()] [Rename variable] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com>
2009-04-01 21:23:03 +08:00
/*
* Could not reserve the transport. Try again after the
* transport is released.
*/
task->tk_status = 0;
task->tk_action = call_bc_transmit;
return;
}
task->tk_action = rpc_exit_task;
if (task->tk_status < 0) {
printk(KERN_NOTICE "RPC: Could not send backchannel reply "
"error: %d\n", task->tk_status);
return;
}
xprt_transmit(task);
xprt_end_transmit(task);
dprint_status(task);
switch (task->tk_status) {
case 0:
/* Success */
break;
case -EHOSTDOWN:
case -EHOSTUNREACH:
case -ENETUNREACH:
case -ETIMEDOUT:
/*
* Problem reaching the server. Disconnect and let the
* forechannel reestablish the connection. The server will
* have to retransmit the backchannel request and we'll
* reprocess it. Since these ops are idempotent, there's no
* need to cache our reply at this time.
*/
printk(KERN_NOTICE "RPC: Could not send backchannel reply "
"error: %d\n", task->tk_status);
xprt_conditional_disconnect(req->rq_xprt,
nfs41: Add backchannel processing support to RPC state machine Adds rpc_run_bc_task() which is called by the NFS callback service to process backchannel requests. It performs similar work to rpc_run_task() though "schedules" the backchannel task to be executed starting at the call_trasmit state in the RPC state machine. It also introduces some miscellaneous updates to the argument validation, call_transmit, and transport cleanup functions to take into account that there are now forechannel and backchannel tasks. Backchannel requests do not carry an RPC message structure, since the payload has already been XDR encoded using the existing NFSv4 callback mechanism. Introduce a new transmit state for the client to reply on to backchannel requests. This new state simply reserves the transport and issues the reply. In case of a connection related error, disconnects the transport and drops the reply. It requires the forechannel to re-establish the connection and the server to retransmit the request, as stated in NFSv4.1 section 2.9.2 "Client and Server Transport Behavior". Note: There is no need to loop attempting to reserve the transport. If EAGAIN is returned by xprt_prepare_transmit(), return with tk_status == 0, setting tk_action to call_bc_transmit. rpc_execute() will invoke it again after the task is taken off the sleep queue. [nfs41: rpc_run_bc_task() need not be exported outside RPC module] [nfs41: New call_bc_transmit RPC state] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfs41: Backchannel: No need to loop in call_bc_transmit()] Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [rpc_count_iostats incorrectly exits early] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [Convert rpc_reply_expected() to inline function] [Remove unnecessary BUG_ON()] [Rename variable] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com>
2009-04-01 21:23:03 +08:00
req->rq_connect_cookie);
break;
default:
/*
* We were unable to reply and will have to drop the
* request. The server should reconnect and retransmit.
*/
WARN_ON_ONCE(task->tk_status == -EAGAIN);
nfs41: Add backchannel processing support to RPC state machine Adds rpc_run_bc_task() which is called by the NFS callback service to process backchannel requests. It performs similar work to rpc_run_task() though "schedules" the backchannel task to be executed starting at the call_trasmit state in the RPC state machine. It also introduces some miscellaneous updates to the argument validation, call_transmit, and transport cleanup functions to take into account that there are now forechannel and backchannel tasks. Backchannel requests do not carry an RPC message structure, since the payload has already been XDR encoded using the existing NFSv4 callback mechanism. Introduce a new transmit state for the client to reply on to backchannel requests. This new state simply reserves the transport and issues the reply. In case of a connection related error, disconnects the transport and drops the reply. It requires the forechannel to re-establish the connection and the server to retransmit the request, as stated in NFSv4.1 section 2.9.2 "Client and Server Transport Behavior". Note: There is no need to loop attempting to reserve the transport. If EAGAIN is returned by xprt_prepare_transmit(), return with tk_status == 0, setting tk_action to call_bc_transmit. rpc_execute() will invoke it again after the task is taken off the sleep queue. [nfs41: rpc_run_bc_task() need not be exported outside RPC module] [nfs41: New call_bc_transmit RPC state] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfs41: Backchannel: No need to loop in call_bc_transmit()] Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [rpc_count_iostats incorrectly exits early] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [Convert rpc_reply_expected() to inline function] [Remove unnecessary BUG_ON()] [Rename variable] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com>
2009-04-01 21:23:03 +08:00
printk(KERN_NOTICE "RPC: Could not send backchannel reply "
"error: %d\n", task->tk_status);
break;
}
rpc_wake_up_queued_task(&req->rq_xprt->pending, task);
}
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
nfs41: Add backchannel processing support to RPC state machine Adds rpc_run_bc_task() which is called by the NFS callback service to process backchannel requests. It performs similar work to rpc_run_task() though "schedules" the backchannel task to be executed starting at the call_trasmit state in the RPC state machine. It also introduces some miscellaneous updates to the argument validation, call_transmit, and transport cleanup functions to take into account that there are now forechannel and backchannel tasks. Backchannel requests do not carry an RPC message structure, since the payload has already been XDR encoded using the existing NFSv4 callback mechanism. Introduce a new transmit state for the client to reply on to backchannel requests. This new state simply reserves the transport and issues the reply. In case of a connection related error, disconnects the transport and drops the reply. It requires the forechannel to re-establish the connection and the server to retransmit the request, as stated in NFSv4.1 section 2.9.2 "Client and Server Transport Behavior". Note: There is no need to loop attempting to reserve the transport. If EAGAIN is returned by xprt_prepare_transmit(), return with tk_status == 0, setting tk_action to call_bc_transmit. rpc_execute() will invoke it again after the task is taken off the sleep queue. [nfs41: rpc_run_bc_task() need not be exported outside RPC module] [nfs41: New call_bc_transmit RPC state] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfs41: Backchannel: No need to loop in call_bc_transmit()] Signed-off-by: Andy Adamson <andros@netapp.com> Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [rpc_count_iostats incorrectly exits early] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [Convert rpc_reply_expected() to inline function] [Remove unnecessary BUG_ON()] [Rename variable] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com>
2009-04-01 21:23:03 +08:00
/*
* 6. Sort out the RPC call status
*/
static void
call_status(struct rpc_task *task)
{
struct rpc_clnt *clnt = task->tk_client;
struct rpc_rqst *req = task->tk_rqstp;
int status;
if (req->rq_reply_bytes_recvd > 0 && !req->rq_bytes_sent)
task->tk_status = req->rq_reply_bytes_recvd;
dprint_status(task);
status = task->tk_status;
if (status >= 0) {
task->tk_action = call_decode;
return;
}
trace_rpc_call_status(task);
task->tk_status = 0;
switch(status) {
case -EHOSTDOWN:
case -EHOSTUNREACH:
case -ENETUNREACH:
rpc: Add -EPERM processing for xs_udp_send_request() If an iptables drop rule is added for an nfs server, the client can end up in a softlockup. Because of the way that xs_sendpages() is structured, the -EPERM is ignored since the prior bits of the packet may have been successfully queued and thus xs_sendpages() returns a non-zero value. Then, xs_udp_send_request() thinks that because some bits were queued it should return -EAGAIN. We then try the request again and again, resulting in cpu spinning. Reproducer: 1) open a file on the nfs server '/nfs/foo' (mounted using udp) 2) iptables -A OUTPUT -d <nfs server ip> -j DROP 3) write to /nfs/foo 4) close /nfs/foo 5) iptables -D OUTPUT -d <nfs server ip> -j DROP The softlockup occurs in step 4 above. The previous patch, allows xs_sendpages() to return both a sent count and any error values that may have occurred. Thus, if we get an -EPERM, return that to the higher level code. With this patch in place we can successfully abort the above sequence and avoid the softlockup. I also tried the above test case on an nfs mount on tcp and although the system does not softlockup, I still ended up with the 'hung_task' firing after 120 seconds, due to the i/o being stuck. The tcp case appears a bit harder to fix, since -EPERM appears to get ignored much lower down in the stack and does not propogate up to xs_sendpages(). This case is not quite as insidious as the softlockup and it is not addressed here. Reported-by: Yigong Lou <ylou@akamai.com> Signed-off-by: Jason Baron <jbaron@akamai.com> Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
2014-09-25 02:08:04 +08:00
case -EPERM:
if (RPC_IS_SOFTCONN(task)) {
rpc_exit(task, status);
break;
}
/*
* Delay any retries for 3 seconds, then handle as if it
* were a timeout.
*/
rpc_delay(task, 3*HZ);
case -ETIMEDOUT:
task->tk_action = call_timeout;
if (!(task->tk_flags & RPC_TASK_NO_RETRANS_TIMEOUT)
&& task->tk_client->cl_discrtry)
xprt_conditional_disconnect(req->rq_xprt,
req->rq_connect_cookie);
break;
case -ECONNREFUSED:
case -ECONNRESET:
case -ECONNABORTED:
rpc_force_rebind(clnt);
case -EADDRINUSE:
case -ENOBUFS:
rpc_delay(task, 3*HZ);
case -EPIPE:
case -ENOTCONN:
task->tk_action = call_bind;
break;
case -EAGAIN:
task->tk_action = call_transmit;
break;
case -EIO:
/* shutdown or soft timeout */
rpc_exit(task, status);
break;
default:
if (clnt->cl_chatty)
printk("%s: RPC call returned error %d\n",
clnt->cl_program->name, -status);
rpc_exit(task, status);
}
}
/*
* 6a. Handle RPC timeout
* We do not release the request slot, so we keep using the
* same XID for all retransmits.
*/
static void
call_timeout(struct rpc_task *task)
{
struct rpc_clnt *clnt = task->tk_client;
if (xprt_adjust_timeout(task->tk_rqstp) == 0) {
dprintk("RPC: %5u call_timeout (minor)\n", task->tk_pid);
goto retry;
}
dprintk("RPC: %5u call_timeout (major)\n", task->tk_pid);
task->tk_timeouts++;
if (RPC_IS_SOFTCONN(task)) {
rpc_exit(task, -ETIMEDOUT);
return;
}
if (RPC_IS_SOFT(task)) {
if (clnt->cl_chatty) {
rcu_read_lock();
printk(KERN_NOTICE "%s: server %s not responding, timed out\n",
clnt->cl_program->name,
rcu_dereference(clnt->cl_xprt)->servername);
rcu_read_unlock();
}
if (task->tk_flags & RPC_TASK_TIMEOUT)
rpc_exit(task, -ETIMEDOUT);
else
rpc_exit(task, -EIO);
return;
}
if (!(task->tk_flags & RPC_CALL_MAJORSEEN)) {
task->tk_flags |= RPC_CALL_MAJORSEEN;
if (clnt->cl_chatty) {
rcu_read_lock();
printk(KERN_NOTICE "%s: server %s not responding, still trying\n",
clnt->cl_program->name,
rcu_dereference(clnt->cl_xprt)->servername);
rcu_read_unlock();
}
}
rpc_force_rebind(clnt);
/*
* Did our request time out due to an RPCSEC_GSS out-of-sequence
* event? RFC2203 requires the server to drop all such requests.
*/
rpcauth_invalcred(task);
retry:
task->tk_action = call_bind;
task->tk_status = 0;
}
/*
* 7. Decode the RPC reply
*/
static void
call_decode(struct rpc_task *task)
{
struct rpc_clnt *clnt = task->tk_client;
struct rpc_rqst *req = task->tk_rqstp;
kxdrdproc_t decode = task->tk_msg.rpc_proc->p_decode;
__be32 *p;
dprint_status(task);
if (task->tk_flags & RPC_CALL_MAJORSEEN) {
if (clnt->cl_chatty) {
rcu_read_lock();
printk(KERN_NOTICE "%s: server %s OK\n",
clnt->cl_program->name,
rcu_dereference(clnt->cl_xprt)->servername);
rcu_read_unlock();
}
task->tk_flags &= ~RPC_CALL_MAJORSEEN;
}
/*
* Ensure that we see all writes made by xprt_complete_rqst()
* before it changed req->rq_reply_bytes_recvd.
*/
smp_rmb();
req->rq_rcv_buf.len = req->rq_private_buf.len;
/* Check that the softirq receive buffer is valid */
WARN_ON(memcmp(&req->rq_rcv_buf, &req->rq_private_buf,
sizeof(req->rq_rcv_buf)) != 0);
if (req->rq_rcv_buf.len < 12) {
if (!RPC_IS_SOFT(task)) {
task->tk_action = call_bind;
goto out_retry;
}
dprintk("RPC: %s: too small RPC reply size (%d bytes)\n",
clnt->cl_program->name, task->tk_status);
task->tk_action = call_timeout;
goto out_retry;
}
p = rpc_verify_header(task);
if (IS_ERR(p)) {
if (p == ERR_PTR(-EAGAIN))
goto out_retry;
return;
}
task->tk_action = rpc_exit_task;
if (decode) {
task->tk_status = rpcauth_unwrap_resp(task, decode, req, p,
task->tk_msg.rpc_resp);
}
dprintk("RPC: %5u call_decode result %d\n", task->tk_pid,
task->tk_status);
return;
out_retry:
task->tk_status = 0;
/* Note: rpc_verify_header() may have freed the RPC slot */
if (task->tk_rqstp == req) {
req->rq_reply_bytes_recvd = req->rq_rcv_buf.len = 0;
if (task->tk_client->cl_discrtry)
xprt_conditional_disconnect(req->rq_xprt,
req->rq_connect_cookie);
}
}
static __be32 *
rpc_encode_header(struct rpc_task *task)
{
struct rpc_clnt *clnt = task->tk_client;
struct rpc_rqst *req = task->tk_rqstp;
__be32 *p = req->rq_svec[0].iov_base;
/* FIXME: check buffer size? */
p = xprt_skip_transport_header(req->rq_xprt, p);
*p++ = req->rq_xid; /* XID */
*p++ = htonl(RPC_CALL); /* CALL */
*p++ = htonl(RPC_VERSION); /* RPC version */
*p++ = htonl(clnt->cl_prog); /* program number */
*p++ = htonl(clnt->cl_vers); /* program version */
*p++ = htonl(task->tk_msg.rpc_proc->p_proc); /* procedure */
p = rpcauth_marshcred(task, p);
req->rq_slen = xdr_adjust_iovec(&req->rq_svec[0], p);
return p;
}
static __be32 *
rpc_verify_header(struct rpc_task *task)
{
struct rpc_clnt *clnt = task->tk_client;
struct kvec *iov = &task->tk_rqstp->rq_rcv_buf.head[0];
int len = task->tk_rqstp->rq_rcv_buf.len >> 2;
__be32 *p = iov->iov_base;
u32 n;
int error = -EACCES;
if ((task->tk_rqstp->rq_rcv_buf.len & 3) != 0) {
/* RFC-1014 says that the representation of XDR data must be a
* multiple of four bytes
* - if it isn't pointer subtraction in the NFS client may give
* undefined results
*/
dprintk("RPC: %5u %s: XDR representation not a multiple of"
" 4 bytes: 0x%x\n", task->tk_pid, __func__,
task->tk_rqstp->rq_rcv_buf.len);
error = -EIO;
goto out_err;
}
if ((len -= 3) < 0)
goto out_overflow;
nfs41: Process the RPC call direction Reading and storing the RPC direction is a three step process. 1. xs_tcp_read_calldir() reads the RPC direction, but it will not store it in the XDR buffer since the 'struct rpc_rqst' is not yet available. 2. The 'struct rpc_rqst' is obtained during the TCP_RCV_COPY_DATA state. This state need not necessarily be preceeded by the TCP_RCV_READ_CALLDIR. For example, we may be reading a continuation packet to a large reply. Therefore, we can't simply obtain the 'struct rpc_rqst' during the TCP_RCV_READ_CALLDIR state and assume it's available during TCP_RCV_COPY_DATA. This patch adds a new TCP_RCV_READ_CALLDIR flag to indicate the need to read the RPC direction. It then uses TCP_RCV_COPY_CALLDIR to indicate the RPC direction needs to be saved after the 'struct rpc_rqst' has been allocated. 3. The 'struct rpc_rqst' is obtained by the xs_tcp_read_data() helper functions. xs_tcp_read_common() then saves the RPC direction in the XDR buffer if TCP_RCV_COPY_CALLDIR is set. This will happen when we're reading the data immediately after the direction was read. xs_tcp_read_common() then clears this flag. [was nfs41: Skip past the RPC call direction] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfs41: sunrpc: Add RPC direction back into the XDR buffer] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfs41: sunrpc: Don't skip past the RPC call direction] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com>
2009-04-01 21:22:54 +08:00
p += 1; /* skip XID */
if ((n = ntohl(*p++)) != RPC_REPLY) {
dprintk("RPC: %5u %s: not an RPC reply: %x\n",
nfs41: Process the RPC call direction Reading and storing the RPC direction is a three step process. 1. xs_tcp_read_calldir() reads the RPC direction, but it will not store it in the XDR buffer since the 'struct rpc_rqst' is not yet available. 2. The 'struct rpc_rqst' is obtained during the TCP_RCV_COPY_DATA state. This state need not necessarily be preceeded by the TCP_RCV_READ_CALLDIR. For example, we may be reading a continuation packet to a large reply. Therefore, we can't simply obtain the 'struct rpc_rqst' during the TCP_RCV_READ_CALLDIR state and assume it's available during TCP_RCV_COPY_DATA. This patch adds a new TCP_RCV_READ_CALLDIR flag to indicate the need to read the RPC direction. It then uses TCP_RCV_COPY_CALLDIR to indicate the RPC direction needs to be saved after the 'struct rpc_rqst' has been allocated. 3. The 'struct rpc_rqst' is obtained by the xs_tcp_read_data() helper functions. xs_tcp_read_common() then saves the RPC direction in the XDR buffer if TCP_RCV_COPY_CALLDIR is set. This will happen when we're reading the data immediately after the direction was read. xs_tcp_read_common() then clears this flag. [was nfs41: Skip past the RPC call direction] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfs41: sunrpc: Add RPC direction back into the XDR buffer] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfs41: sunrpc: Don't skip past the RPC call direction] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com>
2009-04-01 21:22:54 +08:00
task->tk_pid, __func__, n);
error = -EIO;
goto out_garbage;
}
nfs41: Process the RPC call direction Reading and storing the RPC direction is a three step process. 1. xs_tcp_read_calldir() reads the RPC direction, but it will not store it in the XDR buffer since the 'struct rpc_rqst' is not yet available. 2. The 'struct rpc_rqst' is obtained during the TCP_RCV_COPY_DATA state. This state need not necessarily be preceeded by the TCP_RCV_READ_CALLDIR. For example, we may be reading a continuation packet to a large reply. Therefore, we can't simply obtain the 'struct rpc_rqst' during the TCP_RCV_READ_CALLDIR state and assume it's available during TCP_RCV_COPY_DATA. This patch adds a new TCP_RCV_READ_CALLDIR flag to indicate the need to read the RPC direction. It then uses TCP_RCV_COPY_CALLDIR to indicate the RPC direction needs to be saved after the 'struct rpc_rqst' has been allocated. 3. The 'struct rpc_rqst' is obtained by the xs_tcp_read_data() helper functions. xs_tcp_read_common() then saves the RPC direction in the XDR buffer if TCP_RCV_COPY_CALLDIR is set. This will happen when we're reading the data immediately after the direction was read. xs_tcp_read_common() then clears this flag. [was nfs41: Skip past the RPC call direction] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfs41: sunrpc: Add RPC direction back into the XDR buffer] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> [nfs41: sunrpc: Don't skip past the RPC call direction] Signed-off-by: Ricardo Labiaga <Ricardo.Labiaga@netapp.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com>
2009-04-01 21:22:54 +08:00
if ((n = ntohl(*p++)) != RPC_MSG_ACCEPTED) {
if (--len < 0)
goto out_overflow;
switch ((n = ntohl(*p++))) {
case RPC_AUTH_ERROR:
break;
case RPC_MISMATCH:
dprintk("RPC: %5u %s: RPC call version mismatch!\n",
task->tk_pid, __func__);
error = -EPROTONOSUPPORT;
goto out_err;
default:
dprintk("RPC: %5u %s: RPC call rejected, "
"unknown error: %x\n",
task->tk_pid, __func__, n);
error = -EIO;
goto out_err;
}
if (--len < 0)
goto out_overflow;
switch ((n = ntohl(*p++))) {
case RPC_AUTH_REJECTEDCRED:
case RPC_AUTH_REJECTEDVERF:
case RPCSEC_GSS_CREDPROBLEM:
case RPCSEC_GSS_CTXPROBLEM:
if (!task->tk_cred_retry)
break;
task->tk_cred_retry--;
dprintk("RPC: %5u %s: retry stale creds\n",
task->tk_pid, __func__);
rpcauth_invalcred(task);
/* Ensure we obtain a new XID! */
xprt_release(task);
SUNRPC: After calling xprt_release(), we must restart from call_reserve Rob Leslie reports seeing the following Oops after his Kerberos session expired. BUG: unable to handle kernel NULL pointer dereference at 00000058 IP: [<e186ed94>] rpcauth_refreshcred+0x11/0x12c [sunrpc] *pde = 00000000 Oops: 0000 [#1] last sysfs file: /sys/devices/platform/pc87360.26144/temp3_input Modules linked in: autofs4 authenc esp4 xfrm4_mode_transport ipt_LOG ipt_REJECT xt_limit xt_state ipt_REDIRECT xt_owner xt_HL xt_hl xt_tcpudp xt_mark cls_u32 cls_tcindex sch_sfq sch_htb sch_dsmark geodewdt deflate ctr twofish_generic twofish_i586 twofish_common camellia serpent blowfish cast5 cbc xcbc rmd160 sha512_generic sha1_generic hmac crypto_null af_key rpcsec_gss_krb5 nfsd exportfs nfs lockd fscache nfs_acl auth_rpcgss sunrpc ip_gre sit tunnel4 dummy ext3 jbd nf_nat_irc nf_conntrack_irc nf_nat_ftp nf_conntrack_ftp iptable_mangle iptable_nat nf_nat nf_conntrack_ipv4 nf_conntrack nf_defrag_ipv4 iptable_filter ip_tables x_tables pc8736x_gpio nsc_gpio pc87360 hwmon_vid loop aes_i586 aes_generic sha256_generic dm_crypt cs5535_gpio serio_raw cs5535_mfgpt hifn_795x des_generic geode_rng rng_core led_class ext4 mbcache jbd2 crc16 dm_mirror dm_region_hash dm_log dm_snapshot dm_mod sd_mod crc_t10dif ide_pci_generic cs5536 amd74xx ide_core pata_cs5536 ata_generic libata usb_storage via_rhine mii scsi_mod btrfs zlib_deflate crc32c libcrc32c [last unloaded: scsi_wait_scan] Pid: 12875, comm: sudo Not tainted 2.6.36-net5501 #1 / EIP: 0060:[<e186ed94>] EFLAGS: 00010292 CPU: 0 EIP is at rpcauth_refreshcred+0x11/0x12c [sunrpc] EAX: 00000000 EBX: defb13a0 ECX: 00000006 EDX: e18683b8 ESI: defb13a0 EDI: 00000000 EBP: 00000000 ESP: de571d58 DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 0068 Process sudo (pid: 12875, ti=de570000 task=decd1430 task.ti=de570000) Stack: e186e008 00000000 defb13a0 0000000d deda6000 e1868f22 e196f12b defb13a0 <0> defb13d8 00000000 00000000 e186e0aa 00000000 defb13a0 de571dac 00000000 <0> e186956c de571e34 debea5c0 de571dc8 e186967a 00000000 debea5c0 de571e34 Call Trace: [<e186e008>] ? rpc_wake_up_next+0x114/0x11b [sunrpc] [<e1868f22>] ? call_decode+0x24a/0x5af [sunrpc] [<e196f12b>] ? nfs4_xdr_dec_access+0x0/0xa2 [nfs] [<e186e0aa>] ? __rpc_execute+0x62/0x17b [sunrpc] [<e186956c>] ? rpc_run_task+0x91/0x97 [sunrpc] [<e186967a>] ? rpc_call_sync+0x40/0x5b [sunrpc] [<e1969ca2>] ? nfs4_proc_access+0x10a/0x176 [nfs] [<e19572fa>] ? nfs_do_access+0x2b1/0x2c0 [nfs] [<e186ed61>] ? rpcauth_lookupcred+0x62/0x84 [sunrpc] [<e19573b6>] ? nfs_permission+0xad/0x13b [nfs] [<c0177824>] ? exec_permission+0x15/0x4b [<c0177fbd>] ? link_path_walk+0x4f/0x456 [<c017867d>] ? path_walk+0x4c/0xa8 [<c0179678>] ? do_path_lookup+0x1f/0x68 [<c017a3fb>] ? user_path_at+0x37/0x5f [<c016359c>] ? handle_mm_fault+0x229/0x55b [<c0170a2d>] ? sys_faccessat+0x93/0x146 [<c0170aef>] ? sys_access+0xf/0x13 [<c02cf615>] ? syscall_call+0x7/0xb Code: 0f 94 c2 84 d2 74 09 8b 44 24 0c e8 6a e9 8b de 83 c4 14 89 d8 5b 5e 5f 5d c3 55 57 56 53 83 ec 1c fc 89 c6 8b 40 10 89 44 24 04 <8b> 58 58 85 db 0f 85 d4 00 00 00 0f b7 46 70 8b 56 20 89 c5 83 EIP: [<e186ed94>] rpcauth_refreshcred+0x11/0x12c [sunrpc] SS:ESP 0068:de571d58 CR2: 0000000000000058 This appears to be caused by the function rpc_verify_header() first calling xprt_release(), then doing a call_refresh. If we release the transport slot, we should _always_ jump back to call_reserve before calling anything else. Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com> Cc: stable@kernel.org
2010-10-25 05:17:31 +08:00
task->tk_action = call_reserve;
goto out_retry;
case RPC_AUTH_BADCRED:
case RPC_AUTH_BADVERF:
/* possibly garbled cred/verf? */
if (!task->tk_garb_retry)
break;
task->tk_garb_retry--;
dprintk("RPC: %5u %s: retry garbled creds\n",
task->tk_pid, __func__);
task->tk_action = call_bind;
goto out_retry;
case RPC_AUTH_TOOWEAK:
rcu_read_lock();
printk(KERN_NOTICE "RPC: server %s requires stronger "
"authentication.\n",
rcu_dereference(clnt->cl_xprt)->servername);
rcu_read_unlock();
break;
default:
dprintk("RPC: %5u %s: unknown auth error: %x\n",
task->tk_pid, __func__, n);
error = -EIO;
}
dprintk("RPC: %5u %s: call rejected %d\n",
task->tk_pid, __func__, n);
goto out_err;
}
p = rpcauth_checkverf(task, p);
if (IS_ERR(p)) {
error = PTR_ERR(p);
dprintk("RPC: %5u %s: auth check failed with %d\n",
task->tk_pid, __func__, error);
goto out_garbage; /* bad verifier, retry */
}
len = p - (__be32 *)iov->iov_base - 1;
if (len < 0)
goto out_overflow;
switch ((n = ntohl(*p++))) {
case RPC_SUCCESS:
return p;
case RPC_PROG_UNAVAIL:
dprintk_rcu("RPC: %5u %s: program %u is unsupported "
"by server %s\n", task->tk_pid, __func__,
(unsigned int)clnt->cl_prog,
rcu_dereference(clnt->cl_xprt)->servername);
error = -EPFNOSUPPORT;
goto out_err;
case RPC_PROG_MISMATCH:
dprintk_rcu("RPC: %5u %s: program %u, version %u unsupported "
"by server %s\n", task->tk_pid, __func__,
(unsigned int)clnt->cl_prog,
(unsigned int)clnt->cl_vers,
rcu_dereference(clnt->cl_xprt)->servername);
error = -EPROTONOSUPPORT;
goto out_err;
case RPC_PROC_UNAVAIL:
dprintk_rcu("RPC: %5u %s: proc %s unsupported by program %u, "
"version %u on server %s\n",
task->tk_pid, __func__,
rpc_proc_name(task),
clnt->cl_prog, clnt->cl_vers,
rcu_dereference(clnt->cl_xprt)->servername);
error = -EOPNOTSUPP;
goto out_err;
case RPC_GARBAGE_ARGS:
dprintk("RPC: %5u %s: server saw garbage\n",
task->tk_pid, __func__);
break; /* retry */
default:
dprintk("RPC: %5u %s: server accept status: %x\n",
task->tk_pid, __func__, n);
/* Also retry */
}
out_garbage:
clnt->cl_stats->rpcgarbage++;
if (task->tk_garb_retry) {
task->tk_garb_retry--;
dprintk("RPC: %5u %s: retrying\n",
task->tk_pid, __func__);
task->tk_action = call_bind;
out_retry:
return ERR_PTR(-EAGAIN);
}
out_err:
rpc_exit(task, error);
dprintk("RPC: %5u %s: call failed with error %d\n", task->tk_pid,
__func__, error);
return ERR_PTR(error);
out_overflow:
dprintk("RPC: %5u %s: server reply was truncated.\n", task->tk_pid,
__func__);
goto out_garbage;
}
static void rpcproc_encode_null(void *rqstp, struct xdr_stream *xdr, void *obj)
{
}
static int rpcproc_decode_null(void *rqstp, struct xdr_stream *xdr, void *obj)
{
return 0;
}
static struct rpc_procinfo rpcproc_null = {
.p_encode = rpcproc_encode_null,
.p_decode = rpcproc_decode_null,
};
static int rpc_ping(struct rpc_clnt *clnt)
{
struct rpc_message msg = {
.rpc_proc = &rpcproc_null,
};
int err;
msg.rpc_cred = authnull_ops.lookup_cred(NULL, NULL, 0);
err = rpc_call_sync(clnt, &msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN);
put_rpccred(msg.rpc_cred);
return err;
}
struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, int flags)
{
struct rpc_message msg = {
.rpc_proc = &rpcproc_null,
.rpc_cred = cred,
};
struct rpc_task_setup task_setup_data = {
.rpc_client = clnt,
.rpc_message = &msg,
.callback_ops = &rpc_default_ops,
.flags = flags,
};
return rpc_run_task(&task_setup_data);
}
EXPORT_SYMBOL_GPL(rpc_call_null);
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
static void rpc_show_header(void)
{
SUNRPC: Display some debugging information as text rather than numbers In rpc_show_tasks(), display the program name, version number, procedure name and tk_action as human-readable variable-length text fields rather than columnar numbers. Doing the symbol lookup here helps in cases where we have actual debugging output from a kernel log, but don't have access to the kernel image or RPC module that generated the output. Sample output: -pid- flgs status -client- --rqstp- -timeout ---ops-- 5608 0001 -11 eeb42690 f6d93710 0 f8fa1764 nfsv3 WRITE a:call_transmit_status q:none 5609 0001 -11 eeb42690 f6d937e0 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5610 0001 -11 eeb42690 f6d93230 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5611 0001 -11 eeb42690 f6d93300 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5612 0001 -11 eeb42690 f6d93090 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5613 0001 -11 eeb42690 f6d933d0 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5614 0001 -11 eeb42690 f6d93cc0 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5615 0001 -11 eeb42690 f6d93a50 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5616 0001 -11 eeb42690 f6d93640 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5617 0001 -11 eeb42690 f6d93b20 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5618 0001 -11 eeb42690 f6d93160 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending Signed-off-by: Chuck Lever <chuck.lever@oracle.com> Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
2008-05-22 05:09:41 +08:00
printk(KERN_INFO "-pid- flgs status -client- --rqstp- "
"-timeout ---ops--\n");
}
static void rpc_show_task(const struct rpc_clnt *clnt,
const struct rpc_task *task)
{
const char *rpc_waitq = "none";
if (RPC_IS_QUEUED(task))
rpc_waitq = rpc_qname(task->tk_waitqueue);
printk(KERN_INFO "%5u %04x %6d %8p %8p %8ld %8p %sv%u %s a:%ps q:%s\n",
SUNRPC: Display some debugging information as text rather than numbers In rpc_show_tasks(), display the program name, version number, procedure name and tk_action as human-readable variable-length text fields rather than columnar numbers. Doing the symbol lookup here helps in cases where we have actual debugging output from a kernel log, but don't have access to the kernel image or RPC module that generated the output. Sample output: -pid- flgs status -client- --rqstp- -timeout ---ops-- 5608 0001 -11 eeb42690 f6d93710 0 f8fa1764 nfsv3 WRITE a:call_transmit_status q:none 5609 0001 -11 eeb42690 f6d937e0 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5610 0001 -11 eeb42690 f6d93230 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5611 0001 -11 eeb42690 f6d93300 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5612 0001 -11 eeb42690 f6d93090 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5613 0001 -11 eeb42690 f6d933d0 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5614 0001 -11 eeb42690 f6d93cc0 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5615 0001 -11 eeb42690 f6d93a50 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5616 0001 -11 eeb42690 f6d93640 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5617 0001 -11 eeb42690 f6d93b20 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending 5618 0001 -11 eeb42690 f6d93160 0 f8fa1764 nfsv3 WRITE a:call_status q:xprt_sending Signed-off-by: Chuck Lever <chuck.lever@oracle.com> Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
2008-05-22 05:09:41 +08:00
task->tk_pid, task->tk_flags, task->tk_status,
clnt, task->tk_rqstp, task->tk_timeout, task->tk_ops,
clnt->cl_program->name, clnt->cl_vers, rpc_proc_name(task),
task->tk_action, rpc_waitq);
}
void rpc_show_tasks(struct net *net)
{
struct rpc_clnt *clnt;
struct rpc_task *task;
int header = 0;
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
spin_lock(&sn->rpc_client_lock);
list_for_each_entry(clnt, &sn->all_clients, cl_clients) {
spin_lock(&clnt->cl_lock);
list_for_each_entry(task, &clnt->cl_tasks, tk_task) {
if (!header) {
rpc_show_header();
header++;
}
rpc_show_task(clnt, task);
}
spin_unlock(&clnt->cl_lock);
}
spin_unlock(&sn->rpc_client_lock);
}
#endif