OpenCloudOS-Kernel/fs/afs/rxrpc.c

885 lines
21 KiB
C
Raw Normal View History

/* Maintain an RxRPC server socket to do AFS communications through
*
* Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo <tj@kernel.org> Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 16:04:11 +08:00
#include <linux/slab.h>
#include <net/sock.h>
#include <net/af_rxrpc.h>
#include <rxrpc/packet.h>
#include "internal.h"
#include "afs_cm.h"
static struct socket *afs_socket; /* my RxRPC socket */
static struct workqueue_struct *afs_async_calls;
static atomic_t afs_outstanding_calls;
static atomic_t afs_outstanding_skbs;
static void afs_wake_up_call_waiter(struct afs_call *);
static int afs_wait_for_call_to_complete(struct afs_call *);
static void afs_wake_up_async_call(struct afs_call *);
static int afs_dont_wait_for_call_to_complete(struct afs_call *);
static void afs_process_async_call(struct afs_call *);
static void afs_rx_interceptor(struct sock *, unsigned long, struct sk_buff *);
static int afs_deliver_cm_op_id(struct afs_call *, struct sk_buff *, bool);
/* synchronous call management */
const struct afs_wait_mode afs_sync_call = {
.rx_wakeup = afs_wake_up_call_waiter,
.wait = afs_wait_for_call_to_complete,
};
/* asynchronous call management */
const struct afs_wait_mode afs_async_call = {
.rx_wakeup = afs_wake_up_async_call,
.wait = afs_dont_wait_for_call_to_complete,
};
/* asynchronous incoming call management */
static const struct afs_wait_mode afs_async_incoming_call = {
.rx_wakeup = afs_wake_up_async_call,
};
/* asynchronous incoming call initial processing */
static const struct afs_call_type afs_RXCMxxxx = {
.name = "CB.xxxx",
.deliver = afs_deliver_cm_op_id,
.abort_to_error = afs_abort_to_error,
};
static void afs_collect_incoming_call(struct work_struct *);
static struct sk_buff_head afs_incoming_calls;
static DECLARE_WORK(afs_collect_incoming_call_work, afs_collect_incoming_call);
AFS: Fix kafs module unloading At present, it is not possible to successfully unload the kafs module if there are outstanding async outgoing calls (those made with afs_make_call()). This appears to be due to the changes introduced by: commit 059499453a9abd1857d442b44da8b4c126dc72a8 Author: Tejun Heo <tj@kernel.org> Date: Fri Mar 7 10:24:50 2014 -0500 Subject: afs: don't use PREPARE_WORK which didn't go far enough. The problem is due to: (1) The aforementioned commit introduced a separate handler function pointer in the call, call->async_workfn, in addition to the original workqueue item, call->async_work, for asynchronous operations because workqueues subsystem cannot handle the workqueue item pointer being changed whilst the item is queued or being processed. (2) afs_async_workfn() was introduced in that commit to be the callback for call->async_work. Its sole purpose is to run whatever call->async_workfn points to. (3) call->async_workfn is only used from afs_async_workfn(), which is only set on async_work by afs_collect_incoming_call() - ie. for incoming calls. (4) call->async_workfn is *not* set by afs_make_call() when outgoing calls are made, and call->async_work is set afs_process_async_call() - and not afs_async_workfn(). (5) afs_process_async_call() now changes call->async_workfn rather than call->async_work to point to afs_delete_async_call() to clean up, but this is only effective for incoming calls because call->async_work does not point to afs_async_workfn() for outgoing calls. (6) Because, for incoming calls, call->async_work remains pointing to afs_process_async_call() this results in an infinite loop. Instead, make the workqueue uniformly vector through call->async_workfn, via afs_async_workfn() and simply initialise call->async_workfn to point to afs_process_async_call() in afs_make_call(). Signed-off-by: Nathaniel Wesley Filardo <nwf@cs.jhu.edu> Signed-off-by: David Howells <dhowells@redhat.com> Reviewed-by: Tejun Heo <tj@kernel.org>
2014-05-21 21:58:26 +08:00
static void afs_async_workfn(struct work_struct *work)
{
struct afs_call *call = container_of(work, struct afs_call, async_work);
call->async_workfn(call);
AFS: Fix kafs module unloading At present, it is not possible to successfully unload the kafs module if there are outstanding async outgoing calls (those made with afs_make_call()). This appears to be due to the changes introduced by: commit 059499453a9abd1857d442b44da8b4c126dc72a8 Author: Tejun Heo <tj@kernel.org> Date: Fri Mar 7 10:24:50 2014 -0500 Subject: afs: don't use PREPARE_WORK which didn't go far enough. The problem is due to: (1) The aforementioned commit introduced a separate handler function pointer in the call, call->async_workfn, in addition to the original workqueue item, call->async_work, for asynchronous operations because workqueues subsystem cannot handle the workqueue item pointer being changed whilst the item is queued or being processed. (2) afs_async_workfn() was introduced in that commit to be the callback for call->async_work. Its sole purpose is to run whatever call->async_workfn points to. (3) call->async_workfn is only used from afs_async_workfn(), which is only set on async_work by afs_collect_incoming_call() - ie. for incoming calls. (4) call->async_workfn is *not* set by afs_make_call() when outgoing calls are made, and call->async_work is set afs_process_async_call() - and not afs_async_workfn(). (5) afs_process_async_call() now changes call->async_workfn rather than call->async_work to point to afs_delete_async_call() to clean up, but this is only effective for incoming calls because call->async_work does not point to afs_async_workfn() for outgoing calls. (6) Because, for incoming calls, call->async_work remains pointing to afs_process_async_call() this results in an infinite loop. Instead, make the workqueue uniformly vector through call->async_workfn, via afs_async_workfn() and simply initialise call->async_workfn to point to afs_process_async_call() in afs_make_call(). Signed-off-by: Nathaniel Wesley Filardo <nwf@cs.jhu.edu> Signed-off-by: David Howells <dhowells@redhat.com> Reviewed-by: Tejun Heo <tj@kernel.org>
2014-05-21 21:58:26 +08:00
}
static int afs_wait_atomic_t(atomic_t *p)
{
schedule();
return 0;
}
/*
* open an RxRPC socket and bind it to be a server for callback notifications
* - the socket is left in blocking mode and non-blocking ops use MSG_DONTWAIT
*/
int afs_open_socket(void)
{
struct sockaddr_rxrpc srx;
struct socket *socket;
int ret;
_enter("");
skb_queue_head_init(&afs_incoming_calls);
rxrpc: Limit the listening backlog Limit the socket incoming call backlog queue size so that a remote client can't pump in sufficient new calls that the server runs out of memory. Note that this is partially theoretical at the moment since whilst the number of calls is limited, the number of packets trying to set up new calls is not. This will be addressed in a later patch. If the caller of listen() specifies a backlog INT_MAX, then they get the current maximum; anything else greater than max_backlog or anything negative incurs EINVAL. The limit on the maximum queue size can be set by: echo N >/proc/sys/net/rxrpc/max_backlog where 4<=N<=32. Further, set the default backlog to 0, requiring listen() to be called before we start actually queueing new calls. Whilst this kind of is a change in the UAPI, the caller can't actually *accept* new calls anyway unless they've first called listen() to put the socket into the LISTENING state - thus the aforementioned new calls would otherwise just sit there, eating up kernel memory. (Note that sockets that don't have a non-zero service ID bound don't get incoming calls anyway.) Given that the default backlog is now 0, make the AFS filesystem call kernel_listen() to set the maximum backlog for itself. Possible improvements include: (1) Trimming a too-large backlog to max_backlog when listen is called. (2) Trimming the backlog value whenever the value is used so that changes to max_backlog are applied to an open socket automatically. Note that the AFS filesystem opens one socket and keeps it open for extended periods, so would miss out on changes to max_backlog. (3) Having a separate setting for the AFS filesystem. Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2016-06-11 05:30:37 +08:00
ret = -ENOMEM;
afs_async_calls = create_singlethread_workqueue("kafsd");
rxrpc: Limit the listening backlog Limit the socket incoming call backlog queue size so that a remote client can't pump in sufficient new calls that the server runs out of memory. Note that this is partially theoretical at the moment since whilst the number of calls is limited, the number of packets trying to set up new calls is not. This will be addressed in a later patch. If the caller of listen() specifies a backlog INT_MAX, then they get the current maximum; anything else greater than max_backlog or anything negative incurs EINVAL. The limit on the maximum queue size can be set by: echo N >/proc/sys/net/rxrpc/max_backlog where 4<=N<=32. Further, set the default backlog to 0, requiring listen() to be called before we start actually queueing new calls. Whilst this kind of is a change in the UAPI, the caller can't actually *accept* new calls anyway unless they've first called listen() to put the socket into the LISTENING state - thus the aforementioned new calls would otherwise just sit there, eating up kernel memory. (Note that sockets that don't have a non-zero service ID bound don't get incoming calls anyway.) Given that the default backlog is now 0, make the AFS filesystem call kernel_listen() to set the maximum backlog for itself. Possible improvements include: (1) Trimming a too-large backlog to max_backlog when listen is called. (2) Trimming the backlog value whenever the value is used so that changes to max_backlog are applied to an open socket automatically. Note that the AFS filesystem opens one socket and keeps it open for extended periods, so would miss out on changes to max_backlog. (3) Having a separate setting for the AFS filesystem. Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2016-06-11 05:30:37 +08:00
if (!afs_async_calls)
goto error_0;
ret = sock_create_kern(&init_net, AF_RXRPC, SOCK_DGRAM, PF_INET, &socket);
rxrpc: Limit the listening backlog Limit the socket incoming call backlog queue size so that a remote client can't pump in sufficient new calls that the server runs out of memory. Note that this is partially theoretical at the moment since whilst the number of calls is limited, the number of packets trying to set up new calls is not. This will be addressed in a later patch. If the caller of listen() specifies a backlog INT_MAX, then they get the current maximum; anything else greater than max_backlog or anything negative incurs EINVAL. The limit on the maximum queue size can be set by: echo N >/proc/sys/net/rxrpc/max_backlog where 4<=N<=32. Further, set the default backlog to 0, requiring listen() to be called before we start actually queueing new calls. Whilst this kind of is a change in the UAPI, the caller can't actually *accept* new calls anyway unless they've first called listen() to put the socket into the LISTENING state - thus the aforementioned new calls would otherwise just sit there, eating up kernel memory. (Note that sockets that don't have a non-zero service ID bound don't get incoming calls anyway.) Given that the default backlog is now 0, make the AFS filesystem call kernel_listen() to set the maximum backlog for itself. Possible improvements include: (1) Trimming a too-large backlog to max_backlog when listen is called. (2) Trimming the backlog value whenever the value is used so that changes to max_backlog are applied to an open socket automatically. Note that the AFS filesystem opens one socket and keeps it open for extended periods, so would miss out on changes to max_backlog. (3) Having a separate setting for the AFS filesystem. Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2016-06-11 05:30:37 +08:00
if (ret < 0)
goto error_1;
socket->sk->sk_allocation = GFP_NOFS;
/* bind the callback manager's address to make this a server socket */
srx.srx_family = AF_RXRPC;
srx.srx_service = CM_SERVICE;
srx.transport_type = SOCK_DGRAM;
srx.transport_len = sizeof(srx.transport.sin);
srx.transport.sin.sin_family = AF_INET;
srx.transport.sin.sin_port = htons(AFS_CM_PORT);
memset(&srx.transport.sin.sin_addr, 0,
sizeof(srx.transport.sin.sin_addr));
ret = kernel_bind(socket, (struct sockaddr *) &srx, sizeof(srx));
rxrpc: Limit the listening backlog Limit the socket incoming call backlog queue size so that a remote client can't pump in sufficient new calls that the server runs out of memory. Note that this is partially theoretical at the moment since whilst the number of calls is limited, the number of packets trying to set up new calls is not. This will be addressed in a later patch. If the caller of listen() specifies a backlog INT_MAX, then they get the current maximum; anything else greater than max_backlog or anything negative incurs EINVAL. The limit on the maximum queue size can be set by: echo N >/proc/sys/net/rxrpc/max_backlog where 4<=N<=32. Further, set the default backlog to 0, requiring listen() to be called before we start actually queueing new calls. Whilst this kind of is a change in the UAPI, the caller can't actually *accept* new calls anyway unless they've first called listen() to put the socket into the LISTENING state - thus the aforementioned new calls would otherwise just sit there, eating up kernel memory. (Note that sockets that don't have a non-zero service ID bound don't get incoming calls anyway.) Given that the default backlog is now 0, make the AFS filesystem call kernel_listen() to set the maximum backlog for itself. Possible improvements include: (1) Trimming a too-large backlog to max_backlog when listen is called. (2) Trimming the backlog value whenever the value is used so that changes to max_backlog are applied to an open socket automatically. Note that the AFS filesystem opens one socket and keeps it open for extended periods, so would miss out on changes to max_backlog. (3) Having a separate setting for the AFS filesystem. Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2016-06-11 05:30:37 +08:00
if (ret < 0)
goto error_2;
ret = kernel_listen(socket, INT_MAX);
if (ret < 0)
goto error_2;
rxrpc_kernel_intercept_rx_messages(socket, afs_rx_interceptor);
afs_socket = socket;
_leave(" = 0");
return 0;
rxrpc: Limit the listening backlog Limit the socket incoming call backlog queue size so that a remote client can't pump in sufficient new calls that the server runs out of memory. Note that this is partially theoretical at the moment since whilst the number of calls is limited, the number of packets trying to set up new calls is not. This will be addressed in a later patch. If the caller of listen() specifies a backlog INT_MAX, then they get the current maximum; anything else greater than max_backlog or anything negative incurs EINVAL. The limit on the maximum queue size can be set by: echo N >/proc/sys/net/rxrpc/max_backlog where 4<=N<=32. Further, set the default backlog to 0, requiring listen() to be called before we start actually queueing new calls. Whilst this kind of is a change in the UAPI, the caller can't actually *accept* new calls anyway unless they've first called listen() to put the socket into the LISTENING state - thus the aforementioned new calls would otherwise just sit there, eating up kernel memory. (Note that sockets that don't have a non-zero service ID bound don't get incoming calls anyway.) Given that the default backlog is now 0, make the AFS filesystem call kernel_listen() to set the maximum backlog for itself. Possible improvements include: (1) Trimming a too-large backlog to max_backlog when listen is called. (2) Trimming the backlog value whenever the value is used so that changes to max_backlog are applied to an open socket automatically. Note that the AFS filesystem opens one socket and keeps it open for extended periods, so would miss out on changes to max_backlog. (3) Having a separate setting for the AFS filesystem. Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2016-06-11 05:30:37 +08:00
error_2:
sock_release(socket);
error_1:
destroy_workqueue(afs_async_calls);
error_0:
_leave(" = %d", ret);
return ret;
}
/*
* close the RxRPC socket AFS was using
*/
void afs_close_socket(void)
{
_enter("");
wait_on_atomic_t(&afs_outstanding_calls, afs_wait_atomic_t,
TASK_UNINTERRUPTIBLE);
_debug("no outstanding calls");
sock_release(afs_socket);
_debug("dework");
destroy_workqueue(afs_async_calls);
ASSERTCMP(atomic_read(&afs_outstanding_skbs), ==, 0);
_leave("");
}
/*
* note that the data in a socket buffer is now delivered and that the buffer
* should be freed
*/
static void afs_data_delivered(struct sk_buff *skb)
{
if (!skb) {
_debug("DLVR NULL [%d]", atomic_read(&afs_outstanding_skbs));
dump_stack();
} else {
_debug("DLVR %p{%u} [%d]",
skb, skb->mark, atomic_read(&afs_outstanding_skbs));
if (atomic_dec_return(&afs_outstanding_skbs) == -1)
BUG();
rxrpc_kernel_data_delivered(skb);
}
}
/*
* free a socket buffer
*/
static void afs_free_skb(struct sk_buff *skb)
{
if (!skb) {
_debug("FREE NULL [%d]", atomic_read(&afs_outstanding_skbs));
dump_stack();
} else {
_debug("FREE %p{%u} [%d]",
skb, skb->mark, atomic_read(&afs_outstanding_skbs));
if (atomic_dec_return(&afs_outstanding_skbs) == -1)
BUG();
rxrpc_kernel_free_skb(skb);
}
}
/*
* free a call
*/
static void afs_free_call(struct afs_call *call)
{
_debug("DONE %p{%s} [%d]",
call, call->type->name, atomic_read(&afs_outstanding_calls));
ASSERTCMP(call->rxcall, ==, NULL);
ASSERT(!work_pending(&call->async_work));
ASSERT(skb_queue_empty(&call->rx_queue));
ASSERT(call->type->name != NULL);
kfree(call->request);
kfree(call);
if (atomic_dec_and_test(&afs_outstanding_calls))
wake_up_atomic_t(&afs_outstanding_calls);
}
/*
* End a call but do not free it
*/
static void afs_end_call_nofree(struct afs_call *call)
{
if (call->rxcall) {
rxrpc_kernel_end_call(call->rxcall);
call->rxcall = NULL;
}
if (call->type->destructor)
call->type->destructor(call);
}
/*
* End a call and free it
*/
static void afs_end_call(struct afs_call *call)
{
afs_end_call_nofree(call);
afs_free_call(call);
}
/*
* allocate a call with flat request and reply buffers
*/
struct afs_call *afs_alloc_flat_call(const struct afs_call_type *type,
size_t request_size, size_t reply_size)
{
struct afs_call *call;
call = kzalloc(sizeof(*call), GFP_NOFS);
if (!call)
goto nomem_call;
_debug("CALL %p{%s} [%d]",
call, type->name, atomic_read(&afs_outstanding_calls));
atomic_inc(&afs_outstanding_calls);
call->type = type;
call->request_size = request_size;
call->reply_max = reply_size;
if (request_size) {
call->request = kmalloc(request_size, GFP_NOFS);
if (!call->request)
goto nomem_free;
}
if (reply_size) {
call->buffer = kmalloc(reply_size, GFP_NOFS);
if (!call->buffer)
goto nomem_free;
}
init_waitqueue_head(&call->waitq);
skb_queue_head_init(&call->rx_queue);
return call;
nomem_free:
afs_free_call(call);
nomem_call:
return NULL;
}
/*
* clean up a call with flat buffer
*/
void afs_flat_call_destructor(struct afs_call *call)
{
_enter("");
kfree(call->request);
call->request = NULL;
kfree(call->buffer);
call->buffer = NULL;
}
/*
* attach the data from a bunch of pages on an inode to a call
*/
static int afs_send_pages(struct afs_call *call, struct msghdr *msg,
struct kvec *iov)
{
struct page *pages[8];
unsigned count, n, loop, offset, to;
pgoff_t first = call->first, last = call->last;
int ret;
_enter("");
offset = call->first_offset;
call->first_offset = 0;
do {
_debug("attach %lx-%lx", first, last);
count = last - first + 1;
if (count > ARRAY_SIZE(pages))
count = ARRAY_SIZE(pages);
n = find_get_pages_contig(call->mapping, first, count, pages);
ASSERTCMP(n, ==, count);
loop = 0;
do {
msg->msg_flags = 0;
to = PAGE_SIZE;
if (first + loop >= last)
to = call->last_to;
else
msg->msg_flags = MSG_MORE;
iov->iov_base = kmap(pages[loop]) + offset;
iov->iov_len = to - offset;
offset = 0;
_debug("- range %u-%u%s",
offset, to, msg->msg_flags ? " [more]" : "");
iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC,
iov, 1, to - offset);
/* have to change the state *before* sending the last
* packet as RxRPC might give us the reply before it
* returns from sending the request */
if (first + loop >= last)
call->state = AFS_CALL_AWAIT_REPLY;
ret = rxrpc_kernel_send_data(call->rxcall, msg,
to - offset);
kunmap(pages[loop]);
if (ret < 0)
break;
} while (++loop < count);
first += count;
for (loop = 0; loop < count; loop++)
put_page(pages[loop]);
if (ret < 0)
break;
} while (first <= last);
_leave(" = %d", ret);
return ret;
}
/*
* initiate a call
*/
int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
const struct afs_wait_mode *wait_mode)
{
struct sockaddr_rxrpc srx;
struct rxrpc_call *rxcall;
struct msghdr msg;
struct kvec iov[1];
int ret;
struct sk_buff *skb;
_enter("%x,{%d},", addr->s_addr, ntohs(call->port));
ASSERT(call->type != NULL);
ASSERT(call->type->name != NULL);
_debug("____MAKE %p{%s,%x} [%d]____",
call, call->type->name, key_serial(call->key),
atomic_read(&afs_outstanding_calls));
call->wait_mode = wait_mode;
AFS: Fix kafs module unloading At present, it is not possible to successfully unload the kafs module if there are outstanding async outgoing calls (those made with afs_make_call()). This appears to be due to the changes introduced by: commit 059499453a9abd1857d442b44da8b4c126dc72a8 Author: Tejun Heo <tj@kernel.org> Date: Fri Mar 7 10:24:50 2014 -0500 Subject: afs: don't use PREPARE_WORK which didn't go far enough. The problem is due to: (1) The aforementioned commit introduced a separate handler function pointer in the call, call->async_workfn, in addition to the original workqueue item, call->async_work, for asynchronous operations because workqueues subsystem cannot handle the workqueue item pointer being changed whilst the item is queued or being processed. (2) afs_async_workfn() was introduced in that commit to be the callback for call->async_work. Its sole purpose is to run whatever call->async_workfn points to. (3) call->async_workfn is only used from afs_async_workfn(), which is only set on async_work by afs_collect_incoming_call() - ie. for incoming calls. (4) call->async_workfn is *not* set by afs_make_call() when outgoing calls are made, and call->async_work is set afs_process_async_call() - and not afs_async_workfn(). (5) afs_process_async_call() now changes call->async_workfn rather than call->async_work to point to afs_delete_async_call() to clean up, but this is only effective for incoming calls because call->async_work does not point to afs_async_workfn() for outgoing calls. (6) Because, for incoming calls, call->async_work remains pointing to afs_process_async_call() this results in an infinite loop. Instead, make the workqueue uniformly vector through call->async_workfn, via afs_async_workfn() and simply initialise call->async_workfn to point to afs_process_async_call() in afs_make_call(). Signed-off-by: Nathaniel Wesley Filardo <nwf@cs.jhu.edu> Signed-off-by: David Howells <dhowells@redhat.com> Reviewed-by: Tejun Heo <tj@kernel.org>
2014-05-21 21:58:26 +08:00
call->async_workfn = afs_process_async_call;
INIT_WORK(&call->async_work, afs_async_workfn);
memset(&srx, 0, sizeof(srx));
srx.srx_family = AF_RXRPC;
srx.srx_service = call->service_id;
srx.transport_type = SOCK_DGRAM;
srx.transport_len = sizeof(srx.transport.sin);
srx.transport.sin.sin_family = AF_INET;
srx.transport.sin.sin_port = call->port;
memcpy(&srx.transport.sin.sin_addr, addr, 4);
/* create a call */
rxcall = rxrpc_kernel_begin_call(afs_socket, &srx, call->key,
(unsigned long) call, gfp);
call->key = NULL;
if (IS_ERR(rxcall)) {
ret = PTR_ERR(rxcall);
goto error_kill_call;
}
call->rxcall = rxcall;
/* send the request */
iov[0].iov_base = call->request;
iov[0].iov_len = call->request_size;
msg.msg_name = NULL;
msg.msg_namelen = 0;
iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, iov, 1,
call->request_size);
msg.msg_control = NULL;
msg.msg_controllen = 0;
msg.msg_flags = (call->send_pages ? MSG_MORE : 0);
/* have to change the state *before* sending the last packet as RxRPC
* might give us the reply before it returns from sending the
* request */
if (!call->send_pages)
call->state = AFS_CALL_AWAIT_REPLY;
ret = rxrpc_kernel_send_data(rxcall, &msg, call->request_size);
if (ret < 0)
goto error_do_abort;
if (call->send_pages) {
ret = afs_send_pages(call, &msg, iov);
if (ret < 0)
goto error_do_abort;
}
/* at this point, an async call may no longer exist as it may have
* already completed */
return wait_mode->wait(call);
error_do_abort:
rxrpc_kernel_abort_call(rxcall, RX_USER_ABORT);
while ((skb = skb_dequeue(&call->rx_queue)))
afs_free_skb(skb);
error_kill_call:
afs_end_call(call);
_leave(" = %d", ret);
return ret;
}
/*
* Handles intercepted messages that were arriving in the socket's Rx queue.
*
* Called from the AF_RXRPC call processor in waitqueue process context. For
* each call, it is guaranteed this will be called in order of packet to be
* delivered.
*/
static void afs_rx_interceptor(struct sock *sk, unsigned long user_call_ID,
struct sk_buff *skb)
{
struct afs_call *call = (struct afs_call *) user_call_ID;
_enter("%p,,%u", call, skb->mark);
_debug("ICPT %p{%u} [%d]",
skb, skb->mark, atomic_read(&afs_outstanding_skbs));
ASSERTCMP(sk, ==, afs_socket->sk);
atomic_inc(&afs_outstanding_skbs);
if (!call) {
/* its an incoming call for our callback service */
skb_queue_tail(&afs_incoming_calls, skb);
queue_work(afs_wq, &afs_collect_incoming_call_work);
} else {
/* route the messages directly to the appropriate call */
skb_queue_tail(&call->rx_queue, skb);
call->wait_mode->rx_wakeup(call);
}
_leave("");
}
/*
* deliver messages to a call
*/
static void afs_deliver_to_call(struct afs_call *call)
{
struct sk_buff *skb;
bool last;
u32 abort_code;
int ret;
_enter("");
while ((call->state == AFS_CALL_AWAIT_REPLY ||
call->state == AFS_CALL_AWAIT_OP_ID ||
call->state == AFS_CALL_AWAIT_REQUEST ||
call->state == AFS_CALL_AWAIT_ACK) &&
(skb = skb_dequeue(&call->rx_queue))) {
switch (skb->mark) {
case RXRPC_SKB_MARK_DATA:
_debug("Rcv DATA");
last = rxrpc_kernel_is_data_last(skb);
ret = call->type->deliver(call, skb, last);
switch (ret) {
case 0:
if (last &&
call->state == AFS_CALL_AWAIT_REPLY)
call->state = AFS_CALL_COMPLETE;
break;
case -ENOTCONN:
abort_code = RX_CALL_DEAD;
goto do_abort;
case -ENOTSUPP:
abort_code = RX_INVALID_OPERATION;
goto do_abort;
default:
abort_code = RXGEN_CC_UNMARSHAL;
if (call->state != AFS_CALL_AWAIT_REPLY)
abort_code = RXGEN_SS_UNMARSHAL;
do_abort:
rxrpc_kernel_abort_call(call->rxcall,
abort_code);
call->error = ret;
call->state = AFS_CALL_ERROR;
break;
}
afs_data_delivered(skb);
skb = NULL;
continue;
case RXRPC_SKB_MARK_FINAL_ACK:
_debug("Rcv ACK");
call->state = AFS_CALL_COMPLETE;
break;
case RXRPC_SKB_MARK_BUSY:
_debug("Rcv BUSY");
call->error = -EBUSY;
call->state = AFS_CALL_BUSY;
break;
case RXRPC_SKB_MARK_REMOTE_ABORT:
abort_code = rxrpc_kernel_get_abort_code(skb);
call->error = call->type->abort_to_error(abort_code);
call->state = AFS_CALL_ABORTED;
_debug("Rcv ABORT %u -> %d", abort_code, call->error);
break;
case RXRPC_SKB_MARK_LOCAL_ABORT:
abort_code = rxrpc_kernel_get_abort_code(skb);
call->error = call->type->abort_to_error(abort_code);
call->state = AFS_CALL_ABORTED;
_debug("Loc ABORT %u -> %d", abort_code, call->error);
break;
case RXRPC_SKB_MARK_NET_ERROR:
call->error = -rxrpc_kernel_get_error_number(skb);
call->state = AFS_CALL_ERROR;
_debug("Rcv NET ERROR %d", call->error);
break;
case RXRPC_SKB_MARK_LOCAL_ERROR:
call->error = -rxrpc_kernel_get_error_number(skb);
call->state = AFS_CALL_ERROR;
_debug("Rcv LOCAL ERROR %d", call->error);
break;
default:
BUG();
break;
}
afs_free_skb(skb);
}
/* make sure the queue is empty if the call is done with (we might have
* aborted the call early because of an unmarshalling error) */
if (call->state >= AFS_CALL_COMPLETE) {
while ((skb = skb_dequeue(&call->rx_queue)))
afs_free_skb(skb);
if (call->incoming)
afs_end_call(call);
}
_leave("");
}
/*
* wait synchronously for a call to complete
*/
static int afs_wait_for_call_to_complete(struct afs_call *call)
{
struct sk_buff *skb;
int ret;
DECLARE_WAITQUEUE(myself, current);
_enter("");
add_wait_queue(&call->waitq, &myself);
for (;;) {
set_current_state(TASK_INTERRUPTIBLE);
/* deliver any messages that are in the queue */
if (!skb_queue_empty(&call->rx_queue)) {
__set_current_state(TASK_RUNNING);
afs_deliver_to_call(call);
continue;
}
ret = call->error;
if (call->state >= AFS_CALL_COMPLETE)
break;
ret = -EINTR;
if (signal_pending(current))
break;
schedule();
}
remove_wait_queue(&call->waitq, &myself);
__set_current_state(TASK_RUNNING);
/* kill the call */
if (call->state < AFS_CALL_COMPLETE) {
_debug("call incomplete");
rxrpc_kernel_abort_call(call->rxcall, RX_CALL_DEAD);
while ((skb = skb_dequeue(&call->rx_queue)))
afs_free_skb(skb);
}
_debug("call complete");
afs_end_call(call);
_leave(" = %d", ret);
return ret;
}
/*
* wake up a waiting call
*/
static void afs_wake_up_call_waiter(struct afs_call *call)
{
wake_up(&call->waitq);
}
/*
* wake up an asynchronous call
*/
static void afs_wake_up_async_call(struct afs_call *call)
{
_enter("");
queue_work(afs_async_calls, &call->async_work);
}
/*
* put a call into asynchronous mode
* - mustn't touch the call descriptor as the call my have completed by the
* time we get here
*/
static int afs_dont_wait_for_call_to_complete(struct afs_call *call)
{
_enter("");
return -EINPROGRESS;
}
/*
* delete an asynchronous call
*/
static void afs_delete_async_call(struct afs_call *call)
{
_enter("");
afs_free_call(call);
_leave("");
}
/*
* perform processing on an asynchronous call
* - on a multiple-thread workqueue this work item may try to run on several
* CPUs at the same time
*/
static void afs_process_async_call(struct afs_call *call)
{
_enter("");
if (!skb_queue_empty(&call->rx_queue))
afs_deliver_to_call(call);
if (call->state >= AFS_CALL_COMPLETE && call->wait_mode) {
if (call->wait_mode->async_complete)
call->wait_mode->async_complete(call->reply,
call->error);
call->reply = NULL;
/* kill the call */
afs_end_call_nofree(call);
/* we can't just delete the call because the work item may be
* queued */
call->async_workfn = afs_delete_async_call;
queue_work(afs_async_calls, &call->async_work);
}
_leave("");
}
/*
* empty a socket buffer into a flat reply buffer
*/
void afs_transfer_reply(struct afs_call *call, struct sk_buff *skb)
{
size_t len = skb->len;
if (skb_copy_bits(skb, 0, call->buffer + call->reply_size, len) < 0)
BUG();
call->reply_size += len;
}
/*
* accept the backlog of incoming calls
*/
static void afs_collect_incoming_call(struct work_struct *work)
{
struct rxrpc_call *rxcall;
struct afs_call *call = NULL;
struct sk_buff *skb;
while ((skb = skb_dequeue(&afs_incoming_calls))) {
_debug("new call");
/* don't need the notification */
afs_free_skb(skb);
if (!call) {
call = kzalloc(sizeof(struct afs_call), GFP_KERNEL);
if (!call) {
rxrpc_kernel_reject_call(afs_socket);
return;
}
call->async_workfn = afs_process_async_call;
INIT_WORK(&call->async_work, afs_async_workfn);
call->wait_mode = &afs_async_incoming_call;
call->type = &afs_RXCMxxxx;
init_waitqueue_head(&call->waitq);
skb_queue_head_init(&call->rx_queue);
call->state = AFS_CALL_AWAIT_OP_ID;
_debug("CALL %p{%s} [%d]",
call, call->type->name,
atomic_read(&afs_outstanding_calls));
atomic_inc(&afs_outstanding_calls);
}
rxcall = rxrpc_kernel_accept_call(afs_socket,
(unsigned long) call);
if (!IS_ERR(rxcall)) {
call->rxcall = rxcall;
call = NULL;
}
}
if (call)
afs_free_call(call);
}
/*
* grab the operation ID from an incoming cache manager call
*/
static int afs_deliver_cm_op_id(struct afs_call *call, struct sk_buff *skb,
bool last)
{
size_t len = skb->len;
void *oibuf = (void *) &call->operation_ID;
_enter("{%u},{%zu},%d", call->offset, len, last);
ASSERTCMP(call->offset, <, 4);
/* the operation ID forms the first four bytes of the request data */
len = min_t(size_t, len, 4 - call->offset);
if (skb_copy_bits(skb, 0, oibuf + call->offset, len) < 0)
BUG();
if (!pskb_pull(skb, len))
BUG();
call->offset += len;
if (call->offset < 4) {
if (last) {
_leave(" = -EBADMSG [op ID short]");
return -EBADMSG;
}
_leave(" = 0 [incomplete]");
return 0;
}
call->state = AFS_CALL_AWAIT_REQUEST;
/* ask the cache manager to route the call (it'll change the call type
* if successful) */
if (!afs_cm_incoming_call(call))
return -ENOTSUPP;
/* pass responsibility for the remainer of this message off to the
* cache manager op */
return call->type->deliver(call, skb, last);
}
/*
* send an empty reply
*/
void afs_send_empty_reply(struct afs_call *call)
{
struct msghdr msg;
_enter("");
msg.msg_name = NULL;
msg.msg_namelen = 0;
iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, NULL, 0, 0);
msg.msg_control = NULL;
msg.msg_controllen = 0;
msg.msg_flags = 0;
call->state = AFS_CALL_AWAIT_ACK;
switch (rxrpc_kernel_send_data(call->rxcall, &msg, 0)) {
case 0:
_leave(" [replied]");
return;
case -ENOMEM:
_debug("oom");
rxrpc_kernel_abort_call(call->rxcall, RX_USER_ABORT);
default:
afs_end_call(call);
_leave(" [error]");
return;
}
}
/*
* send a simple reply
*/
void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
{
struct msghdr msg;
struct kvec iov[1];
int n;
_enter("");
iov[0].iov_base = (void *) buf;
iov[0].iov_len = len;
msg.msg_name = NULL;
msg.msg_namelen = 0;
iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, iov, 1, len);
msg.msg_control = NULL;
msg.msg_controllen = 0;
msg.msg_flags = 0;
call->state = AFS_CALL_AWAIT_ACK;
n = rxrpc_kernel_send_data(call->rxcall, &msg, len);
if (n >= 0) {
/* Success */
_leave(" [replied]");
return;
}
if (n == -ENOMEM) {
_debug("oom");
rxrpc_kernel_abort_call(call->rxcall, RX_USER_ABORT);
}
afs_end_call(call);
_leave(" [error]");
}
/*
* extract a piece of data from the received data socket buffers
*/
int afs_extract_data(struct afs_call *call, struct sk_buff *skb,
bool last, void *buf, size_t count)
{
size_t len = skb->len;
_enter("{%u},{%zu},%d,,%zu", call->offset, len, last, count);
ASSERTCMP(call->offset, <, count);
len = min_t(size_t, len, count - call->offset);
if (skb_copy_bits(skb, 0, buf + call->offset, len) < 0 ||
!pskb_pull(skb, len))
BUG();
call->offset += len;
if (call->offset < count) {
if (last) {
_leave(" = -EBADMSG [%d < %zu]", call->offset, count);
return -EBADMSG;
}
_leave(" = -EAGAIN");
return -EAGAIN;
}
return 0;
}