Second round of 9p patches for the 3.11 merge window.
Several of these patches were rebased in order to correct style issues. Only stylistic changes were made versus the patches which were in linux-next for two weeks. The rebases have been in linux-next for 3 days and have passed my regressions. The bulk of these are RDMA fixes and improvements. There are also some additions on the extended attributes front to support some additional namespaces and a new option for TCP to force allocation of mount requests from a privileged port. -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.12 (GNU/Linux) Comment: GPGTools - http://gpgtools.org iQIcBAABAgAGBQJR3rWXAAoJEDZk62b0Tg6xabIP/12I+SkQ57wRN03EQy5fqUdX gK/YMHKQ9QuDnZPBvrZ2lypesQNqVU0KINay6VEA86JG1gwzPyUd2MnpQ7F0vV3N XwVD54IoflV/M74xUnrgGWB8YxaPcdacQQ8yazX+mEgOgYGdWmDAl7FHmAkdKAFB gSl25f3PNJX1Rjay0dssNVXrVPXuJY/fZXKnNQZKtRwXffRWKsWHd8FU0Eq7F30A kNQB8tmMSfHBBjP+tzR0My6/kQ09jzHdtZOkH9IgVpNzqrd8tfy0l6tEvFypxqGT 5oQFoxHHL/tUW05V0P3gYany2A7lEhSUifPKS6omqHO+vPlw+pDJw+xWlNq9fnDt 8S8znqVuEHhvqRQW7zFdb9ac2MZi8CHHhC2wGIZ7GYjNG2q5XwE8b/QhdXQeFin7 ibugvoW7+ZdcDewpQW27oO0g7B/8hRt8KC+1lc/8rITKIfGxbNJkGzTDl0F4Co7v IH7Ew5PHPe6ZiuU0QSdU+NBuvk8g8sWGxx04Xvzl3WicwOg7XvN3ivrKB9oN2U1x 50KZRnYpwQQv/9AxyhroYU+Ufje8SF4v++zsq1eMzUcHsC/C73eatw2m764t+X4S 8yMLrgqY1Nzif4nAMi/SDMnB/R1bXeuc8kXD9xT6XD9d2tf6e+zCHhQklVeC0tuK RiVRJqGrfanbKMnWIG0Y =n9rI -----END PGP SIGNATURE----- Merge tag 'for-linus-3.11-merge-window-part-2' of git://git.kernel.org/pub/scm/linux/kernel/git/ericvh/v9fs Pull second round of 9p patches from Eric Van Hensbergen: "Several of these patches were rebased in order to correct style issues. Only stylistic changes were made versus the patches which were in linux-next for two weeks. The rebases have been in linux-next for 3 days and have passed my regressions. The bulk of these are RDMA fixes and improvements. 
There are also some additions on the extended attributes front to support some additional namespaces and a new option for TCP to force allocation of mount requests from a privileged port" * tag 'for-linus-3.11-merge-window-part-2' of git://git.kernel.org/pub/scm/linux/kernel/git/ericvh/v9fs: fs/9p: Remove the unused variable "err" in v9fs_vfs_getattr() 9P: Add cancelled() to the transport functions. 9P/RDMA: count posted buffers without a pending request 9P/RDMA: Improve error handling in rdma_request 9P/RDMA: Do not free req->rc in error handling in rdma_request() 9P/RDMA: Use a semaphore to protect the RQ 9P/RDMA: Protect against duplicate replies 9P/RDMA: increase P9_RDMA_MAXSIZE to 1MB 9pnet: refactor struct p9_fcall alloc code 9P/RDMA: rdma_request() needs not allocate req->rc 9P: Fix fcall allocation for rdma fs/9p: xattr: add trusted and security namespaces net/9p: add privport option to 9p tcp transport
This commit is contained in:
commit
19d2f8e0fb
|
@ -31,3 +31,16 @@ config 9P_FS_POSIX_ACL
|
|||
If you don't know what Access Control Lists are, say N
|
||||
|
||||
endif
|
||||
|
||||
|
||||
config 9P_FS_SECURITY
|
||||
bool "9P Security Labels"
|
||||
depends on 9P_FS
|
||||
help
|
||||
Security labels support alternative access control models
|
||||
implemented by security modules like SELinux. This option
|
||||
enables an extended attribute handler for file security
|
||||
labels in the 9P filesystem.
|
||||
|
||||
If you are not using a security module that requires using
|
||||
extended attributes for file security labels, say N.
|
||||
|
|
|
@ -11,7 +11,9 @@ obj-$(CONFIG_9P_FS) := 9p.o
|
|||
v9fs.o \
|
||||
fid.o \
|
||||
xattr.o \
|
||||
xattr_user.o
|
||||
xattr_user.o \
|
||||
xattr_trusted.o
|
||||
|
||||
9p-$(CONFIG_9P_FSCACHE) += cache.o
|
||||
9p-$(CONFIG_9P_FS_POSIX_ACL) += acl.o
|
||||
9p-$(CONFIG_9P_FS_SECURITY) += xattr_security.o
|
||||
|
|
|
@ -1054,13 +1054,11 @@ static int
|
|||
v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
|
||||
struct kstat *stat)
|
||||
{
|
||||
int err;
|
||||
struct v9fs_session_info *v9ses;
|
||||
struct p9_fid *fid;
|
||||
struct p9_wstat *st;
|
||||
|
||||
p9_debug(P9_DEBUG_VFS, "dentry: %p\n", dentry);
|
||||
err = -EPERM;
|
||||
v9ses = v9fs_dentry2v9ses(dentry);
|
||||
if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
|
||||
generic_fillattr(dentry->d_inode, stat);
|
||||
|
|
|
@ -167,9 +167,13 @@ ssize_t v9fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
|
|||
|
||||
const struct xattr_handler *v9fs_xattr_handlers[] = {
|
||||
&v9fs_xattr_user_handler,
|
||||
&v9fs_xattr_trusted_handler,
|
||||
#ifdef CONFIG_9P_FS_POSIX_ACL
|
||||
&v9fs_xattr_acl_access_handler,
|
||||
&v9fs_xattr_acl_default_handler,
|
||||
#endif
|
||||
#ifdef CONFIG_9P_FS_SECURITY
|
||||
&v9fs_xattr_security_handler,
|
||||
#endif
|
||||
NULL
|
||||
};
|
||||
|
|
|
@ -20,6 +20,8 @@
|
|||
|
||||
extern const struct xattr_handler *v9fs_xattr_handlers[];
|
||||
extern struct xattr_handler v9fs_xattr_user_handler;
|
||||
extern struct xattr_handler v9fs_xattr_trusted_handler;
|
||||
extern struct xattr_handler v9fs_xattr_security_handler;
|
||||
extern const struct xattr_handler v9fs_xattr_acl_access_handler;
|
||||
extern const struct xattr_handler v9fs_xattr_acl_default_handler;
|
||||
|
||||
|
|
|
@ -0,0 +1,80 @@
|
|||
/*
|
||||
* Copyright IBM Corporation, 2010
|
||||
* Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of version 2.1 of the GNU Lesser General Public License
|
||||
* as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it would be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/slab.h>
|
||||
#include "xattr.h"
|
||||
|
||||
static int v9fs_xattr_security_get(struct dentry *dentry, const char *name,
|
||||
void *buffer, size_t size, int type)
|
||||
{
|
||||
int retval;
|
||||
char *full_name;
|
||||
size_t name_len;
|
||||
size_t prefix_len = XATTR_SECURITY_PREFIX_LEN;
|
||||
|
||||
if (name == NULL)
|
||||
return -EINVAL;
|
||||
|
||||
if (strcmp(name, "") == 0)
|
||||
return -EINVAL;
|
||||
|
||||
name_len = strlen(name);
|
||||
full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL);
|
||||
if (!full_name)
|
||||
return -ENOMEM;
|
||||
memcpy(full_name, XATTR_SECURITY_PREFIX, prefix_len);
|
||||
memcpy(full_name+prefix_len, name, name_len);
|
||||
full_name[prefix_len + name_len] = '\0';
|
||||
|
||||
retval = v9fs_xattr_get(dentry, full_name, buffer, size);
|
||||
kfree(full_name);
|
||||
return retval;
|
||||
}
|
||||
|
||||
static int v9fs_xattr_security_set(struct dentry *dentry, const char *name,
|
||||
const void *value, size_t size, int flags, int type)
|
||||
{
|
||||
int retval;
|
||||
char *full_name;
|
||||
size_t name_len;
|
||||
size_t prefix_len = XATTR_SECURITY_PREFIX_LEN;
|
||||
|
||||
if (name == NULL)
|
||||
return -EINVAL;
|
||||
|
||||
if (strcmp(name, "") == 0)
|
||||
return -EINVAL;
|
||||
|
||||
name_len = strlen(name);
|
||||
full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL);
|
||||
if (!full_name)
|
||||
return -ENOMEM;
|
||||
memcpy(full_name, XATTR_SECURITY_PREFIX, prefix_len);
|
||||
memcpy(full_name + prefix_len, name, name_len);
|
||||
full_name[prefix_len + name_len] = '\0';
|
||||
|
||||
retval = v9fs_xattr_set(dentry, full_name, value, size, flags);
|
||||
kfree(full_name);
|
||||
return retval;
|
||||
}
|
||||
|
||||
struct xattr_handler v9fs_xattr_security_handler = {
|
||||
.prefix = XATTR_SECURITY_PREFIX,
|
||||
.get = v9fs_xattr_security_get,
|
||||
.set = v9fs_xattr_security_set,
|
||||
};
|
|
@ -0,0 +1,80 @@
|
|||
/*
|
||||
* Copyright IBM Corporation, 2010
|
||||
* Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of version 2.1 of the GNU Lesser General Public License
|
||||
* as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it would be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/slab.h>
|
||||
#include "xattr.h"
|
||||
|
||||
static int v9fs_xattr_trusted_get(struct dentry *dentry, const char *name,
|
||||
void *buffer, size_t size, int type)
|
||||
{
|
||||
int retval;
|
||||
char *full_name;
|
||||
size_t name_len;
|
||||
size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
|
||||
|
||||
if (name == NULL)
|
||||
return -EINVAL;
|
||||
|
||||
if (strcmp(name, "") == 0)
|
||||
return -EINVAL;
|
||||
|
||||
name_len = strlen(name);
|
||||
full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL);
|
||||
if (!full_name)
|
||||
return -ENOMEM;
|
||||
memcpy(full_name, XATTR_TRUSTED_PREFIX, prefix_len);
|
||||
memcpy(full_name+prefix_len, name, name_len);
|
||||
full_name[prefix_len + name_len] = '\0';
|
||||
|
||||
retval = v9fs_xattr_get(dentry, full_name, buffer, size);
|
||||
kfree(full_name);
|
||||
return retval;
|
||||
}
|
||||
|
||||
static int v9fs_xattr_trusted_set(struct dentry *dentry, const char *name,
|
||||
const void *value, size_t size, int flags, int type)
|
||||
{
|
||||
int retval;
|
||||
char *full_name;
|
||||
size_t name_len;
|
||||
size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
|
||||
|
||||
if (name == NULL)
|
||||
return -EINVAL;
|
||||
|
||||
if (strcmp(name, "") == 0)
|
||||
return -EINVAL;
|
||||
|
||||
name_len = strlen(name);
|
||||
full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL);
|
||||
if (!full_name)
|
||||
return -ENOMEM;
|
||||
memcpy(full_name, XATTR_TRUSTED_PREFIX, prefix_len);
|
||||
memcpy(full_name + prefix_len, name, name_len);
|
||||
full_name[prefix_len + name_len] = '\0';
|
||||
|
||||
retval = v9fs_xattr_set(dentry, full_name, value, size, flags);
|
||||
kfree(full_name);
|
||||
return retval;
|
||||
}
|
||||
|
||||
struct xattr_handler v9fs_xattr_trusted_handler = {
|
||||
.prefix = XATTR_TRUSTED_PREFIX,
|
||||
.get = v9fs_xattr_trusted_get,
|
||||
.set = v9fs_xattr_trusted_set,
|
||||
};
|
|
@ -26,6 +26,9 @@
|
|||
#ifndef NET_9P_TRANSPORT_H
|
||||
#define NET_9P_TRANSPORT_H
|
||||
|
||||
#define P9_DEF_MIN_RESVPORT (665U)
|
||||
#define P9_DEF_MAX_RESVPORT (1023U)
|
||||
|
||||
/**
|
||||
* struct p9_trans_module - transport module interface
|
||||
* @list: used to maintain a list of currently available transports
|
||||
|
@ -37,6 +40,8 @@
|
|||
* @close: member function to discard a connection on this transport
|
||||
* @request: member function to issue a request to the transport
|
||||
* @cancel: member function to cancel a request (if it hasn't been sent)
|
||||
* @cancelled: member function to notify that a cancelled request will not
|
||||
* receive a reply
|
||||
*
|
||||
* This is the basic API for a transport module which is registered by the
|
||||
* transport module with the 9P core network module and used by the client
|
||||
|
@ -55,6 +60,7 @@ struct p9_trans_module {
|
|||
void (*close) (struct p9_client *);
|
||||
int (*request) (struct p9_client *, struct p9_req_t *req);
|
||||
int (*cancel) (struct p9_client *, struct p9_req_t *req);
|
||||
int (*cancelled)(struct p9_client *, struct p9_req_t *req);
|
||||
int (*zc_request)(struct p9_client *, struct p9_req_t *,
|
||||
char *, char *, int , int, int, int);
|
||||
};
|
||||
|
|
|
@ -204,6 +204,17 @@ free_and_return:
|
|||
return ret;
|
||||
}
|
||||
|
||||
struct p9_fcall *p9_fcall_alloc(int alloc_msize)
|
||||
{
|
||||
struct p9_fcall *fc;
|
||||
fc = kmalloc(sizeof(struct p9_fcall) + alloc_msize, GFP_NOFS);
|
||||
if (!fc)
|
||||
return NULL;
|
||||
fc->capacity = alloc_msize;
|
||||
fc->sdata = (char *) fc + sizeof(struct p9_fcall);
|
||||
return fc;
|
||||
}
|
||||
|
||||
/**
|
||||
* p9_tag_alloc - lookup/allocate a request by tag
|
||||
* @c: client session to lookup tag within
|
||||
|
@ -256,39 +267,36 @@ p9_tag_alloc(struct p9_client *c, u16 tag, unsigned int max_size)
|
|||
col = tag % P9_ROW_MAXTAG;
|
||||
|
||||
req = &c->reqs[row][col];
|
||||
if (!req->tc) {
|
||||
if (!req->wq) {
|
||||
req->wq = kmalloc(sizeof(wait_queue_head_t), GFP_NOFS);
|
||||
if (!req->wq) {
|
||||
pr_err("Couldn't grow tag array\n");
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
if (!req->wq)
|
||||
goto grow_failed;
|
||||
init_waitqueue_head(req->wq);
|
||||
req->tc = kmalloc(sizeof(struct p9_fcall) + alloc_msize,
|
||||
GFP_NOFS);
|
||||
req->rc = kmalloc(sizeof(struct p9_fcall) + alloc_msize,
|
||||
GFP_NOFS);
|
||||
if ((!req->tc) || (!req->rc)) {
|
||||
pr_err("Couldn't grow tag array\n");
|
||||
kfree(req->tc);
|
||||
kfree(req->rc);
|
||||
kfree(req->wq);
|
||||
req->tc = req->rc = NULL;
|
||||
req->wq = NULL;
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
req->tc->capacity = alloc_msize;
|
||||
req->rc->capacity = alloc_msize;
|
||||
req->tc->sdata = (char *) req->tc + sizeof(struct p9_fcall);
|
||||
req->rc->sdata = (char *) req->rc + sizeof(struct p9_fcall);
|
||||
}
|
||||
|
||||
if (!req->tc)
|
||||
req->tc = p9_fcall_alloc(alloc_msize);
|
||||
if (!req->rc)
|
||||
req->rc = p9_fcall_alloc(alloc_msize);
|
||||
if (!req->tc || !req->rc)
|
||||
goto grow_failed;
|
||||
|
||||
p9pdu_reset(req->tc);
|
||||
p9pdu_reset(req->rc);
|
||||
|
||||
req->tc->tag = tag-1;
|
||||
req->status = REQ_STATUS_ALLOC;
|
||||
|
||||
return &c->reqs[row][col];
|
||||
return req;
|
||||
|
||||
grow_failed:
|
||||
pr_err("Couldn't grow tag array\n");
|
||||
kfree(req->tc);
|
||||
kfree(req->rc);
|
||||
kfree(req->wq);
|
||||
req->tc = req->rc = NULL;
|
||||
req->wq = NULL;
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -648,12 +656,20 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq)
|
|||
return PTR_ERR(req);
|
||||
|
||||
|
||||
/* if we haven't received a response for oldreq,
|
||||
remove it from the list. */
|
||||
/*
|
||||
* if we haven't received a response for oldreq,
|
||||
* remove it from the list, and notify the transport
|
||||
* layer that the reply will never arrive.
|
||||
*/
|
||||
spin_lock(&c->lock);
|
||||
if (oldreq->status == REQ_STATUS_FLSH)
|
||||
if (oldreq->status == REQ_STATUS_FLSH) {
|
||||
list_del(&oldreq->req_list);
|
||||
spin_unlock(&c->lock);
|
||||
spin_unlock(&c->lock);
|
||||
if (c->trans_mod->cancelled)
|
||||
c->trans_mod->cancelled(c, req);
|
||||
} else {
|
||||
spin_unlock(&c->lock);
|
||||
}
|
||||
|
||||
p9_free_req(c, req);
|
||||
return 0;
|
||||
|
|
|
@ -63,6 +63,7 @@ struct p9_fd_opts {
|
|||
int rfd;
|
||||
int wfd;
|
||||
u16 port;
|
||||
int privport;
|
||||
};
|
||||
|
||||
/**
|
||||
|
@ -87,12 +88,15 @@ struct p9_trans_fd {
|
|||
enum {
|
||||
/* Options that take integer arguments */
|
||||
Opt_port, Opt_rfdno, Opt_wfdno, Opt_err,
|
||||
/* Options that take no arguments */
|
||||
Opt_privport,
|
||||
};
|
||||
|
||||
static const match_table_t tokens = {
|
||||
{Opt_port, "port=%u"},
|
||||
{Opt_rfdno, "rfdno=%u"},
|
||||
{Opt_wfdno, "wfdno=%u"},
|
||||
{Opt_privport, "privport"},
|
||||
{Opt_err, NULL},
|
||||
};
|
||||
|
||||
|
@ -161,6 +165,9 @@ static DEFINE_SPINLOCK(p9_poll_lock);
|
|||
static LIST_HEAD(p9_poll_pending_list);
|
||||
static DECLARE_WORK(p9_poll_work, p9_poll_workfn);
|
||||
|
||||
static unsigned int p9_ipport_resv_min = P9_DEF_MIN_RESVPORT;
|
||||
static unsigned int p9_ipport_resv_max = P9_DEF_MAX_RESVPORT;
|
||||
|
||||
static void p9_mux_poll_stop(struct p9_conn *m)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
@ -741,7 +748,7 @@ static int parse_opts(char *params, struct p9_fd_opts *opts)
|
|||
if (!*p)
|
||||
continue;
|
||||
token = match_token(p, tokens, args);
|
||||
if (token != Opt_err) {
|
||||
if ((token != Opt_err) && (token != Opt_privport)) {
|
||||
r = match_int(&args[0], &option);
|
||||
if (r < 0) {
|
||||
p9_debug(P9_DEBUG_ERROR,
|
||||
|
@ -759,6 +766,9 @@ static int parse_opts(char *params, struct p9_fd_opts *opts)
|
|||
case Opt_wfdno:
|
||||
opts->wfd = option;
|
||||
break;
|
||||
case Opt_privport:
|
||||
opts->privport = 1;
|
||||
break;
|
||||
default:
|
||||
continue;
|
||||
}
|
||||
|
@ -898,6 +908,24 @@ static inline int valid_ipaddr4(const char *buf)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int p9_bind_privport(struct socket *sock)
|
||||
{
|
||||
struct sockaddr_in cl;
|
||||
int port, err = -EINVAL;
|
||||
|
||||
memset(&cl, 0, sizeof(cl));
|
||||
cl.sin_family = AF_INET;
|
||||
cl.sin_addr.s_addr = INADDR_ANY;
|
||||
for (port = p9_ipport_resv_max; port >= p9_ipport_resv_min; port--) {
|
||||
cl.sin_port = htons((ushort)port);
|
||||
err = kernel_bind(sock, (struct sockaddr *)&cl, sizeof(cl));
|
||||
if (err != -EADDRINUSE)
|
||||
break;
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args)
|
||||
{
|
||||
|
@ -926,6 +954,16 @@ p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args)
|
|||
return err;
|
||||
}
|
||||
|
||||
if (opts.privport) {
|
||||
err = p9_bind_privport(csocket);
|
||||
if (err < 0) {
|
||||
pr_err("%s (%d): problem binding to privport\n",
|
||||
__func__, task_pid_nr(current));
|
||||
sock_release(csocket);
|
||||
return err;
|
||||
}
|
||||
}
|
||||
|
||||
err = csocket->ops->connect(csocket,
|
||||
(struct sockaddr *)&sin_server,
|
||||
sizeof(struct sockaddr_in), 0);
|
||||
|
|
|
@ -57,9 +57,7 @@
|
|||
#define P9_RDMA_IRD 0
|
||||
#define P9_RDMA_ORD 0
|
||||
#define P9_RDMA_TIMEOUT 30000 /* 30 seconds */
|
||||
#define P9_RDMA_MAXSIZE (4*4096) /* Min SGE is 4, so we can
|
||||
* safely advertise a maxsize
|
||||
* of 64k */
|
||||
#define P9_RDMA_MAXSIZE (1024*1024) /* 1MB */
|
||||
|
||||
/**
|
||||
* struct p9_trans_rdma - RDMA transport instance
|
||||
|
@ -75,7 +73,9 @@
|
|||
* @sq_depth: The depth of the Send Queue
|
||||
* @sq_sem: Semaphore for the SQ
|
||||
* @rq_depth: The depth of the Receive Queue.
|
||||
* @rq_count: Count of requests in the Receive Queue.
|
||||
* @rq_sem: Semaphore for the RQ
|
||||
* @excess_rc : Amount of posted Receive Contexts without a pending request.
|
||||
* See rdma_request()
|
||||
* @addr: The remote peer's address
|
||||
* @req_lock: Protects the active request list
|
||||
* @cm_done: Completion event for connection management tracking
|
||||
|
@ -100,7 +100,8 @@ struct p9_trans_rdma {
|
|||
int sq_depth;
|
||||
struct semaphore sq_sem;
|
||||
int rq_depth;
|
||||
atomic_t rq_count;
|
||||
struct semaphore rq_sem;
|
||||
atomic_t excess_rc;
|
||||
struct sockaddr_in addr;
|
||||
spinlock_t req_lock;
|
||||
|
||||
|
@ -296,6 +297,13 @@ handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma,
|
|||
if (!req)
|
||||
goto err_out;
|
||||
|
||||
/* Check that we have not yet received a reply for this request.
|
||||
*/
|
||||
if (unlikely(req->rc)) {
|
||||
pr_err("Duplicate reply for request %d", tag);
|
||||
goto err_out;
|
||||
}
|
||||
|
||||
req->rc = c->rc;
|
||||
req->status = REQ_STATUS_RCVD;
|
||||
p9_client_cb(client, req);
|
||||
|
@ -336,8 +344,8 @@ static void cq_comp_handler(struct ib_cq *cq, void *cq_context)
|
|||
|
||||
switch (c->wc_op) {
|
||||
case IB_WC_RECV:
|
||||
atomic_dec(&rdma->rq_count);
|
||||
handle_recv(client, rdma, c, wc.status, wc.byte_len);
|
||||
up(&rdma->rq_sem);
|
||||
break;
|
||||
|
||||
case IB_WC_SEND:
|
||||
|
@ -421,32 +429,33 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
|
|||
struct p9_rdma_context *c = NULL;
|
||||
struct p9_rdma_context *rpl_context = NULL;
|
||||
|
||||
/* When an error occurs between posting the recv and the send,
|
||||
* there will be a receive context posted without a pending request.
|
||||
* Since there is no way to "un-post" it, we remember it and skip
|
||||
* post_recv() for the next request.
|
||||
* So here,
|
||||
* see if we are this `next request' and need to absorb an excess rc.
|
||||
* If yes, then drop and free our own, and do not post_recv().
|
||||
**/
|
||||
if (unlikely(atomic_read(&rdma->excess_rc) > 0)) {
|
||||
if ((atomic_sub_return(1, &rdma->excess_rc) >= 0)) {
|
||||
/* Got one ! */
|
||||
kfree(req->rc);
|
||||
req->rc = NULL;
|
||||
goto dont_need_post_recv;
|
||||
} else {
|
||||
/* We raced and lost. */
|
||||
atomic_inc(&rdma->excess_rc);
|
||||
}
|
||||
}
|
||||
|
||||
/* Allocate an fcall for the reply */
|
||||
rpl_context = kmalloc(sizeof *rpl_context, GFP_NOFS);
|
||||
if (!rpl_context) {
|
||||
err = -ENOMEM;
|
||||
goto err_close;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the request has a buffer, steal it, otherwise
|
||||
* allocate a new one. Typically, requests should already
|
||||
* have receive buffers allocated and just swap them around
|
||||
*/
|
||||
if (!req->rc) {
|
||||
req->rc = kmalloc(sizeof(struct p9_fcall)+client->msize,
|
||||
GFP_NOFS);
|
||||
if (req->rc) {
|
||||
req->rc->sdata = (char *) req->rc +
|
||||
sizeof(struct p9_fcall);
|
||||
req->rc->capacity = client->msize;
|
||||
}
|
||||
goto recv_error;
|
||||
}
|
||||
rpl_context->rc = req->rc;
|
||||
if (!rpl_context->rc) {
|
||||
err = -ENOMEM;
|
||||
goto err_free2;
|
||||
}
|
||||
|
||||
/*
|
||||
* Post a receive buffer for this request. We need to ensure
|
||||
|
@ -455,29 +464,35 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
|
|||
* outstanding request, so we must keep a count to avoid
|
||||
* overflowing the RQ.
|
||||
*/
|
||||
if (atomic_inc_return(&rdma->rq_count) <= rdma->rq_depth) {
|
||||
err = post_recv(client, rpl_context);
|
||||
if (err)
|
||||
goto err_free1;
|
||||
} else
|
||||
atomic_dec(&rdma->rq_count);
|
||||
if (down_interruptible(&rdma->rq_sem)) {
|
||||
err = -EINTR;
|
||||
goto recv_error;
|
||||
}
|
||||
|
||||
err = post_recv(client, rpl_context);
|
||||
if (err) {
|
||||
p9_debug(P9_DEBUG_FCALL, "POST RECV failed\n");
|
||||
goto recv_error;
|
||||
}
|
||||
/* remove posted receive buffer from request structure */
|
||||
req->rc = NULL;
|
||||
|
||||
dont_need_post_recv:
|
||||
/* Post the request */
|
||||
c = kmalloc(sizeof *c, GFP_NOFS);
|
||||
if (!c) {
|
||||
err = -ENOMEM;
|
||||
goto err_free1;
|
||||
goto send_error;
|
||||
}
|
||||
c->req = req;
|
||||
|
||||
c->busa = ib_dma_map_single(rdma->cm_id->device,
|
||||
c->req->tc->sdata, c->req->tc->size,
|
||||
DMA_TO_DEVICE);
|
||||
if (ib_dma_mapping_error(rdma->cm_id->device, c->busa))
|
||||
goto error;
|
||||
if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) {
|
||||
err = -EIO;
|
||||
goto send_error;
|
||||
}
|
||||
|
||||
sge.addr = c->busa;
|
||||
sge.length = c->req->tc->size;
|
||||
|
@ -491,22 +506,32 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
|
|||
wr.sg_list = &sge;
|
||||
wr.num_sge = 1;
|
||||
|
||||
if (down_interruptible(&rdma->sq_sem))
|
||||
goto error;
|
||||
if (down_interruptible(&rdma->sq_sem)) {
|
||||
err = -EINTR;
|
||||
goto send_error;
|
||||
}
|
||||
|
||||
return ib_post_send(rdma->qp, &wr, &bad_wr);
|
||||
err = ib_post_send(rdma->qp, &wr, &bad_wr);
|
||||
if (err)
|
||||
goto send_error;
|
||||
|
||||
error:
|
||||
/* Success */
|
||||
return 0;
|
||||
|
||||
/* Handle errors that happened during or while preparing the send: */
|
||||
send_error:
|
||||
kfree(c);
|
||||
kfree(rpl_context->rc);
|
||||
p9_debug(P9_DEBUG_ERROR, "Error %d in rdma_request()\n", err);
|
||||
|
||||
/* Ach.
|
||||
* We did post_recv(), but not send. We have one post_recv() in excess.
|
||||
*/
|
||||
atomic_inc(&rdma->excess_rc);
|
||||
return err;
|
||||
|
||||
/* Handle errors that happened during or while preparing post_recv(): */
|
||||
recv_error:
|
||||
kfree(rpl_context);
|
||||
p9_debug(P9_DEBUG_ERROR, "EIO\n");
|
||||
return -EIO;
|
||||
err_free1:
|
||||
kfree(rpl_context->rc);
|
||||
err_free2:
|
||||
kfree(rpl_context);
|
||||
err_close:
|
||||
spin_lock_irqsave(&rdma->req_lock, flags);
|
||||
if (rdma->state < P9_RDMA_CLOSING) {
|
||||
rdma->state = P9_RDMA_CLOSING;
|
||||
|
@ -551,7 +576,8 @@ static struct p9_trans_rdma *alloc_rdma(struct p9_rdma_opts *opts)
|
|||
spin_lock_init(&rdma->req_lock);
|
||||
init_completion(&rdma->cm_done);
|
||||
sema_init(&rdma->sq_sem, rdma->sq_depth);
|
||||
atomic_set(&rdma->rq_count, 0);
|
||||
sema_init(&rdma->rq_sem, rdma->rq_depth);
|
||||
atomic_set(&rdma->excess_rc, 0);
|
||||
|
||||
return rdma;
|
||||
}
|
||||
|
@ -562,6 +588,17 @@ static int rdma_cancel(struct p9_client *client, struct p9_req_t *req)
|
|||
return 1;
|
||||
}
|
||||
|
||||
/* A request has been fully flushed without a reply.
|
||||
* That means we have posted one buffer in excess.
|
||||
*/
|
||||
static int rdma_cancelled(struct p9_client *client, struct p9_req_t *req)
|
||||
{
|
||||
struct p9_trans_rdma *rdma = client->trans;
|
||||
|
||||
atomic_inc(&rdma->excess_rc);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* trans_create_rdma - Transport method for creating a transport instance
|
||||
* @client: client instance
|
||||
|
|
Loading…
Reference in New Issue