dlm for 5.10

This set continues the ongoing rework of the low level
 communication layer in the dlm.  The focus here is on
 improvements to connection handling, and reworking the
 receiving of messages.
 -----BEGIN PGP SIGNATURE-----
 
 iQIcBAABAgAGBQJfhJylAAoJEDgbc8f8gGmqiRYP/R8rHeiAtBuTQebIG2S2FRjT
 OoCsr6F240SxyNJYAJlsV4kWGLtRQ0qnHhWku6nAreg9Yw/+0C7ZRHwDNoJsE2/9
 JuCW6HhqN6jYcWWhV3BZA7wvWzPfzdC7Jnla7f9GGB9ToFlani7CLEj5qzkyEIxh
 KaXfFGTHBCftM20HaNcExqBwmB0bn7jiavlR2Nqnsh/FW+er1HPa4rIIJqYy6k31
 ymf0XJ3kZYgf/I4iArUZkR7FKHHy1GhWW10NSQ/DDfwGtkbQ1Lw8IdBZ/zkyheAG
 aInFcxEt+hQPTMOBSB4hJn4+QPvyNAd9UxjFLuaHawUNglH73PXBk77kGgj8xJGU
 qRcaugX5brVV1tpY2UPQO8MC8ITadmKRa7uZkRoI2hIZfsZO2z+TSgRkegsSDIlD
 wXYLQslSYImZ5k42wHqaONxD4nW/haZxdrhul4sP8Z1+d5WmoPE1UDlXMTvbyp/N
 iW3+jhvPc1NAyzEPdmMj/y7zmCX+yrlkRrO1YjTkpEOIpN5uUaxg/1g8ok5OProR
 Xyx4b9gv3r8/3CGQvYTOiNr9ZUj8kPR8rWv4fbQXAt+kQMAKnIjyRKp4SpfFlp1L
 pKv9UU/sdUsduCoiRDD5+SiqNLGDWSH5UAkYvkH0cz3QDELSEYkLlMA6O00QsDAH
 o1f+TFcNFKsphk47DXVo
 =/n6H
 -----END PGP SIGNATURE-----

Merge tag 'dlm-5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/teigland/linux-dlm

Pull dlm updates from David Teigland:
 "This set continues the ongoing rework of the low level communication
  layer in the dlm.

  The focus here is on improvements to connection handling, and
  reworking the receiving of messages"

* tag 'dlm-5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/teigland/linux-dlm:
  fs: dlm: fix race in nodeid2con
  fs: dlm: rework receive handling
  fs: dlm: disallow buffer size below default
  fs: dlm: handle range check as callback
  fs: dlm: fix mark per nodeid setting
  fs: dlm: remove lock dependency warning
  fs: dlm: use free_con to free connection
  fs: dlm: handle possible othercon writequeues
  fs: dlm: move free writequeue into con free
  fs: dlm: fix configfs memory leak
  fs: dlm: fix dlm_local_addr memory leak
  fs: dlm: make connection hash lockless
  fs: dlm: synchronize dlm before shutdown
This commit is contained in:
Linus Torvalds 2020-10-13 08:59:39 -07:00
commit c024a81125
6 changed files with 258 additions and 277 deletions

View File

@ -4,6 +4,7 @@ menuconfig DLM
depends on INET
depends on SYSFS && CONFIGFS_FS && (IPV6 || IPV6=n)
select IP_SCTP
select SRCU
help
A general purpose distributed lock manager for kernel or userspace
applications.

View File

@ -125,7 +125,7 @@ static ssize_t cluster_cluster_name_store(struct config_item *item,
CONFIGFS_ATTR(cluster_, cluster_name);
static ssize_t cluster_set(struct dlm_cluster *cl, unsigned int *cl_field,
int *info_field, int check_zero,
int *info_field, bool (*check_cb)(unsigned int x),
const char *buf, size_t len)
{
unsigned int x;
@ -137,7 +137,7 @@ static ssize_t cluster_set(struct dlm_cluster *cl, unsigned int *cl_field,
if (rc)
return rc;
if (check_zero && !x)
if (check_cb && check_cb(x))
return -EINVAL;
*cl_field = x;
@ -146,13 +146,13 @@ static ssize_t cluster_set(struct dlm_cluster *cl, unsigned int *cl_field,
return len;
}
#define CLUSTER_ATTR(name, check_zero) \
#define CLUSTER_ATTR(name, check_cb) \
static ssize_t cluster_##name##_store(struct config_item *item, \
const char *buf, size_t len) \
{ \
struct dlm_cluster *cl = config_item_to_cluster(item); \
return cluster_set(cl, &cl->cl_##name, &dlm_config.ci_##name, \
check_zero, buf, len); \
check_cb, buf, len); \
} \
static ssize_t cluster_##name##_show(struct config_item *item, char *buf) \
{ \
@ -161,20 +161,30 @@ static ssize_t cluster_##name##_show(struct config_item *item, char *buf) \
} \
CONFIGFS_ATTR(cluster_, name);
CLUSTER_ATTR(tcp_port, 1);
CLUSTER_ATTR(buffer_size, 1);
CLUSTER_ATTR(rsbtbl_size, 1);
CLUSTER_ATTR(recover_timer, 1);
CLUSTER_ATTR(toss_secs, 1);
CLUSTER_ATTR(scan_secs, 1);
CLUSTER_ATTR(log_debug, 0);
CLUSTER_ATTR(log_info, 0);
CLUSTER_ATTR(protocol, 0);
CLUSTER_ATTR(mark, 0);
CLUSTER_ATTR(timewarn_cs, 1);
CLUSTER_ATTR(waitwarn_us, 0);
CLUSTER_ATTR(new_rsb_count, 0);
CLUSTER_ATTR(recover_callbacks, 0);
static bool dlm_check_zero(unsigned int x)
{
return !x;
}
static bool dlm_check_buffer_size(unsigned int x)
{
return (x < DEFAULT_BUFFER_SIZE);
}
CLUSTER_ATTR(tcp_port, dlm_check_zero);
CLUSTER_ATTR(buffer_size, dlm_check_buffer_size);
CLUSTER_ATTR(rsbtbl_size, dlm_check_zero);
CLUSTER_ATTR(recover_timer, dlm_check_zero);
CLUSTER_ATTR(toss_secs, dlm_check_zero);
CLUSTER_ATTR(scan_secs, dlm_check_zero);
CLUSTER_ATTR(log_debug, NULL);
CLUSTER_ATTR(log_info, NULL);
CLUSTER_ATTR(protocol, NULL);
CLUSTER_ATTR(mark, NULL);
CLUSTER_ATTR(timewarn_cs, dlm_check_zero);
CLUSTER_ATTR(waitwarn_us, NULL);
CLUSTER_ATTR(new_rsb_count, NULL);
CLUSTER_ATTR(recover_callbacks, NULL);
static struct configfs_attribute *cluster_attrs[] = {
[CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port,
@ -221,6 +231,7 @@ struct dlm_space {
struct list_head members;
struct mutex members_lock;
int members_count;
struct dlm_nodes *nds;
};
struct dlm_comms {
@ -430,6 +441,7 @@ static struct config_group *make_space(struct config_group *g, const char *name)
INIT_LIST_HEAD(&sp->members);
mutex_init(&sp->members_lock);
sp->members_count = 0;
sp->nds = nds;
return &sp->group;
fail:
@ -451,6 +463,7 @@ static void drop_space(struct config_group *g, struct config_item *i)
static void release_space(struct config_item *i)
{
struct dlm_space *sp = config_item_to_space(i);
kfree(sp->nds);
kfree(sp);
}
@ -857,18 +870,22 @@ int dlm_comm_seq(int nodeid, uint32_t *seq)
return 0;
}
int dlm_comm_mark(int nodeid, unsigned int *mark)
void dlm_comm_mark(int nodeid, unsigned int *mark)
{
struct dlm_comm *cm;
cm = get_comm(nodeid);
if (!cm)
return -ENOENT;
if (!cm) {
*mark = dlm_config.ci_mark;
return;
}
if (cm->mark)
*mark = cm->mark;
else
*mark = dlm_config.ci_mark;
*mark = cm->mark;
put_comm(cm);
return 0;
}
int dlm_our_nodeid(void)
@ -889,7 +906,6 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num)
/* Config file defaults */
#define DEFAULT_TCP_PORT 21064
#define DEFAULT_BUFFER_SIZE 4096
#define DEFAULT_RSBTBL_SIZE 1024
#define DEFAULT_RECOVER_TIMER 5
#define DEFAULT_TOSS_SECS 10

View File

@ -12,6 +12,8 @@
#ifndef __CONFIG_DOT_H__
#define __CONFIG_DOT_H__
#define DEFAULT_BUFFER_SIZE 4096
struct dlm_config_node {
int nodeid;
int weight;
@ -46,7 +48,7 @@ void dlm_config_exit(void);
int dlm_config_nodes(char *lsname, struct dlm_config_node **nodes_out,
int *count_out);
int dlm_comm_seq(int nodeid, uint32_t *seq);
int dlm_comm_mark(int nodeid, unsigned int *mark);
void dlm_comm_mark(int nodeid, unsigned int *mark);
int dlm_our_nodeid(void);
int dlm_our_addr(struct sockaddr_storage *addr, int num);

View File

@ -65,40 +65,6 @@
#define MAX_SEND_MSG_COUNT 25
#define DLM_SHUTDOWN_WAIT_TIMEOUT msecs_to_jiffies(10000)
struct cbuf {
unsigned int base;
unsigned int len;
unsigned int mask;
};
static void cbuf_add(struct cbuf *cb, int n)
{
cb->len += n;
}
static int cbuf_data(struct cbuf *cb)
{
return ((cb->base + cb->len) & cb->mask);
}
static void cbuf_init(struct cbuf *cb, int size)
{
cb->base = cb->len = 0;
cb->mask = size-1;
}
static void cbuf_eat(struct cbuf *cb, int n)
{
cb->len -= n;
cb->base += n;
cb->base &= cb->mask;
}
static bool cbuf_empty(struct cbuf *cb)
{
return cb->len == 0;
}
struct connection {
struct socket *sock; /* NULL if not connected */
uint32_t nodeid; /* So we know who we are in the list */
@ -117,8 +83,6 @@ struct connection {
int (*rx_action) (struct connection *); /* What to do when active */
void (*connect_action) (struct connection *); /* What to do to connect */
void (*shutdown_action)(struct connection *con); /* What to do to shutdown */
struct page *rx_page;
struct cbuf cb;
int retries;
#define MAX_CONNECT_RETRIES 3
struct hlist_node list;
@ -126,6 +90,10 @@ struct connection {
struct work_struct rwork; /* Receive workqueue */
struct work_struct swork; /* Send workqueue */
wait_queue_head_t shutdown_wait; /* wait for graceful shutdown */
unsigned char *rx_buf;
int rx_buflen;
int rx_leftover;
struct rcu_head rcu;
};
#define sock2con(x) ((struct connection *)(x)->sk_user_data)
@ -167,8 +135,8 @@ static struct workqueue_struct *recv_workqueue;
static struct workqueue_struct *send_workqueue;
static struct hlist_head connection_hash[CONN_HASH_SIZE];
static DEFINE_MUTEX(connections_lock);
static struct kmem_cache *con_cache;
static DEFINE_SPINLOCK(connections_lock);
DEFINE_STATIC_SRCU(connections_srcu);
static void process_recv_sockets(struct work_struct *work);
static void process_send_sockets(struct work_struct *work);
@ -184,15 +152,20 @@ static inline int nodeid_hash(int nodeid)
static struct connection *__find_con(int nodeid)
{
int r;
int r, idx;
struct connection *con;
r = nodeid_hash(nodeid);
hlist_for_each_entry(con, &connection_hash[r], list) {
if (con->nodeid == nodeid)
idx = srcu_read_lock(&connections_srcu);
hlist_for_each_entry_rcu(con, &connection_hash[r], list) {
if (con->nodeid == nodeid) {
srcu_read_unlock(&connections_srcu, idx);
return con;
}
}
srcu_read_unlock(&connections_srcu, idx);
return NULL;
}
@ -200,21 +173,25 @@ static struct connection *__find_con(int nodeid)
* If 'allocation' is zero then we don't attempt to create a new
* connection structure for this node.
*/
static struct connection *__nodeid2con(int nodeid, gfp_t alloc)
static struct connection *nodeid2con(int nodeid, gfp_t alloc)
{
struct connection *con = NULL;
struct connection *con, *tmp;
int r;
con = __find_con(nodeid);
if (con || !alloc)
return con;
con = kmem_cache_zalloc(con_cache, alloc);
con = kzalloc(sizeof(*con), alloc);
if (!con)
return NULL;
r = nodeid_hash(nodeid);
hlist_add_head(&con->list, &connection_hash[r]);
con->rx_buflen = dlm_config.ci_buffer_size;
con->rx_buf = kmalloc(con->rx_buflen, GFP_NOFS);
if (!con->rx_buf) {
kfree(con);
return NULL;
}
con->nodeid = nodeid;
mutex_init(&con->sock_mutex);
@ -233,31 +210,41 @@ static struct connection *__nodeid2con(int nodeid, gfp_t alloc)
con->rx_action = zerocon->rx_action;
}
r = nodeid_hash(nodeid);
spin_lock(&connections_lock);
/* Because multiple workqueues/threads calls this function it can
* race on multiple cpu's. Instead of locking hot path __find_con()
* we just check in rare cases of recently added nodes again
* under protection of connections_lock. If this is the case we
* abort our connection creation and return the existing connection.
*/
tmp = __find_con(nodeid);
if (tmp) {
spin_unlock(&connections_lock);
kfree(con->rx_buf);
kfree(con);
return tmp;
}
hlist_add_head_rcu(&con->list, &connection_hash[r]);
spin_unlock(&connections_lock);
return con;
}
/* Loop round all connections */
static void foreach_conn(void (*conn_func)(struct connection *c))
{
int i;
struct hlist_node *n;
int i, idx;
struct connection *con;
idx = srcu_read_lock(&connections_srcu);
for (i = 0; i < CONN_HASH_SIZE; i++) {
hlist_for_each_entry_safe(con, n, &connection_hash[i], list)
hlist_for_each_entry_rcu(con, &connection_hash[i], list)
conn_func(con);
}
}
static struct connection *nodeid2con(int nodeid, gfp_t allocation)
{
struct connection *con;
mutex_lock(&connections_lock);
con = __nodeid2con(nodeid, allocation);
mutex_unlock(&connections_lock);
return con;
srcu_read_unlock(&connections_srcu, idx);
}
static struct dlm_node_addr *find_node_addr(int nodeid)
@ -614,11 +601,8 @@ static void close_connection(struct connection *con, bool and_other,
/* Will only re-enter once. */
close_connection(con->othercon, false, true, true);
}
if (con->rx_page) {
__free_page(con->rx_page);
con->rx_page = NULL;
}
con->rx_leftover = 0;
con->retries = 0;
mutex_unlock(&con->sock_mutex);
clear_bit(CF_CLOSING, &con->flags);
@ -672,16 +656,33 @@ static void dlm_tcp_shutdown(struct connection *con)
shutdown_connection(con);
}
static int con_realloc_receive_buf(struct connection *con, int newlen)
{
unsigned char *newbuf;
newbuf = kmalloc(newlen, GFP_NOFS);
if (!newbuf)
return -ENOMEM;
/* copy any leftover from last receive */
if (con->rx_leftover)
memmove(newbuf, con->rx_buf, con->rx_leftover);
/* swap to new buffer space */
kfree(con->rx_buf);
con->rx_buflen = newlen;
con->rx_buf = newbuf;
return 0;
}
/* Data received from remote end */
static int receive_from_sock(struct connection *con)
{
int ret = 0;
struct msghdr msg = {};
struct kvec iov[2];
unsigned len;
int r;
int call_again_soon = 0;
int nvec;
struct msghdr msg;
struct kvec iov;
int ret, buflen;
mutex_lock(&con->sock_mutex);
@ -689,71 +690,55 @@ static int receive_from_sock(struct connection *con)
ret = -EAGAIN;
goto out_close;
}
if (con->nodeid == 0) {
ret = -EINVAL;
goto out_close;
}
if (con->rx_page == NULL) {
/*
* This doesn't need to be atomic, but I think it should
* improve performance if it is.
*/
con->rx_page = alloc_page(GFP_ATOMIC);
if (con->rx_page == NULL)
/* realloc if we get new buffer size to read out */
buflen = dlm_config.ci_buffer_size;
if (con->rx_buflen != buflen && con->rx_leftover <= buflen) {
ret = con_realloc_receive_buf(con, buflen);
if (ret < 0)
goto out_resched;
cbuf_init(&con->cb, PAGE_SIZE);
}
/*
* iov[0] is the bit of the circular buffer between the current end
* point (cb.base + cb.len) and the end of the buffer.
/* calculate new buffer parameter regarding last receive and
* possible leftover bytes
*/
iov[0].iov_len = con->cb.base - cbuf_data(&con->cb);
iov[0].iov_base = page_address(con->rx_page) + cbuf_data(&con->cb);
iov[1].iov_len = 0;
nvec = 1;
iov.iov_base = con->rx_buf + con->rx_leftover;
iov.iov_len = con->rx_buflen - con->rx_leftover;
/*
* iov[1] is the bit of the circular buffer between the start of the
* buffer and the start of the currently used section (cb.base)
*/
if (cbuf_data(&con->cb) >= con->cb.base) {
iov[0].iov_len = PAGE_SIZE - cbuf_data(&con->cb);
iov[1].iov_len = con->cb.base;
iov[1].iov_base = page_address(con->rx_page);
nvec = 2;
}
len = iov[0].iov_len + iov[1].iov_len;
iov_iter_kvec(&msg.msg_iter, READ, iov, nvec, len);
r = ret = sock_recvmsg(con->sock, &msg, MSG_DONTWAIT | MSG_NOSIGNAL);
memset(&msg, 0, sizeof(msg));
msg.msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL;
ret = kernel_recvmsg(con->sock, &msg, &iov, 1, iov.iov_len,
msg.msg_flags);
if (ret <= 0)
goto out_close;
else if (ret == len)
else if (ret == iov.iov_len)
call_again_soon = 1;
cbuf_add(&con->cb, ret);
ret = dlm_process_incoming_buffer(con->nodeid,
page_address(con->rx_page),
con->cb.base, con->cb.len,
PAGE_SIZE);
if (ret < 0) {
log_print("lowcomms err %d: addr=%p, base=%u, len=%u, read=%d",
ret, page_address(con->rx_page), con->cb.base,
con->cb.len, r);
cbuf_eat(&con->cb, r);
} else {
cbuf_eat(&con->cb, ret);
}
/* new buflen according readed bytes and leftover from last receive */
buflen = ret + con->rx_leftover;
ret = dlm_process_incoming_buffer(con->nodeid, con->rx_buf, buflen);
if (ret < 0)
goto out_close;
if (cbuf_empty(&con->cb) && !call_again_soon) {
__free_page(con->rx_page);
con->rx_page = NULL;
/* calculate leftover bytes from process and put it into begin of
* the receive buffer, so next receive we have the full message
* at the start address of the receive buffer.
*/
con->rx_leftover = buflen - ret;
if (con->rx_leftover) {
memmove(con->rx_buf, con->rx_buf + ret,
con->rx_leftover);
call_again_soon = true;
}
if (call_again_soon)
goto out_resched;
mutex_unlock(&con->sock_mutex);
return 0;
@ -791,13 +776,11 @@ static int accept_from_sock(struct connection *con)
int nodeid;
struct connection *newcon;
struct connection *addcon;
unsigned int mark;
mutex_lock(&connections_lock);
if (!dlm_allow_conn) {
mutex_unlock(&connections_lock);
return -1;
}
mutex_unlock(&connections_lock);
mutex_lock_nested(&con->sock_mutex, 0);
@ -830,6 +813,9 @@ static int accept_from_sock(struct connection *con)
return -1;
}
dlm_comm_mark(nodeid, &mark);
sock_set_mark(newsock->sk, mark);
log_print("got connection from %d", nodeid);
/* Check to see if we already have a connection to this node. This
@ -847,13 +833,24 @@ static int accept_from_sock(struct connection *con)
struct connection *othercon = newcon->othercon;
if (!othercon) {
othercon = kmem_cache_zalloc(con_cache, GFP_NOFS);
othercon = kzalloc(sizeof(*othercon), GFP_NOFS);
if (!othercon) {
log_print("failed to allocate incoming socket");
mutex_unlock(&newcon->sock_mutex);
result = -ENOMEM;
goto accept_err;
}
othercon->rx_buflen = dlm_config.ci_buffer_size;
othercon->rx_buf = kmalloc(othercon->rx_buflen, GFP_NOFS);
if (!othercon->rx_buf) {
mutex_unlock(&newcon->sock_mutex);
kfree(othercon);
log_print("failed to allocate incoming socket receive buffer");
result = -ENOMEM;
goto accept_err;
}
othercon->nodeid = nodeid;
othercon->rx_action = receive_from_sock;
mutex_init(&othercon->sock_mutex);
@ -975,6 +972,8 @@ static void sctp_connect_to_sock(struct connection *con)
return;
}
dlm_comm_mark(con->nodeid, &mark);
mutex_lock(&con->sock_mutex);
/* Some odd races can cause double-connects, ignore them */
@ -999,11 +998,6 @@ static void sctp_connect_to_sock(struct connection *con)
if (result < 0)
goto socket_err;
/* set skb mark */
result = dlm_comm_mark(con->nodeid, &mark);
if (result < 0)
goto bind_err;
sock_set_mark(sock->sk, mark);
con->rx_action = receive_from_sock;
@ -1076,6 +1070,8 @@ static void tcp_connect_to_sock(struct connection *con)
return;
}
dlm_comm_mark(con->nodeid, &mark);
mutex_lock(&con->sock_mutex);
if (con->retries++ > MAX_CONNECT_RETRIES)
goto out;
@ -1090,11 +1086,6 @@ static void tcp_connect_to_sock(struct connection *con)
if (result < 0)
goto out_err;
/* set skb mark */
result = dlm_comm_mark(con->nodeid, &mark);
if (result < 0)
goto out_err;
sock_set_mark(sock->sk, mark);
memset(&saddr, 0, sizeof(saddr));
@ -1238,6 +1229,14 @@ static void init_local(void)
}
}
static void deinit_local(void)
{
int i;
for (i = 0; i < dlm_local_count; i++)
kfree(dlm_local_addr[i]);
}
/* Initialise SCTP socket and bind to all interfaces */
static int sctp_listen_for_all(void)
{
@ -1546,13 +1545,6 @@ static void process_send_sockets(struct work_struct *work)
send_to_sock(con);
}
/* Discard all entries on the write queues */
static void clean_writequeues(void)
{
foreach_conn(clean_one_writequeue);
}
static void work_stop(void)
{
if (recv_workqueue)
@ -1608,26 +1600,34 @@ static void shutdown_conn(struct connection *con)
con->shutdown_action(con);
}
static void connection_release(struct rcu_head *rcu)
{
struct connection *con = container_of(rcu, struct connection, rcu);
kfree(con->rx_buf);
kfree(con);
}
static void free_conn(struct connection *con)
{
close_connection(con, true, true, true);
if (con->othercon)
kmem_cache_free(con_cache, con->othercon);
hlist_del(&con->list);
kmem_cache_free(con_cache, con);
spin_lock(&connections_lock);
hlist_del_rcu(&con->list);
spin_unlock(&connections_lock);
if (con->othercon) {
clean_one_writequeue(con->othercon);
call_rcu(&con->othercon->rcu, connection_release);
}
clean_one_writequeue(con);
call_rcu(&con->rcu, connection_release);
}
static void work_flush(void)
{
int ok;
int ok, idx;
int i;
struct hlist_node *n;
struct connection *con;
if (recv_workqueue)
flush_workqueue(recv_workqueue);
if (send_workqueue)
flush_workqueue(send_workqueue);
do {
ok = 1;
foreach_conn(stop_conn);
@ -1635,9 +1635,10 @@ static void work_flush(void)
flush_workqueue(recv_workqueue);
if (send_workqueue)
flush_workqueue(send_workqueue);
idx = srcu_read_lock(&connections_srcu);
for (i = 0; i < CONN_HASH_SIZE && ok; i++) {
hlist_for_each_entry_safe(con, n,
&connection_hash[i], list) {
hlist_for_each_entry_rcu(con, &connection_hash[i],
list) {
ok &= test_bit(CF_READ_PENDING, &con->flags);
ok &= test_bit(CF_WRITE_PENDING, &con->flags);
if (con->othercon) {
@ -1648,6 +1649,7 @@ static void work_flush(void)
}
}
}
srcu_read_unlock(&connections_srcu, idx);
} while (!ok);
}
@ -1656,16 +1658,18 @@ void dlm_lowcomms_stop(void)
/* Set all the flags to prevent any
socket activity.
*/
mutex_lock(&connections_lock);
dlm_allow_conn = 0;
mutex_unlock(&connections_lock);
if (recv_workqueue)
flush_workqueue(recv_workqueue);
if (send_workqueue)
flush_workqueue(send_workqueue);
foreach_conn(shutdown_conn);
work_flush();
clean_writequeues();
foreach_conn(free_conn);
work_stop();
kmem_cache_destroy(con_cache);
deinit_local();
}
int dlm_lowcomms_start(void)
@ -1684,16 +1688,9 @@ int dlm_lowcomms_start(void)
goto fail;
}
error = -ENOMEM;
con_cache = kmem_cache_create("dlm_conn", sizeof(struct connection),
__alignof__(struct connection), 0,
NULL);
if (!con_cache)
goto fail;
error = work_start();
if (error)
goto fail_destroy;
goto fail;
dlm_allow_conn = 1;
@ -1710,12 +1707,8 @@ int dlm_lowcomms_start(void)
fail_unlisten:
dlm_allow_conn = 0;
con = nodeid2con(0,0);
if (con) {
close_connection(con, false, true, true);
kmem_cache_free(con_cache, con);
}
fail_destroy:
kmem_cache_destroy(con_cache);
if (con)
free_conn(con);
fail:
return error;
}

View File

@ -22,114 +22,84 @@
* into packets and sends them to the comms layer.
*/
#include <asm/unaligned.h>
#include "dlm_internal.h"
#include "lowcomms.h"
#include "config.h"
#include "lock.h"
#include "midcomms.h"
static void copy_from_cb(void *dst, const void *base, unsigned offset,
unsigned len, unsigned limit)
{
unsigned copy = len;
if ((copy + offset) > limit)
copy = limit - offset;
memcpy(dst, base + offset, copy);
len -= copy;
if (len)
memcpy(dst + copy, base, len);
}
/*
* Called from the low-level comms layer to process a buffer of
* commands.
*
* Only complete messages are processed here, any "spare" bytes from
* the end of a buffer are saved and tacked onto the front of the next
* message that comes in. I doubt this will happen very often but we
* need to be able to cope with it and I don't want the task to be waiting
* for packets to come in when there is useful work to be done.
*/
int dlm_process_incoming_buffer(int nodeid, const void *base,
unsigned offset, unsigned len, unsigned limit)
int dlm_process_incoming_buffer(int nodeid, unsigned char *buf, int len)
{
union {
unsigned char __buf[DLM_INBUF_LEN];
/* this is to force proper alignment on some arches */
union dlm_packet p;
} __tmp;
union dlm_packet *p = &__tmp.p;
int ret = 0;
int err = 0;
const unsigned char *ptr = buf;
const struct dlm_header *hd;
uint16_t msglen;
uint32_t lockspace;
int ret = 0;
while (len > sizeof(struct dlm_header)) {
while (len >= sizeof(struct dlm_header)) {
hd = (struct dlm_header *)ptr;
/* Copy just the header to check the total length. The
message may wrap around the end of the buffer back to the
start, so we need to use a temp buffer and copy_from_cb. */
copy_from_cb(p, base, offset, sizeof(struct dlm_header),
limit);
msglen = le16_to_cpu(p->header.h_length);
lockspace = p->header.h_lockspace;
err = -EINVAL;
if (msglen < sizeof(struct dlm_header))
break;
if (p->header.h_cmd == DLM_MSG) {
if (msglen < sizeof(struct dlm_message))
break;
} else {
if (msglen < sizeof(struct dlm_rcom))
break;
/* no message should be more than this otherwise we
* cannot deliver this message to upper layers
*/
msglen = get_unaligned_le16(&hd->h_length);
if (msglen > DEFAULT_BUFFER_SIZE) {
log_print("received invalid length header: %u, will abort message parsing",
msglen);
return -EBADMSG;
}
err = -E2BIG;
if (msglen > dlm_config.ci_buffer_size) {
log_print("message size %d from %d too big, buf len %d",
msglen, nodeid, len);
break;
}
err = 0;
/* If only part of the full message is contained in this
buffer, then do nothing and wait for lowcomms to call
us again later with more data. We return 0 meaning
we've consumed none of the input buffer. */
/* caller will take care that leftover
* will be parsed next call with more data
*/
if (msglen > len)
break;
/* Allocate a larger temp buffer if the full message won't fit
in the buffer on the stack (which should work for most
ordinary messages). */
switch (hd->h_cmd) {
case DLM_MSG:
if (msglen < sizeof(struct dlm_message)) {
log_print("dlm msg too small: %u, will skip this message",
msglen);
goto skip;
}
if (msglen > sizeof(__tmp) && p == &__tmp.p) {
p = kmalloc(dlm_config.ci_buffer_size, GFP_NOFS);
if (p == NULL)
return ret;
break;
case DLM_RCOM:
if (msglen < sizeof(struct dlm_rcom)) {
log_print("dlm rcom msg too small: %u, will skip this message",
msglen);
goto skip;
}
break;
default:
log_print("unsupported h_cmd received: %u, will skip this message",
hd->h_cmd);
goto skip;
}
copy_from_cb(p, base, offset, msglen, limit);
BUG_ON(lockspace != p->header.h_lockspace);
/* for aligned memory access, we just copy current message
* to begin of the buffer which contains already parsed buffer
* data and should provide align access for upper layers
* because the start address of the buffer has a aligned
* address. This memmove can be removed when the upperlayer
* is capable of unaligned memory access.
*/
memmove(buf, ptr, msglen);
dlm_receive_buffer((union dlm_packet *)buf, nodeid);
skip:
ret += msglen;
offset += msglen;
offset &= (limit - 1);
len -= msglen;
dlm_receive_buffer(p, nodeid);
ptr += msglen;
}
if (p != &__tmp.p)
kfree(p);
return err ? err : ret;
return ret;
}

View File

@ -12,8 +12,7 @@
#ifndef __MIDCOMMS_DOT_H__
#define __MIDCOMMS_DOT_H__
int dlm_process_incoming_buffer(int nodeid, const void *base, unsigned offset,
unsigned len, unsigned limit);
int dlm_process_incoming_buffer(int nodeid, unsigned char *buf, int buflen);
#endif /* __MIDCOMMS_DOT_H__ */