NFS client bugfixes for Linux 4.20

Highlights include:
 
 Stable fixes:
  - Fix a page leak when using RPCSEC_GSS/krb5p to encrypt data.
 
 Bugfixes:
  - Fix a regression that causes the RPC receive code to hang
  - Fix call_connect_status() so that it handles tasks that got transmitted
    while queued waiting for the socket lock.
  - Fix a memory leak in call_encode()
  - Fix several other connect races.
  - Fix receive code error handling.
  - Use the discard iterator rather than MSG_TRUNC for compatibility with
    AF_UNIX/AF_LOCAL sockets.
  - nfs: don't dirty kernel pages read by direct-io
  - pnfs/Flexfiles fix to enforce per-mirror stateid only for NFSv4 data
    servers
 -----BEGIN PGP SIGNATURE-----
 
 iQIcBAABAgAGBQJcCWIOAAoJEA4mA3inWBJc3BsP/i/VXd0ZSxxL8i/++qCR1KGT
 /p0+t2HbrhPzb3jKmuaBe/6T6bLMbpmkwbesA6cHENkaPiOqxPhxLsJlh4o2BHwg
 NcjAbbov/hkakFAHlp69KqiL7DZe8YEqQE8GlUnn+3C3RM3i2TSRQ3AGXUH22P2a
 MY5fqiub2PmEwe2UZR8BzIEQd5w60AzTNXzQb181/+SCTOPdJTKneh0Tw54lD4d6
 vWKhi64cyQxQxshCvrX6IpcNWu9qwm7qDGQ3rDAg0whunve4YGtTz1suRUk888M4
 VfNxA8skFZuaQS/UU6oek2xaeMlSzEoJQXimKLYTEJKoqf7sWxfNLAfqHwnfyo4T
 Yab3cfVRs5KgEltVZyodb9oVQd6KI13hYeT+vXubz2kq1Ode4NJCnzgEefOP0hNV
 ENDal0hqBrfjfVIkpg/wfgRJln/W4Y/U0oPPm50eJJxa0ZKTfftBWo4me5DwCFF9
 0/XhPdFWTvZsYjmSGRC1RsaSrzUvO+wFo3tKQ2lQqf8QP3ix9ZtGQHN+h8RN9SxK
 ti5OxTMsfM3jYg7+yu4yOAQkcCcoaDA37+JztpuUSlMRfNss8uM7cQKsQ4WQf6Nr
 24At5Wr/ib7hVkAQ5oB98UWh5q1ZLzmmHhzsf8KacTSNcfjgu0H0DmKtm3CfThFK
 xfTHotzM3IqbUXRZQ7++
 =M/mt
 -----END PGP SIGNATURE-----

Merge tag 'nfs-for-4.20-5' of git://git.linux-nfs.org/projects/trondmy/linux-nfs

Pull NFS client bugfixes from Trond Myklebust:
 "This is mainly fallout from the updates to the SUNRPC code that is
  being triggered by less common combinations of NFS mount options.

  Highlights include:

  Stable fixes:
   - Fix a page leak when using RPCSEC_GSS/krb5p to encrypt data.

  Bugfixes:
   - Fix a regression that causes the RPC receive code to hang.
   - Fix call_connect_status() so that it handles tasks that got
     transmitted while queued waiting for the socket lock.
   - Fix a memory leak in call_encode()
   - Fix several other connect races.
   - Fix receive code error handling.
   - Use the discard iterator rather than MSG_TRUNC for compatibility
     with AF_UNIX/AF_LOCAL sockets.
   - nfs: don't dirty kernel pages read by direct-io
   - pnfs/Flexfiles fix to enforce per-mirror stateid only for NFSv4
     data servers"

* tag 'nfs-for-4.20-5' of git://git.linux-nfs.org/projects/trondmy/linux-nfs:
  SUNRPC: Don't force a redundant disconnection in xs_read_stream()
  SUNRPC: Fix up socket polling
  SUNRPC: Use the discard iterator rather than MSG_TRUNC
  SUNRPC: Treat EFAULT as a truncated message in xs_read_stream_request()
  SUNRPC: Fix up handling of the XDRBUF_SPARSE_PAGES flag
  SUNRPC: Fix RPC receive hangs
  SUNRPC: Fix a potential race in xprt_connect()
  SUNRPC: Fix a memory leak in call_encode()
  SUNRPC: Fix leak of krb5p encode pages
  SUNRPC: call_connect_status() must handle tasks that got transmitted
  nfs: don't dirty kernel pages read by direct-io
  flexfiles: enforce per-mirror stateid only for v4 DSes
This commit is contained in:
Linus Torvalds 2018-12-06 18:57:04 -08:00
commit 7f80c7325b
7 changed files with 73 additions and 49 deletions

View File

@ -98,8 +98,11 @@ struct nfs_direct_req {
struct pnfs_ds_commit_info ds_cinfo; /* Storage for cinfo */ struct pnfs_ds_commit_info ds_cinfo; /* Storage for cinfo */
struct work_struct work; struct work_struct work;
int flags; int flags;
/* for write */
#define NFS_ODIRECT_DO_COMMIT (1) /* an unstable reply was received */ #define NFS_ODIRECT_DO_COMMIT (1) /* an unstable reply was received */
#define NFS_ODIRECT_RESCHED_WRITES (2) /* write verification failed */ #define NFS_ODIRECT_RESCHED_WRITES (2) /* write verification failed */
/* for read */
#define NFS_ODIRECT_SHOULD_DIRTY (3) /* dirty user-space page after read */
struct nfs_writeverf verf; /* unstable write verifier */ struct nfs_writeverf verf; /* unstable write verifier */
}; };
@ -412,7 +415,8 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
struct nfs_page *req = nfs_list_entry(hdr->pages.next); struct nfs_page *req = nfs_list_entry(hdr->pages.next);
struct page *page = req->wb_page; struct page *page = req->wb_page;
if (!PageCompound(page) && bytes < hdr->good_bytes) if (!PageCompound(page) && bytes < hdr->good_bytes &&
(dreq->flags == NFS_ODIRECT_SHOULD_DIRTY))
set_page_dirty(page); set_page_dirty(page);
bytes += req->wb_bytes; bytes += req->wb_bytes;
nfs_list_remove_request(req); nfs_list_remove_request(req);
@ -587,6 +591,9 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter)
if (!is_sync_kiocb(iocb)) if (!is_sync_kiocb(iocb))
dreq->iocb = iocb; dreq->iocb = iocb;
if (iter_is_iovec(iter))
dreq->flags = NFS_ODIRECT_SHOULD_DIRTY;
nfs_start_io_direct(inode); nfs_start_io_direct(inode);
NFS_I(inode)->read_io += count; NFS_I(inode)->read_io += count;

View File

@ -1733,7 +1733,8 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
if (fh) if (fh)
hdr->args.fh = fh; hdr->args.fh = fh;
if (!nfs4_ff_layout_select_ds_stateid(lseg, idx, &hdr->args.stateid)) if (vers == 4 &&
!nfs4_ff_layout_select_ds_stateid(lseg, idx, &hdr->args.stateid))
goto out_failed; goto out_failed;
/* /*
@ -1798,7 +1799,8 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
if (fh) if (fh)
hdr->args.fh = fh; hdr->args.fh = fh;
if (!nfs4_ff_layout_select_ds_stateid(lseg, idx, &hdr->args.stateid)) if (vers == 4 &&
!nfs4_ff_layout_select_ds_stateid(lseg, idx, &hdr->args.stateid))
goto out_failed; goto out_failed;
/* /*

View File

@ -72,7 +72,6 @@ xdr_buf_init(struct xdr_buf *buf, void *start, size_t len)
buf->head[0].iov_base = start; buf->head[0].iov_base = start;
buf->head[0].iov_len = len; buf->head[0].iov_len = len;
buf->tail[0].iov_len = 0; buf->tail[0].iov_len = 0;
buf->bvec = NULL;
buf->pages = NULL; buf->pages = NULL;
buf->page_len = 0; buf->page_len = 0;
buf->flags = 0; buf->flags = 0;

View File

@ -1791,6 +1791,7 @@ priv_release_snd_buf(struct rpc_rqst *rqstp)
for (i=0; i < rqstp->rq_enc_pages_num; i++) for (i=0; i < rqstp->rq_enc_pages_num; i++)
__free_page(rqstp->rq_enc_pages[i]); __free_page(rqstp->rq_enc_pages[i]);
kfree(rqstp->rq_enc_pages); kfree(rqstp->rq_enc_pages);
rqstp->rq_release_snd_buf = NULL;
} }
static int static int
@ -1799,6 +1800,9 @@ alloc_enc_pages(struct rpc_rqst *rqstp)
struct xdr_buf *snd_buf = &rqstp->rq_snd_buf; struct xdr_buf *snd_buf = &rqstp->rq_snd_buf;
int first, last, i; int first, last, i;
if (rqstp->rq_release_snd_buf)
rqstp->rq_release_snd_buf(rqstp);
if (snd_buf->page_len == 0) { if (snd_buf->page_len == 0) {
rqstp->rq_enc_pages_num = 0; rqstp->rq_enc_pages_num = 0;
return 0; return 0;

View File

@ -1915,6 +1915,13 @@ call_connect_status(struct rpc_task *task)
struct rpc_clnt *clnt = task->tk_client; struct rpc_clnt *clnt = task->tk_client;
int status = task->tk_status; int status = task->tk_status;
/* Check if the task was already transmitted */
if (!test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) {
xprt_end_transmit(task);
task->tk_action = call_transmit_status;
return;
}
dprint_status(task); dprint_status(task);
trace_rpc_connect_status(task); trace_rpc_connect_status(task);
@ -2302,6 +2309,7 @@ out_retry:
task->tk_status = 0; task->tk_status = 0;
/* Note: rpc_verify_header() may have freed the RPC slot */ /* Note: rpc_verify_header() may have freed the RPC slot */
if (task->tk_rqstp == req) { if (task->tk_rqstp == req) {
xdr_free_bvec(&req->rq_rcv_buf);
req->rq_reply_bytes_recvd = req->rq_rcv_buf.len = 0; req->rq_reply_bytes_recvd = req->rq_rcv_buf.len = 0;
if (task->tk_client->cl_discrtry) if (task->tk_client->cl_discrtry)
xprt_conditional_disconnect(req->rq_xprt, xprt_conditional_disconnect(req->rq_xprt,

View File

@ -826,8 +826,15 @@ void xprt_connect(struct rpc_task *task)
return; return;
if (xprt_test_and_set_connecting(xprt)) if (xprt_test_and_set_connecting(xprt))
return; return;
xprt->stat.connect_start = jiffies; /* Race breaker */
xprt->ops->connect(xprt, task); if (!xprt_connected(xprt)) {
xprt->stat.connect_start = jiffies;
xprt->ops->connect(xprt, task);
} else {
xprt_clear_connecting(xprt);
task->tk_status = 0;
rpc_wake_up_queued_task(&xprt->pending, task);
}
} }
xprt_release_write(xprt, task); xprt_release_write(xprt, task);
} }
@ -1623,6 +1630,8 @@ xprt_request_init(struct rpc_task *task)
req->rq_snd_buf.buflen = 0; req->rq_snd_buf.buflen = 0;
req->rq_rcv_buf.len = 0; req->rq_rcv_buf.len = 0;
req->rq_rcv_buf.buflen = 0; req->rq_rcv_buf.buflen = 0;
req->rq_snd_buf.bvec = NULL;
req->rq_rcv_buf.bvec = NULL;
req->rq_release_snd_buf = NULL; req->rq_release_snd_buf = NULL;
xprt_reset_majortimeo(req); xprt_reset_majortimeo(req);
dprintk("RPC: %5u reserved req %p xid %08x\n", task->tk_pid, dprintk("RPC: %5u reserved req %p xid %08x\n", task->tk_pid,

View File

@ -330,18 +330,16 @@ xs_alloc_sparse_pages(struct xdr_buf *buf, size_t want, gfp_t gfp)
{ {
size_t i,n; size_t i,n;
if (!(buf->flags & XDRBUF_SPARSE_PAGES)) if (!want || !(buf->flags & XDRBUF_SPARSE_PAGES))
return want; return want;
if (want > buf->page_len)
want = buf->page_len;
n = (buf->page_base + want + PAGE_SIZE - 1) >> PAGE_SHIFT; n = (buf->page_base + want + PAGE_SIZE - 1) >> PAGE_SHIFT;
for (i = 0; i < n; i++) { for (i = 0; i < n; i++) {
if (buf->pages[i]) if (buf->pages[i])
continue; continue;
buf->bvec[i].bv_page = buf->pages[i] = alloc_page(gfp); buf->bvec[i].bv_page = buf->pages[i] = alloc_page(gfp);
if (!buf->pages[i]) { if (!buf->pages[i]) {
buf->page_len = (i * PAGE_SIZE) - buf->page_base; i *= PAGE_SIZE;
return buf->page_len; return i > buf->page_base ? i - buf->page_base : 0;
} }
} }
return want; return want;
@ -378,8 +376,8 @@ static ssize_t
xs_read_discard(struct socket *sock, struct msghdr *msg, int flags, xs_read_discard(struct socket *sock, struct msghdr *msg, int flags,
size_t count) size_t count)
{ {
struct kvec kvec = { 0 }; iov_iter_discard(&msg->msg_iter, READ, count);
return xs_read_kvec(sock, msg, flags | MSG_TRUNC, &kvec, count, 0); return sock_recvmsg(sock, msg, flags);
} }
static ssize_t static ssize_t
@ -398,16 +396,17 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC)) if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC))
goto out; goto out;
if (ret != want) if (ret != want)
goto eagain; goto out;
seek = 0; seek = 0;
} else { } else {
seek -= buf->head[0].iov_len; seek -= buf->head[0].iov_len;
offset += buf->head[0].iov_len; offset += buf->head[0].iov_len;
} }
if (seek < buf->page_len) {
want = xs_alloc_sparse_pages(buf, want = xs_alloc_sparse_pages(buf,
min_t(size_t, count - offset, buf->page_len), min_t(size_t, count - offset, buf->page_len),
GFP_NOWAIT); GFP_NOWAIT);
if (seek < want) {
ret = xs_read_bvec(sock, msg, flags, buf->bvec, ret = xs_read_bvec(sock, msg, flags, buf->bvec,
xdr_buf_pagecount(buf), xdr_buf_pagecount(buf),
want + buf->page_base, want + buf->page_base,
@ -418,12 +417,13 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC)) if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC))
goto out; goto out;
if (ret != want) if (ret != want)
goto eagain; goto out;
seek = 0; seek = 0;
} else { } else {
seek -= buf->page_len; seek -= want;
offset += buf->page_len; offset += want;
} }
if (seek < buf->tail[0].iov_len) { if (seek < buf->tail[0].iov_len) {
want = min_t(size_t, count - offset, buf->tail[0].iov_len); want = min_t(size_t, count - offset, buf->tail[0].iov_len);
ret = xs_read_kvec(sock, msg, flags, &buf->tail[0], want, seek); ret = xs_read_kvec(sock, msg, flags, &buf->tail[0], want, seek);
@ -433,17 +433,13 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC)) if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC))
goto out; goto out;
if (ret != want) if (ret != want)
goto eagain; goto out;
} else } else
offset += buf->tail[0].iov_len; offset += buf->tail[0].iov_len;
ret = -EMSGSIZE; ret = -EMSGSIZE;
msg->msg_flags |= MSG_TRUNC;
out: out:
*read = offset - seek_init; *read = offset - seek_init;
return ret; return ret;
eagain:
ret = -EAGAIN;
goto out;
sock_err: sock_err:
offset += seek; offset += seek;
goto out; goto out;
@ -486,19 +482,20 @@ xs_read_stream_request(struct sock_xprt *transport, struct msghdr *msg,
if (transport->recv.offset == transport->recv.len) { if (transport->recv.offset == transport->recv.len) {
if (xs_read_stream_request_done(transport)) if (xs_read_stream_request_done(transport))
msg->msg_flags |= MSG_EOR; msg->msg_flags |= MSG_EOR;
return transport->recv.copied; return read;
} }
switch (ret) { switch (ret) {
default:
break;
case -EFAULT:
case -EMSGSIZE: case -EMSGSIZE:
return transport->recv.copied; msg->msg_flags |= MSG_TRUNC;
return read;
case 0: case 0:
return -ESHUTDOWN; return -ESHUTDOWN;
default:
if (ret < 0)
return ret;
} }
return -EAGAIN; return ret < 0 ? ret : read;
} }
static size_t static size_t
@ -537,7 +534,7 @@ xs_read_stream_call(struct sock_xprt *transport, struct msghdr *msg, int flags)
ret = xs_read_stream_request(transport, msg, flags, req); ret = xs_read_stream_request(transport, msg, flags, req);
if (msg->msg_flags & (MSG_EOR|MSG_TRUNC)) if (msg->msg_flags & (MSG_EOR|MSG_TRUNC))
xprt_complete_bc_request(req, ret); xprt_complete_bc_request(req, transport->recv.copied);
return ret; return ret;
} }
@ -570,7 +567,7 @@ xs_read_stream_reply(struct sock_xprt *transport, struct msghdr *msg, int flags)
spin_lock(&xprt->queue_lock); spin_lock(&xprt->queue_lock);
if (msg->msg_flags & (MSG_EOR|MSG_TRUNC)) if (msg->msg_flags & (MSG_EOR|MSG_TRUNC))
xprt_complete_rqst(req->rq_task, ret); xprt_complete_rqst(req->rq_task, transport->recv.copied);
xprt_unpin_rqst(req); xprt_unpin_rqst(req);
out: out:
spin_unlock(&xprt->queue_lock); spin_unlock(&xprt->queue_lock);
@ -591,10 +588,8 @@ xs_read_stream(struct sock_xprt *transport, int flags)
if (ret <= 0) if (ret <= 0)
goto out_err; goto out_err;
transport->recv.offset = ret; transport->recv.offset = ret;
if (ret != want) { if (transport->recv.offset != want)
ret = -EAGAIN; return transport->recv.offset;
goto out_err;
}
transport->recv.len = be32_to_cpu(transport->recv.fraghdr) & transport->recv.len = be32_to_cpu(transport->recv.fraghdr) &
RPC_FRAGMENT_SIZE_MASK; RPC_FRAGMENT_SIZE_MASK;
transport->recv.offset -= sizeof(transport->recv.fraghdr); transport->recv.offset -= sizeof(transport->recv.fraghdr);
@ -602,6 +597,9 @@ xs_read_stream(struct sock_xprt *transport, int flags)
} }
switch (be32_to_cpu(transport->recv.calldir)) { switch (be32_to_cpu(transport->recv.calldir)) {
default:
msg.msg_flags |= MSG_TRUNC;
break;
case RPC_CALL: case RPC_CALL:
ret = xs_read_stream_call(transport, &msg, flags); ret = xs_read_stream_call(transport, &msg, flags);
break; break;
@ -616,6 +614,9 @@ xs_read_stream(struct sock_xprt *transport, int flags)
goto out_err; goto out_err;
read += ret; read += ret;
if (transport->recv.offset < transport->recv.len) { if (transport->recv.offset < transport->recv.len) {
if (!(msg.msg_flags & MSG_TRUNC))
return read;
msg.msg_flags = 0;
ret = xs_read_discard(transport->sock, &msg, flags, ret = xs_read_discard(transport->sock, &msg, flags,
transport->recv.len - transport->recv.offset); transport->recv.len - transport->recv.offset);
if (ret <= 0) if (ret <= 0)
@ -623,7 +624,7 @@ xs_read_stream(struct sock_xprt *transport, int flags)
transport->recv.offset += ret; transport->recv.offset += ret;
read += ret; read += ret;
if (transport->recv.offset != transport->recv.len) if (transport->recv.offset != transport->recv.len)
return -EAGAIN; return read;
} }
if (xs_read_stream_request_done(transport)) { if (xs_read_stream_request_done(transport)) {
trace_xs_stream_read_request(transport); trace_xs_stream_read_request(transport);
@ -633,13 +634,7 @@ xs_read_stream(struct sock_xprt *transport, int flags)
transport->recv.len = 0; transport->recv.len = 0;
return read; return read;
out_err: out_err:
switch (ret) { return ret != 0 ? ret : -ESHUTDOWN;
case 0:
case -ESHUTDOWN:
xprt_force_disconnect(&transport->xprt);
return -ESHUTDOWN;
}
return ret;
} }
static void xs_stream_data_receive(struct sock_xprt *transport) static void xs_stream_data_receive(struct sock_xprt *transport)
@ -648,12 +643,12 @@ static void xs_stream_data_receive(struct sock_xprt *transport)
ssize_t ret = 0; ssize_t ret = 0;
mutex_lock(&transport->recv_mutex); mutex_lock(&transport->recv_mutex);
clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
if (transport->sock == NULL) if (transport->sock == NULL)
goto out; goto out;
clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
for (;;) { for (;;) {
ret = xs_read_stream(transport, MSG_DONTWAIT); ret = xs_read_stream(transport, MSG_DONTWAIT);
if (ret <= 0) if (ret < 0)
break; break;
read += ret; read += ret;
cond_resched(); cond_resched();
@ -1345,10 +1340,10 @@ static void xs_udp_data_receive(struct sock_xprt *transport)
int err; int err;
mutex_lock(&transport->recv_mutex); mutex_lock(&transport->recv_mutex);
clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
sk = transport->inet; sk = transport->inet;
if (sk == NULL) if (sk == NULL)
goto out; goto out;
clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
for (;;) { for (;;) {
skb = skb_recv_udp(sk, 0, 1, &err); skb = skb_recv_udp(sk, 0, 1, &err);
if (skb == NULL) if (skb == NULL)