tcp: Convert do_tcp_sendpages() to use MSG_SPLICE_PAGES
Convert do_tcp_sendpages() to use sendmsg() with MSG_SPLICE_PAGES rather than directly splicing in the pages itself. do_tcp_sendpages() can then be inlined in subsequent patches into its callers.

This allows ->sendpage() to be replaced by something that can handle multiple multipage folios in a single transaction.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: David Ahern <dsahern@kernel.org>
cc: Jens Axboe <axboe@kernel.dk>
cc: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
parent
270a1c3de4
commit
c5c37af6ec
158
net/ipv4/tcp.c
158
net/ipv4/tcp.c
|
@ -974,163 +974,19 @@ static int tcp_wmem_schedule(struct sock *sk, int copy)
|
|||
return min(copy, sk->sk_forward_alloc);
|
||||
}
|
||||
|
||||
static struct sk_buff *tcp_build_frag(struct sock *sk, int size_goal, int flags,
|
||||
struct page *page, int offset, size_t *size)
|
||||
{
|
||||
struct sk_buff *skb = tcp_write_queue_tail(sk);
|
||||
struct tcp_sock *tp = tcp_sk(sk);
|
||||
bool can_coalesce;
|
||||
int copy, i;
|
||||
|
||||
if (!skb || (copy = size_goal - skb->len) <= 0 ||
|
||||
!tcp_skb_can_collapse_to(skb)) {
|
||||
new_segment:
|
||||
if (!sk_stream_memory_free(sk))
|
||||
return NULL;
|
||||
|
||||
skb = tcp_stream_alloc_skb(sk, 0, sk->sk_allocation,
|
||||
tcp_rtx_and_write_queues_empty(sk));
|
||||
if (!skb)
|
||||
return NULL;
|
||||
|
||||
#ifdef CONFIG_TLS_DEVICE
|
||||
skb->decrypted = !!(flags & MSG_SENDPAGE_DECRYPTED);
|
||||
#endif
|
||||
tcp_skb_entail(sk, skb);
|
||||
copy = size_goal;
|
||||
}
|
||||
|
||||
if (copy > *size)
|
||||
copy = *size;
|
||||
|
||||
i = skb_shinfo(skb)->nr_frags;
|
||||
can_coalesce = skb_can_coalesce(skb, i, page, offset);
|
||||
if (!can_coalesce && i >= READ_ONCE(sysctl_max_skb_frags)) {
|
||||
tcp_mark_push(tp, skb);
|
||||
goto new_segment;
|
||||
}
|
||||
if (tcp_downgrade_zcopy_pure(sk, skb))
|
||||
return NULL;
|
||||
|
||||
copy = tcp_wmem_schedule(sk, copy);
|
||||
if (!copy)
|
||||
return NULL;
|
||||
|
||||
if (can_coalesce) {
|
||||
skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
|
||||
} else {
|
||||
get_page(page);
|
||||
skb_fill_page_desc_noacc(skb, i, page, offset, copy);
|
||||
}
|
||||
|
||||
if (!(flags & MSG_NO_SHARED_FRAGS))
|
||||
skb_shinfo(skb)->flags |= SKBFL_SHARED_FRAG;
|
||||
|
||||
skb->len += copy;
|
||||
skb->data_len += copy;
|
||||
skb->truesize += copy;
|
||||
sk_wmem_queued_add(sk, copy);
|
||||
sk_mem_charge(sk, copy);
|
||||
WRITE_ONCE(tp->write_seq, tp->write_seq + copy);
|
||||
TCP_SKB_CB(skb)->end_seq += copy;
|
||||
tcp_skb_pcount_set(skb, 0);
|
||||
|
||||
*size = copy;
|
||||
return skb;
|
||||
}
|
||||
|
||||
ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
|
||||
size_t size, int flags)
|
||||
{
|
||||
struct tcp_sock *tp = tcp_sk(sk);
|
||||
int mss_now, size_goal;
|
||||
int err;
|
||||
ssize_t copied;
|
||||
long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
|
||||
struct bio_vec bvec;
|
||||
struct msghdr msg = { .msg_flags = flags | MSG_SPLICE_PAGES, };
|
||||
|
||||
if (IS_ENABLED(CONFIG_DEBUG_VM) &&
|
||||
WARN_ONCE(!sendpage_ok(page),
|
||||
"page must not be a Slab one and have page_count > 0"))
|
||||
return -EINVAL;
|
||||
bvec_set_page(&bvec, page, size, offset);
|
||||
iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size);
|
||||
|
||||
/* Wait for a connection to finish. One exception is TCP Fast Open
|
||||
* (passive side) where data is allowed to be sent before a connection
|
||||
* is fully established.
|
||||
*/
|
||||
if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) &&
|
||||
!tcp_passive_fastopen(sk)) {
|
||||
err = sk_stream_wait_connect(sk, &timeo);
|
||||
if (err != 0)
|
||||
goto out_err;
|
||||
}
|
||||
if (flags & MSG_SENDPAGE_NOTLAST)
|
||||
msg.msg_flags |= MSG_MORE;
|
||||
|
||||
sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
|
||||
|
||||
mss_now = tcp_send_mss(sk, &size_goal, flags);
|
||||
copied = 0;
|
||||
|
||||
err = -EPIPE;
|
||||
if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
|
||||
goto out_err;
|
||||
|
||||
while (size > 0) {
|
||||
struct sk_buff *skb;
|
||||
size_t copy = size;
|
||||
|
||||
skb = tcp_build_frag(sk, size_goal, flags, page, offset, &copy);
|
||||
if (!skb)
|
||||
goto wait_for_space;
|
||||
|
||||
if (!copied)
|
||||
TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;
|
||||
|
||||
copied += copy;
|
||||
offset += copy;
|
||||
size -= copy;
|
||||
if (!size)
|
||||
goto out;
|
||||
|
||||
if (skb->len < size_goal || (flags & MSG_OOB))
|
||||
continue;
|
||||
|
||||
if (forced_push(tp)) {
|
||||
tcp_mark_push(tp, skb);
|
||||
__tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH);
|
||||
} else if (skb == tcp_send_head(sk))
|
||||
tcp_push_one(sk, mss_now);
|
||||
continue;
|
||||
|
||||
wait_for_space:
|
||||
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
|
||||
tcp_push(sk, flags & ~MSG_MORE, mss_now,
|
||||
TCP_NAGLE_PUSH, size_goal);
|
||||
|
||||
err = sk_stream_wait_memory(sk, &timeo);
|
||||
if (err != 0)
|
||||
goto do_error;
|
||||
|
||||
mss_now = tcp_send_mss(sk, &size_goal, flags);
|
||||
}
|
||||
|
||||
out:
|
||||
if (copied) {
|
||||
tcp_tx_timestamp(sk, sk->sk_tsflags);
|
||||
if (!(flags & MSG_SENDPAGE_NOTLAST))
|
||||
tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
|
||||
}
|
||||
return copied;
|
||||
|
||||
do_error:
|
||||
tcp_remove_empty_skb(sk);
|
||||
if (copied)
|
||||
goto out;
|
||||
out_err:
|
||||
/* make sure we wake any epoll edge trigger waiter */
|
||||
if (unlikely(tcp_rtx_and_write_queues_empty(sk) && err == -EAGAIN)) {
|
||||
sk->sk_write_space(sk);
|
||||
tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED);
|
||||
}
|
||||
return sk_stream_error(sk, flags, err);
|
||||
return tcp_sendmsg_locked(sk, &msg, size);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(do_tcp_sendpages);
|
||||
|
||||
|
|
Loading…
Reference in New Issue