mptcp: allow collapsing consecutive sendpages on the same substream
If the current sendmsg() lands on the same subflow we used last, we can try to collapse the data.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
7a6a6cbc3e
commit
57040755a3
|
@ -122,14 +122,27 @@ static struct sock *mptcp_subflow_recv_lookup(const struct mptcp_sock *msk)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
|
||||
struct msghdr *msg, long *timeo)
|
||||
static inline bool mptcp_skb_can_collapse_to(const struct mptcp_sock *msk,
|
||||
const struct sk_buff *skb,
|
||||
const struct mptcp_ext *mpext)
|
||||
{
|
||||
int mss_now = 0, size_goal = 0, ret = 0;
|
||||
if (!tcp_skb_can_collapse_to(skb))
|
||||
return false;
|
||||
|
||||
/* can collapse only if MPTCP level sequence is in order */
|
||||
return mpext && mpext->data_seq + mpext->data_len == msk->write_seq;
|
||||
}
|
||||
|
||||
static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
|
||||
struct msghdr *msg, long *timeo, int *pmss_now,
|
||||
int *ps_goal)
|
||||
{
|
||||
int mss_now, avail_size, size_goal, ret;
|
||||
struct mptcp_sock *msk = mptcp_sk(sk);
|
||||
struct mptcp_ext *mpext = NULL;
|
||||
struct sk_buff *skb, *tail;
|
||||
bool can_collapse = false;
|
||||
struct page_frag *pfrag;
|
||||
struct sk_buff *skb;
|
||||
size_t psize;
|
||||
|
||||
/* use the mptcp page cache so that we can easily move the data
|
||||
|
@ -145,8 +158,29 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
|
|||
|
||||
/* compute copy limit */
|
||||
mss_now = tcp_send_mss(ssk, &size_goal, msg->msg_flags);
|
||||
psize = min_t(int, pfrag->size - pfrag->offset, size_goal);
|
||||
*pmss_now = mss_now;
|
||||
*ps_goal = size_goal;
|
||||
avail_size = size_goal;
|
||||
skb = tcp_write_queue_tail(ssk);
|
||||
if (skb) {
|
||||
mpext = skb_ext_find(skb, SKB_EXT_MPTCP);
|
||||
|
||||
/* Limit the write to the size available in the
|
||||
* current skb, if any, so that we create at most a new skb.
|
||||
* Explicitly tells TCP internals to avoid collapsing on later
|
||||
* queue management operation, to avoid breaking the ext <->
|
||||
* SSN association set here
|
||||
*/
|
||||
can_collapse = (size_goal - skb->len > 0) &&
|
||||
mptcp_skb_can_collapse_to(msk, skb, mpext);
|
||||
if (!can_collapse)
|
||||
TCP_SKB_CB(skb)->eor = 1;
|
||||
else
|
||||
avail_size = size_goal - skb->len;
|
||||
}
|
||||
psize = min_t(size_t, pfrag->size - pfrag->offset, avail_size);
|
||||
|
||||
/* Copy to page */
|
||||
pr_debug("left=%zu", msg_data_left(msg));
|
||||
psize = copy_page_from_iter(pfrag->page, pfrag->offset,
|
||||
min_t(size_t, msg_data_left(msg), psize),
|
||||
|
@ -155,14 +189,9 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
|
|||
if (!psize)
|
||||
return -EINVAL;
|
||||
|
||||
/* Mark the end of the previous write so the beginning of the
|
||||
* next write (with its own mptcp skb extension data) is not
|
||||
* collapsed.
|
||||
/* tell the TCP stack to delay the push so that we can safely
|
||||
* access the skb after the sendpages call
|
||||
*/
|
||||
skb = tcp_write_queue_tail(ssk);
|
||||
if (skb)
|
||||
TCP_SKB_CB(skb)->eor = 1;
|
||||
|
||||
ret = do_tcp_sendpages(ssk, pfrag->page, pfrag->offset, psize,
|
||||
msg->msg_flags | MSG_SENDPAGE_NOTLAST);
|
||||
if (ret <= 0)
|
||||
|
@ -170,6 +199,18 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
|
|||
if (unlikely(ret < psize))
|
||||
iov_iter_revert(&msg->msg_iter, psize - ret);
|
||||
|
||||
/* if the tail skb extension is still the cached one, collapsing
|
||||
* really happened. Note: we can't check for 'same skb' as the sk_buff
|
||||
* hdr on tail can be transmitted, freed and re-allocated by the
|
||||
* do_tcp_sendpages() call
|
||||
*/
|
||||
tail = tcp_write_queue_tail(ssk);
|
||||
if (mpext && tail && mpext == skb_ext_find(tail, SKB_EXT_MPTCP)) {
|
||||
WARN_ON_ONCE(!can_collapse);
|
||||
mpext->data_len += ret;
|
||||
goto out;
|
||||
}
|
||||
|
||||
skb = tcp_write_queue_tail(ssk);
|
||||
mpext = __skb_ext_set(skb, SKB_EXT_MPTCP, msk->cached_ext);
|
||||
msk->cached_ext = NULL;
|
||||
|
@ -185,11 +226,11 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
|
|||
mpext->data_seq, mpext->subflow_seq, mpext->data_len,
|
||||
mpext->dsn64);
|
||||
|
||||
out:
|
||||
pfrag->offset += ret;
|
||||
msk->write_seq += ret;
|
||||
mptcp_subflow_ctx(ssk)->rel_write_seq += ret;
|
||||
|
||||
tcp_push(ssk, msg->msg_flags, mss_now, tcp_sk(ssk)->nonagle, size_goal);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -212,11 +253,11 @@ static void ssk_check_wmem(struct mptcp_sock *msk, struct sock *ssk)
|
|||
|
||||
static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
|
||||
{
|
||||
int mss_now = 0, size_goal = 0, ret = 0;
|
||||
struct mptcp_sock *msk = mptcp_sk(sk);
|
||||
struct socket *ssock;
|
||||
size_t copied = 0;
|
||||
struct sock *ssk;
|
||||
int ret = 0;
|
||||
long timeo;
|
||||
|
||||
if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL))
|
||||
|
@ -243,15 +284,19 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
|
|||
|
||||
lock_sock(ssk);
|
||||
while (msg_data_left(msg)) {
|
||||
ret = mptcp_sendmsg_frag(sk, ssk, msg, &timeo);
|
||||
ret = mptcp_sendmsg_frag(sk, ssk, msg, &timeo, &mss_now,
|
||||
&size_goal);
|
||||
if (ret < 0)
|
||||
break;
|
||||
|
||||
copied += ret;
|
||||
}
|
||||
|
||||
if (copied > 0)
|
||||
if (copied) {
|
||||
ret = copied;
|
||||
tcp_push(ssk, msg->msg_flags, mss_now, tcp_sk(ssk)->nonagle,
|
||||
size_goal);
|
||||
}
|
||||
|
||||
ssk_check_wmem(msk, ssk);
|
||||
release_sock(ssk);
|
||||
|
|
Loading…
Reference in New Issue