mptcp: allow collapsing consecutive sendpages on the same substream
If the current sendmsg() lands on the same subflow we used last, we can
try to collapse the data.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
7a6a6cbc3e
commit
57040755a3
|
@ -122,14 +122,27 @@ static struct sock *mptcp_subflow_recv_lookup(const struct mptcp_sock *msk)
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
|
static inline bool mptcp_skb_can_collapse_to(const struct mptcp_sock *msk,
|
||||||
struct msghdr *msg, long *timeo)
|
const struct sk_buff *skb,
|
||||||
|
const struct mptcp_ext *mpext)
|
||||||
{
|
{
|
||||||
int mss_now = 0, size_goal = 0, ret = 0;
|
if (!tcp_skb_can_collapse_to(skb))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/* can collapse only if MPTCP level sequence is in order */
|
||||||
|
return mpext && mpext->data_seq + mpext->data_len == msk->write_seq;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
|
||||||
|
struct msghdr *msg, long *timeo, int *pmss_now,
|
||||||
|
int *ps_goal)
|
||||||
|
{
|
||||||
|
int mss_now, avail_size, size_goal, ret;
|
||||||
struct mptcp_sock *msk = mptcp_sk(sk);
|
struct mptcp_sock *msk = mptcp_sk(sk);
|
||||||
struct mptcp_ext *mpext = NULL;
|
struct mptcp_ext *mpext = NULL;
|
||||||
|
struct sk_buff *skb, *tail;
|
||||||
|
bool can_collapse = false;
|
||||||
struct page_frag *pfrag;
|
struct page_frag *pfrag;
|
||||||
struct sk_buff *skb;
|
|
||||||
size_t psize;
|
size_t psize;
|
||||||
|
|
||||||
/* use the mptcp page cache so that we can easily move the data
|
/* use the mptcp page cache so that we can easily move the data
|
||||||
|
@ -145,8 +158,29 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
|
||||||
|
|
||||||
/* compute copy limit */
|
/* compute copy limit */
|
||||||
mss_now = tcp_send_mss(ssk, &size_goal, msg->msg_flags);
|
mss_now = tcp_send_mss(ssk, &size_goal, msg->msg_flags);
|
||||||
psize = min_t(int, pfrag->size - pfrag->offset, size_goal);
|
*pmss_now = mss_now;
|
||||||
|
*ps_goal = size_goal;
|
||||||
|
avail_size = size_goal;
|
||||||
|
skb = tcp_write_queue_tail(ssk);
|
||||||
|
if (skb) {
|
||||||
|
mpext = skb_ext_find(skb, SKB_EXT_MPTCP);
|
||||||
|
|
||||||
|
/* Limit the write to the size available in the
|
||||||
|
* current skb, if any, so that we create at most a new skb.
|
||||||
|
* Explicitly tells TCP internals to avoid collapsing on later
|
||||||
|
* queue management operation, to avoid breaking the ext <->
|
||||||
|
* SSN association set here
|
||||||
|
*/
|
||||||
|
can_collapse = (size_goal - skb->len > 0) &&
|
||||||
|
mptcp_skb_can_collapse_to(msk, skb, mpext);
|
||||||
|
if (!can_collapse)
|
||||||
|
TCP_SKB_CB(skb)->eor = 1;
|
||||||
|
else
|
||||||
|
avail_size = size_goal - skb->len;
|
||||||
|
}
|
||||||
|
psize = min_t(size_t, pfrag->size - pfrag->offset, avail_size);
|
||||||
|
|
||||||
|
/* Copy to page */
|
||||||
pr_debug("left=%zu", msg_data_left(msg));
|
pr_debug("left=%zu", msg_data_left(msg));
|
||||||
psize = copy_page_from_iter(pfrag->page, pfrag->offset,
|
psize = copy_page_from_iter(pfrag->page, pfrag->offset,
|
||||||
min_t(size_t, msg_data_left(msg), psize),
|
min_t(size_t, msg_data_left(msg), psize),
|
||||||
|
@ -155,14 +189,9 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
|
||||||
if (!psize)
|
if (!psize)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
/* Mark the end of the previous write so the beginning of the
|
/* tell the TCP stack to delay the push so that we can safely
|
||||||
* next write (with its own mptcp skb extension data) is not
|
* access the skb after the sendpages call
|
||||||
* collapsed.
|
|
||||||
*/
|
*/
|
||||||
skb = tcp_write_queue_tail(ssk);
|
|
||||||
if (skb)
|
|
||||||
TCP_SKB_CB(skb)->eor = 1;
|
|
||||||
|
|
||||||
ret = do_tcp_sendpages(ssk, pfrag->page, pfrag->offset, psize,
|
ret = do_tcp_sendpages(ssk, pfrag->page, pfrag->offset, psize,
|
||||||
msg->msg_flags | MSG_SENDPAGE_NOTLAST);
|
msg->msg_flags | MSG_SENDPAGE_NOTLAST);
|
||||||
if (ret <= 0)
|
if (ret <= 0)
|
||||||
|
@ -170,6 +199,18 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
|
||||||
if (unlikely(ret < psize))
|
if (unlikely(ret < psize))
|
||||||
iov_iter_revert(&msg->msg_iter, psize - ret);
|
iov_iter_revert(&msg->msg_iter, psize - ret);
|
||||||
|
|
||||||
|
/* if the tail skb extension is still the cached one, collapsing
|
||||||
|
* really happened. Note: we can't check for 'same skb' as the sk_buff
|
||||||
|
* hdr on tail can be transmitted, freed and re-allocated by the
|
||||||
|
* do_tcp_sendpages() call
|
||||||
|
*/
|
||||||
|
tail = tcp_write_queue_tail(ssk);
|
||||||
|
if (mpext && tail && mpext == skb_ext_find(tail, SKB_EXT_MPTCP)) {
|
||||||
|
WARN_ON_ONCE(!can_collapse);
|
||||||
|
mpext->data_len += ret;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
skb = tcp_write_queue_tail(ssk);
|
skb = tcp_write_queue_tail(ssk);
|
||||||
mpext = __skb_ext_set(skb, SKB_EXT_MPTCP, msk->cached_ext);
|
mpext = __skb_ext_set(skb, SKB_EXT_MPTCP, msk->cached_ext);
|
||||||
msk->cached_ext = NULL;
|
msk->cached_ext = NULL;
|
||||||
|
@ -185,11 +226,11 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
|
||||||
mpext->data_seq, mpext->subflow_seq, mpext->data_len,
|
mpext->data_seq, mpext->subflow_seq, mpext->data_len,
|
||||||
mpext->dsn64);
|
mpext->dsn64);
|
||||||
|
|
||||||
|
out:
|
||||||
pfrag->offset += ret;
|
pfrag->offset += ret;
|
||||||
msk->write_seq += ret;
|
msk->write_seq += ret;
|
||||||
mptcp_subflow_ctx(ssk)->rel_write_seq += ret;
|
mptcp_subflow_ctx(ssk)->rel_write_seq += ret;
|
||||||
|
|
||||||
tcp_push(ssk, msg->msg_flags, mss_now, tcp_sk(ssk)->nonagle, size_goal);
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -212,11 +253,11 @@ static void ssk_check_wmem(struct mptcp_sock *msk, struct sock *ssk)
|
||||||
|
|
||||||
static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
|
static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
|
||||||
{
|
{
|
||||||
|
int mss_now = 0, size_goal = 0, ret = 0;
|
||||||
struct mptcp_sock *msk = mptcp_sk(sk);
|
struct mptcp_sock *msk = mptcp_sk(sk);
|
||||||
struct socket *ssock;
|
struct socket *ssock;
|
||||||
size_t copied = 0;
|
size_t copied = 0;
|
||||||
struct sock *ssk;
|
struct sock *ssk;
|
||||||
int ret = 0;
|
|
||||||
long timeo;
|
long timeo;
|
||||||
|
|
||||||
if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL))
|
if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL))
|
||||||
|
@ -243,15 +284,19 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
|
||||||
|
|
||||||
lock_sock(ssk);
|
lock_sock(ssk);
|
||||||
while (msg_data_left(msg)) {
|
while (msg_data_left(msg)) {
|
||||||
ret = mptcp_sendmsg_frag(sk, ssk, msg, &timeo);
|
ret = mptcp_sendmsg_frag(sk, ssk, msg, &timeo, &mss_now,
|
||||||
|
&size_goal);
|
||||||
if (ret < 0)
|
if (ret < 0)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
copied += ret;
|
copied += ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (copied > 0)
|
if (copied) {
|
||||||
ret = copied;
|
ret = copied;
|
||||||
|
tcp_push(ssk, msg->msg_flags, mss_now, tcp_sk(ssk)->nonagle,
|
||||||
|
size_goal);
|
||||||
|
}
|
||||||
|
|
||||||
ssk_check_wmem(msk, ssk);
|
ssk_check_wmem(msk, ssk);
|
||||||
release_sock(ssk);
|
release_sock(ssk);
|
||||||
|
|
Loading…
Reference in New Issue