Merge branch 'shrink-struct-ubuf_info'

Pavel Begunkov says:

====================
shrink struct ubuf_info

struct ubuf_info is large but not all fields are needed for all
cases. We have limited space in io_uring for it and large ubuf_info
prevents some struct embedding, even though we use only a subset
of the fields. It's also not very clean trying to use this typeless
extra space.

Shrink struct ubuf_info to only necessary fields used in generic paths,
namely ->callback, ->refcnt and ->flags, which take only 16 bytes. And
make MSG_ZEROCOPY and some other users to embed it into a larger struct
ubuf_info_msgzc mimicking the former ubuf_info.

Note, xen/vhost may also have some cleaning on top by creating
new structs containing ubuf_info but with proper types.
====================

Link: https://lore.kernel.org/r/cover.1663892211.git.asml.silence@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski 2022-09-28 18:51:27 -07:00
commit 578b054684
9 changed files with 48 additions and 35 deletions

View File

@ -62,7 +62,7 @@ struct pending_tx_info {
* ubuf_to_vif is a helper which finds the struct xenvif from a pointer
* to this field.
*/
struct ubuf_info callback_struct;
struct ubuf_info_msgzc callback_struct;
};
#define XEN_NETIF_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, XEN_PAGE_SIZE)

View File

@ -591,8 +591,8 @@ int xenvif_init_queue(struct xenvif_queue *queue)
}
for (i = 0; i < MAX_PENDING_REQS; i++) {
queue->pending_tx_info[i].callback_struct = (struct ubuf_info)
{ .callback = xenvif_zerocopy_callback,
queue->pending_tx_info[i].callback_struct = (struct ubuf_info_msgzc)
{ { .callback = xenvif_zerocopy_callback },
{ { .ctx = NULL,
.desc = i } } };
queue->grant_tx_handle[i] = NETBACK_INVALID_HANDLE;

View File

@ -133,7 +133,7 @@ static inline unsigned long idx_to_kaddr(struct xenvif_queue *queue,
/* Find the containing VIF's structure from a pointer in pending_tx_info array
*/
static inline struct xenvif_queue *ubuf_to_queue(const struct ubuf_info *ubuf)
static inline struct xenvif_queue *ubuf_to_queue(const struct ubuf_info_msgzc *ubuf)
{
u16 pending_idx = ubuf->desc;
struct pending_tx_info *temp =
@ -1228,11 +1228,12 @@ static int xenvif_tx_submit(struct xenvif_queue *queue)
return work_done;
}
void xenvif_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *ubuf,
void xenvif_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *ubuf_base,
bool zerocopy_success)
{
unsigned long flags;
pending_ring_idx_t index;
struct ubuf_info_msgzc *ubuf = uarg_to_msgzc(ubuf_base);
struct xenvif_queue *queue = ubuf_to_queue(ubuf);
/* This is the only place where we grab this lock, to protect callbacks
@ -1241,7 +1242,7 @@ void xenvif_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *ubuf,
spin_lock_irqsave(&queue->callback_lock, flags);
do {
u16 pending_idx = ubuf->desc;
ubuf = (struct ubuf_info *) ubuf->ctx;
ubuf = (struct ubuf_info_msgzc *) ubuf->ctx;
BUG_ON(queue->dealloc_prod - queue->dealloc_cons >=
MAX_PENDING_REQS);
index = pending_index(queue->dealloc_prod);

View File

@ -118,7 +118,7 @@ struct vhost_net_virtqueue {
/* Number of XDP frames batched */
int batched_xdp;
/* an array of userspace buffers info */
struct ubuf_info *ubuf_info;
struct ubuf_info_msgzc *ubuf_info;
/* Reference counting for outstanding ubufs.
* Protected by vq mutex. Writers must also take device mutex. */
struct vhost_net_ubuf_ref *ubufs;
@ -382,8 +382,9 @@ static void vhost_zerocopy_signal_used(struct vhost_net *net,
}
static void vhost_zerocopy_callback(struct sk_buff *skb,
struct ubuf_info *ubuf, bool success)
struct ubuf_info *ubuf_base, bool success)
{
struct ubuf_info_msgzc *ubuf = uarg_to_msgzc(ubuf_base);
struct vhost_net_ubuf_ref *ubufs = ubuf->ctx;
struct vhost_virtqueue *vq = ubufs->vq;
int cnt;
@ -871,7 +872,7 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
size_t len, total_len = 0;
int err;
struct vhost_net_ubuf_ref *ubufs;
struct ubuf_info *ubuf;
struct ubuf_info_msgzc *ubuf;
bool zcopy_used;
int sent_pkts = 0;
@ -907,14 +908,14 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
ubuf = nvq->ubuf_info + nvq->upend_idx;
vq->heads[nvq->upend_idx].id = cpu_to_vhost32(vq, head);
vq->heads[nvq->upend_idx].len = VHOST_DMA_IN_PROGRESS;
ubuf->callback = vhost_zerocopy_callback;
ubuf->ctx = nvq->ubufs;
ubuf->desc = nvq->upend_idx;
ubuf->flags = SKBFL_ZEROCOPY_FRAG;
refcount_set(&ubuf->refcnt, 1);
ubuf->ubuf.callback = vhost_zerocopy_callback;
ubuf->ubuf.flags = SKBFL_ZEROCOPY_FRAG;
refcount_set(&ubuf->ubuf.refcnt, 1);
msg.msg_control = &ctl;
ctl.type = TUN_MSG_UBUF;
ctl.ptr = ubuf;
ctl.ptr = &ubuf->ubuf;
msg.msg_controllen = sizeof(ctl);
ubufs = nvq->ubufs;
atomic_inc(&ubufs->refcount);

View File

@ -533,6 +533,13 @@ enum {
struct ubuf_info {
void (*callback)(struct sk_buff *, struct ubuf_info *,
bool zerocopy_success);
refcount_t refcnt;
u8 flags;
};
struct ubuf_info_msgzc {
struct ubuf_info ubuf;
union {
struct {
unsigned long desc;
@ -545,8 +552,6 @@ struct ubuf_info {
u32 bytelen;
};
};
refcount_t refcnt;
u8 flags;
struct mmpin {
struct user_struct *user;
@ -555,6 +560,8 @@ struct ubuf_info {
};
#define skb_uarg(SKB) ((struct ubuf_info *)(skb_shinfo(SKB)->destructor_arg))
#define uarg_to_msgzc(ubuf_ptr) container_of((ubuf_ptr), struct ubuf_info_msgzc, \
ubuf)
int mm_account_pinned_pages(struct mmpin *mmp, size_t size);
void mm_unaccount_pinned_pages(struct mmpin *mmp);

View File

@ -1188,7 +1188,7 @@ EXPORT_SYMBOL_GPL(mm_unaccount_pinned_pages);
static struct ubuf_info *msg_zerocopy_alloc(struct sock *sk, size_t size)
{
struct ubuf_info *uarg;
struct ubuf_info_msgzc *uarg;
struct sk_buff *skb;
WARN_ON_ONCE(!in_task());
@ -1206,19 +1206,19 @@ static struct ubuf_info *msg_zerocopy_alloc(struct sock *sk, size_t size)
return NULL;
}
uarg->callback = msg_zerocopy_callback;
uarg->ubuf.callback = msg_zerocopy_callback;
uarg->id = ((u32)atomic_inc_return(&sk->sk_zckey)) - 1;
uarg->len = 1;
uarg->bytelen = size;
uarg->zerocopy = 1;
uarg->flags = SKBFL_ZEROCOPY_FRAG | SKBFL_DONT_ORPHAN;
refcount_set(&uarg->refcnt, 1);
uarg->ubuf.flags = SKBFL_ZEROCOPY_FRAG | SKBFL_DONT_ORPHAN;
refcount_set(&uarg->ubuf.refcnt, 1);
sock_hold(sk);
return uarg;
return &uarg->ubuf;
}
static inline struct sk_buff *skb_from_uarg(struct ubuf_info *uarg)
static inline struct sk_buff *skb_from_uarg(struct ubuf_info_msgzc *uarg)
{
return container_of((void *)uarg, struct sk_buff, cb);
}
@ -1227,6 +1227,7 @@ struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size,
struct ubuf_info *uarg)
{
if (uarg) {
struct ubuf_info_msgzc *uarg_zc;
const u32 byte_limit = 1 << 19; /* limit to a few TSO */
u32 bytelen, next;
@ -1242,8 +1243,9 @@ struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size,
return NULL;
}
bytelen = uarg->bytelen + size;
if (uarg->len == USHRT_MAX - 1 || bytelen > byte_limit) {
uarg_zc = uarg_to_msgzc(uarg);
bytelen = uarg_zc->bytelen + size;
if (uarg_zc->len == USHRT_MAX - 1 || bytelen > byte_limit) {
/* TCP can create new skb to attach new uarg */
if (sk->sk_type == SOCK_STREAM)
goto new_alloc;
@ -1251,11 +1253,11 @@ struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size,
}
next = (u32)atomic_read(&sk->sk_zckey);
if ((u32)(uarg->id + uarg->len) == next) {
if (mm_account_pinned_pages(&uarg->mmp, size))
if ((u32)(uarg_zc->id + uarg_zc->len) == next) {
if (mm_account_pinned_pages(&uarg_zc->mmp, size))
return NULL;
uarg->len++;
uarg->bytelen = bytelen;
uarg_zc->len++;
uarg_zc->bytelen = bytelen;
atomic_set(&sk->sk_zckey, ++next);
/* no extra ref when appending to datagram (MSG_MORE) */
@ -1291,7 +1293,7 @@ static bool skb_zerocopy_notify_extend(struct sk_buff *skb, u32 lo, u16 len)
return true;
}
static void __msg_zerocopy_callback(struct ubuf_info *uarg)
static void __msg_zerocopy_callback(struct ubuf_info_msgzc *uarg)
{
struct sk_buff *tail, *skb = skb_from_uarg(uarg);
struct sock_exterr_skb *serr;
@ -1344,19 +1346,21 @@ release:
void msg_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *uarg,
bool success)
{
uarg->zerocopy = uarg->zerocopy & success;
struct ubuf_info_msgzc *uarg_zc = uarg_to_msgzc(uarg);
uarg_zc->zerocopy = uarg_zc->zerocopy & success;
if (refcount_dec_and_test(&uarg->refcnt))
__msg_zerocopy_callback(uarg);
__msg_zerocopy_callback(uarg_zc);
}
EXPORT_SYMBOL_GPL(msg_zerocopy_callback);
void msg_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref)
{
struct sock *sk = skb_from_uarg(uarg)->sk;
struct sock *sk = skb_from_uarg(uarg_to_msgzc(uarg))->sk;
atomic_dec(&sk->sk_zckey);
uarg->len--;
uarg_to_msgzc(uarg)->len--;
if (have_uref)
msg_zerocopy_callback(NULL, uarg, true);

View File

@ -1043,7 +1043,7 @@ static int __ip_append_data(struct sock *sk,
paged = true;
zc = true;
} else {
uarg->zerocopy = 0;
uarg_to_msgzc(uarg)->zerocopy = 0;
skb_zcopy_set(skb, uarg, &extra_uref);
}
}

View File

@ -1239,7 +1239,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
}
zc = sk->sk_route_caps & NETIF_F_SG;
if (!zc)
uarg->zerocopy = 0;
uarg_to_msgzc(uarg)->zerocopy = 0;
}
}

View File

@ -1567,7 +1567,7 @@ emsgsize:
paged = true;
zc = true;
} else {
uarg->zerocopy = 0;
uarg_to_msgzc(uarg)->zerocopy = 0;
skb_zcopy_set(skb, uarg, &extra_uref);
}
}