netns/mbuf: add a per net namespace ring buffer
Add a per-netns log ring buffer whose read-side interface is /proc/net/twatcher/log. This is a backport from tk3, with some code clean-up. Reviewed-by: kernelxing <kernelxing@tencent.com> Signed-off-by: MengEn Sun <mengensun@tencent.com>
This commit is contained in:
parent
713a7bc81c
commit
f660cd0791
|
@ -63,6 +63,62 @@ static int seq_open_net(struct inode *inode, struct file *file)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_NETNS_MBUF
|
||||||
|
/* token from seq_open_net, all is same except the private is
|
||||||
|
* alloc by vmalloc, why?
|
||||||
|
*
|
||||||
|
* sameone may need a big private, wasting continuous phy mem
|
||||||
|
* they can use this function to use vmalloc private
|
||||||
|
*
|
||||||
|
* from now if you using this open abi place write a write
|
||||||
|
* fops like proc_simple_write we delete the pde->write check
|
||||||
|
*/
|
||||||
|
void *seq_open_net_large_private(struct inode *inode, struct file *file)
|
||||||
|
{
|
||||||
|
struct net *net;
|
||||||
|
struct seq_file *seq;
|
||||||
|
struct seq_net_private *p;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
unsigned int state_size = PDE(inode)->state_size;
|
||||||
|
|
||||||
|
WARN_ON_ONCE(state_size < sizeof(struct seq_net_private));
|
||||||
|
|
||||||
|
net = get_proc_net(inode);
|
||||||
|
if (!net) {
|
||||||
|
ret = -ENXIO;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
p = vmalloc(state_size);
|
||||||
|
if (!p) {
|
||||||
|
ret = -ENOMEM;
|
||||||
|
goto put_out;
|
||||||
|
}
|
||||||
|
memset(p, 0, state_size);
|
||||||
|
|
||||||
|
ret = seq_open(file, PDE(inode)->seq_ops);
|
||||||
|
if (ret < 0)
|
||||||
|
goto free_out;
|
||||||
|
|
||||||
|
seq = file->private_data;
|
||||||
|
seq->private = (void *)p;
|
||||||
|
|
||||||
|
#ifdef CONFIG_NET_NS
|
||||||
|
p->net = net;
|
||||||
|
#endif
|
||||||
|
return p;
|
||||||
|
|
||||||
|
free_out:
|
||||||
|
vfree(p);
|
||||||
|
put_out:
|
||||||
|
put_net(net);
|
||||||
|
out:
|
||||||
|
return ERR_PTR(ret);
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL(seq_open_net_large_private);
|
||||||
|
#endif
|
||||||
|
|
||||||
static void seq_file_net_put_net(struct seq_file *seq)
|
static void seq_file_net_put_net(struct seq_file *seq)
|
||||||
{
|
{
|
||||||
#ifdef CONFIG_NET_NS
|
#ifdef CONFIG_NET_NS
|
||||||
|
@ -83,6 +139,31 @@ static int seq_release_net(struct inode *ino, struct file *f)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_NETNS_MBUF
|
||||||
|
/* add a ext-abi to allow someone define the fops by themself, this is all
|
||||||
|
* alike proc_create_net_data except has a extra f_ops parameter
|
||||||
|
*/
|
||||||
|
struct proc_dir_entry *proc_create_net_data_ops(const char *name, umode_t mode,
|
||||||
|
struct proc_dir_entry *parent,
|
||||||
|
const struct seq_operations *seq_ops,
|
||||||
|
unsigned int state_size, void *data,
|
||||||
|
const struct proc_ops *proc_ops)
|
||||||
|
{
|
||||||
|
struct proc_dir_entry *p;
|
||||||
|
|
||||||
|
p = proc_create_reg(name, mode, &parent, data);
|
||||||
|
if (!p)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
pde_force_lookup(p);
|
||||||
|
p->proc_ops = proc_ops;
|
||||||
|
p->seq_ops = seq_ops;
|
||||||
|
p->state_size = state_size;
|
||||||
|
return proc_register(parent, p);
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(proc_create_net_data_ops);
|
||||||
|
#endif
|
||||||
|
|
||||||
static const struct proc_ops proc_net_seq_ops = {
|
static const struct proc_ops proc_net_seq_ops = {
|
||||||
.proc_open = seq_open_net,
|
.proc_open = seq_open_net,
|
||||||
.proc_read = seq_read,
|
.proc_read = seq_read,
|
||||||
|
|
|
@ -49,7 +49,7 @@ struct mbuf_slot {
|
||||||
seqlock_t slot_lock;
|
seqlock_t slot_lock;
|
||||||
/* rate limit */
|
/* rate limit */
|
||||||
struct ratelimit_state ratelimit;
|
struct ratelimit_state ratelimit;
|
||||||
struct cgroup *owner;
|
void *owner;
|
||||||
const struct mbuf_operations *ops;
|
const struct mbuf_operations *ops;
|
||||||
struct mbuf_ring *mring;
|
struct mbuf_ring *mring;
|
||||||
};
|
};
|
||||||
|
@ -62,7 +62,7 @@ struct mbuf_operations {
|
||||||
u32 (*next)(struct mbuf_ring *mring, u32 idx);
|
u32 (*next)(struct mbuf_ring *mring, u32 idx);
|
||||||
|
|
||||||
/* write message */
|
/* write message */
|
||||||
ssize_t (*write)(struct cgroup *cg, const char *fmt, va_list);
|
ssize_t (*write)(struct mbuf_slot *mbuf, const char *fmt, va_list args);
|
||||||
} ____cacheline_aligned;
|
} ____cacheline_aligned;
|
||||||
|
|
||||||
|
|
||||||
|
@ -70,9 +70,13 @@ void __init mbuf_bmap_init(void);
|
||||||
void __init setup_mbuf(void);
|
void __init setup_mbuf(void);
|
||||||
|
|
||||||
struct mbuf_slot *mbuf_slot_alloc(struct cgroup *cg);
|
struct mbuf_slot *mbuf_slot_alloc(struct cgroup *cg);
|
||||||
|
struct mbuf_slot *mbuf_slot_alloc_v2(void *owner, struct mbuf_operations *ops);
|
||||||
void mbuf_free(struct cgroup *cg);
|
void mbuf_free(struct cgroup *cg);
|
||||||
|
|
||||||
ssize_t mbuf_print(struct cgroup *cgrp, const char *fmt, ...);
|
ssize_t mbuf_print(struct cgroup *cgrp, const char *fmt, ...);
|
||||||
void snapshot_mbuf(struct mbuf_slot *, struct mbuf_slot*, seqlock_t *);
|
void snapshot_mbuf(struct mbuf_slot *, struct mbuf_slot*, seqlock_t *);
|
||||||
u32 get_mbuf_slot_len(void);
|
u32 get_mbuf_slot_len(void);
|
||||||
|
void mbuf_free_slot(struct mbuf_slot *slot);
|
||||||
|
void mbuf_reset(struct mbuf_slot *mbuf);
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -161,6 +161,15 @@ int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
|
||||||
void arch_report_meminfo(struct seq_file *m);
|
void arch_report_meminfo(struct seq_file *m);
|
||||||
void arch_proc_pid_thread_features(struct seq_file *m, struct task_struct *task);
|
void arch_proc_pid_thread_features(struct seq_file *m, struct task_struct *task);
|
||||||
|
|
||||||
|
#ifdef CONFIG_NETNS_MBUF
|
||||||
|
void *seq_open_net_large_private(struct inode *inode, struct file *file);
|
||||||
|
struct proc_dir_entry *proc_create_net_data_ops(const char *name, umode_t mode,
|
||||||
|
struct proc_dir_entry *parent,
|
||||||
|
const struct seq_operations *seq_ops,
|
||||||
|
unsigned int state_size, void *data,
|
||||||
|
const struct proc_ops *proc_ops);
|
||||||
|
#endif
|
||||||
|
|
||||||
#else /* CONFIG_PROC_FS */
|
#else /* CONFIG_PROC_FS */
|
||||||
|
|
||||||
static inline void proc_root_init(void)
|
static inline void proc_root_init(void)
|
||||||
|
|
|
@ -43,7 +43,9 @@
|
||||||
#include <linux/skbuff.h>
|
#include <linux/skbuff.h>
|
||||||
#include <linux/notifier.h>
|
#include <linux/notifier.h>
|
||||||
#include <linux/xarray.h>
|
#include <linux/xarray.h>
|
||||||
|
#ifdef CONFIG_NETNS_MBUF
|
||||||
|
#include <net/netns_mbuf.h>
|
||||||
|
#endif
|
||||||
struct user_namespace;
|
struct user_namespace;
|
||||||
struct proc_dir_entry;
|
struct proc_dir_entry;
|
||||||
struct net_device;
|
struct net_device;
|
||||||
|
@ -190,6 +192,9 @@ struct net {
|
||||||
#if IS_ENABLED(CONFIG_SMC)
|
#if IS_ENABLED(CONFIG_SMC)
|
||||||
struct netns_smc smc;
|
struct netns_smc smc;
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef CONFIG_NETNS_MBUF
|
||||||
|
struct net_mbuf mbuf;
|
||||||
|
#endif
|
||||||
} __randomize_layout;
|
} __randomize_layout;
|
||||||
|
|
||||||
#include <linux/seq_file_net.h>
|
#include <linux/seq_file_net.h>
|
||||||
|
|
|
@ -0,0 +1,29 @@
|
||||||
|
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Make mbuf usable by net namespaces.
 *
 * Author: mengensun <mengensun@tencent.com>
 * Copyright (C) 2024 Tencent, Inc
 */
#ifndef __NETNS_MBUF
#define __NETNS_MBUF

#include <linux/proc_fs.h>
#include <linux/mbuf.h>

/*
 * Only pointers to struct net are used here, and this header is pulled in
 * by net/net_namespace.h before struct net itself is defined.
 */
struct net;

#ifdef CONFIG_NETNS_MBUF
/* Per net namespace mbuf state, embedded in struct net as ->mbuf. */
struct net_mbuf {
	/* the "twatcher" directory created under the netns proc_net */
	struct proc_dir_entry *twatcher;
	/* the "log" file created inside the twatcher directory */
	struct proc_dir_entry *log;
	/* ring buffer slot of this netns; NULL when none could be allocated */
	struct mbuf_slot *slot;
};

int inet_mbuf_init(void);
void inet_mbuf_exit(void);
ssize_t net_mbuf_print(struct net *net, const char *fmt, ...);
#else
static __always_inline int inet_mbuf_init(void) { return 0; }
static __always_inline void inet_mbuf_exit(void) {}
static __always_inline ssize_t net_mbuf_print(struct net *net, const char *fmt, ...) { return 0; }
#endif
#endif
|
|
@ -252,7 +252,7 @@ static int mbuf_prepare(struct mbuf_ring *mring, u32 msg_size)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Write monitor buffer message */
|
/* Write monitor buffer message */
|
||||||
static ssize_t do_mbuf_write(struct cgroup *cg, char *buffer, size_t size)
|
static ssize_t do_mbuf_write(struct mbuf_slot *mbuf, char *buffer, size_t size)
|
||||||
{
|
{
|
||||||
struct mbuf_ring *mring;
|
struct mbuf_ring *mring;
|
||||||
struct mbuf_ring_desc *desc;
|
struct mbuf_ring_desc *desc;
|
||||||
|
@ -265,13 +265,13 @@ static ssize_t do_mbuf_write(struct cgroup *cg, char *buffer, size_t size)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
mring = cg->mbuf->mring;
|
mring = mbuf->mring;
|
||||||
len = sizeof(struct mbuf_ring_desc) + size;
|
len = sizeof(struct mbuf_ring_desc) + size;
|
||||||
|
|
||||||
write_seqlock_irqsave(&cg->mbuf->slot_lock, flags);
|
write_seqlock_irqsave(&mbuf->slot_lock, flags);
|
||||||
|
|
||||||
if (mbuf_prepare(mring, len)) {
|
if (mbuf_prepare(mring, len)) {
|
||||||
write_sequnlock_irqrestore(&cg->mbuf->slot_lock, flags);
|
write_sequnlock_irqrestore(&mbuf->slot_lock, flags);
|
||||||
pr_err("mbuf: Can not find enough space.\n");
|
pr_err("mbuf: Can not find enough space.\n");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -290,20 +290,23 @@ static ssize_t do_mbuf_write(struct cgroup *cg, char *buffer, size_t size)
|
||||||
mring->next_idx += desc->len;
|
mring->next_idx += desc->len;
|
||||||
mring->next_seq++;
|
mring->next_seq++;
|
||||||
|
|
||||||
write_sequnlock_irqrestore(&cg->mbuf->slot_lock, flags);
|
write_sequnlock_irqrestore(&mbuf->slot_lock, flags);
|
||||||
|
|
||||||
return size;
|
return size;
|
||||||
}
|
}
|
||||||
|
|
||||||
void mbuf_reset(struct mbuf_ring *mring)
|
void mbuf_reset(struct mbuf_slot *mbuf)
|
||||||
{
|
{
|
||||||
mring->first_idx = mring->base_idx;
|
write_seqlock(&mbuf->slot_lock);
|
||||||
mring->first_seq = 0;
|
mbuf->mring->first_idx = mbuf->mring->base_idx;
|
||||||
mring->next_idx = mring->base_idx;
|
mbuf->mring->first_seq = 0;
|
||||||
mring->next_seq = 0;
|
mbuf->mring->next_idx = mbuf->mring->base_idx;
|
||||||
|
mbuf->mring->next_seq = 0;
|
||||||
|
write_sequnlock(&mbuf->slot_lock);
|
||||||
}
|
}
|
||||||
|
EXPORT_SYMBOL(mbuf_reset);
|
||||||
|
|
||||||
static ssize_t mbuf_write(struct cgroup *cg, const char *fmt, va_list args)
|
static ssize_t mbuf_write(struct mbuf_slot *mbuf, const char *fmt, va_list args)
|
||||||
{
|
{
|
||||||
static char buf[MBUF_MSG_LEN_MAX];
|
static char buf[MBUF_MSG_LEN_MAX];
|
||||||
char *text = buf;
|
char *text = buf;
|
||||||
|
@ -313,7 +316,7 @@ static ssize_t mbuf_write(struct cgroup *cg, const char *fmt, va_list args)
|
||||||
t_len = vscnprintf(text, sizeof(buf), fmt, args);
|
t_len = vscnprintf(text, sizeof(buf), fmt, args);
|
||||||
|
|
||||||
/* Write string to mbuf */
|
/* Write string to mbuf */
|
||||||
ret = do_mbuf_write(cg, text, t_len);
|
ret = do_mbuf_write(mbuf, text, t_len);
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
@ -335,11 +338,17 @@ static int get_next_mbuf_id(unsigned long *addr, u32 start)
|
||||||
return index;
|
return index;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void mbuf_slot_init(struct mbuf_slot *mb, struct cgroup *cg, u32 index)
|
static void mbuf_slot_init(struct mbuf_slot *mb,
|
||||||
|
void *owner, u32 index, struct mbuf_operations *ops)
|
||||||
{
|
{
|
||||||
mb->owner = cg;
|
mb->owner = owner;
|
||||||
mb->idx = index;
|
mb->idx = index;
|
||||||
mb->ops = &mbuf_ops;
|
|
||||||
|
if (!ops)
|
||||||
|
mb->ops = &mbuf_ops;
|
||||||
|
else
|
||||||
|
mb->ops = ops;
|
||||||
|
|
||||||
seqlock_init(&mb->slot_lock);
|
seqlock_init(&mb->slot_lock);
|
||||||
ratelimit_state_init(&mb->ratelimit, 5 * HZ, 50);
|
ratelimit_state_init(&mb->ratelimit, 5 * HZ, 50);
|
||||||
|
|
||||||
|
@ -349,10 +358,10 @@ static void mbuf_slot_init(struct mbuf_slot *mb, struct cgroup *cg, u32 index)
|
||||||
+ sizeof(struct mbuf_ring);
|
+ sizeof(struct mbuf_ring);
|
||||||
mb->mring->end_idx = (index + 1) * g_mbuf.mbuf_size_per_cg - 1;
|
mb->mring->end_idx = (index + 1) * g_mbuf.mbuf_size_per_cg - 1;
|
||||||
|
|
||||||
mbuf_reset(mb->mring);
|
mbuf_reset(mb);
|
||||||
}
|
}
|
||||||
|
|
||||||
struct mbuf_slot *mbuf_slot_alloc(struct cgroup *cg)
|
struct mbuf_slot *mbuf_slot_alloc_v2(void *owner, struct mbuf_operations *ops)
|
||||||
{
|
{
|
||||||
struct mbuf_slot *mb;
|
struct mbuf_slot *mb;
|
||||||
u32 index = 0;
|
u32 index = 0;
|
||||||
|
@ -401,26 +410,38 @@ again:
|
||||||
g_mbuf.mbuf_next_id = index;
|
g_mbuf.mbuf_next_id = index;
|
||||||
|
|
||||||
mb = (struct mbuf_slot *)(g_mbuf.mbuf + index * g_mbuf.mbuf_size_per_cg);
|
mb = (struct mbuf_slot *)(g_mbuf.mbuf + index * g_mbuf.mbuf_size_per_cg);
|
||||||
mbuf_slot_init(mb, cg, index);
|
mbuf_slot_init(mb, owner, index, ops);
|
||||||
g_mbuf.mbuf_frees--;
|
g_mbuf.mbuf_frees--;
|
||||||
|
|
||||||
spin_unlock_irqrestore(&g_mbuf.mbuf_lock, flags);
|
spin_unlock_irqrestore(&g_mbuf.mbuf_lock, flags);
|
||||||
|
|
||||||
return mb;
|
return mb;
|
||||||
}
|
}
|
||||||
|
EXPORT_SYMBOL(mbuf_slot_alloc_v2);
|
||||||
|
|
||||||
void mbuf_free(struct cgroup *cg)
|
struct mbuf_slot *mbuf_slot_alloc(struct cgroup *cg)
|
||||||
|
{
|
||||||
|
return mbuf_slot_alloc_v2((void *)cg, NULL);
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL(mbuf_slot_alloc);
|
||||||
|
|
||||||
|
void mbuf_free_slot(struct mbuf_slot *slot)
|
||||||
{
|
{
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
|
|
||||||
spin_lock_irqsave(&g_mbuf.mbuf_lock, flags);
|
spin_lock_irqsave(&g_mbuf.mbuf_lock, flags);
|
||||||
|
|
||||||
/* Make current idx the next available buffer */
|
/* Make current idx the next available buffer */
|
||||||
g_mbuf.mbuf_next_id = cg->mbuf->idx;
|
g_mbuf.mbuf_next_id = slot->idx;
|
||||||
__clear_bit(g_mbuf.mbuf_next_id, g_mbuf.mbuf_bitmap);
|
__clear_bit(g_mbuf.mbuf_next_id, g_mbuf.mbuf_bitmap);
|
||||||
|
|
||||||
g_mbuf.mbuf_frees++;
|
g_mbuf.mbuf_frees++;
|
||||||
spin_unlock_irqrestore(&g_mbuf.mbuf_lock, flags);
|
spin_unlock_irqrestore(&g_mbuf.mbuf_lock, flags);
|
||||||
|
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL(mbuf_free_slot);
|
||||||
|
|
||||||
|
void mbuf_free(struct cgroup *cg)
|
||||||
|
{
|
||||||
|
mbuf_free_slot(cg->mbuf);
|
||||||
}
|
}
|
||||||
|
|
||||||
static u32 rd_mbuf_next(struct mbuf_ring *mring, u32 curr_idx)
|
static u32 rd_mbuf_next(struct mbuf_ring *mring, u32 curr_idx)
|
||||||
|
|
10
net/Kconfig
10
net/Kconfig
|
@ -124,6 +124,16 @@ source "net/mptcp/Kconfig"
|
||||||
|
|
||||||
endif # if INET
|
endif # if INET
|
||||||
|
|
||||||
|
config NETNS_MBUF
|
||||||
|
bool "attach a mbuf to net namespace"
|
||||||
|
default y
|
||||||
|
depends on RQM && INET && PROC_FS
|
||||||
|
help
|
||||||
|
This allows attaching an mbuf to each net namespace. An mbuf is a
log ring buffer that kernel code can print log messages to.
|
||||||
|
|
||||||
|
if you are unsure how to answer this question, answer N.
|
||||||
|
|
||||||
config NETWORK_SECMARK
|
config NETWORK_SECMARK
|
||||||
bool "Security Marking"
|
bool "Security Marking"
|
||||||
help
|
help
|
||||||
|
|
|
@ -40,3 +40,4 @@ obj-$(CONFIG_NET_SOCK_MSG) += skmsg.o
|
||||||
obj-$(CONFIG_BPF_SYSCALL) += sock_map.o
|
obj-$(CONFIG_BPF_SYSCALL) += sock_map.o
|
||||||
obj-$(CONFIG_BPF_SYSCALL) += bpf_sk_storage.o
|
obj-$(CONFIG_BPF_SYSCALL) += bpf_sk_storage.o
|
||||||
obj-$(CONFIG_OF) += of_net.o
|
obj-$(CONFIG_OF) += of_net.o
|
||||||
|
obj-$(CONFIG_NETNS_MBUF) += netns_mbuf.o
|
||||||
|
|
|
@ -0,0 +1,275 @@
|
||||||
|
// SPDX-License-Identifier: GPL-2.0-only
|
||||||
|
/* make mbuf can be used by net namespace
|
||||||
|
*
|
||||||
|
* Author: mengensun <mengensun@tencent.com>
|
||||||
|
* Author: yuehongwu <yuehongwu@tencent.com>
|
||||||
|
* Copyright (C) 2024 Tencent, Inc
|
||||||
|
*/
|
||||||
|
#include<linux/cgroup.h>
|
||||||
|
#include<linux/mbuf.h>
|
||||||
|
#include<linux/proc_fs.h>
|
||||||
|
|
||||||
|
#include<net/net_namespace.h>
|
||||||
|
#include<net/netns/generic.h>
|
||||||
|
|
||||||
|
struct mbuf_seq_data {
|
||||||
|
struct seq_net_private snp;
|
||||||
|
struct mbuf_user_desc udesc;
|
||||||
|
struct mbuf_slot snapshot[];
|
||||||
|
};
|
||||||
|
|
||||||
|
static inline struct mbuf_slot *get_net_mbuf(struct net *net)
|
||||||
|
{
|
||||||
|
return net->mbuf.slot;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* not controlled by sysctl_qos_mbuf_enable because we will
|
||||||
|
* have a /proc/net/ipv4/netlat/enable in later patch
|
||||||
|
*/
|
||||||
|
ssize_t net_mbuf_print(struct net *net, const char *fmt, ...)
|
||||||
|
{
|
||||||
|
va_list args;
|
||||||
|
struct mbuf_slot *slot;
|
||||||
|
|
||||||
|
slot = net->mbuf.slot;
|
||||||
|
if (!slot || !__ratelimit(&slot->ratelimit))
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
va_start(args, fmt);
|
||||||
|
slot->ops->write(slot, fmt, args);
|
||||||
|
va_end(args);
|
||||||
|
out:
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL(net_mbuf_print);
|
||||||
|
|
||||||
|
/* udesc is the user side interface, used to get data from mbuf,
|
||||||
|
* we can alloc a udesc per user, not to alloc a udesc and bind
|
||||||
|
* to mbuf when user accessing mbuf.
|
||||||
|
*
|
||||||
|
* seq file private data is the ideal place to hold the udesc
|
||||||
|
* if we put udesc in seq file private data all things is simple
|
||||||
|
*/
|
||||||
|
static void *netns_mbuf_start(struct seq_file *s, loff_t *pos)
|
||||||
|
{
|
||||||
|
u32 index;
|
||||||
|
struct mbuf_user_desc *udesc;
|
||||||
|
struct mbuf_seq_data *pd;
|
||||||
|
|
||||||
|
pd = s->private;
|
||||||
|
udesc = &pd->udesc;
|
||||||
|
index = *pos;
|
||||||
|
|
||||||
|
/* why: see seq_mbuf_open */
|
||||||
|
if (!pd->snapshot->mring)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
/* If already reach end, just return */
|
||||||
|
if (index && index == pd->snapshot->mring->next_idx)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
udesc->user_idx = pd->snapshot->mring->first_idx;
|
||||||
|
udesc->user_seq = pd->snapshot->mring->first_seq;
|
||||||
|
|
||||||
|
/* Maybe reach end or empty */
|
||||||
|
if (udesc->user_idx == pd->snapshot->mring->next_idx)
|
||||||
|
return NULL;
|
||||||
|
return udesc;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * seq_file ->next(): move the cursor to the following record of the
 * snapshot and publish its index through *pos. Returns NULL once the
 * cursor reaches the snapshot's next_idx, or when there is no snapshot.
 */
static void *netns_mbuf_next(struct seq_file *s, void *v, loff_t *pos)
{
	struct mbuf_seq_data *priv = s->private;
	struct mbuf_slot *snap = priv->snapshot;
	struct mbuf_user_desc *ud = v;

	/* no snapshot was taken at open time, see seq_mbuf_open() */
	if (!snap->mring)
		return NULL;

	ud->user_idx = snap->ops->next(snap->mring, ud->user_idx);
	*pos = ud->user_idx;

	if (ud->user_idx != snap->mring->next_idx)
		return ud;

	return NULL;
}
|
||||||
|
|
||||||
|
/* seq_file ->stop(): intentionally empty, there is nothing to release. */
static void netns_mbuf_stop(struct seq_file *s, void *v)
{
}
|
||||||
|
|
||||||
|
static int netns_mbuf_show(struct seq_file *s, void *v)
|
||||||
|
{
|
||||||
|
ssize_t ret;
|
||||||
|
struct mbuf_seq_data *pd;
|
||||||
|
struct mbuf_user_desc *udesc = (struct mbuf_user_desc *)v;
|
||||||
|
|
||||||
|
pd = s->private;
|
||||||
|
|
||||||
|
/* why: see seq_mbuf_open */
|
||||||
|
if (!pd->snapshot->mring)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
memset(udesc->buf, 0, sizeof(udesc->buf));
|
||||||
|
ret = pd->snapshot->ops->read(pd->snapshot, udesc);
|
||||||
|
if (ret > 0)
|
||||||
|
seq_printf(s, "%s", udesc->buf);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int seq_mbuf_open(struct inode *inode, struct file *file)
|
||||||
|
{
|
||||||
|
struct mbuf_seq_data *p;
|
||||||
|
struct mbuf_slot *mbuf;
|
||||||
|
|
||||||
|
p = seq_open_net_large_private(inode, file);
|
||||||
|
|
||||||
|
if (IS_ERR(p))
|
||||||
|
return PTR_ERR(p);
|
||||||
|
|
||||||
|
mbuf = get_net_mbuf(p->snp.net);
|
||||||
|
/* netns may have no mbuf attached, because the mbuf
|
||||||
|
* pool has a max num
|
||||||
|
* here we let file open success, so, seq_ops must
|
||||||
|
* check mring point
|
||||||
|
*
|
||||||
|
* btw: we memzerod the private in
|
||||||
|
* seq_open_net_large_private
|
||||||
|
*/
|
||||||
|
if (!mbuf)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
snapshot_mbuf(p->snapshot, mbuf, &mbuf->slot_lock);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* this function is token from seq_release_net, all is the
|
||||||
|
* same except for using **vfree** to free the private
|
||||||
|
*/
|
||||||
|
static int seq_mbuf_release(struct inode *ino, struct file *f)
|
||||||
|
{
|
||||||
|
struct seq_file *seq = f->private_data;
|
||||||
|
|
||||||
|
put_net(seq_file_net(seq));
|
||||||
|
vfree(seq->private);
|
||||||
|
seq->private = NULL;
|
||||||
|
seq_release(ino, f);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* when write clear the data */
|
||||||
|
ssize_t seq_mbuf_write(struct file *f, const char __user *ubuf,
|
||||||
|
size_t size, loff_t *_pos)
|
||||||
|
{
|
||||||
|
struct seq_file *seq = f->private_data;
|
||||||
|
struct mbuf_seq_data *p;
|
||||||
|
struct mbuf_slot *mb;
|
||||||
|
|
||||||
|
p = seq->private;
|
||||||
|
mb = get_net_mbuf(p->snp.net);
|
||||||
|
|
||||||
|
/* the netns not attached mbuf */
|
||||||
|
if (!mb)
|
||||||
|
return size;
|
||||||
|
|
||||||
|
mbuf_reset(mb);
|
||||||
|
return size;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* seq_read have a mutex lock hold when called thoes function
|
||||||
|
* while the mutex lock is bind to struct file, not to inode,
|
||||||
|
* that mutex lock can control mutex access to mbuf among tasks
|
||||||
|
* which have the same file object (eg: muti-threads of
|
||||||
|
* a process)
|
||||||
|
*
|
||||||
|
* if there are muti-process access the mbuf, there have no
|
||||||
|
* mutex accessing.
|
||||||
|
*/
|
||||||
|
static const struct seq_operations mbuf_seq_ops = {
|
||||||
|
.show = netns_mbuf_show,
|
||||||
|
.start = netns_mbuf_start,
|
||||||
|
.next = netns_mbuf_next,
|
||||||
|
.stop = netns_mbuf_stop,
|
||||||
|
};
|
||||||
|
|
||||||
|
static const struct proc_ops mbuf_seq_fops = {
|
||||||
|
.proc_open = seq_mbuf_open,
|
||||||
|
.proc_read = seq_read,
|
||||||
|
.proc_write = seq_mbuf_write,
|
||||||
|
.proc_lseek = seq_lseek,
|
||||||
|
.proc_release = seq_mbuf_release,
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
 * Per-netns init: allocate the mbuf slot and create
 * <proc_net>/twatcher/log for it.
 *
 * Returns 0 on success, or -ENOMEM when one of the proc entries cannot
 * be created. A failed slot allocation is NOT an error.
 */
static int __net_init net_mbuf_init(struct net *net)
{
	/*
	 * If the mbuf allocation fails, still let the netns be created:
	 * returning an error here would put a limit on the number of net
	 * namespaces that can exist on the system.
	 *
	 * btw: mbuf_slot has a max num of 1024 for now; once all slots are
	 * used further allocations may fail. What we can do is keep the
	 * user interface unchanged and make netlat `speak nothing`.
	 *
	 * NOTE(review): the original comment also said "cgroup is used for
	 * kabi" - presumably a reference to the cgroup-typed
	 * mbuf_slot_alloc() wrapper; confirm.
	 */
	net->mbuf.slot = mbuf_slot_alloc_v2(net, NULL);
	if (!net->mbuf.slot)
		pr_err("fail alloc mbuf\n");

	net->mbuf.twatcher = proc_net_mkdir(net, "twatcher", net->proc_net);
	if (!net->mbuf.twatcher)
		goto free_mbuf;

	/* the private area holds mbuf_seq_data plus room for a slot snapshot */
	net->mbuf.log = proc_create_net_data_ops("log", S_IFREG | 0644,
				net->mbuf.twatcher,
				&mbuf_seq_ops,
				sizeof(struct mbuf_seq_data) + get_mbuf_slot_len(),
				NULL, &mbuf_seq_fops);
	if (!net->mbuf.log)
		goto remove_watcher;

	return 0;

remove_watcher:
	remove_proc_entry("twatcher", net->proc_net);
	net->mbuf.twatcher = NULL;
free_mbuf:
	/* do not leave a pointer to a slot that went back to the pool */
	if (net->mbuf.slot) {
		mbuf_free_slot(net->mbuf.slot);
		net->mbuf.slot = NULL;
	}
	return -ENOMEM;
}
|
||||||
|
|
||||||
|
/*
 * Per-netns exit: remove the proc entries created by net_mbuf_init() and
 * give the mbuf slot back to the pool.
 */
static void __net_exit net_mbuf_exit(struct net *net)
{
	struct mbuf_slot *slot = net->mbuf.slot;

	/*
	 * remove_proc_entry() takes the PARENT directory of the named
	 * entry: "log" lives in the twatcher directory and "twatcher" lives
	 * in the netns proc_net directory (same as the error path of
	 * net_mbuf_init()).
	 */
	remove_proc_entry("log", net->mbuf.twatcher);
	remove_proc_entry("twatcher", net->proc_net);
	net->mbuf.log = NULL;
	net->mbuf.twatcher = NULL;

	/* if the mbuf allocation failed there is nothing to free */
	if (!slot)
		return;

	net->mbuf.slot = NULL;
	mbuf_free_slot(slot);
}
|
||||||
|
|
||||||
|
static struct pernet_operations net_mbuf_ops = {
|
||||||
|
.init = net_mbuf_init,
|
||||||
|
.exit = net_mbuf_exit,
|
||||||
|
};
|
||||||
|
|
||||||
|
int inet_mbuf_init(void)
|
||||||
|
{
|
||||||
|
return register_pernet_subsys(&net_mbuf_ops);
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL(inet_mbuf_init);
|
||||||
|
|
||||||
|
void inet_mbuf_exit(void)
|
||||||
|
{
|
||||||
|
unregister_pernet_subsys(&net_mbuf_ops);
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL(inet_mbuf_exit);
|
|
@ -114,6 +114,7 @@
|
||||||
#include <net/xfrm.h>
|
#include <net/xfrm.h>
|
||||||
#include <net/net_namespace.h>
|
#include <net/net_namespace.h>
|
||||||
#include <net/secure_seq.h>
|
#include <net/secure_seq.h>
|
||||||
|
#include <net/netns_mbuf.h>
|
||||||
#ifdef CONFIG_IP_MROUTE
|
#ifdef CONFIG_IP_MROUTE
|
||||||
#include <linux/mroute.h>
|
#include <linux/mroute.h>
|
||||||
#endif
|
#endif
|
||||||
|
@ -2102,11 +2103,15 @@ static int __init ipv4_proc_init(void)
|
||||||
goto out_udp;
|
goto out_udp;
|
||||||
if (ping_proc_init())
|
if (ping_proc_init())
|
||||||
goto out_ping;
|
goto out_ping;
|
||||||
|
if (inet_mbuf_init())
|
||||||
|
goto out_mbuf;
|
||||||
if (ip_misc_proc_init())
|
if (ip_misc_proc_init())
|
||||||
goto out_misc;
|
goto out_misc;
|
||||||
out:
|
out:
|
||||||
return rc;
|
return rc;
|
||||||
out_misc:
|
out_misc:
|
||||||
|
inet_mbuf_exit();
|
||||||
|
out_mbuf:
|
||||||
ping_proc_exit();
|
ping_proc_exit();
|
||||||
out_ping:
|
out_ping:
|
||||||
udp4_proc_exit();
|
udp4_proc_exit();
|
||||||
|
|
Loading…
Reference in New Issue