OpenCloudOS-Kernel/include/net/fq_impl.h

339 lines
7.0 KiB
C
Raw Normal View History

/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2016 Qualcomm Atheros, Inc
*
* Based on net/sched/sch_fq_codel.c
*/
#ifndef __NET_SCHED_FQ_IMPL_H
#define __NET_SCHED_FQ_IMPL_H
#include <net/fq.h>
/* functions that are embedded into includer */
/*
 * fq_adjust_removal - undo the accounting for one skb leaving @flow
 * @fq:   fq instance; its packet counter and memory usage are reduced
 * @flow: flow the skb belonged to; its byte backlog is reduced
 * @skb:  the packet being removed
 *
 * Only counters are touched here: the skb is not unlinked from any queue
 * and no list is reordered. flow->tin is dereferenced, so the flow must
 * currently be attached to a tin.
 */
static void fq_adjust_removal(struct fq *fq,
			      struct fq_flow *flow,
			      struct sk_buff *skb)
{
	struct fq_tin *owner = flow->tin;

	/* fq-wide totals: one packet less, truesize bytes of memory less */
	fq->backlog--;
	fq->memory_usage -= skb->truesize;

	/* per-flow backlog is kept in bytes (skb->len) */
	flow->backlog -= skb->len;

	/* per-tin backlog is kept both in packets and in bytes */
	owner->backlog_packets--;
	owner->backlog_bytes -= skb->len;
}
static void fq_rejigger_backlog(struct fq *fq, struct fq_flow *flow)
{
struct fq_flow *i;
if (flow->backlog == 0) {
list_del_init(&flow->backlogchain);
} else {
i = flow;
list_for_each_entry_continue(i, &fq->backlogs, backlogchain)
if (i->backlog < flow->backlog)
break;
list_move_tail(&flow->backlogchain,
&i->backlogchain);
}
}
/*
 * fq_flow_dequeue - take the first packet off @flow's queue
 * @fq:   fq instance; fq->lock must be held
 * @flow: flow to dequeue from
 *
 * On success the fq/tin/flow counters are adjusted and the flow is
 * repositioned on (or removed from) fq->backlogs.
 *
 * Return: the dequeued skb, or NULL if the flow's queue was empty.
 */
static struct sk_buff *fq_flow_dequeue(struct fq *fq,
				       struct fq_flow *flow)
{
	struct sk_buff *head;

	lockdep_assert_held(&fq->lock);

	head = __skb_dequeue(&flow->queue);
	if (head) {
		fq_adjust_removal(fq, flow, head);
		fq_rejigger_backlog(fq, flow);
	}

	return head;
}
/*
 * fq_tin_dequeue - pick the next packet to transmit from @tin
 * @fq:           fq instance; fq->lock must be held
 * @tin:          tin to dequeue from
 * @dequeue_func: callback that actually pulls a packet out of a flow
 *
 * Flows on tin->new_flows are served before those on tin->old_flows. A flow
 * whose deficit is used up gets another fq->quantum and is rotated to the
 * tail of old_flows. A flow that yields no packet is either rotated to
 * old_flows or detached from the tin, and the search starts over.
 *
 * Return: the dequeued skb, or NULL once both flow lists are empty.
 */
static struct sk_buff *fq_tin_dequeue(struct fq *fq,
				      struct fq_tin *tin,
				      fq_tin_dequeue_t dequeue_func)
{
	struct list_head *src;
	struct fq_flow *cur;
	struct sk_buff *pkt;

	lockdep_assert_held(&fq->lock);

	for (;;) {
		if (!list_empty(&tin->new_flows))
			src = &tin->new_flows;
		else if (!list_empty(&tin->old_flows))
			src = &tin->old_flows;
		else
			return NULL;

		cur = list_first_entry(src, struct fq_flow, flowchain);

		if (cur->deficit <= 0) {
			/* out of credit: refill and send to the back of old_flows */
			cur->deficit += fq->quantum;
			list_move_tail(&cur->flowchain, &tin->old_flows);
			continue;
		}

		pkt = dequeue_func(fq, tin, cur);
		if (pkt)
			break;

		/* force a pass through old_flows to prevent starvation */
		if (src == &tin->new_flows && !list_empty(&tin->old_flows)) {
			list_move_tail(&cur->flowchain, &tin->old_flows);
		} else {
			list_del_init(&cur->flowchain);
			cur->tin = NULL;
		}
	}

	/* charge the packet to the flow and account it as transmitted */
	cur->deficit -= pkt->len;
	tin->tx_bytes += pkt->len;
	tin->tx_packets++;

	return pkt;
}
static u32 fq_flow_idx(struct fq *fq, struct sk_buff *skb)
{
net/flow_dissector: switch to siphash UDP IPv6 packets auto flowlabels are using a 32bit secret (static u32 hashrnd in net/core/flow_dissector.c) and apply jhash() over fields known by the receivers. Attackers can easily infer the 32bit secret and use this information to identify a device and/or user, since this 32bit secret is only set at boot time. Really, using jhash() to generate cookies sent on the wire is a serious security concern. Trying to change the rol32(hash, 16) in ip6_make_flowlabel() would be a dead end. Trying to periodically change the secret (like in sch_sfq.c) could change paths taken in the network for long lived flows. Let's switch to siphash, as we did in commit df453700e8d8 ("inet: switch IP ID generator to siphash") Using a cryptographically strong pseudo random function will solve this privacy issue and more generally remove other weak points in the stack. Packet schedulers using skb_get_hash_perturb() benefit from this change. Fixes: b56774163f99 ("ipv6: Enable auto flow labels by default") Fixes: 42240901f7c4 ("ipv6: Implement different admin modes for automatic flow labels") Fixes: 67800f9b1f4e ("ipv6: Call skb_get_hash_flowi6 to get skb->hash in ip6_make_flowlabel") Fixes: cb1ce2ef387b ("ipv6: Implement automatic flow label generation on transmit") Signed-off-by: Eric Dumazet <edumazet@google.com> Reported-by: Jonathan Berger <jonathann1@walla.com> Reported-by: Amit Klein <aksecurity@gmail.com> Reported-by: Benny Pinkas <benny@pinkas.net> Cc: Tom Herbert <tom@herbertland.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2019-10-22 22:57:46 +08:00
u32 hash = skb_get_hash_perturb(skb, &fq->perturbation);
return reciprocal_scale(hash, fq->flows_cnt);
}
/*
 * fq_flow_classify - select the flow a packet should be queued on
 * @fq:               fq instance; fq->lock must be held
 * @tin:              tin the packet is being enqueued for
 * @idx:              index into fq->flows (not range-checked here)
 * @skb:              the packet, forwarded to @get_default_func
 * @get_default_func: callback supplying a fallback flow on collision
 *
 * If the flow at @idx is already attached to a different tin, this counts
 * as a collision on both @tin and @fq and the callback's flow is used
 * instead. If the selected flow is not attached to any tin yet, tin->flows
 * is incremented; attaching the flow is left to the caller.
 *
 * Return: the selected flow.
 */
static struct fq_flow *fq_flow_classify(struct fq *fq,
					struct fq_tin *tin, u32 idx,
					struct sk_buff *skb,
					fq_flow_get_default_t get_default_func)
{
	struct fq_flow *candidate;
	struct fq_tin *owner;

	lockdep_assert_held(&fq->lock);

	candidate = &fq->flows[idx];
	owner = candidate->tin;

	if (owner != NULL && owner != tin) {
		candidate = get_default_func(fq, tin, idx, skb);
		fq->collisions++;
		tin->collisions++;
	}

	if (candidate->tin == NULL)
		tin->flows++;

	return candidate;
}
static void fq_recalc_backlog(struct fq *fq,
struct fq_tin *tin,
struct fq_flow *flow)
{
struct fq_flow *i;
if (list_empty(&flow->backlogchain))
list_add_tail(&flow->backlogchain, &fq->backlogs);
i = flow;
list_for_each_entry_continue_reverse(i, &fq->backlogs,
backlogchain)
if (i->backlog > flow->backlog)
break;
list_move(&flow->backlogchain, &i->backlogchain);
}
/*
 * fq_tin_enqueue - queue @skb on a flow of @tin and enforce the fq limits
 * @fq:               fq instance; fq->lock must be held
 * @tin:              tin the packet belongs to
 * @idx:              flow index, passed on to fq_flow_classify()
 * @skb:              packet to enqueue
 * @free_func:        callback used to dispose of packets dropped here
 * @get_default_func: fallback-flow callback for fq_flow_classify()
 *
 * The packet is accounted on the flow, the tin and the fq, and appended to
 * the flow's queue; a flow not yet scheduled is given a fresh quantum and
 * put on tin->new_flows.
 *
 * Afterwards, while the fq holds more than fq->limit packets or was found
 * to be over fq->memory_limit, packets are dropped from the first flow on
 * fq->backlogs. The memory condition is only re-evaluated in iterations
 * where it was still set, so once it clears it stays cleared.
 */
static void fq_tin_enqueue(struct fq *fq,
			   struct fq_tin *tin, u32 idx,
			   struct sk_buff *skb,
			   fq_skb_free_t free_func,
			   fq_flow_get_default_t get_default_func)
{
	struct fq_flow *flow;
	struct fq_flow *victim;
	struct sk_buff *dropped;
	bool oom;

	lockdep_assert_held(&fq->lock);

	flow = fq_flow_classify(fq, tin, idx, skb, get_default_func);

	/* account the new packet before the backlog list is re-sorted */
	flow->tin = tin;
	flow->backlog += skb->len;
	tin->backlog_bytes += skb->len;
	tin->backlog_packets++;
	fq->memory_usage += skb->truesize;
	fq->backlog++;

	fq_recalc_backlog(fq, tin, flow);

	if (list_empty(&flow->flowchain)) {
		flow->deficit = fq->quantum;
		list_add_tail(&flow->flowchain, &tin->new_flows);
	}

	__skb_queue_tail(&flow->queue, skb);

	oom = fq->memory_usage > fq->memory_limit;

	while (oom || fq->backlog > fq->limit) {
		victim = list_first_entry_or_null(&fq->backlogs,
						  struct fq_flow,
						  backlogchain);
		if (!victim)
			return;

		dropped = fq_flow_dequeue(fq, victim);
		if (!dropped)
			return;

		free_func(fq, victim->tin, victim, dropped);

		victim->tin->overlimit++;
		fq->overlimit++;

		if (oom) {
			fq->overmemory++;
			oom = fq->memory_usage > fq->memory_limit;
		}
	}
}
/*
 * fq_flow_filter - drop the packets of @flow selected by @filter_func
 * @fq:          fq instance; fq->lock must be held
 * @flow:        flow whose queue is scanned
 * @filter_func: predicate; a non-zero result selects the packet for removal
 * @filter_data: opaque pointer handed through to @filter_func
 * @free_func:   callback used to dispose of each removed packet
 *
 * Every selected packet is unlinked, un-accounted and passed to
 * @free_func. The flow's position on fq->backlogs is fixed up once after
 * the whole queue has been scanned.
 */
static void fq_flow_filter(struct fq *fq,
			   struct fq_flow *flow,
			   fq_skb_filter_t filter_func,
			   void *filter_data,
			   fq_skb_free_t free_func)
{
	struct fq_tin *tin = flow->tin;
	struct sk_buff *cur, *next;

	lockdep_assert_held(&fq->lock);

	/* the _safe walker allows unlinking the current entry */
	skb_queue_walk_safe(&flow->queue, cur, next) {
		if (filter_func(fq, tin, flow, cur, filter_data)) {
			__skb_unlink(cur, &flow->queue);
			fq_adjust_removal(fq, flow, cur);
			free_func(fq, tin, flow, cur);
		}
	}

	fq_rejigger_backlog(fq, flow);
}
/*
 * fq_tin_filter - apply a packet filter to every scheduled flow of @tin
 * @fq:          fq instance; fq->lock must be held
 * @tin:         tin whose flows are filtered
 * @filter_func: predicate forwarded to fq_flow_filter()
 * @filter_data: opaque pointer forwarded to fq_flow_filter()
 * @free_func:   disposal callback forwarded to fq_flow_filter()
 *
 * Flows on tin->new_flows are processed first, then those on
 * tin->old_flows.
 */
static void fq_tin_filter(struct fq *fq,
			  struct fq_tin *tin,
			  fq_skb_filter_t filter_func,
			  void *filter_data,
			  fq_skb_free_t free_func)
{
	struct fq_flow *cur;

	lockdep_assert_held(&fq->lock);

	list_for_each_entry(cur, &tin->new_flows, flowchain) {
		fq_flow_filter(fq, cur, filter_func, filter_data, free_func);
	}

	list_for_each_entry(cur, &tin->old_flows, flowchain) {
		fq_flow_filter(fq, cur, filter_func, filter_data, free_func);
	}
}
/*
 * fq_flow_reset - drain @flow and detach it from all lists
 * @fq:        fq instance the flow belongs to
 * @flow:      flow to reset
 * @free_func: callback used to dispose of each queued packet
 *
 * All queued packets are dequeued and handed to @free_func, the flow is
 * removed from its tin's flow list and from fq->backlogs, and flow->tin is
 * cleared. A warning is raised (once) if the byte backlog is non-zero
 * afterwards.
 */
static void fq_flow_reset(struct fq *fq,
			  struct fq_flow *flow,
			  fq_skb_free_t free_func)
{
	struct sk_buff *pkt;

	for (;;) {
		pkt = fq_flow_dequeue(fq, flow);
		if (!pkt)
			break;

		free_func(fq, flow->tin, flow, pkt);
	}

	if (!list_empty(&flow->flowchain))
		list_del_init(&flow->flowchain);

	if (!list_empty(&flow->backlogchain))
		list_del_init(&flow->backlogchain);

	flow->tin = NULL;

	WARN_ON_ONCE(flow->backlog);
}
/*
 * fq_tin_reset - reset every flow currently scheduled on @tin
 * @fq:        fq instance the tin belongs to
 * @tin:       tin to drain
 * @free_func: disposal callback forwarded to fq_flow_reset()
 *
 * The first flow of new_flows (or of old_flows once new_flows is empty) is
 * reset repeatedly; each reset unlinks that flow, so the loop ends when
 * both lists are empty. A warning is raised (once per check) if the tin
 * still reports backlog bytes or packets afterwards.
 */
static void fq_tin_reset(struct fq *fq,
			 struct fq_tin *tin,
			 fq_skb_free_t free_func)
{
	struct list_head *src;
	struct fq_flow *cur;

	for (;;) {
		if (!list_empty(&tin->new_flows))
			src = &tin->new_flows;
		else if (!list_empty(&tin->old_flows))
			src = &tin->old_flows;
		else
			break;

		cur = list_first_entry(src, struct fq_flow, flowchain);
		fq_flow_reset(fq, cur, free_func);
	}

	WARN_ON_ONCE(tin->backlog_bytes);
	WARN_ON_ONCE(tin->backlog_packets);
}
/*
 * fq_flow_init - initialise the queue and list nodes of a flow
 * @flow: flow to initialise
 *
 * The packet queue is set up empty and both list nodes (flowchain and
 * backlogchain) are initialised as unlinked. No other field is written.
 */
static void fq_flow_init(struct fq_flow *flow)
{
	__skb_queue_head_init(&flow->queue);

	INIT_LIST_HEAD(&flow->backlogchain);
	INIT_LIST_HEAD(&flow->flowchain);
}
/*
 * fq_tin_init - initialise the flow lists of a tin
 * @tin: tin to initialise
 *
 * Both new_flows and old_flows are set up as empty lists. No other field
 * is written.
 */
static void fq_tin_init(struct fq_tin *tin)
{
	INIT_LIST_HEAD(&tin->old_flows);
	INIT_LIST_HEAD(&tin->new_flows);
}
static int fq_init(struct fq *fq, int flows_cnt)
{
int i;
memset(fq, 0, sizeof(fq[0]));
INIT_LIST_HEAD(&fq->backlogs);
spin_lock_init(&fq->lock);
fq->flows_cnt = max_t(u32, flows_cnt, 1);
net/flow_dissector: switch to siphash UDP IPv6 packets auto flowlabels are using a 32bit secret (static u32 hashrnd in net/core/flow_dissector.c) and apply jhash() over fields known by the receivers. Attackers can easily infer the 32bit secret and use this information to identify a device and/or user, since this 32bit secret is only set at boot time. Really, using jhash() to generate cookies sent on the wire is a serious security concern. Trying to change the rol32(hash, 16) in ip6_make_flowlabel() would be a dead end. Trying to periodically change the secret (like in sch_sfq.c) could change paths taken in the network for long lived flows. Let's switch to siphash, as we did in commit df453700e8d8 ("inet: switch IP ID generator to siphash") Using a cryptographically strong pseudo random function will solve this privacy issue and more generally remove other weak points in the stack. Packet schedulers using skb_get_hash_perturb() benefit from this change. Fixes: b56774163f99 ("ipv6: Enable auto flow labels by default") Fixes: 42240901f7c4 ("ipv6: Implement different admin modes for automatic flow labels") Fixes: 67800f9b1f4e ("ipv6: Call skb_get_hash_flowi6 to get skb->hash in ip6_make_flowlabel") Fixes: cb1ce2ef387b ("ipv6: Implement automatic flow label generation on transmit") Signed-off-by: Eric Dumazet <edumazet@google.com> Reported-by: Jonathan Berger <jonathann1@walla.com> Reported-by: Amit Klein <aksecurity@gmail.com> Reported-by: Benny Pinkas <benny@pinkas.net> Cc: Tom Herbert <tom@herbertland.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2019-10-22 22:57:46 +08:00
get_random_bytes(&fq->perturbation, sizeof(fq->perturbation));
fq->quantum = 300;
fq->limit = 8192;
fq->memory_limit = 16 << 20; /* 16 MBytes */
fq->flows = kvcalloc(fq->flows_cnt, sizeof(fq->flows[0]), GFP_KERNEL);
if (!fq->flows)
return -ENOMEM;
for (i = 0; i < fq->flows_cnt; i++)
fq_flow_init(&fq->flows[i]);
return 0;
}
/*
 * fq_reset - drain all flows and release the flow table
 * @fq:        fq instance to tear down
 * @free_func: disposal callback forwarded to fq_flow_reset()
 *
 * Every entry of fq->flows is reset, then the table is freed and the
 * pointer cleared. fq->flows_cnt is left unchanged.
 */
static void fq_reset(struct fq *fq,
		     fq_skb_free_t free_func)
{
	int n;

	for (n = 0; n < fq->flows_cnt; n++) {
		fq_flow_reset(fq, &fq->flows[n], free_func);
	}

	kvfree(fq->flows);
	fq->flows = NULL;
}
#endif