Merge branch 'sched-Patches-from-out-of-tree-version-of-sch_cake'

Toke Høiland-Jørgensen says:

====================
sched: Patches from out-of-tree version of sch_cake

This series includes three patches with updates from the out-of-tree
version of sch_cake. The first one is a fix to the fairness scheduling when
dual-mode fairness is enabled. The second patch is an additional feature flag
that allows using fwmark as a tin selector, as a convenience for people who want
to customise tin selection. The third patch is just a cleanup to the tin
selection logic.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2019-03-03 20:14:28 -08:00
commit 3cec12ce5a
2 changed files with 98 additions and 58 deletions

View File

@ -1021,6 +1021,7 @@ enum {
TCA_CAKE_INGRESS, TCA_CAKE_INGRESS,
TCA_CAKE_ACK_FILTER, TCA_CAKE_ACK_FILTER,
TCA_CAKE_SPLIT_GSO, TCA_CAKE_SPLIT_GSO,
TCA_CAKE_FWMARK,
__TCA_CAKE_MAX __TCA_CAKE_MAX
}; };
#define TCA_CAKE_MAX (__TCA_CAKE_MAX - 1) #define TCA_CAKE_MAX (__TCA_CAKE_MAX - 1)

View File

@ -138,8 +138,8 @@ struct cake_flow {
struct cake_host { struct cake_host {
u32 srchost_tag; u32 srchost_tag;
u32 dsthost_tag; u32 dsthost_tag;
u16 srchost_refcnt; u16 srchost_bulk_flow_count;
u16 dsthost_refcnt; u16 dsthost_bulk_flow_count;
}; };
struct cake_heap_entry { struct cake_heap_entry {
@ -258,7 +258,8 @@ enum {
CAKE_FLAG_AUTORATE_INGRESS = BIT(1), CAKE_FLAG_AUTORATE_INGRESS = BIT(1),
CAKE_FLAG_INGRESS = BIT(2), CAKE_FLAG_INGRESS = BIT(2),
CAKE_FLAG_WASH = BIT(3), CAKE_FLAG_WASH = BIT(3),
CAKE_FLAG_SPLIT_GSO = BIT(4) CAKE_FLAG_SPLIT_GSO = BIT(4),
CAKE_FLAG_FWMARK = BIT(5)
}; };
/* COBALT operates the Codel and BLUE algorithms in parallel, in order to /* COBALT operates the Codel and BLUE algorithms in parallel, in order to
@ -746,8 +747,10 @@ skip_hash:
* queue, accept the collision, update the host tags. * queue, accept the collision, update the host tags.
*/ */
q->way_collisions++; q->way_collisions++;
q->hosts[q->flows[reduced_hash].srchost].srchost_refcnt--; if (q->flows[outer_hash + k].set == CAKE_SET_BULK) {
q->hosts[q->flows[reduced_hash].dsthost].dsthost_refcnt--; q->hosts[q->flows[reduced_hash].srchost].srchost_bulk_flow_count--;
q->hosts[q->flows[reduced_hash].dsthost].dsthost_bulk_flow_count--;
}
allocate_src = cake_dsrc(flow_mode); allocate_src = cake_dsrc(flow_mode);
allocate_dst = cake_ddst(flow_mode); allocate_dst = cake_ddst(flow_mode);
found: found:
@ -767,13 +770,14 @@ found:
} }
for (i = 0; i < CAKE_SET_WAYS; for (i = 0; i < CAKE_SET_WAYS;
i++, k = (k + 1) % CAKE_SET_WAYS) { i++, k = (k + 1) % CAKE_SET_WAYS) {
if (!q->hosts[outer_hash + k].srchost_refcnt) if (!q->hosts[outer_hash + k].srchost_bulk_flow_count)
break; break;
} }
q->hosts[outer_hash + k].srchost_tag = srchost_hash; q->hosts[outer_hash + k].srchost_tag = srchost_hash;
found_src: found_src:
srchost_idx = outer_hash + k; srchost_idx = outer_hash + k;
q->hosts[srchost_idx].srchost_refcnt++; if (q->flows[reduced_hash].set == CAKE_SET_BULK)
q->hosts[srchost_idx].srchost_bulk_flow_count++;
q->flows[reduced_hash].srchost = srchost_idx; q->flows[reduced_hash].srchost = srchost_idx;
} }
@ -789,13 +793,14 @@ found_src:
} }
for (i = 0; i < CAKE_SET_WAYS; for (i = 0; i < CAKE_SET_WAYS;
i++, k = (k + 1) % CAKE_SET_WAYS) { i++, k = (k + 1) % CAKE_SET_WAYS) {
if (!q->hosts[outer_hash + k].dsthost_refcnt) if (!q->hosts[outer_hash + k].dsthost_bulk_flow_count)
break; break;
} }
q->hosts[outer_hash + k].dsthost_tag = dsthost_hash; q->hosts[outer_hash + k].dsthost_tag = dsthost_hash;
found_dst: found_dst:
dsthost_idx = outer_hash + k; dsthost_idx = outer_hash + k;
q->hosts[dsthost_idx].dsthost_refcnt++; if (q->flows[reduced_hash].set == CAKE_SET_BULK)
q->hosts[dsthost_idx].dsthost_bulk_flow_count++;
q->flows[reduced_hash].dsthost = dsthost_idx; q->flows[reduced_hash].dsthost = dsthost_idx;
} }
} }
@ -1508,20 +1513,6 @@ static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free)
return idx + (tin << 16); return idx + (tin << 16);
} }
static void cake_wash_diffserv(struct sk_buff *skb)
{
switch (skb->protocol) {
case htons(ETH_P_IP):
ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, 0);
break;
case htons(ETH_P_IPV6):
ipv6_change_dsfield(ipv6_hdr(skb), INET_ECN_MASK, 0);
break;
default:
break;
}
}
static u8 cake_handle_diffserv(struct sk_buff *skb, u16 wash) static u8 cake_handle_diffserv(struct sk_buff *skb, u16 wash)
{ {
u8 dscp; u8 dscp;
@ -1553,25 +1544,32 @@ static struct cake_tin_data *cake_select_tin(struct Qdisc *sch,
{ {
struct cake_sched_data *q = qdisc_priv(sch); struct cake_sched_data *q = qdisc_priv(sch);
u32 tin; u32 tin;
u8 dscp;
if (TC_H_MAJ(skb->priority) == sch->handle && /* Tin selection: Default to diffserv-based selection, allow overriding
* using firewall marks or skb->priority.
*/
dscp = cake_handle_diffserv(skb,
q->rate_flags & CAKE_FLAG_WASH);
if (q->tin_mode == CAKE_DIFFSERV_BESTEFFORT)
tin = 0;
else if (q->rate_flags & CAKE_FLAG_FWMARK && /* use fw mark */
skb->mark &&
skb->mark <= q->tin_cnt)
tin = q->tin_order[skb->mark - 1];
else if (TC_H_MAJ(skb->priority) == sch->handle &&
TC_H_MIN(skb->priority) > 0 && TC_H_MIN(skb->priority) > 0 &&
TC_H_MIN(skb->priority) <= q->tin_cnt) { TC_H_MIN(skb->priority) <= q->tin_cnt)
tin = q->tin_order[TC_H_MIN(skb->priority) - 1]; tin = q->tin_order[TC_H_MIN(skb->priority) - 1];
if (q->rate_flags & CAKE_FLAG_WASH) else {
cake_wash_diffserv(skb); tin = q->tin_index[dscp];
} else if (q->tin_mode != CAKE_DIFFSERV_BESTEFFORT) {
/* extract the Diffserv Precedence field, if it exists */
/* and clear DSCP bits if washing */
tin = q->tin_index[cake_handle_diffserv(skb,
q->rate_flags & CAKE_FLAG_WASH)];
if (unlikely(tin >= q->tin_cnt)) if (unlikely(tin >= q->tin_cnt))
tin = 0; tin = 0;
} else {
tin = 0;
if (q->rate_flags & CAKE_FLAG_WASH)
cake_wash_diffserv(skb);
} }
return &q->tins[tin]; return &q->tins[tin];
@ -1794,20 +1792,30 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
b->sparse_flow_count++; b->sparse_flow_count++;
if (cake_dsrc(q->flow_mode)) if (cake_dsrc(q->flow_mode))
host_load = max(host_load, srchost->srchost_refcnt); host_load = max(host_load, srchost->srchost_bulk_flow_count);
if (cake_ddst(q->flow_mode)) if (cake_ddst(q->flow_mode))
host_load = max(host_load, dsthost->dsthost_refcnt); host_load = max(host_load, dsthost->dsthost_bulk_flow_count);
flow->deficit = (b->flow_quantum * flow->deficit = (b->flow_quantum *
quantum_div[host_load]) >> 16; quantum_div[host_load]) >> 16;
} else if (flow->set == CAKE_SET_SPARSE_WAIT) { } else if (flow->set == CAKE_SET_SPARSE_WAIT) {
struct cake_host *srchost = &b->hosts[flow->srchost];
struct cake_host *dsthost = &b->hosts[flow->dsthost];
/* this flow was empty, accounted as a sparse flow, but actually /* this flow was empty, accounted as a sparse flow, but actually
* in the bulk rotation. * in the bulk rotation.
*/ */
flow->set = CAKE_SET_BULK; flow->set = CAKE_SET_BULK;
b->sparse_flow_count--; b->sparse_flow_count--;
b->bulk_flow_count++; b->bulk_flow_count++;
if (cake_dsrc(q->flow_mode))
srchost->srchost_bulk_flow_count++;
if (cake_ddst(q->flow_mode))
dsthost->dsthost_bulk_flow_count++;
} }
if (q->buffer_used > q->buffer_max_used) if (q->buffer_used > q->buffer_max_used)
@ -1975,23 +1983,8 @@ retry:
dsthost = &b->hosts[flow->dsthost]; dsthost = &b->hosts[flow->dsthost];
host_load = 1; host_load = 1;
if (cake_dsrc(q->flow_mode))
host_load = max(host_load, srchost->srchost_refcnt);
if (cake_ddst(q->flow_mode))
host_load = max(host_load, dsthost->dsthost_refcnt);
WARN_ON(host_load > CAKE_QUEUES);
/* flow isolation (DRR++) */ /* flow isolation (DRR++) */
if (flow->deficit <= 0) { if (flow->deficit <= 0) {
/* The shifted prandom_u32() is a way to apply dithering to
* avoid accumulating roundoff errors
*/
flow->deficit += (b->flow_quantum * quantum_div[host_load] +
(prandom_u32() >> 16)) >> 16;
list_move_tail(&flow->flowchain, &b->old_flows);
/* Keep all flows with deficits out of the sparse and decaying /* Keep all flows with deficits out of the sparse and decaying
* rotations. No non-empty flow can go into the decaying * rotations. No non-empty flow can go into the decaying
* rotation, so they can't get deficits * rotation, so they can't get deficits
@ -2000,6 +1993,13 @@ retry:
if (flow->head) { if (flow->head) {
b->sparse_flow_count--; b->sparse_flow_count--;
b->bulk_flow_count++; b->bulk_flow_count++;
if (cake_dsrc(q->flow_mode))
srchost->srchost_bulk_flow_count++;
if (cake_ddst(q->flow_mode))
dsthost->dsthost_bulk_flow_count++;
flow->set = CAKE_SET_BULK; flow->set = CAKE_SET_BULK;
} else { } else {
/* we've moved it to the bulk rotation for /* we've moved it to the bulk rotation for
@ -2009,6 +2009,22 @@ retry:
flow->set = CAKE_SET_SPARSE_WAIT; flow->set = CAKE_SET_SPARSE_WAIT;
} }
} }
if (cake_dsrc(q->flow_mode))
host_load = max(host_load, srchost->srchost_bulk_flow_count);
if (cake_ddst(q->flow_mode))
host_load = max(host_load, dsthost->dsthost_bulk_flow_count);
WARN_ON(host_load > CAKE_QUEUES);
/* The shifted prandom_u32() is a way to apply dithering to
* avoid accumulating roundoff errors
*/
flow->deficit += (b->flow_quantum * quantum_div[host_load] +
(prandom_u32() >> 16)) >> 16;
list_move_tail(&flow->flowchain, &b->old_flows);
goto retry; goto retry;
} }
@ -2029,6 +2045,13 @@ retry:
&b->decaying_flows); &b->decaying_flows);
if (flow->set == CAKE_SET_BULK) { if (flow->set == CAKE_SET_BULK) {
b->bulk_flow_count--; b->bulk_flow_count--;
if (cake_dsrc(q->flow_mode))
srchost->srchost_bulk_flow_count--;
if (cake_ddst(q->flow_mode))
dsthost->dsthost_bulk_flow_count--;
b->decaying_flow_count++; b->decaying_flow_count++;
} else if (flow->set == CAKE_SET_SPARSE || } else if (flow->set == CAKE_SET_SPARSE ||
flow->set == CAKE_SET_SPARSE_WAIT) { flow->set == CAKE_SET_SPARSE_WAIT) {
@ -2042,14 +2065,19 @@ retry:
if (flow->set == CAKE_SET_SPARSE || if (flow->set == CAKE_SET_SPARSE ||
flow->set == CAKE_SET_SPARSE_WAIT) flow->set == CAKE_SET_SPARSE_WAIT)
b->sparse_flow_count--; b->sparse_flow_count--;
else if (flow->set == CAKE_SET_BULK) else if (flow->set == CAKE_SET_BULK) {
b->bulk_flow_count--; b->bulk_flow_count--;
else
if (cake_dsrc(q->flow_mode))
srchost->srchost_bulk_flow_count--;
if (cake_ddst(q->flow_mode))
dsthost->dsthost_bulk_flow_count--;
} else
b->decaying_flow_count--; b->decaying_flow_count--;
flow->set = CAKE_SET_NONE; flow->set = CAKE_SET_NONE;
srchost->srchost_refcnt--;
dsthost->dsthost_refcnt--;
} }
goto begin; goto begin;
} }
@ -2590,6 +2618,13 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt,
q->rate_flags &= ~CAKE_FLAG_SPLIT_GSO; q->rate_flags &= ~CAKE_FLAG_SPLIT_GSO;
} }
if (tb[TCA_CAKE_FWMARK]) {
if (!!nla_get_u32(tb[TCA_CAKE_FWMARK]))
q->rate_flags |= CAKE_FLAG_FWMARK;
else
q->rate_flags &= ~CAKE_FLAG_FWMARK;
}
if (q->tins) { if (q->tins) {
sch_tree_lock(sch); sch_tree_lock(sch);
cake_reconfigure(sch); cake_reconfigure(sch);
@ -2749,6 +2784,10 @@ static int cake_dump(struct Qdisc *sch, struct sk_buff *skb)
!!(q->rate_flags & CAKE_FLAG_SPLIT_GSO))) !!(q->rate_flags & CAKE_FLAG_SPLIT_GSO)))
goto nla_put_failure; goto nla_put_failure;
if (nla_put_u32(skb, TCA_CAKE_FWMARK,
!!(q->rate_flags & CAKE_FLAG_FWMARK)))
goto nla_put_failure;
return nla_nest_end(skb, opts); return nla_nest_end(skb, opts);
nla_put_failure: nla_put_failure: