2005-04-17 06:20:36 +08:00
|
|
|
#ifndef __LINUX_PKT_SCHED_H
|
|
|
|
#define __LINUX_PKT_SCHED_H
|
|
|
|
|
2009-01-31 00:37:05 +08:00
|
|
|
#include <linux/types.h>
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/* Logical priority bands, independent of any specific packet scheduler.
   Every scheduler will map them to real traffic classes if it has
   no more precise mechanism to classify packets.

   These numbers have no special meaning, though their coincidence
   with obsolete IPv6 values is not accidental :-). New IPv6 drafts
   preferred full anarchy inspired by the diffserv group.

   Note: TC_PRIO_BESTEFFORT does not mean that it is the most unhappy
   class; actually, as a rule, it will be handled with more care than
   filler or even bulk.
 */
|
|
|
|
|
|
|
|
/* Logical priority values (blame-view residue between the lines removed). */
#define TC_PRIO_BESTEFFORT		0
#define TC_PRIO_FILLER			1
#define TC_PRIO_BULK			2
#define TC_PRIO_INTERACTIVE_BULK	4
#define TC_PRIO_INTERACTIVE		6
#define TC_PRIO_CONTROL			7

/* Largest logical priority; priority maps hold TC_PRIO_MAX+1 entries. */
#define TC_PRIO_MAX			15
|
|
|
|
|
|
|
|
/* Generic queue statistics, available for all the elements.
   Particular schedulers may also have their private records.
 */
|
|
|
|
|
2009-11-05 01:50:58 +08:00
|
|
|
struct tc_stats {
|
2011-11-21 14:53:46 +08:00
|
|
|
__u64 bytes; /* Number of enqueued bytes */
|
2005-04-17 06:20:36 +08:00
|
|
|
__u32 packets; /* Number of enqueued packets */
|
|
|
|
__u32 drops; /* Packets dropped because of lack of resources */
|
|
|
|
__u32 overlimits; /* Number of throttle events when this
|
|
|
|
* flow goes out of allocated bandwidth */
|
|
|
|
__u32 bps; /* Current flow byte rate */
|
|
|
|
__u32 pps; /* Current flow packet rate */
|
|
|
|
__u32 qlen;
|
|
|
|
__u32 backlog;
|
|
|
|
};
|
|
|
|
|
2009-11-05 01:50:58 +08:00
|
|
|
/*
 * Estimator parameters: a signed interval value and an unsigned EWMA log.
 * NOTE(review): the encoding/units of both fields are not visible here.
 */
struct tc_estimator {
	signed char	interval;
	unsigned char	ewma_log;
};
|
|
|
|
|
|
|
|
/* "Handles"
   ---------

   All the traffic control objects have 32bit identifiers, or "handles".

   They can be considered opaque numbers from the user API viewpoint,
   but actually they always consist of two fields: major and
   minor numbers, which are interpreted specially by the kernel.
   Applications may rely on this, though it is not recommended.

   E.g. qdisc handles always have a minor number equal to zero;
   classes (or flows) have a major equal to the parent qdisc major, and
   a minor uniquely identifying the class inside the qdisc.

   Macros to manipulate handles:
 */
|
|
|
|
|
|
|
|
/* Upper 16 bits of a handle hold the major number, lower 16 bits the minor. */
#define TC_H_MAJ_MASK (0xFFFF0000U)
#define TC_H_MIN_MASK (0x0000FFFFU)

/* Extract the (still shifted) major / the minor part of handle h. */
#define TC_H_MAJ(h) ((h)&TC_H_MAJ_MASK)
#define TC_H_MIN(h) ((h)&TC_H_MIN_MASK)

/* Combine the major bits of maj with the minor bits of min into one handle. */
#define TC_H_MAKE(maj,min) (((maj)&TC_H_MAJ_MASK)|((min)&TC_H_MIN_MASK))

/* Predefined handle values. */
#define TC_H_UNSPEC	(0U)
#define TC_H_ROOT	(0xFFFFFFFFU)
#define TC_H_INGRESS    (0xFFFFFFF1U)
|
|
|
|
|
2009-11-05 01:50:58 +08:00
|
|
|
struct tc_ratespec {
|
2005-04-17 06:20:36 +08:00
|
|
|
unsigned char cell_log;
|
|
|
|
unsigned char __reserved;
|
2007-09-12 22:36:28 +08:00
|
|
|
unsigned short overhead;
|
|
|
|
short cell_align;
|
2005-04-17 06:20:36 +08:00
|
|
|
unsigned short mpu;
|
|
|
|
__u32 rate;
|
|
|
|
};
|
|
|
|
|
2008-01-24 12:35:19 +08:00
|
|
|
/* Rate table size. NOTE(review): unit (bytes vs. entries) is not visible here - confirm. */
#define TC_RTAB_SIZE 1024
|
|
|
|
|
2008-07-20 15:08:47 +08:00
|
|
|
/*
 * Size specification.
 * NOTE(review): looks like the base parameters for the TCA_STAB_* attributes
 * declared nearby - confirm against the size-table code.
 */
struct tc_sizespec {
	unsigned char	cell_log;
	unsigned char	size_log;
	short		cell_align;
	int		overhead;
	unsigned int	linklayer;
	unsigned int	mpu;
	unsigned int	mtu;
	unsigned int	tsize;
};
|
|
|
|
|
|
|
|
/* Size-table attribute identifiers; TCA_STAB_MAX is the highest valid one. */
enum {
	TCA_STAB_UNSPEC,
	TCA_STAB_BASE,
	TCA_STAB_DATA,
	__TCA_STAB_MAX
};

#define TCA_STAB_MAX (__TCA_STAB_MAX - 1)
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/* FIFO section */
|
|
|
|
|
2009-11-05 01:50:58 +08:00
|
|
|
struct tc_fifo_qopt {
|
2005-04-17 06:20:36 +08:00
|
|
|
__u32 limit; /* Queue length: bytes for bfifo, packets for pfifo */
|
|
|
|
};
|
|
|
|
|
|
|
|
/* PRIO section */
|
|
|
|
|
|
|
|
/* NOTE(review): presumably the upper and lower bound on PRIO bands - confirm. */
#define TCQ_PRIO_BANDS		16
#define TCQ_MIN_PRIO_BANDS	2
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2009-11-05 01:50:58 +08:00
|
|
|
struct tc_prio_qopt {
|
2005-04-17 06:20:36 +08:00
|
|
|
int bands; /* Number of bands */
|
|
|
|
__u8 priomap[TC_PRIO_MAX+1]; /* Map: logical priority -> PRIO band */
|
|
|
|
};
|
|
|
|
|
2008-09-13 07:29:34 +08:00
|
|
|
/* MULTIQ section */
|
|
|
|
|
|
|
|
struct tc_multiq_qopt {
|
|
|
|
__u16 bands; /* Number of bands */
|
|
|
|
__u16 max_bands; /* Maximum number of queues */
|
|
|
|
};
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/* TBF section */
|
|
|
|
|
2009-11-05 01:50:58 +08:00
|
|
|
struct tc_tbf_qopt {
|
2005-04-17 06:20:36 +08:00
|
|
|
struct tc_ratespec rate;
|
|
|
|
struct tc_ratespec peakrate;
|
|
|
|
__u32 limit;
|
|
|
|
__u32 buffer;
|
|
|
|
__u32 mtu;
|
|
|
|
};
|
|
|
|
|
2009-11-05 01:50:58 +08:00
|
|
|
/* TBF attribute identifiers; TCA_TBF_MAX is the highest valid one. */
enum {
	TCA_TBF_UNSPEC,
	TCA_TBF_PARMS,
	TCA_TBF_RTAB,
	TCA_TBF_PTAB,
	__TCA_TBF_MAX,
};

#define TCA_TBF_MAX (__TCA_TBF_MAX - 1)
|
|
|
|
|
|
|
|
|
|
|
|
/* TEQL section */
|
|
|
|
|
|
|
|
/* TEQL does not require any parameters */
|
|
|
|
|
|
|
|
/* SFQ section */
|
|
|
|
|
2009-11-05 01:50:58 +08:00
|
|
|
struct tc_sfq_qopt {
|
2005-04-17 06:20:36 +08:00
|
|
|
unsigned quantum; /* Bytes per round allocated to flow */
|
|
|
|
int perturb_period; /* Period of hash perturbation */
|
|
|
|
__u32 limit; /* Maximal packets in queue */
|
|
|
|
unsigned divisor; /* Hash divisor */
|
|
|
|
unsigned flows; /* Maximal number of flows */
|
|
|
|
};
|
|
|
|
|
2009-11-05 01:50:58 +08:00
|
|
|
struct tc_sfq_xstats {
|
2008-02-01 10:37:16 +08:00
|
|
|
__s32 allot;
|
|
|
|
};
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
 *  NOTE: limit, divisor and flows are hardwired into the code at the moment.
 *
 *	limit=flows=128, divisor=1024;
 *
 *	The only reason for this is efficiency; it is possible
 *	to change these parameters at compile time.
 */
|
|
|
|
|
|
|
|
/* RED section */
|
|
|
|
|
2009-11-05 01:50:58 +08:00
|
|
|
/* RED attribute identifiers; TCA_RED_MAX is the highest valid one. */
enum {
	TCA_RED_UNSPEC,
	TCA_RED_PARMS,
	TCA_RED_STAB,
	TCA_RED_MAX_P,	/* max_p as a Q0.32 fixed-point number; dump reports the current value */
	__TCA_RED_MAX,
};

#define TCA_RED_MAX (__TCA_RED_MAX - 1)
|
|
|
|
|
2009-11-05 01:50:58 +08:00
|
|
|
struct tc_red_qopt {
|
2005-04-17 06:20:36 +08:00
|
|
|
__u32 limit; /* HARD maximal queue length (bytes) */
|
|
|
|
__u32 qth_min; /* Min average length threshold (bytes) */
|
|
|
|
__u32 qth_max; /* Max average length threshold (bytes) */
|
|
|
|
unsigned char Wlog; /* log(W) */
|
|
|
|
unsigned char Plog; /* log(P_max/(qth_max-qth_min)) */
|
|
|
|
unsigned char Scell_log; /* cell size for idle damping */
|
|
|
|
unsigned char flags;
|
sch_red: Adaptative RED AQM
Adaptative RED AQM for linux, based on paper from Sally FLoyd,
Ramakrishna Gummadi, and Scott Shenker, August 2001 :
http://icir.org/floyd/papers/adaptiveRed.pdf
Goal of Adaptative RED is to make max_p a dynamic value between 1% and
50% to reach the target average queue : (max_th - min_th) / 2
Every 500 ms:
if (avg > target and max_p <= 0.5)
increase max_p : max_p += alpha;
else if (avg < target and max_p >= 0.01)
decrease max_p : max_p *= beta;
target :[min_th + 0.4*(min_th - max_th),
min_th + 0.6*(min_th - max_th)].
alpha : min(0.01, max_p / 4)
beta : 0.9
max_P is a Q0.32 fixed point number (unsigned, with 32 bits mantissa)
Changes against our RED implementation are :
max_p is no longer a negative power of two (1/(2^Plog)), but a Q0.32
fixed point number, to allow full range described in Adatative paper.
To deliver a random number, we now use a reciprocal divide (thats really
a multiply), but this operation is done once per marked/droped packet
when in RED_BETWEEN_TRESH window, so added cost (compared to previous
AND operation) is near zero.
dump operation gives current max_p value in a new TCA_RED_MAX_P
attribute.
Example on a 10Mbit link :
tc qdisc add dev $DEV parent 1:1 handle 10: est 1sec 8sec red \
limit 400000 min 30000 max 90000 avpkt 1000 \
burst 55 ecn adaptative bandwidth 10Mbit
# tc -s -d qdisc show dev eth3
...
qdisc red 10: parent 1:1 limit 400000b min 30000b max 90000b ecn
adaptative ewma 5 max_p=0.113335 Scell_log 15
Sent 50414282 bytes 34504 pkt (dropped 35, overlimits 1392 requeues 0)
rate 9749Kbit 831pps backlog 72056b 16p requeues 0
marked 1357 early 35 pdrop 0 other 0
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2011-12-08 14:06:03 +08:00
|
|
|
#define TC_RED_ECN 1
|
|
|
|
#define TC_RED_HARDDROP 2
|
|
|
|
#define TC_RED_ADAPTATIVE 4
|
2005-04-17 06:20:36 +08:00
|
|
|
};
|
|
|
|
|
2009-11-05 01:50:58 +08:00
|
|
|
struct tc_red_xstats {
|
2005-04-17 06:20:36 +08:00
|
|
|
__u32 early; /* Early drops */
|
|
|
|
__u32 pdrop; /* Drops due to queue limits */
|
|
|
|
__u32 other; /* Drops due to drop() calls */
|
|
|
|
__u32 marked; /* Marked packets */
|
|
|
|
};
|
|
|
|
|
|
|
|
/* GRED section */
|
|
|
|
|
|
|
|
/* NOTE(review): presumably the maximum number of GRED DPs (cf. tc_gred_sopt.DPs) - confirm. */
#define MAX_DPs 16
|
|
|
|
|
2009-11-05 01:50:58 +08:00
|
|
|
/* GRED attribute identifiers; TCA_GRED_MAX is the highest valid one. */
enum {
       TCA_GRED_UNSPEC,
       TCA_GRED_PARMS,
       TCA_GRED_STAB,
       TCA_GRED_DPS,
       TCA_GRED_MAX_P,
       __TCA_GRED_MAX,
};

#define TCA_GRED_MAX (__TCA_GRED_MAX - 1)
|
|
|
|
|
2009-11-05 01:50:58 +08:00
|
|
|
struct tc_gred_qopt {
|
2005-11-06 04:14:25 +08:00
|
|
|
__u32 limit; /* HARD maximal queue length (bytes) */
|
|
|
|
__u32 qth_min; /* Min average length threshold (bytes) */
|
|
|
|
__u32 qth_max; /* Max average length threshold (bytes) */
|
2011-03-31 09:57:33 +08:00
|
|
|
__u32 DP; /* up to 2^32 DPs */
|
2005-11-06 04:14:25 +08:00
|
|
|
__u32 backlog;
|
|
|
|
__u32 qave;
|
|
|
|
__u32 forced;
|
|
|
|
__u32 early;
|
|
|
|
__u32 other;
|
|
|
|
__u32 pdrop;
|
|
|
|
__u8 Wlog; /* log(W) */
|
|
|
|
__u8 Plog; /* log(P_max/(qth_max-qth_min)) */
|
|
|
|
__u8 Scell_log; /* cell size for idle damping */
|
|
|
|
__u8 prio; /* prio of this VQ */
|
|
|
|
__u32 packets;
|
|
|
|
__u32 bytesin;
|
2005-04-17 06:20:36 +08:00
|
|
|
};
|
2005-11-06 04:14:25 +08:00
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/* gred setup */
|
2009-11-05 01:50:58 +08:00
|
|
|
struct tc_gred_sopt {
|
2005-11-06 04:14:25 +08:00
|
|
|
__u32 DPs;
|
|
|
|
__u32 def_DP;
|
|
|
|
__u8 grio;
|
2005-11-06 04:14:27 +08:00
|
|
|
__u8 flags;
|
|
|
|
__u16 pad1;
|
2005-04-17 06:20:36 +08:00
|
|
|
};
|
|
|
|
|
2011-02-02 23:21:10 +08:00
|
|
|
/* CHOKe section */
|
|
|
|
|
|
|
|
/* CHOKe attribute identifiers; TCA_CHOKE_MAX is the highest valid one. */
enum {
	TCA_CHOKE_UNSPEC,
	TCA_CHOKE_PARMS,
	TCA_CHOKE_STAB,
	TCA_CHOKE_MAX_P,
	__TCA_CHOKE_MAX,
};

#define TCA_CHOKE_MAX (__TCA_CHOKE_MAX - 1)
|
|
|
|
|
|
|
|
struct tc_choke_qopt {
|
|
|
|
__u32 limit; /* Hard queue length (packets) */
|
|
|
|
__u32 qth_min; /* Min average threshold (packets) */
|
|
|
|
__u32 qth_max; /* Max average threshold (packets) */
|
|
|
|
unsigned char Wlog; /* log(W) */
|
|
|
|
unsigned char Plog; /* log(P_max/(qth_max-qth_min)) */
|
|
|
|
unsigned char Scell_log; /* cell size for idle damping */
|
|
|
|
unsigned char flags; /* see RED flags */
|
|
|
|
};
|
|
|
|
|
|
|
|
struct tc_choke_xstats {
|
|
|
|
__u32 early; /* Early drops */
|
|
|
|
__u32 pdrop; /* Drops due to queue limits */
|
|
|
|
__u32 other; /* Drops due to drop() calls */
|
|
|
|
__u32 marked; /* Marked packets */
|
|
|
|
__u32 matched; /* Drops due to flow match */
|
|
|
|
};
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/* HTB section */
|
|
|
|
/* HTB constants. */
#define TC_HTB_NUMPRIO		8
#define TC_HTB_MAXDEPTH		8
#define TC_HTB_PROTOVER		3 /* the same as HTB and TC's major */
|
|
|
|
|
2009-11-05 01:50:58 +08:00
|
|
|
struct tc_htb_opt {
|
2005-04-17 06:20:36 +08:00
|
|
|
struct tc_ratespec rate;
|
|
|
|
struct tc_ratespec ceil;
|
|
|
|
__u32 buffer;
|
|
|
|
__u32 cbuffer;
|
|
|
|
__u32 quantum;
|
|
|
|
__u32 level; /* out only */
|
|
|
|
__u32 prio;
|
|
|
|
};
|
2009-11-05 01:50:58 +08:00
|
|
|
struct tc_htb_glob {
|
2005-04-17 06:20:36 +08:00
|
|
|
__u32 version; /* to match HTB/TC */
|
|
|
|
__u32 rate2quantum; /* bps->quantum divisor */
|
|
|
|
__u32 defcls; /* default class number */
|
|
|
|
__u32 debug; /* debug flags */
|
|
|
|
|
|
|
|
/* stats */
|
2011-11-21 14:53:46 +08:00
|
|
|
__u32 direct_pkts; /* count of non shaped packets */
|
2005-04-17 06:20:36 +08:00
|
|
|
};
|
2009-11-05 01:50:58 +08:00
|
|
|
/* HTB attribute identifiers; TCA_HTB_MAX is the highest valid one. */
enum {
	TCA_HTB_UNSPEC,
	TCA_HTB_PARMS,
	TCA_HTB_INIT,
	TCA_HTB_CTAB,
	TCA_HTB_RTAB,
	__TCA_HTB_MAX,
};

#define TCA_HTB_MAX (__TCA_HTB_MAX - 1)
|
|
|
|
|
2009-11-05 01:50:58 +08:00
|
|
|
struct tc_htb_xstats {
|
2005-04-17 06:20:36 +08:00
|
|
|
__u32 lends;
|
|
|
|
__u32 borrows;
|
|
|
|
__u32 giants; /* too big packets (rate will not be accurate) */
|
|
|
|
__u32 tokens;
|
|
|
|
__u32 ctokens;
|
|
|
|
};
|
|
|
|
|
|
|
|
/* HFSC section */
|
|
|
|
|
2009-11-05 01:50:58 +08:00
|
|
|
struct tc_hfsc_qopt {
|
2005-04-17 06:20:36 +08:00
|
|
|
__u16 defcls; /* default class */
|
|
|
|
};
|
|
|
|
|
2009-11-05 01:50:58 +08:00
|
|
|
struct tc_service_curve {
|
2005-04-17 06:20:36 +08:00
|
|
|
__u32 m1; /* slope of the first segment in bps */
|
|
|
|
__u32 d; /* x-projection of the first segment in us */
|
|
|
|
__u32 m2; /* slope of the second segment in bps */
|
|
|
|
};
|
|
|
|
|
2009-11-05 01:50:58 +08:00
|
|
|
struct tc_hfsc_stats {
|
2005-04-17 06:20:36 +08:00
|
|
|
__u64 work; /* total work done */
|
|
|
|
__u64 rtwork; /* work done by real-time criteria */
|
|
|
|
__u32 period; /* current period */
|
|
|
|
__u32 level; /* class level in hierarchy */
|
|
|
|
};
|
|
|
|
|
2009-11-05 01:50:58 +08:00
|
|
|
/* HFSC attribute identifiers; TCA_HFSC_MAX is the highest valid one. */
enum {
	TCA_HFSC_UNSPEC,
	TCA_HFSC_RSC,
	TCA_HFSC_FSC,
	TCA_HFSC_USC,
	__TCA_HFSC_MAX,
};

#define TCA_HFSC_MAX (__TCA_HFSC_MAX - 1)
|
|
|
|
|
|
|
|
|
|
|
|
/* CBQ section */
|
|
|
|
|
|
|
|
/* CBQ constants. */
#define TC_CBQ_MAXPRIO		8
#define TC_CBQ_MAXLEVEL		8
#define TC_CBQ_DEF_EWMA		5
|
|
|
|
|
2009-11-05 01:50:58 +08:00
|
|
|
struct tc_cbq_lssopt {
|
2005-04-17 06:20:36 +08:00
|
|
|
unsigned char change;
|
|
|
|
unsigned char flags;
|
|
|
|
#define TCF_CBQ_LSS_BOUNDED 1
|
|
|
|
#define TCF_CBQ_LSS_ISOLATED 2
|
|
|
|
unsigned char ewma_log;
|
|
|
|
unsigned char level;
|
|
|
|
#define TCF_CBQ_LSS_FLAGS 1
|
|
|
|
#define TCF_CBQ_LSS_EWMA 2
|
|
|
|
#define TCF_CBQ_LSS_MAXIDLE 4
|
|
|
|
#define TCF_CBQ_LSS_MINIDLE 8
|
|
|
|
#define TCF_CBQ_LSS_OFFTIME 0x10
|
|
|
|
#define TCF_CBQ_LSS_AVPKT 0x20
|
|
|
|
__u32 maxidle;
|
|
|
|
__u32 minidle;
|
|
|
|
__u32 offtime;
|
|
|
|
__u32 avpkt;
|
|
|
|
};
|
|
|
|
|
2009-11-05 01:50:58 +08:00
|
|
|
struct tc_cbq_wrropt {
|
2005-04-17 06:20:36 +08:00
|
|
|
unsigned char flags;
|
|
|
|
unsigned char priority;
|
|
|
|
unsigned char cpriority;
|
|
|
|
unsigned char __reserved;
|
|
|
|
__u32 allot;
|
|
|
|
__u32 weight;
|
|
|
|
};
|
|
|
|
|
2009-11-05 01:50:58 +08:00
|
|
|
struct tc_cbq_ovl {
|
2005-04-17 06:20:36 +08:00
|
|
|
unsigned char strategy;
|
|
|
|
#define TC_CBQ_OVL_CLASSIC 0
|
|
|
|
#define TC_CBQ_OVL_DELAY 1
|
|
|
|
#define TC_CBQ_OVL_LOWPRIO 2
|
|
|
|
#define TC_CBQ_OVL_DROP 3
|
|
|
|
#define TC_CBQ_OVL_RCLASSIC 4
|
|
|
|
unsigned char priority2;
|
2005-06-29 03:56:45 +08:00
|
|
|
__u16 pad;
|
2005-04-17 06:20:36 +08:00
|
|
|
__u32 penalty;
|
|
|
|
};
|
|
|
|
|
2009-11-05 01:50:58 +08:00
|
|
|
/* CBQ policing option: one byte value followed by reserved fields. */
struct tc_cbq_police {
	unsigned char	police;
	unsigned char	__res1;
	unsigned short	__res2;
};
|
|
|
|
|
2009-11-05 01:50:58 +08:00
|
|
|
struct tc_cbq_fopt {
|
2005-04-17 06:20:36 +08:00
|
|
|
__u32 split;
|
|
|
|
__u32 defmap;
|
|
|
|
__u32 defchange;
|
|
|
|
};
|
|
|
|
|
2009-11-05 01:50:58 +08:00
|
|
|
struct tc_cbq_xstats {
|
2005-04-17 06:20:36 +08:00
|
|
|
__u32 borrows;
|
|
|
|
__u32 overactions;
|
|
|
|
__s32 avgidle;
|
|
|
|
__s32 undertime;
|
|
|
|
};
|
|
|
|
|
2009-11-05 01:50:58 +08:00
|
|
|
/* CBQ attribute identifiers; TCA_CBQ_MAX is the highest valid one. */
enum {
	TCA_CBQ_UNSPEC,
	TCA_CBQ_LSSOPT,
	TCA_CBQ_WRROPT,
	TCA_CBQ_FOPT,
	TCA_CBQ_OVL_STRATEGY,
	TCA_CBQ_RATE,
	TCA_CBQ_RTAB,
	TCA_CBQ_POLICE,
	__TCA_CBQ_MAX,
};

#define TCA_CBQ_MAX	(__TCA_CBQ_MAX - 1)
|
|
|
|
|
|
|
|
/* dsmark section */
|
|
|
|
|
|
|
|
/* dsmark attribute identifiers; TCA_DSMARK_MAX is the highest valid one. */
enum {
	TCA_DSMARK_UNSPEC,
	TCA_DSMARK_INDICES,
	TCA_DSMARK_DEFAULT_INDEX,
	TCA_DSMARK_SET_TC_INDEX,
	TCA_DSMARK_MASK,
	TCA_DSMARK_VALUE,
	__TCA_DSMARK_MAX,
};

#define TCA_DSMARK_MAX (__TCA_DSMARK_MAX - 1)
|
|
|
|
|
|
|
|
/* ATM section */
|
|
|
|
|
|
|
|
/* ATM attribute identifiers; TCA_ATM_MAX is the highest valid one. */
enum {
	TCA_ATM_UNSPEC,
	TCA_ATM_FD,		/* file/socket descriptor */
	TCA_ATM_PTR,		/* pointer to descriptor - later */
	TCA_ATM_HDR,		/* LL header */
	TCA_ATM_EXCESS,		/* excess traffic class (0 for CLP)  */
	TCA_ATM_ADDR,		/* PVC address (for output only) */
	TCA_ATM_STATE,		/* VC state (ATM_VS_*; for output only) */
	__TCA_ATM_MAX,
};

#define TCA_ATM_MAX	(__TCA_ATM_MAX - 1)
|
|
|
|
|
|
|
|
/* Network emulator */
|
|
|
|
|
2009-11-05 01:50:58 +08:00
|
|
|
/* Network emulator attribute identifiers; TCA_NETEM_MAX is the highest valid one. */
enum {
	TCA_NETEM_UNSPEC,
	TCA_NETEM_CORR,
	TCA_NETEM_DELAY_DIST,
	TCA_NETEM_REORDER,
	TCA_NETEM_CORRUPT,
	TCA_NETEM_LOSS,
	TCA_NETEM_RATE,
	__TCA_NETEM_MAX,
};

#define TCA_NETEM_MAX (__TCA_NETEM_MAX - 1)
|
|
|
|
|
2009-11-05 01:50:58 +08:00
|
|
|
struct tc_netem_qopt {
|
2005-04-17 06:20:36 +08:00
|
|
|
__u32 latency; /* added delay (us) */
|
|
|
|
__u32 limit; /* fifo limit (packets) */
|
|
|
|
__u32 loss; /* random packet loss (0=none ~0=100%) */
|
2005-05-27 03:55:48 +08:00
|
|
|
__u32 gap; /* re-ordering gap (0 for none) */
|
2005-04-17 06:20:36 +08:00
|
|
|
__u32 duplicate; /* random packet dup (0=none ~0=100%) */
|
|
|
|
__u32 jitter; /* random jitter in latency (us) */
|
|
|
|
};
|
|
|
|
|
2009-11-05 01:50:58 +08:00
|
|
|
struct tc_netem_corr {
|
2005-04-17 06:20:36 +08:00
|
|
|
__u32 delay_corr; /* delay correlation */
|
|
|
|
__u32 loss_corr; /* packet loss correlation */
|
|
|
|
__u32 dup_corr; /* duplicate correlation */
|
|
|
|
};
|
|
|
|
|
2009-11-05 01:50:58 +08:00
|
|
|
struct tc_netem_reorder {
|
2005-05-27 03:55:48 +08:00
|
|
|
__u32 probability;
|
|
|
|
__u32 correlation;
|
|
|
|
};
|
|
|
|
|
2009-11-05 01:50:58 +08:00
|
|
|
struct tc_netem_corrupt {
|
2005-12-22 11:03:44 +08:00
|
|
|
__u32 probability;
|
|
|
|
__u32 correlation;
|
|
|
|
};
|
|
|
|
|
2011-11-30 20:20:26 +08:00
|
|
|
struct tc_netem_rate {
|
|
|
|
__u32 rate; /* byte/s */
|
2011-12-12 22:30:00 +08:00
|
|
|
__s32 packet_overhead;
|
|
|
|
__u32 cell_size;
|
|
|
|
__s32 cell_overhead;
|
2011-11-30 20:20:26 +08:00
|
|
|
};
|
|
|
|
|
2011-02-23 21:04:21 +08:00
|
|
|
/* Loss model identifiers; NETEM_LOSS_MAX is the highest valid one. */
enum {
	NETEM_LOSS_UNSPEC,
	NETEM_LOSS_GI,		/* General Intuitive - 4 state model */
	NETEM_LOSS_GE,		/* Gilbert Elliot models */
	__NETEM_LOSS_MAX
};
#define NETEM_LOSS_MAX (__NETEM_LOSS_MAX - 1)
|
|
|
|
|
2011-11-21 14:53:46 +08:00
|
|
|
/* State transition probabilities for 4 state model */
|
2011-02-23 21:04:21 +08:00
|
|
|
struct tc_netem_gimodel {
|
|
|
|
__u32 p13;
|
|
|
|
__u32 p31;
|
|
|
|
__u32 p32;
|
|
|
|
__u32 p14;
|
|
|
|
__u32 p23;
|
|
|
|
};
|
|
|
|
|
|
|
|
/* Gilbert-Elliot models */
|
|
|
|
struct tc_netem_gemodel {
|
|
|
|
__u32 p;
|
|
|
|
__u32 r;
|
|
|
|
__u32 h;
|
|
|
|
__u32 k1;
|
|
|
|
};
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/* NOTE(review): presumably scale factor and size bound for the delay distribution table - confirm. */
#define NETEM_DIST_SCALE	8192
#define NETEM_DIST_MAX		16384
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2008-11-20 20:10:00 +08:00
|
|
|
/* DRR */
|
|
|
|
|
2009-11-05 01:50:58 +08:00
|
|
|
/* DRR attribute identifiers; TCA_DRR_MAX is the highest valid one. */
enum {
	TCA_DRR_UNSPEC,
	TCA_DRR_QUANTUM,
	__TCA_DRR_MAX
};

#define TCA_DRR_MAX	(__TCA_DRR_MAX - 1)
|
|
|
|
|
2009-11-05 01:50:58 +08:00
|
|
|
struct tc_drr_stats {
|
2009-02-11 09:18:17 +08:00
|
|
|
__u32 deficit;
|
2008-11-20 20:10:00 +08:00
|
|
|
};
|
|
|
|
|
2011-01-17 16:06:09 +08:00
|
|
|
/* MQPRIO */
|
|
|
|
/* MQPRIO sizing: prio_tc_map has TC_QOPT_BITMASK+1 entries, count/offset have TC_QOPT_MAX_QUEUE. */
#define TC_QOPT_BITMASK 15
#define TC_QOPT_MAX_QUEUE 16
|
|
|
|
|
|
|
|
struct tc_mqprio_qopt {
|
|
|
|
__u8 num_tc;
|
|
|
|
__u8 prio_tc_map[TC_QOPT_BITMASK + 1];
|
|
|
|
__u8 hw;
|
|
|
|
__u16 count[TC_QOPT_MAX_QUEUE];
|
|
|
|
__u16 offset[TC_QOPT_MAX_QUEUE];
|
|
|
|
};
|
|
|
|
|
net_sched: SFB flow scheduler
This is the Stochastic Fair Blue scheduler, based on work from :
W. Feng, D. Kandlur, D. Saha, K. Shin. Blue: A New Class of Active Queue
Management Algorithms. U. Michigan CSE-TR-387-99, April 1999.
http://www.thefengs.com/wuchang/blue/CSE-TR-387-99.pdf
This implementation is based on work done by Juliusz Chroboczek
General SFB algorithm can be found in figure 14, page 15:
B[l][n] : L x N array of bins (L levels, N bins per level)
enqueue()
Calculate hash function values h{0}, h{1}, .. h{L-1}
Update bins at each level
for i = 0 to L - 1
if (B[i][h{i}].qlen > bin_size)
B[i][h{i}].p_mark += p_increment;
else if (B[i][h{i}].qlen == 0)
B[i][h{i}].p_mark -= p_decrement;
p_min = min(B[0][h{0}].p_mark ... B[L-1][h{L-1}].p_mark);
if (p_min == 1.0)
ratelimit();
else
mark/drop with probabilty p_min;
I did the adaptation of Juliusz code to meet current kernel standards,
and various changes to address previous comments :
http://thread.gmane.org/gmane.linux.network/90225
http://thread.gmane.org/gmane.linux.network/90375
Default flow classifier is the rxhash introduced by RPS in 2.6.35, but
we can use an external flow classifier if wanted.
tc qdisc add dev $DEV parent 1:11 handle 11: \
est 0.5sec 2sec sfb limit 128
tc filter add dev $DEV protocol ip parent 11: handle 3 \
flow hash keys dst divisor 1024
Notes:
1) SFB default child qdisc is pfifo_fast. It can be changed by another
qdisc but a child qdisc MUST not drop a packet previously queued. This
is because SFB needs to handle a dequeued packet in order to maintain
its virtual queue states. pfifo_head_drop or CHOKe should not be used.
2) ECN is enabled by default, unlike RED/CHOKe/GRED
With help from Patrick McHardy & Andi Kleen
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Juliusz Chroboczek <Juliusz.Chroboczek@pps.jussieu.fr>
CC: Stephen Hemminger <shemminger@vyatta.com>
CC: Patrick McHardy <kaber@trash.net>
CC: Andi Kleen <andi@firstfloor.org>
CC: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2011-02-23 18:56:17 +08:00
|
|
|
/* SFB */
|
|
|
|
|
|
|
|
/* SFB attribute identifiers; TCA_SFB_MAX is the highest valid one. */
enum {
	TCA_SFB_UNSPEC,
	TCA_SFB_PARMS,
	__TCA_SFB_MAX,
};

#define TCA_SFB_MAX (__TCA_SFB_MAX - 1)
|
|
|
|
|
|
|
|
/*
 * Note: increment and decrement are Q0.16 fixed-point values.
 */
|
|
|
|
struct tc_sfb_qopt {
|
|
|
|
__u32 rehash_interval; /* delay between hash move, in ms */
|
|
|
|
__u32 warmup_time; /* double buffering warmup time in ms (warmup_time < rehash_interval) */
|
|
|
|
__u32 max; /* max len of qlen_min */
|
|
|
|
__u32 bin_size; /* maximum queue length per bin */
|
|
|
|
__u32 increment; /* probability increment, (d1 in Blue) */
|
|
|
|
__u32 decrement; /* probability decrement, (d2 in Blue) */
|
|
|
|
__u32 limit; /* max SFB queue length */
|
|
|
|
__u32 penalty_rate; /* inelastic flows are rate limited to 'rate' pps */
|
|
|
|
__u32 penalty_burst;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct tc_sfb_xstats {
|
|
|
|
__u32 earlydrop;
|
|
|
|
__u32 penaltydrop;
|
|
|
|
__u32 bucketdrop;
|
|
|
|
__u32 queuedrop;
|
|
|
|
__u32 childdrop; /* drops in child qdisc */
|
|
|
|
__u32 marked;
|
|
|
|
__u32 maxqlen;
|
|
|
|
__u32 maxprob;
|
|
|
|
__u32 avgprob;
|
|
|
|
};
|
|
|
|
|
|
|
|
/* Largest 16-bit value. NOTE(review): presumably the Q0.16 encoding of the maximum probability - confirm. */
#define SFB_MAX_PROB 0xFFFF
|
|
|
|
|
2011-04-04 13:30:58 +08:00
|
|
|
/* QFQ */
|
|
|
|
/* QFQ attribute identifiers; TCA_QFQ_MAX is the highest valid one. */
enum {
	TCA_QFQ_UNSPEC,
	TCA_QFQ_WEIGHT,
	TCA_QFQ_LMAX,
	__TCA_QFQ_MAX
};

#define TCA_QFQ_MAX	(__TCA_QFQ_MAX - 1)
|
|
|
|
|
|
|
|
struct tc_qfq_stats {
|
|
|
|
__u32 weight;
|
|
|
|
__u32 lmax;
|
|
|
|
};
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
#endif
|