sched: Make __update_entity_runnable_avg() fast
__update_entity_runnable_avg forms the core of maintaining an entity's runnable
load average. In this function we charge the accumulated run-time since last
update and handle appropriate decay. In some cases, e.g. a waking task, this
time interval may be much larger than our period unit.

Fortunately we can exploit some properties of our series to perform decay for a
blocked update in constant time and account the contribution for a running
update in essentially-constant* time.

[*]: For any running entity they should be performing updates at the tick which
gives us a soft limit of 1 jiffy between updates, and we can compute up to a
32 jiffy update in a single pass.

C program to generate the magic constants in the arrays:

  #include <math.h>
  #include <stdio.h>

  #define N 32
  #define WMULT_SHIFT 32

  const long WMULT_CONST = ((1UL << N) - 1);
  double y;

  long runnable_avg_yN_inv[N];
  void calc_mult_inv() {
      int i;
      double yn = 0;

      printf("inverses\n");
      for (i = 0; i < N; i++) {
          yn = (double)WMULT_CONST * pow(y, i);
          runnable_avg_yN_inv[i] = yn;
          printf("%2d: 0x%8lx\n", i, runnable_avg_yN_inv[i]);
      }
      printf("\n");
  }

  long mult_inv(long c, int n) {
      return (c * runnable_avg_yN_inv[n]) >> WMULT_SHIFT;
  }

  void calc_yn_sum(int n)
  {
      int i;
      double sum = 0, sum_fl = 0, diff = 0;

      /*
       * We take the floored sum to ensure the sum of partial sums is never
       * larger than the actual sum.
       */
      printf("sum y^n\n");
      printf(" %8s %8s %8s\n", "exact", "floor", "error");
      for (i = 1; i <= n; i++) {
          sum = (y * sum + y * 1024);
          sum_fl = floor(y * sum_fl+ y * 1024);
          printf("%2d: %8.0f %8.0f %8.0f\n", i, sum, sum_fl,
                 sum_fl - sum);
      }
      printf("\n");
  }

  void calc_conv(long n) {
      long old_n;
      int i = -1;

      printf("convergence (LOAD_AVG_MAX, LOAD_AVG_MAX_N)\n");
      do {
          old_n = n;
          n = mult_inv(n, 1) + 1024;
          i++;
      } while (n != old_n);
      printf("%d> %ld\n", i - 1, n);
      printf("\n");
  }

  void main() {
      y = pow(0.5, 1/(double)N);
      calc_mult_inv();
      calc_conv(1024);
      calc_yn_sum(N);
  }

[ Compile with -lm ]

Signed-off-by: Paul Turner <pjt@google.com>
Reviewed-by: Ben Segall <bsegall@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20120823141507.277808946@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
parent
f269ae0469
commit
5b51f2f80b
|
@ -883,18 +883,93 @@ static inline void update_cfs_shares(struct cfs_rq *cfs_rq)
|
|||
#endif /* CONFIG_FAIR_GROUP_SCHED */
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
/*
|
||||
* We choose a half-life close to 1 scheduling period.
|
||||
* Note: The tables below are dependent on this value.
|
||||
*/
|
||||
#define LOAD_AVG_PERIOD 32
|
||||
#define LOAD_AVG_MAX 47742 /* maximum possible load avg */
|
||||
#define LOAD_AVG_MAX_N 345 /* number of full periods to produce LOAD_AVG_MAX */
|
||||
|
||||
/* Precomputed fixed inverse multiplies for multiplication by y^n */
|
||||
static const u32 runnable_avg_yN_inv[] = {
|
||||
0xffffffff, 0xfa83b2da, 0xf5257d14, 0xefe4b99a, 0xeac0c6e6, 0xe5b906e6,
|
||||
0xe0ccdeeb, 0xdbfbb796, 0xd744fcc9, 0xd2a81d91, 0xce248c14, 0xc9b9bd85,
|
||||
0xc5672a10, 0xc12c4cc9, 0xbd08a39e, 0xb8fbaf46, 0xb504f333, 0xb123f581,
|
||||
0xad583ee9, 0xa9a15ab4, 0xa5fed6a9, 0xa2704302, 0x9ef5325f, 0x9b8d39b9,
|
||||
0x9837f050, 0x94f4efa8, 0x91c3d373, 0x8ea4398a, 0x8b95c1e3, 0x88980e80,
|
||||
0x85aac367, 0x82cd8698,
|
||||
};
|
||||
|
||||
/*
|
||||
* Precomputed \Sum y^k { 1<=k<=n }. These are floor(true_value) to prevent
|
||||
* over-estimates when re-combining.
|
||||
*/
|
||||
static const u32 runnable_avg_yN_sum[] = {
|
||||
0, 1002, 1982, 2941, 3880, 4798, 5697, 6576, 7437, 8279, 9103,
|
||||
9909,10698,11470,12226,12966,13690,14398,15091,15769,16433,17082,
|
||||
17718,18340,18949,19545,20128,20698,21256,21802,22336,22859,23371,
|
||||
};
|
||||
|
||||
/*
|
||||
* Approximate:
|
||||
* val * y^n, where y^32 ~= 0.5 (~1 scheduling period)
|
||||
*/
|
||||
static __always_inline u64 decay_load(u64 val, u64 n)
|
||||
{
|
||||
for (; n && val; n--) {
|
||||
val *= 4008;
|
||||
val >>= 12;
|
||||
unsigned int local_n;
|
||||
|
||||
if (!n)
|
||||
return val;
|
||||
else if (unlikely(n > LOAD_AVG_PERIOD * 63))
|
||||
return 0;
|
||||
|
||||
/* after bounds checking we can collapse to 32-bit */
|
||||
local_n = n;
|
||||
|
||||
/*
|
||||
* As y^PERIOD = 1/2, we can combine
|
||||
* y^n = 1/2^(n/PERIOD) * y^(n%PERIOD)
|
||||
* With a look-up table which covers y^n (n<PERIOD)
|
||||
*
|
||||
* To achieve constant time decay_load.
|
||||
*/
|
||||
if (unlikely(local_n >= LOAD_AVG_PERIOD)) {
|
||||
val >>= local_n / LOAD_AVG_PERIOD;
|
||||
local_n %= LOAD_AVG_PERIOD;
|
||||
}
|
||||
|
||||
return val;
|
||||
val *= runnable_avg_yN_inv[local_n];
|
||||
/* We don't use SRR here since we always want to round down. */
|
||||
return val >> 32;
|
||||
}
|
||||
|
||||
/*
|
||||
* For updates fully spanning n periods, the contribution to runnable
|
||||
* average will be: \Sum 1024*y^n
|
||||
*
|
||||
* We can compute this reasonably efficiently by combining:
|
||||
* y^PERIOD = 1/2 with precomputed \Sum 1024*y^n {for n <PERIOD}
|
||||
*/
|
||||
static u32 __compute_runnable_contrib(u64 n)
|
||||
{
|
||||
u32 contrib = 0;
|
||||
|
||||
if (likely(n <= LOAD_AVG_PERIOD))
|
||||
return runnable_avg_yN_sum[n];
|
||||
else if (unlikely(n >= LOAD_AVG_MAX_N))
|
||||
return LOAD_AVG_MAX;
|
||||
|
||||
/* Compute \Sum y^n combining precomputed values for y^i, \Sum y^j */
|
||||
do {
|
||||
contrib /= 2; /* y^LOAD_AVG_PERIOD = 1/2 */
|
||||
contrib += runnable_avg_yN_sum[LOAD_AVG_PERIOD];
|
||||
|
||||
n -= LOAD_AVG_PERIOD;
|
||||
} while (n > LOAD_AVG_PERIOD);
|
||||
|
||||
contrib = decay_load(contrib, n);
|
||||
return contrib + runnable_avg_yN_sum[n];
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -929,7 +1004,8 @@ static __always_inline int __update_entity_runnable_avg(u64 now,
|
|||
struct sched_avg *sa,
|
||||
int runnable)
|
||||
{
|
||||
u64 delta;
|
||||
u64 delta, periods;
|
||||
u32 runnable_contrib;
|
||||
int delta_w, decayed = 0;
|
||||
|
||||
delta = now - sa->last_runnable_update;
|
||||
|
@ -963,25 +1039,26 @@ static __always_inline int __update_entity_runnable_avg(u64 now,
|
|||
* period and accrue it.
|
||||
*/
|
||||
delta_w = 1024 - delta_w;
|
||||
BUG_ON(delta_w > delta);
|
||||
do {
|
||||
if (runnable)
|
||||
sa->runnable_avg_sum += delta_w;
|
||||
sa->runnable_avg_period += delta_w;
|
||||
if (runnable)
|
||||
sa->runnable_avg_sum += delta_w;
|
||||
sa->runnable_avg_period += delta_w;
|
||||
|
||||
/*
|
||||
* Remainder of delta initiates a new period, roll over
|
||||
* the previous.
|
||||
*/
|
||||
sa->runnable_avg_sum =
|
||||
decay_load(sa->runnable_avg_sum, 1);
|
||||
sa->runnable_avg_period =
|
||||
decay_load(sa->runnable_avg_period, 1);
|
||||
delta -= delta_w;
|
||||
|
||||
delta -= delta_w;
|
||||
/* New period is empty */
|
||||
delta_w = 1024;
|
||||
} while (delta >= 1024);
|
||||
/* Figure out how many additional periods this update spans */
|
||||
periods = delta / 1024;
|
||||
delta %= 1024;
|
||||
|
||||
sa->runnable_avg_sum = decay_load(sa->runnable_avg_sum,
|
||||
periods + 1);
|
||||
sa->runnable_avg_period = decay_load(sa->runnable_avg_period,
|
||||
periods + 1);
|
||||
|
||||
/* Efficiently calculate \sum (1..n_period) 1024*y^i */
|
||||
runnable_contrib = __compute_runnable_contrib(periods);
|
||||
if (runnable)
|
||||
sa->runnable_avg_sum += runnable_contrib;
|
||||
sa->runnable_avg_period += runnable_contrib;
|
||||
}
|
||||
|
||||
/* Remainder of delta accrued against u_0` */
|
||||
|
|
Loading…
Reference in New Issue