diff --git a/arch/x86/include/asm/div64.h b/arch/x86/include/asm/div64.h index 9b8cb50768c2..b8f1dc0761e4 100644 --- a/arch/x86/include/asm/div64.h +++ b/arch/x86/include/asm/div64.h @@ -74,16 +74,26 @@ static inline u64 mul_u32_u32(u32 a, u32 b) #else # include -static inline u64 mul_u64_u32_div(u64 a, u32 mul, u32 div) +/* + * Will generate an #DE when the result doesn't fit u64, could fix with an + * __ex_table[] entry when it becomes an issue. + */ +static inline u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div) { u64 q; asm ("mulq %2; divq %3" : "=a" (q) - : "a" (a), "rm" ((u64)mul), "rm" ((u64)div) + : "a" (a), "rm" (mul), "rm" (div) : "rdx"); return q; } +#define mul_u64_u64_div_u64 mul_u64_u64_div_u64 + +static inline u64 mul_u64_u32_div(u64 a, u32 mul, u32 div) +{ + return mul_u64_u64_div_u64(a, mul, div); +} #define mul_u64_u32_div mul_u64_u32_div #endif /* CONFIG_X86_32 */ diff --git a/include/linux/math64.h b/include/linux/math64.h index 11a267413e8e..d097119419e6 100644 --- a/include/linux/math64.h +++ b/include/linux/math64.h @@ -263,6 +263,8 @@ static inline u64 mul_u64_u32_div(u64 a, u32 mul, u32 divisor) } #endif /* mul_u64_u32_div */ +u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div); + #define DIV64_U64_ROUND_UP(ll, d) \ ({ u64 _tmp = (d); div64_u64((ll) + _tmp - 1, _tmp); }) diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index ff9435dee1df..5a55d2300452 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -519,50 +519,6 @@ void account_idle_ticks(unsigned long ticks) account_idle_time(cputime); } -/* - * Perform (stime * rtime) / total, but avoid multiplication overflow by - * losing precision when the numbers are big. - */ -static u64 scale_stime(u64 stime, u64 rtime, u64 total) -{ - u64 scaled; - - for (;;) { - /* Make sure "rtime" is the bigger of stime/rtime */ - if (stime > rtime) - swap(rtime, stime); - - /* Make sure 'total' fits in 32 bits */ - if (total >> 32) - goto drop_precision; - - /* Does rtime (and thus stime) fit in 32 bits? */ - if (!(rtime >> 32)) - break; - - /* Can we just balance rtime/stime rather than dropping bits? */ - if (stime >> 31) - goto drop_precision; - - /* We can grow stime and shrink rtime and try to make them both fit */ - stime <<= 1; - rtime >>= 1; - continue; - -drop_precision: - /* We drop from rtime, it has more bits than stime */ - rtime >>= 1; - total >>= 1; - } - - /* - * Make sure gcc understands that this is a 32x32->64 multiply, - * followed by a 64/32->64 divide. - */ - scaled = div_u64((u64) (u32) stime * (u64) (u32) rtime, (u32)total); - return scaled; -} - /* * Adjust tick based cputime random precision against scheduler runtime * accounting. @@ -622,7 +578,7 @@ void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev, goto update; } - stime = scale_stime(stime, rtime, stime + utime); + stime = mul_u64_u64_div_u64(stime, rtime, stime + utime); update: /* diff --git a/lib/math/div64.c b/lib/math/div64.c index 368ca7fd0d82..3952a07130d8 100644 --- a/lib/math/div64.c +++ b/lib/math/div64.c @@ -190,3 +190,44 @@ u32 iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder) return __iter_div_u64_rem(dividend, divisor, remainder); } EXPORT_SYMBOL(iter_div_u64_rem); + +#ifndef mul_u64_u64_div_u64 +u64 mul_u64_u64_div_u64(u64 a, u64 b, u64 c) +{ + u64 res = 0, div, rem; + int shift; + + /* can a * b overflow ? */ + if (ilog2(a) + ilog2(b) > 62) { + /* + * (b * a) / c is equal to + * + * (b / c) * a + + * (b % c) * a / c + * + * if nothing overflows. Can the 1st multiplication + * overflow? Yes, but we do not care: this can only + * happen if the end result can't fit in u64 anyway. + * + * So the code below does + * + * res = (b / c) * a; + * b = b % c; + */ + div = div64_u64_rem(b, c, &rem); + res = div * a; + b = rem; + + shift = ilog2(a) + ilog2(b) - 62; + if (shift > 0) { + /* drop precision */ + b >>= shift; + c >>= shift; + if (!c) + return res; + } + } + + return res + div64_u64(a * b, c); +} +#endif