bcache: Clean up bch_get_congested()
There are a few nits in this function. They could in theory all be separate patches, but that's probably taking small commits too far. 1) I added a brief comment saying what it does. 2) I like to declare pointer parameters "const" where possible for documentation reasons. 3) It uses bitmap_weight(&rand, BITS_PER_LONG) to compute the Hamming weight of a 32-bit random number (giving a random integer with mean 16 and variance 8). Passing by reference in a 64-bit variable is silly; just use hweight32(). 4) Its helper function fract_exp_two is unnecessarily tangled. Gcc can optimize the multiply by (1 << x) to a shift, but it can be written in a much more straightforward way at the cost of one more bit of internal precision. Some analysis reveals that this bit is always available. This shrinks the object code for fract_exp_two(x, 6) from 23 bytes: 0000000000000000 <foo1>: 0: 89 f9 mov %edi,%ecx 2: c1 e9 06 shr $0x6,%ecx 5: b8 01 00 00 00 mov $0x1,%eax a: d3 e0 shl %cl,%eax c: 83 e7 3f and $0x3f,%edi f: d3 e7 shl %cl,%edi 11: c1 ef 06 shr $0x6,%edi 14: 01 f8 add %edi,%eax 16: c3 retq To 19: 0000000000000017 <foo2>: 17: 89 f8 mov %edi,%eax 19: 83 e0 3f and $0x3f,%eax 1c: 83 c0 40 add $0x40,%eax 1f: 89 f9 mov %edi,%ecx 21: c1 e9 06 shr $0x6,%ecx 24: d3 e0 shl %cl,%eax 26: c1 e8 06 shr $0x6,%eax 29: c3 retq (Verified with 0 <= frac_bits <= 8, 0 <= x < 16<<frac_bits; both versions produce the same output.) 5) And finally, the call to bch_get_congested() in check_should_bypass() is separated from the use of the value by multiple tests which could moot the need to compute it. Move the computation down to where it's needed. This also saves a local register to hold the computed value. Signed-off-by: George Spelvin <lkml@sdf.org> Signed-off-by: Coly Li <colyli@suse.de> Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
parent
792732d985
commit
3a3947271c
|
@ -329,12 +329,13 @@ void bch_data_insert(struct closure *cl)
|
||||||
bch_data_insert_start(cl);
|
bch_data_insert_start(cl);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Congested? */
|
/*
|
||||||
|
* Congested? Return 0 (not congested) or the limit (in sectors)
|
||||||
unsigned int bch_get_congested(struct cache_set *c)
|
* beyond which we should bypass the cache due to congestion.
|
||||||
|
*/
|
||||||
|
unsigned int bch_get_congested(const struct cache_set *c)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
long rand;
|
|
||||||
|
|
||||||
if (!c->congested_read_threshold_us &&
|
if (!c->congested_read_threshold_us &&
|
||||||
!c->congested_write_threshold_us)
|
!c->congested_write_threshold_us)
|
||||||
|
@ -353,8 +354,7 @@ unsigned int bch_get_congested(struct cache_set *c)
|
||||||
if (i > 0)
|
if (i > 0)
|
||||||
i = fract_exp_two(i, 6);
|
i = fract_exp_two(i, 6);
|
||||||
|
|
||||||
rand = get_random_int();
|
i -= hweight32(get_random_u32());
|
||||||
i -= bitmap_weight(&rand, BITS_PER_LONG);
|
|
||||||
|
|
||||||
return i > 0 ? i : 1;
|
return i > 0 ? i : 1;
|
||||||
}
|
}
|
||||||
|
@ -376,7 +376,7 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio)
|
||||||
{
|
{
|
||||||
struct cache_set *c = dc->disk.c;
|
struct cache_set *c = dc->disk.c;
|
||||||
unsigned int mode = cache_mode(dc);
|
unsigned int mode = cache_mode(dc);
|
||||||
unsigned int sectors, congested = bch_get_congested(c);
|
unsigned int sectors, congested;
|
||||||
struct task_struct *task = current;
|
struct task_struct *task = current;
|
||||||
struct io *i;
|
struct io *i;
|
||||||
|
|
||||||
|
@ -412,6 +412,7 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio)
|
||||||
goto rescale;
|
goto rescale;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
congested = bch_get_congested(c);
|
||||||
if (!congested && !dc->sequential_cutoff)
|
if (!congested && !dc->sequential_cutoff)
|
||||||
goto rescale;
|
goto rescale;
|
||||||
|
|
||||||
|
|
|
@ -33,7 +33,7 @@ struct data_insert_op {
|
||||||
BKEY_PADDED(replace_key);
|
BKEY_PADDED(replace_key);
|
||||||
};
|
};
|
||||||
|
|
||||||
unsigned int bch_get_congested(struct cache_set *c);
|
unsigned int bch_get_congested(const struct cache_set *c);
|
||||||
void bch_data_insert(struct closure *cl);
|
void bch_data_insert(struct closure *cl);
|
||||||
|
|
||||||
void bch_cached_dev_request_init(struct cached_dev *dc);
|
void bch_cached_dev_request_init(struct cached_dev *dc);
|
||||||
|
|
|
@ -560,17 +560,29 @@ static inline uint64_t bch_crc64_update(uint64_t crc,
|
||||||
return crc;
|
return crc;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Does linear interpolation between powers of two */
|
/*
|
||||||
|
* A stepwise-linear pseudo-exponential. This returns 1 << (x >>
|
||||||
|
* frac_bits), with the less-significant bits filled in by linear
|
||||||
|
* interpolation.
|
||||||
|
*
|
||||||
|
* This can also be interpreted as a floating-point number format,
|
||||||
|
* where the low frac_bits are the mantissa (with implicit leading
|
||||||
|
* 1 bit), and the more significant bits are the exponent.
|
||||||
|
* The return value is 1.mantissa * 2^exponent.
|
||||||
|
*
|
||||||
|
* The way this is used, fract_bits is 6 and the largest possible
|
||||||
|
* input is CONGESTED_MAX-1 = 1023 (exponent 16, mantissa 0x1.fc),
|
||||||
|
* so the maximum output is 0x1fc00.
|
||||||
|
*/
|
||||||
static inline unsigned int fract_exp_two(unsigned int x,
|
static inline unsigned int fract_exp_two(unsigned int x,
|
||||||
unsigned int fract_bits)
|
unsigned int fract_bits)
|
||||||
{
|
{
|
||||||
unsigned int fract = x & ~(~0 << fract_bits);
|
unsigned int mantissa = 1 << fract_bits; /* Implicit bit */
|
||||||
|
|
||||||
x >>= fract_bits;
|
mantissa += x & (mantissa - 1);
|
||||||
x = 1 << x;
|
x >>= fract_bits; /* The exponent */
|
||||||
x += (x * fract) >> fract_bits;
|
/* Largest intermediate value 0x7f0000 */
|
||||||
|
return mantissa << x >> fract_bits;
|
||||||
return x;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void bch_bio_map(struct bio *bio, void *base);
|
void bch_bio_map(struct bio *bio, void *base);
|
||||||
|
|
Loading…
Reference in New Issue