bcache: Clean up bch_get_congested()

There are a few nits in this function. They could in theory all be separate patches, but that's probably taking small commits too far. 1) I added a brief comment saying what it does. 2) I like to declare pointer parameters "const" where possible for documentation reasons. 3) It uses bitmap_weight(&rand, BITS_PER_LONG) to compute the Hamming weight of a 32-bit random number (giving a random integer with mean 16 and variance 8). Passing by reference in a 64-bit variable is silly; just use hweight32(). 4) Its helper function fract_exp_two is unnecessarily tangled. Gcc can optimize the multiply by (1 << x) to a shift, but it can be written in a much more straightforward way at the cost of one more bit of internal precision. Some analysis reveals that this bit is always available. This shrinks the object code for fract_exp_two(x, 6) from 23 bytes: 0000000000000000 <foo1>: 0: 89 f9 mov %edi,%ecx 2: c1 e9 06 shr $0x6,%ecx 5: b8 01 00 00 00 mov $0x1,%eax a: d3 e0 shl %cl,%eax c: 83 e7 3f and $0x3f,%edi f: d3 e7 shl %cl,%edi 11: c1 ef 06 shr $0x6,%edi 14: 01 f8 add %edi,%eax 16: c3 retq To 19: 0000000000000017 <foo2>: 17: 89 f8 mov %edi,%eax 19: 83 e0 3f and $0x3f,%eax 1c: 83 c0 40 add $0x40,%eax 1f: 89 f9 mov %edi,%ecx 21: c1 e9 06 shr $0x6,%ecx 24: d3 e0 shl %cl,%eax 26: c1 e8 06 shr $0x6,%eax 29: c3 retq (Verified with 0 <= frac_bits <= 8, 0 <= x < 16<<frac_bits; both versions produce the same output.) 5) And finally, the call to bch_get_congested() in check_should_bypass() is separated from the use of the value by multiple tests which could moot the need to compute it. Move the computation down to where it's needed. This also saves a local register to hold the computed value. Signed-off-by: George Spelvin <lkml@sdf.org> Signed-off-by: Coly Li <colyli@suse.de> Signed-off-by: Jens Axboe <axboe@kernel.dk>
2019-04-25 00:48:30 +08:00 · 2019-04-25 00:48:30 +08:00 · 3a3947271c
parent 792732d985
commit 3a3947271c
3 changed files with 28 additions and 15 deletions
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@ -329,12 +329,13 @@ void bch_data_insert(struct closure *cl)
 	bch_data_insert_start(cl);
 }

-/* Congested? */
-
-unsigned int bch_get_congested(struct cache_set *c)
+/*
+ * Congested?  Return 0 (not congested) or the limit (in sectors)
+ * beyond which we should bypass the cache due to congestion.
+ */
+unsigned int bch_get_congested(const struct cache_set *c)
 {
 	int i;
-	long rand;

 	if (!c->congested_read_threshold_us &&
 	    !c->congested_write_threshold_us)
@ -353,8 +354,7 @@ unsigned int bch_get_congested(struct cache_set *c)
 	if (i > 0)
 		i = fract_exp_two(i, 6);

-	rand = get_random_int();
-	i -= bitmap_weight(&rand, BITS_PER_LONG);
+	i -= hweight32(get_random_u32());

 	return i > 0 ? i : 1;
 }
@ -376,7 +376,7 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio)
 {
 	struct cache_set *c = dc->disk.c;
 	unsigned int mode = cache_mode(dc);
-	unsigned int sectors, congested = bch_get_congested(c);
+	unsigned int sectors, congested;
 	struct task_struct *task = current;
 	struct io *i;

@ -412,6 +412,7 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio)
 			goto rescale;
 	}

+	congested = bch_get_congested(c);
 	if (!congested && !dc->sequential_cutoff)
 		goto rescale;

--- a/drivers/md/bcache/request.h
+++ b/drivers/md/bcache/request.h
@ -33,7 +33,7 @@ struct data_insert_op {
 	BKEY_PADDED(replace_key);
 };

-unsigned int bch_get_congested(struct cache_set *c);
+unsigned int bch_get_congested(const struct cache_set *c);
 void bch_data_insert(struct closure *cl);

 void bch_cached_dev_request_init(struct cached_dev *dc);
--- a/drivers/md/bcache/util.h
+++ b/drivers/md/bcache/util.h
@ -560,17 +560,29 @@ static inline uint64_t bch_crc64_update(uint64_t crc,
 	return crc;
 }

-/* Does linear interpolation between powers of two */
+/*
+ * A stepwise-linear pseudo-exponential.  This returns 1 << (x >>
+ * frac_bits), with the less-significant bits filled in by linear
+ * interpolation.
+ *
+ * This can also be interpreted as a floating-point number format,
+ * where the low frac_bits are the mantissa (with implicit leading
+ * 1 bit), and the more significant bits are the exponent.
+ * The return value is 1.mantissa * 2^exponent.
+ *
+ * The way this is used, fract_bits is 6 and the largest possible
+ * input is CONGESTED_MAX-1 = 1023 (exponent 16, mantissa 0x1.fc),
+ * so the maximum output is 0x1fc00.
+ */
 static inline unsigned int fract_exp_two(unsigned int x,
 					 unsigned int fract_bits)
 {
-	unsigned int fract = x & ~(~0 << fract_bits);
+	unsigned int mantissa = 1 << fract_bits;	/* Implicit bit */

-	x >>= fract_bits;
-	x   = 1 << x;
-	x  += (x * fract) >> fract_bits;
-
-	return x;
+	mantissa += x & (mantissa - 1);
+	x >>= fract_bits;	/* The exponent */
+	/* Largest intermediate value 0x7f0000 */
+	return mantissa << x >> fract_bits;
 }

 void bch_bio_map(struct bio *bio, void *base);