[GFS2] Faster gfs2_bitfit algorithm
This version of the gfs2_bitfit algorithm includes the latest suggestions from Steve Whitehouse. It is typically eight to ten times faster than the version we're using today. If there is a lot of metadata mixed in (lots of small files) the algorithm is often 15 times faster, and given the right conditions, I've seen peaks of 20 times faster. Signed-off-by: Bob Peterson <rpeterso@redhat.com> Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
This commit is contained in:
parent
d82661d969
commit
1f466a47e8
|
@ -14,6 +14,7 @@
|
|||
#include <linux/fs.h>
|
||||
#include <linux/gfs2_ondisk.h>
|
||||
#include <linux/lm_interface.h>
|
||||
#include <linux/prefetch.h>
|
||||
|
||||
#include "gfs2.h"
|
||||
#include "incore.h"
|
||||
|
@ -33,6 +34,16 @@
|
|||
#define BFITNOENT ((u32)~0)
|
||||
#define NO_BLOCK ((u64)~0)
|
||||
|
||||
#if BITS_PER_LONG == 32
|
||||
#define LBITMASK (0x55555555UL)
|
||||
#define LBITSKIP55 (0x55555555UL)
|
||||
#define LBITSKIP00 (0x00000000UL)
|
||||
#else
|
||||
#define LBITMASK (0x5555555555555555UL)
|
||||
#define LBITSKIP55 (0x5555555555555555UL)
|
||||
#define LBITSKIP00 (0x0000000000000000UL)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* These routines are used by the resource group routines (rgrp.c)
|
||||
* to keep track of block allocation. Each block is represented by two
|
||||
|
@ -138,45 +149,63 @@ static inline unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd,
|
|||
static u32 gfs2_bitfit(const u8 *buffer, unsigned int buflen, u32 goal,
|
||||
u8 old_state)
|
||||
{
|
||||
const u8 *byte;
|
||||
u32 blk = goal;
|
||||
unsigned int bit, bitlong;
|
||||
const unsigned long *plong;
|
||||
#if BITS_PER_LONG == 32
|
||||
const unsigned long plong55 = 0x55555555;
|
||||
#else
|
||||
const unsigned long plong55 = 0x5555555555555555;
|
||||
#endif
|
||||
const u8 *byte, *start, *end;
|
||||
int bit, startbit;
|
||||
u32 g1, g2, misaligned;
|
||||
unsigned long *plong;
|
||||
unsigned long lskipval;
|
||||
|
||||
byte = buffer + (goal / GFS2_NBBY);
|
||||
plong = (const unsigned long *)(buffer + (goal / GFS2_NBBY));
|
||||
bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE;
|
||||
bitlong = bit;
|
||||
|
||||
while (byte < buffer + buflen) {
|
||||
|
||||
if (bitlong == 0 && old_state == 0 && *plong == plong55) {
|
||||
plong++;
|
||||
byte += sizeof(unsigned long);
|
||||
blk += sizeof(unsigned long) * GFS2_NBBY;
|
||||
continue;
|
||||
lskipval = (old_state & GFS2_BLKST_USED) ? LBITSKIP00 : LBITSKIP55;
|
||||
g1 = (goal / GFS2_NBBY);
|
||||
start = buffer + g1;
|
||||
byte = start;
|
||||
end = buffer + buflen;
|
||||
g2 = ALIGN(g1, sizeof(unsigned long));
|
||||
plong = (unsigned long *)(buffer + g2);
|
||||
startbit = bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE;
|
||||
misaligned = g2 - g1;
|
||||
if (!misaligned)
|
||||
goto ulong_aligned;
|
||||
/* parse the bitmap a byte at a time */
|
||||
misaligned:
|
||||
while (byte < end) {
|
||||
if (((*byte >> bit) & GFS2_BIT_MASK) == old_state) {
|
||||
return goal +
|
||||
(((byte - start) * GFS2_NBBY) +
|
||||
((bit - startbit) >> 1));
|
||||
}
|
||||
if (((*byte >> bit) & GFS2_BIT_MASK) == old_state)
|
||||
return blk;
|
||||
bit += GFS2_BIT_SIZE;
|
||||
if (bit >= 8) {
|
||||
if (bit >= GFS2_NBBY * GFS2_BIT_SIZE) {
|
||||
bit = 0;
|
||||
byte++;
|
||||
misaligned--;
|
||||
if (!misaligned) {
|
||||
plong = (unsigned long *)byte;
|
||||
goto ulong_aligned;
|
||||
}
|
||||
}
|
||||
bitlong += GFS2_BIT_SIZE;
|
||||
if (bitlong >= sizeof(unsigned long) * 8) {
|
||||
bitlong = 0;
|
||||
plong++;
|
||||
}
|
||||
|
||||
blk++;
|
||||
}
|
||||
return BFITNOENT;
|
||||
|
||||
/* parse the bitmap a unsigned long at a time */
|
||||
ulong_aligned:
|
||||
/* Stop at "end - 1" or else prefetch can go past the end and segfault.
|
||||
We could "if" it but we'd lose some of the performance gained.
|
||||
This way will only slow down searching the very last 4/8 bytes
|
||||
depending on architecture. I've experimented with several ways
|
||||
of writing this section such as using an else before the goto
|
||||
but this one seems to be the fastest. */
|
||||
while ((unsigned char *)plong < end - 1) {
|
||||
prefetch(plong + 1);
|
||||
if (((*plong) & LBITMASK) != lskipval)
|
||||
break;
|
||||
plong++;
|
||||
}
|
||||
if ((unsigned char *)plong < end) {
|
||||
byte = (const u8 *)plong;
|
||||
misaligned += sizeof(unsigned long) - 1;
|
||||
goto misaligned;
|
||||
}
|
||||
return BFITNOENT;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue