We've got ten GFS2 patches for this merge window.

1. Andreas Gruenbacher wrote a patch to replace the deprecated
    call to rhashtable_walk_init with rhashtable_walk_enter.
 2. Andreas also wrote a patch to eliminate redundant code in
    two of our debugfs sequence files.
 3. Andreas also cleaned up the rhashtable key ugliness Linus
    pointed out during this cycle, following Linus's suggestions.
 4. Andreas also wrote a patch to take advantage of his new
    function rhashtable_lookup_get_insert_fast. This makes glock
    lookup faster and more bullet-proof.
 5. Andreas also wrote a patch to revert a patch in the evict
    path that caused occasional deadlocks, and is no longer
    needed.
 6. Andrew Price wrote a patch to re-enable fallocate for the
    rindex system file to enable gfs2_grow to grow properly on
    secondary file system grow operations.
 7. I wrote a patch to initialize an inode number field to make
    certain kernel trace points more understandable.
 8. I also wrote a patch that makes GFS2 file system "withdraw"
    work more like it should by ignoring operations after a
    withdraw that would formerly cause a BUG() and kernel panic.
 9. I also reworked the entire truncate/delete algorithm,
    scrapping the old recursive algorithm in favor of a new
    non-recursive algorithm. This was done for performance:
    This way, GFS2 no longer needs to lock multiple resource
    groups while doing truncates and deletes of files that cross
    multiple resource group boundaries, allowing for better
    parallelism. It also solves a problem whereby deleting large
    files would request a large chunk of kernel memory, which
    resulted in a get_page_from_freelist warning.
 10. Due to a regression found during testing, I added a new
     patch to correct "GFS2: Prevent BUG from occurring when
     normal Withdraws occur".
 -----BEGIN PGP SIGNATURE-----
 
 iQEcBAABAgAGBQJZDNnaAAoJENeLYdPf93o7B7kIAJzwz7vVDVg2TpWVhMmXIWhf
 rZx3Gth5F0h+ZHddW7HzTLg+64XQ5//GyDD3UDtCpkhl5SJH+nt3juHyPJlRwioT
 0ua4SjyKLQSoJJVAEgAwu42QjORTXab7NjYn5LEhvRc0Gg/El9WGU+ZgmP2/aAvf
 KE2u/IEYNDkoJNS3Oqc7shajAyLYda6wCAASs/1ZGt9u48m/o/I23Zd7wr7EOkzw
 rd3gB0x80cJqDAB5IcymGOm111Tg4g34LwsRuyMnWE3H1jOgV+J515FVHEIvZuPq
 Wl9X7V8CzktI7nyLKVnZhpuv5JzyMq/vOPiD01tTFx8Oy1JCRezjmATXFjW/zIo=
 =MX3c
 -----END PGP SIGNATURE-----

Merge tag 'gfs2-4.12.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/gfs2/linux-gfs2

Pull GFS2 updates from Bob Peterson:
 "We've got ten GFS2 patches for this merge window.

   - Andreas Gruenbacher wrote a patch to replace the deprecated call to
     rhashtable_walk_init with rhashtable_walk_enter.

   - Andreas also wrote a patch to eliminate redundant code in two of
     our debugfs sequence files.

   - Andreas also cleaned up the rhashtable key ugliness Linus pointed
     out during this cycle, following Linus's suggestions.

   - Andreas also wrote a patch to take advantage of his new function
     rhashtable_lookup_get_insert_fast. This makes glock lookup faster
     and more bullet-proof.

   - Andreas also wrote a patch to revert a patch in the evict path that
     caused occasional deadlocks, and is no longer needed.

   - Andrew Price wrote a patch to re-enable fallocate for the rindex
     system file to enable gfs2_grow to grow properly on secondary file
     system grow operations.

   - I wrote a patch to initialize an inode number field to make certain
     kernel trace points more understandable.

   - I also wrote a patch that makes GFS2 file system "withdraw" work
     more like it should by ignoring operations after a withdraw that
     would formerly cause a BUG() and kernel panic.

   - I also reworked the entire truncate/delete algorithm, scrapping the
     old recursive algorithm in favor of a new non-recursive algorithm.
     This was done for performance: This way, GFS2 no longer needs to
     lock multiple resource groups while doing truncates and deletes of
     files that cross multiple resource group boundaries, allowing for
     better parallelism. It also solves a problem whereby deleting large
     files would request a large chunk of kernel memory, which resulted
     in a get_page_from_freelist warning.

   - Due to a regression found during testing, I added a new patch to
     correct 'GFS2: Prevent BUG from occurring when normal Withdraws
     occur'."

* tag 'gfs2-4.12.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/gfs2/linux-gfs2:
  GFS2: Allow glocks to be unlocked after withdraw
  GFS2: Non-recursive delete
  gfs2: Re-enable fallocate for the rindex
  Revert "GFS2: Wait for iopen glock dequeues"
  gfs2: Switch to rhashtable_lookup_get_insert_fast
  GFS2: Temporarily zero i_no_addr when creating a dinode
  gfs2: Don't pack struct lm_lockname
  gfs2: Deduplicate gfs2_{glocks,glstats}_open
  gfs2: Replace rhashtable_walk_init with rhashtable_walk_enter
  GFS2: Prevent BUG from occurring when normal Withdraws occur
This commit is contained in:
Linus Torvalds 2017-05-05 13:40:20 -07:00
commit 1a5fb64fee
8 changed files with 518 additions and 347 deletions

View File

@ -38,11 +38,6 @@ struct metapath {
__u16 mp_list[GFS2_MAX_META_HEIGHT];
};
struct strip_mine {
int sm_first;
unsigned int sm_height;
};
/**
* gfs2_unstuffer_page - unstuff a stuffed inode into a block cached by a page
* @ip: the inode
@ -252,6 +247,19 @@ static inline unsigned int metapath_branch_start(const struct metapath *mp)
return 1;
}
/**
* metaptr1 - Return the first possible metadata pointer in a metaath buffer
* @height: The metadata height (0 = dinode)
* @mp: The metapath
*/
static inline __be64 *metaptr1(unsigned int height, const struct metapath *mp)
{
struct buffer_head *bh = mp->mp_bh[height];
if (height == 0)
return ((__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)));
return ((__be64 *)(bh->b_data + sizeof(struct gfs2_meta_header)));
}
/**
* metapointer - Return pointer to start of metadata in a buffer
* @height: The metadata height (0 = dinode)
@ -264,10 +272,8 @@ static inline unsigned int metapath_branch_start(const struct metapath *mp)
static inline __be64 *metapointer(unsigned int height, const struct metapath *mp)
{
struct buffer_head *bh = mp->mp_bh[height];
unsigned int head_size = (height > 0) ?
sizeof(struct gfs2_meta_header) : sizeof(struct gfs2_dinode);
return ((__be64 *)(bh->b_data + head_size)) + mp->mp_list[height];
__be64 *p = metaptr1(height, mp);
return p + mp->mp_list[height];
}
static void gfs2_metapath_ra(struct gfs2_glock *gl,
@ -295,6 +301,23 @@ static void gfs2_metapath_ra(struct gfs2_glock *gl,
}
}
/**
* lookup_mp_height - helper function for lookup_metapath
* @ip: the inode
* @mp: the metapath
* @h: the height which needs looking up
*/
static int lookup_mp_height(struct gfs2_inode *ip, struct metapath *mp, int h)
{
__be64 *ptr = metapointer(h, mp);
u64 dblock = be64_to_cpu(*ptr);
if (!dblock)
return h + 1;
return gfs2_meta_indirect_buffer(ip, h + 1, dblock, &mp->mp_bh[h + 1]);
}
/**
* lookup_metapath - Walk the metadata tree to a specific point
* @ip: The inode
@ -316,17 +339,10 @@ static int lookup_metapath(struct gfs2_inode *ip, struct metapath *mp)
{
unsigned int end_of_metadata = ip->i_height - 1;
unsigned int x;
__be64 *ptr;
u64 dblock;
int ret;
for (x = 0; x < end_of_metadata; x++) {
ptr = metapointer(x, mp);
dblock = be64_to_cpu(*ptr);
if (!dblock)
return x + 1;
ret = gfs2_meta_indirect_buffer(ip, x+1, dblock, &mp->mp_bh[x+1]);
ret = lookup_mp_height(ip, mp, x);
if (ret)
return ret;
}
@ -334,6 +350,35 @@ static int lookup_metapath(struct gfs2_inode *ip, struct metapath *mp)
return ip->i_height;
}
/**
 * fillup_metapath - read in the missing buffers of a metapath up to a height
 * @ip: The inode
 * @mp: The metapath
 * @h: The height to which it should be mapped
 *
 * Like lookup_metapath, except that it covers a range of heights: it backs
 * up from @h - 1 past every height whose buffer slot is still NULL (never
 * below height 0) and then calls lookup_mp_height() for each height from
 * there up to @h - 1.
 *
 * Returns: the first non-zero value reported by lookup_mp_height(), or the
 *          inode's metadata tree height when every lookup returned zero
 *          (also when @h is 0 and nothing needs to be looked up)
 */
static int fillup_metapath(struct gfs2_inode *ip, struct metapath *mp, int h)
{
	unsigned int cur;
	int rc;

	if (h == 0)
		return ip->i_height;

	/* Back up to the deepest height that already has a buffer. */
	cur = h - 1;
	while (cur > 0 && !mp->mp_bh[cur])
		cur--;

	while (cur < h) {
		rc = lookup_mp_height(ip, mp, cur);
		if (rc)
			return rc;
		cur++;
	}

	return ip->i_height;
}
static inline void release_metapath(struct metapath *mp)
{
int i;
@ -422,6 +467,13 @@ enum alloc_state {
/* ALLOC_UNSTUFF = 3, TBD and rather complicated */
};
/*
 * hptrs - pick the per-buffer pointer count for a metadata height
 *
 * Height 0 uses sdp->sd_diptrs; every other height uses sdp->sd_inptrs.
 */
static inline unsigned int hptrs(struct gfs2_sbd *sdp, const unsigned int hgt)
{
	return hgt ? sdp->sd_inptrs : sdp->sd_diptrs;
}
/**
* gfs2_bmap_alloc - Build a metadata tree of the requested height
* @inode: The GFS2 inode
@ -620,7 +672,7 @@ int gfs2_block_map(struct inode *inode, sector_t lblock,
BUG_ON(maxlen == 0);
memset(mp.mp_bh, 0, sizeof(mp.mp_bh));
memset(&mp, 0, sizeof(mp));
bmap_lock(ip, create);
clear_buffer_mapped(bh_map);
clear_buffer_new(bh_map);
@ -701,252 +753,6 @@ int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsi
return ret;
}
/**
* do_strip - Look for a particular layer of the file and strip it off
* @ip: the inode
* @dibh: the dinode buffer
* @bh: A buffer of pointers
* @top: The first pointer in the buffer
* @bottom: One more than the last pointer
* @height: the height this buffer is at
* @sm: a pointer to a struct strip_mine
*
* Frees the blocks referenced by the pointers in [@top, @bottom) when @height
* matches sm->sm_height; buffers at any other height are left untouched.
* All resource groups referenced by the buffer are locked together, and the
* work is split over several transactions when it would exceed
* sd_log_thresh2.
*
* Returns: errno
*/
static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
struct buffer_head *bh, __be64 *top, __be64 *bottom,
unsigned int height, struct strip_mine *sm)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct gfs2_rgrp_list rlist;
struct gfs2_trans *tr;
u64 bn, bstart;
u32 blen, btotal;
__be64 *p;
unsigned int rg_blocks = 0;
int metadata;
unsigned int revokes = 0;
int x;
int error;
int jblocks_rqsted;
error = gfs2_rindex_update(sdp);
if (error)
return error;
/* A zero first pointer clears sm_first, so the first slot is not skipped. */
if (!*top)
sm->sm_first = 0;
/* Only the layer selected by the caller is stripped on this pass. */
if (height != sm->sm_height)
return 0;
/* sm_first set: skip the first pointer, and do so only once. */
if (sm->sm_first) {
top++;
sm->sm_first = 0;
}
/* Anything above the bottom height of the tree is metadata. */
metadata = (height != ip->i_height - 1);
if (metadata)
revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs;
else if (ip->i_depth)
revokes = sdp->sd_inptrs;
memset(&rlist, 0, sizeof(struct gfs2_rgrp_list));
bstart = 0;
blen = 0;
/* First pass: group contiguous block numbers into extents and add the
   start of each extent to the rgrp list. Nothing is modified yet. */
for (p = top; p < bottom; p++) {
if (!*p)
continue;
bn = be64_to_cpu(*p);
if (bstart + blen == bn)
blen++;
else {
if (bstart)
gfs2_rlist_add(ip, &rlist, bstart);
bstart = bn;
blen = 1;
}
}
if (bstart)
gfs2_rlist_add(ip, &rlist, bstart);
else
goto out; /* Nothing to do */
gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE);
/* Sum rd_length over the listed rgrps; used below to size the journal
   reservation. */
for (x = 0; x < rlist.rl_rgrps; x++) {
struct gfs2_rgrpd *rgd;
rgd = rlist.rl_ghs[x].gh_gl->gl_object;
rg_blocks += rgd->rd_length;
}
/* Acquire every rgrp glock in the list in one call. */
error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
if (error)
goto out_rlist;
if (gfs2_rs_active(&ip->i_res)) /* needs to be done with the rgrp glock held */
gfs2_rs_deltree(&ip->i_res);
restart:
/* Reserve journal space, capped at sd_log_thresh2. */
jblocks_rqsted = rg_blocks + RES_DINODE +
RES_INDIRECT + RES_STATFS + RES_QUOTA +
gfs2_struct2blk(sdp, revokes, sizeof(u64));
if (jblocks_rqsted > atomic_read(&sdp->sd_log_thresh2))
jblocks_rqsted = atomic_read(&sdp->sd_log_thresh2);
error = gfs2_trans_begin(sdp, jblocks_rqsted, revokes);
if (error)
goto out_rg_gunlock;
tr = current->journal_info;
down_write(&ip->i_rw_mutex);
gfs2_trans_add_meta(ip->i_gl, dibh);
gfs2_trans_add_meta(ip->i_gl, bh);
bstart = 0;
blen = 0;
btotal = 0;
/* Second pass: zero each pointer and free the blocks extent by extent.
   Pointers zeroed by an earlier transaction are skipped after a restart. */
for (p = top; p < bottom; p++) {
if (!*p)
continue;
/* check for max reasonable journal transaction blocks */
if (tr->tr_num_buf_new + RES_STATFS +
RES_QUOTA >= atomic_read(&sdp->sd_log_thresh2)) {
if (rg_blocks >= tr->tr_num_buf_new)
rg_blocks -= tr->tr_num_buf_new;
else
rg_blocks = 0;
break;
}
bn = be64_to_cpu(*p);
if (bstart + blen == bn)
blen++;
else {
if (bstart) {
__gfs2_free_blocks(ip, bstart, blen, metadata);
btotal += blen;
}
bstart = bn;
blen = 1;
}
*p = 0;
gfs2_add_inode_blocks(&ip->i_inode, -1);
}
/* The loop ran to the end of the buffer, so no restart is needed. */
if (p == bottom)
rg_blocks = 0;
if (bstart) {
__gfs2_free_blocks(ip, bstart, blen, metadata);
btotal += blen;
}
gfs2_statfs_change(sdp, 0, +btotal, 0);
gfs2_quota_change(ip, -(s64)btotal, ip->i_inode.i_uid,
ip->i_inode.i_gid);
ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
gfs2_dinode_out(ip, dibh->b_data);
up_write(&ip->i_rw_mutex);
gfs2_trans_end(sdp);
/* Non-zero rg_blocks means the loop stopped early: start a new transaction. */
if (rg_blocks)
goto restart;
out_rg_gunlock:
gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs);
out_rlist:
gfs2_rlist_free(&rlist);
out:
return error;
}
/**
* recursive_scan - recursively scan through the end of a file
* @ip: the inode
* @dibh: the dinode buffer
* @mp: the path through the metadata to the point to start
* @height: the height the recursion is at
* @block: the indirect block to look at
* @first: 1 if this is the first block
* @sm: not interpreted by this function; passed through to do_strip()
*
* When this is first called @height and @block should be zero and
* @first should be 1.
*
* At each level the buffer is handed to do_strip() first; the function then
* descends into every non-zero pointer while @height is above the bottom
* of the metadata tree.
*
* Returns: errno
*/
static int recursive_scan(struct gfs2_inode *ip, struct buffer_head *dibh,
struct metapath *mp, unsigned int height,
u64 block, int first, struct strip_mine *sm)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct buffer_head *bh = NULL;
__be64 *top, *bottom;
u64 bn;
int error;
int mh_size = sizeof(struct gfs2_meta_header);
if (!height) {
/* Height 0: the pointers live in the dinode buffer itself, which also
   becomes @dibh for every deeper call. */
error = gfs2_meta_inode_buffer(ip, &bh);
if (error)
return error;
dibh = bh;
top = (__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + mp->mp_list[0];
bottom = (__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + sdp->sd_diptrs;
} else {
/* Indirect block: start at the metapath offset only for the first
   block at this height, otherwise at the first pointer. */
error = gfs2_meta_indirect_buffer(ip, height, block, &bh);
if (error)
return error;
top = (__be64 *)(bh->b_data + mh_size) +
(first ? mp->mp_list[height] : 0);
bottom = (__be64 *)(bh->b_data + mh_size) + sdp->sd_inptrs;
}
error = do_strip(ip, dibh, bh, top, bottom, height, sm);
if (error)
goto out;
if (height < ip->i_height - 1) {
gfs2_metapath_ra(ip->i_gl, bh, top);
/* Only the very first pointer visited keeps first == 1. */
for (; top < bottom; top++, first = 0) {
if (!*top)
continue;
bn = be64_to_cpu(*top);
error = recursive_scan(ip, dibh, mp, height + 1, bn,
first, sm);
if (error)
break;
}
}
out:
brelse(bh);
return error;
}
/**
* gfs2_block_truncate_page - Deal with zeroing out data for truncate
*
@ -1106,41 +912,406 @@ out:
return error;
}
static int trunc_dealloc(struct gfs2_inode *ip, u64 size)
/**
* sweep_bh_for_rgrps - find an rgrp in a meta buffer and free blocks therein
* @ip: inode
* @rd_gh: holder of resource group glock
* @mp: current metapath fully populated with buffers
* @btotal: place to keep count of total blocks freed
* @hgt: height we're processing
* @preserve1: true if the first metadata pointer must be preserved (skipped)
*
* We sweep a metadata buffer (provided by the metapath) for blocks we need to
* free, and free them all. However, we do it one rgrp at a time. If this
* block has references to multiple rgrps, we break it into individual
* transactions. This allows other processes to use the rgrps while we're
* focused on a single one, for better concurrency / performance.
* At every transaction boundary, we rewrite the inode into the journal.
* That way the bitmaps are kept consistent with the inode and we can recover
* if we're interrupted by power-outages.
*
* Returns: 0, or return code if an error occurred.
* *btotal has the total number of blocks freed
*/
static int sweep_bh_for_rgrps(struct gfs2_inode *ip, struct gfs2_holder *rd_gh,
const struct metapath *mp, u32 *btotal, int hgt,
bool preserve1)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
unsigned int height = ip->i_height;
u64 lblock;
struct metapath mp;
int error;
struct gfs2_rgrpd *rgd;
struct gfs2_trans *tr;
struct buffer_head *bh = mp->mp_bh[hgt];
__be64 *top, *bottom, *p;
int blks_outside_rgrp;
u64 bn, bstart, isize_blks;
s64 blen; /* needs to be s64 or gfs2_add_inode_blocks breaks */
int meta = ((hgt != ip->i_height - 1) ? 1 : 0);
int ret = 0;
bool buf_in_tr = false; /* buffer was added to transaction */
if (!size)
if (gfs2_metatype_check(sdp, bh,
(hgt ? GFS2_METATYPE_IN : GFS2_METATYPE_DI)))
return -EIO;
more_rgrps:
blks_outside_rgrp = 0;
bstart = 0;
blen = 0;
top = metapointer(hgt, mp); /* first ptr from metapath */
/* If we're keeping some data at the truncation point, we've got to
preserve the metadata tree by adding 1 to the starting metapath. */
if (preserve1)
top++;
bottom = (__be64 *)(bh->b_data + bh->b_size);
for (p = top; p < bottom; p++) {
if (!*p)
continue;
bn = be64_to_cpu(*p);
if (gfs2_holder_initialized(rd_gh)) {
rgd = (struct gfs2_rgrpd *)rd_gh->gh_gl->gl_object;
gfs2_assert_withdraw(sdp,
gfs2_glock_is_locked_by_me(rd_gh->gh_gl));
} else {
rgd = gfs2_blk2rgrpd(sdp, bn, false);
ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
0, rd_gh);
if (ret)
goto out;
/* Must be done with the rgrp glock held: */
if (gfs2_rs_active(&ip->i_res) &&
rgd == ip->i_res.rs_rbm.rgd)
gfs2_rs_deltree(&ip->i_res);
}
if (!rgrp_contains_block(rgd, bn)) {
blks_outside_rgrp++;
continue;
}
/* The size of our transactions will be unknown until we
actually process all the metadata blocks that relate to
the rgrp. So we estimate. We know it can't be more than
the dinode's i_blocks and we don't want to exceed the
journal flush threshold, sd_log_thresh2. */
if (current->journal_info == NULL) {
unsigned int jblocks_rqsted, revokes;
jblocks_rqsted = rgd->rd_length + RES_DINODE +
RES_INDIRECT;
isize_blks = gfs2_get_inode_blocks(&ip->i_inode);
if (isize_blks > atomic_read(&sdp->sd_log_thresh2))
jblocks_rqsted +=
atomic_read(&sdp->sd_log_thresh2);
else
jblocks_rqsted += isize_blks;
revokes = jblocks_rqsted;
if (meta)
revokes += hptrs(sdp, hgt);
else if (ip->i_depth)
revokes += sdp->sd_inptrs;
ret = gfs2_trans_begin(sdp, jblocks_rqsted, revokes);
if (ret)
goto out_unlock;
down_write(&ip->i_rw_mutex);
}
/* check if we will exceed the transaction blocks requested */
tr = current->journal_info;
if (tr->tr_num_buf_new + RES_STATFS +
RES_QUOTA >= atomic_read(&sdp->sd_log_thresh2)) {
/* We set blks_outside_rgrp to ensure the loop will
be repeated for the same rgrp, but with a new
transaction. */
blks_outside_rgrp++;
/* This next part is tricky. If the buffer was added
to the transaction, we've already set some block
pointers to 0, so we better follow through and free
them, or we will introduce corruption (so break).
This may be impossible, or at least rare, but I
decided to cover the case regardless.
If the buffer was not added to the transaction
(this call), doing so would exceed our transaction
size, so we need to end the transaction and start a
new one (so goto). */
if (buf_in_tr)
break;
goto out_unlock;
}
gfs2_trans_add_meta(ip->i_gl, bh);
buf_in_tr = true;
*p = 0;
if (bstart + blen == bn) {
blen++;
continue;
}
if (bstart) {
__gfs2_free_blocks(ip, bstart, (u32)blen, meta);
(*btotal) += blen;
gfs2_add_inode_blocks(&ip->i_inode, -blen);
}
bstart = bn;
blen = 1;
}
if (bstart) {
__gfs2_free_blocks(ip, bstart, (u32)blen, meta);
(*btotal) += blen;
gfs2_add_inode_blocks(&ip->i_inode, -blen);
}
out_unlock:
if (!ret && blks_outside_rgrp) { /* If buffer still has non-zero blocks
outside the rgrp we just processed,
do it all over again. */
if (current->journal_info) {
struct buffer_head *dibh = mp->mp_bh[0];
/* Every transaction boundary, we rewrite the dinode
to keep its di_blocks current in case of failure. */
ip->i_inode.i_mtime = ip->i_inode.i_ctime =
CURRENT_TIME;
gfs2_trans_add_meta(ip->i_gl, dibh);
gfs2_dinode_out(ip, dibh->b_data);
up_write(&ip->i_rw_mutex);
gfs2_trans_end(sdp);
}
gfs2_glock_dq_uninit(rd_gh);
cond_resched();
goto more_rgrps;
}
out:
return ret;
}
/**
* find_nonnull_ptr - find a non-null pointer given a metapath and height
* assumes the metapath is valid (with buffers) out to height h
* @mp: starting metapath
* @h: desired height to search
*
* Returns: true if a non-null pointer was found in the metapath buffer
* false if all remaining pointers are NULL in the buffer
*/
static bool find_nonnull_ptr(struct gfs2_sbd *sdp, struct metapath *mp,
unsigned int h)
{
__be64 *ptr;
unsigned int ptrs = hptrs(sdp, h) - 1;
while (true) {
ptr = metapointer(h, mp);
if (*ptr) /* if we have a non-null pointer */
return true;
if (mp->mp_list[h] < ptrs)
mp->mp_list[h]++;
else
return false; /* no more pointers in this buffer */
}
}
/* States driving the switch loop in trunc_dealloc() */
enum dealloc_states {
	/* Strip a metapath with all buffers read in */
	DEALLOC_MP_FULL = 0,
	/* lower the metapath strip height */
	DEALLOC_MP_LOWER = 1,
	/* Fill in the metapath to the given height. */
	DEALLOC_FILL_MP = 2,
	/* process complete */
	DEALLOC_DONE = 3,
};
/**
* trunc_dealloc - truncate a file down to a desired size
* @ip: inode to truncate
* @newsize: The desired size of the file
*
* This function truncates a file to newsize. It works from the
* bottom up, and from the right to the left. In other words, it strips off
* the highest layer (data) before stripping any of the metadata. Doing it
* this way is best in case the operation is interrupted by power failure, etc.
* The dinode is rewritten in every transaction to guarantee integrity.
*/
static int trunc_dealloc(struct gfs2_inode *ip, u64 newsize)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct metapath mp;
struct buffer_head *dibh, *bh;
struct gfs2_holder rd_gh;
u64 lblock;
__u16 nbof[GFS2_MAX_META_HEIGHT]; /* new beginning of truncation */
unsigned int strip_h = ip->i_height - 1;
u32 btotal = 0;
int ret, state;
int mp_h; /* metapath buffers are read in to this height */
sector_t last_ra = 0;
u64 prev_bnr = 0;
bool preserve1; /* need to preserve the first meta pointer? */
if (!newsize)
lblock = 0;
else
lblock = (size - 1) >> sdp->sd_sb.sb_bsize_shift;
lblock = (newsize - 1) >> sdp->sd_sb.sb_bsize_shift;
memset(&mp, 0, sizeof(mp));
find_metapath(sdp, lblock, &mp, ip->i_height);
error = gfs2_rindex_update(sdp);
if (error)
return error;
error = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
if (error)
return error;
memcpy(&nbof, &mp.mp_list, sizeof(nbof));
while (height--) {
struct strip_mine sm;
sm.sm_first = !!size;
sm.sm_height = height;
ret = gfs2_meta_inode_buffer(ip, &dibh);
if (ret)
return ret;
error = recursive_scan(ip, NULL, &mp, 0, 0, 1, &sm);
if (error)
mp.mp_bh[0] = dibh;
ret = lookup_metapath(ip, &mp);
if (ret == ip->i_height)
state = DEALLOC_MP_FULL; /* We have a complete metapath */
else
state = DEALLOC_FILL_MP; /* deal with partial metapath */
ret = gfs2_rindex_update(sdp);
if (ret)
goto out_metapath;
ret = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
if (ret)
goto out_metapath;
gfs2_holder_mark_uninitialized(&rd_gh);
mp_h = strip_h;
while (state != DEALLOC_DONE) {
switch (state) {
/* Truncate a full metapath at the given strip height.
* Note that strip_h == mp_h in order to be in this state. */
case DEALLOC_MP_FULL:
if (mp_h > 0) { /* issue read-ahead on metadata */
__be64 *top;
bh = mp.mp_bh[mp_h - 1];
if (bh->b_blocknr != last_ra) {
last_ra = bh->b_blocknr;
top = metaptr1(mp_h - 1, &mp);
gfs2_metapath_ra(ip->i_gl, bh, top);
}
}
/* If we're truncating to a non-zero size and the mp is
at the beginning of file for the strip height, we
need to preserve the first metadata pointer. */
preserve1 = (newsize &&
(mp.mp_list[mp_h] == nbof[mp_h]));
bh = mp.mp_bh[mp_h];
gfs2_assert_withdraw(sdp, bh);
if (gfs2_assert_withdraw(sdp,
prev_bnr != bh->b_blocknr)) {
printk(KERN_EMERG "GFS2: fsid=%s:inode %llu, "
"block:%llu, i_h:%u, s_h:%u, mp_h:%u\n",
sdp->sd_fsname,
(unsigned long long)ip->i_no_addr,
prev_bnr, ip->i_height, strip_h, mp_h);
}
prev_bnr = bh->b_blocknr;
ret = sweep_bh_for_rgrps(ip, &rd_gh, &mp, &btotal,
mp_h, preserve1);
/* If we hit an error or just swept dinode buffer,
just exit. */
if (ret || !mp_h) {
state = DEALLOC_DONE;
break;
}
state = DEALLOC_MP_LOWER;
break;
/* lower the metapath strip height */
case DEALLOC_MP_LOWER:
/* We're done with the current buffer, so release it,
unless it's the dinode buffer. Then back up to the
previous pointer. */
if (mp_h) {
brelse(mp.mp_bh[mp_h]);
mp.mp_bh[mp_h] = NULL;
}
/* If we can't get any lower in height, we've stripped
off all we can. Next step is to back up and start
stripping the previous level of metadata. */
if (mp_h == 0) {
strip_h--;
memcpy(&mp.mp_list, &nbof, sizeof(nbof));
mp_h = strip_h;
state = DEALLOC_FILL_MP;
break;
}
mp.mp_list[mp_h] = 0;
mp_h--; /* search one metadata height down */
if (mp.mp_list[mp_h] >= hptrs(sdp, mp_h) - 1)
break; /* loop around in the same state */
mp.mp_list[mp_h]++;
/* Here we've found a part of the metapath that is not
* allocated. We need to search at that height for the
* next non-null pointer. */
if (find_nonnull_ptr(sdp, &mp, mp_h)) {
state = DEALLOC_FILL_MP;
mp_h++;
}
/* No more non-null pointers at this height. Back up
to the previous height and try again. */
break; /* loop around in the same state */
/* Fill the metapath with buffers to the given height. */
case DEALLOC_FILL_MP:
/* Fill the buffers out to the current height. */
ret = fillup_metapath(ip, &mp, mp_h);
if (ret < 0)
goto out;
/* If buffers found for the entire strip height */
if ((ret == ip->i_height) && (mp_h == strip_h)) {
state = DEALLOC_MP_FULL;
break;
}
if (ret < ip->i_height) /* We have a partial height */
mp_h = ret - 1;
/* If we find a non-null block pointer, crawl a bit
higher up in the metapath and try again, otherwise
we need to look lower for a new starting point. */
if (find_nonnull_ptr(sdp, &mp, mp_h))
mp_h++;
else
state = DEALLOC_MP_LOWER;
break;
}
}
gfs2_quota_unhold(ip);
if (btotal) {
if (current->journal_info == NULL) {
ret = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS +
RES_QUOTA, 0);
if (ret)
goto out;
down_write(&ip->i_rw_mutex);
}
gfs2_statfs_change(sdp, 0, +btotal, 0);
gfs2_quota_change(ip, -(s64)btotal, ip->i_inode.i_uid,
ip->i_inode.i_gid);
ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
gfs2_trans_add_meta(ip->i_gl, dibh);
gfs2_dinode_out(ip, dibh->b_data);
up_write(&ip->i_rw_mutex);
gfs2_trans_end(sdp);
}
return error;
out:
if (gfs2_holder_initialized(&rd_gh))
gfs2_glock_dq_uninit(&rd_gh);
if (current->journal_info) {
up_write(&ip->i_rw_mutex);
gfs2_trans_end(sdp);
cond_resched();
}
gfs2_quota_unhold(ip);
out_metapath:
release_metapath(&mp);
return ret;
}
static int trunc_end(struct gfs2_inode *ip)

View File

@ -911,11 +911,15 @@ out_qunlock:
static long gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
{
struct inode *inode = file_inode(file);
struct gfs2_sbd *sdp = GFS2_SB(inode);
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_holder gh;
int ret;
if ((mode & ~FALLOC_FL_KEEP_SIZE) || gfs2_is_jdata(ip))
if (mode & ~FALLOC_FL_KEEP_SIZE)
return -EOPNOTSUPP;
/* fallocate is needed by gfs2_grow to reserve space in the rindex */
if (gfs2_is_jdata(ip) && inode != sdp->sd_rindex)
return -EOPNOTSUPP;
inode_lock(inode);

View File

@ -73,7 +73,7 @@ static DEFINE_SPINLOCK(lru_lock);
static struct rhashtable_params ht_parms = {
.nelem_hint = GFS2_GL_HASH_SIZE * 3 / 4,
.key_len = sizeof(struct lm_lockname),
.key_len = offsetofend(struct lm_lockname, ln_type),
.key_offset = offsetof(struct gfs2_glock, gl_name),
.head_offset = offsetof(struct gfs2_glock, gl_node),
};
@ -449,6 +449,9 @@ __acquires(&gl->gl_lockref.lock)
unsigned int lck_flags = (unsigned int)(gh ? gh->gh_flags : 0);
int ret;
if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) &&
target != LM_ST_UNLOCKED)
return;
lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP |
LM_FLAG_PRIORITY);
GLOCK_BUG_ON(gl, gl->gl_state == target);
@ -484,7 +487,8 @@ __acquires(&gl->gl_lockref.lock)
}
else if (ret) {
pr_err("lm_lock ret %d\n", ret);
GLOCK_BUG_ON(gl, 1);
GLOCK_BUG_ON(gl, !test_bit(SDF_SHUTDOWN,
&sdp->sd_flags));
}
} else { /* lock_nolock */
finish_xmote(gl, target);
@ -653,10 +657,10 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
struct lm_lockname name = { .ln_number = number,
.ln_type = glops->go_type,
.ln_sbd = sdp };
struct gfs2_glock *gl, *tmp = NULL;
struct gfs2_glock *gl, *tmp;
struct address_space *mapping;
struct kmem_cache *cachep;
int ret, tries = 0;
int ret = 0;
rcu_read_lock();
gl = rhashtable_lookup_fast(&gl_hash_table, &name, ht_parms);
@ -721,35 +725,32 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
}
again:
ret = rhashtable_lookup_insert_fast(&gl_hash_table, &gl->gl_node,
ht_parms);
if (ret == 0) {
rcu_read_lock();
tmp = rhashtable_lookup_get_insert_fast(&gl_hash_table, &gl->gl_node,
ht_parms);
if (!tmp) {
*glp = gl;
return 0;
goto out;
}
if (IS_ERR(tmp)) {
ret = PTR_ERR(tmp);
goto out_free;
}
if (lockref_get_not_dead(&tmp->gl_lockref)) {
*glp = tmp;
goto out_free;
}
rcu_read_unlock();
cond_resched();
goto again;
if (ret == -EEXIST) {
ret = 0;
rcu_read_lock();
tmp = rhashtable_lookup_fast(&gl_hash_table, &name, ht_parms);
if (tmp == NULL || !lockref_get_not_dead(&tmp->gl_lockref)) {
if (++tries < 100) {
rcu_read_unlock();
cond_resched();
goto again;
}
tmp = NULL;
ret = -ENOMEM;
}
rcu_read_unlock();
} else {
WARN_ON_ONCE(ret);
}
out_free:
kfree(gl->gl_lksb.sb_lvbptr);
kmem_cache_free(cachep, gl);
atomic_dec(&sdp->sd_glock_disposal);
*glp = tmp;
out:
rcu_read_unlock();
return ret;
}
@ -1918,10 +1919,10 @@ static const struct seq_operations gfs2_sbstats_seq_ops = {
#define GFS2_SEQ_GOODSIZE min(PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER, 65536UL)
static int gfs2_glocks_open(struct inode *inode, struct file *file)
static int __gfs2_glocks_open(struct inode *inode, struct file *file,
const struct seq_operations *ops)
{
int ret = seq_open_private(file, &gfs2_glock_seq_ops,
sizeof(struct gfs2_glock_iter));
int ret = seq_open_private(file, ops, sizeof(struct gfs2_glock_iter));
if (ret == 0) {
struct seq_file *seq = file->private_data;
struct gfs2_glock_iter *gi = seq->private;
@ -1932,11 +1933,16 @@ static int gfs2_glocks_open(struct inode *inode, struct file *file)
if (seq->buf)
seq->size = GFS2_SEQ_GOODSIZE;
gi->gl = NULL;
ret = rhashtable_walk_init(&gl_hash_table, &gi->hti, GFP_KERNEL);
rhashtable_walk_enter(&gl_hash_table, &gi->hti);
}
return ret;
}
/*
 * Open handler that delegates to __gfs2_glocks_open(), selecting the
 * gfs2_glock_seq_ops sequence operations. Returns whatever the helper returns.
 */
static int gfs2_glocks_open(struct inode *inode, struct file *file)
{
return __gfs2_glocks_open(inode, file, &gfs2_glock_seq_ops);
}
static int gfs2_glocks_release(struct inode *inode, struct file *file)
{
struct seq_file *seq = file->private_data;
@ -1949,20 +1955,7 @@ static int gfs2_glocks_release(struct inode *inode, struct file *file)
static int gfs2_glstats_open(struct inode *inode, struct file *file)
{
int ret = seq_open_private(file, &gfs2_glstats_seq_ops,
sizeof(struct gfs2_glock_iter));
if (ret == 0) {
struct seq_file *seq = file->private_data;
struct gfs2_glock_iter *gi = seq->private;
gi->sdp = inode->i_private;
gi->last_pos = 0;
seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN);
if (seq->buf)
seq->size = GFS2_SEQ_GOODSIZE;
gi->gl = NULL;
ret = rhashtable_walk_init(&gl_hash_table, &gi->hti, GFP_KERNEL);
}
return ret;
return __gfs2_glocks_open(inode, file, &gfs2_glstats_seq_ops);
}
static int gfs2_sbstats_open(struct inode *inode, struct file *file)

View File

@ -203,11 +203,15 @@ enum {
DFL_DLM_RECOVERY = 6,
};
/*
* We are using struct lm_lockname as an rhashtable key. Avoid holes within
* the struct; padding at the end is fine.
*/
struct lm_lockname {
struct gfs2_sbd *ln_sbd;
u64 ln_number;
struct gfs2_sbd *ln_sbd;
unsigned int ln_type;
} __packed __aligned(sizeof(int));
};
#define lm_name_equal(name1, name2) \
(((name1)->ln_number == (name2)->ln_number) && \

View File

@ -202,8 +202,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
fail_refresh:
ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
ip->i_iopen_gh.gh_gl->gl_object = NULL;
gfs2_glock_dq_wait(&ip->i_iopen_gh);
gfs2_holder_uninit(&ip->i_iopen_gh);
gfs2_glock_dq_uninit(&ip->i_iopen_gh);
fail_put:
if (io_gl)
gfs2_glock_put(io_gl);
@ -667,6 +666,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
ip->i_height = 0;
ip->i_depth = 0;
ip->i_entries = 0;
ip->i_no_addr = 0; /* Temporarily zero until real addr is assigned */
switch(mode & S_IFMT) {
case S_IFREG:

View File

@ -483,13 +483,6 @@ void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
}
}
/*
 * rgrp_contains_block - test whether a block number falls inside an rgrp
 * @rgd: the resource group descriptor
 * @block: the block number to test
 *
 * Returns: non-zero when rd_data0 <= @block < rd_data0 + rd_data, else 0
 */
static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block)
{
u64 first = rgd->rd_data0;
/* "last" is exclusive: one past the final block counted by rd_data */
u64 last = first + rgd->rd_data;
return first <= block && block < last;
}
/**
* gfs2_blk2rgrpd - Find resource group for a given data/meta block number
* @sdp: The GFS2 superblock

View File

@ -83,5 +83,12 @@ static inline bool gfs2_rs_active(const struct gfs2_blkreserv *rs)
return rs && !RB_EMPTY_NODE(&rs->rs_node);
}
/*
 * rgrp_contains_block - test whether a block number falls inside an rgrp
 * @rgd: the resource group descriptor
 * @block: the block number to test
 *
 * The range checked is half-open: [rd_data0, rd_data0 + rd_data).
 *
 * Returns: 1 when @block is inside that range, 0 otherwise
 */
static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block)
{
	const u64 start = rgd->rd_data0;

	if (block < start)
		return 0;

	return block < start + rgd->rd_data;
}
extern void check_and_update_goal(struct gfs2_inode *ip);
#endif /* __RGRP_DOT_H__ */

View File

@ -793,7 +793,8 @@ static void gfs2_dirty_inode(struct inode *inode, int flags)
if (!(flags & (I_DIRTY_DATASYNC|I_DIRTY_SYNC)))
return;
if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
return;
if (!gfs2_glock_is_locked_by_me(ip->i_gl)) {
ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
if (ret) {
@ -1538,8 +1539,7 @@ static void gfs2_evict_inode(struct inode *inode)
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, &gh);
if (unlikely(error)) {
ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
gfs2_glock_dq_wait(&ip->i_iopen_gh);
gfs2_holder_uninit(&ip->i_iopen_gh);
gfs2_glock_dq_uninit(&ip->i_iopen_gh);
goto out;
}
@ -1617,7 +1617,7 @@ out_unlock:
if (gfs2_holder_initialized(&ip->i_iopen_gh)) {
if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) {
ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
gfs2_glock_dq_wait(&ip->i_iopen_gh);
gfs2_glock_dq(&ip->i_iopen_gh);
}
gfs2_holder_uninit(&ip->i_iopen_gh);
}
@ -1639,8 +1639,7 @@ out:
if (gfs2_holder_initialized(&ip->i_iopen_gh)) {
ip->i_iopen_gh.gh_gl->gl_object = NULL;
ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
gfs2_glock_dq_wait(&ip->i_iopen_gh);
gfs2_holder_uninit(&ip->i_iopen_gh);
gfs2_glock_dq_uninit(&ip->i_iopen_gh);
}
}