ufs: don't use lock_ufs() for block pointers tree protection
* stores to block pointers are under per-inode seqlock (meta_lock) and mutex (truncate_mutex) * fetches of block pointers are either under truncate_mutex, or wrapped into seqretry loop on meta_lock * all changes of ->i_size are under truncate_mutex and i_mutex * all changes of ->i_lastfrag are under truncate_mutex It's similar to what ext2 is doing; the main difference is that unlike ext2 we can't rely upon the atomicity of stores into block pointers - on UFS2 they are 64bit. So we can't cut the corner when switching a pointer from NULL to non-NULL as we could in ext2_splice_branch() and need to use meta_lock on all modifications. We use seqlock where ext2 uses rwlock; ext2 could probably also benefit from such change... Another non-trivial difference is that with UFS we *cannot* have reader grab truncate_mutex in case of race - it has to keep retrying. That might be possible to change, but not until we lift tail unpacking several levels up in call chain. After that commit we do *NOT* hold fs-wide serialization on accesses to block pointers anymore. Moreover, lock_ufs() can become a normal mutex now - it's only used on statfs, remount and sync_fs and none of those uses are recursive. As the matter of fact, *now* it can be collapsed with ->s_lock, and be eventually replaced with saner per-cylinder-group spinlocks, but that's a separate story. Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
This commit is contained in:
parent
4af7b2c080
commit
724bb09fdc
|
@ -417,7 +417,9 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
|
|||
if (oldcount == 0) {
|
||||
result = ufs_alloc_fragments (inode, cgno, goal, count, err);
|
||||
if (result) {
|
||||
write_seqlock(&UFS_I(inode)->meta_lock);
|
||||
ufs_cpu_to_data_ptr(sb, p, result);
|
||||
write_sequnlock(&UFS_I(inode)->meta_lock);
|
||||
*err = 0;
|
||||
UFS_I(inode)->i_lastfrag =
|
||||
max(UFS_I(inode)->i_lastfrag, fragment + count);
|
||||
|
@ -473,7 +475,9 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
|
|||
ufs_change_blocknr(inode, fragment - oldcount, oldcount,
|
||||
uspi->s_sbbase + tmp,
|
||||
uspi->s_sbbase + result, locked_page);
|
||||
write_seqlock(&UFS_I(inode)->meta_lock);
|
||||
ufs_cpu_to_data_ptr(sb, p, result);
|
||||
write_sequnlock(&UFS_I(inode)->meta_lock);
|
||||
*err = 0;
|
||||
UFS_I(inode)->i_lastfrag = max(UFS_I(inode)->i_lastfrag,
|
||||
fragment + count);
|
||||
|
|
138
fs/ufs/inode.c
138
fs/ufs/inode.c
|
@ -41,8 +41,6 @@
|
|||
#include "swab.h"
|
||||
#include "util.h"
|
||||
|
||||
static u64 ufs_frag_map(struct inode *inode, sector_t frag, bool needs_lock);
|
||||
|
||||
static int ufs_block_to_path(struct inode *inode, sector_t i_block, sector_t offsets[4])
|
||||
{
|
||||
struct ufs_sb_private_info *uspi = UFS_SB(inode->i_sb)->s_uspi;
|
||||
|
@ -75,12 +73,53 @@ static int ufs_block_to_path(struct inode *inode, sector_t i_block, sector_t off
|
|||
return n;
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
void *p;
|
||||
union {
|
||||
__fs32 key32;
|
||||
__fs64 key64;
|
||||
};
|
||||
struct buffer_head *bh;
|
||||
} Indirect;
|
||||
|
||||
static inline int grow_chain32(struct ufs_inode_info *ufsi,
|
||||
struct buffer_head *bh, __fs32 *v,
|
||||
Indirect *from, Indirect *to)
|
||||
{
|
||||
Indirect *p;
|
||||
unsigned seq;
|
||||
to->bh = bh;
|
||||
do {
|
||||
seq = read_seqbegin(&ufsi->meta_lock);
|
||||
to->key32 = *(__fs32 *)(to->p = v);
|
||||
for (p = from; p <= to && p->key32 == *(__fs32 *)p->p; p++)
|
||||
;
|
||||
} while (read_seqretry(&ufsi->meta_lock, seq));
|
||||
return (p > to);
|
||||
}
|
||||
|
||||
static inline int grow_chain64(struct ufs_inode_info *ufsi,
|
||||
struct buffer_head *bh, __fs64 *v,
|
||||
Indirect *from, Indirect *to)
|
||||
{
|
||||
Indirect *p;
|
||||
unsigned seq;
|
||||
to->bh = bh;
|
||||
do {
|
||||
seq = read_seqbegin(&ufsi->meta_lock);
|
||||
to->key64 = *(__fs64 *)(to->p = v);
|
||||
for (p = from; p <= to && p->key64 == *(__fs64 *)p->p; p++)
|
||||
;
|
||||
} while (read_seqretry(&ufsi->meta_lock, seq));
|
||||
return (p > to);
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns the location of the fragment from
|
||||
* the beginning of the filesystem.
|
||||
*/
|
||||
|
||||
static u64 ufs_frag_map(struct inode *inode, sector_t frag, bool needs_lock)
|
||||
static u64 ufs_frag_map(struct inode *inode, sector_t frag)
|
||||
{
|
||||
struct ufs_inode_info *ufsi = UFS_I(inode);
|
||||
struct super_block *sb = inode->i_sb;
|
||||
|
@ -88,12 +127,10 @@ static u64 ufs_frag_map(struct inode *inode, sector_t frag, bool needs_lock)
|
|||
u64 mask = (u64) uspi->s_apbmask>>uspi->s_fpbshift;
|
||||
int shift = uspi->s_apbshift-uspi->s_fpbshift;
|
||||
sector_t offsets[4], *p;
|
||||
Indirect chain[4], *q = chain;
|
||||
int depth = ufs_block_to_path(inode, frag >> uspi->s_fpbshift, offsets);
|
||||
u64 ret = 0L;
|
||||
__fs32 block;
|
||||
__fs64 u2_block = 0L;
|
||||
unsigned flags = UFS_SB(sb)->s_flags;
|
||||
u64 temp = 0L;
|
||||
u64 res = 0;
|
||||
|
||||
UFSD(": frag = %llu depth = %d\n", (unsigned long long)frag, depth);
|
||||
UFSD(": uspi->s_fpbshift = %d ,uspi->s_apbmask = %x, mask=%llx\n",
|
||||
|
@ -101,59 +138,73 @@ static u64 ufs_frag_map(struct inode *inode, sector_t frag, bool needs_lock)
|
|||
(unsigned long long)mask);
|
||||
|
||||
if (depth == 0)
|
||||
return 0;
|
||||
goto no_block;
|
||||
|
||||
again:
|
||||
p = offsets;
|
||||
|
||||
if (needs_lock)
|
||||
lock_ufs(sb);
|
||||
if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2)
|
||||
goto ufs2;
|
||||
|
||||
block = ufsi->i_u1.i_data[*p++];
|
||||
if (!block)
|
||||
goto out;
|
||||
if (!grow_chain32(ufsi, NULL, &ufsi->i_u1.i_data[*p++], chain, q))
|
||||
goto changed;
|
||||
if (!q->key32)
|
||||
goto no_block;
|
||||
while (--depth) {
|
||||
__fs32 *ptr;
|
||||
struct buffer_head *bh;
|
||||
sector_t n = *p++;
|
||||
|
||||
bh = sb_bread(sb, uspi->s_sbbase + fs32_to_cpu(sb, block)+(n>>shift));
|
||||
bh = sb_bread(sb, uspi->s_sbbase +
|
||||
fs32_to_cpu(sb, q->key32) + (n>>shift));
|
||||
if (!bh)
|
||||
goto out;
|
||||
block = ((__fs32 *) bh->b_data)[n & mask];
|
||||
brelse (bh);
|
||||
if (!block)
|
||||
goto out;
|
||||
goto no_block;
|
||||
ptr = (__fs32 *)bh->b_data + (n & mask);
|
||||
if (!grow_chain32(ufsi, bh, ptr, chain, ++q))
|
||||
goto changed;
|
||||
if (!q->key32)
|
||||
goto no_block;
|
||||
}
|
||||
ret = (u64) (uspi->s_sbbase + fs32_to_cpu(sb, block) + (frag & uspi->s_fpbmask));
|
||||
goto out;
|
||||
res = fs32_to_cpu(sb, q->key32);
|
||||
goto found;
|
||||
|
||||
ufs2:
|
||||
u2_block = ufsi->i_u1.u2_i_data[*p++];
|
||||
if (!u2_block)
|
||||
goto out;
|
||||
|
||||
if (!grow_chain64(ufsi, NULL, &ufsi->i_u1.u2_i_data[*p++], chain, q))
|
||||
goto changed;
|
||||
if (!q->key64)
|
||||
goto no_block;
|
||||
|
||||
while (--depth) {
|
||||
__fs64 *ptr;
|
||||
struct buffer_head *bh;
|
||||
sector_t n = *p++;
|
||||
|
||||
|
||||
temp = (u64)(uspi->s_sbbase) + fs64_to_cpu(sb, u2_block);
|
||||
bh = sb_bread(sb, temp +(u64) (n>>shift));
|
||||
bh = sb_bread(sb, uspi->s_sbbase +
|
||||
fs64_to_cpu(sb, q->key64) + (n>>shift));
|
||||
if (!bh)
|
||||
goto out;
|
||||
u2_block = ((__fs64 *)bh->b_data)[n & mask];
|
||||
brelse(bh);
|
||||
if (!u2_block)
|
||||
goto out;
|
||||
goto no_block;
|
||||
ptr = (__fs64 *)bh->b_data + (n & mask);
|
||||
if (!grow_chain64(ufsi, bh, ptr, chain, ++q))
|
||||
goto changed;
|
||||
if (!q->key64)
|
||||
goto no_block;
|
||||
}
|
||||
temp = (u64)uspi->s_sbbase + fs64_to_cpu(sb, u2_block);
|
||||
ret = temp + (u64) (frag & uspi->s_fpbmask);
|
||||
res = fs64_to_cpu(sb, q->key64);
|
||||
found:
|
||||
res += uspi->s_sbbase + (frag & uspi->s_fpbmask);
|
||||
no_block:
|
||||
while (q > chain) {
|
||||
brelse(q->bh);
|
||||
q--;
|
||||
}
|
||||
return res;
|
||||
|
||||
out:
|
||||
if (needs_lock)
|
||||
unlock_ufs(sb);
|
||||
return ret;
|
||||
changed:
|
||||
while (q > chain) {
|
||||
brelse(q->bh);
|
||||
q--;
|
||||
}
|
||||
goto again;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -421,10 +472,9 @@ int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buffer_head
|
|||
int ret, err, new;
|
||||
unsigned long ptr,phys;
|
||||
u64 phys64 = 0;
|
||||
bool needs_lock = (sbi->mutex_owner != current);
|
||||
|
||||
if (!create) {
|
||||
phys64 = ufs_frag_map(inode, fragment, needs_lock);
|
||||
phys64 = ufs_frag_map(inode, fragment);
|
||||
UFSD("phys64 = %llu\n", (unsigned long long)phys64);
|
||||
if (phys64)
|
||||
map_bh(bh_result, sb, phys64);
|
||||
|
@ -438,8 +488,7 @@ int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buffer_head
|
|||
ret = 0;
|
||||
bh = NULL;
|
||||
|
||||
if (needs_lock)
|
||||
lock_ufs(sb);
|
||||
mutex_lock(&UFS_I(inode)->truncate_mutex);
|
||||
|
||||
UFSD("ENTER, ino %lu, fragment %llu\n", inode->i_ino, (unsigned long long)fragment);
|
||||
if (fragment >
|
||||
|
@ -501,8 +550,7 @@ out:
|
|||
set_buffer_new(bh_result);
|
||||
map_bh(bh_result, sb, phys);
|
||||
abort:
|
||||
if (needs_lock)
|
||||
unlock_ufs(sb);
|
||||
mutex_unlock(&UFS_I(inode)->truncate_mutex);
|
||||
|
||||
return err;
|
||||
|
||||
|
|
|
@ -1429,6 +1429,8 @@ static struct inode *ufs_alloc_inode(struct super_block *sb)
|
|||
return NULL;
|
||||
|
||||
ei->vfs_inode.i_version = 1;
|
||||
seqlock_init(&ei->meta_lock);
|
||||
mutex_init(&ei->truncate_mutex);
|
||||
return &ei->vfs_inode;
|
||||
}
|
||||
|
||||
|
|
|
@ -128,7 +128,9 @@ next1:
|
|||
tmp = ufs_data_ptr_to_cpu(sb, p);
|
||||
if (!tmp)
|
||||
continue;
|
||||
write_seqlock(&ufsi->meta_lock);
|
||||
ufs_data_ptr_clear(uspi, p);
|
||||
write_sequnlock(&ufsi->meta_lock);
|
||||
|
||||
if (free_count == 0) {
|
||||
frag_to_free = tmp;
|
||||
|
@ -157,7 +159,9 @@ next1:
|
|||
if (!tmp )
|
||||
ufs_panic(sb, "ufs_truncate_direct", "internal error");
|
||||
frag4 = ufs_fragnum (frag4);
|
||||
write_seqlock(&ufsi->meta_lock);
|
||||
ufs_data_ptr_clear(uspi, p);
|
||||
write_sequnlock(&ufsi->meta_lock);
|
||||
|
||||
ufs_free_fragments (inode, tmp, frag4);
|
||||
mark_inode_dirty(inode);
|
||||
|
@ -199,7 +203,9 @@ static int ufs_trunc_indirect(struct inode *inode, u64 offset, void *p)
|
|||
return 1;
|
||||
}
|
||||
if (!ind_ubh) {
|
||||
write_seqlock(&UFS_I(inode)->meta_lock);
|
||||
ufs_data_ptr_clear(uspi, p);
|
||||
write_sequnlock(&UFS_I(inode)->meta_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -210,7 +216,9 @@ static int ufs_trunc_indirect(struct inode *inode, u64 offset, void *p)
|
|||
if (!tmp)
|
||||
continue;
|
||||
|
||||
write_seqlock(&UFS_I(inode)->meta_lock);
|
||||
ufs_data_ptr_clear(uspi, ind);
|
||||
write_sequnlock(&UFS_I(inode)->meta_lock);
|
||||
ubh_mark_buffer_dirty(ind_ubh);
|
||||
if (free_count == 0) {
|
||||
frag_to_free = tmp;
|
||||
|
@ -235,7 +243,9 @@ static int ufs_trunc_indirect(struct inode *inode, u64 offset, void *p)
|
|||
break;
|
||||
if (i >= uspi->s_apb) {
|
||||
tmp = ufs_data_ptr_to_cpu(sb, p);
|
||||
write_seqlock(&UFS_I(inode)->meta_lock);
|
||||
ufs_data_ptr_clear(uspi, p);
|
||||
write_sequnlock(&UFS_I(inode)->meta_lock);
|
||||
|
||||
ubh_bforget(ind_ubh);
|
||||
ufs_free_blocks (inode, tmp, uspi->s_fpb);
|
||||
|
@ -278,7 +288,9 @@ static int ufs_trunc_dindirect(struct inode *inode, u64 offset, void *p)
|
|||
return 1;
|
||||
}
|
||||
if (!dind_bh) {
|
||||
write_seqlock(&UFS_I(inode)->meta_lock);
|
||||
ufs_data_ptr_clear(uspi, p);
|
||||
write_sequnlock(&UFS_I(inode)->meta_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -297,7 +309,9 @@ static int ufs_trunc_dindirect(struct inode *inode, u64 offset, void *p)
|
|||
break;
|
||||
if (i >= uspi->s_apb) {
|
||||
tmp = ufs_data_ptr_to_cpu(sb, p);
|
||||
write_seqlock(&UFS_I(inode)->meta_lock);
|
||||
ufs_data_ptr_clear(uspi, p);
|
||||
write_sequnlock(&UFS_I(inode)->meta_lock);
|
||||
|
||||
ubh_bforget(dind_bh);
|
||||
ufs_free_blocks(inode, tmp, uspi->s_fpb);
|
||||
|
@ -339,7 +353,9 @@ static int ufs_trunc_tindirect(struct inode *inode)
|
|||
return 1;
|
||||
}
|
||||
if (!tind_bh) {
|
||||
write_seqlock(&ufsi->meta_lock);
|
||||
ufs_data_ptr_clear(uspi, p);
|
||||
write_sequnlock(&ufsi->meta_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -355,7 +371,9 @@ static int ufs_trunc_tindirect(struct inode *inode)
|
|||
break;
|
||||
if (i >= uspi->s_apb) {
|
||||
tmp = ufs_data_ptr_to_cpu(sb, p);
|
||||
write_seqlock(&ufsi->meta_lock);
|
||||
ufs_data_ptr_clear(uspi, p);
|
||||
write_sequnlock(&ufsi->meta_lock);
|
||||
|
||||
ubh_bforget(tind_bh);
|
||||
ufs_free_blocks(inode, tmp, uspi->s_fpb);
|
||||
|
@ -447,7 +465,7 @@ static void __ufs_truncate_blocks(struct inode *inode)
|
|||
struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
|
||||
int retry;
|
||||
|
||||
lock_ufs(sb);
|
||||
mutex_lock(&ufsi->truncate_mutex);
|
||||
while (1) {
|
||||
retry = ufs_trunc_direct(inode);
|
||||
retry |= ufs_trunc_indirect(inode, UFS_IND_BLOCK,
|
||||
|
@ -465,7 +483,7 @@ static void __ufs_truncate_blocks(struct inode *inode)
|
|||
}
|
||||
|
||||
ufsi->i_lastfrag = DIRECT_FRAGMENT;
|
||||
unlock_ufs(sb);
|
||||
mutex_unlock(&ufsi->truncate_mutex);
|
||||
}
|
||||
|
||||
int ufs_truncate(struct inode *inode, loff_t size)
|
||||
|
|
|
@ -46,6 +46,8 @@ struct ufs_inode_info {
|
|||
__u32 i_oeftflag;
|
||||
__u16 i_osync;
|
||||
__u64 i_lastfrag;
|
||||
seqlock_t meta_lock;
|
||||
struct mutex truncate_mutex;
|
||||
__u32 i_dir_start_lookup;
|
||||
struct inode vfs_inode;
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue