xfs: update for 4.1-rc6
Changes in this update:
o regression fix for new rename whiteout code
o regression fixes for new superblock generic per-cpu counter code
o fix for incorrect error return sign introduced in 3.17
o metadata corruption fixes that need to go back to -stable kernels

-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.12 (GNU/Linux)

iQIcBAABAgAGBQJVaO0JAAoJEK3oKUf0dfodd4UQANRdfXnUrpyQGhVS7HFoFoVt
FIQ52pPGbMu72+DqHc+Q41uvgAPe65LFB2VUL6CUGCMExstF72F5+QonzppMgkMo
unPER3eB8ya03SY+Kp+803ZGgzI2Nl2M6w8Kof730/RUk56PTGYIx4eLXd6iZSli
RsYjw8JDbeue5OQo5FPmLCSQ/Kr5ZJXbgWVPyWkKg9aCcXLN5YSJIV3xcMTK9Q2I
LqqODkyatnGc6YxGAKddS7Xzt1ntlZgbe5mndQw04a2g0Lf6emPH5r8b0UJXIu96
advOBX0pEbad4FeFS6Mo5D+nNCaaNP4WzN7wgdb+BYNVw3ss4Ebam7+yY6Gexg6y
bzZOEkk9saL4YeBDgyYICNu7kG4BRVKRQiiX220G6SFXM3nqbl7qBPb3kVFyDpcI
RRuFJ0ZV0kFJ+3IQ4xVnIh6nootceRk/mvZaK5HhLhQLzklpZ8fj4HF3oBDUAnvN
wNd+7GoZy7zldjCkbF4BP3GjUeW+b9ngrCNc+bFXi5cUbdECXAa2krjxyY+MlQF2
veNVVcsoRdfeM0VjJh2/piGJxMWIlRqXdKzPKsfMWnlIaJ6YyslfbSq+2K7LxgGR
Ho3Sjt0oUuPMZ9F/Mjj+XDqwmzgooUHXNyDBxhGXBNBPjApcRLcb2vQ2SrWEmeGJ
vZmC2R1ZoGdBJg8a55BT
=w5SP
-----END PGP SIGNATURE-----

Merge tag 'xfs-for-linus-4.1-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs

Pull xfs fixes from Dave Chinner:
 "This is a little larger than I'd like late in the release cycle, but
  all the fixes are for regressions introduced in the 4.1-rc1 merge, or
  are needed back in -stable kernels fairly quickly as they are
  filesystem corruption or userspace visible correctness issues.

  Changes in this update:

   - regression fix for new rename whiteout code

   - regression fixes for new superblock generic per-cpu counter code

   - fix for incorrect error return sign introduced in 3.17

   - metadata corruption fixes that need to go back to -stable kernels"

* tag 'xfs-for-linus-4.1-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs:
  xfs: fix broken i_nlink accounting for whiteout tmpfile inode
  xfs: xfs_iozero can return positive errno
  xfs: xfs_attr_inactive leaves inconsistent attr fork state behind
  xfs: extent size hints can round up extents past MAXEXTLEN
  xfs: inode and free block counters need to use __percpu_counter_compare
  percpu_counter: batch size aware __percpu_counter_compare()
  xfs: use percpu_counter_read_positive for mp->m_icount
commit 1be44e234b
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -574,8 +574,8 @@ xfs_attr_shortform_add(xfs_da_args_t *args, int forkoff)
  * After the last attribute is removed revert to original inode format,
  * making all literal area available to the data fork once more.
  */
-STATIC void
-xfs_attr_fork_reset(
+void
+xfs_attr_fork_remove(
 	struct xfs_inode	*ip,
 	struct xfs_trans	*tp)
 {
@@ -641,7 +641,7 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
 	    (mp->m_flags & XFS_MOUNT_ATTR2) &&
 	    (dp->i_d.di_format != XFS_DINODE_FMT_BTREE) &&
 	    !(args->op_flags & XFS_DA_OP_ADDNAME)) {
-		xfs_attr_fork_reset(dp, args->trans);
+		xfs_attr_fork_remove(dp, args->trans);
 	} else {
 		xfs_idata_realloc(dp, -size, XFS_ATTR_FORK);
 		dp->i_d.di_forkoff = xfs_attr_shortform_bytesfit(dp, totsize);
@@ -905,7 +905,7 @@ xfs_attr3_leaf_to_shortform(
 	if (forkoff == -1) {
 		ASSERT(dp->i_mount->m_flags & XFS_MOUNT_ATTR2);
 		ASSERT(dp->i_d.di_format != XFS_DINODE_FMT_BTREE);
-		xfs_attr_fork_reset(dp, args->trans);
+		xfs_attr_fork_remove(dp, args->trans);
 		goto out;
 	}
 
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h
@@ -53,7 +53,7 @@ int	xfs_attr_shortform_remove(struct xfs_da_args *args);
 int	xfs_attr_shortform_list(struct xfs_attr_list_context *context);
 int	xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp);
 int	xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes);
-
+void	xfs_attr_fork_remove(struct xfs_inode *ip, struct xfs_trans *tp);
 
 /*
  * Internal routines when attribute fork size == XFS_LBSIZE(mp).
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
@@ -3224,12 +3224,24 @@ xfs_bmap_extsize_align(
 		align_alen += temp;
 		align_off -= temp;
 	}
-	/*
-	 * Same adjustment for the end of the requested area.
-	 */
-	if ((temp = (align_alen % extsz))) {
+
+	/* Same adjustment for the end of the requested area. */
+	temp = (align_alen % extsz);
+	if (temp)
 		align_alen += extsz - temp;
-	}
+
+	/*
+	 * For large extent hint sizes, the aligned extent might be larger than
+	 * MAXEXTLEN. In that case, reduce the size by an extsz so that it pulls
+	 * the length back under MAXEXTLEN. The outer allocation loops handle
+	 * short allocation just fine, so it is safe to do this. We only want to
+	 * do it when we are forced to, though, because it means more allocation
+	 * operations are required.
+	 */
+	while (align_alen > MAXEXTLEN)
+		align_alen -= extsz;
+	ASSERT(align_alen <= MAXEXTLEN);
+
 	/*
 	 * If the previous block overlaps with this proposed allocation
 	 * then move the start forward without adjusting the length.
@@ -3318,7 +3330,9 @@ xfs_bmap_extsize_align(
 		return -EINVAL;
 	} else {
 		ASSERT(orig_off >= align_off);
-		ASSERT(orig_end <= align_off + align_alen);
+		/* see MAXEXTLEN handling above */
+		ASSERT(orig_end <= align_off + align_alen ||
+		       align_alen + extsz > MAXEXTLEN);
 	}
 
 #ifdef DEBUG
@@ -4099,13 +4113,6 @@ xfs_bmapi_reserve_delalloc(
 	/* Figure out the extent size, adjust alen */
 	extsz = xfs_get_extsz_hint(ip);
 	if (extsz) {
-		/*
-		 * Make sure we don't exceed a single extent length when we
-		 * align the extent by reducing length we are going to
-		 * allocate by the maximum amount extent size aligment may
-		 * require.
-		 */
-		alen = XFS_FILBLKS_MIN(len, MAXEXTLEN - (2 * extsz - 1));
 		error = xfs_bmap_extsize_align(mp, got, prev, extsz, rt, eof,
 					       1, 0, &aoff, &alen);
 		ASSERT(!error);
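To make the overflow concrete, here is a small standalone sketch of the alignment arithmetic from the first hunk above. The extent size hint and request values are invented for illustration; MAXEXTLEN mirrors XFS's 21-bit on-disk extent length limit:

#include <stdio.h>
#include <stdint.h>

#define MAXEXTLEN ((uint64_t)0x001fffff)        /* 21-bit extent length limit: 2097151 blocks */

int main(void)
{
        uint64_t extsz = 1 << 20;               /* invented 1M-block extent size hint */
        uint64_t align_off = extsz - 8;         /* request start, not hint-aligned */
        uint64_t align_alen = MAXEXTLEN - 8;    /* request length, already under the limit */
        uint64_t temp;

        /* round the start of the range down to the hint... */
        temp = align_off % extsz;
        if (temp) {
                align_alen += temp;
                align_off -= temp;
        }
        /* ...and the end of the range up to the hint */
        temp = align_alen % extsz;
        if (temp)
                align_alen += extsz - temp;

        printf("aligned length %llu exceeds MAXEXTLEN %llu\n",
               (unsigned long long)align_alen, (unsigned long long)MAXEXTLEN);

        /* the fix: pull the length back under MAXEXTLEN one hint at a time */
        while (align_alen > MAXEXTLEN)
                align_alen -= extsz;

        printf("clamped length %llu\n", (unsigned long long)align_alen);
        return 0;
}

With these numbers both rounding steps push the length to three full hints (3145728 blocks), well past MAXEXTLEN; the while loop pulls it back to one hint, and the outer allocation loops handle the resulting short allocation.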
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
@@ -376,7 +376,7 @@ xfs_ialloc_ag_alloc(
 	 */
 	newlen = args.mp->m_ialloc_inos;
 	if (args.mp->m_maxicount &&
-	    percpu_counter_read(&args.mp->m_icount) + newlen >
+	    percpu_counter_read_positive(&args.mp->m_icount) + newlen >
 							args.mp->m_maxicount)
 		return -ENOSPC;
 	args.minlen = args.maxlen = args.mp->m_ialloc_blks;
@@ -1339,10 +1339,13 @@ xfs_dialloc(
 	 * If we have already hit the ceiling of inode blocks then clear
 	 * okalloc so we scan all available agi structures for a free
 	 * inode.
+	 *
+	 * Read rough value of mp->m_icount by percpu_counter_read_positive,
+	 * which will sacrifice the preciseness but improve the performance.
 	 */
 	if (mp->m_maxicount &&
-	    percpu_counter_read(&mp->m_icount) + mp->m_ialloc_inos >
-							mp->m_maxicount) {
+	    percpu_counter_read_positive(&mp->m_icount) + mp->m_ialloc_inos
+							> mp->m_maxicount) {
 		noroom = 1;
 		okalloc = 0;
 	}
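The reason the raw read is unsafe here: percpu_counter_read() can transiently return a negative value while recent increments still sit in per-cpu batches, and feeding that signed value into a comparison against an unsigned limit wraps it to a huge number, producing a spurious ENOSPC. A standalone sketch with invented numbers:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        /* rough per-cpu read; the true count is small but positive */
        int64_t icount_read = -100;
        uint64_t ialloc_inos = 64;      /* inodes we want to allocate */
        uint64_t maxicount = 1000;      /* configured inode limit */

        /* signed rough read + unsigned limit: -36 converts to a huge
         * u64, so this comparison spuriously reports ENOSPC */
        if (icount_read + ialloc_inos > maxicount)
                printf("raw read: spurious ENOSPC (lhs = %llu)\n",
                       (unsigned long long)(icount_read + ialloc_inos));

        /* percpu_counter_read_positive() clamps negatives to zero first */
        int64_t positive = icount_read > 0 ? icount_read : 0;
        if (positive + ialloc_inos > maxicount)
                printf("read_positive: ENOSPC\n");
        else
                printf("read_positive: ok (lhs = %llu)\n",
                       (unsigned long long)(positive + ialloc_inos));
        return 0;
}

Clamping at zero trades a little precision for safety: the worst case is an allocation attempt that fails a later, accurate check, rather than a false ENOSPC returned to userspace.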
diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c
@@ -380,23 +380,31 @@ xfs_attr3_root_inactive(
 	return error;
 }
 
+/*
+ * xfs_attr_inactive kills all traces of an attribute fork on an inode. It
+ * removes both the on-disk and in-memory inode fork. Note that this also has to
+ * handle the condition of inodes without attributes but with an attribute fork
+ * configured, so we can't use xfs_inode_hasattr() here.
+ *
+ * The in-memory attribute fork is removed even on error.
+ */
 int
-xfs_attr_inactive(xfs_inode_t *dp)
+xfs_attr_inactive(
+	struct xfs_inode	*dp)
 {
-	xfs_trans_t *trans;
-	xfs_mount_t *mp;
-	int error;
+	struct xfs_trans	*trans;
+	struct xfs_mount	*mp;
+	int			cancel_flags = 0;
+	int			lock_mode = XFS_ILOCK_SHARED;
+	int			error = 0;
 
 	mp = dp->i_mount;
 	ASSERT(! XFS_NOT_DQATTACHED(mp, dp));
 
-	xfs_ilock(dp, XFS_ILOCK_SHARED);
-	if (!xfs_inode_hasattr(dp) ||
-	    dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
-		xfs_iunlock(dp, XFS_ILOCK_SHARED);
-		return 0;
-	}
-	xfs_iunlock(dp, XFS_ILOCK_SHARED);
+	xfs_ilock(dp, lock_mode);
+	if (!XFS_IFORK_Q(dp))
+		goto out_destroy_fork;
+	xfs_iunlock(dp, lock_mode);
 
 	/*
 	 * Start our first transaction of the day.
@@ -408,13 +416,18 @@ xfs_attr_inactive(xfs_inode_t *dp)
 	 * the inode in every transaction to let it float upward through
 	 * the log.
 	 */
+	lock_mode = 0;
 	trans = xfs_trans_alloc(mp, XFS_TRANS_ATTRINVAL);
 	error = xfs_trans_reserve(trans, &M_RES(mp)->tr_attrinval, 0, 0);
-	if (error) {
-		xfs_trans_cancel(trans, 0);
-		return error;
-	}
-	xfs_ilock(dp, XFS_ILOCK_EXCL);
+	if (error)
+		goto out_cancel;
+
+	lock_mode = XFS_ILOCK_EXCL;
+	cancel_flags = XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT;
+	xfs_ilock(dp, lock_mode);
+
+	if (!XFS_IFORK_Q(dp))
+		goto out_cancel;
 
 	/*
 	 * No need to make quota reservations here. We expect to release some
@@ -422,29 +435,31 @@ xfs_attr_inactive(xfs_inode_t *dp)
 	 */
 	xfs_trans_ijoin(trans, dp, 0);
 
-	/*
-	 * Decide on what work routines to call based on the inode size.
-	 */
-	if (!xfs_inode_hasattr(dp) ||
-	    dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
-		error = 0;
-		goto out;
-	}
-	error = xfs_attr3_root_inactive(&trans, dp);
-	if (error)
-		goto out;
+	/* invalidate and truncate the attribute fork extents */
+	if (dp->i_d.di_aformat != XFS_DINODE_FMT_LOCAL) {
+		error = xfs_attr3_root_inactive(&trans, dp);
+		if (error)
+			goto out_cancel;
 
-	error = xfs_itruncate_extents(&trans, dp, XFS_ATTR_FORK, 0);
-	if (error)
-		goto out;
+		error = xfs_itruncate_extents(&trans, dp, XFS_ATTR_FORK, 0);
+		if (error)
+			goto out_cancel;
+	}
+
+	/* Reset the attribute fork - this also destroys the in-core fork */
+	xfs_attr_fork_remove(dp, trans);
 
 	error = xfs_trans_commit(trans, XFS_TRANS_RELEASE_LOG_RES);
-	xfs_iunlock(dp, XFS_ILOCK_EXCL);
-
+	xfs_iunlock(dp, lock_mode);
 	return error;
 
-out:
-	xfs_trans_cancel(trans, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
-	xfs_iunlock(dp, XFS_ILOCK_EXCL);
+out_cancel:
+	xfs_trans_cancel(trans, cancel_flags);
+out_destroy_fork:
+	/* kill the in-core attr fork before we drop the inode lock */
+	if (dp->i_afp)
+		xfs_idestroy_fork(dp, XFS_ATTR_FORK);
+	if (lock_mode)
+		xfs_iunlock(dp, lock_mode);
 	return error;
 }
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
@@ -124,7 +124,7 @@ xfs_iozero(
 		status = 0;
 	} while (count);
 
-	return (-status);
+	return status;
 }
 
 int
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
@@ -1946,21 +1946,17 @@ xfs_inactive(
 	/*
 	 * If there are attributes associated with the file then blow them away
 	 * now.  The code calls a routine that recursively deconstructs the
-	 * attribute fork.  We need to just commit the current transaction
-	 * because we can't use it for xfs_attr_inactive().
+	 * attribute fork. If also blows away the in-core attribute fork.
 	 */
-	if (ip->i_d.di_anextents > 0) {
-		ASSERT(ip->i_d.di_forkoff != 0);
-
+	if (XFS_IFORK_Q(ip)) {
 		error = xfs_attr_inactive(ip);
 		if (error)
 			return;
 	}
 
-	if (ip->i_afp)
-		xfs_idestroy_fork(ip, XFS_ATTR_FORK);
-
+	ASSERT(!ip->i_afp);
 	ASSERT(ip->i_d.di_anextents == 0);
+	ASSERT(ip->i_d.di_forkoff == 0);
 
 	/*
 	 * Free the inode.
@@ -2883,7 +2879,13 @@ xfs_rename_alloc_whiteout(
 	if (error)
 		return error;
 
-	/* Satisfy xfs_bumplink that this is a real tmpfile */
+	/*
+	 * Prepare the tmpfile inode as if it were created through the VFS.
+	 * Otherwise, the link increment paths will complain about nlink 0->1.
+	 * Drop the link count as done by d_tmpfile(), complete the inode setup
+	 * and flag it as linkable.
+	 */
+	drop_nlink(VFS_I(tmpfile));
 	xfs_finish_inode_setup(tmpfile);
 	VFS_I(tmpfile)->i_state |= I_LINKABLE;
 
@@ -3151,7 +3153,7 @@ xfs_rename(
 	 * intermediate state on disk.
 	 */
 	if (wip) {
-		ASSERT(wip->i_d.di_nlink == 0);
+		ASSERT(VFS_I(wip)->i_nlink == 0 && wip->i_d.di_nlink == 0);
 		error = xfs_bumplink(tp, wip);
 		if (error)
 			goto out_trans_abort;
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
@@ -1084,14 +1084,18 @@ xfs_log_sbcount(xfs_mount_t *mp)
 	return xfs_sync_sb(mp, true);
 }
 
+/*
+ * Deltas for the inode count are +/-64, hence we use a large batch size
+ * of 128 so we don't need to take the counter lock on every update.
+ */
+#define XFS_ICOUNT_BATCH	128
 int
 xfs_mod_icount(
 	struct xfs_mount	*mp,
 	int64_t			delta)
 {
-	/* deltas are +/-64, hence the large batch size of 128. */
-	__percpu_counter_add(&mp->m_icount, delta, 128);
-	if (percpu_counter_compare(&mp->m_icount, 0) < 0) {
+	__percpu_counter_add(&mp->m_icount, delta, XFS_ICOUNT_BATCH);
+	if (__percpu_counter_compare(&mp->m_icount, 0, XFS_ICOUNT_BATCH) < 0) {
 		ASSERT(0);
 		percpu_counter_add(&mp->m_icount, -delta);
 		return -EINVAL;
@@ -1113,6 +1117,14 @@ xfs_mod_ifree(
 	return 0;
 }
 
+/*
+ * Deltas for the block count can vary from 1 to very large, but lock contention
+ * only occurs on frequent small block count updates such as in the delayed
+ * allocation path for buffered writes (page a time updates). Hence we set
+ * a large batch count (1024) to minimise global counter updates except when
+ * we get near to ENOSPC and we have to be very accurate with our updates.
+ */
+#define XFS_FDBLOCKS_BATCH	1024
 int
 xfs_mod_fdblocks(
 	struct xfs_mount	*mp,
@@ -1151,25 +1163,19 @@ xfs_mod_fdblocks(
 	 * Taking blocks away, need to be more accurate the closer we
 	 * are to zero.
 	 *
-	 * batch size is set to a maximum of 1024 blocks - if we are
-	 * allocating of freeing extents larger than this then we aren't
-	 * going to be hammering the counter lock so a lock per update
-	 * is not a problem.
-	 *
 	 * If the counter has a value of less than 2 * max batch size,
 	 * then make everything serialise as we are real close to
 	 * ENOSPC.
 	 */
-#define __BATCH	1024
-	if (percpu_counter_compare(&mp->m_fdblocks, 2 * __BATCH) < 0)
+	if (__percpu_counter_compare(&mp->m_fdblocks, 2 * XFS_FDBLOCKS_BATCH,
+				     XFS_FDBLOCKS_BATCH) < 0)
 		batch = 1;
 	else
-		batch = __BATCH;
-#undef __BATCH
+		batch = XFS_FDBLOCKS_BATCH;
 
 	__percpu_counter_add(&mp->m_fdblocks, delta, batch);
-	if (percpu_counter_compare(&mp->m_fdblocks,
-				   XFS_ALLOC_SET_ASIDE(mp)) >= 0) {
+	if (__percpu_counter_compare(&mp->m_fdblocks, XFS_ALLOC_SET_ASIDE(mp),
+				     XFS_FDBLOCKS_BATCH) >= 0) {
 		/* we had space! */
 		return 0;
 	}
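The effect of the batch choice above can be modelled in isolation. This is a toy sketch, not kernel code: toy_compare() mimics the rough-read shortcut that __percpu_counter_compare() uses, and the CPU count and free-block values are invented:

#include <stdio.h>
#include <stdlib.h>

#define XFS_FDBLOCKS_BATCH 1024         /* matches the hunk above */

/* The rough count is only trustworthy when it is further from the
 * threshold than the worst-case per-cpu drift of batch * nr_cpus;
 * otherwise fall back to a precise (and expensive) sum, modelled here
 * by the "precise" argument. */
static int toy_compare(long long rough, long long precise, long long rhs,
                       int batch, int nr_cpus)
{
        if (llabs(rough - rhs) > (long long)batch * nr_cpus)
                return rough > rhs ? 1 : -1;
        if (precise > rhs)
                return 1;
        return precise < rhs ? -1 : 0;
}

int main(void)
{
        int nr_cpus = 8;        /* invented machine size */
        long long rhs = 2 * XFS_FDBLOCKS_BATCH;

        /* plenty of space: the rough read alone decides, batch stays large */
        int r_far = toy_compare(100000, 100000, rhs,
                                XFS_FDBLOCKS_BATCH, nr_cpus);
        /* near ENOSPC: within batch * nr_cpus of the threshold, so the
         * precise sum decides and updates serialise with batch = 1 */
        int r_near = toy_compare(1500, 1500, rhs,
                                 XFS_FDBLOCKS_BATCH, nr_cpus);

        printf("far from ENOSPC: batch = %d\n",
               r_far < 0 ? 1 : XFS_FDBLOCKS_BATCH);
        printf("near ENOSPC:     batch = %d\n",
               r_near < 0 ? 1 : XFS_FDBLOCKS_BATCH);
        return 0;
}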
diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h
@@ -41,7 +41,12 @@ void percpu_counter_destroy(struct percpu_counter *fbc);
 void percpu_counter_set(struct percpu_counter *fbc, s64 amount);
 void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch);
 s64 __percpu_counter_sum(struct percpu_counter *fbc);
-int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs);
+int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch);
+
+static inline int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs)
+{
+	return __percpu_counter_compare(fbc, rhs, percpu_counter_batch);
+}
 
 static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount)
 {
@@ -116,6 +121,12 @@ static inline int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs)
 	return 0;
 }
 
+static inline int
+__percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch)
+{
+	return percpu_counter_compare(fbc, rhs);
+}
+
 static inline void
 percpu_counter_add(struct percpu_counter *fbc, s64 amount)
 {
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
@@ -197,13 +197,13 @@ static int percpu_counter_hotcpu_callback(struct notifier_block *nb,
  * Compare counter against given value.
  * Return 1 if greater, 0 if equal and -1 if less
  */
-int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs)
+int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch)
 {
 	s64	count;
 
 	count = percpu_counter_read(fbc);
 	/* Check to see if rough count will be sufficient for comparison */
-	if (abs(count - rhs) > (percpu_counter_batch*num_online_cpus())) {
+	if (abs(count - rhs) > (batch * num_online_cpus())) {
 		if (count > rhs)
 			return 1;
 		else
@@ -218,7 +218,7 @@ int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs)
 	else
 		return 0;
 }
-EXPORT_SYMBOL(percpu_counter_compare);
+EXPORT_SYMBOL(__percpu_counter_compare);
 
 static int __init percpu_counter_startup(void)
 {
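A hedged usage sketch of the new API (kernel-style; MY_BATCH, my_counter and my_try_reserve are invented names, not part of this patch): a counter updated with some batch must pass the same batch to the comparison, because the rough read can be off by up to batch * num_online_cpus():

#include <linux/percpu_counter.h>
#include <linux/errno.h>

/* invented batch size for a hypothetical subsystem counter */
#define MY_BATCH	256

static struct percpu_counter my_counter;

static int my_try_reserve(s64 amount, s64 limit)
{
	/* update with the same batch that the comparison will assume */
	__percpu_counter_add(&my_counter, amount, MY_BATCH);
	if (__percpu_counter_compare(&my_counter, limit, MY_BATCH) > 0) {
		/* overshot the limit: back the reservation out */
		__percpu_counter_add(&my_counter, -amount, MY_BATCH);
		return -ENOSPC;
	}
	return 0;
}

This is the same pattern xfs_mod_icount() and xfs_mod_fdblocks() follow above; using mismatched batch sizes would let the rough-read shortcut trust a value that could already be stale by more than the comparison allows for.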