xfs: remove "no-allocation" reservations for file creations
If we create a new file we will need an inode, and usually some metadata in the parent direction. Aiming for everything to go well despite the lack of a reservation leads to dirty transactions cancelled under a heavy create/delete load. This patch removes those nospace transactions, which will lead to slightly earlier ENOSPC on some workloads, but instead prevent file system shutdowns due to cancelling dirty transactions for others. A customer could observe assertations failures and shutdowns due to cancelation of dirty transactions during heavy NFS workloads as shown below: 2017-05-30 21:17:06 kernel: WARNING: [ 2670.728125] XFS: Assertion failed: error != -ENOSPC, file: fs/xfs/xfs_inode.c, line: 1262 2017-05-30 21:17:06 kernel: WARNING: [ 2670.728222] Call Trace: 2017-05-30 21:17:06 kernel: WARNING: [ 2670.728246] [<ffffffff81795daf>] dump_stack+0x63/0x81 2017-05-30 21:17:06 kernel: WARNING: [ 2670.728262] [<ffffffff810a1a5a>] warn_slowpath_common+0x8a/0xc0 2017-05-30 21:17:06 kernel: WARNING: [ 2670.728264] [<ffffffff810a1b8a>] warn_slowpath_null+0x1a/0x20 2017-05-30 21:17:06 kernel: WARNING: [ 2670.728285] [<ffffffffa01bf403>] asswarn+0x33/0x40 [xfs] 2017-05-30 21:17:06 kernel: WARNING: [ 2670.728308] [<ffffffffa01bb07e>] xfs_create+0x7be/0x7d0 [xfs] 2017-05-30 21:17:06 kernel: WARNING: [ 2670.728329] [<ffffffffa01b6ffb>] xfs_generic_create+0x1fb/0x2e0 [xfs] 2017-05-30 21:17:06 kernel: WARNING: [ 2670.728348] [<ffffffffa01b7114>] xfs_vn_mknod+0x14/0x20 [xfs] 2017-05-30 21:17:06 kernel: WARNING: [ 2670.728366] [<ffffffffa01b7153>] xfs_vn_create+0x13/0x20 [xfs] 2017-05-30 21:17:06 kernel: WARNING: [ 2670.728380] [<ffffffff81231de5>] vfs_create+0xd5/0x140 2017-05-30 21:17:06 kernel: WARNING: [ 2670.728390] [<ffffffffa045ddb9>] do_nfsd_create+0x499/0x610 [nfsd] 2017-05-30 21:17:06 kernel: WARNING: [ 2670.728396] [<ffffffffa0465fa5>] nfsd3_proc_create+0x135/0x210 [nfsd] 2017-05-30 21:17:06 kernel: WARNING: [ 2670.728401] [<ffffffffa04561e3>] nfsd_dispatch+0xc3/0x210 [nfsd] 2017-05-30 21:17:06 kernel: WARNING: [ 2670.728416] [<ffffffffa03bfa43>] svc_process_common+0x453/0x6f0 [sunrpc] 2017-05-30 21:17:06 kernel: WARNING: [ 2670.728423] [<ffffffffa03bfdf3>] svc_process+0x113/0x1f0 [sunrpc] 2017-05-30 21:17:06 kernel: WARNING: [ 2670.728427] [<ffffffffa0455bcf>] nfsd+0x10f/0x180 [nfsd] 2017-05-30 21:17:06 kernel: WARNING: [ 2670.728432] [<ffffffffa0455ac0>] ? nfsd_destroy+0x80/0x80 [nfsd] 2017-05-30 21:17:06 kernel: WARNING: [ 2670.728438] [<ffffffff810c0d58>] kthread+0xd8/0xf0 2017-05-30 21:17:06 kernel: WARNING: [ 2670.728441] [<ffffffff810c0c80>] ? kthread_create_on_node+0x1b0/0x1b0 2017-05-30 21:17:06 kernel: WARNING: [ 2670.728451] [<ffffffff8179d962>] ret_from_fork+0x42/0x70 2017-05-30 21:17:06 kernel: WARNING: [ 2670.728453] [<ffffffff810c0c80>] ? kthread_create_on_node+0x1b0/0x1b0 2017-05-30 21:17:06 kernel: WARNING: [ 2670.728454] ---[ end trace f9822c842fec81d4 ]--- 2017-05-30 21:17:06 kernel: ALERT: [ 2670.728477] XFS (sdb): Internal error xfs_trans_cancel at line 983 of file fs/xfs/xfs_trans.c. Caller xfs_create+0x4ee/0x7d0 [xfs] 2017-05-30 21:17:06 kernel: ALERT: [ 2670.728684] XFS (sdb): Corruption of in-memory data detected. Shutting down filesystem 2017-05-30 21:17:06 kernel: ALERT: [ 2670.728685] XFS (sdb): Please umount the filesystem and rectify the problem(s) Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
This commit is contained in:
parent
eaf0ec303b
commit
f59cf5c299
|
@ -920,8 +920,7 @@ STATIC xfs_agnumber_t
|
|||
xfs_ialloc_ag_select(
|
||||
xfs_trans_t *tp, /* transaction pointer */
|
||||
xfs_ino_t parent, /* parent directory inode number */
|
||||
umode_t mode, /* bits set to indicate file type */
|
||||
int okalloc) /* ok to allocate more space */
|
||||
umode_t mode) /* bits set to indicate file type */
|
||||
{
|
||||
xfs_agnumber_t agcount; /* number of ag's in the filesystem */
|
||||
xfs_agnumber_t agno; /* current ag number */
|
||||
|
@ -978,9 +977,6 @@ xfs_ialloc_ag_select(
|
|||
return agno;
|
||||
}
|
||||
|
||||
if (!okalloc)
|
||||
goto nextag;
|
||||
|
||||
if (!pag->pagf_init) {
|
||||
error = xfs_alloc_pagf_init(mp, tp, agno, flags);
|
||||
if (error)
|
||||
|
@ -1680,7 +1676,6 @@ xfs_dialloc(
|
|||
struct xfs_trans *tp,
|
||||
xfs_ino_t parent,
|
||||
umode_t mode,
|
||||
int okalloc,
|
||||
struct xfs_buf **IO_agbp,
|
||||
xfs_ino_t *inop)
|
||||
{
|
||||
|
@ -1692,6 +1687,7 @@ xfs_dialloc(
|
|||
int noroom = 0;
|
||||
xfs_agnumber_t start_agno;
|
||||
struct xfs_perag *pag;
|
||||
int okalloc = 1;
|
||||
|
||||
if (*IO_agbp) {
|
||||
/*
|
||||
|
@ -1707,7 +1703,7 @@ xfs_dialloc(
|
|||
* We do not have an agbp, so select an initial allocation
|
||||
* group for inode allocation.
|
||||
*/
|
||||
start_agno = xfs_ialloc_ag_select(tp, parent, mode, okalloc);
|
||||
start_agno = xfs_ialloc_ag_select(tp, parent, mode);
|
||||
if (start_agno == NULLAGNUMBER) {
|
||||
*inop = NULLFSINO;
|
||||
return 0;
|
||||
|
|
|
@ -81,7 +81,6 @@ xfs_dialloc(
|
|||
struct xfs_trans *tp, /* transaction pointer */
|
||||
xfs_ino_t parent, /* parent inode (directory) */
|
||||
umode_t mode, /* mode bits for new inode */
|
||||
int okalloc, /* ok to allocate more space */
|
||||
struct xfs_buf **agbp, /* buf for a.g. inode header */
|
||||
xfs_ino_t *inop); /* inode number allocated */
|
||||
|
||||
|
|
|
@ -749,7 +749,6 @@ xfs_ialloc(
|
|||
xfs_nlink_t nlink,
|
||||
dev_t rdev,
|
||||
prid_t prid,
|
||||
int okalloc,
|
||||
xfs_buf_t **ialloc_context,
|
||||
xfs_inode_t **ipp)
|
||||
{
|
||||
|
@ -765,7 +764,7 @@ xfs_ialloc(
|
|||
* Call the space management code to pick
|
||||
* the on-disk inode to be allocated.
|
||||
*/
|
||||
error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode, okalloc,
|
||||
error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode,
|
||||
ialloc_context, &ino);
|
||||
if (error)
|
||||
return error;
|
||||
|
@ -957,7 +956,6 @@ xfs_dir_ialloc(
|
|||
xfs_nlink_t nlink,
|
||||
dev_t rdev,
|
||||
prid_t prid, /* project id */
|
||||
int okalloc, /* ok to allocate new space */
|
||||
xfs_inode_t **ipp, /* pointer to inode; it will be
|
||||
locked. */
|
||||
int *committed)
|
||||
|
@ -988,8 +986,8 @@ xfs_dir_ialloc(
|
|||
* transaction commit so that no other process can steal
|
||||
* the inode(s) that we've just allocated.
|
||||
*/
|
||||
code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, okalloc,
|
||||
&ialloc_context, &ip);
|
||||
code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, &ialloc_context,
|
||||
&ip);
|
||||
|
||||
/*
|
||||
* Return an error if we were unable to allocate a new inode.
|
||||
|
@ -1061,7 +1059,7 @@ xfs_dir_ialloc(
|
|||
* this call should always succeed.
|
||||
*/
|
||||
code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid,
|
||||
okalloc, &ialloc_context, &ip);
|
||||
&ialloc_context, &ip);
|
||||
|
||||
/*
|
||||
* If we get an error at this point, return to the caller
|
||||
|
@ -1182,11 +1180,6 @@ xfs_create(
|
|||
xfs_flush_inodes(mp);
|
||||
error = xfs_trans_alloc(mp, tres, resblks, 0, 0, &tp);
|
||||
}
|
||||
if (error == -ENOSPC) {
|
||||
/* No space at all so try a "no-allocation" reservation */
|
||||
resblks = 0;
|
||||
error = xfs_trans_alloc(mp, tres, 0, 0, 0, &tp);
|
||||
}
|
||||
if (error)
|
||||
goto out_release_inode;
|
||||
|
||||
|
@ -1203,19 +1196,13 @@ xfs_create(
|
|||
if (error)
|
||||
goto out_trans_cancel;
|
||||
|
||||
if (!resblks) {
|
||||
error = xfs_dir_canenter(tp, dp, name);
|
||||
if (error)
|
||||
goto out_trans_cancel;
|
||||
}
|
||||
|
||||
/*
|
||||
* A newly created regular or special file just has one directory
|
||||
* entry pointing to them, but a directory also the "." entry
|
||||
* pointing to itself.
|
||||
*/
|
||||
error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev,
|
||||
prid, resblks > 0, &ip, NULL);
|
||||
error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev, prid, &ip,
|
||||
NULL);
|
||||
if (error)
|
||||
goto out_trans_cancel;
|
||||
|
||||
|
@ -1340,11 +1327,6 @@ xfs_create_tmpfile(
|
|||
tres = &M_RES(mp)->tr_create_tmpfile;
|
||||
|
||||
error = xfs_trans_alloc(mp, tres, resblks, 0, 0, &tp);
|
||||
if (error == -ENOSPC) {
|
||||
/* No space at all so try a "no-allocation" reservation */
|
||||
resblks = 0;
|
||||
error = xfs_trans_alloc(mp, tres, 0, 0, 0, &tp);
|
||||
}
|
||||
if (error)
|
||||
goto out_release_inode;
|
||||
|
||||
|
@ -1353,8 +1335,7 @@ xfs_create_tmpfile(
|
|||
if (error)
|
||||
goto out_trans_cancel;
|
||||
|
||||
error = xfs_dir_ialloc(&tp, dp, mode, 1, 0,
|
||||
prid, resblks > 0, &ip, NULL);
|
||||
error = xfs_dir_ialloc(&tp, dp, mode, 1, 0, prid, &ip, NULL);
|
||||
if (error)
|
||||
goto out_trans_cancel;
|
||||
|
||||
|
|
|
@ -428,7 +428,7 @@ xfs_extlen_t xfs_get_extsz_hint(struct xfs_inode *ip);
|
|||
xfs_extlen_t xfs_get_cowextsz_hint(struct xfs_inode *ip);
|
||||
|
||||
int xfs_dir_ialloc(struct xfs_trans **, struct xfs_inode *, umode_t,
|
||||
xfs_nlink_t, dev_t, prid_t, int,
|
||||
xfs_nlink_t, dev_t, prid_t,
|
||||
struct xfs_inode **, int *);
|
||||
|
||||
/* from xfs_file.c */
|
||||
|
|
|
@ -793,8 +793,8 @@ xfs_qm_qino_alloc(
|
|||
return error;
|
||||
|
||||
if (need_alloc) {
|
||||
error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip,
|
||||
&committed);
|
||||
error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, ip,
|
||||
&committed);
|
||||
if (error) {
|
||||
xfs_trans_cancel(tp);
|
||||
return error;
|
||||
|
|
|
@ -232,11 +232,6 @@ xfs_symlink(
|
|||
resblks = XFS_SYMLINK_SPACE_RES(mp, link_name->len, fs_blocks);
|
||||
|
||||
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_symlink, resblks, 0, 0, &tp);
|
||||
if (error == -ENOSPC && fs_blocks == 0) {
|
||||
resblks = 0;
|
||||
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_symlink, 0, 0, 0,
|
||||
&tp);
|
||||
}
|
||||
if (error)
|
||||
goto out_release_inode;
|
||||
|
||||
|
@ -259,14 +254,6 @@ xfs_symlink(
|
|||
if (error)
|
||||
goto out_trans_cancel;
|
||||
|
||||
/*
|
||||
* Check for ability to enter directory entry, if no space reserved.
|
||||
*/
|
||||
if (!resblks) {
|
||||
error = xfs_dir_canenter(tp, dp, link_name);
|
||||
if (error)
|
||||
goto out_trans_cancel;
|
||||
}
|
||||
/*
|
||||
* Initialize the bmap freelist prior to calling either
|
||||
* bmapi or the directory create code.
|
||||
|
@ -277,7 +264,7 @@ xfs_symlink(
|
|||
* Allocate an inode for the symlink.
|
||||
*/
|
||||
error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 1, 0,
|
||||
prid, resblks > 0, &ip, NULL);
|
||||
prid, &ip, NULL);
|
||||
if (error)
|
||||
goto out_trans_cancel;
|
||||
|
||||
|
|
Loading…
Reference in New Issue