2018-06-06 10:42:14 +08:00
|
|
|
// SPDX-License-Identifier: GPL-2.0
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
|
2005-11-02 11:58:39 +08:00
|
|
|
* Copyright (c) 2000-2005 Silicon Graphics, Inc.
|
|
|
|
* All Rights Reserved.
|
2005-04-17 06:20:36 +08:00
|
|
|
*/
|
|
|
|
#include "xfs.h"
|
2005-11-02 11:38:42 +08:00
|
|
|
#include "xfs_fs.h"
|
2013-10-23 07:36:05 +08:00
|
|
|
#include "xfs_shared.h"
|
2013-10-23 07:50:10 +08:00
|
|
|
#include "xfs_format.h"
|
|
|
|
#include "xfs_log_format.h"
|
|
|
|
#include "xfs_trans_resv.h"
|
2005-04-17 06:20:36 +08:00
|
|
|
#include "xfs_mount.h"
|
2016-08-03 09:15:38 +08:00
|
|
|
#include "xfs_defer.h"
|
2013-10-15 06:17:51 +08:00
|
|
|
#include "xfs_da_format.h"
|
2005-11-02 11:38:42 +08:00
|
|
|
#include "xfs_da_btree.h"
|
|
|
|
#include "xfs_attr_sf.h"
|
2005-04-17 06:20:36 +08:00
|
|
|
#include "xfs_inode.h"
|
2013-10-23 07:50:10 +08:00
|
|
|
#include "xfs_trans.h"
|
2005-04-17 06:20:36 +08:00
|
|
|
#include "xfs_bmap.h"
|
2013-10-23 07:51:50 +08:00
|
|
|
#include "xfs_bmap_btree.h"
|
2005-04-17 06:20:36 +08:00
|
|
|
#include "xfs_attr.h"
|
|
|
|
#include "xfs_attr_leaf.h"
|
2013-04-03 13:11:27 +08:00
|
|
|
#include "xfs_attr_remote.h"
|
2005-04-17 06:20:36 +08:00
|
|
|
#include "xfs_quota.h"
|
|
|
|
#include "xfs_trans_space.h"
|
2009-12-15 07:14:59 +08:00
|
|
|
#include "xfs_trace.h"
|
2022-05-04 10:41:02 +08:00
|
|
|
#include "xfs_attr_item.h"
|
2022-05-11 15:01:13 +08:00
|
|
|
#include "xfs_log.h"
|
2022-05-04 10:41:02 +08:00
|
|
|
|
2022-05-22 13:59:48 +08:00
|
|
|
struct kmem_cache *xfs_attr_intent_cache;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* xfs_attr.c
|
|
|
|
*
|
|
|
|
* Provide the external interfaces to manage attribute lists.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*========================================================================
|
|
|
|
* Function prototypes for the kernel.
|
|
|
|
*========================================================================*/
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Internal routines when attribute list fits inside the inode.
|
|
|
|
*/
|
|
|
|
STATIC int xfs_attr_shortform_addname(xfs_da_args_t *args);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Internal routines when attribute list is one block.
|
|
|
|
*/
|
2005-06-21 13:36:52 +08:00
|
|
|
STATIC int xfs_attr_leaf_get(xfs_da_args_t *args);
|
2005-04-17 06:20:36 +08:00
|
|
|
STATIC int xfs_attr_leaf_removename(xfs_da_args_t *args);
|
2020-07-21 12:47:22 +08:00
|
|
|
STATIC int xfs_attr_leaf_hasname(struct xfs_da_args *args, struct xfs_buf **bp);
|
2021-04-20 03:55:26 +08:00
|
|
|
STATIC int xfs_attr_leaf_try_add(struct xfs_da_args *args, struct xfs_buf *bp);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Internal routines when attribute list is more than one block.
|
|
|
|
*/
|
2005-06-21 13:36:52 +08:00
|
|
|
STATIC int xfs_attr_node_get(xfs_da_args_t *args);
|
2021-04-27 07:50:26 +08:00
|
|
|
STATIC void xfs_attr_restore_rmt_blk(struct xfs_da_args *args);
|
xfs: separate out initial attr_set states
We currently use XFS_DAS_UNINIT for several steps in the attr_set
state machine. We use it for setting shortform xattrs, converting
from shortform to leaf, leaf add, leaf-to-node and leaf add. All of
these things are essentially known before we start the state machine
iterating, so we really should separate them out:
XFS_DAS_SF_ADD:
- tries to do a shortform add
- on success -> done
- on ENOSPC converts to leaf, -> XFS_DAS_LEAF_ADD
- on error, dies.
XFS_DAS_LEAF_ADD:
- tries to do leaf add
- on success:
- inline attr -> done
- remote xattr || REPLACE -> XFS_DAS_FOUND_LBLK
- on ENOSPC converts to node, -> XFS_DAS_NODE_ADD
- on error, dies
XFS_DAS_NODE_ADD:
- tries to do node add
- on success:
- inline attr -> done
- remote xattr || REPLACE -> XFS_DAS_FOUND_NBLK
- on error, dies
This makes it easier to understand how the state machine starts
up and sets us up on the path to further state machine
simplifications.
This also converts the DAS state tracepoints to use strings rather
than numbers, as converting between enums and numbers requires
manual counting rather than just reading the name.
This also introduces a XFS_DAS_DONE state so that we can trace
successful operation completions easily.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Allison Henderson<allison.henderson@oracle.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2022-05-12 13:12:52 +08:00
|
|
|
static int xfs_attr_node_try_addname(struct xfs_attr_item *attr);
|
2022-05-11 15:01:22 +08:00
|
|
|
STATIC int xfs_attr_node_addname_find_attr(struct xfs_attr_item *attr);
|
2022-05-12 13:12:55 +08:00
|
|
|
STATIC int xfs_attr_node_remove_attr(struct xfs_attr_item *attr);
|
2022-05-22 13:59:34 +08:00
|
|
|
STATIC int xfs_attr_node_lookup(struct xfs_da_args *args,
|
|
|
|
struct xfs_da_state *state);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2013-08-12 18:49:38 +08:00
|
|
|
int
|
2008-06-23 11:23:41 +08:00
|
|
|
xfs_inode_hasattr(
|
|
|
|
struct xfs_inode *ip)
|
|
|
|
{
|
xfs: ATTR_REPLACE algorithm with LARP enabled needs rework
We can't use the same algorithm for replacing an existing attribute
when logging attributes. The existing algorithm is essentially:
1. create new attr w/ INCOMPLETE
2. atomically flip INCOMPLETE flags between old + new attribute
3. remove old attr which is marked w/ INCOMPLETE
This algorithm guarantees that we see either the old or new
attribute, and if we fail after the atomic flag flip, we don't have
to recover the removal of the old attr because we never see
INCOMPLETE attributes in lookups.
For logged attributes, however, this does not work. The logged
attribute intents do not track the work that has been done as the
transaction rolls, and hence the only recovery mechanism we have is
"run the replace operation from scratch".
This is further exacerbated by the attempt to avoid needing the
INCOMPLETE flag to create an atomic swap. This means we can create
a second active attribute of the same name before we remove the
original. If we fail at any point after the create but before the
removal has completed, we end up with duplicate attributes in
the attr btree and recovery only tries to replace one of them.
There are several other failure modes where we can leave partially
allocated remote attributes that expose stale data, partially free
remote attributes that enable UAF based stale data exposure, etc.
TO fix this, we need a different algorithm for replace operations
when LARP is enabled. Luckily, it's not that complex if we take the
right first step. That is, the first thing we log is the attri
intent with the new name/value pair and mark the old attr as
INCOMPLETE in the same transaction.
From there, we then remove the old attr and keep relogging the
new name/value in the intent, such that we always know that we have
to create the new attr in recovery. Once the old attr is removed,
we then run a normal ATTR_CREATE operation relogging the intent as
we go. If the new attr is local, then it gets created in a single
atomic transaction that also logs the final intent done. If the new
attr is remote, the we set INCOMPLETE on the new attr while we
allocate and set the remote value, and then we clear the INCOMPLETE
flag at in the last transaction taht logs the final intent done.
If we fail at any point in this algorithm, log recovery will always
see the same state on disk: the new name/value in the intent, and
either an INCOMPLETE attr or no attr in the attr btree. If we find
an INCOMPLETE attr, we run the full replace starting with removing
the INCOMPLETE attr. If we don't find it, then we simply create the
new attr.
Notably, recovery of a failed create that has an INCOMPLETE flag set
is now the same - we start with the lookup of the INCOMPLETE attr,
and if that exists then we do the full replace recovery process,
otherwise we just create the new attr.
Hence changing the way we do the replace operation when LARP is
enabled allows us to use the same log recovery algorithm for both
the ATTR_CREATE and ATTR_REPLACE operations. This is also the same
algorithm we use for runtime ATTR_REPLACE operations (except for the
step setting up the initial conditions).
The result is that:
- ATTR_CREATE uses the same algorithm regardless of whether LARP is
enabled or not
- ATTR_REPLACE with larp=0 is identical to the old algorithm
- ATTR_REPLACE with larp=1 runs an unmodified attr removal algorithm
from the larp=0 code and then runs the unmodified ATTR_CREATE
code.
- log recovery when larp=1 runs the same ATTR_REPLACE algorithm as
it uses at runtime.
Because the state machine is now quite clean, changing the algorithm
is really just a case of changing the initial state and how the
states link together for the ATTR_REPLACE case. Hence it's not a
huge amount of code for what is a fairly substantial rework
of the attr logging and recovery algorithm....
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2022-05-12 13:12:56 +08:00
|
|
|
if (!XFS_IFORK_Q(ip))
|
|
|
|
return 0;
|
|
|
|
if (!ip->i_afp)
|
|
|
|
return 0;
|
|
|
|
if (ip->i_afp->if_format == XFS_DINODE_FMT_EXTENTS &&
|
|
|
|
ip->i_afp->if_nextents == 0)
|
2008-06-23 11:23:41 +08:00
|
|
|
return 0;
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2021-04-14 02:15:10 +08:00
|
|
|
/*
|
|
|
|
* Returns true if the there is exactly only block in the attr fork, in which
|
|
|
|
* case the attribute fork consists of a single leaf block entry.
|
|
|
|
*/
|
|
|
|
bool
|
|
|
|
xfs_attr_is_leaf(
|
|
|
|
struct xfs_inode *ip)
|
|
|
|
{
|
|
|
|
struct xfs_ifork *ifp = ip->i_afp;
|
|
|
|
struct xfs_iext_cursor icur;
|
|
|
|
struct xfs_bmbt_irec imap;
|
|
|
|
|
|
|
|
if (ifp->if_nextents != 1 || ifp->if_format != XFS_DINODE_FMT_EXTENTS)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
xfs_iext_first(ifp, &icur);
|
|
|
|
xfs_iext_get_extent(ifp, &icur, &imap);
|
|
|
|
return imap.br_startoff == 0 && imap.br_blockcount == 1;
|
|
|
|
}
|
|
|
|
|
2022-05-12 13:12:56 +08:00
|
|
|
/*
|
|
|
|
* XXX (dchinner): name path state saving and refilling is an optimisation to
|
|
|
|
* avoid needing to look up name entries after rolling transactions removing
|
|
|
|
* remote xattr blocks between the name entry lookup and name entry removal.
|
|
|
|
* This optimisation got sidelined when combining the set and remove state
|
|
|
|
* machines, but the code has been left in place because it is worthwhile to
|
|
|
|
* restore the optimisation once the combined state machine paths have settled.
|
|
|
|
*
|
|
|
|
* This comment is a public service announcement to remind Future Dave that he
|
|
|
|
* still needs to restore this code to working order.
|
|
|
|
*/
|
|
|
|
#if 0
|
|
|
|
/*
|
|
|
|
* Fill in the disk block numbers in the state structure for the buffers
|
|
|
|
* that are attached to the state structure.
|
|
|
|
* This is done so that we can quickly reattach ourselves to those buffers
|
|
|
|
* after some set of transaction commits have released these buffers.
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
xfs_attr_fillstate(xfs_da_state_t *state)
|
|
|
|
{
|
|
|
|
xfs_da_state_path_t *path;
|
|
|
|
xfs_da_state_blk_t *blk;
|
|
|
|
int level;
|
|
|
|
|
|
|
|
trace_xfs_attr_fillstate(state->args);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Roll down the "path" in the state structure, storing the on-disk
|
|
|
|
* block number for those buffers in the "path".
|
|
|
|
*/
|
|
|
|
path = &state->path;
|
|
|
|
ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
|
|
|
|
for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
|
|
|
|
if (blk->bp) {
|
|
|
|
blk->disk_blkno = xfs_buf_daddr(blk->bp);
|
|
|
|
blk->bp = NULL;
|
|
|
|
} else {
|
|
|
|
blk->disk_blkno = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Roll down the "altpath" in the state structure, storing the on-disk
|
|
|
|
* block number for those buffers in the "altpath".
|
|
|
|
*/
|
|
|
|
path = &state->altpath;
|
|
|
|
ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
|
|
|
|
for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
|
|
|
|
if (blk->bp) {
|
|
|
|
blk->disk_blkno = xfs_buf_daddr(blk->bp);
|
|
|
|
blk->bp = NULL;
|
|
|
|
} else {
|
|
|
|
blk->disk_blkno = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Reattach the buffers to the state structure based on the disk block
|
|
|
|
* numbers stored in the state structure.
|
|
|
|
* This is done after some set of transaction commits have released those
|
|
|
|
* buffers from our grip.
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
xfs_attr_refillstate(xfs_da_state_t *state)
|
|
|
|
{
|
|
|
|
xfs_da_state_path_t *path;
|
|
|
|
xfs_da_state_blk_t *blk;
|
|
|
|
int level, error;
|
|
|
|
|
|
|
|
trace_xfs_attr_refillstate(state->args);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Roll down the "path" in the state structure, storing the on-disk
|
|
|
|
* block number for those buffers in the "path".
|
|
|
|
*/
|
|
|
|
path = &state->path;
|
|
|
|
ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
|
|
|
|
for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
|
|
|
|
if (blk->disk_blkno) {
|
|
|
|
error = xfs_da3_node_read_mapped(state->args->trans,
|
|
|
|
state->args->dp, blk->disk_blkno,
|
|
|
|
&blk->bp, XFS_ATTR_FORK);
|
|
|
|
if (error)
|
|
|
|
return error;
|
|
|
|
} else {
|
|
|
|
blk->bp = NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Roll down the "altpath" in the state structure, storing the on-disk
|
|
|
|
* block number for those buffers in the "altpath".
|
|
|
|
*/
|
|
|
|
path = &state->altpath;
|
|
|
|
ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
|
|
|
|
for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
|
|
|
|
if (blk->disk_blkno) {
|
|
|
|
error = xfs_da3_node_read_mapped(state->args->trans,
|
|
|
|
state->args->dp, blk->disk_blkno,
|
|
|
|
&blk->bp, XFS_ATTR_FORK);
|
|
|
|
if (error)
|
|
|
|
return error;
|
|
|
|
} else {
|
|
|
|
blk->bp = NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
#else
|
|
|
|
/*
 * Stub used while the path state saving code is compiled out: does nothing
 * with the state and reports success.
 */
static int
xfs_attr_fillstate(
	xfs_da_state_t		*state)
{
	return 0;
}
|
|
|
|
#endif
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*========================================================================
|
|
|
|
* Overall external interface routines.
|
|
|
|
*========================================================================*/
|
|
|
|
|
2019-08-30 00:04:08 +08:00
|
|
|
/*
|
|
|
|
* Retrieve an extended attribute and its value. Must have ilock.
|
|
|
|
* Returns 0 on successful retrieval, otherwise an error.
|
|
|
|
*/
|
2017-06-17 02:00:14 +08:00
|
|
|
int
|
|
|
|
xfs_attr_get_ilocked(
|
|
|
|
struct xfs_da_args *args)
|
|
|
|
{
|
2020-02-27 09:30:34 +08:00
|
|
|
ASSERT(xfs_isilocked(args->dp, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
|
2017-07-14 03:14:33 +08:00
|
|
|
|
2020-02-27 09:30:34 +08:00
|
|
|
if (!xfs_inode_hasattr(args->dp))
|
2017-06-17 02:00:14 +08:00
|
|
|
return -ENOATTR;
|
2020-02-27 09:30:34 +08:00
|
|
|
|
2020-05-19 01:28:05 +08:00
|
|
|
if (args->dp->i_afp->if_format == XFS_DINODE_FMT_LOCAL)
|
2017-06-17 02:00:14 +08:00
|
|
|
return xfs_attr_shortform_getvalue(args);
|
2021-04-14 02:15:10 +08:00
|
|
|
if (xfs_attr_is_leaf(args->dp))
|
2017-06-17 02:00:14 +08:00
|
|
|
return xfs_attr_leaf_get(args);
|
2020-02-27 09:30:34 +08:00
|
|
|
return xfs_attr_node_get(args);
|
2017-06-17 02:00:14 +08:00
|
|
|
}
|
|
|
|
|
xfs: allocate xattr buffer on demand
When doing file lookups and checking for permissions, we end up in
xfs_get_acl() to see if there are any ACLs on the inode. This
requires an xattr lookup, and to do that we have to supply a buffer
large enough to hold a maximum sized xattr.
On workloads where we are accessing a wide range of cache cold files
under memory pressure (e.g. NFS fileservers) we end up spending a
lot of time allocating the buffer. The buffer is 64k in length, so
is a contiguous multi-page allocation, and if that then fails we
fall back to vmalloc(). Hence the allocation here is /expensive/
when we are looking up hundreds of thousands of files a second.
Initial numbers from a bpf trace show average time in xfs_get_acl()
is ~32us, with ~19us of that in the memory allocation. Note these
are average times, so they are going to be affected by the worst
case allocations more than the common fast case...
To avoid this, we could just do a "null" lookup to see if the ACL
xattr exists and then only do the allocation if it exists. This,
however, optimises the path for the "no ACL present" case at the
expense of the "acl present" case. i.e. we can halve the time in
xfs_get_acl() for the no acl case (i.e down to ~10-15us), but that
then increases the ACL case by 30% (i.e. up to 40-45us).
To solve this and speed up both cases, drive the xattr buffer
allocation into the attribute code once we know what the actual
xattr length is. For the no-xattr case, we avoid the allocation
completely, speeding up that case. For the common ACL case, we'll
end up with a fast heap allocation (because it'll be smaller than a
page), and only for the rarer "we have a remote xattr" will we have
a multi-page allocation occur. Hence the common ACL case will be
much faster, too.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
2019-08-30 00:04:10 +08:00
|
|
|
/*
|
|
|
|
* Retrieve an extended attribute by name, and its value if requested.
|
|
|
|
*
|
2020-02-27 09:30:35 +08:00
|
|
|
* If args->valuelen is zero, then the caller does not want the value, just an
|
|
|
|
* indication whether the attribute exists and the size of the value if it
|
|
|
|
* exists. The size is returned in args.valuelen.
|
xfs: allocate xattr buffer on demand
When doing file lookups and checking for permissions, we end up in
xfs_get_acl() to see if there are any ACLs on the inode. This
requires and xattr lookup, and to do that we have to supply a buffer
large enough to hold an maximum sized xattr.
On workloads were we are accessing a wide range of cache cold files
under memory pressure (e.g. NFS fileservers) we end up spending a
lot of time allocating the buffer. The buffer is 64k in length, so
is a contiguous multi-page allocation, and if that then fails we
fall back to vmalloc(). Hence the allocation here is /expensive/
when we are looking up hundreds of thousands of files a second.
Initial numbers from a bpf trace show average time in xfs_get_acl()
is ~32us, with ~19us of that in the memory allocation. Note these
are average times, so there are going to be affected by the worst
case allocations more than the common fast case...
To avoid this, we could just do a "null" lookup to see if the ACL
xattr exists and then only do the allocation if it exists. This,
however, optimises the path for the "no ACL present" case at the
expense of the "acl present" case. i.e. we can halve the time in
xfs_get_acl() for the no acl case (i.e down to ~10-15us), but that
then increases the ACL case by 30% (i.e. up to 40-45us).
To solve this and speed up both cases, drive the xattr buffer
allocation into the attribute code once we know what the actual
xattr length is. For the no-xattr case, we avoid the allocation
completely, speeding up that case. For the common ACL case, we'll
end up with a fast heap allocation (because it'll be smaller than a
page), and only for the rarer "we have a remote xattr" will we have
a multi-page allocation occur. Hence the common ACL case will be
much faster, too.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
2019-08-30 00:04:10 +08:00
|
|
|
*
|
2020-02-27 09:30:35 +08:00
|
|
|
* If args->value is NULL but args->valuelen is non-zero, allocate the buffer
|
|
|
|
* for the value after existence of the attribute has been determined. The
|
|
|
|
* caller always has to free args->value if it is set, no matter if this
|
|
|
|
* function was successful or not.
|
|
|
|
*
|
xfs: allocate xattr buffer on demand
When doing file lookups and checking for permissions, we end up in
xfs_get_acl() to see if there are any ACLs on the inode. This
requires and xattr lookup, and to do that we have to supply a buffer
large enough to hold an maximum sized xattr.
On workloads were we are accessing a wide range of cache cold files
under memory pressure (e.g. NFS fileservers) we end up spending a
lot of time allocating the buffer. The buffer is 64k in length, so
is a contiguous multi-page allocation, and if that then fails we
fall back to vmalloc(). Hence the allocation here is /expensive/
when we are looking up hundreds of thousands of files a second.
Initial numbers from a bpf trace show average time in xfs_get_acl()
is ~32us, with ~19us of that in the memory allocation. Note these
are average times, so there are going to be affected by the worst
case allocations more than the common fast case...
To avoid this, we could just do a "null" lookup to see if the ACL
xattr exists and then only do the allocation if it exists. This,
however, optimises the path for the "no ACL present" case at the
expense of the "acl present" case. i.e. we can halve the time in
xfs_get_acl() for the no acl case (i.e down to ~10-15us), but that
then increases the ACL case by 30% (i.e. up to 40-45us).
To solve this and speed up both cases, drive the xattr buffer
allocation into the attribute code once we know what the actual
xattr length is. For the no-xattr case, we avoid the allocation
completely, speeding up that case. For the common ACL case, we'll
end up with a fast heap allocation (because it'll be smaller than a
page), and only for the rarer "we have a remote xattr" will we have
a multi-page allocation occur. Hence the common ACL case will be
much faster, too.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
2019-08-30 00:04:10 +08:00
|
|
|
* If the attribute is found, but exceeds the size limit set by the caller in
|
2020-02-27 09:30:34 +08:00
|
|
|
* args->valuelen, return -ERANGE with the size of the attribute that was found
|
|
|
|
* in args->valuelen.
|
xfs: allocate xattr buffer on demand
When doing file lookups and checking for permissions, we end up in
xfs_get_acl() to see if there are any ACLs on the inode. This
requires and xattr lookup, and to do that we have to supply a buffer
large enough to hold an maximum sized xattr.
On workloads were we are accessing a wide range of cache cold files
under memory pressure (e.g. NFS fileservers) we end up spending a
lot of time allocating the buffer. The buffer is 64k in length, so
is a contiguous multi-page allocation, and if that then fails we
fall back to vmalloc(). Hence the allocation here is /expensive/
when we are looking up hundreds of thousands of files a second.
Initial numbers from a bpf trace show average time in xfs_get_acl()
is ~32us, with ~19us of that in the memory allocation. Note these
are average times, so there are going to be affected by the worst
case allocations more than the common fast case...
To avoid this, we could just do a "null" lookup to see if the ACL
xattr exists and then only do the allocation if it exists. This,
however, optimises the path for the "no ACL present" case at the
expense of the "acl present" case. i.e. we can halve the time in
xfs_get_acl() for the no acl case (i.e down to ~10-15us), but that
then increases the ACL case by 30% (i.e. up to 40-45us).
To solve this and speed up both cases, drive the xattr buffer
allocation into the attribute code once we know what the actual
xattr length is. For the no-xattr case, we avoid the allocation
completely, speeding up that case. For the common ACL case, we'll
end up with a fast heap allocation (because it'll be smaller than a
page), and only for the rarer "we have a remote xattr" will we have
a multi-page allocation occur. Hence the common ACL case will be
much faster, too.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
2019-08-30 00:04:10 +08:00
|
|
|
*/
|
2014-05-13 14:34:24 +08:00
|
|
|
int
|
|
|
|
xfs_attr_get(
|
2020-02-27 09:30:34 +08:00
|
|
|
struct xfs_da_args *args)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
2014-05-13 14:34:24 +08:00
|
|
|
uint lock_mode;
|
|
|
|
int error;
|
|
|
|
|
2020-02-27 09:30:34 +08:00
|
|
|
XFS_STATS_INC(args->dp->i_mount, xs_attr_get);
|
2014-05-13 14:34:24 +08:00
|
|
|
|
2021-08-19 09:46:53 +08:00
|
|
|
if (xfs_is_shutdown(args->dp->i_mount))
|
2014-06-25 12:58:08 +08:00
|
|
|
return -EIO;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2020-02-27 09:30:34 +08:00
|
|
|
args->geo = args->dp->i_mount->m_attr_geo;
|
|
|
|
args->whichfork = XFS_ATTR_FORK;
|
|
|
|
args->hashval = xfs_da_hashname(args->name, args->namelen);
|
2014-05-13 14:34:24 +08:00
|
|
|
|
xfs: set XFS_DA_OP_OKNOENT in xfs_attr_get
It's entirely possible for userspace to ask for an xattr which
does not exist.
Normally, there is no problem whatsoever when we ask for such
a thing, but when we look at an obfuscated metadump image
on a debug kernel with selinux, we trip over this ASSERT in
xfs_da3_path_shift():
*result = -ENOENT; /* we're out of our tree */
ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
It (more or less) only shows up in the above scenario, because
xfs_metadump obfuscates attr names, but chooses names which
keep the same hash value - and xfs_da3_node_lookup_int does:
if (((retval == -ENOENT) || (retval == -ENOATTR)) &&
(blk->hashval == args->hashval)) {
error = xfs_da3_path_shift(state, &state->path, 1, 1,
&retval);
IOWS, we only get down to the xfs_da3_path_shift() ASSERT
if we are looking for an xattr which doesn't exist, but we
find xattrs on disk which have the same hash, and so might be
a hash collision, so we try the path shift. When *that*
fails to find what we're looking for, we hit the assert about
XFS_DA_OP_OKNOENT.
Simply setting XFS_DA_OP_OKNOENT in xfs_attr_get solves this
rather corner-case problem with no ill side effects. It's
fine for an attr name lookup to fail.
Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2015-08-19 08:30:48 +08:00
|
|
|
/* Entirely possible to look up a name which doesn't exist */
|
2020-02-27 09:30:34 +08:00
|
|
|
args->op_flags = XFS_DA_OP_OKNOENT;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2020-02-27 09:30:34 +08:00
|
|
|
lock_mode = xfs_ilock_attr_map_shared(args->dp);
|
2020-02-27 09:30:34 +08:00
|
|
|
error = xfs_attr_get_ilocked(args);
|
2020-02-27 09:30:34 +08:00
|
|
|
xfs_iunlock(args->dp, lock_mode);
|
xfs: allocate xattr buffer on demand
When doing file lookups and checking for permissions, we end up in
xfs_get_acl() to see if there are any ACLs on the inode. This
requires and xattr lookup, and to do that we have to supply a buffer
large enough to hold an maximum sized xattr.
On workloads were we are accessing a wide range of cache cold files
under memory pressure (e.g. NFS fileservers) we end up spending a
lot of time allocating the buffer. The buffer is 64k in length, so
is a contiguous multi-page allocation, and if that then fails we
fall back to vmalloc(). Hence the allocation here is /expensive/
when we are looking up hundreds of thousands of files a second.
Initial numbers from a bpf trace show average time in xfs_get_acl()
is ~32us, with ~19us of that in the memory allocation. Note these
are average times, so there are going to be affected by the worst
case allocations more than the common fast case...
To avoid this, we could just do a "null" lookup to see if the ACL
xattr exists and then only do the allocation if it exists. This,
however, optimises the path for the "no ACL present" case at the
expense of the "acl present" case. i.e. we can halve the time in
xfs_get_acl() for the no acl case (i.e down to ~10-15us), but that
then increases the ACL case by 30% (i.e. up to 40-45us).
To solve this and speed up both cases, drive the xattr buffer
allocation into the attribute code once we know what the actual
xattr length is. For the no-xattr case, we avoid the allocation
completely, speeding up that case. For the common ACL case, we'll
end up with a fast heap allocation (because it'll be smaller than a
page), and only for the rarer "we have a remote xattr" will we have
a multi-page allocation occur. Hence the common ACL case will be
much faster, too.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
2019-08-30 00:04:10 +08:00
|
|
|
|
2020-02-27 09:30:34 +08:00
|
|
|
return error;
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
|
2008-08-13 14:03:35 +08:00
|
|
|
/*
|
|
|
|
* Calculate how many blocks we need for the new attribute,
|
|
|
|
*/
|
2022-05-04 10:41:02 +08:00
|
|
|
int
|
2008-08-13 14:03:35 +08:00
|
|
|
xfs_attr_calc_size(
|
2014-05-13 14:40:19 +08:00
|
|
|
struct xfs_da_args *args,
|
2008-08-13 14:03:35 +08:00
|
|
|
int *local)
|
|
|
|
{
|
2014-05-13 14:40:19 +08:00
|
|
|
struct xfs_mount *mp = args->dp->i_mount;
|
2008-08-13 14:03:35 +08:00
|
|
|
int size;
|
|
|
|
int nblks;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Determine space new attribute will use, and if it would be
|
|
|
|
* "local" or "remote" (note: local != inline).
|
|
|
|
*/
|
2014-06-06 13:21:27 +08:00
|
|
|
size = xfs_attr_leaf_newentsize(args, local);
|
2008-08-13 14:03:35 +08:00
|
|
|
nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK);
|
|
|
|
if (*local) {
|
2014-06-06 13:21:10 +08:00
|
|
|
if (size > (args->geo->blksize / 2)) {
|
2008-08-13 14:03:35 +08:00
|
|
|
/* Double split possible */
|
|
|
|
nblks *= 2;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* Out of line attribute, cannot double split, but
|
|
|
|
* make room for the attribute value itself.
|
|
|
|
*/
|
2014-05-15 07:39:28 +08:00
|
|
|
uint dblocks = xfs_attr3_rmt_blocks(mp, args->valuelen);
|
2008-08-13 14:03:35 +08:00
|
|
|
nblks += dblocks;
|
|
|
|
nblks += XFS_NEXTENTADD_SPACE_RES(mp, dblocks, XFS_ATTR_FORK);
|
|
|
|
}
|
|
|
|
|
|
|
|
return nblks;
|
|
|
|
}
|
|
|
|
|
2022-05-11 15:01:23 +08:00
|
|
|
/* Initialize transaction reservation for attr operations */
|
|
|
|
void
|
|
|
|
xfs_init_attr_trans(
|
|
|
|
struct xfs_da_args *args,
|
|
|
|
struct xfs_trans_res *tres,
|
|
|
|
unsigned int *total)
|
|
|
|
{
|
|
|
|
struct xfs_mount *mp = args->dp->i_mount;
|
|
|
|
|
|
|
|
if (args->value) {
|
|
|
|
tres->tr_logres = M_RES(mp)->tr_attrsetm.tr_logres +
|
|
|
|
M_RES(mp)->tr_attrsetrt.tr_logres *
|
|
|
|
args->total;
|
|
|
|
tres->tr_logcount = XFS_ATTRSET_LOG_COUNT;
|
|
|
|
tres->tr_logflags = XFS_TRANS_PERM_LOG_RES;
|
|
|
|
*total = args->total;
|
|
|
|
} else {
|
|
|
|
*tres = M_RES(mp)->tr_attrrm;
|
|
|
|
*total = XFS_ATTRRM_SPACE_RES(mp);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
xfs: separate out initial attr_set states
We currently use XFS_DAS_UNINIT for several steps in the attr_set
state machine. We use it for setting shortform xattrs, converting
from shortform to leaf, leaf add, leaf-to-node and leaf add. All of
these things are essentially known before we start the state machine
iterating, so we really should separate them out:
XFS_DAS_SF_ADD:
- tries to do a shortform add
- on success -> done
- on ENOSPC converts to leaf, -> XFS_DAS_LEAF_ADD
- on error, dies.
XFS_DAS_LEAF_ADD:
- tries to do leaf add
- on success:
- inline attr -> done
- remote xattr || REPLACE -> XFS_DAS_FOUND_LBLK
- on ENOSPC converts to node, -> XFS_DAS_NODE_ADD
- on error, dies
XFS_DAS_NODE_ADD:
- tries to do node add
- on success:
- inline attr -> done
- remote xattr || REPLACE -> XFS_DAS_FOUND_NBLK
- on error, dies
This makes it easier to understand how the state machine starts
up and sets us up on the path to further state machine
simplifications.
This also converts the DAS state tracepoints to use strings rather
than numbers, as converting between enums and numbers requires
manual counting rather than just reading the name.
This also introduces a XFS_DAS_DONE state so that we can trace
successful operation completions easily.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Allison Henderson<allison.henderson@oracle.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2022-05-12 13:12:52 +08:00
|
|
|
/*
|
|
|
|
* Add an attr to a shortform fork. If there is no space,
|
|
|
|
* xfs_attr_shortform_addname() will convert to leaf format and return -ENOSPC.
|
|
|
|
* to use.
|
|
|
|
*/
|
2018-10-18 14:20:50 +08:00
|
|
|
STATIC int
|
|
|
|
xfs_attr_try_sf_addname(
|
|
|
|
struct xfs_inode *dp,
|
|
|
|
struct xfs_da_args *args)
|
|
|
|
{
|
|
|
|
|
2020-07-21 12:47:24 +08:00
|
|
|
int error;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Build initial attribute list (if required).
|
|
|
|
*/
|
|
|
|
if (dp->i_afp->if_format == XFS_DINODE_FMT_EXTENTS)
|
|
|
|
xfs_attr_shortform_create(args);
|
2018-10-18 14:20:50 +08:00
|
|
|
|
|
|
|
error = xfs_attr_shortform_addname(args);
|
|
|
|
if (error == -ENOSPC)
|
|
|
|
return error;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Commit the shortform mods, and we're done.
|
|
|
|
* NOTE: this is also the error path (EEXIST, etc).
|
|
|
|
*/
|
2020-02-27 09:30:36 +08:00
|
|
|
if (!error && !(args->op_flags & XFS_DA_OP_NOTIME))
|
2018-10-18 14:20:50 +08:00
|
|
|
xfs_trans_ichgtime(args->trans, dp, XFS_ICHGTIME_CHG);
|
|
|
|
|
2021-08-19 09:46:52 +08:00
|
|
|
if (xfs_has_wsync(dp->i_mount))
|
2018-10-18 14:20:50 +08:00
|
|
|
xfs_trans_set_sync(args->trans);
|
|
|
|
|
2020-07-21 12:47:24 +08:00
|
|
|
return error;
|
2018-10-18 14:20:50 +08:00
|
|
|
}
|
|
|
|
|
xfs: separate out initial attr_set states
We currently use XFS_DAS_UNINIT for several steps in the attr_set
state machine. We use it for setting shortform xattrs, converting
from shortform to leaf, leaf add, leaf-to-node and node add. All of
these things are essentially known before we start the state machine
iterating, so we really should separate them out:
XFS_DAS_SF_ADD:
- tries to do a shortform add
- on success -> done
- on ENOSPC converts to leaf, -> XFS_DAS_LEAF_ADD
- on error, dies.
XFS_DAS_LEAF_ADD:
- tries to do leaf add
- on success:
- inline attr -> done
- remote xattr || REPLACE -> XFS_DAS_FOUND_LBLK
- on ENOSPC converts to node, -> XFS_DAS_NODE_ADD
- on error, dies
XFS_DAS_NODE_ADD:
- tries to do node add
- on success:
- inline attr -> done
- remote xattr || REPLACE -> XFS_DAS_FOUND_NBLK
- on error, dies
This makes it easier to understand how the state machine starts
up and sets us up on the path to further state machine
simplifications.
This also converts the DAS state tracepoints to use strings rather
than numbers, as converting between enums and numbers requires
manual counting rather than just reading the name.
This also introduces a XFS_DAS_DONE state so that we can trace
successful operation completions easily.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Allison Henderson<allison.henderson@oracle.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2022-05-12 13:12:52 +08:00
|
|
|
static int
|
2021-05-29 06:15:05 +08:00
|
|
|
xfs_attr_sf_addname(
|
2022-05-11 15:01:22 +08:00
|
|
|
struct xfs_attr_item *attr)
|
2020-07-21 12:47:28 +08:00
|
|
|
{
|
2022-05-11 15:01:22 +08:00
|
|
|
struct xfs_da_args *args = attr->xattri_da_args;
|
2021-05-22 06:48:13 +08:00
|
|
|
struct xfs_inode *dp = args->dp;
|
|
|
|
int error = 0;
|
2020-07-21 12:47:28 +08:00
|
|
|
|
|
|
|
error = xfs_attr_try_sf_addname(dp, args);
|
xfs: separate out initial attr_set states
We current use XFS_DAS_UNINIT for several steps in the attr_set
state machine. We use it for setting shortform xattrs, converting
from shortform to leaf, leaf add, leaf-to-node and leaf add. All of
these things are essentially known before we start the state machine
iterating, so we really should separate them out:
XFS_DAS_SF_ADD:
- tries to do a shortform add
- on success -> done
- on ENOSPC converts to leaf, -> XFS_DAS_LEAF_ADD
- on error, dies.
XFS_DAS_LEAF_ADD:
- tries to do leaf add
- on success:
- inline attr -> done
- remote xattr || REPLACE -> XFS_DAS_FOUND_LBLK
- on ENOSPC converts to node, -> XFS_DAS_NODE_ADD
- on error, dies
XFS_DAS_NODE_ADD:
- tries to do node add
- on success:
- inline attr -> done
- remote xattr || REPLACE -> XFS_DAS_FOUND_NBLK
- on error, dies
This makes it easier to understand how the state machine starts
up and sets us up on the path to further state machine
simplifications.
This also converts the DAS state tracepoints to use strings rather
than numbers, as converting between enums and numbers requires
manual counting rather than just reading the name.
This also introduces a XFS_DAS_DONE state so that we can trace
successful operation completions easily.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Allison Henderson<allison.henderson@oracle.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2022-05-12 13:12:52 +08:00
|
|
|
if (error != -ENOSPC) {
|
|
|
|
ASSERT(!error || error == -EEXIST);
|
|
|
|
attr->xattri_dela_state = XFS_DAS_DONE;
|
|
|
|
goto out;
|
|
|
|
}
|
2021-02-18 16:09:18 +08:00
|
|
|
|
2020-07-21 12:47:28 +08:00
|
|
|
/*
|
|
|
|
* It won't fit in the shortform, transform to a leaf block. GROT:
|
|
|
|
* another possible req'mt for a double-split btree op.
|
|
|
|
*/
|
2022-05-11 15:01:22 +08:00
|
|
|
error = xfs_attr_shortform_to_leaf(args, &attr->xattri_leaf_bp);
|
2020-07-21 12:47:28 +08:00
|
|
|
if (error)
|
|
|
|
return error;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Prevent the leaf buffer from being unlocked so that a concurrent AIL
|
|
|
|
* push cannot grab the half-baked leaf buffer and run into problems
|
2021-02-18 16:09:18 +08:00
|
|
|
* with the write verifier.
|
2020-07-21 12:47:28 +08:00
|
|
|
*/
|
2022-05-11 15:01:22 +08:00
|
|
|
xfs_trans_bhold(args->trans, attr->xattri_leaf_bp);
|
xfs: separate out initial attr_set states
We current use XFS_DAS_UNINIT for several steps in the attr_set
state machine. We use it for setting shortform xattrs, converting
from shortform to leaf, leaf add, leaf-to-node and leaf add. All of
these things are essentially known before we start the state machine
iterating, so we really should separate them out:
XFS_DAS_SF_ADD:
- tries to do a shortform add
- on success -> done
- on ENOSPC converts to leaf, -> XFS_DAS_LEAF_ADD
- on error, dies.
XFS_DAS_LEAF_ADD:
- tries to do leaf add
- on success:
- inline attr -> done
- remote xattr || REPLACE -> XFS_DAS_FOUND_LBLK
- on ENOSPC converts to node, -> XFS_DAS_NODE_ADD
- on error, dies
XFS_DAS_NODE_ADD:
- tries to do node add
- on success:
- inline attr -> done
- remote xattr || REPLACE -> XFS_DAS_FOUND_NBLK
- on error, dies
This makes it easier to understand how the state machine starts
up and sets us up on the path to further state machine
simplifications.
This also converts the DAS state tracepoints to use strings rather
than numbers, as converting between enums and numbers requires
manual counting rather than just reading the name.
This also introduces a XFS_DAS_DONE state so that we can trace
successful operation completions easily.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Allison Henderson<allison.henderson@oracle.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2022-05-12 13:12:52 +08:00
|
|
|
attr->xattri_dela_state = XFS_DAS_LEAF_ADD;
|
|
|
|
out:
|
|
|
|
trace_xfs_attr_sf_addname_return(attr->xattri_dela_state, args->dp);
|
|
|
|
return error;
|
2020-07-21 12:47:28 +08:00
|
|
|
}
|
|
|
|
|
2022-05-12 13:12:55 +08:00
|
|
|
/*
|
xfs: ATTR_REPLACE algorithm with LARP enabled needs rework
We can't use the same algorithm for replacing an existing attribute
when logging attributes. The existing algorithm is essentially:
1. create new attr w/ INCOMPLETE
2. atomically flip INCOMPLETE flags between old + new attribute
3. remove old attr which is marked w/ INCOMPLETE
This algorithm guarantees that we see either the old or new
attribute, and if we fail after the atomic flag flip, we don't have
to recover the removal of the old attr because we never see
INCOMPLETE attributes in lookups.
For logged attributes, however, this does not work. The logged
attribute intents do not track the work that has been done as the
transaction rolls, and hence the only recovery mechanism we have is
"run the replace operation from scratch".
This is further exacerbated by the attempt to avoid needing the
INCOMPLETE flag to create an atomic swap. This means we can create
a second active attribute of the same name before we remove the
original. If we fail at any point after the create but before the
removal has completed, we end up with duplicate attributes in
the attr btree and recovery only tries to replace one of them.
There are several other failure modes where we can leave partially
allocated remote attributes that expose stale data, partially free
remote attributes that enable UAF based stale data exposure, etc.
TO fix this, we need a different algorithm for replace operations
when LARP is enabled. Luckily, it's not that complex if we take the
right first step. That is, the first thing we log is the attri
intent with the new name/value pair and mark the old attr as
INCOMPLETE in the same transaction.
From there, we then remove the old attr and keep relogging the
new name/value in the intent, such that we always know that we have
to create the new attr in recovery. Once the old attr is removed,
we then run a normal ATTR_CREATE operation relogging the intent as
we go. If the new attr is local, then it gets created in a single
atomic transaction that also logs the final intent done. If the new
attr is remote, the we set INCOMPLETE on the new attr while we
allocate and set the remote value, and then we clear the INCOMPLETE
flag at in the last transaction taht logs the final intent done.
If we fail at any point in this algorithm, log recovery will always
see the same state on disk: the new name/value in the intent, and
either an INCOMPLETE attr or no attr in the attr btree. If we find
an INCOMPLETE attr, we run the full replace starting with removing
the INCOMPLETE attr. If we don't find it, then we simply create the
new attr.
Notably, recovery of a failed create that has an INCOMPLETE flag set
is now the same - we start with the lookup of the INCOMPLETE attr,
and if that exists then we do the full replace recovery process,
otherwise we just create the new attr.
Hence changing the way we do the replace operation when LARP is
enabled allows us to use the same log recovery algorithm for both
the ATTR_CREATE and ATTR_REPLACE operations. This is also the same
algorithm we use for runtime ATTR_REPLACE operations (except for the
step setting up the initial conditions).
The result is that:
- ATTR_CREATE uses the same algorithm regardless of whether LARP is
enabled or not
- ATTR_REPLACE with larp=0 is identical to the old algorithm
- ATTR_REPLACE with larp=1 runs an unmodified attr removal algorithm
from the larp=0 code and then runs the unmodified ATTR_CREATE
code.
- log recovery when larp=1 runs the same ATTR_REPLACE algorithm as
it uses at runtime.
Because the state machine is now quite clean, changing the algorithm
is really just a case of changing the initial state and how the
states link together for the ATTR_REPLACE case. Hence it's not a
huge amount of code for what is a fairly substantial rework
of the attr logging and recovery algorithm....
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2022-05-12 13:12:56 +08:00
|
|
|
* Handle the state change on completion of a multi-state attr operation.
|
|
|
|
*
|
|
|
|
* If the XFS_DA_OP_REPLACE flag is set, this means the operation was the first
|
|
|
|
* modification in a attr replace operation and we still have to do the second
|
|
|
|
* state, indicated by @replace_state.
|
|
|
|
*
|
|
|
|
* We consume the XFS_DA_OP_REPLACE flag so that when we are called again on
|
|
|
|
* completion of the second half of the attr replace operation we correctly
|
|
|
|
* signal that it is done.
|
2022-05-12 13:12:55 +08:00
|
|
|
*/
|
xfs: ATTR_REPLACE algorithm with LARP enabled needs rework
We can't use the same algorithm for replacing an existing attribute
when logging attributes. The existing algorithm is essentially:
1. create new attr w/ INCOMPLETE
2. atomically flip INCOMPLETE flags between old + new attribute
3. remove old attr which is marked w/ INCOMPLETE
This algorithm guarantees that we see either the old or new
attribute, and if we fail after the atomic flag flip, we don't have
to recover the removal of the old attr because we never see
INCOMPLETE attributes in lookups.
For logged attributes, however, this does not work. The logged
attribute intents do not track the work that has been done as the
transaction rolls, and hence the only recovery mechanism we have is
"run the replace operation from scratch".
This is further exacerbated by the attempt to avoid needing the
INCOMPLETE flag to create an atomic swap. This means we can create
a second active attribute of the same name before we remove the
original. If we fail at any point after the create but before the
removal has completed, we end up with duplicate attributes in
the attr btree and recovery only tries to replace one of them.
There are several other failure modes where we can leave partially
allocated remote attributes that expose stale data, partially free
remote attributes that enable UAF based stale data exposure, etc.
TO fix this, we need a different algorithm for replace operations
when LARP is enabled. Luckily, it's not that complex if we take the
right first step. That is, the first thing we log is the attri
intent with the new name/value pair and mark the old attr as
INCOMPLETE in the same transaction.
From there, we then remove the old attr and keep relogging the
new name/value in the intent, such that we always know that we have
to create the new attr in recovery. Once the old attr is removed,
we then run a normal ATTR_CREATE operation relogging the intent as
we go. If the new attr is local, then it gets created in a single
atomic transaction that also logs the final intent done. If the new
attr is remote, the we set INCOMPLETE on the new attr while we
allocate and set the remote value, and then we clear the INCOMPLETE
flag at in the last transaction taht logs the final intent done.
If we fail at any point in this algorithm, log recovery will always
see the same state on disk: the new name/value in the intent, and
either an INCOMPLETE attr or no attr in the attr btree. If we find
an INCOMPLETE attr, we run the full replace starting with removing
the INCOMPLETE attr. If we don't find it, then we simply create the
new attr.
Notably, recovery of a failed create that has an INCOMPLETE flag set
is now the same - we start with the lookup of the INCOMPLETE attr,
and if that exists then we do the full replace recovery process,
otherwise we just create the new attr.
Hence changing the way we do the replace operation when LARP is
enabled allows us to use the same log recovery algorithm for both
the ATTR_CREATE and ATTR_REPLACE operations. This is also the same
algorithm we use for runtime ATTR_REPLACE operations (except for the
step setting up the initial conditions).
The result is that:
- ATTR_CREATE uses the same algorithm regardless of whether LARP is
enabled or not
- ATTR_REPLACE with larp=0 is identical to the old algorithm
- ATTR_REPLACE with larp=1 runs an unmodified attr removal algorithm
from the larp=0 code and then runs the unmodified ATTR_CREATE
code.
- log recovery when larp=1 runs the same ATTR_REPLACE algorithm as
it uses at runtime.
Because the state machine is now quite clean, changing the algorithm
is really just a case of changing the initial state and how the
states link together for the ATTR_REPLACE case. Hence it's not a
huge amount of code for what is a fairly substantial rework
of the attr logging and recovery algorithm....
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2022-05-12 13:12:56 +08:00
|
|
|
static enum xfs_delattr_state
|
|
|
|
xfs_attr_complete_op(
|
2022-05-12 13:12:55 +08:00
|
|
|
struct xfs_attr_item *attr,
|
xfs: ATTR_REPLACE algorithm with LARP enabled needs rework
We can't use the same algorithm for replacing an existing attribute
when logging attributes. The existing algorithm is essentially:
1. create new attr w/ INCOMPLETE
2. atomically flip INCOMPLETE flags between old + new attribute
3. remove old attr which is marked w/ INCOMPLETE
This algorithm guarantees that we see either the old or new
attribute, and if we fail after the atomic flag flip, we don't have
to recover the removal of the old attr because we never see
INCOMPLETE attributes in lookups.
For logged attributes, however, this does not work. The logged
attribute intents do not track the work that has been done as the
transaction rolls, and hence the only recovery mechanism we have is
"run the replace operation from scratch".
This is further exacerbated by the attempt to avoid needing the
INCOMPLETE flag to create an atomic swap. This means we can create
a second active attribute of the same name before we remove the
original. If we fail at any point after the create but before the
removal has completed, we end up with duplicate attributes in
the attr btree and recovery only tries to replace one of them.
There are several other failure modes where we can leave partially
allocated remote attributes that expose stale data, partially free
remote attributes that enable UAF based stale data exposure, etc.
TO fix this, we need a different algorithm for replace operations
when LARP is enabled. Luckily, it's not that complex if we take the
right first step. That is, the first thing we log is the attri
intent with the new name/value pair and mark the old attr as
INCOMPLETE in the same transaction.
From there, we then remove the old attr and keep relogging the
new name/value in the intent, such that we always know that we have
to create the new attr in recovery. Once the old attr is removed,
we then run a normal ATTR_CREATE operation relogging the intent as
we go. If the new attr is local, then it gets created in a single
atomic transaction that also logs the final intent done. If the new
attr is remote, the we set INCOMPLETE on the new attr while we
allocate and set the remote value, and then we clear the INCOMPLETE
flag at in the last transaction taht logs the final intent done.
If we fail at any point in this algorithm, log recovery will always
see the same state on disk: the new name/value in the intent, and
either an INCOMPLETE attr or no attr in the attr btree. If we find
an INCOMPLETE attr, we run the full replace starting with removing
the INCOMPLETE attr. If we don't find it, then we simply create the
new attr.
Notably, recovery of a failed create that has an INCOMPLETE flag set
is now the same - we start with the lookup of the INCOMPLETE attr,
and if that exists then we do the full replace recovery process,
otherwise we just create the new attr.
Hence changing the way we do the replace operation when LARP is
enabled allows us to use the same log recovery algorithm for both
the ATTR_CREATE and ATTR_REPLACE operations. This is also the same
algorithm we use for runtime ATTR_REPLACE operations (except for the
step setting up the initial conditions).
The result is that:
- ATTR_CREATE uses the same algorithm regardless of whether LARP is
enabled or not
- ATTR_REPLACE with larp=0 is identical to the old algorithm
- ATTR_REPLACE with larp=1 runs an unmodified attr removal algorithm
from the larp=0 code and then runs the unmodified ATTR_CREATE
code.
- log recovery when larp=1 runs the same ATTR_REPLACE algorithm as
it uses at runtime.
Because the state machine is now quite clean, changing the algorithm
is really just a case of changing the initial state and how the
states link together for the ATTR_REPLACE case. Hence it's not a
huge amount of code for what is a fairly substantial rework
of the attr logging and recovery algorithm....
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2022-05-12 13:12:56 +08:00
|
|
|
enum xfs_delattr_state replace_state)
|
2022-05-12 13:12:55 +08:00
|
|
|
{
|
|
|
|
struct xfs_da_args *args = attr->xattri_da_args;
|
xfs: ATTR_REPLACE algorithm with LARP enabled needs rework
We can't use the same algorithm for replacing an existing attribute
when logging attributes. The existing algorithm is essentially:
1. create new attr w/ INCOMPLETE
2. atomically flip INCOMPLETE flags between old + new attribute
3. remove old attr which is marked w/ INCOMPLETE
This algorithm guarantees that we see either the old or new
attribute, and if we fail after the atomic flag flip, we don't have
to recover the removal of the old attr because we never see
INCOMPLETE attributes in lookups.
For logged attributes, however, this does not work. The logged
attribute intents do not track the work that has been done as the
transaction rolls, and hence the only recovery mechanism we have is
"run the replace operation from scratch".
This is further exacerbated by the attempt to avoid needing the
INCOMPLETE flag to create an atomic swap. This means we can create
a second active attribute of the same name before we remove the
original. If we fail at any point after the create but before the
removal has completed, we end up with duplicate attributes in
the attr btree and recovery only tries to replace one of them.
There are several other failure modes where we can leave partially
allocated remote attributes that expose stale data, partially free
remote attributes that enable UAF based stale data exposure, etc.
TO fix this, we need a different algorithm for replace operations
when LARP is enabled. Luckily, it's not that complex if we take the
right first step. That is, the first thing we log is the attri
intent with the new name/value pair and mark the old attr as
INCOMPLETE in the same transaction.
From there, we then remove the old attr and keep relogging the
new name/value in the intent, such that we always know that we have
to create the new attr in recovery. Once the old attr is removed,
we then run a normal ATTR_CREATE operation relogging the intent as
we go. If the new attr is local, then it gets created in a single
atomic transaction that also logs the final intent done. If the new
attr is remote, the we set INCOMPLETE on the new attr while we
allocate and set the remote value, and then we clear the INCOMPLETE
flag at in the last transaction taht logs the final intent done.
If we fail at any point in this algorithm, log recovery will always
see the same state on disk: the new name/value in the intent, and
either an INCOMPLETE attr or no attr in the attr btree. If we find
an INCOMPLETE attr, we run the full replace starting with removing
the INCOMPLETE attr. If we don't find it, then we simply create the
new attr.
Notably, recovery of a failed create that has an INCOMPLETE flag set
is now the same - we start with the lookup of the INCOMPLETE attr,
and if that exists then we do the full replace recovery process,
otherwise we just create the new attr.
Hence changing the way we do the replace operation when LARP is
enabled allows us to use the same log recovery algorithm for both
the ATTR_CREATE and ATTR_REPLACE operations. This is also the same
algorithm we use for runtime ATTR_REPLACE operations (except for the
step setting up the initial conditions).
The result is that:
- ATTR_CREATE uses the same algorithm regardless of whether LARP is
enabled or not
- ATTR_REPLACE with larp=0 is identical to the old algorithm
- ATTR_REPLACE with larp=1 runs an unmodified attr removal algorithm
from the larp=0 code and then runs the unmodified ATTR_CREATE
code.
- log recovery when larp=1 runs the same ATTR_REPLACE algorithm as
it uses at runtime.
Because the state machine is now quite clean, changing the algorithm
is really just a case of changing the initial state and how the
states link together for the ATTR_REPLACE case. Hence it's not a
huge amount of code for what is a fairly substantial rework
of the attr logging and recovery algorithm....
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2022-05-12 13:12:56 +08:00
|
|
|
bool do_replace = args->op_flags & XFS_DA_OP_REPLACE;
|
2022-05-12 13:12:55 +08:00
|
|
|
|
xfs: ATTR_REPLACE algorithm with LARP enabled needs rework
We can't use the same algorithm for replacing an existing attribute
when logging attributes. The existing algorithm is essentially:
1. create new attr w/ INCOMPLETE
2. atomically flip INCOMPLETE flags between old + new attribute
3. remove old attr which is marked w/ INCOMPLETE
This algorithm guarantees that we see either the old or new
attribute, and if we fail after the atomic flag flip, we don't have
to recover the removal of the old attr because we never see
INCOMPLETE attributes in lookups.
For logged attributes, however, this does not work. The logged
attribute intents do not track the work that has been done as the
transaction rolls, and hence the only recovery mechanism we have is
"run the replace operation from scratch".
This is further exacerbated by the attempt to avoid needing the
INCOMPLETE flag to create an atomic swap. This means we can create
a second active attribute of the same name before we remove the
original. If we fail at any point after the create but before the
removal has completed, we end up with duplicate attributes in
the attr btree and recovery only tries to replace one of them.
There are several other failure modes where we can leave partially
allocated remote attributes that expose stale data, partially free
remote attributes that enable UAF based stale data exposure, etc.
TO fix this, we need a different algorithm for replace operations
when LARP is enabled. Luckily, it's not that complex if we take the
right first step. That is, the first thing we log is the attri
intent with the new name/value pair and mark the old attr as
INCOMPLETE in the same transaction.
From there, we then remove the old attr and keep relogging the
new name/value in the intent, such that we always know that we have
to create the new attr in recovery. Once the old attr is removed,
we then run a normal ATTR_CREATE operation relogging the intent as
we go. If the new attr is local, then it gets created in a single
atomic transaction that also logs the final intent done. If the new
attr is remote, the we set INCOMPLETE on the new attr while we
allocate and set the remote value, and then we clear the INCOMPLETE
flag at in the last transaction taht logs the final intent done.
If we fail at any point in this algorithm, log recovery will always
see the same state on disk: the new name/value in the intent, and
either an INCOMPLETE attr or no attr in the attr btree. If we find
an INCOMPLETE attr, we run the full replace starting with removing
the INCOMPLETE attr. If we don't find it, then we simply create the
new attr.
Notably, recovery of a failed create that has an INCOMPLETE flag set
is now the same - we start with the lookup of the INCOMPLETE attr,
and if that exists then we do the full replace recovery process,
otherwise we just create the new attr.
Hence changing the way we do the replace operation when LARP is
enabled allows us to use the same log recovery algorithm for both
the ATTR_CREATE and ATTR_REPLACE operations. This is also the same
algorithm we use for runtime ATTR_REPLACE operations (except for the
step setting up the initial conditions).
The result is that:
- ATTR_CREATE uses the same algorithm regardless of whether LARP is
enabled or not
- ATTR_REPLACE with larp=0 is identical to the old algorithm
- ATTR_REPLACE with larp=1 runs an unmodified attr removal algorithm
from the larp=0 code and then runs the unmodified ATTR_CREATE
code.
- log recovery when larp=1 runs the same ATTR_REPLACE algorithm as
it uses at runtime.
Because the state machine is now quite clean, changing the algorithm
is really just a case of changing the initial state and how the
states link together for the ATTR_REPLACE case. Hence it's not a
huge amount of code for what is a fairly substantial rework
of the attr logging and recovery algorithm....
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2022-05-12 13:12:56 +08:00
|
|
|
args->op_flags &= ~XFS_DA_OP_REPLACE;
|
|
|
|
if (do_replace) {
|
|
|
|
args->attr_filter &= ~XFS_ATTR_INCOMPLETE;
|
|
|
|
return replace_state;
|
|
|
|
}
|
|
|
|
return XFS_DAS_DONE;
|
2022-05-12 13:12:55 +08:00
|
|
|
}
|
|
|
|
|
xfs: separate out initial attr_set states
We currently use XFS_DAS_UNINIT for several steps in the attr_set
state machine. We use it for setting shortform xattrs, converting
from shortform to leaf, leaf add, leaf-to-node and node add. All of
these things are essentially known before we start the state machine
iterating, so we really should separate them out:
XFS_DAS_SF_ADD:
- tries to do a shortform add
- on success -> done
- on ENOSPC converts to leaf, -> XFS_DAS_LEAF_ADD
- on error, dies.
XFS_DAS_LEAF_ADD:
- tries to do leaf add
- on success:
- inline attr -> done
- remote xattr || REPLACE -> XFS_DAS_FOUND_LBLK
- on ENOSPC converts to node, -> XFS_DAS_NODE_ADD
- on error, dies
XFS_DAS_NODE_ADD:
- tries to do node add
- on success:
- inline attr -> done
- remote xattr || REPLACE -> XFS_DAS_FOUND_NBLK
- on error, dies
This makes it easier to understand how the state machine starts
up and sets us up on the path to further state machine
simplifications.
This also converts the DAS state tracepoints to use strings rather
than numbers, as converting between enums and numbers requires
manual counting rather than just reading the name.
This also introduces a XFS_DAS_DONE state so that we can trace
successful operation completions easily.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Allison Henderson<allison.henderson@oracle.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2022-05-12 13:12:52 +08:00
|
|
|
static int
|
2022-05-11 15:01:22 +08:00
|
|
|
xfs_attr_leaf_addname(
|
|
|
|
struct xfs_attr_item *attr)
|
|
|
|
{
|
|
|
|
struct xfs_da_args *args = attr->xattri_da_args;
|
|
|
|
int error;
|
|
|
|
|
xfs: separate out initial attr_set states
We currently use XFS_DAS_UNINIT for several steps in the attr_set
state machine. We use it for setting shortform xattrs, converting
from shortform to leaf, leaf add, leaf-to-node and node add. All of
these things are essentially known before we start the state machine
iterating, so we really should separate them out:
XFS_DAS_SF_ADD:
- tries to do a shortform add
- on success -> done
- on ENOSPC converts to leaf, -> XFS_DAS_LEAF_ADD
- on error, dies.
XFS_DAS_LEAF_ADD:
- tries to do leaf add
- on success:
- inline attr -> done
- remote xattr || REPLACE -> XFS_DAS_FOUND_LBLK
- on ENOSPC converts to node, -> XFS_DAS_NODE_ADD
- on error, dies
XFS_DAS_NODE_ADD:
- tries to do node add
- on success:
- inline attr -> done
- remote xattr || REPLACE -> XFS_DAS_FOUND_NBLK
- on error, dies
This makes it easier to understand how the state machine starts
up and sets us up on the path to further state machine
simplifications.
This also converts the DAS state tracepoints to use strings rather
than numbers, as converting between enums and numbers requires
manual counting rather than just reading the name.
This also introduces a XFS_DAS_DONE state so that we can trace
successful operation completions easily.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Allison Henderson<allison.henderson@oracle.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2022-05-12 13:12:52 +08:00
|
|
|
ASSERT(xfs_attr_is_leaf(args->dp));
|
2022-05-11 15:01:22 +08:00
|
|
|
|
xfs: separate out initial attr_set states
We current use XFS_DAS_UNINIT for several steps in the attr_set
state machine. We use it for setting shortform xattrs, converting
from shortform to leaf, leaf add, leaf-to-node and leaf add. All of
these things are essentially known before we start the state machine
iterating, so we really should separate them out:
XFS_DAS_SF_ADD:
- tries to do a shortform add
- on success -> done
- on ENOSPC converts to leaf, -> XFS_DAS_LEAF_ADD
- on error, dies.
XFS_DAS_LEAF_ADD:
- tries to do leaf add
- on success:
- inline attr -> done
- remote xattr || REPLACE -> XFS_DAS_FOUND_LBLK
- on ENOSPC converts to node, -> XFS_DAS_NODE_ADD
- on error, dies
XFS_DAS_NODE_ADD:
- tries to do node add
- on success:
- inline attr -> done
- remote xattr || REPLACE -> XFS_DAS_FOUND_NBLK
- on error, dies
This makes it easier to understand how the state machine starts
up and sets us up on the path to further state machine
simplifications.
This also converts the DAS state tracepoints to use strings rather
than numbers, as converting between enums and numbers requires
manual counting rather than just reading the name.
This also introduces a XFS_DAS_DONE state so that we can trace
successful operation completions easily.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Allison Henderson<allison.henderson@oracle.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2022-05-12 13:12:52 +08:00
|
|
|
/*
|
|
|
|
* Use the leaf buffer we may already hold locked as a result of
|
|
|
|
* a sf-to-leaf conversion. The held buffer is no longer valid
|
|
|
|
* after this call, regardless of the result.
|
|
|
|
*/
|
|
|
|
error = xfs_attr_leaf_try_add(args, attr->xattri_leaf_bp);
|
|
|
|
attr->xattri_leaf_bp = NULL;
|
2022-05-11 15:04:23 +08:00
|
|
|
|
xfs: separate out initial attr_set states
We current use XFS_DAS_UNINIT for several steps in the attr_set
state machine. We use it for setting shortform xattrs, converting
from shortform to leaf, leaf add, leaf-to-node and leaf add. All of
these things are essentially known before we start the state machine
iterating, so we really should separate them out:
XFS_DAS_SF_ADD:
- tries to do a shortform add
- on success -> done
- on ENOSPC converts to leaf, -> XFS_DAS_LEAF_ADD
- on error, dies.
XFS_DAS_LEAF_ADD:
- tries to do leaf add
- on success:
- inline attr -> done
- remote xattr || REPLACE -> XFS_DAS_FOUND_LBLK
- on ENOSPC converts to node, -> XFS_DAS_NODE_ADD
- on error, dies
XFS_DAS_NODE_ADD:
- tries to do node add
- on success:
- inline attr -> done
- remote xattr || REPLACE -> XFS_DAS_FOUND_NBLK
- on error, dies
This makes it easier to understand how the state machine starts
up and sets us up on the path to further state machine
simplifications.
This also converts the DAS state tracepoints to use strings rather
than numbers, as converting between enums and numbers requires
manual counting rather than just reading the name.
This also introduces a XFS_DAS_DONE state so that we can trace
successful operation completions easily.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Allison Henderson<allison.henderson@oracle.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2022-05-12 13:12:52 +08:00
|
|
|
if (error == -ENOSPC) {
|
|
|
|
error = xfs_attr3_leaf_to_node(args);
|
2022-05-11 15:01:22 +08:00
|
|
|
if (error)
|
|
|
|
return error;
|
|
|
|
|
xfs: separate out initial attr_set states
We current use XFS_DAS_UNINIT for several steps in the attr_set
state machine. We use it for setting shortform xattrs, converting
from shortform to leaf, leaf add, leaf-to-node and leaf add. All of
these things are essentially known before we start the state machine
iterating, so we really should separate them out:
XFS_DAS_SF_ADD:
- tries to do a shortform add
- on success -> done
- on ENOSPC converts to leaf, -> XFS_DAS_LEAF_ADD
- on error, dies.
XFS_DAS_LEAF_ADD:
- tries to do leaf add
- on success:
- inline attr -> done
- remote xattr || REPLACE -> XFS_DAS_FOUND_LBLK
- on ENOSPC converts to node, -> XFS_DAS_NODE_ADD
- on error, dies
XFS_DAS_NODE_ADD:
- tries to do node add
- on success:
- inline attr -> done
- remote xattr || REPLACE -> XFS_DAS_FOUND_NBLK
- on error, dies
This makes it easier to understand how the state machine starts
up and sets us up on the path to further state machine
simplifications.
This also converts the DAS state tracepoints to use strings rather
than numbers, as converting between enums and numbers requires
manual counting rather than just reading the name.
This also introduces a XFS_DAS_DONE state so that we can trace
successful operation completions easily.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Allison Henderson<allison.henderson@oracle.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2022-05-12 13:12:52 +08:00
|
|
|
/*
|
|
|
|
* We're not in leaf format anymore, so roll the transaction and
|
|
|
|
* retry the add to the newly allocated node block.
|
|
|
|
*/
|
|
|
|
attr->xattri_dela_state = XFS_DAS_NODE_ADD;
|
|
|
|
goto out;
|
2022-05-11 15:02:23 +08:00
|
|
|
}
|
|
|
|
if (error)
|
|
|
|
return error;
|
2022-05-11 15:01:22 +08:00
|
|
|
|
2022-05-11 15:02:23 +08:00
|
|
|
/*
|
|
|
|
* We need to commit and roll if we need to allocate remote xattr blocks
|
|
|
|
* or perform more xattr manipulations. Otherwise there is nothing more
|
|
|
|
* to do and we can return success.
|
|
|
|
*/
|
2022-05-12 13:12:55 +08:00
|
|
|
if (args->rmtblkno)
|
2022-05-12 13:12:55 +08:00
|
|
|
attr->xattri_dela_state = XFS_DAS_LEAF_SET_RMT;
|
2022-05-12 13:12:55 +08:00
|
|
|
else
|
xfs: ATTR_REPLACE algorithm with LARP enabled needs rework
We can't use the same algorithm for replacing an existing attribute
when logging attributes. The existing algorithm is essentially:
1. create new attr w/ INCOMPLETE
2. atomically flip INCOMPLETE flags between old + new attribute
3. remove old attr which is marked w/ INCOMPLETE
This algorithm guarantees that we see either the old or new
attribute, and if we fail after the atomic flag flip, we don't have
to recover the removal of the old attr because we never see
INCOMPLETE attributes in lookups.
For logged attributes, however, this does not work. The logged
attribute intents do not track the work that has been done as the
transaction rolls, and hence the only recovery mechanism we have is
"run the replace operation from scratch".
This is further exacerbated by the attempt to avoid needing the
INCOMPLETE flag to create an atomic swap. This means we can create
a second active attribute of the same name before we remove the
original. If we fail at any point after the create but before the
removal has completed, we end up with duplicate attributes in
the attr btree and recovery only tries to replace one of them.
There are several other failure modes where we can leave partially
allocated remote attributes that expose stale data, partially free
remote attributes that enable UAF based stale data exposure, etc.
TO fix this, we need a different algorithm for replace operations
when LARP is enabled. Luckily, it's not that complex if we take the
right first step. That is, the first thing we log is the attri
intent with the new name/value pair and mark the old attr as
INCOMPLETE in the same transaction.
From there, we then remove the old attr and keep relogging the
new name/value in the intent, such that we always know that we have
to create the new attr in recovery. Once the old attr is removed,
we then run a normal ATTR_CREATE operation relogging the intent as
we go. If the new attr is local, then it gets created in a single
atomic transaction that also logs the final intent done. If the new
attr is remote, the we set INCOMPLETE on the new attr while we
allocate and set the remote value, and then we clear the INCOMPLETE
flag at in the last transaction taht logs the final intent done.
If we fail at any point in this algorithm, log recovery will always
see the same state on disk: the new name/value in the intent, and
either an INCOMPLETE attr or no attr in the attr btree. If we find
an INCOMPLETE attr, we run the full replace starting with removing
the INCOMPLETE attr. If we don't find it, then we simply create the
new attr.
Notably, recovery of a failed create that has an INCOMPLETE flag set
is now the same - we start with the lookup of the INCOMPLETE attr,
and if that exists then we do the full replace recovery process,
otherwise we just create the new attr.
Hence changing the way we do the replace operation when LARP is
enabled allows us to use the same log recovery algorithm for both
the ATTR_CREATE and ATTR_REPLACE operations. This is also the same
algorithm we use for runtime ATTR_REPLACE operations (except for the
step setting up the initial conditions).
The result is that:
- ATTR_CREATE uses the same algorithm regardless of whether LARP is
enabled or not
- ATTR_REPLACE with larp=0 is identical to the old algorithm
- ATTR_REPLACE with larp=1 runs an unmodified attr removal algorithm
from the larp=0 code and then runs the unmodified ATTR_CREATE
code.
- log recovery when larp=1 runs the same ATTR_REPLACE algorithm as
it uses at runtime.
Because the state machine is now quite clean, changing the algorithm
is really just a case of changing the initial state and how the
states link together for the ATTR_REPLACE case. Hence it's not a
huge amount of code for what is a fairly substantial rework
of the attr logging and recovery algorithm....
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2022-05-12 13:12:56 +08:00
|
|
|
attr->xattri_dela_state = xfs_attr_complete_op(attr,
|
|
|
|
XFS_DAS_LEAF_REPLACE);
|
2022-05-11 15:02:23 +08:00
|
|
|
out:
|
2022-05-11 15:01:22 +08:00
|
|
|
trace_xfs_attr_leaf_addname_return(attr->xattri_dela_state, args->dp);
|
2022-05-11 15:02:23 +08:00
|
|
|
return error;
|
2022-05-11 15:01:22 +08:00
|
|
|
}
|
|
|
|
|
2022-05-12 13:12:55 +08:00
|
|
|
/*
|
|
|
|
* Add an entry to a node format attr tree.
|
|
|
|
*
|
|
|
|
* Note that we might still have a leaf here - xfs_attr_is_leaf() cannot tell
|
|
|
|
* the difference between leaf + remote attr blocks and a node format tree,
|
|
|
|
* so we may still end up having to convert from leaf to node format here.
|
|
|
|
*/
|
xfs: separate out initial attr_set states
We current use XFS_DAS_UNINIT for several steps in the attr_set
state machine. We use it for setting shortform xattrs, converting
from shortform to leaf, leaf add, leaf-to-node and leaf add. All of
these things are essentially known before we start the state machine
iterating, so we really should separate them out:
XFS_DAS_SF_ADD:
- tries to do a shortform add
- on success -> done
- on ENOSPC converts to leaf, -> XFS_DAS_LEAF_ADD
- on error, dies.
XFS_DAS_LEAF_ADD:
- tries to do leaf add
- on success:
- inline attr -> done
- remote xattr || REPLACE -> XFS_DAS_FOUND_LBLK
- on ENOSPC converts to node, -> XFS_DAS_NODE_ADD
- on error, dies
XFS_DAS_NODE_ADD:
- tries to do node add
- on success:
- inline attr -> done
- remote xattr || REPLACE -> XFS_DAS_FOUND_NBLK
- on error, dies
This makes it easier to understand how the state machine starts
up and sets us up on the path to further state machine
simplifications.
This also converts the DAS state tracepoints to use strings rather
than numbers, as converting between enums and numbers requires
manual counting rather than just reading the name.
This also introduces a XFS_DAS_DONE state so that we can trace
successful operation completions easily.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Allison Henderson<allison.henderson@oracle.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2022-05-12 13:12:52 +08:00
|
|
|
static int
|
|
|
|
xfs_attr_node_addname(
|
|
|
|
struct xfs_attr_item *attr)
|
|
|
|
{
|
|
|
|
struct xfs_da_args *args = attr->xattri_da_args;
|
|
|
|
int error;
|
|
|
|
|
|
|
|
ASSERT(!attr->xattri_leaf_bp);
|
|
|
|
|
|
|
|
error = xfs_attr_node_addname_find_attr(attr);
|
|
|
|
if (error)
|
|
|
|
return error;
|
|
|
|
|
|
|
|
error = xfs_attr_node_try_addname(attr);
|
2022-05-12 13:12:55 +08:00
|
|
|
if (error == -ENOSPC) {
|
|
|
|
error = xfs_attr3_leaf_to_node(args);
|
|
|
|
if (error)
|
|
|
|
return error;
|
|
|
|
/*
|
|
|
|
* No state change, we really are in node form now
|
|
|
|
* but we need the transaction rolled to continue.
|
|
|
|
*/
|
|
|
|
goto out;
|
|
|
|
}
|
xfs: separate out initial attr_set states
We current use XFS_DAS_UNINIT for several steps in the attr_set
state machine. We use it for setting shortform xattrs, converting
from shortform to leaf, leaf add, leaf-to-node and leaf add. All of
these things are essentially known before we start the state machine
iterating, so we really should separate them out:
XFS_DAS_SF_ADD:
- tries to do a shortform add
- on success -> done
- on ENOSPC converts to leaf, -> XFS_DAS_LEAF_ADD
- on error, dies.
XFS_DAS_LEAF_ADD:
- tries to do leaf add
- on success:
- inline attr -> done
- remote xattr || REPLACE -> XFS_DAS_FOUND_LBLK
- on ENOSPC converts to node, -> XFS_DAS_NODE_ADD
- on error, dies
XFS_DAS_NODE_ADD:
- tries to do node add
- on success:
- inline attr -> done
- remote xattr || REPLACE -> XFS_DAS_FOUND_NBLK
- on error, dies
This makes it easier to understand how the state machine starts
up and sets us up on the path to further state machine
simplifications.
This also converts the DAS state tracepoints to use strings rather
than numbers, as converting between enums and numbers requires
manual counting rather than just reading the name.
This also introduces a XFS_DAS_DONE state so that we can trace
successful operation completions easily.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Allison Henderson<allison.henderson@oracle.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2022-05-12 13:12:52 +08:00
|
|
|
if (error)
|
|
|
|
return error;
|
|
|
|
|
2022-05-12 13:12:55 +08:00
|
|
|
if (args->rmtblkno)
|
2022-05-12 13:12:55 +08:00
|
|
|
attr->xattri_dela_state = XFS_DAS_NODE_SET_RMT;
|
2022-05-12 13:12:55 +08:00
|
|
|
else
|
xfs: ATTR_REPLACE algorithm with LARP enabled needs rework
We can't use the same algorithm for replacing an existing attribute
when logging attributes. The existing algorithm is essentially:
1. create new attr w/ INCOMPLETE
2. atomically flip INCOMPLETE flags between old + new attribute
3. remove old attr which is marked w/ INCOMPLETE
This algorithm guarantees that we see either the old or new
attribute, and if we fail after the atomic flag flip, we don't have
to recover the removal of the old attr because we never see
INCOMPLETE attributes in lookups.
For logged attributes, however, this does not work. The logged
attribute intents do not track the work that has been done as the
transaction rolls, and hence the only recovery mechanism we have is
"run the replace operation from scratch".
This is further exacerbated by the attempt to avoid needing the
INCOMPLETE flag to create an atomic swap. This means we can create
a second active attribute of the same name before we remove the
original. If we fail at any point after the create but before the
removal has completed, we end up with duplicate attributes in
the attr btree and recovery only tries to replace one of them.
There are several other failure modes where we can leave partially
allocated remote attributes that expose stale data, partially free
remote attributes that enable UAF based stale data exposure, etc.
TO fix this, we need a different algorithm for replace operations
when LARP is enabled. Luckily, it's not that complex if we take the
right first step. That is, the first thing we log is the attri
intent with the new name/value pair and mark the old attr as
INCOMPLETE in the same transaction.
From there, we then remove the old attr and keep relogging the
new name/value in the intent, such that we always know that we have
to create the new attr in recovery. Once the old attr is removed,
we then run a normal ATTR_CREATE operation relogging the intent as
we go. If the new attr is local, then it gets created in a single
atomic transaction that also logs the final intent done. If the new
attr is remote, the we set INCOMPLETE on the new attr while we
allocate and set the remote value, and then we clear the INCOMPLETE
flag at in the last transaction taht logs the final intent done.
If we fail at any point in this algorithm, log recovery will always
see the same state on disk: the new name/value in the intent, and
either an INCOMPLETE attr or no attr in the attr btree. If we find
an INCOMPLETE attr, we run the full replace starting with removing
the INCOMPLETE attr. If we don't find it, then we simply create the
new attr.
Notably, recovery of a failed create that has an INCOMPLETE flag set
is now the same - we start with the lookup of the INCOMPLETE attr,
and if that exists then we do the full replace recovery process,
otherwise we just create the new attr.
Hence changing the way we do the replace operation when LARP is
enabled allows us to use the same log recovery algorithm for both
the ATTR_CREATE and ATTR_REPLACE operations. This is also the same
algorithm we use for runtime ATTR_REPLACE operations (except for the
step setting up the initial conditions).
The result is that:
- ATTR_CREATE uses the same algorithm regardless of whether LARP is
enabled or not
- ATTR_REPLACE with larp=0 is identical to the old algorithm
- ATTR_REPLACE with larp=1 runs an unmodified attr removal algorithm
from the larp=0 code and then runs the unmodified ATTR_CREATE
code.
- log recovery when larp=1 runs the same ATTR_REPLACE algorithm as
it uses at runtime.
Because the state machine is now quite clean, changing the algorithm
is really just a case of changing the initial state and how the
states link together for the ATTR_REPLACE case. Hence it's not a
huge amount of code for what is a fairly substantial rework
of the attr logging and recovery algorithm....
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2022-05-12 13:12:56 +08:00
|
|
|
attr->xattri_dela_state = xfs_attr_complete_op(attr,
|
|
|
|
XFS_DAS_NODE_REPLACE);
|
2022-05-12 13:12:55 +08:00
|
|
|
out:
|
xfs: separate out initial attr_set states
We current use XFS_DAS_UNINIT for several steps in the attr_set
state machine. We use it for setting shortform xattrs, converting
from shortform to leaf, leaf add, leaf-to-node and leaf add. All of
these things are essentially known before we start the state machine
iterating, so we really should separate them out:
XFS_DAS_SF_ADD:
- tries to do a shortform add
- on success -> done
- on ENOSPC converts to leaf, -> XFS_DAS_LEAF_ADD
- on error, dies.
XFS_DAS_LEAF_ADD:
- tries to do leaf add
- on success:
- inline attr -> done
- remote xattr || REPLACE -> XFS_DAS_FOUND_LBLK
- on ENOSPC converts to node, -> XFS_DAS_NODE_ADD
- on error, dies
XFS_DAS_NODE_ADD:
- tries to do node add
- on success:
- inline attr -> done
- remote xattr || REPLACE -> XFS_DAS_FOUND_NBLK
- on error, dies
This makes it easier to understand how the state machine starts
up and sets us up on the path to further state machine
simplifications.
This also converts the DAS state tracepoints to use strings rather
than numbers, as converting between enums and numbers requires
manual counting rather than just reading the name.
This also introduces a XFS_DAS_DONE state so that we can trace
successful operation completions easily.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Allison Henderson<allison.henderson@oracle.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2022-05-12 13:12:52 +08:00
|
|
|
trace_xfs_attr_node_addname_return(attr->xattri_dela_state, args->dp);
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
2022-05-12 13:12:55 +08:00
|
|
|
static int
|
|
|
|
xfs_attr_rmtval_alloc(
|
|
|
|
struct xfs_attr_item *attr)
|
|
|
|
{
|
|
|
|
struct xfs_da_args *args = attr->xattri_da_args;
|
|
|
|
int error = 0;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If there was an out-of-line value, allocate the blocks we
|
|
|
|
* identified for its storage and copy the value. This is done
|
|
|
|
* after we create the attribute so that we don't overflow the
|
|
|
|
* maximum size of a transaction and/or hit a deadlock.
|
|
|
|
*/
|
|
|
|
if (attr->xattri_blkcnt > 0) {
|
|
|
|
error = xfs_attr_rmtval_set_blk(attr);
|
|
|
|
if (error)
|
|
|
|
return error;
|
2022-05-12 13:12:55 +08:00
|
|
|
/* Roll the transaction only if there is more to allocate. */
|
2022-05-12 13:12:55 +08:00
|
|
|
if (attr->xattri_blkcnt > 0)
|
2022-05-12 13:12:55 +08:00
|
|
|
goto out;
|
2022-05-12 13:12:55 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
error = xfs_attr_rmtval_set_value(args);
|
|
|
|
if (error)
|
|
|
|
return error;
|
|
|
|
|
xfs: ATTR_REPLACE algorithm with LARP enabled needs rework
We can't use the same algorithm for replacing an existing attribute
when logging attributes. The existing algorithm is essentially:
1. create new attr w/ INCOMPLETE
2. atomically flip INCOMPLETE flags between old + new attribute
3. remove old attr which is marked w/ INCOMPLETE
This algorithm guarantees that we see either the old or new
attribute, and if we fail after the atomic flag flip, we don't have
to recover the removal of the old attr because we never see
INCOMPLETE attributes in lookups.
For logged attributes, however, this does not work. The logged
attribute intents do not track the work that has been done as the
transaction rolls, and hence the only recovery mechanism we have is
"run the replace operation from scratch".
This is further exacerbated by the attempt to avoid needing the
INCOMPLETE flag to create an atomic swap. This means we can create
a second active attribute of the same name before we remove the
original. If we fail at any point after the create but before the
removal has completed, we end up with duplicate attributes in
the attr btree and recovery only tries to replace one of them.
There are several other failure modes where we can leave partially
allocated remote attributes that expose stale data, partially free
remote attributes that enable UAF based stale data exposure, etc.
TO fix this, we need a different algorithm for replace operations
when LARP is enabled. Luckily, it's not that complex if we take the
right first step. That is, the first thing we log is the attri
intent with the new name/value pair and mark the old attr as
INCOMPLETE in the same transaction.
From there, we then remove the old attr and keep relogging the
new name/value in the intent, such that we always know that we have
to create the new attr in recovery. Once the old attr is removed,
we then run a normal ATTR_CREATE operation relogging the intent as
we go. If the new attr is local, then it gets created in a single
atomic transaction that also logs the final intent done. If the new
attr is remote, the we set INCOMPLETE on the new attr while we
allocate and set the remote value, and then we clear the INCOMPLETE
flag at in the last transaction taht logs the final intent done.
If we fail at any point in this algorithm, log recovery will always
see the same state on disk: the new name/value in the intent, and
either an INCOMPLETE attr or no attr in the attr btree. If we find
an INCOMPLETE attr, we run the full replace starting with removing
the INCOMPLETE attr. If we don't find it, then we simply create the
new attr.
Notably, recovery of a failed create that has an INCOMPLETE flag set
is now the same - we start with the lookup of the INCOMPLETE attr,
and if that exists then we do the full replace recovery process,
otherwise we just create the new attr.
Hence changing the way we do the replace operation when LARP is
enabled allows us to use the same log recovery algorithm for both
the ATTR_CREATE and ATTR_REPLACE operations. This is also the same
algorithm we use for runtime ATTR_REPLACE operations (except for the
step setting up the initial conditions).
The result is that:
- ATTR_CREATE uses the same algorithm regardless of whether LARP is
enabled or not
- ATTR_REPLACE with larp=0 is identical to the old algorithm
- ATTR_REPLACE with larp=1 runs an unmodified attr removal algorithm
from the larp=0 code and then runs the unmodified ATTR_CREATE
code.
- log recovery when larp=1 runs the same ATTR_REPLACE algorithm as
it uses at runtime.
Because the state machine is now quite clean, changing the algorithm
is really just a case of changing the initial state and how the
states link together for the ATTR_REPLACE case. Hence it's not a
huge amount of code for what is a fairly substantial rework
of the attr logging and recovery algorithm....
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2022-05-12 13:12:56 +08:00
|
|
|
attr->xattri_dela_state = xfs_attr_complete_op(attr,
|
|
|
|
++attr->xattri_dela_state);
|
|
|
|
/*
|
|
|
|
* If we are not doing a rename, we've finished the operation but still
|
|
|
|
* have to clear the incomplete flag protecting the new attr from
|
|
|
|
* exposing partially initialised state if we crash during creation.
|
|
|
|
*/
|
|
|
|
if (attr->xattri_dela_state == XFS_DAS_DONE)
|
2022-05-12 13:12:55 +08:00
|
|
|
error = xfs_attr3_leaf_clearflag(args);
|
|
|
|
out:
|
|
|
|
trace_xfs_attr_rmtval_alloc(attr->xattri_dela_state, args->dp);
|
|
|
|
return error;
|
|
|
|
}
|
xfs: separate out initial attr_set states
We currently use XFS_DAS_UNINIT for several steps in the attr_set
state machine. We use it for setting shortform xattrs, converting
from shortform to leaf, leaf add, leaf-to-node and leaf add. All of
these things are essentially known before we start the state machine
iterating, so we really should separate them out:
XFS_DAS_SF_ADD:
- tries to do a shortform add
- on success -> done
- on ENOSPC converts to leaf, -> XFS_DAS_LEAF_ADD
- on error, dies.
XFS_DAS_LEAF_ADD:
- tries to do leaf add
- on success:
- inline attr -> done
- remote xattr || REPLACE -> XFS_DAS_FOUND_LBLK
- on ENOSPC converts to node, -> XFS_DAS_NODE_ADD
- on error, dies
XFS_DAS_NODE_ADD:
- tries to do node add
- on success:
- inline attr -> done
- remote xattr || REPLACE -> XFS_DAS_FOUND_NBLK
- on error, dies
This makes it easier to understand how the state machine starts
up and sets us up on the path to further state machine
simplifications.
This also converts the DAS state tracepoints to use strings rather
than numbers, as converting between enums and numbers requires
manual counting rather than just reading the name.
This also introduces a XFS_DAS_DONE state so that we can trace
successful operation completions easily.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Allison Henderson<allison.henderson@oracle.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2022-05-12 13:12:52 +08:00
|
|
|
|
2022-05-12 13:12:56 +08:00
|
|
|
/*
 * Flag an attribute entry as INCOMPLETE, first recording in @state what is
 * needed to find the relevant buffers again when the entry is deleted later.
 *
 * Returns 0 on success or the error from xfs_attr_fillstate() /
 * xfs_attr3_leaf_setflag().
 */
static int
xfs_attr_leaf_mark_incomplete(
	struct xfs_da_args	*args,
	struct xfs_da_state	*state)
{
	int			error;

	/*
	 * Store the disk block numbers in the state structure so the buffers
	 * can be re-read after the several transaction commits made by the
	 * calls that follow.
	 */
	error = xfs_attr_fillstate(state);

	/* Only set the INCOMPLETE flag once the state has been saved. */
	if (!error)
		error = xfs_attr3_leaf_setflag(args);

	return error;
}
|
|
|
|
|
2022-05-22 13:59:34 +08:00
|
|
|
/* Ensure the da state of an xattr deferred work item is ready to go. */
|
|
|
|
static inline void
|
|
|
|
xfs_attr_item_init_da_state(
|
|
|
|
struct xfs_attr_item *attr)
|
|
|
|
{
|
|
|
|
struct xfs_da_args *args = attr->xattri_da_args;
|
|
|
|
|
|
|
|
if (!attr->xattri_da_state)
|
|
|
|
attr->xattri_da_state = xfs_da_state_alloc(args);
|
|
|
|
else
|
|
|
|
xfs_da_state_reset(attr->xattri_da_state, args);
|
|
|
|
}
|
|
|
|
|
2022-05-12 13:12:56 +08:00
|
|
|
/*
|
|
|
|
* Initial setup for xfs_attr_node_removename. Make sure the attr is there and
|
|
|
|
* the blocks are valid. Attr keys with remote blocks will be marked
|
|
|
|
* incomplete.
|
|
|
|
*/
|
|
|
|
static
|
|
|
|
int xfs_attr_node_removename_setup(
|
|
|
|
struct xfs_attr_item *attr)
|
|
|
|
{
|
|
|
|
struct xfs_da_args *args = attr->xattri_da_args;
|
2022-05-20 12:41:34 +08:00
|
|
|
struct xfs_da_state *state;
|
2022-05-12 13:12:56 +08:00
|
|
|
int error;
|
|
|
|
|
2022-05-22 13:59:34 +08:00
|
|
|
xfs_attr_item_init_da_state(attr);
|
|
|
|
error = xfs_attr_node_lookup(args, attr->xattri_da_state);
|
2022-05-12 13:12:56 +08:00
|
|
|
if (error != -EEXIST)
|
|
|
|
goto out;
|
|
|
|
error = 0;
|
|
|
|
|
2022-05-20 12:41:34 +08:00
|
|
|
state = attr->xattri_da_state;
|
|
|
|
ASSERT(state->path.blk[state->path.active - 1].bp != NULL);
|
|
|
|
ASSERT(state->path.blk[state->path.active - 1].magic ==
|
2022-05-12 13:12:56 +08:00
|
|
|
XFS_ATTR_LEAF_MAGIC);
|
|
|
|
|
2022-05-20 12:41:34 +08:00
|
|
|
error = xfs_attr_leaf_mark_incomplete(args, state);
|
2022-05-12 13:12:56 +08:00
|
|
|
if (error)
|
|
|
|
goto out;
|
|
|
|
if (args->rmtblkno > 0)
|
2022-05-12 13:12:56 +08:00
|
|
|
error = xfs_attr_rmtval_invalidate(args);
|
|
|
|
out:
|
2022-05-20 12:41:34 +08:00
|
|
|
if (error) {
|
|
|
|
xfs_da_state_free(attr->xattri_da_state);
|
|
|
|
attr->xattri_da_state = NULL;
|
|
|
|
}
|
2022-05-12 13:12:56 +08:00
|
|
|
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
2022-05-12 13:12:55 +08:00
|
|
|
/*
|
|
|
|
* Remove the original attr we have just replaced. This is dependent on the
|
|
|
|
* original lookup and insert placing the old attr in args->blkno/args->index
|
|
|
|
* and the new attr in args->blkno2/args->index2.
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
xfs_attr_leaf_remove_attr(
|
|
|
|
struct xfs_attr_item *attr)
|
|
|
|
{
|
|
|
|
struct xfs_da_args *args = attr->xattri_da_args;
|
|
|
|
struct xfs_inode *dp = args->dp;
|
|
|
|
struct xfs_buf *bp = NULL;
|
|
|
|
int forkoff;
|
|
|
|
int error;
|
|
|
|
|
|
|
|
error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno,
|
|
|
|
&bp);
|
|
|
|
if (error)
|
|
|
|
return error;
|
|
|
|
|
|
|
|
xfs_attr3_leaf_remove(bp, args);
|
|
|
|
|
|
|
|
forkoff = xfs_attr_shortform_allfit(bp, dp);
|
|
|
|
if (forkoff)
|
|
|
|
error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
|
|
|
|
/* bp is gone due to xfs_da_shrink_inode */
|
|
|
|
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Shrink an attribute from leaf to shortform. Used by the node format remove
|
|
|
|
* path when the node format collapses to a single block and so we have to check
|
|
|
|
* if it can be collapsed further.
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
xfs_attr_leaf_shrink(
|
2022-05-12 13:12:56 +08:00
|
|
|
struct xfs_da_args *args)
|
2022-05-12 13:12:55 +08:00
|
|
|
{
|
|
|
|
struct xfs_inode *dp = args->dp;
|
|
|
|
struct xfs_buf *bp;
|
2022-05-12 13:12:56 +08:00
|
|
|
int forkoff;
|
|
|
|
int error;
|
2022-05-12 13:12:55 +08:00
|
|
|
|
|
|
|
if (!xfs_attr_is_leaf(dp))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
error = xfs_attr3_leaf_read(args->trans, args->dp, 0, &bp);
|
|
|
|
if (error)
|
|
|
|
return error;
|
|
|
|
|
|
|
|
forkoff = xfs_attr_shortform_allfit(bp, dp);
|
|
|
|
if (forkoff) {
|
|
|
|
error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
|
|
|
|
/* bp is gone due to xfs_da_shrink_inode */
|
|
|
|
} else {
|
|
|
|
xfs_trans_brelse(args->trans, bp);
|
|
|
|
}
|
|
|
|
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
2018-10-18 14:21:16 +08:00
|
|
|
/*
|
2022-05-12 13:12:55 +08:00
|
|
|
* Run the attribute operation specified in @attr.
|
|
|
|
*
|
|
|
|
* This routine is meant to function as a delayed operation and will set the
|
|
|
|
* state to XFS_DAS_DONE when the operation is complete. Calling functions will
|
|
|
|
* need to handle this, and recall the function until either an error or
|
|
|
|
* XFS_DAS_DONE is detected.
|
2018-10-18 14:21:16 +08:00
|
|
|
*/
|
|
|
|
int
|
2021-05-22 06:48:13 +08:00
|
|
|
xfs_attr_set_iter(
|
2022-05-11 15:01:22 +08:00
|
|
|
struct xfs_attr_item *attr)
|
2018-10-18 14:21:16 +08:00
|
|
|
{
|
2022-05-11 15:01:22 +08:00
|
|
|
struct xfs_da_args *args = attr->xattri_da_args;
|
2022-05-12 13:12:55 +08:00
|
|
|
int error = 0;
|
2021-05-22 06:48:13 +08:00
|
|
|
|
|
|
|
/* State machine switch */
|
2022-05-12 13:12:54 +08:00
|
|
|
next_state:
|
2022-05-11 15:01:22 +08:00
|
|
|
switch (attr->xattri_dela_state) {
|
2021-05-22 06:48:13 +08:00
|
|
|
case XFS_DAS_UNINIT:
|
xfs: separate out initial attr_set states
We current use XFS_DAS_UNINIT for several steps in the attr_set
state machine. We use it for setting shortform xattrs, converting
from shortform to leaf, leaf add, leaf-to-node and leaf add. All of
these things are essentially known before we start the state machine
iterating, so we really should separate them out:
XFS_DAS_SF_ADD:
- tries to do a shortform add
- on success -> done
- on ENOSPC converts to leaf, -> XFS_DAS_LEAF_ADD
- on error, dies.
XFS_DAS_LEAF_ADD:
- tries to do leaf add
- on success:
- inline attr -> done
- remote xattr || REPLACE -> XFS_DAS_FOUND_LBLK
- on ENOSPC converts to node, -> XFS_DAS_NODE_ADD
- on error, dies
XFS_DAS_NODE_ADD:
- tries to do node add
- on success:
- inline attr -> done
- remote xattr || REPLACE -> XFS_DAS_FOUND_NBLK
- on error, dies
This makes it easier to understand how the state machine starts
up and sets us up on the path to further state machine
simplifications.
This also converts the DAS state tracepoints to use strings rather
than numbers, as converting between enums and numbers requires
manual counting rather than just reading the name.
This also introduces a XFS_DAS_DONE state so that we can trace
successful operation completions easily.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Allison Henderson<allison.henderson@oracle.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2022-05-12 13:12:52 +08:00
|
|
|
ASSERT(0);
|
|
|
|
return -EFSCORRUPTED;
|
|
|
|
case XFS_DAS_SF_ADD:
|
|
|
|
return xfs_attr_sf_addname(attr);
|
|
|
|
case XFS_DAS_LEAF_ADD:
|
2022-05-11 15:01:22 +08:00
|
|
|
return xfs_attr_leaf_addname(attr);
|
xfs: separate out initial attr_set states
We current use XFS_DAS_UNINIT for several steps in the attr_set
state machine. We use it for setting shortform xattrs, converting
from shortform to leaf, leaf add, leaf-to-node and leaf add. All of
these things are essentially known before we start the state machine
iterating, so we really should separate them out:
XFS_DAS_SF_ADD:
- tries to do a shortform add
- on success -> done
- on ENOSPC converts to leaf, -> XFS_DAS_LEAF_ADD
- on error, dies.
XFS_DAS_LEAF_ADD:
- tries to do leaf add
- on success:
- inline attr -> done
- remote xattr || REPLACE -> XFS_DAS_FOUND_LBLK
- on ENOSPC converts to node, -> XFS_DAS_NODE_ADD
- on error, dies
XFS_DAS_NODE_ADD:
- tries to do node add
- on success:
- inline attr -> done
- remote xattr || REPLACE -> XFS_DAS_FOUND_NBLK
- on error, dies
This makes it easier to understand how the state machine starts
up and sets us up on the path to further state machine
simplifications.
This also converts the DAS state tracepoints to use strings rather
than numbers, as converting between enums and numbers requires
manual counting rather than just reading the name.
This also introduces a XFS_DAS_DONE state so that we can trace
successful operation completions easily.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Allison Henderson<allison.henderson@oracle.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2022-05-12 13:12:52 +08:00
|
|
|
case XFS_DAS_NODE_ADD:
|
|
|
|
return xfs_attr_node_addname(attr);
|
2021-02-13 03:27:14 +08:00
|
|
|
|
2022-05-12 13:12:56 +08:00
|
|
|
case XFS_DAS_SF_REMOVE:
|
xfs: ATTR_REPLACE algorithm with LARP enabled needs rework
We can't use the same algorithm for replacing an existing attribute
when logging attributes. The existing algorithm is essentially:
1. create new attr w/ INCOMPLETE
2. atomically flip INCOMPLETE flags between old + new attribute
3. remove old attr which is marked w/ INCOMPLETE
This algorithm guarantees that we see either the old or new
attribute, and if we fail after the atomic flag flip, we don't have
to recover the removal of the old attr because we never see
INCOMPLETE attributes in lookups.
For logged attributes, however, this does not work. The logged
attribute intents do not track the work that has been done as the
transaction rolls, and hence the only recovery mechanism we have is
"run the replace operation from scratch".
This is further exacerbated by the attempt to avoid needing the
INCOMPLETE flag to create an atomic swap. This means we can create
a second active attribute of the same name before we remove the
original. If we fail at any point after the create but before the
removal has completed, we end up with duplicate attributes in
the attr btree and recovery only tries to replace one of them.
There are several other failure modes where we can leave partially
allocated remote attributes that expose stale data, partially free
remote attributes that enable UAF based stale data exposure, etc.
TO fix this, we need a different algorithm for replace operations
when LARP is enabled. Luckily, it's not that complex if we take the
right first step. That is, the first thing we log is the attri
intent with the new name/value pair and mark the old attr as
INCOMPLETE in the same transaction.
From there, we then remove the old attr and keep relogging the
new name/value in the intent, such that we always know that we have
to create the new attr in recovery. Once the old attr is removed,
we then run a normal ATTR_CREATE operation relogging the intent as
we go. If the new attr is local, then it gets created in a single
atomic transaction that also logs the final intent done. If the new
attr is remote, the we set INCOMPLETE on the new attr while we
allocate and set the remote value, and then we clear the INCOMPLETE
flag at in the last transaction taht logs the final intent done.
If we fail at any point in this algorithm, log recovery will always
see the same state on disk: the new name/value in the intent, and
either an INCOMPLETE attr or no attr in the attr btree. If we find
an INCOMPLETE attr, we run the full replace starting with removing
the INCOMPLETE attr. If we don't find it, then we simply create the
new attr.
Notably, recovery of a failed create that has an INCOMPLETE flag set
is now the same - we start with the lookup of the INCOMPLETE attr,
and if that exists then we do the full replace recovery process,
otherwise we just create the new attr.
Hence changing the way we do the replace operation when LARP is
enabled allows us to use the same log recovery algorithm for both
the ATTR_CREATE and ATTR_REPLACE operations. This is also the same
algorithm we use for runtime ATTR_REPLACE operations (except for the
step setting up the initial conditions).
The result is that:
- ATTR_CREATE uses the same algorithm regardless of whether LARP is
enabled or not
- ATTR_REPLACE with larp=0 is identical to the old algorithm
- ATTR_REPLACE with larp=1 runs an unmodified attr removal algorithm
from the larp=0 code and then runs the unmodified ATTR_CREATE
code.
- log recovery when larp=1 runs the same ATTR_REPLACE algorithm as
it uses at runtime.
Because the state machine is now quite clean, changing the algorithm
is really just a case of changing the initial state and how the
states link together for the ATTR_REPLACE case. Hence it's not a
huge amount of code for what is a fairly substantial rework
of the attr logging and recovery algorithm....
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2022-05-12 13:12:56 +08:00
|
|
|
error = xfs_attr_sf_removename(args);
|
|
|
|
attr->xattri_dela_state = xfs_attr_complete_op(attr,
|
|
|
|
xfs_attr_init_add_state(args));
|
|
|
|
break;
|
2022-05-12 13:12:56 +08:00
|
|
|
case XFS_DAS_LEAF_REMOVE:
|
xfs: ATTR_REPLACE algorithm with LARP enabled needs rework
We can't use the same algorithm for replacing an existing attribute
when logging attributes. The existing algorithm is essentially:
1. create new attr w/ INCOMPLETE
2. atomically flip INCOMPLETE flags between old + new attribute
3. remove old attr which is marked w/ INCOMPLETE
This algorithm guarantees that we see either the old or new
attribute, and if we fail after the atomic flag flip, we don't have
to recover the removal of the old attr because we never see
INCOMPLETE attributes in lookups.
For logged attributes, however, this does not work. The logged
attribute intents do not track the work that has been done as the
transaction rolls, and hence the only recovery mechanism we have is
"run the replace operation from scratch".
This is further exacerbated by the attempt to avoid needing the
INCOMPLETE flag to create an atomic swap. This means we can create
a second active attribute of the same name before we remove the
original. If we fail at any point after the create but before the
removal has completed, we end up with duplicate attributes in
the attr btree and recovery only tries to replace one of them.
There are several other failure modes where we can leave partially
allocated remote attributes that expose stale data, partially free
remote attributes that enable UAF based stale data exposure, etc.
TO fix this, we need a different algorithm for replace operations
when LARP is enabled. Luckily, it's not that complex if we take the
right first step. That is, the first thing we log is the attri
intent with the new name/value pair and mark the old attr as
INCOMPLETE in the same transaction.
From there, we then remove the old attr and keep relogging the
new name/value in the intent, such that we always know that we have
to create the new attr in recovery. Once the old attr is removed,
we then run a normal ATTR_CREATE operation relogging the intent as
we go. If the new attr is local, then it gets created in a single
atomic transaction that also logs the final intent done. If the new
attr is remote, the we set INCOMPLETE on the new attr while we
allocate and set the remote value, and then we clear the INCOMPLETE
flag at in the last transaction taht logs the final intent done.
If we fail at any point in this algorithm, log recovery will always
see the same state on disk: the new name/value in the intent, and
either an INCOMPLETE attr or no attr in the attr btree. If we find
an INCOMPLETE attr, we run the full replace starting with removing
the INCOMPLETE attr. If we don't find it, then we simply create the
new attr.
Notably, recovery of a failed create that has an INCOMPLETE flag set
is now the same - we start with the lookup of the INCOMPLETE attr,
and if that exists then we do the full replace recovery process,
otherwise we just create the new attr.
Hence changing the way we do the replace operation when LARP is
enabled allows us to use the same log recovery algorithm for both
the ATTR_CREATE and ATTR_REPLACE operations. This is also the same
algorithm we use for runtime ATTR_REPLACE operations (except for the
step setting up the initial conditions).
The result is that:
- ATTR_CREATE uses the same algorithm regardless of whether LARP is
enabled or not
- ATTR_REPLACE with larp=0 is identical to the old algorithm
- ATTR_REPLACE with larp=1 runs an unmodified attr removal algorithm
from the larp=0 code and then runs the unmodified ATTR_CREATE
code.
- log recovery when larp=1 runs the same ATTR_REPLACE algorithm as
it uses at runtime.
Because the state machine is now quite clean, changing the algorithm
is really just a case of changing the initial state and how the
states link together for the ATTR_REPLACE case. Hence it's not a
huge amount of code for what is a fairly substantial rework
of the attr logging and recovery algorithm....
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2022-05-12 13:12:56 +08:00
|
|
|
error = xfs_attr_leaf_removename(args);
|
|
|
|
attr->xattri_dela_state = xfs_attr_complete_op(attr,
|
|
|
|
xfs_attr_init_add_state(args));
|
|
|
|
break;
|
2022-05-12 13:12:56 +08:00
|
|
|
case XFS_DAS_NODE_REMOVE:
|
|
|
|
error = xfs_attr_node_removename_setup(attr);
|
xfs: ATTR_REPLACE algorithm with LARP enabled needs rework
We can't use the same algorithm for replacing an existing attribute
when logging attributes. The existing algorithm is essentially:
1. create new attr w/ INCOMPLETE
2. atomically flip INCOMPLETE flags between old + new attribute
3. remove old attr which is marked w/ INCOMPLETE
This algorithm guarantees that we see either the old or new
attribute, and if we fail after the atomic flag flip, we don't have
to recover the removal of the old attr because we never see
INCOMPLETE attributes in lookups.
For logged attributes, however, this does not work. The logged
attribute intents do not track the work that has been done as the
transaction rolls, and hence the only recovery mechanism we have is
"run the replace operation from scratch".
This is further exacerbated by the attempt to avoid needing the
INCOMPLETE flag to create an atomic swap. This means we can create
a second active attribute of the same name before we remove the
original. If we fail at any point after the create but before the
removal has completed, we end up with duplicate attributes in
the attr btree and recovery only tries to replace one of them.
There are several other failure modes where we can leave partially
allocated remote attributes that expose stale data, partially free
remote attributes that enable UAF based stale data exposure, etc.
TO fix this, we need a different algorithm for replace operations
when LARP is enabled. Luckily, it's not that complex if we take the
right first step. That is, the first thing we log is the attri
intent with the new name/value pair and mark the old attr as
INCOMPLETE in the same transaction.
From there, we then remove the old attr and keep relogging the
new name/value in the intent, such that we always know that we have
to create the new attr in recovery. Once the old attr is removed,
we then run a normal ATTR_CREATE operation relogging the intent as
we go. If the new attr is local, then it gets created in a single
atomic transaction that also logs the final intent done. If the new
attr is remote, the we set INCOMPLETE on the new attr while we
allocate and set the remote value, and then we clear the INCOMPLETE
flag at in the last transaction taht logs the final intent done.
If we fail at any point in this algorithm, log recovery will always
see the same state on disk: the new name/value in the intent, and
either an INCOMPLETE attr or no attr in the attr btree. If we find
an INCOMPLETE attr, we run the full replace starting with removing
the INCOMPLETE attr. If we don't find it, then we simply create the
new attr.
Notably, recovery of a failed create that has an INCOMPLETE flag set
is now the same - we start with the lookup of the INCOMPLETE attr,
and if that exists then we do the full replace recovery process,
otherwise we just create the new attr.
Hence changing the way we do the replace operation when LARP is
enabled allows us to use the same log recovery algorithm for both
the ATTR_CREATE and ATTR_REPLACE operations. This is also the same
algorithm we use for runtime ATTR_REPLACE operations (except for the
step setting up the initial conditions).
The result is that:
- ATTR_CREATE uses the same algorithm regardless of whether LARP is
enabled or not
- ATTR_REPLACE with larp=0 is identical to the old algorithm
- ATTR_REPLACE with larp=1 runs an unmodified attr removal algorithm
from the larp=0 code and then runs the unmodified ATTR_CREATE
code.
- log recovery when larp=1 runs the same ATTR_REPLACE algorithm as
it uses at runtime.
Because the state machine is now quite clean, changing the algorithm
is really just a case of changing the initial state and how the
states link together for the ATTR_REPLACE case. Hence it's not a
huge amount of code for what is a fairly substantial rework
of the attr logging and recovery algorithm....
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2022-05-12 13:12:56 +08:00
|
|
|
if (error == -ENOATTR &&
|
|
|
|
(args->op_flags & XFS_DA_OP_RECOVERY)) {
|
|
|
|
attr->xattri_dela_state = xfs_attr_complete_op(attr,
|
|
|
|
xfs_attr_init_add_state(args));
|
|
|
|
error = 0;
|
|
|
|
break;
|
|
|
|
}
|
2022-05-12 13:12:56 +08:00
|
|
|
if (error)
|
|
|
|
return error;
|
|
|
|
attr->xattri_dela_state = XFS_DAS_NODE_REMOVE_RMT;
|
|
|
|
if (args->rmtblkno == 0)
|
|
|
|
attr->xattri_dela_state++;
|
|
|
|
break;
|
|
|
|
|
2022-05-12 13:12:55 +08:00
|
|
|
case XFS_DAS_LEAF_SET_RMT:
|
|
|
|
case XFS_DAS_NODE_SET_RMT:
|
|
|
|
error = xfs_attr_rmtval_find_space(attr);
|
|
|
|
if (error)
|
|
|
|
return error;
|
2022-05-12 13:12:54 +08:00
|
|
|
attr->xattri_dela_state++;
|
2022-05-12 13:12:54 +08:00
|
|
|
fallthrough;
|
2022-05-12 13:12:55 +08:00
|
|
|
|
2022-05-12 13:12:54 +08:00
|
|
|
case XFS_DAS_LEAF_ALLOC_RMT:
|
2022-05-12 13:12:54 +08:00
|
|
|
case XFS_DAS_NODE_ALLOC_RMT:
|
2022-05-12 13:12:55 +08:00
|
|
|
error = xfs_attr_rmtval_alloc(attr);
|
|
|
|
if (error)
|
2020-07-21 12:47:24 +08:00
|
|
|
return error;
|
2022-05-12 13:12:55 +08:00
|
|
|
if (attr->xattri_dela_state == XFS_DAS_DONE)
|
|
|
|
break;
|
2022-05-12 13:12:55 +08:00
|
|
|
goto next_state;
|
2020-07-21 12:47:24 +08:00
|
|
|
|
2022-05-12 13:12:55 +08:00
|
|
|
case XFS_DAS_LEAF_REPLACE:
|
|
|
|
case XFS_DAS_NODE_REPLACE:
|
2021-04-20 03:55:26 +08:00
|
|
|
/*
|
2022-05-12 13:12:55 +08:00
|
|
|
* We must "flip" the incomplete flags on the "new" and "old"
|
|
|
|
* attribute/value pairs so that one disappears and one appears
|
2022-05-12 13:12:55 +08:00
|
|
|
* atomically.
|
2021-04-20 03:55:26 +08:00
|
|
|
*/
|
2022-05-12 13:12:55 +08:00
|
|
|
error = xfs_attr3_leaf_flipflags(args);
|
|
|
|
if (error)
|
|
|
|
return error;
|
|
|
|
/*
|
2022-05-12 13:12:55 +08:00
|
|
|
* We must commit the flag value change now to make it atomic
|
|
|
|
* and then we can start the next trans in series at REMOVE_OLD.
|
2022-05-12 13:12:55 +08:00
|
|
|
*/
|
2022-05-12 13:12:54 +08:00
|
|
|
attr->xattri_dela_state++;
|
2022-05-12 13:12:55 +08:00
|
|
|
break;
|
|
|
|
|
|
|
|
case XFS_DAS_LEAF_REMOVE_OLD:
|
|
|
|
case XFS_DAS_NODE_REMOVE_OLD:
|
2021-04-20 03:55:26 +08:00
|
|
|
/*
|
2022-05-12 13:12:55 +08:00
|
|
|
* If we have a remote attr, start the process of removing it
|
|
|
|
* by invalidating any cached buffers.
|
|
|
|
*
|
|
|
|
* If we don't have a remote attr, we skip the remote block
|
|
|
|
* removal state altogether with a second state increment.
|
2021-04-20 03:55:26 +08:00
|
|
|
*/
|
|
|
|
xfs_attr_restore_rmt_blk(args);
|
|
|
|
if (args->rmtblkno) {
|
2022-05-12 13:12:55 +08:00
|
|
|
error = xfs_attr_rmtval_invalidate(args);
|
2021-04-20 03:55:26 +08:00
|
|
|
if (error)
|
|
|
|
return error;
|
2022-05-12 13:12:55 +08:00
|
|
|
} else {
|
|
|
|
attr->xattri_dela_state++;
|
2021-04-20 03:55:26 +08:00
|
|
|
}
|
|
|
|
|
2022-05-12 13:12:55 +08:00
|
|
|
attr->xattri_dela_state++;
|
|
|
|
goto next_state;
|
|
|
|
|
|
|
|
case XFS_DAS_LEAF_REMOVE_RMT:
|
|
|
|
case XFS_DAS_NODE_REMOVE_RMT:
|
|
|
|
error = xfs_attr_rmtval_remove(attr);
|
2022-05-12 13:12:55 +08:00
|
|
|
if (error == -EAGAIN) {
|
|
|
|
error = 0;
|
2022-05-12 13:12:55 +08:00
|
|
|
break;
|
2022-05-12 13:12:55 +08:00
|
|
|
}
|
2022-05-12 13:12:55 +08:00
|
|
|
if (error)
|
|
|
|
return error;
|
|
|
|
|
2022-05-12 13:12:54 +08:00
|
|
|
/*
|
2022-05-12 13:12:55 +08:00
|
|
|
* We've finished removing the remote attr blocks, so commit the
|
|
|
|
* transaction and move on to removing the attr name from the
|
|
|
|
* leaf/node block. Removing the attr might require a full
|
|
|
|
* transaction reservation for btree block freeing, so we
|
|
|
|
* can't do that in the same transaction where we removed the
|
|
|
|
* remote attr blocks.
|
2022-05-12 13:12:54 +08:00
|
|
|
*/
|
|
|
|
attr->xattri_dela_state++;
|
2022-05-12 13:12:55 +08:00
|
|
|
break;
|
2022-05-12 13:12:54 +08:00
|
|
|
|
2022-05-12 13:12:55 +08:00
|
|
|
case XFS_DAS_LEAF_REMOVE_ATTR:
|
|
|
|
error = xfs_attr_leaf_remove_attr(attr);
|
xfs: ATTR_REPLACE algorithm with LARP enabled needs rework
We can't use the same algorithm for replacing an existing attribute
when logging attributes. The existing algorithm is essentially:
1. create new attr w/ INCOMPLETE
2. atomically flip INCOMPLETE flags between old + new attribute
3. remove old attr which is marked w/ INCOMPLETE
This algorithm guarantees that we see either the old or new
attribute, and if we fail after the atomic flag flip, we don't have
to recover the removal of the old attr because we never see
INCOMPLETE attributes in lookups.
For logged attributes, however, this does not work. The logged
attribute intents do not track the work that has been done as the
transaction rolls, and hence the only recovery mechanism we have is
"run the replace operation from scratch".
This is further exacerbated by the attempt to avoid needing the
INCOMPLETE flag to create an atomic swap. This means we can create
a second active attribute of the same name before we remove the
original. If we fail at any point after the create but before the
removal has completed, we end up with duplicate attributes in
the attr btree and recovery only tries to replace one of them.
There are several other failure modes where we can leave partially
allocated remote attributes that expose stale data, partially free
remote attributes that enable UAF based stale data exposure, etc.
TO fix this, we need a different algorithm for replace operations
when LARP is enabled. Luckily, it's not that complex if we take the
right first step. That is, the first thing we log is the attri
intent with the new name/value pair and mark the old attr as
INCOMPLETE in the same transaction.
From there, we then remove the old attr and keep relogging the
new name/value in the intent, such that we always know that we have
to create the new attr in recovery. Once the old attr is removed,
we then run a normal ATTR_CREATE operation relogging the intent as
we go. If the new attr is local, then it gets created in a single
atomic transaction that also logs the final intent done. If the new
attr is remote, the we set INCOMPLETE on the new attr while we
allocate and set the remote value, and then we clear the INCOMPLETE
flag at in the last transaction taht logs the final intent done.
If we fail at any point in this algorithm, log recovery will always
see the same state on disk: the new name/value in the intent, and
either an INCOMPLETE attr or no attr in the attr btree. If we find
an INCOMPLETE attr, we run the full replace starting with removing
the INCOMPLETE attr. If we don't find it, then we simply create the
new attr.
Notably, recovery of a failed create that has an INCOMPLETE flag set
is now the same - we start with the lookup of the INCOMPLETE attr,
and if that exists then we do the full replace recovery process,
otherwise we just create the new attr.
Hence changing the way we do the replace operation when LARP is
enabled allows us to use the same log recovery algorithm for both
the ATTR_CREATE and ATTR_REPLACE operations. This is also the same
algorithm we use for runtime ATTR_REPLACE operations (except for the
step setting up the initial conditions).
The result is that:
- ATTR_CREATE uses the same algorithm regardless of whether LARP is
enabled or not
- ATTR_REPLACE with larp=0 is identical to the old algorithm
- ATTR_REPLACE with larp=1 runs an unmodified attr removal algorithm
from the larp=0 code and then runs the unmodified ATTR_CREATE
code.
- log recovery when larp=1 runs the same ATTR_REPLACE algorithm as
it uses at runtime.
Because the state machine is now quite clean, changing the algorithm
is really just a case of changing the initial state and how the
states link together for the ATTR_REPLACE case. Hence it's not a
huge amount of code for what is a fairly substantial rework
of the attr logging and recovery algorithm....
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2022-05-12 13:12:56 +08:00
|
|
|
attr->xattri_dela_state = xfs_attr_complete_op(attr,
|
|
|
|
xfs_attr_init_add_state(args));
|
2022-05-12 13:12:55 +08:00
|
|
|
break;
|
2020-07-21 12:47:24 +08:00
|
|
|
|
2022-05-12 13:12:55 +08:00
|
|
|
case XFS_DAS_NODE_REMOVE_ATTR:
|
|
|
|
error = xfs_attr_node_remove_attr(attr);
|
|
|
|
if (!error)
|
2022-05-12 13:12:56 +08:00
|
|
|
error = xfs_attr_leaf_shrink(args);
|
xfs: ATTR_REPLACE algorithm with LARP enabled needs rework
We can't use the same algorithm for replacing an existing attribute
when logging attributes. The existing algorithm is essentially:
1. create new attr w/ INCOMPLETE
2. atomically flip INCOMPLETE flags between old + new attribute
3. remove old attr which is marked w/ INCOMPLETE
This algorithm guarantees that we see either the old or new
attribute, and if we fail after the atomic flag flip, we don't have
to recover the removal of the old attr because we never see
INCOMPLETE attributes in lookups.
For logged attributes, however, this does not work. The logged
attribute intents do not track the work that has been done as the
transaction rolls, and hence the only recovery mechanism we have is
"run the replace operation from scratch".
This is further exacerbated by the attempt to avoid needing the
INCOMPLETE flag to create an atomic swap. This means we can create
a second active attribute of the same name before we remove the
original. If we fail at any point after the create but before the
removal has completed, we end up with duplicate attributes in
the attr btree and recovery only tries to replace one of them.
There are several other failure modes where we can leave partially
allocated remote attributes that expose stale data, partially free
remote attributes that enable UAF based stale data exposure, etc.
TO fix this, we need a different algorithm for replace operations
when LARP is enabled. Luckily, it's not that complex if we take the
right first step. That is, the first thing we log is the attri
intent with the new name/value pair and mark the old attr as
INCOMPLETE in the same transaction.
From there, we then remove the old attr and keep relogging the
new name/value in the intent, such that we always know that we have
to create the new attr in recovery. Once the old attr is removed,
we then run a normal ATTR_CREATE operation relogging the intent as
we go. If the new attr is local, then it gets created in a single
atomic transaction that also logs the final intent done. If the new
attr is remote, the we set INCOMPLETE on the new attr while we
allocate and set the remote value, and then we clear the INCOMPLETE
flag at in the last transaction taht logs the final intent done.
If we fail at any point in this algorithm, log recovery will always
see the same state on disk: the new name/value in the intent, and
either an INCOMPLETE attr or no attr in the attr btree. If we find
an INCOMPLETE attr, we run the full replace starting with removing
the INCOMPLETE attr. If we don't find it, then we simply create the
new attr.
Notably, recovery of a failed create that has an INCOMPLETE flag set
is now the same - we start with the lookup of the INCOMPLETE attr,
and if that exists then we do the full replace recovery process,
otherwise we just create the new attr.
Hence changing the way we do the replace operation when LARP is
enabled allows us to use the same log recovery algorithm for both
the ATTR_CREATE and ATTR_REPLACE operations. This is also the same
algorithm we use for runtime ATTR_REPLACE operations (except for the
step setting up the initial conditions).
The result is that:
- ATTR_CREATE uses the same algorithm regardless of whether LARP is
enabled or not
- ATTR_REPLACE with larp=0 is identical to the old algorithm
- ATTR_REPLACE with larp=1 runs an unmodified attr removal algorithm
from the larp=0 code and then runs the unmodified ATTR_CREATE
code.
- log recovery when larp=1 runs the same ATTR_REPLACE algorithm as
it uses at runtime.
Because the state machine is now quite clean, changing the algorithm
is really just a case of changing the initial state and how the
states link together for the ATTR_REPLACE case. Hence it's not a
huge amount of code for what is a fairly substantial rework
of the attr logging and recovery algorithm....
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2022-05-12 13:12:56 +08:00
|
|
|
attr->xattri_dela_state = xfs_attr_complete_op(attr,
|
|
|
|
xfs_attr_init_add_state(args));
|
2021-05-22 06:48:13 +08:00
|
|
|
break;
|
|
|
|
default:
|
2021-05-21 15:57:15 +08:00
|
|
|
ASSERT(0);
|
2021-05-22 06:48:13 +08:00
|
|
|
break;
|
|
|
|
}
|
2022-05-12 13:12:55 +08:00
|
|
|
|
|
|
|
trace_xfs_attr_set_iter_return(attr->xattri_dela_state, args->dp);
|
2021-04-13 05:15:31 +08:00
|
|
|
return error;
|
2018-10-18 14:21:16 +08:00
|
|
|
}
|
|
|
|
|
2021-05-22 06:48:13 +08:00
|
|
|
|
2020-07-21 12:47:22 +08:00
|
|
|
/*
|
|
|
|
* Return EEXIST if attr is found, or ENOATTR if not
|
|
|
|
*/
|
2021-08-19 09:46:25 +08:00
|
|
|
static int
|
|
|
|
xfs_attr_lookup(
|
2020-07-21 12:47:22 +08:00
|
|
|
struct xfs_da_args *args)
|
|
|
|
{
|
|
|
|
struct xfs_inode *dp = args->dp;
|
|
|
|
struct xfs_buf *bp = NULL;
|
2022-05-22 13:59:34 +08:00
|
|
|
struct xfs_da_state *state;
|
2020-07-21 12:47:22 +08:00
|
|
|
int error;
|
|
|
|
|
|
|
|
if (!xfs_inode_hasattr(dp))
|
|
|
|
return -ENOATTR;
|
|
|
|
|
2021-04-14 02:15:11 +08:00
|
|
|
if (dp->i_afp->if_format == XFS_DINODE_FMT_LOCAL)
|
2020-07-21 12:47:22 +08:00
|
|
|
return xfs_attr_sf_findname(args, NULL, NULL);
|
|
|
|
|
2021-04-14 02:15:10 +08:00
|
|
|
if (xfs_attr_is_leaf(dp)) {
|
2020-07-21 12:47:22 +08:00
|
|
|
error = xfs_attr_leaf_hasname(args, &bp);
|
|
|
|
|
|
|
|
if (bp)
|
|
|
|
xfs_trans_brelse(args->trans, bp);
|
|
|
|
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
2022-05-22 13:59:34 +08:00
|
|
|
state = xfs_da_state_alloc(args);
|
|
|
|
error = xfs_attr_node_lookup(args, state);
|
|
|
|
xfs_da_state_free(state);
|
|
|
|
return error;
|
2020-07-21 12:47:22 +08:00
|
|
|
}
|
|
|
|
|
2022-05-11 15:05:23 +08:00
|
|
|
static int
|
|
|
|
xfs_attr_item_init(
|
|
|
|
struct xfs_da_args *args,
|
|
|
|
unsigned int op_flags, /* op flag (set or remove) */
|
|
|
|
struct xfs_attr_item **attr) /* new xfs_attr_item */
|
|
|
|
{
|
|
|
|
|
|
|
|
struct xfs_attr_item *new;
|
|
|
|
|
2022-05-22 13:59:48 +08:00
|
|
|
new = kmem_cache_zalloc(xfs_attr_intent_cache, GFP_NOFS | __GFP_NOFAIL);
|
2022-05-11 15:05:23 +08:00
|
|
|
new->xattri_op_flags = op_flags;
|
|
|
|
new->xattri_da_args = args;
|
|
|
|
|
|
|
|
*attr = new;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Sets an attribute for an inode as a deferred operation */
|
|
|
|
static int
|
|
|
|
xfs_attr_defer_add(
|
|
|
|
struct xfs_da_args *args)
|
|
|
|
{
|
|
|
|
struct xfs_attr_item *new;
|
|
|
|
int error = 0;
|
|
|
|
|
2022-05-22 13:59:48 +08:00
|
|
|
error = xfs_attr_item_init(args, XFS_ATTRI_OP_FLAGS_SET, &new);
|
2022-05-11 15:05:23 +08:00
|
|
|
if (error)
|
|
|
|
return error;
|
|
|
|
|
xfs: separate out initial attr_set states
We current use XFS_DAS_UNINIT for several steps in the attr_set
state machine. We use it for setting shortform xattrs, converting
from shortform to leaf, leaf add, leaf-to-node and leaf add. All of
these things are essentially known before we start the state machine
iterating, so we really should separate them out:
XFS_DAS_SF_ADD:
- tries to do a shortform add
- on success -> done
- on ENOSPC converts to leaf, -> XFS_DAS_LEAF_ADD
- on error, dies.
XFS_DAS_LEAF_ADD:
- tries to do leaf add
- on success:
- inline attr -> done
- remote xattr || REPLACE -> XFS_DAS_FOUND_LBLK
- on ENOSPC converts to node, -> XFS_DAS_NODE_ADD
- on error, dies
XFS_DAS_NODE_ADD:
- tries to do node add
- on success:
- inline attr -> done
- remote xattr || REPLACE -> XFS_DAS_FOUND_NBLK
- on error, dies
This makes it easier to understand how the state machine starts
up and sets us up on the path to further state machine
simplifications.
This also converts the DAS state tracepoints to use strings rather
than numbers, as converting between enums and numbers requires
manual counting rather than just reading the name.
This also introduces a XFS_DAS_DONE state so that we can trace
successful operation completions easily.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Allison Henderson<allison.henderson@oracle.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2022-05-12 13:12:52 +08:00
|
|
|
new->xattri_dela_state = xfs_attr_init_add_state(args);
|
2022-05-11 15:05:23 +08:00
|
|
|
xfs_defer_add(args->trans, XFS_DEFER_OPS_TYPE_ATTR, &new->xattri_list);
|
|
|
|
trace_xfs_attr_defer_add(new->xattri_dela_state, args->dp);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Sets an attribute for an inode as a deferred operation */
|
|
|
|
static int
|
|
|
|
xfs_attr_defer_replace(
|
|
|
|
struct xfs_da_args *args)
|
|
|
|
{
|
|
|
|
struct xfs_attr_item *new;
|
|
|
|
int error = 0;
|
|
|
|
|
2022-05-22 13:59:48 +08:00
|
|
|
error = xfs_attr_item_init(args, XFS_ATTRI_OP_FLAGS_REPLACE, &new);
|
2022-05-11 15:05:23 +08:00
|
|
|
if (error)
|
|
|
|
return error;
|
|
|
|
|
xfs: separate out initial attr_set states
We current use XFS_DAS_UNINIT for several steps in the attr_set
state machine. We use it for setting shortform xattrs, converting
from shortform to leaf, leaf add, leaf-to-node and leaf add. All of
these things are essentially known before we start the state machine
iterating, so we really should separate them out:
XFS_DAS_SF_ADD:
- tries to do a shortform add
- on success -> done
- on ENOSPC converts to leaf, -> XFS_DAS_LEAF_ADD
- on error, dies.
XFS_DAS_LEAF_ADD:
- tries to do leaf add
- on success:
- inline attr -> done
- remote xattr || REPLACE -> XFS_DAS_FOUND_LBLK
- on ENOSPC converts to node, -> XFS_DAS_NODE_ADD
- on error, dies
XFS_DAS_NODE_ADD:
- tries to do node add
- on success:
- inline attr -> done
- remote xattr || REPLACE -> XFS_DAS_FOUND_NBLK
- on error, dies
This makes it easier to understand how the state machine starts
up and sets us up on the path to further state machine
simplifications.
This also converts the DAS state tracepoints to use strings rather
than numbers, as converting between enums and numbers requires
manual counting rather than just reading the name.
This also introduces a XFS_DAS_DONE state so that we can trace
successful operation completions easily.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Allison Henderson<allison.henderson@oracle.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2022-05-12 13:12:52 +08:00
|
|
|
new->xattri_dela_state = xfs_attr_init_replace_state(args);
|
2022-05-11 15:05:23 +08:00
|
|
|
xfs_defer_add(args->trans, XFS_DEFER_OPS_TYPE_ATTR, &new->xattri_list);
|
|
|
|
trace_xfs_attr_defer_replace(new->xattri_dela_state, args->dp);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Removes an attribute for an inode as a deferred operation */
|
|
|
|
static int
|
|
|
|
xfs_attr_defer_remove(
|
|
|
|
struct xfs_da_args *args)
|
|
|
|
{
|
|
|
|
|
|
|
|
struct xfs_attr_item *new;
|
|
|
|
int error;
|
|
|
|
|
2022-05-22 13:59:48 +08:00
|
|
|
error = xfs_attr_item_init(args, XFS_ATTRI_OP_FLAGS_REMOVE, &new);
|
2022-05-11 15:05:23 +08:00
|
|
|
if (error)
|
|
|
|
return error;
|
|
|
|
|
2022-05-12 13:12:56 +08:00
|
|
|
new->xattri_dela_state = xfs_attr_init_remove_state(args);
|
2022-05-11 15:05:23 +08:00
|
|
|
xfs_defer_add(args->trans, XFS_DEFER_OPS_TYPE_ATTR, &new->xattri_list);
|
|
|
|
trace_xfs_attr_defer_remove(new->xattri_dela_state, args->dp);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2020-02-27 09:30:29 +08:00
|
|
|
/*
|
2020-02-27 09:30:33 +08:00
|
|
|
* Note: If args->value is NULL the attribute will be removed, just like the
|
2020-02-27 09:30:29 +08:00
|
|
|
* Linux ->setattr API.
|
|
|
|
*/
|
2014-05-13 14:34:14 +08:00
|
|
|
int
|
|
|
|
xfs_attr_set(
|
2020-02-27 09:30:33 +08:00
|
|
|
struct xfs_da_args *args)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
2020-02-27 09:30:33 +08:00
|
|
|
struct xfs_inode *dp = args->dp;
|
2013-08-12 18:49:59 +08:00
|
|
|
struct xfs_mount *mp = dp->i_mount;
|
|
|
|
struct xfs_trans_res tres;
|
2020-02-27 09:30:42 +08:00
|
|
|
bool rsvd = (args->attr_filter & XFS_ATTR_ROOT);
|
2018-10-18 14:20:50 +08:00
|
|
|
int error, local;
|
2021-01-23 08:48:13 +08:00
|
|
|
int rmt_blks = 0;
|
2020-02-27 09:30:29 +08:00
|
|
|
unsigned int total;
|
2022-05-11 15:01:13 +08:00
|
|
|
int delayed = xfs_has_larp(mp);
|
2014-05-13 14:34:14 +08:00
|
|
|
|
2021-08-19 09:46:53 +08:00
|
|
|
if (xfs_is_shutdown(dp->i_mount))
|
2014-06-25 12:58:08 +08:00
|
|
|
return -EIO;
|
2014-05-13 14:34:14 +08:00
|
|
|
|
2020-02-27 09:30:29 +08:00
|
|
|
error = xfs_qm_dqattach(dp);
|
|
|
|
if (error)
|
|
|
|
return error;
|
|
|
|
|
2020-02-27 09:30:33 +08:00
|
|
|
args->geo = mp->m_attr_geo;
|
|
|
|
args->whichfork = XFS_ATTR_FORK;
|
|
|
|
args->hashval = xfs_da_hashname(args->name, args->namelen);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/*
|
2020-02-27 09:30:29 +08:00
|
|
|
* We have no control over the attribute names that userspace passes us
|
|
|
|
* to remove, so we have to allow the name lookup prior to attribute
|
|
|
|
* removal to fail as well.
|
2005-04-17 06:20:36 +08:00
|
|
|
*/
|
2020-02-27 09:30:33 +08:00
|
|
|
args->op_flags = XFS_DA_OP_OKNOENT;
|
2007-02-10 15:35:58 +08:00
|
|
|
|
2020-02-27 09:30:33 +08:00
|
|
|
if (args->value) {
|
2020-02-27 09:30:29 +08:00
|
|
|
XFS_STATS_INC(mp, xs_attr_set);
|
2020-02-27 09:30:33 +08:00
|
|
|
args->total = xfs_attr_calc_size(args, &local);
|
2020-02-27 09:30:29 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* If the inode doesn't have an attribute fork, add one.
|
|
|
|
* (inode must not be locked when we call this routine)
|
|
|
|
*/
|
|
|
|
if (XFS_IFORK_Q(dp) == 0) {
|
|
|
|
int sf_size = sizeof(struct xfs_attr_sf_hdr) +
|
2020-09-07 23:08:50 +08:00
|
|
|
xfs_attr_sf_entsize_byname(args->namelen,
|
2020-02-27 09:30:33 +08:00
|
|
|
args->valuelen);
|
2020-02-27 09:30:29 +08:00
|
|
|
|
|
|
|
error = xfs_bmap_add_attrfork(dp, sf_size, rsvd);
|
|
|
|
if (error)
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
2021-01-23 08:48:13 +08:00
|
|
|
if (!local)
|
|
|
|
rmt_blks = xfs_attr3_rmt_blocks(mp, args->valuelen);
|
2020-02-27 09:30:29 +08:00
|
|
|
} else {
|
|
|
|
XFS_STATS_INC(mp, xs_attr_remove);
|
2021-01-23 08:48:13 +08:00
|
|
|
rmt_blks = xfs_attr3_rmt_blocks(mp, XFS_XATTR_SIZE_MAX);
|
2020-02-27 09:30:29 +08:00
|
|
|
}
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2022-05-11 15:01:13 +08:00
|
|
|
if (delayed) {
|
|
|
|
error = xfs_attr_use_log_assist(mp);
|
|
|
|
if (error)
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
|
|
|
|
* Root fork attributes can use reserved data blocks for this
|
|
|
|
* operation if necessary
|
|
|
|
*/
|
2022-05-11 15:01:23 +08:00
|
|
|
xfs_init_attr_trans(args, &tres, &total);
|
2021-01-27 08:44:07 +08:00
|
|
|
error = xfs_trans_alloc_inode(dp, &tres, total, 0, rsvd, &args->trans);
|
2016-04-06 07:19:55 +08:00
|
|
|
if (error)
|
2022-05-11 15:01:13 +08:00
|
|
|
goto drop_incompat;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2021-01-23 08:48:13 +08:00
|
|
|
if (args->value || xfs_inode_hasattr(dp)) {
|
|
|
|
error = xfs_iext_count_may_overflow(dp, XFS_ATTR_FORK,
|
|
|
|
XFS_IEXT_ATTR_MANIP_CNT(rmt_blks));
|
2022-03-09 15:49:36 +08:00
|
|
|
if (error == -EFBIG)
|
|
|
|
error = xfs_iext_count_upgrade(args->trans, dp,
|
|
|
|
XFS_IEXT_ATTR_MANIP_CNT(rmt_blks));
|
2021-01-23 08:48:13 +08:00
|
|
|
if (error)
|
|
|
|
goto out_trans_cancel;
|
|
|
|
}
|
|
|
|
|
2021-08-19 09:46:25 +08:00
|
|
|
error = xfs_attr_lookup(args);
|
2022-05-11 15:05:23 +08:00
|
|
|
switch (error) {
|
|
|
|
case -EEXIST:
|
|
|
|
/* if no value, we are performing a remove operation */
|
|
|
|
if (!args->value) {
|
|
|
|
error = xfs_attr_defer_remove(args);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
/* Pure create fails if the attr already exists */
|
|
|
|
if (args->attr_flags & XATTR_CREATE)
|
2020-07-21 12:47:23 +08:00
|
|
|
goto out_trans_cancel;
|
|
|
|
|
2022-05-11 15:05:23 +08:00
|
|
|
error = xfs_attr_defer_replace(args);
|
|
|
|
break;
|
|
|
|
case -ENOATTR:
|
|
|
|
/* Can't remove what isn't there. */
|
|
|
|
if (!args->value)
|
2020-02-27 09:30:29 +08:00
|
|
|
goto out_trans_cancel;
|
2022-05-11 15:01:13 +08:00
|
|
|
|
2022-05-11 15:05:23 +08:00
|
|
|
/* Pure replace fails if no existing attr to replace. */
|
|
|
|
if (args->attr_flags & XATTR_REPLACE)
|
2020-07-21 12:47:23 +08:00
|
|
|
goto out_trans_cancel;
|
|
|
|
|
2022-05-11 15:05:23 +08:00
|
|
|
error = xfs_attr_defer_add(args);
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
goto out_trans_cancel;
|
2018-10-18 14:21:16 +08:00
|
|
|
}
|
2022-05-11 15:05:23 +08:00
|
|
|
if (error)
|
|
|
|
goto out_trans_cancel;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* If this is a synchronous mount, make sure that the
|
|
|
|
* transaction goes to disk before returning to the user.
|
|
|
|
*/
|
2021-08-19 09:46:52 +08:00
|
|
|
if (xfs_has_wsync(mp))
|
2020-02-27 09:30:33 +08:00
|
|
|
xfs_trans_set_sync(args->trans);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2020-02-27 09:30:36 +08:00
|
|
|
if (!(args->op_flags & XFS_DA_OP_NOTIME))
|
2020-02-27 09:30:33 +08:00
|
|
|
xfs_trans_ichgtime(args->trans, dp, XFS_ICHGTIME_CHG);
|
2010-09-28 10:27:25 +08:00
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
|
|
|
|
* Commit the last in the sequence of transactions.
|
|
|
|
*/
|
2020-02-27 09:30:33 +08:00
|
|
|
xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE);
|
|
|
|
error = xfs_trans_commit(args->trans);
|
2018-10-18 14:21:16 +08:00
|
|
|
out_unlock:
|
2005-04-17 06:20:36 +08:00
|
|
|
xfs_iunlock(dp, XFS_ILOCK_EXCL);
|
2022-05-11 15:01:13 +08:00
|
|
|
drop_incompat:
|
|
|
|
if (delayed)
|
|
|
|
xlog_drop_incompat_feat(mp->m_log);
|
2014-05-13 14:34:14 +08:00
|
|
|
return error;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2018-10-18 14:21:16 +08:00
|
|
|
out_trans_cancel:
|
2020-02-27 09:30:33 +08:00
|
|
|
if (args->trans)
|
|
|
|
xfs_trans_cancel(args->trans);
|
2018-10-18 14:21:16 +08:00
|
|
|
goto out_unlock;
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*========================================================================
|
|
|
|
* External routines when attribute list is inside the inode
|
|
|
|
*========================================================================*/
|
|
|
|
|
2020-09-07 23:08:50 +08:00
|
|
|
static inline int xfs_attr_sf_totsize(struct xfs_inode *dp)
|
|
|
|
{
|
|
|
|
struct xfs_attr_shortform *sf;
|
|
|
|
|
|
|
|
sf = (struct xfs_attr_shortform *)dp->i_afp->if_u1.if_data;
|
|
|
|
return be16_to_cpu(sf->hdr.totsize);
|
|
|
|
}
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
|
|
|
|
* Add a name to the shortform attribute list structure
|
|
|
|
* This is the external routine.
|
|
|
|
*/
|
xfs: use XFS_DA_OP flags in deferred attr ops
We currently store the high level attr operation in
args->attr_flags. This field contains what the VFS is telling us to
do, but don't necessarily match what we are doing in the low level
modification state machine. e.g. XATTR_REPLACE implies both
XFS_DA_OP_ADDNAME and XFS_DA_OP_RENAME because it is doing both a
remove and adding a new attr.
However, deep in the individual state machine operations, we check
errors against this high level VFS op flags, not the low level
XFS_DA_OP flags. Indeed, we don't even have a low level flag for
a REMOVE operation, so the only way we know we are doing a remove
is the complete absence of XATTR_REPLACE, XATTR_CREATE,
XFS_DA_OP_ADDNAME and XFS_DA_OP_RENAME. And because there are other
flags in these fields, this is a pain to check if we need to.
As the XFS_DA_OP flags are only needed once the deferred operations
are set up, set these flags appropriately when we set the initial
operation state. We also introduce a XFS_DA_OP_REMOVE flag to make
it easy to know that we are doing a remove operation.
With these, we can remove the use of XATTR_REPLACE and XATTR_CREATE
in low level lookup operations, and manipulate the low level flags
according to the low level context that is operating. e.g. log
recovery does not have a VFS xattr operation state to copy into
args->attr_flags, and the low level state machine ops we do for
recovery do not match the high level VFS operations that were in
progress when the system failed...
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2022-05-12 13:12:56 +08:00
|
|
|
static int
|
|
|
|
xfs_attr_shortform_addname(
|
|
|
|
struct xfs_da_args *args)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
xfs: use XFS_DA_OP flags in deferred attr ops
We currently store the high level attr operation in
args->attr_flags. This field contains what the VFS is telling us to
do, but don't necessarily match what we are doing in the low level
modification state machine. e.g. XATTR_REPLACE implies both
XFS_DA_OP_ADDNAME and XFS_DA_OP_RENAME because it is doing both a
remove and adding a new attr.
However, deep in the individual state machine operations, we check
errors against this high level VFS op flags, not the low level
XFS_DA_OP flags. Indeed, we don't even have a low level flag for
a REMOVE operation, so the only way we know we are doing a remove
is the complete absence of XATTR_REPLACE, XATTR_CREATE,
XFS_DA_OP_ADDNAME and XFS_DA_OP_RENAME. And because there are other
flags in these fields, this is a pain to check if we need to.
As the XFS_DA_OP flags are only needed once the deferred operations
are set up, set these flags appropriately when we set the initial
operation state. We also introduce a XFS_DA_OP_REMOVE flag to make
it easy to know that we are doing a remove operation.
With these, we can remove the use of XATTR_REPLACE and XATTR_CREATE
in low level lookup operations, and manipulate the low level flags
according to the low level context that is operating. e.g. log
recovery does not have a VFS xattr operation state to copy into
args->attr_flags, and the low level state machine ops we do for
recovery do not match the high level VFS operations that were in
progress when the system failed...
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2022-05-12 13:12:56 +08:00
|
|
|
int newsize, forkoff;
|
|
|
|
int error;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2012-03-22 13:15:13 +08:00
|
|
|
trace_xfs_attr_sf_addname(args);
|
|
|
|
|
xfs: use XFS_DA_OP flags in deferred attr ops
We currently store the high level attr operation in
args->attr_flags. This field contains what the VFS is telling us to
do, but don't necessarily match what we are doing in the low level
modification state machine. e.g. XATTR_REPLACE implies both
XFS_DA_OP_ADDNAME and XFS_DA_OP_RENAME because it is doing both a
remove and adding a new attr.
However, deep in the individual state machine operations, we check
errors against this high level VFS op flags, not the low level
XFS_DA_OP flags. Indeed, we don't even have a low level flag for
a REMOVE operation, so the only way we know we are doing a remove
is the complete absence of XATTR_REPLACE, XATTR_CREATE,
XFS_DA_OP_ADDNAME and XFS_DA_OP_RENAME. And because there are other
flags in these fields, this is a pain to check if we need to.
As the XFS_DA_OP flags are only needed once the deferred operations
are set up, set these flags appropriately when we set the initial
operation state. We also introduce a XFS_DA_OP_REMOVE flag to make
it easy to know that we are doing a remove operation.
With these, we can remove the use of XATTR_REPLACE and XATTR_CREATE
in low level lookup operations, and manipulate the low level flags
according to the low level context that is operating. e.g. log
recovery does not have a VFS xattr operation state to copy into
args->attr_flags, and the low level state machine ops we do for
recovery do not match the high level VFS operations that were in
progress when the system failed...
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2022-05-12 13:12:56 +08:00
|
|
|
error = xfs_attr_shortform_lookup(args);
|
|
|
|
switch (error) {
|
|
|
|
case -ENOATTR:
|
|
|
|
if (args->op_flags & XFS_DA_OP_REPLACE)
|
|
|
|
return error;
|
|
|
|
break;
|
|
|
|
case -EEXIST:
|
|
|
|
if (!(args->op_flags & XFS_DA_OP_REPLACE))
|
|
|
|
return error;
|
|
|
|
|
|
|
|
error = xfs_attr_sf_removename(args);
|
|
|
|
if (error)
|
|
|
|
return error;
|
|
|
|
|
2018-04-18 10:10:15 +08:00
|
|
|
/*
|
xfs: use XFS_DA_OP flags in deferred attr ops
We currently store the high level attr operation in
args->attr_flags. This field contains what the VFS is telling us to
do, but don't necessarily match what we are doing in the low level
modification state machine. e.g. XATTR_REPLACE implies both
XFS_DA_OP_ADDNAME and XFS_DA_OP_RENAME because it is doing both a
remove and adding a new attr.
However, deep in the individual state machine operations, we check
errors against this high level VFS op flags, not the low level
XFS_DA_OP flags. Indeed, we don't even have a low level flag for
a REMOVE operation, so the only way we know we are doing a remove
is the complete absence of XATTR_REPLACE, XATTR_CREATE,
XFS_DA_OP_ADDNAME and XFS_DA_OP_RENAME. And because there are other
flags in these fields, this is a pain to check if we need to.
As the XFS_DA_OP flags are only needed once the deferred operations
are set up, set these flags appropriately when we set the initial
operation state. We also introduce a XFS_DA_OP_REMOVE flag to make
it easy to know that we are doing a remove operation.
With these, we can remove the use of XATTR_REPLACE and XATTR_CREATE
in low level lookup operations, and manipulate the low level flags
according to the low level context that is operating. e.g. log
recovery does not have a VFS xattr operation state to copy into
args->attr_flags, and the low level state machine ops we do for
recovery do not match the high level VFS operations that were in
progress when the system failed...
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2022-05-12 13:12:56 +08:00
|
|
|
* Since we have removed the old attr, clear XFS_DA_OP_REPLACE
|
|
|
|
* so that the new attr doesn't fit in shortform format, the
|
|
|
|
* leaf format add routine won't trip over the attr not being
|
|
|
|
* around.
|
2018-04-18 10:10:15 +08:00
|
|
|
*/
|
xfs: use XFS_DA_OP flags in deferred attr ops
We currently store the high level attr operation in
args->attr_flags. This field contains what the VFS is telling us to
do, but don't necessarily match what we are doing in the low level
modification state machine. e.g. XATTR_REPLACE implies both
XFS_DA_OP_ADDNAME and XFS_DA_OP_RENAME because it is doing both a
remove and adding a new attr.
However, deep in the individual state machine operations, we check
errors against this high level VFS op flags, not the low level
XFS_DA_OP flags. Indeed, we don't even have a low level flag for
a REMOVE operation, so the only way we know we are doing a remove
is the complete absence of XATTR_REPLACE, XATTR_CREATE,
XFS_DA_OP_ADDNAME and XFS_DA_OP_RENAME. And because there are other
flags in these fields, this is a pain to check if we need to.
As the XFS_DA_OP flags are only needed once the deferred operations
are set up, set these flags appropriately when we set the initial
operation state. We also introduce a XFS_DA_OP_REMOVE flag to make
it easy to know that we are doing a remove operation.
With these, we can remove the use of XATTR_REPLACE and XATTR_CREATE
in low level lookup operations, and manipulate the low level flags
according to the low level context that is operating. e.g. log
recovery does not have a VFS xattr operation state to copy into
args->attr_flags, and the low level state machine ops we do for
recovery do not match the high level VFS operations that were in
progress when the system failed...
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2022-05-12 13:12:56 +08:00
|
|
|
args->op_flags &= ~XFS_DA_OP_REPLACE;
|
|
|
|
break;
|
|
|
|
case 0:
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
return error;
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
|
2005-11-02 07:34:53 +08:00
|
|
|
if (args->namelen >= XFS_ATTR_SF_ENTSIZE_MAX ||
|
|
|
|
args->valuelen >= XFS_ATTR_SF_ENTSIZE_MAX)
|
2014-06-25 12:58:08 +08:00
|
|
|
return -ENOSPC;
|
2005-11-02 07:34:53 +08:00
|
|
|
|
2020-09-07 23:08:50 +08:00
|
|
|
newsize = xfs_attr_sf_totsize(args->dp);
|
|
|
|
newsize += xfs_attr_sf_entsize_byname(args->namelen, args->valuelen);
|
2005-11-02 07:34:53 +08:00
|
|
|
|
|
|
|
forkoff = xfs_attr_shortform_bytesfit(args->dp, newsize);
|
|
|
|
if (!forkoff)
|
2014-06-25 12:58:08 +08:00
|
|
|
return -ENOSPC;
|
2005-11-02 07:34:53 +08:00
|
|
|
|
|
|
|
xfs_attr_shortform_add(args, forkoff);
|
2014-06-22 13:03:54 +08:00
|
|
|
return 0;
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*========================================================================
|
|
|
|
* External routines when attribute list is one block
|
|
|
|
*========================================================================*/
|
|
|
|
|
xfs: use XFS_DA_OP flags in deferred attr ops
We currently store the high level attr operation in
args->attr_flags. This field contains what the VFS is telling us to
do, but doesn't necessarily match what we are doing in the low level
modification state machine. e.g. XATTR_REPLACE implies both
XFS_DA_OP_ADDNAME and XFS_DA_OP_RENAME because it is doing both a
remove and adding a new attr.
However, deep in the individual state machine operations, we check
errors against this high level VFS op flags, not the low level
XFS_DA_OP flags. Indeed, we don't even have a low level flag for
a REMOVE operation, so the only way we know we are doing a remove
is the complete absence of XATTR_REPLACE, XATTR_CREATE,
XFS_DA_OP_ADDNAME and XFS_DA_OP_RENAME. And because there are other
flags in these fields, this is a pain to check if we need to.
As the XFS_DA_OP flags are only needed once the deferred operations
are set up, set these flags appropriately when we set the initial
operation state. We also introduce a XFS_DA_OP_REMOVE flag to make
it easy to know that we are doing a remove operation.
With these, we can remove the use of XATTR_REPLACE and XATTR_CREATE
in low level lookup operations, and manipulate the low level flags
according to the low level context that is operating. e.g. log
recovery does not have a VFS xattr operation state to copy into
args->attr_flags, and the low level state machine ops we do for
recovery do not match the high level VFS operations that were in
progress when the system failed...
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2022-05-12 13:12:56 +08:00
|
|
|
/* Save the current remote block info and clear the current pointers. */
|
|
|
|
static void
|
2020-07-21 12:47:29 +08:00
|
|
|
xfs_attr_save_rmt_blk(
|
|
|
|
struct xfs_da_args *args)
|
|
|
|
{
|
|
|
|
args->blkno2 = args->blkno;
|
|
|
|
args->index2 = args->index;
|
|
|
|
args->rmtblkno2 = args->rmtblkno;
|
|
|
|
args->rmtblkcnt2 = args->rmtblkcnt;
|
|
|
|
args->rmtvaluelen2 = args->rmtvaluelen;
|
xfs: use XFS_DA_OP flags in deferred attr ops
We currently store the high level attr operation in
args->attr_flags. This field contains what the VFS is telling us to
do, but don't necessarily match what we are doing in the low level
modification state machine. e.g. XATTR_REPLACE implies both
XFS_DA_OP_ADDNAME and XFS_DA_OP_RENAME because it is doing both a
remove and adding a new attr.
However, deep in the individual state machine operations, we check
errors against this high level VFS op flags, not the low level
XFS_DA_OP flags. Indeed, we don't even have a low level flag for
a REMOVE operation, so the only way we know we are doing a remove
is the complete absence of XATTR_REPLACE, XATTR_CREATE,
XFS_DA_OP_ADDNAME and XFS_DA_OP_RENAME. And because there are other
flags in these fields, this is a pain to check if we need to.
As the XFS_DA_OP flags are only needed once the deferred operations
are set up, set these flags appropriately when we set the initial
operation state. We also introduce a XFS_DA_OP_REMOVE flag to make
it easy to know that we are doing a remove operation.
With these, we can remove the use of XATTR_REPLACE and XATTR_CREATE
in low level lookup operations, and manipulate the low level flags
according to the low level context that is operating. e.g. log
recovery does not have a VFS xattr operation state to copy into
args->attr_flags, and the low level state machine ops we do for
recovery do not match the high level VFS operations that were in
progress when the system failed...
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2022-05-12 13:12:56 +08:00
|
|
|
args->rmtblkno = 0;
|
|
|
|
args->rmtblkcnt = 0;
|
|
|
|
args->rmtvaluelen = 0;
|
2020-07-21 12:47:29 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Set stored info about a remote block */
|
xfs: use XFS_DA_OP flags in deferred attr ops
We currently store the high level attr operation in
args->attr_flags. This field contains what the VFS is telling us to
do, but don't necessarily match what we are doing in the low level
modification state machine. e.g. XATTR_REPLACE implies both
XFS_DA_OP_ADDNAME and XFS_DA_OP_RENAME because it is doing both a
remove and adding a new attr.
However, deep in the individual state machine operations, we check
errors against this high level VFS op flags, not the low level
XFS_DA_OP flags. Indeed, we don't even have a low level flag for
a REMOVE operation, so the only way we know we are doing a remove
is the complete absence of XATTR_REPLACE, XATTR_CREATE,
XFS_DA_OP_ADDNAME and XFS_DA_OP_RENAME. And because there are other
flags in these fields, this is a pain to check if we need to.
As the XFS_DA_OP flags are only needed once the deferred operations
are set up, set these flags appropriately when we set the initial
operation state. We also introduce a XFS_DA_OP_REMOVE flag to make
it easy to know that we are doing a remove operation.
With these, we can remove the use of XATTR_REPLACE and XATTR_CREATE
in low level lookup operations, and manipulate the low level flags
according to the low level context that is operating. e.g. log
recovery does not have a VFS xattr operation state to copy into
args->attr_flags, and the low level state machine ops we do for
recovery do not match the high level VFS operations that were in
progress when the system failed...
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2022-05-12 13:12:56 +08:00
|
|
|
static void
|
2020-07-21 12:47:29 +08:00
|
|
|
xfs_attr_restore_rmt_blk(
|
|
|
|
struct xfs_da_args *args)
|
|
|
|
{
|
|
|
|
args->blkno = args->blkno2;
|
|
|
|
args->index = args->index2;
|
|
|
|
args->rmtblkno = args->rmtblkno2;
|
|
|
|
args->rmtblkcnt = args->rmtblkcnt2;
|
|
|
|
args->rmtvaluelen = args->rmtvaluelen2;
|
|
|
|
}
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
|
2020-07-21 12:47:24 +08:00
|
|
|
* Tries to add an attribute to an inode in leaf form
|
2005-04-17 06:20:36 +08:00
|
|
|
*
|
2020-07-21 12:47:24 +08:00
|
|
|
* This function is meant to execute as part of a delayed operation and leaves
|
|
|
|
* the transaction handling to the caller. On success the attribute is added
|
|
|
|
* and the inode and transaction are left dirty. If there is not enough space,
|
|
|
|
* the attr data is converted to node format and -ENOSPC is returned. Caller is
|
|
|
|
* responsible for handling the dirty inode and transaction or adding the attr
|
|
|
|
* in node format.
|
2005-04-17 06:20:36 +08:00
|
|
|
*/
|
2007-11-23 13:28:09 +08:00
|
|
|
STATIC int
|
2020-07-21 12:47:24 +08:00
|
|
|
xfs_attr_leaf_try_add(
|
|
|
|
struct xfs_da_args *args,
|
|
|
|
struct xfs_buf *bp)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
2022-05-11 15:04:23 +08:00
|
|
|
int error;
|
2012-03-22 13:15:13 +08:00
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
|
2022-05-11 15:04:23 +08:00
|
|
|
* If the caller provided a buffer to us, it is locked and held in
|
|
|
|
* the transaction because it just did a shortform to leaf conversion.
|
|
|
|
* Hence we don't need to read it again. Otherwise read in the leaf
|
|
|
|
* buffer.
|
2005-04-17 06:20:36 +08:00
|
|
|
*/
|
2022-05-11 15:04:23 +08:00
|
|
|
if (bp) {
|
|
|
|
xfs_trans_bhold_release(args->trans, bp);
|
|
|
|
} else {
|
|
|
|
error = xfs_attr3_leaf_read(args->trans, args->dp, 0, &bp);
|
|
|
|
if (error)
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Look up the xattr name to set the insertion point for the new xattr.
|
|
|
|
*/
|
|
|
|
error = xfs_attr3_leaf_lookup_int(bp, args);
|
xfs: use XFS_DA_OP flags in deferred attr ops
We currently store the high level attr operation in
args->attr_flags. This field contains what the VFS is telling us to
do, but don't necessarily match what we are doing in the low level
modification state machine. e.g. XATTR_REPLACE implies both
XFS_DA_OP_ADDNAME and XFS_DA_OP_RENAME because it is doing both a
remove and adding a new attr.
However, deep in the individual state machine operations, we check
errors against this high level VFS op flags, not the low level
XFS_DA_OP flags. Indeed, we don't even have a low level flag for
a REMOVE operation, so the only way we know we are doing a remove
is the complete absence of XATTR_REPLACE, XATTR_CREATE,
XFS_DA_OP_ADDNAME and XFS_DA_OP_RENAME. And because there are other
flags in these fields, this is a pain to check if we need to.
As the XFS_DA_OP flags are only needed once the deferred operations
are set up, set these flags appropriately when we set the initial
operation state. We also introduce a XFS_DA_OP_REMOVE flag to make
it easy to know that we are doing a remove operation.
With these, we can remove the use of XATTR_REPLACE and XATTR_CREATE
in low level lookup operations, and manipulate the low level flags
according to the low level context that is operating. e.g. log
recovery does not have a VFS xattr operation state to copy into
args->attr_flags, and the low level state machine ops we do for
recovery do not match the high level VFS operations that were in
progress when the system failed...
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2022-05-12 13:12:56 +08:00
|
|
|
switch (error) {
|
|
|
|
case -ENOATTR:
|
|
|
|
if (args->op_flags & XFS_DA_OP_REPLACE)
|
|
|
|
goto out_brelse;
|
|
|
|
break;
|
|
|
|
case -EEXIST:
|
|
|
|
if (!(args->op_flags & XFS_DA_OP_REPLACE))
|
2020-02-27 09:30:42 +08:00
|
|
|
goto out_brelse;
|
2012-03-22 13:15:13 +08:00
|
|
|
|
|
|
|
trace_xfs_attr_leaf_replace(args);
|
xfs: remote attribute overwrite causes transaction overrun
Commit e461fcb ("xfs: remote attribute lookups require the value
length") passes the remote attribute length in the xfs_da_args
structure on lookup so that CRC calculations and validity checking
can be performed correctly by related code. This, unfortunately has
the side effect of changing the args->valuelen parameter in cases
where it shouldn't.
That is, when we replace a remote attribute, the incoming
replacement stores the value and length in args->value and
args->valuelen, but then the lookup which finds the existing remote
attribute overwrites args->valuelen with the length of the remote
attribute being replaced. Hence when we go to create the new
attribute, we create it of the size of the existing remote
attribute, not the size it is supposed to be. When the new attribute
is much smaller than the old attribute, this results in a
transaction overrun and an ASSERT() failure on a debug kernel:
XFS: Assertion failed: tp->t_blk_res_used <= tp->t_blk_res, file: fs/xfs/xfs_trans.c, line: 331
Fix this by keeping the remote attribute value length separate to
the attribute value length in the xfs_da_args structure. The enables
us to pass the length of the remote attribute to be removed without
overwriting the new attribute's length.
Also, ensure that when we save remote block contexts for a later
rename we zero the original state variables so that we don't confuse
the state of the attribute to be removes with the state of the new
attribute that we just added. [Spotted by Brain Foster.]
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2014-05-06 05:37:31 +08:00
|
|
|
/*
|
xfs: use XFS_DA_OP flags in deferred attr ops
We currently store the high level attr operation in
args->attr_flags. This field contains what the VFS is telling us to
do, but don't necessarily match what we are doing in the low level
modification state machine. e.g. XATTR_REPLACE implies both
XFS_DA_OP_ADDNAME and XFS_DA_OP_RENAME because it is doing both a
remove and adding a new attr.
However, deep in the individual state machine operations, we check
errors against this high level VFS op flags, not the low level
XFS_DA_OP flags. Indeed, we don't even have a low level flag for
a REMOVE operation, so the only way we know we are doing a remove
is the complete absence of XATTR_REPLACE, XATTR_CREATE,
XFS_DA_OP_ADDNAME and XFS_DA_OP_RENAME. And because there are other
flags in these fields, this is a pain to check if we need to.
As the XFS_DA_OP flags are only needed once the deferred operations
are set up, set these flags appropriately when we set the initial
operation state. We also introduce a XFS_DA_OP_REMOVE flag to make
it easy to know that we are doing a remove operation.
With these, we can remove the use of XATTR_REPLACE and XATTR_CREATE
in low level lookup operations, and manipulate the low level flags
according to the low level context that is operating. e.g. log
recovery does not have a VFS xattr operation state to copy into
args->attr_flags, and the low level state machine ops we do for
recovery do not match the high level VFS operations that were in
progress when the system failed...
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2022-05-12 13:12:56 +08:00
|
|
|
* Save the existing remote attr state so that the current
|
|
|
|
* values reflect the state of the new attribute we are about to
|
xfs: remote attribute overwrite causes transaction overrun
Commit e461fcb ("xfs: remote attribute lookups require the value
length") passes the remote attribute length in the xfs_da_args
structure on lookup so that CRC calculations and validity checking
can be performed correctly by related code. This, unfortunately has
the side effect of changing the args->valuelen parameter in cases
where it shouldn't.
That is, when we replace a remote attribute, the incoming
replacement stores the value and length in args->value and
args->valuelen, but then the lookup which finds the existing remote
attribute overwrites args->valuelen with the length of the remote
attribute being replaced. Hence when we go to create the new
attribute, we create it of the size of the existing remote
attribute, not the size it is supposed to be. When the new attribute
is much smaller than the old attribute, this results in a
transaction overrun and an ASSERT() failure on a debug kernel:
XFS: Assertion failed: tp->t_blk_res_used <= tp->t_blk_res, file: fs/xfs/xfs_trans.c, line: 331
Fix this by keeping the remote attribute value length separate to
the attribute value length in the xfs_da_args structure. The enables
us to pass the length of the remote attribute to be removed without
overwriting the new attribute's length.
Also, ensure that when we save remote block contexts for a later
rename we zero the original state variables so that we don't confuse
the state of the attribute to be removes with the state of the new
attribute that we just added. [Spotted by Brain Foster.]
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2014-05-06 05:37:31 +08:00
|
|
|
* add, not the attribute we just found and will remove later.
|
|
|
|
*/
|
xfs: use XFS_DA_OP flags in deferred attr ops
We currently store the high level attr operation in
args->attr_flags. This field contains what the VFS is telling us to
do, but don't necessarily match what we are doing in the low level
modification state machine. e.g. XATTR_REPLACE implies both
XFS_DA_OP_ADDNAME and XFS_DA_OP_RENAME because it is doing both a
remove and adding a new attr.
However, deep in the individual state machine operations, we check
errors against this high level VFS op flags, not the low level
XFS_DA_OP flags. Indeed, we don't even have a low level flag for
a REMOVE operation, so the only way we know we are doing a remove
is the complete absence of XATTR_REPLACE, XATTR_CREATE,
XFS_DA_OP_ADDNAME and XFS_DA_OP_RENAME. And because there are other
flags in these fields, this is a pain to check if we need to.
As the XFS_DA_OP flags are only needed once the deferred operations
are set up, set these flags appropriately when we set the initial
operation state. We also introduce a XFS_DA_OP_REMOVE flag to make
it easy to know that we are doing a remove operation.
With these, we can remove the use of XATTR_REPLACE and XATTR_CREATE
in low level lookup operations, and manipulate the low level flags
according to the low level context that is operating. e.g. log
recovery does not have a VFS xattr operation state to copy into
args->attr_flags, and the low level state machine ops we do for
recovery do not match the high level VFS operations that were in
progress when the system failed...
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2022-05-12 13:12:56 +08:00
|
|
|
xfs_attr_save_rmt_blk(args);
|
|
|
|
break;
|
|
|
|
case 0:
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
goto out_brelse;
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
|
2020-07-21 12:47:31 +08:00
|
|
|
return xfs_attr3_leaf_add(bp, args);
|
|
|
|
|
2020-07-21 12:47:24 +08:00
|
|
|
out_brelse:
|
|
|
|
xfs_trans_brelse(args->trans, bp);
|
2022-05-11 15:04:23 +08:00
|
|
|
return error;
|
2020-07-21 12:47:24 +08:00
|
|
|
}
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2020-07-21 12:47:22 +08:00
|
|
|
/*
|
|
|
|
* Return EEXIST if attr is found, or ENOATTR if not
|
|
|
|
*/
|
|
|
|
STATIC int
|
|
|
|
xfs_attr_leaf_hasname(
|
|
|
|
struct xfs_da_args *args,
|
|
|
|
struct xfs_buf **bp)
|
|
|
|
{
|
|
|
|
int error = 0;
|
|
|
|
|
|
|
|
error = xfs_attr3_leaf_read(args->trans, args->dp, 0, bp);
|
|
|
|
if (error)
|
|
|
|
return error;
|
|
|
|
|
|
|
|
error = xfs_attr3_leaf_lookup_int(*bp, args);
|
|
|
|
if (error != -ENOATTR && error != -EEXIST)
|
|
|
|
xfs_trans_brelse(args->trans, *bp);
|
|
|
|
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
|
|
|
|
* Remove a name from the leaf attribute list structure
|
|
|
|
*
|
|
|
|
* This leaf block cannot have a "remote" value, we only call this routine
|
|
|
|
* if bmap_one_block() says there is only one block (ie: no remote blks).
|
|
|
|
*/
|
|
|
|
STATIC int
|
2018-07-12 13:26:11 +08:00
|
|
|
xfs_attr_leaf_removename(
|
|
|
|
struct xfs_da_args *args)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
2018-07-12 13:26:11 +08:00
|
|
|
struct xfs_inode *dp;
|
|
|
|
struct xfs_buf *bp;
|
|
|
|
int error, forkoff;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2012-03-22 13:15:13 +08:00
|
|
|
trace_xfs_attr_leaf_removename(args);
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
|
|
|
|
* Remove the attribute.
|
|
|
|
*/
|
|
|
|
dp = args->dp;
|
|
|
|
|
2020-07-21 12:47:22 +08:00
|
|
|
error = xfs_attr_leaf_hasname(args, &bp);
|
2014-06-25 12:58:08 +08:00
|
|
|
if (error == -ENOATTR) {
|
2012-06-22 16:50:14 +08:00
|
|
|
xfs_trans_brelse(args->trans, bp);
|
xfs: ATTR_REPLACE algorithm with LARP enabled needs rework
We can't use the same algorithm for replacing an existing attribute
when logging attributes. The existing algorithm is essentially:
1. create new attr w/ INCOMPLETE
2. atomically flip INCOMPLETE flags between old + new attribute
3. remove old attr which is marked w/ INCOMPLETE
This algorithm guarantees that we see either the old or new
attribute, and if we fail after the atomic flag flip, we don't have
to recover the removal of the old attr because we never see
INCOMPLETE attributes in lookups.
For logged attributes, however, this does not work. The logged
attribute intents do not track the work that has been done as the
transaction rolls, and hence the only recovery mechanism we have is
"run the replace operation from scratch".
This is further exacerbated by the attempt to avoid needing the
INCOMPLETE flag to create an atomic swap. This means we can create
a second active attribute of the same name before we remove the
original. If we fail at any point after the create but before the
removal has completed, we end up with duplicate attributes in
the attr btree and recovery only tries to replace one of them.
There are several other failure modes where we can leave partially
allocated remote attributes that expose stale data, partially free
remote attributes that enable UAF based stale data exposure, etc.
TO fix this, we need a different algorithm for replace operations
when LARP is enabled. Luckily, it's not that complex if we take the
right first step. That is, the first thing we log is the attri
intent with the new name/value pair and mark the old attr as
INCOMPLETE in the same transaction.
From there, we then remove the old attr and keep relogging the
new name/value in the intent, such that we always know that we have
to create the new attr in recovery. Once the old attr is removed,
we then run a normal ATTR_CREATE operation relogging the intent as
we go. If the new attr is local, then it gets created in a single
atomic transaction that also logs the final intent done. If the new
attr is remote, the we set INCOMPLETE on the new attr while we
allocate and set the remote value, and then we clear the INCOMPLETE
flag at in the last transaction taht logs the final intent done.
If we fail at any point in this algorithm, log recovery will always
see the same state on disk: the new name/value in the intent, and
either an INCOMPLETE attr or no attr in the attr btree. If we find
an INCOMPLETE attr, we run the full replace starting with removing
the INCOMPLETE attr. If we don't find it, then we simply create the
new attr.
Notably, recovery of a failed create that has an INCOMPLETE flag set
is now the same - we start with the lookup of the INCOMPLETE attr,
and if that exists then we do the full replace recovery process,
otherwise we just create the new attr.
Hence changing the way we do the replace operation when LARP is
enabled allows us to use the same log recovery algorithm for both
the ATTR_CREATE and ATTR_REPLACE operations. This is also the same
algorithm we use for runtime ATTR_REPLACE operations (except for the
step setting up the initial conditions).
The result is that:
- ATTR_CREATE uses the same algorithm regardless of whether LARP is
enabled or not
- ATTR_REPLACE with larp=0 is identical to the old algorithm
- ATTR_REPLACE with larp=1 runs an unmodified attr removal algorithm
from the larp=0 code and then runs the unmodified ATTR_CREATE
code.
- log recovery when larp=1 runs the same ATTR_REPLACE algorithm as
it uses at runtime.
Because the state machine is now quite clean, changing the algorithm
is really just a case of changing the initial state and how the
states link together for the ATTR_REPLACE case. Hence it's not a
huge amount of code for what is a fairly substantial rework
of the attr logging and recovery algorithm....
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2022-05-12 13:12:56 +08:00
|
|
|
if (args->op_flags & XFS_DA_OP_RECOVERY)
|
|
|
|
return 0;
|
2013-04-24 16:58:55 +08:00
|
|
|
return error;
|
2020-07-21 12:47:22 +08:00
|
|
|
} else if (error != -EEXIST)
|
|
|
|
return error;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2013-04-24 16:58:55 +08:00
|
|
|
xfs_attr3_leaf_remove(bp, args);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* If the result is small enough, shrink it all into the inode.
|
|
|
|
*/
|
2020-07-21 12:47:27 +08:00
|
|
|
forkoff = xfs_attr_shortform_allfit(bp, dp);
|
|
|
|
if (forkoff)
|
|
|
|
return xfs_attr3_leaf_to_shortform(bp, args, forkoff);
|
2005-04-17 06:20:36 +08:00
|
|
|
/* bp is gone due to xfs_da_shrink_inode */
|
2020-07-21 12:47:27 +08:00
|
|
|
|
2013-04-24 16:58:55 +08:00
|
|
|
return 0;
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Look up a name in a leaf attribute list structure.
|
|
|
|
*
|
|
|
|
* This leaf block cannot have a "remote" value, we only call this routine
|
|
|
|
* if bmap_one_block() says there is only one block (ie: no remote blks).
|
2019-08-30 00:04:08 +08:00
|
|
|
*
|
|
|
|
* Returns 0 on successful retrieval, otherwise an error.
|
2005-04-17 06:20:36 +08:00
|
|
|
*/
|
2005-06-21 13:36:52 +08:00
|
|
|
STATIC int
|
2005-04-17 06:20:36 +08:00
|
|
|
xfs_attr_leaf_get(xfs_da_args_t *args)
|
|
|
|
{
|
2012-06-22 16:50:14 +08:00
|
|
|
struct xfs_buf *bp;
|
2005-04-17 06:20:36 +08:00
|
|
|
int error;
|
|
|
|
|
2012-11-12 19:53:53 +08:00
|
|
|
trace_xfs_attr_leaf_get(args);
|
|
|
|
|
2020-07-21 12:47:22 +08:00
|
|
|
error = xfs_attr_leaf_hasname(args, &bp);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2020-07-21 12:47:22 +08:00
|
|
|
if (error == -ENOATTR) {
|
2012-06-22 16:50:14 +08:00
|
|
|
xfs_trans_brelse(args->trans, bp);
|
2013-04-24 16:58:55 +08:00
|
|
|
return error;
|
2020-07-21 12:47:22 +08:00
|
|
|
} else if (error != -EEXIST)
|
|
|
|
return error;
|
|
|
|
|
|
|
|
|
2013-04-24 16:58:55 +08:00
|
|
|
error = xfs_attr3_leaf_getvalue(bp, args);
|
2012-06-22 16:50:14 +08:00
|
|
|
xfs_trans_brelse(args->trans, bp);
|
2019-08-30 00:04:09 +08:00
|
|
|
return error;
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
|
2022-05-22 13:59:34 +08:00
|
|
|
/* Return EEXIST if attr is found, or ENOATTR if not. */
|
2020-07-21 12:47:22 +08:00
|
|
|
STATIC int
|
2022-05-22 13:59:34 +08:00
|
|
|
xfs_attr_node_lookup(
|
2020-07-21 12:47:22 +08:00
|
|
|
struct xfs_da_args *args,
|
2022-05-22 13:59:34 +08:00
|
|
|
struct xfs_da_state *state)
|
2020-07-21 12:47:22 +08:00
|
|
|
{
|
|
|
|
int retval, error;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Search to see if name exists, and get back a pointer to it.
|
|
|
|
*/
|
|
|
|
error = xfs_da3_node_lookup_int(state, &retval);
|
2021-11-25 02:06:02 +08:00
|
|
|
if (error)
|
2022-05-22 13:59:34 +08:00
|
|
|
return error;
|
2021-11-25 02:06:02 +08:00
|
|
|
|
2020-07-21 12:47:22 +08:00
|
|
|
return retval;
|
|
|
|
}
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*========================================================================
|
2014-06-06 13:21:45 +08:00
|
|
|
* External routines when attribute list size > geo->blksize
|
2005-04-17 06:20:36 +08:00
|
|
|
*========================================================================*/
|
|
|
|
|
|
|
|
STATIC int
|
2021-04-13 05:15:31 +08:00
|
|
|
xfs_attr_node_addname_find_attr(
|
xfs: use XFS_DA_OP flags in deferred attr ops
We currently store the high level attr operation in
args->attr_flags. This field contains what the VFS is telling us to
do, but doesn't necessarily match what we are doing in the low level
modification state machine. e.g. XATTR_REPLACE implies both
XFS_DA_OP_ADDNAME and XFS_DA_OP_RENAME because it is doing both a
remove and adding a new attr.
However, deep in the individual state machine operations, we check
errors against these high level VFS op flags, not the low level
XFS_DA_OP flags. Indeed, we don't even have a low level flag for
a REMOVE operation, so the only way we know we are doing a remove
is the complete absence of XATTR_REPLACE, XATTR_CREATE,
XFS_DA_OP_ADDNAME and XFS_DA_OP_RENAME. And because there are other
flags in these fields, this is a pain to check if we need to.
As the XFS_DA_OP flags are only needed once the deferred operations
are set up, set these flags appropriately when we set the initial
operation state. We also introduce a XFS_DA_OP_REMOVE flag to make
it easy to know that we are doing a remove operation.
With these, we can remove the use of XATTR_REPLACE and XATTR_CREATE
in low level lookup operations, and manipulate the low level flags
according to the low level context that is operating. e.g. log
recovery does not have a VFS xattr operation state to copy into
args->attr_flags, and the low level state machine ops we do for
recovery do not match the high level VFS operations that were in
progress when the system failed...
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2022-05-12 13:12:56 +08:00
|
|
|
struct xfs_attr_item *attr)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
xfs: use XFS_DA_OP flags in deferred attr ops
We currently store the high level attr operation in
args->attr_flags. This field contains what the VFS is telling us to
do, but doesn't necessarily match what we are doing in the low level
modification state machine. e.g. XATTR_REPLACE implies both
XFS_DA_OP_ADDNAME and XFS_DA_OP_RENAME because it is doing both a
remove and adding a new attr.
However, deep in the individual state machine operations, we check
errors against these high level VFS op flags, not the low level
XFS_DA_OP flags. Indeed, we don't even have a low level flag for
a REMOVE operation, so the only way we know we are doing a remove
is the complete absence of XATTR_REPLACE, XATTR_CREATE,
XFS_DA_OP_ADDNAME and XFS_DA_OP_RENAME. And because there are other
flags in these fields, this is a pain to check if we need to.
As the XFS_DA_OP flags are only needed once the deferred operations
are set up, set these flags appropriately when we set the initial
operation state. We also introduce a XFS_DA_OP_REMOVE flag to make
it easy to know that we are doing a remove operation.
With these, we can remove the use of XATTR_REPLACE and XATTR_CREATE
in low level lookup operations, and manipulate the low level flags
according to the low level context that is operating. e.g. log
recovery does not have a VFS xattr operation state to copy into
args->attr_flags, and the low level state machine ops we do for
recovery do not match the high level VFS operations that were in
progress when the system failed...
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2022-05-12 13:12:56 +08:00
|
|
|
struct xfs_da_args *args = attr->xattri_da_args;
|
|
|
|
int error;
|
2012-03-22 13:15:13 +08:00
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
|
|
|
|
* Search to see if name already exists, and get back a pointer
|
|
|
|
* to where it should go.
|
|
|
|
*/
|
2022-05-22 13:59:34 +08:00
|
|
|
xfs_attr_item_init_da_state(attr);
|
|
|
|
error = xfs_attr_node_lookup(args, attr->xattri_da_state);
|
xfs: use XFS_DA_OP flags in deferred attr ops
We currently store the high level attr operation in
args->attr_flags. This field contains what the VFS is telling us to
do, but doesn't necessarily match what we are doing in the low level
modification state machine. e.g. XATTR_REPLACE implies both
XFS_DA_OP_ADDNAME and XFS_DA_OP_RENAME because it is doing both a
remove and adding a new attr.
However, deep in the individual state machine operations, we check
errors against these high level VFS op flags, not the low level
XFS_DA_OP flags. Indeed, we don't even have a low level flag for
a REMOVE operation, so the only way we know we are doing a remove
is the complete absence of XATTR_REPLACE, XATTR_CREATE,
XFS_DA_OP_ADDNAME and XFS_DA_OP_RENAME. And because there are other
flags in these fields, this is a pain to check if we need to.
As the XFS_DA_OP flags are only needed once the deferred operations
are set up, set these flags appropriately when we set the initial
operation state. We also introduce a XFS_DA_OP_REMOVE flag to make
it easy to know that we are doing a remove operation.
With these, we can remove the use of XATTR_REPLACE and XATTR_CREATE
in low level lookup operations, and manipulate the low level flags
according to the low level context that is operating. e.g. log
recovery does not have a VFS xattr operation state to copy into
args->attr_flags, and the low level state machine ops we do for
recovery do not match the high level VFS operations that were in
progress when the system failed...
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2022-05-12 13:12:56 +08:00
|
|
|
switch (error) {
|
|
|
|
case -ENOATTR:
|
|
|
|
if (args->op_flags & XFS_DA_OP_REPLACE)
|
|
|
|
goto error;
|
|
|
|
break;
|
|
|
|
case -EEXIST:
|
|
|
|
if (!(args->op_flags & XFS_DA_OP_REPLACE))
|
2021-04-13 05:15:31 +08:00
|
|
|
goto error;
|
2012-03-22 13:15:13 +08:00
|
|
|
|
xfs: remote attribute overwrite causes transaction overrun
Commit e461fcb ("xfs: remote attribute lookups require the value
length") passes the remote attribute length in the xfs_da_args
structure on lookup so that CRC calculations and validity checking
can be performed correctly by related code. This, unfortunately has
the side effect of changing the args->valuelen parameter in cases
where it shouldn't.
That is, when we replace a remote attribute, the incoming
replacement stores the value and length in args->value and
args->valuelen, but then the lookup which finds the existing remote
attribute overwrites args->valuelen with the length of the remote
attribute being replaced. Hence when we go to create the new
attribute, we create it of the size of the existing remote
attribute, not the size it is supposed to be. When the new attribute
is much smaller than the old attribute, this results in a
transaction overrun and an ASSERT() failure on a debug kernel:
XFS: Assertion failed: tp->t_blk_res_used <= tp->t_blk_res, file: fs/xfs/xfs_trans.c, line: 331
Fix this by keeping the remote attribute value length separate to
the attribute value length in the xfs_da_args structure. This enables
us to pass the length of the remote attribute to be removed without
overwriting the new attribute's length.
Also, ensure that when we save remote block contexts for a later
rename we zero the original state variables so that we don't confuse
the state of the attribute to be removed with the state of the new
attribute that we just added. [Spotted by Brian Foster.]
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2014-05-06 05:37:31 +08:00
|
|
|
|
xfs: use XFS_DA_OP flags in deferred attr ops
We currently store the high level attr operation in
args->attr_flags. This field contains what the VFS is telling us to
do, but doesn't necessarily match what we are doing in the low level
modification state machine. e.g. XATTR_REPLACE implies both
XFS_DA_OP_ADDNAME and XFS_DA_OP_RENAME because it is doing both a
remove and adding a new attr.
However, deep in the individual state machine operations, we check
errors against these high level VFS op flags, not the low level
XFS_DA_OP flags. Indeed, we don't even have a low level flag for
a REMOVE operation, so the only way we know we are doing a remove
is the complete absence of XATTR_REPLACE, XATTR_CREATE,
XFS_DA_OP_ADDNAME and XFS_DA_OP_RENAME. And because there are other
flags in these fields, this is a pain to check if we need to.
As the XFS_DA_OP flags are only needed once the deferred operations
are set up, set these flags appropriately when we set the initial
operation state. We also introduce a XFS_DA_OP_REMOVE flag to make
it easy to know that we are doing a remove operation.
With these, we can remove the use of XATTR_REPLACE and XATTR_CREATE
in low level lookup operations, and manipulate the low level flags
according to the low level context that is operating. e.g. log
recovery does not have a VFS xattr operation state to copy into
args->attr_flags, and the low level state machine ops we do for
recovery do not match the high level VFS operations that were in
progress when the system failed...
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2022-05-12 13:12:56 +08:00
|
|
|
trace_xfs_attr_node_replace(args);
|
xfs: remote attribute overwrite causes transaction overrun
Commit e461fcb ("xfs: remote attribute lookups require the value
length") passes the remote attribute length in the xfs_da_args
structure on lookup so that CRC calculations and validity checking
can be performed correctly by related code. This, unfortunately has
the side effect of changing the args->valuelen parameter in cases
where it shouldn't.
That is, when we replace a remote attribute, the incoming
replacement stores the value and length in args->value and
args->valuelen, but then the lookup which finds the existing remote
attribute overwrites args->valuelen with the length of the remote
attribute being replaced. Hence when we go to create the new
attribute, we create it of the size of the existing remote
attribute, not the size it is supposed to be. When the new attribute
is much smaller than the old attribute, this results in a
transaction overrun and an ASSERT() failure on a debug kernel:
XFS: Assertion failed: tp->t_blk_res_used <= tp->t_blk_res, file: fs/xfs/xfs_trans.c, line: 331
Fix this by keeping the remote attribute value length separate to
the attribute value length in the xfs_da_args structure. This enables
us to pass the length of the remote attribute to be removed without
overwriting the new attribute's length.
Also, ensure that when we save remote block contexts for a later
rename we zero the original state variables so that we don't confuse
the state of the attribute to be removed with the state of the new
attribute that we just added. [Spotted by Brian Foster.]
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2014-05-06 05:37:31 +08:00
|
|
|
/*
|
xfs: use XFS_DA_OP flags in deferred attr ops
We currently store the high level attr operation in
args->attr_flags. This field contains what the VFS is telling us to
do, but doesn't necessarily match what we are doing in the low level
modification state machine. e.g. XATTR_REPLACE implies both
XFS_DA_OP_ADDNAME and XFS_DA_OP_RENAME because it is doing both a
remove and adding a new attr.
However, deep in the individual state machine operations, we check
errors against these high level VFS op flags, not the low level
XFS_DA_OP flags. Indeed, we don't even have a low level flag for
a REMOVE operation, so the only way we know we are doing a remove
is the complete absence of XATTR_REPLACE, XATTR_CREATE,
XFS_DA_OP_ADDNAME and XFS_DA_OP_RENAME. And because there are other
flags in these fields, this is a pain to check if we need to.
As the XFS_DA_OP flags are only needed once the deferred operations
are set up, set these flags appropriately when we set the initial
operation state. We also introduce a XFS_DA_OP_REMOVE flag to make
it easy to know that we are doing a remove operation.
With these, we can remove the use of XATTR_REPLACE and XATTR_CREATE
in low level lookup operations, and manipulate the low level flags
according to the low level context that is operating. e.g. log
recovery does not have a VFS xattr operation state to copy into
args->attr_flags, and the low level state machine ops we do for
recovery do not match the high level VFS operations that were in
progress when the system failed...
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2022-05-12 13:12:56 +08:00
|
|
|
* Save the existing remote attr state so that the current
|
|
|
|
* values reflect the state of the new attribute we are about to
|
xfs: remote attribute overwrite causes transaction overrun
Commit e461fcb ("xfs: remote attribute lookups require the value
length") passes the remote attribute length in the xfs_da_args
structure on lookup so that CRC calculations and validity checking
can be performed correctly by related code. This, unfortunately has
the side effect of changing the args->valuelen parameter in cases
where it shouldn't.
That is, when we replace a remote attribute, the incoming
replacement stores the value and length in args->value and
args->valuelen, but then the lookup which finds the existing remote
attribute overwrites args->valuelen with the length of the remote
attribute being replaced. Hence when we go to create the new
attribute, we create it of the size of the existing remote
attribute, not the size it is supposed to be. When the new attribute
is much smaller than the old attribute, this results in a
transaction overrun and an ASSERT() failure on a debug kernel:
XFS: Assertion failed: tp->t_blk_res_used <= tp->t_blk_res, file: fs/xfs/xfs_trans.c, line: 331
Fix this by keeping the remote attribute value length separate to
the attribute value length in the xfs_da_args structure. This enables
us to pass the length of the remote attribute to be removed without
overwriting the new attribute's length.
Also, ensure that when we save remote block contexts for a later
rename we zero the original state variables so that we don't confuse
the state of the attribute to be removed with the state of the new
attribute that we just added. [Spotted by Brian Foster.]
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2014-05-06 05:37:31 +08:00
|
|
|
* add, not the attribute we just found and will remove later.
|
|
|
|
*/
|
xfs: use XFS_DA_OP flags in deferred attr ops
We currently store the high level attr operation in
args->attr_flags. This field contains what the VFS is telling us to
do, but doesn't necessarily match what we are doing in the low level
modification state machine. e.g. XATTR_REPLACE implies both
XFS_DA_OP_ADDNAME and XFS_DA_OP_RENAME because it is doing both a
remove and adding a new attr.
However, deep in the individual state machine operations, we check
errors against these high level VFS op flags, not the low level
XFS_DA_OP flags. Indeed, we don't even have a low level flag for
a REMOVE operation, so the only way we know we are doing a remove
is the complete absence of XATTR_REPLACE, XATTR_CREATE,
XFS_DA_OP_ADDNAME and XFS_DA_OP_RENAME. And because there are other
flags in these fields, this is a pain to check if we need to.
As the XFS_DA_OP flags are only needed once the deferred operations
are set up, set these flags appropriately when we set the initial
operation state. We also introduce a XFS_DA_OP_REMOVE flag to make
it easy to know that we are doing a remove operation.
With these, we can remove the use of XATTR_REPLACE and XATTR_CREATE
in low level lookup operations, and manipulate the low level flags
according to the low level context that is operating. e.g. log
recovery does not have a VFS xattr operation state to copy into
args->attr_flags, and the low level state machine ops we do for
recovery do not match the high level VFS operations that were in
progress when the system failed...
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2022-05-12 13:12:56 +08:00
|
|
|
xfs_attr_save_rmt_blk(args);
|
|
|
|
break;
|
|
|
|
case 0:
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
goto error;
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
|
2021-04-13 05:15:31 +08:00
|
|
|
return 0;
|
|
|
|
error:
|
2022-05-20 12:41:34 +08:00
|
|
|
if (attr->xattri_da_state) {
|
2022-05-11 15:01:22 +08:00
|
|
|
xfs_da_state_free(attr->xattri_da_state);
|
2022-05-20 12:41:34 +08:00
|
|
|
attr->xattri_da_state = NULL;
|
|
|
|
}
|
xfs: use XFS_DA_OP flags in deferred attr ops
We currently store the high level attr operation in
args->attr_flags. This field contains what the VFS is telling us to
do, but doesn't necessarily match what we are doing in the low level
modification state machine. e.g. XATTR_REPLACE implies both
XFS_DA_OP_ADDNAME and XFS_DA_OP_RENAME because it is doing both a
remove and adding a new attr.
However, deep in the individual state machine operations, we check
errors against these high level VFS op flags, not the low level
XFS_DA_OP flags. Indeed, we don't even have a low level flag for
a REMOVE operation, so the only way we know we are doing a remove
is the complete absence of XATTR_REPLACE, XATTR_CREATE,
XFS_DA_OP_ADDNAME and XFS_DA_OP_RENAME. And because there are other
flags in these fields, this is a pain to check if we need to.
As the XFS_DA_OP flags are only needed once the deferred operations
are set up, set these flags appropriately when we set the initial
operation state. We also introduce a XFS_DA_OP_REMOVE flag to make
it easy to know that we are doing a remove operation.
With these, we can remove the use of XATTR_REPLACE and XATTR_CREATE
in low level lookup operations, and manipulate the low level flags
according to the low level context that is operating. e.g. log
recovery does not have a VFS xattr operation state to copy into
args->attr_flags, and the low level state machine ops we do for
recovery do not match the high level VFS operations that were in
progress when the system failed...
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2022-05-12 13:12:56 +08:00
|
|
|
return error;
|
2021-04-13 05:15:31 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Add a name to a Btree-format attribute list.
|
|
|
|
*
|
|
|
|
* This will involve walking down the Btree, and may involve splitting
|
|
|
|
* leaf nodes and even splitting intermediate nodes up to and including
|
|
|
|
* the root node (a special case of an intermediate node).
|
|
|
|
*/
|
xfs: separate out initial attr_set states
We currently use XFS_DAS_UNINIT for several steps in the attr_set
state machine. We use it for setting shortform xattrs, converting
from shortform to leaf, leaf add, leaf-to-node and leaf add. All of
these things are essentially known before we start the state machine
iterating, so we really should separate them out:
XFS_DAS_SF_ADD:
- tries to do a shortform add
- on success -> done
- on ENOSPC converts to leaf, -> XFS_DAS_LEAF_ADD
- on error, dies.
XFS_DAS_LEAF_ADD:
- tries to do leaf add
- on success:
- inline attr -> done
- remote xattr || REPLACE -> XFS_DAS_FOUND_LBLK
- on ENOSPC converts to node, -> XFS_DAS_NODE_ADD
- on error, dies
XFS_DAS_NODE_ADD:
- tries to do node add
- on success:
- inline attr -> done
- remote xattr || REPLACE -> XFS_DAS_FOUND_NBLK
- on error, dies
This makes it easier to understand how the state machine starts
up and sets us up on the path to further state machine
simplifications.
This also converts the DAS state tracepoints to use strings rather
than numbers, as converting between enums and numbers requires
manual counting rather than just reading the name.
This also introduces a XFS_DAS_DONE state so that we can trace
successful operation completions easily.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2022-05-12 13:12:52 +08:00
|
|
|
static int
|
|
|
|
xfs_attr_node_try_addname(
|
2022-05-11 15:01:22 +08:00
|
|
|
struct xfs_attr_item *attr)
|
2021-04-13 05:15:31 +08:00
|
|
|
{
|
2022-05-11 15:01:22 +08:00
|
|
|
struct xfs_da_args *args = attr->xattri_da_args;
|
|
|
|
struct xfs_da_state *state = attr->xattri_da_state;
|
2021-05-22 06:48:13 +08:00
|
|
|
struct xfs_da_state_blk *blk;
|
|
|
|
int error;
|
2021-04-13 05:15:31 +08:00
|
|
|
|
|
|
|
trace_xfs_attr_node_addname(args);
|
|
|
|
|
|
|
|
blk = &state->path.blk[state->path.active-1];
|
|
|
|
ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
|
|
|
|
|
2021-04-27 07:50:26 +08:00
|
|
|
error = xfs_attr3_leaf_add(blk->bp, state->args);
|
|
|
|
if (error == -ENOSPC) {
|
2005-04-17 06:20:36 +08:00
|
|
|
if (state->path.active == 1) {
|
|
|
|
/*
|
|
|
|
* Its really a single leaf node, but it had
|
|
|
|
* out-of-line values so it looked like it *might*
|
2022-05-12 13:12:55 +08:00
|
|
|
* have been a b-tree. Let the caller deal with this.
|
2005-04-17 06:20:36 +08:00
|
|
|
*/
|
2022-05-12 13:12:55 +08:00
|
|
|
goto out;
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Split as many Btree elements as required.
|
|
|
|
* This code tracks the new and old attr's location
|
|
|
|
* in the index/blkno/rmtblkno/rmtblkcnt fields and
|
|
|
|
* in the index2/blkno2/rmtblkno2/rmtblkcnt2 fields.
|
|
|
|
*/
|
2013-04-24 16:58:02 +08:00
|
|
|
error = xfs_da3_split(state);
|
2017-08-29 01:21:04 +08:00
|
|
|
if (error)
|
2018-09-29 11:41:58 +08:00
|
|
|
goto out;
|
2005-04-17 06:20:36 +08:00
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* Addition succeeded, update Btree hashvals.
|
|
|
|
*/
|
2013-04-24 16:58:02 +08:00
|
|
|
xfs_da3_fixhashpath(state, &state->path);
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
|
2021-02-18 16:24:24 +08:00
|
|
|
out:
|
2022-05-12 13:12:55 +08:00
|
|
|
xfs_da_state_free(state);
|
2022-05-20 12:41:34 +08:00
|
|
|
attr->xattri_da_state = NULL;
|
2021-04-27 07:50:26 +08:00
|
|
|
return error;
|
2021-02-18 16:24:24 +08:00
|
|
|
}
|
|
|
|
|
2022-05-12 13:12:56 +08:00
|
|
|
static int
|
|
|
|
xfs_attr_node_removename(
|
|
|
|
struct xfs_da_args *args,
|
|
|
|
struct xfs_da_state *state)
|
|
|
|
{
|
|
|
|
struct xfs_da_state_blk *blk;
|
|
|
|
int retval;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Remove the name and update the hashvals in the tree.
|
|
|
|
*/
|
|
|
|
blk = &state->path.blk[state->path.active-1];
|
|
|
|
ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
|
|
|
|
retval = xfs_attr3_leaf_remove(blk->bp, args);
|
|
|
|
xfs_da3_fixhashpath(state, &state->path);
|
|
|
|
|
|
|
|
return retval;
|
|
|
|
}
|
2021-02-18 16:24:24 +08:00
|
|
|
|
2022-05-12 13:12:55 +08:00
|
|
|
static int
|
|
|
|
xfs_attr_node_remove_attr(
|
2022-05-11 15:01:22 +08:00
|
|
|
struct xfs_attr_item *attr)
|
2021-02-18 16:24:24 +08:00
|
|
|
{
|
2022-05-11 15:01:22 +08:00
|
|
|
struct xfs_da_args *args = attr->xattri_da_args;
|
2021-02-18 16:24:24 +08:00
|
|
|
struct xfs_da_state *state = NULL;
|
|
|
|
int retval = 0;
|
|
|
|
int error = 0;
|
|
|
|
|
2020-07-21 12:47:31 +08:00
|
|
|
/*
|
2022-05-12 13:12:56 +08:00
|
|
|
* The attr we are removing has already been marked incomplete, so
|
|
|
|
* we need to set the filter appropriately to re-find the "old"
|
|
|
|
* attribute entry after any split ops.
|
2020-07-21 12:47:31 +08:00
|
|
|
*/
|
2022-05-12 13:12:56 +08:00
|
|
|
args->attr_filter |= XFS_ATTR_INCOMPLETE;
|
2020-07-21 12:47:31 +08:00
|
|
|
state = xfs_da_state_alloc(args);
|
|
|
|
state->inleaf = 0;
|
|
|
|
error = xfs_da3_node_lookup_int(state, &retval);
|
|
|
|
if (error)
|
|
|
|
goto out;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2021-05-29 06:15:05 +08:00
|
|
|
error = xfs_attr_node_removename(args, state);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2020-07-21 12:47:31 +08:00
|
|
|
/*
|
|
|
|
* Check to see if the tree needs to be collapsed.
|
|
|
|
*/
|
|
|
|
if (retval && (state->path.active > 1)) {
|
|
|
|
error = xfs_da3_join(state);
|
2005-04-17 06:20:36 +08:00
|
|
|
if (error)
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
retval = error = 0;
|
|
|
|
|
|
|
|
out:
|
|
|
|
if (state)
|
|
|
|
xfs_da_state_free(state);
|
|
|
|
if (error)
|
2014-06-22 13:03:54 +08:00
|
|
|
return error;
|
|
|
|
return retval;
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2019-08-30 00:04:08 +08:00
|
|
|
* Retrieve the attribute data from a node attribute list.
|
2005-04-17 06:20:36 +08:00
|
|
|
*
|
|
|
|
* This routine gets called for any attribute fork that has more than one
|
|
|
|
* block, ie: both true Btree attr lists and for single-leaf-blocks with
|
|
|
|
* "remote" values taking up more blocks.
|
2019-08-30 00:04:08 +08:00
|
|
|
*
|
|
|
|
* Returns 0 on successful retrieval, otherwise an error.
|
2005-04-17 06:20:36 +08:00
|
|
|
*/
|
2005-06-21 13:36:52 +08:00
|
|
|
STATIC int
|
2020-07-21 12:47:22 +08:00
|
|
|
xfs_attr_node_get(
|
|
|
|
struct xfs_da_args *args)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
2022-05-22 13:59:34 +08:00
|
|
|
struct xfs_da_state *state;
|
2020-07-21 12:47:22 +08:00
|
|
|
struct xfs_da_state_blk *blk;
|
|
|
|
int i;
|
|
|
|
int error;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2012-11-12 19:53:53 +08:00
|
|
|
trace_xfs_attr_node_get(args);
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
|
|
|
|
* Search to see if name exists, and get back a pointer to it.
|
|
|
|
*/
|
2022-05-22 13:59:34 +08:00
|
|
|
state = xfs_da_state_alloc(args);
|
|
|
|
error = xfs_attr_node_lookup(args, state);
|
2020-07-21 12:47:22 +08:00
|
|
|
if (error != -EEXIST)
|
2019-08-30 00:04:08 +08:00
|
|
|
goto out_release;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Get the value, local or "remote"
|
|
|
|
*/
|
|
|
|
blk = &state->path.blk[state->path.active - 1];
|
2020-07-21 12:47:22 +08:00
|
|
|
error = xfs_attr3_leaf_getvalue(blk->bp, args);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* If not in a transaction, we have to release all the buffers.
|
|
|
|
*/
|
2019-08-30 00:04:08 +08:00
|
|
|
out_release:
|
2020-07-21 12:47:22 +08:00
|
|
|
for (i = 0; state != NULL && i < state->path.active; i++) {
|
2012-06-22 16:50:14 +08:00
|
|
|
xfs_trans_brelse(args->trans, state->path.blk[i].bp);
|
2005-04-17 06:20:36 +08:00
|
|
|
state->path.blk[i].bp = NULL;
|
|
|
|
}
|
|
|
|
|
2022-05-22 13:59:34 +08:00
|
|
|
xfs_da_state_free(state);
|
2020-07-21 12:47:22 +08:00
|
|
|
return error;
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
2019-02-02 01:08:54 +08:00
|
|
|
|
|
|
|
/* Returns true if the attribute entry name is valid. */
|
|
|
|
bool
|
|
|
|
xfs_attr_namecheck(
|
|
|
|
const void *name,
|
|
|
|
size_t length)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* MAXNAMELEN includes the trailing null, but (name/length) leave it
|
|
|
|
* out, so use >= for the length check.
|
|
|
|
*/
|
|
|
|
if (length >= MAXNAMELEN)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
/* There shouldn't be any nulls here */
|
|
|
|
return !memchr(name, 0, length);
|
|
|
|
}
|
2022-05-22 13:59:48 +08:00
|
|
|
|
|
|
|
int __init
|
|
|
|
xfs_attr_intent_init_cache(void)
|
|
|
|
{
|
|
|
|
xfs_attr_intent_cache = kmem_cache_create("xfs_attr_item",
|
|
|
|
sizeof(struct xfs_attr_item),
|
|
|
|
0, 0, NULL);
|
|
|
|
|
|
|
|
return xfs_attr_intent_cache != NULL ? 0 : -ENOMEM;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
xfs_attr_intent_destroy_cache(void)
|
|
|
|
{
|
|
|
|
kmem_cache_destroy(xfs_attr_intent_cache);
|
|
|
|
xfs_attr_intent_cache = NULL;
|
|
|
|
}
|