xfs: exact busy extent tracking
Update the extent tree in case we have to reuse a busy extent, so that it always is kept uptodate. This is done by replacing the busy list searches with a new xfs_alloc_busy_reuse helper, which updates the busy extent tree in case of a reuse. This allows us to allow reusing metadata extents unconditionally, and thus avoid log forces especially for allocation btree blocks. Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Alex Elder <aelder@sgi.com>
This commit is contained in:
parent
e26f0501cf
commit
97d3ac75e5
|
@ -1151,44 +1151,7 @@ TRACE_EVENT(xfs_bunmap,
|
||||||
|
|
||||||
);
|
);
|
||||||
|
|
||||||
#define XFS_BUSY_SYNC \
|
DECLARE_EVENT_CLASS(xfs_busy_class,
|
||||||
{ 0, "async" }, \
|
|
||||||
{ 1, "sync" }
|
|
||||||
|
|
||||||
TRACE_EVENT(xfs_alloc_busy,
|
|
||||||
TP_PROTO(struct xfs_trans *trans, xfs_agnumber_t agno,
|
|
||||||
xfs_agblock_t agbno, xfs_extlen_t len, int sync),
|
|
||||||
TP_ARGS(trans, agno, agbno, len, sync),
|
|
||||||
TP_STRUCT__entry(
|
|
||||||
__field(dev_t, dev)
|
|
||||||
__field(struct xfs_trans *, tp)
|
|
||||||
__field(int, tid)
|
|
||||||
__field(xfs_agnumber_t, agno)
|
|
||||||
__field(xfs_agblock_t, agbno)
|
|
||||||
__field(xfs_extlen_t, len)
|
|
||||||
__field(int, sync)
|
|
||||||
),
|
|
||||||
TP_fast_assign(
|
|
||||||
__entry->dev = trans->t_mountp->m_super->s_dev;
|
|
||||||
__entry->tp = trans;
|
|
||||||
__entry->tid = trans->t_ticket->t_tid;
|
|
||||||
__entry->agno = agno;
|
|
||||||
__entry->agbno = agbno;
|
|
||||||
__entry->len = len;
|
|
||||||
__entry->sync = sync;
|
|
||||||
),
|
|
||||||
TP_printk("dev %d:%d trans 0x%p tid 0x%x agno %u agbno %u len %u %s",
|
|
||||||
MAJOR(__entry->dev), MINOR(__entry->dev),
|
|
||||||
__entry->tp,
|
|
||||||
__entry->tid,
|
|
||||||
__entry->agno,
|
|
||||||
__entry->agbno,
|
|
||||||
__entry->len,
|
|
||||||
__print_symbolic(__entry->sync, XFS_BUSY_SYNC))
|
|
||||||
|
|
||||||
);
|
|
||||||
|
|
||||||
TRACE_EVENT(xfs_alloc_unbusy,
|
|
||||||
TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
|
TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
|
||||||
xfs_agblock_t agbno, xfs_extlen_t len),
|
xfs_agblock_t agbno, xfs_extlen_t len),
|
||||||
TP_ARGS(mp, agno, agbno, len),
|
TP_ARGS(mp, agno, agbno, len),
|
||||||
|
@ -1210,36 +1173,16 @@ TRACE_EVENT(xfs_alloc_unbusy,
|
||||||
__entry->agbno,
|
__entry->agbno,
|
||||||
__entry->len)
|
__entry->len)
|
||||||
);
|
);
|
||||||
|
#define DEFINE_BUSY_EVENT(name) \
|
||||||
#define XFS_BUSY_STATES \
|
DEFINE_EVENT(xfs_busy_class, name, \
|
||||||
{ 0, "missing" }, \
|
TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
|
||||||
{ 1, "found" }
|
xfs_agblock_t agbno, xfs_extlen_t len), \
|
||||||
|
TP_ARGS(mp, agno, agbno, len))
|
||||||
TRACE_EVENT(xfs_alloc_busysearch,
|
DEFINE_BUSY_EVENT(xfs_alloc_busy);
|
||||||
TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
|
DEFINE_BUSY_EVENT(xfs_alloc_busy_enomem);
|
||||||
xfs_agblock_t agbno, xfs_extlen_t len, int found),
|
DEFINE_BUSY_EVENT(xfs_alloc_busy_force);
|
||||||
TP_ARGS(mp, agno, agbno, len, found),
|
DEFINE_BUSY_EVENT(xfs_alloc_busy_reuse);
|
||||||
TP_STRUCT__entry(
|
DEFINE_BUSY_EVENT(xfs_alloc_busy_clear);
|
||||||
__field(dev_t, dev)
|
|
||||||
__field(xfs_agnumber_t, agno)
|
|
||||||
__field(xfs_agblock_t, agbno)
|
|
||||||
__field(xfs_extlen_t, len)
|
|
||||||
__field(int, found)
|
|
||||||
),
|
|
||||||
TP_fast_assign(
|
|
||||||
__entry->dev = mp->m_super->s_dev;
|
|
||||||
__entry->agno = agno;
|
|
||||||
__entry->agbno = agbno;
|
|
||||||
__entry->len = len;
|
|
||||||
__entry->found = found;
|
|
||||||
),
|
|
||||||
TP_printk("dev %d:%d agno %u agbno %u len %u %s",
|
|
||||||
MAJOR(__entry->dev), MINOR(__entry->dev),
|
|
||||||
__entry->agno,
|
|
||||||
__entry->agbno,
|
|
||||||
__entry->len,
|
|
||||||
__print_symbolic(__entry->found, XFS_BUSY_STATES))
|
|
||||||
);
|
|
||||||
|
|
||||||
TRACE_EVENT(xfs_alloc_busy_trim,
|
TRACE_EVENT(xfs_alloc_busy_trim,
|
||||||
TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
|
TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
|
||||||
|
|
|
@ -187,7 +187,6 @@ struct xfs_busy_extent {
|
||||||
xfs_agnumber_t agno;
|
xfs_agnumber_t agno;
|
||||||
xfs_agblock_t bno;
|
xfs_agblock_t bno;
|
||||||
xfs_extlen_t length;
|
xfs_extlen_t length;
|
||||||
xlog_tid_t tid; /* transaction that created this */
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -1396,8 +1396,9 @@ xfs_alloc_ag_vextent_small(
|
||||||
if (error)
|
if (error)
|
||||||
goto error0;
|
goto error0;
|
||||||
if (fbno != NULLAGBLOCK) {
|
if (fbno != NULLAGBLOCK) {
|
||||||
if (xfs_alloc_busy_search(args->mp, args->agno, fbno, 1))
|
xfs_alloc_busy_reuse(args->mp, args->agno, fbno, 1,
|
||||||
xfs_trans_set_sync(args->tp);
|
args->userdata);
|
||||||
|
|
||||||
if (args->userdata) {
|
if (args->userdata) {
|
||||||
xfs_buf_t *bp;
|
xfs_buf_t *bp;
|
||||||
|
|
||||||
|
@ -2475,100 +2476,6 @@ error0:
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* AG Busy list management
|
|
||||||
* The busy list contains block ranges that have been freed but whose
|
|
||||||
* transactions have not yet hit disk. If any block listed in a busy
|
|
||||||
* list is reused, the transaction that freed it must be forced to disk
|
|
||||||
* before continuing to use the block.
|
|
||||||
*
|
|
||||||
* xfs_alloc_busy_insert - add to the per-ag busy list
|
|
||||||
* xfs_alloc_busy_clear - remove an item from the per-ag busy list
|
|
||||||
* xfs_alloc_busy_search - search for a busy extent
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Insert a new extent into the busy tree.
|
|
||||||
*
|
|
||||||
* The busy extent tree is indexed by the start block of the busy extent.
|
|
||||||
* there can be multiple overlapping ranges in the busy extent tree but only
|
|
||||||
* ever one entry at a given start block. The reason for this is that
|
|
||||||
* multi-block extents can be freed, then smaller chunks of that extent
|
|
||||||
* allocated and freed again before the first transaction commit is on disk.
|
|
||||||
* If the exact same start block is freed a second time, we have to wait for
|
|
||||||
* that busy extent to pass out of the tree before the new extent is inserted.
|
|
||||||
* There are two main cases we have to handle here.
|
|
||||||
*
|
|
||||||
* The first case is a transaction that triggers a "free - allocate - free"
|
|
||||||
* cycle. This can occur during btree manipulations as a btree block is freed
|
|
||||||
* to the freelist, then allocated from the free list, then freed again. In
|
|
||||||
* this case, the second extxpnet free is what triggers the duplicate and as
|
|
||||||
* such the transaction IDs should match. Because the extent was allocated in
|
|
||||||
* this transaction, the transaction must be marked as synchronous. This is
|
|
||||||
* true for all cases where the free/alloc/free occurs in the one transaction,
|
|
||||||
* hence the addition of the ASSERT(tp->t_flags & XFS_TRANS_SYNC) to this case.
|
|
||||||
* This serves to catch violations of the second case quite effectively.
|
|
||||||
*
|
|
||||||
* The second case is where the free/alloc/free occur in different
|
|
||||||
* transactions. In this case, the thread freeing the extent the second time
|
|
||||||
* can't mark the extent busy immediately because it is already tracked in a
|
|
||||||
* transaction that may be committing. When the log commit for the existing
|
|
||||||
* busy extent completes, the busy extent will be removed from the tree. If we
|
|
||||||
* allow the second busy insert to continue using that busy extent structure,
|
|
||||||
* it can be freed before this transaction is safely in the log. Hence our
|
|
||||||
* only option in this case is to force the log to remove the existing busy
|
|
||||||
* extent from the list before we insert the new one with the current
|
|
||||||
* transaction ID.
|
|
||||||
*
|
|
||||||
* The problem we are trying to avoid in the free-alloc-free in separate
|
|
||||||
* transactions is most easily described with a timeline:
|
|
||||||
*
|
|
||||||
* Thread 1 Thread 2 Thread 3 xfslogd
|
|
||||||
* xact alloc
|
|
||||||
* free X
|
|
||||||
* mark busy
|
|
||||||
* commit xact
|
|
||||||
* free xact
|
|
||||||
* xact alloc
|
|
||||||
* alloc X
|
|
||||||
* busy search
|
|
||||||
* mark xact sync
|
|
||||||
* commit xact
|
|
||||||
* free xact
|
|
||||||
* force log
|
|
||||||
* checkpoint starts
|
|
||||||
* ....
|
|
||||||
* xact alloc
|
|
||||||
* free X
|
|
||||||
* mark busy
|
|
||||||
* finds match
|
|
||||||
* *** KABOOM! ***
|
|
||||||
* ....
|
|
||||||
* log IO completes
|
|
||||||
* unbusy X
|
|
||||||
* checkpoint completes
|
|
||||||
*
|
|
||||||
* By issuing a log force in thread 3 @ "KABOOM", the thread will block until
|
|
||||||
* the checkpoint completes, and the busy extent it matched will have been
|
|
||||||
* removed from the tree when it is woken. Hence it can then continue safely.
|
|
||||||
*
|
|
||||||
* However, to ensure this matching process is robust, we need to use the
|
|
||||||
* transaction ID for identifying transaction, as delayed logging results in
|
|
||||||
* the busy extent and transaction lifecycles being different. i.e. the busy
|
|
||||||
* extent is active for a lot longer than the transaction. Hence the
|
|
||||||
* transaction structure can be freed and reallocated, then mark the same
|
|
||||||
* extent busy again in the new transaction. In this case the new transaction
|
|
||||||
* will have a different tid but can have the same address, and hence we need
|
|
||||||
* to check against the tid.
|
|
||||||
*
|
|
||||||
* Future: for delayed logging, we could avoid the log force if the extent was
|
|
||||||
* first freed in the current checkpoint sequence. This, however, requires the
|
|
||||||
* ability to pin the current checkpoint in memory until this transaction
|
|
||||||
* commits to ensure that both the original free and the current one combine
|
|
||||||
* logically into the one checkpoint. If the checkpoint sequences are
|
|
||||||
* different, however, we still need to wait on a log force.
|
|
||||||
*/
|
|
||||||
void
|
void
|
||||||
xfs_alloc_busy_insert(
|
xfs_alloc_busy_insert(
|
||||||
struct xfs_trans *tp,
|
struct xfs_trans *tp,
|
||||||
|
@ -2580,9 +2487,7 @@ xfs_alloc_busy_insert(
|
||||||
struct xfs_busy_extent *busyp;
|
struct xfs_busy_extent *busyp;
|
||||||
struct xfs_perag *pag;
|
struct xfs_perag *pag;
|
||||||
struct rb_node **rbp;
|
struct rb_node **rbp;
|
||||||
struct rb_node *parent;
|
struct rb_node *parent = NULL;
|
||||||
int match;
|
|
||||||
|
|
||||||
|
|
||||||
new = kmem_zalloc(sizeof(struct xfs_busy_extent), KM_MAYFAIL);
|
new = kmem_zalloc(sizeof(struct xfs_busy_extent), KM_MAYFAIL);
|
||||||
if (!new) {
|
if (!new) {
|
||||||
|
@ -2591,7 +2496,7 @@ xfs_alloc_busy_insert(
|
||||||
* block, make this a synchronous transaction to insure that
|
* block, make this a synchronous transaction to insure that
|
||||||
* the block is not reused before this transaction commits.
|
* the block is not reused before this transaction commits.
|
||||||
*/
|
*/
|
||||||
trace_xfs_alloc_busy(tp, agno, bno, len, 1);
|
trace_xfs_alloc_busy_enomem(tp->t_mountp, agno, bno, len);
|
||||||
xfs_trans_set_sync(tp);
|
xfs_trans_set_sync(tp);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -2599,66 +2504,28 @@ xfs_alloc_busy_insert(
|
||||||
new->agno = agno;
|
new->agno = agno;
|
||||||
new->bno = bno;
|
new->bno = bno;
|
||||||
new->length = len;
|
new->length = len;
|
||||||
new->tid = xfs_log_get_trans_ident(tp);
|
|
||||||
|
|
||||||
INIT_LIST_HEAD(&new->list);
|
INIT_LIST_HEAD(&new->list);
|
||||||
|
|
||||||
/* trace before insert to be able to see failed inserts */
|
/* trace before insert to be able to see failed inserts */
|
||||||
trace_xfs_alloc_busy(tp, agno, bno, len, 0);
|
trace_xfs_alloc_busy(tp->t_mountp, agno, bno, len);
|
||||||
|
|
||||||
pag = xfs_perag_get(tp->t_mountp, new->agno);
|
pag = xfs_perag_get(tp->t_mountp, new->agno);
|
||||||
restart:
|
|
||||||
spin_lock(&pag->pagb_lock);
|
spin_lock(&pag->pagb_lock);
|
||||||
rbp = &pag->pagb_tree.rb_node;
|
rbp = &pag->pagb_tree.rb_node;
|
||||||
parent = NULL;
|
while (*rbp) {
|
||||||
busyp = NULL;
|
|
||||||
match = 0;
|
|
||||||
while (*rbp && match >= 0) {
|
|
||||||
parent = *rbp;
|
parent = *rbp;
|
||||||
busyp = rb_entry(parent, struct xfs_busy_extent, rb_node);
|
busyp = rb_entry(parent, struct xfs_busy_extent, rb_node);
|
||||||
|
|
||||||
if (new->bno < busyp->bno) {
|
if (new->bno < busyp->bno) {
|
||||||
/* may overlap, but exact start block is lower */
|
|
||||||
rbp = &(*rbp)->rb_left;
|
rbp = &(*rbp)->rb_left;
|
||||||
if (new->bno + new->length > busyp->bno)
|
ASSERT(new->bno + new->length <= busyp->bno);
|
||||||
match = busyp->tid == new->tid ? 1 : -1;
|
|
||||||
} else if (new->bno > busyp->bno) {
|
} else if (new->bno > busyp->bno) {
|
||||||
/* may overlap, but exact start block is higher */
|
|
||||||
rbp = &(*rbp)->rb_right;
|
rbp = &(*rbp)->rb_right;
|
||||||
if (bno < busyp->bno + busyp->length)
|
ASSERT(bno >= busyp->bno + busyp->length);
|
||||||
match = busyp->tid == new->tid ? 1 : -1;
|
|
||||||
} else {
|
} else {
|
||||||
match = busyp->tid == new->tid ? 1 : -1;
|
ASSERT(0);
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (match < 0) {
|
|
||||||
/* overlap marked busy in different transaction */
|
|
||||||
spin_unlock(&pag->pagb_lock);
|
|
||||||
xfs_log_force(tp->t_mountp, XFS_LOG_SYNC);
|
|
||||||
goto restart;
|
|
||||||
}
|
|
||||||
if (match > 0) {
|
|
||||||
/*
|
|
||||||
* overlap marked busy in same transaction. Update if exact
|
|
||||||
* start block match, otherwise combine the busy extents into
|
|
||||||
* a single range.
|
|
||||||
*/
|
|
||||||
if (busyp->bno == new->bno) {
|
|
||||||
busyp->length = max(busyp->length, new->length);
|
|
||||||
spin_unlock(&pag->pagb_lock);
|
|
||||||
ASSERT(tp->t_flags & XFS_TRANS_SYNC);
|
|
||||||
xfs_perag_put(pag);
|
|
||||||
kmem_free(new);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
rb_erase(&busyp->rb_node, &pag->pagb_tree);
|
|
||||||
new->length = max(busyp->bno + busyp->length,
|
|
||||||
new->bno + new->length) -
|
|
||||||
min(busyp->bno, new->bno);
|
|
||||||
new->bno = min(busyp->bno, new->bno);
|
|
||||||
} else
|
|
||||||
busyp = NULL;
|
|
||||||
|
|
||||||
rb_link_node(&new->rb_node, parent, rbp);
|
rb_link_node(&new->rb_node, parent, rbp);
|
||||||
rb_insert_color(&new->rb_node, &pag->pagb_tree);
|
rb_insert_color(&new->rb_node, &pag->pagb_tree);
|
||||||
|
@ -2666,7 +2533,6 @@ restart:
|
||||||
list_add(&new->list, &tp->t_busy);
|
list_add(&new->list, &tp->t_busy);
|
||||||
spin_unlock(&pag->pagb_lock);
|
spin_unlock(&pag->pagb_lock);
|
||||||
xfs_perag_put(pag);
|
xfs_perag_put(pag);
|
||||||
kmem_free(busyp);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -2715,11 +2581,195 @@ xfs_alloc_busy_search(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
spin_unlock(&pag->pagb_lock);
|
spin_unlock(&pag->pagb_lock);
|
||||||
trace_xfs_alloc_busysearch(mp, agno, bno, len, !!match);
|
|
||||||
xfs_perag_put(pag);
|
xfs_perag_put(pag);
|
||||||
return match;
|
return match;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The found free extent [fbno, fend] overlaps part or all of the given busy
|
||||||
|
* extent. If the overlap covers the beginning, the end, or all of the busy
|
||||||
|
* extent, the overlapping portion can be made unbusy and used for the
|
||||||
|
* allocation. We can't split a busy extent because we can't modify a
|
||||||
|
* transaction/CIL context busy list, but we can update an entries block
|
||||||
|
* number or length.
|
||||||
|
*
|
||||||
|
* Returns true if the extent can safely be reused, or false if the search
|
||||||
|
* needs to be restarted.
|
||||||
|
*/
|
||||||
|
STATIC bool
|
||||||
|
xfs_alloc_busy_update_extent(
|
||||||
|
struct xfs_mount *mp,
|
||||||
|
struct xfs_perag *pag,
|
||||||
|
struct xfs_busy_extent *busyp,
|
||||||
|
xfs_agblock_t fbno,
|
||||||
|
xfs_extlen_t flen,
|
||||||
|
bool userdata)
|
||||||
|
{
|
||||||
|
xfs_agblock_t fend = fbno + flen;
|
||||||
|
xfs_agblock_t bbno = busyp->bno;
|
||||||
|
xfs_agblock_t bend = bbno + busyp->length;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If there is a busy extent overlapping a user allocation, we have
|
||||||
|
* no choice but to force the log and retry the search.
|
||||||
|
*
|
||||||
|
* Fortunately this does not happen during normal operation, but
|
||||||
|
* only if the filesystem is very low on space and has to dip into
|
||||||
|
* the AGFL for normal allocations.
|
||||||
|
*/
|
||||||
|
if (userdata)
|
||||||
|
goto out_force_log;
|
||||||
|
|
||||||
|
if (bbno < fbno && bend > fend) {
|
||||||
|
/*
|
||||||
|
* Case 1:
|
||||||
|
* bbno bend
|
||||||
|
* +BBBBBBBBBBBBBBBBB+
|
||||||
|
* +---------+
|
||||||
|
* fbno fend
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We would have to split the busy extent to be able to track
|
||||||
|
* it correct, which we cannot do because we would have to
|
||||||
|
* modify the list of busy extents attached to the transaction
|
||||||
|
* or CIL context, which is immutable.
|
||||||
|
*
|
||||||
|
* Force out the log to clear the busy extent and retry the
|
||||||
|
* search.
|
||||||
|
*/
|
||||||
|
goto out_force_log;
|
||||||
|
} else if (bbno >= fbno && bend <= fend) {
|
||||||
|
/*
|
||||||
|
* Case 2:
|
||||||
|
* bbno bend
|
||||||
|
* +BBBBBBBBBBBBBBBBB+
|
||||||
|
* +-----------------+
|
||||||
|
* fbno fend
|
||||||
|
*
|
||||||
|
* Case 3:
|
||||||
|
* bbno bend
|
||||||
|
* +BBBBBBBBBBBBBBBBB+
|
||||||
|
* +--------------------------+
|
||||||
|
* fbno fend
|
||||||
|
*
|
||||||
|
* Case 4:
|
||||||
|
* bbno bend
|
||||||
|
* +BBBBBBBBBBBBBBBBB+
|
||||||
|
* +--------------------------+
|
||||||
|
* fbno fend
|
||||||
|
*
|
||||||
|
* Case 5:
|
||||||
|
* bbno bend
|
||||||
|
* +BBBBBBBBBBBBBBBBB+
|
||||||
|
* +-----------------------------------+
|
||||||
|
* fbno fend
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The busy extent is fully covered by the extent we are
|
||||||
|
* allocating, and can simply be removed from the rbtree.
|
||||||
|
* However we cannot remove it from the immutable list
|
||||||
|
* tracking busy extents in the transaction or CIL context,
|
||||||
|
* so set the length to zero to mark it invalid.
|
||||||
|
*
|
||||||
|
* We also need to restart the busy extent search from the
|
||||||
|
* tree root, because erasing the node can rearrange the
|
||||||
|
* tree topology.
|
||||||
|
*/
|
||||||
|
rb_erase(&busyp->rb_node, &pag->pagb_tree);
|
||||||
|
busyp->length = 0;
|
||||||
|
return false;
|
||||||
|
} else if (fend < bend) {
|
||||||
|
/*
|
||||||
|
* Case 6:
|
||||||
|
* bbno bend
|
||||||
|
* +BBBBBBBBBBBBBBBBB+
|
||||||
|
* +---------+
|
||||||
|
* fbno fend
|
||||||
|
*
|
||||||
|
* Case 7:
|
||||||
|
* bbno bend
|
||||||
|
* +BBBBBBBBBBBBBBBBB+
|
||||||
|
* +------------------+
|
||||||
|
* fbno fend
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
busyp->bno = fend;
|
||||||
|
} else if (bbno < fbno) {
|
||||||
|
/*
|
||||||
|
* Case 8:
|
||||||
|
* bbno bend
|
||||||
|
* +BBBBBBBBBBBBBBBBB+
|
||||||
|
* +-------------+
|
||||||
|
* fbno fend
|
||||||
|
*
|
||||||
|
* Case 9:
|
||||||
|
* bbno bend
|
||||||
|
* +BBBBBBBBBBBBBBBBB+
|
||||||
|
* +----------------------+
|
||||||
|
* fbno fend
|
||||||
|
*/
|
||||||
|
busyp->length = fbno - busyp->bno;
|
||||||
|
} else {
|
||||||
|
ASSERT(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
trace_xfs_alloc_busy_reuse(mp, pag->pag_agno, fbno, flen);
|
||||||
|
return true;
|
||||||
|
|
||||||
|
out_force_log:
|
||||||
|
spin_unlock(&pag->pagb_lock);
|
||||||
|
xfs_log_force(mp, XFS_LOG_SYNC);
|
||||||
|
trace_xfs_alloc_busy_force(mp, pag->pag_agno, fbno, flen);
|
||||||
|
spin_lock(&pag->pagb_lock);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* For a given extent [fbno, flen], make sure we can reuse it safely.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
xfs_alloc_busy_reuse(
|
||||||
|
struct xfs_mount *mp,
|
||||||
|
xfs_agnumber_t agno,
|
||||||
|
xfs_agblock_t fbno,
|
||||||
|
xfs_extlen_t flen,
|
||||||
|
bool userdata)
|
||||||
|
{
|
||||||
|
struct xfs_perag *pag;
|
||||||
|
struct rb_node *rbp;
|
||||||
|
|
||||||
|
ASSERT(flen > 0);
|
||||||
|
|
||||||
|
pag = xfs_perag_get(mp, agno);
|
||||||
|
spin_lock(&pag->pagb_lock);
|
||||||
|
restart:
|
||||||
|
rbp = pag->pagb_tree.rb_node;
|
||||||
|
while (rbp) {
|
||||||
|
struct xfs_busy_extent *busyp =
|
||||||
|
rb_entry(rbp, struct xfs_busy_extent, rb_node);
|
||||||
|
xfs_agblock_t bbno = busyp->bno;
|
||||||
|
xfs_agblock_t bend = bbno + busyp->length;
|
||||||
|
|
||||||
|
if (fbno + flen <= bbno) {
|
||||||
|
rbp = rbp->rb_left;
|
||||||
|
continue;
|
||||||
|
} else if (fbno >= bend) {
|
||||||
|
rbp = rbp->rb_right;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!xfs_alloc_busy_update_extent(mp, pag, busyp, fbno, flen,
|
||||||
|
userdata))
|
||||||
|
goto restart;
|
||||||
|
}
|
||||||
|
spin_unlock(&pag->pagb_lock);
|
||||||
|
xfs_perag_put(pag);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* For a given extent [fbno, flen], search the busy extent list to find a
|
* For a given extent [fbno, flen], search the busy extent list to find a
|
||||||
* subset of the extent that is not busy. If *rlen is smaller than
|
* subset of the extent that is not busy. If *rlen is smaller than
|
||||||
|
@ -2734,13 +2784,16 @@ xfs_alloc_busy_trim(
|
||||||
xfs_agblock_t *rbno,
|
xfs_agblock_t *rbno,
|
||||||
xfs_extlen_t *rlen)
|
xfs_extlen_t *rlen)
|
||||||
{
|
{
|
||||||
xfs_agblock_t fbno = bno;
|
xfs_agblock_t fbno;
|
||||||
xfs_extlen_t flen = len;
|
xfs_extlen_t flen;
|
||||||
struct rb_node *rbp;
|
struct rb_node *rbp;
|
||||||
|
|
||||||
ASSERT(flen > 0);
|
ASSERT(len > 0);
|
||||||
|
|
||||||
spin_lock(&args->pag->pagb_lock);
|
spin_lock(&args->pag->pagb_lock);
|
||||||
|
restart:
|
||||||
|
fbno = bno;
|
||||||
|
flen = len;
|
||||||
rbp = args->pag->pagb_tree.rb_node;
|
rbp = args->pag->pagb_tree.rb_node;
|
||||||
while (rbp && flen >= args->minlen) {
|
while (rbp && flen >= args->minlen) {
|
||||||
struct xfs_busy_extent *busyp =
|
struct xfs_busy_extent *busyp =
|
||||||
|
@ -2757,6 +2810,18 @@ xfs_alloc_busy_trim(
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If this is a metadata allocation, try to reuse the busy
|
||||||
|
* extent instead of trimming the allocation.
|
||||||
|
*/
|
||||||
|
if (!args->userdata) {
|
||||||
|
if (!xfs_alloc_busy_update_extent(args->mp, args->pag,
|
||||||
|
busyp, fbno, flen,
|
||||||
|
false))
|
||||||
|
goto restart;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
if (bbno <= fbno) {
|
if (bbno <= fbno) {
|
||||||
/* start overlap */
|
/* start overlap */
|
||||||
|
|
||||||
|
@ -2906,17 +2971,15 @@ xfs_alloc_busy_clear(
|
||||||
{
|
{
|
||||||
struct xfs_perag *pag;
|
struct xfs_perag *pag;
|
||||||
|
|
||||||
trace_xfs_alloc_unbusy(mp, busyp->agno, busyp->bno,
|
|
||||||
busyp->length);
|
|
||||||
|
|
||||||
ASSERT(xfs_alloc_busy_search(mp, busyp->agno, busyp->bno,
|
|
||||||
busyp->length) == 1);
|
|
||||||
|
|
||||||
list_del_init(&busyp->list);
|
list_del_init(&busyp->list);
|
||||||
|
|
||||||
pag = xfs_perag_get(mp, busyp->agno);
|
pag = xfs_perag_get(mp, busyp->agno);
|
||||||
spin_lock(&pag->pagb_lock);
|
spin_lock(&pag->pagb_lock);
|
||||||
rb_erase(&busyp->rb_node, &pag->pagb_tree);
|
if (busyp->length) {
|
||||||
|
trace_xfs_alloc_busy_clear(mp, busyp->agno, busyp->bno,
|
||||||
|
busyp->length);
|
||||||
|
rb_erase(&busyp->rb_node, &pag->pagb_tree);
|
||||||
|
}
|
||||||
spin_unlock(&pag->pagb_lock);
|
spin_unlock(&pag->pagb_lock);
|
||||||
xfs_perag_put(pag);
|
xfs_perag_put(pag);
|
||||||
|
|
||||||
|
|
|
@ -145,6 +145,10 @@ xfs_alloc_busy_clear(struct xfs_mount *mp, struct xfs_busy_extent *busyp);
|
||||||
int
|
int
|
||||||
xfs_alloc_busy_search(struct xfs_mount *mp, xfs_agnumber_t agno,
|
xfs_alloc_busy_search(struct xfs_mount *mp, xfs_agnumber_t agno,
|
||||||
xfs_agblock_t bno, xfs_extlen_t len);
|
xfs_agblock_t bno, xfs_extlen_t len);
|
||||||
|
|
||||||
|
void
|
||||||
|
xfs_alloc_busy_reuse(struct xfs_mount *mp, xfs_agnumber_t agno,
|
||||||
|
xfs_agblock_t fbno, xfs_extlen_t flen, bool userdata);
|
||||||
#endif /* __KERNEL__ */
|
#endif /* __KERNEL__ */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -94,8 +94,8 @@ xfs_allocbt_alloc_block(
|
||||||
*stat = 0;
|
*stat = 0;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
if (xfs_alloc_busy_search(cur->bc_mp, cur->bc_private.a.agno, bno, 1))
|
|
||||||
xfs_trans_set_sync(cur->bc_tp);
|
xfs_alloc_busy_reuse(cur->bc_mp, cur->bc_private.a.agno, bno, 1, false);
|
||||||
|
|
||||||
xfs_trans_agbtree_delta(cur->bc_tp, 1);
|
xfs_trans_agbtree_delta(cur->bc_tp, 1);
|
||||||
new->s = cpu_to_be32(bno);
|
new->s = cpu_to_be32(bno);
|
||||||
|
@ -120,17 +120,6 @@ xfs_allocbt_free_block(
|
||||||
if (error)
|
if (error)
|
||||||
return error;
|
return error;
|
||||||
|
|
||||||
/*
|
|
||||||
* Since blocks move to the free list without the coordination used in
|
|
||||||
* xfs_bmap_finish, we can't allow block to be available for
|
|
||||||
* reallocation and non-transaction writing (user data) until we know
|
|
||||||
* that the transaction that moved it to the free list is permanently
|
|
||||||
* on disk. We track the blocks by declaring these blocks as "busy";
|
|
||||||
* the busy list is maintained on a per-ag basis and each transaction
|
|
||||||
* records which entries should be removed when the iclog commits to
|
|
||||||
* disk. If a busy block is allocated, the iclog is pushed up to the
|
|
||||||
* LSN that freed the block.
|
|
||||||
*/
|
|
||||||
xfs_alloc_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1);
|
xfs_alloc_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1);
|
||||||
xfs_trans_agbtree_delta(cur->bc_tp, -1);
|
xfs_trans_agbtree_delta(cur->bc_tp, -1);
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
@ -3248,13 +3248,6 @@ xfs_log_ticket_get(
|
||||||
return ticket;
|
return ticket;
|
||||||
}
|
}
|
||||||
|
|
||||||
xlog_tid_t
|
|
||||||
xfs_log_get_trans_ident(
|
|
||||||
struct xfs_trans *tp)
|
|
||||||
{
|
|
||||||
return tp->t_ticket->t_tid;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Allocate and initialise a new log ticket.
|
* Allocate and initialise a new log ticket.
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -189,8 +189,6 @@ void xlog_iodone(struct xfs_buf *);
|
||||||
struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket);
|
struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket);
|
||||||
void xfs_log_ticket_put(struct xlog_ticket *ticket);
|
void xfs_log_ticket_put(struct xlog_ticket *ticket);
|
||||||
|
|
||||||
xlog_tid_t xfs_log_get_trans_ident(struct xfs_trans *tp);
|
|
||||||
|
|
||||||
void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
|
void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
|
||||||
struct xfs_log_vec *log_vector,
|
struct xfs_log_vec *log_vector,
|
||||||
xfs_lsn_t *commit_lsn, int flags);
|
xfs_lsn_t *commit_lsn, int flags);
|
||||||
|
|
|
@ -146,6 +146,8 @@ static inline uint xlog_get_client_id(__be32 i)
|
||||||
shutdown */
|
shutdown */
|
||||||
#define XLOG_TAIL_WARN 0x10 /* log tail verify warning issued */
|
#define XLOG_TAIL_WARN 0x10 /* log tail verify warning issued */
|
||||||
|
|
||||||
|
typedef __uint32_t xlog_tid_t;
|
||||||
|
|
||||||
#ifdef __KERNEL__
|
#ifdef __KERNEL__
|
||||||
/*
|
/*
|
||||||
* Below are states for covering allocation transactions.
|
* Below are states for covering allocation transactions.
|
||||||
|
|
|
@ -73,8 +73,6 @@ typedef __int32_t xfs_tid_t; /* transaction identifier */
|
||||||
typedef __uint32_t xfs_dablk_t; /* dir/attr block number (in file) */
|
typedef __uint32_t xfs_dablk_t; /* dir/attr block number (in file) */
|
||||||
typedef __uint32_t xfs_dahash_t; /* dir/attr hash value */
|
typedef __uint32_t xfs_dahash_t; /* dir/attr hash value */
|
||||||
|
|
||||||
typedef __uint32_t xlog_tid_t; /* transaction ID type */
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* These types are 64 bits on disk but are either 32 or 64 bits in memory.
|
* These types are 64 bits on disk but are either 32 or 64 bits in memory.
|
||||||
* Disk based types:
|
* Disk based types:
|
||||||
|
|
Loading…
Reference in New Issue