/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
 */

#ifndef __LOG_DOT_H__
#define __LOG_DOT_H__

#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/writeback.h>
#include "incore.h"
#include "inode.h"

/*
 * The minimum amount of log space required for a log flush is one block for
 * revokes and one block for the log header. Log flushes other than
 * GFS2_LOG_HEAD_FLUSH_NORMAL may write one or two more log headers.
 */
#define GFS2_LOG_FLUSH_MIN_BLOCKS 4

/**
 * gfs2_log_lock - acquire the right to mess with the log manager
 * @sdp: the filesystem
 *
 */

static inline void gfs2_log_lock(struct gfs2_sbd *sdp)
__acquires(&sdp->sd_log_lock)
{
        spin_lock(&sdp->sd_log_lock);
}

/**
 * gfs2_log_unlock - release the right to mess with the log manager
 * @sdp: the filesystem
 *
 */

static inline void gfs2_log_unlock(struct gfs2_sbd *sdp)
__releases(&sdp->sd_log_lock)
{
        spin_unlock(&sdp->sd_log_lock);
}
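
/**
 * gfs2_log_pointers_init - initialize the in-core log pointers
 * @sdp: the filesystem
 * @value: journal block number to start after (typically the block of the
 *         most recent log header)
 *
 * Sets sd_log_tail, sd_log_flush_tail and sd_log_head to the journal block
 * following @value, wrapping around at the end of the journal.
 */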
static inline void gfs2_log_pointers_init(struct gfs2_sbd *sdp,
                                          unsigned int value)
{
        if (++value == sdp->sd_jdesc->jd_blocks) {
                value = 0;
        }
        sdp->sd_log_tail = value;
        sdp->sd_log_flush_tail = value;
        sdp->sd_log_head = value;
}
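
/**
 * gfs2_ordered_add_inode - queue an inode for ordered data writeback
 * @ip: the inode to queue
 *
 * In ordered data mode, dirty inode data is written back before the log is
 * flushed, so the inode is added to the sd_log_ordered list here.  Inodes in
 * jdata mode journal their data instead and are skipped, as is everything
 * when the filesystem is not mounted in ordered mode.  The unlocked
 * list_empty() check avoids taking sd_ordered_lock for inodes that are
 * already queued; it is repeated under the lock to close the race with a
 * concurrent add.
 */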
static inline void gfs2_ordered_add_inode(struct gfs2_inode *ip)
{
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);

        if (gfs2_is_jdata(ip) || !gfs2_is_ordered(sdp))
                return;

        if (list_empty(&ip->i_ordered)) {
                spin_lock(&sdp->sd_ordered_lock);
                if (list_empty(&ip->i_ordered))
                        list_add(&ip->i_ordered, &sdp->sd_log_ordered);
                spin_unlock(&sdp->sd_ordered_lock);
        }
}

extern void gfs2_ordered_del_inode(struct gfs2_inode *ip);
extern unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct);
extern void gfs2_remove_from_ail(struct gfs2_bufdata *bd);
extern bool gfs2_log_is_empty(struct gfs2_sbd *sdp);
extern void gfs2_log_release_revokes(struct gfs2_sbd *sdp, unsigned int revokes);
extern void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks);
extern bool gfs2_log_try_reserve(struct gfs2_sbd *sdp, struct gfs2_trans *tr,
                                 unsigned int *extra_revokes);
extern void gfs2_log_reserve(struct gfs2_sbd *sdp, struct gfs2_trans *tr,
                             unsigned int *extra_revokes);
extern void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
                                  u64 seq, u32 tail, u32 lblock, u32 flags,
                                  int op_flags);
extern void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl,
                           u32 type);
extern void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans);
extern void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc);
extern void log_flush_wait(struct gfs2_sbd *sdp);

extern int gfs2_logd(void *data);
extern void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd);
extern void gfs2_glock_remove_revoke(struct gfs2_glock *gl);
extern void gfs2_flush_revokes(struct gfs2_sbd *sdp);

#endif /* __LOG_DOT_H__ */