linux-sg2042/fs/xfs/xfs_rw.c

360 lines
8.7 KiB
C
Raw Normal View History

/*
* Copyright (c) 2000-2006 Silicon Graphics, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_types.h"
#include "xfs_bit.h"
#include "xfs_log.h"
#include "xfs_inum.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_dir2.h"
#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc_btree.h"
#include "xfs_dir2_sf.h"
#include "xfs_attr_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_inode_item.h"
#include "xfs_itable.h"
#include "xfs_btree.h"
#include "xfs_alloc.h"
#include "xfs_ialloc.h"
#include "xfs_attr.h"
#include "xfs_bmap.h"
#include "xfs_error.h"
#include "xfs_buf_item.h"
#include "xfs_rw.h"
xfs: event tracing support Convert the old xfs tracing support that could only be used with the out of tree kdb and xfsidbg patches to use the generic event tracer. To use it make sure CONFIG_EVENT_TRACING is enabled and then enable all xfs trace channels by: echo 1 > /sys/kernel/debug/tracing/events/xfs/enable or alternatively enable single events by just doing the same in one event subdirectory, e.g. echo 1 > /sys/kernel/debug/tracing/events/xfs/xfs_ihold/enable or set more complex filters, etc. In Documentation/trace/events.txt all this is desctribed in more detail. To reads the events do a cat /sys/kernel/debug/tracing/trace Compared to the last posting this patch converts the tracing mostly to the one tracepoint per callsite model that other users of the new tracing facility also employ. This allows a very fine-grained control of the tracing, a cleaner output of the traces and also enables the perf tool to use each tracepoint as a virtual performance counter, allowing us to e.g. count how often certain workloads git various spots in XFS. Take a look at http://lwn.net/Articles/346470/ for some examples. Also the btree tracing isn't included at all yet, as it will require additional core tracing features not in mainline yet, I plan to deliver it later. And the really nice thing about this patch is that it actually removes many lines of code while adding this nice functionality: fs/xfs/Makefile | 8 fs/xfs/linux-2.6/xfs_acl.c | 1 fs/xfs/linux-2.6/xfs_aops.c | 52 - fs/xfs/linux-2.6/xfs_aops.h | 2 fs/xfs/linux-2.6/xfs_buf.c | 117 +-- fs/xfs/linux-2.6/xfs_buf.h | 33 fs/xfs/linux-2.6/xfs_fs_subr.c | 3 fs/xfs/linux-2.6/xfs_ioctl.c | 1 fs/xfs/linux-2.6/xfs_ioctl32.c | 1 fs/xfs/linux-2.6/xfs_iops.c | 1 fs/xfs/linux-2.6/xfs_linux.h | 1 fs/xfs/linux-2.6/xfs_lrw.c | 87 -- fs/xfs/linux-2.6/xfs_lrw.h | 45 - fs/xfs/linux-2.6/xfs_super.c | 104 --- fs/xfs/linux-2.6/xfs_super.h | 7 fs/xfs/linux-2.6/xfs_sync.c | 1 fs/xfs/linux-2.6/xfs_trace.c | 75 ++ fs/xfs/linux-2.6/xfs_trace.h | 1369 +++++++++++++++++++++++++++++++++++++++++ fs/xfs/linux-2.6/xfs_vnode.h | 4 fs/xfs/quota/xfs_dquot.c | 110 --- fs/xfs/quota/xfs_dquot.h | 21 fs/xfs/quota/xfs_qm.c | 40 - fs/xfs/quota/xfs_qm_syscalls.c | 4 fs/xfs/support/ktrace.c | 323 --------- fs/xfs/support/ktrace.h | 85 -- fs/xfs/xfs.h | 16 fs/xfs/xfs_ag.h | 14 fs/xfs/xfs_alloc.c | 230 +----- fs/xfs/xfs_alloc.h | 27 fs/xfs/xfs_alloc_btree.c | 1 fs/xfs/xfs_attr.c | 107 --- fs/xfs/xfs_attr.h | 10 fs/xfs/xfs_attr_leaf.c | 14 fs/xfs/xfs_attr_sf.h | 40 - fs/xfs/xfs_bmap.c | 507 +++------------ fs/xfs/xfs_bmap.h | 49 - fs/xfs/xfs_bmap_btree.c | 6 fs/xfs/xfs_btree.c | 5 fs/xfs/xfs_btree_trace.h | 17 fs/xfs/xfs_buf_item.c | 87 -- fs/xfs/xfs_buf_item.h | 20 fs/xfs/xfs_da_btree.c | 3 fs/xfs/xfs_da_btree.h | 7 fs/xfs/xfs_dfrag.c | 2 fs/xfs/xfs_dir2.c | 8 fs/xfs/xfs_dir2_block.c | 20 fs/xfs/xfs_dir2_leaf.c | 21 fs/xfs/xfs_dir2_node.c | 27 fs/xfs/xfs_dir2_sf.c | 26 fs/xfs/xfs_dir2_trace.c | 216 ------ fs/xfs/xfs_dir2_trace.h | 72 -- fs/xfs/xfs_filestream.c | 8 fs/xfs/xfs_fsops.c | 2 fs/xfs/xfs_iget.c | 111 --- fs/xfs/xfs_inode.c | 67 -- fs/xfs/xfs_inode.h | 76 -- fs/xfs/xfs_inode_item.c | 5 fs/xfs/xfs_iomap.c | 85 -- fs/xfs/xfs_iomap.h | 8 fs/xfs/xfs_log.c | 181 +---- fs/xfs/xfs_log_priv.h | 20 fs/xfs/xfs_log_recover.c | 1 fs/xfs/xfs_mount.c | 2 fs/xfs/xfs_quota.h | 8 fs/xfs/xfs_rename.c | 1 fs/xfs/xfs_rtalloc.c | 1 fs/xfs/xfs_rw.c | 3 fs/xfs/xfs_trans.h | 47 + fs/xfs/xfs_trans_buf.c | 62 - fs/xfs/xfs_vnodeops.c | 8 70 files changed, 2151 insertions(+), 2592 deletions(-) Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Alex Elder <aelder@sgi.com>
2009-12-15 07:14:59 +08:00
#include "xfs_trace.h"
/*
* This is a subroutine for xfs_write() and other writers (xfs_ioctl)
* which clears the setuid and setgid bits when a file is written.
*/
int
xfs_write_clear_setuid(
xfs_inode_t *ip)
{
xfs_mount_t *mp;
xfs_trans_t *tp;
int error;
mp = ip->i_mount;
tp = xfs_trans_alloc(mp, XFS_TRANS_WRITEID);
if ((error = xfs_trans_reserve(tp, 0,
XFS_WRITEID_LOG_RES(mp),
0, 0, 0))) {
xfs_trans_cancel(tp, 0);
return error;
}
xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
xfs_trans_ihold(tp, ip);
ip->i_d.di_mode &= ~S_ISUID;
/*
* Note that we don't have to worry about mandatory
* file locking being disabled here because we only
* clear the S_ISGID bit if the Group execute bit is
* on, but if it was on then mandatory locking wouldn't
* have been enabled.
*/
if (ip->i_d.di_mode & S_IXGRP) {
ip->i_d.di_mode &= ~S_ISGID;
}
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
xfs_trans_set_sync(tp);
error = xfs_trans_commit(tp, 0);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
return 0;
}
/*
* Force a shutdown of the filesystem instantly while keeping
* the filesystem consistent. We don't do an unmount here; just shutdown
* the shop, make sure that absolutely nothing persistent happens to
* this filesystem after this point.
*/
void
xfs_do_force_shutdown(
xfs_mount_t *mp,
int flags,
char *fname,
int lnnum)
{
int logerror;
logerror = flags & SHUTDOWN_LOG_IO_ERROR;
if (!(flags & SHUTDOWN_FORCE_UMOUNT)) {
cmn_err(CE_NOTE, "xfs_force_shutdown(%s,0x%x) called from "
"line %d of file %s. Return address = 0x%p",
mp->m_fsname, flags, lnnum, fname, __return_address);
}
/*
* No need to duplicate efforts.
*/
if (XFS_FORCED_SHUTDOWN(mp) && !logerror)
return;
/*
* This flags XFS_MOUNT_FS_SHUTDOWN, makes sure that we don't
* queue up anybody new on the log reservations, and wakes up
* everybody who's sleeping on log reservations to tell them
* the bad news.
*/
if (xfs_log_force_umount(mp, logerror))
return;
if (flags & SHUTDOWN_CORRUPT_INCORE) {
xfs_cmn_err(XFS_PTAG_SHUTDOWN_CORRUPT, CE_ALERT, mp,
"Corruption of in-memory data detected. Shutting down filesystem: %s",
mp->m_fsname);
if (XFS_ERRLEVEL_HIGH <= xfs_error_level) {
xfs_stack_trace();
}
} else if (!(flags & SHUTDOWN_FORCE_UMOUNT)) {
if (logerror) {
xfs_cmn_err(XFS_PTAG_SHUTDOWN_LOGERROR, CE_ALERT, mp,
"Log I/O Error Detected. Shutting down filesystem: %s",
mp->m_fsname);
} else if (flags & SHUTDOWN_DEVICE_REQ) {
xfs_cmn_err(XFS_PTAG_SHUTDOWN_IOERROR, CE_ALERT, mp,
"All device paths lost. Shutting down filesystem: %s",
mp->m_fsname);
} else if (!(flags & SHUTDOWN_REMOTE_REQ)) {
xfs_cmn_err(XFS_PTAG_SHUTDOWN_IOERROR, CE_ALERT, mp,
"I/O Error Detected. Shutting down filesystem: %s",
mp->m_fsname);
}
}
if (!(flags & SHUTDOWN_FORCE_UMOUNT)) {
cmn_err(CE_ALERT, "Please umount the filesystem, "
"and rectify the problem(s)");
}
}
/*
* Called when we want to stop a buffer from getting written or read.
* We attach the EIO error, muck with its flags, and call biodone
* so that the proper iodone callbacks get called.
*/
int
xfs_bioerror(
xfs_buf_t *bp)
{
#ifdef XFSERRORDEBUG
ASSERT(XFS_BUF_ISREAD(bp) || bp->b_iodone);
#endif
/*
* No need to wait until the buffer is unpinned.
* We aren't flushing it.
*/
XFS_BUF_ERROR(bp, EIO);
/*
* We're calling biodone, so delete B_DONE flag. Either way
* we have to call the iodone callback, and calling biodone
* probably is the best way since it takes care of
* GRIO as well.
*/
XFS_BUF_UNREAD(bp);
XFS_BUF_UNDELAYWRITE(bp);
XFS_BUF_UNDONE(bp);
XFS_BUF_STALE(bp);
XFS_BUF_CLR_BDSTRAT_FUNC(bp);
xfs_biodone(bp);
return (EIO);
}
/*
* Same as xfs_bioerror, except that we are releasing the buffer
* here ourselves, and avoiding the biodone call.
* This is meant for userdata errors; metadata bufs come with
* iodone functions attached, so that we can track down errors.
*/
int
xfs_bioerror_relse(
xfs_buf_t *bp)
{
int64_t fl;
ASSERT(XFS_BUF_IODONE_FUNC(bp) != xfs_buf_iodone_callbacks);
ASSERT(XFS_BUF_IODONE_FUNC(bp) != xlog_iodone);
fl = XFS_BUF_BFLAGS(bp);
/*
* No need to wait until the buffer is unpinned.
* We aren't flushing it.
*
* chunkhold expects B_DONE to be set, whether
* we actually finish the I/O or not. We don't want to
* change that interface.
*/
XFS_BUF_UNREAD(bp);
XFS_BUF_UNDELAYWRITE(bp);
XFS_BUF_DONE(bp);
XFS_BUF_STALE(bp);
XFS_BUF_CLR_IODONE_FUNC(bp);
XFS_BUF_CLR_BDSTRAT_FUNC(bp);
if (!(fl & XFS_B_ASYNC)) {
/*
* Mark b_error and B_ERROR _both_.
* Lot's of chunkcache code assumes that.
* There's no reason to mark error for
* ASYNC buffers.
*/
XFS_BUF_ERROR(bp, EIO);
XFS_BUF_FINISH_IOWAIT(bp);
} else {
xfs_buf_relse(bp);
}
return (EIO);
}
/*
* Prints out an ALERT message about I/O error.
*/
void
xfs_ioerror_alert(
char *func,
struct xfs_mount *mp,
xfs_buf_t *bp,
xfs_daddr_t blkno)
{
cmn_err(CE_ALERT,
"I/O error in filesystem (\"%s\") meta-data dev %s block 0x%llx"
" (\"%s\") error %d buf count %zd",
(!mp || !mp->m_fsname) ? "(fs name not set)" : mp->m_fsname,
XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)),
(__uint64_t)blkno, func,
XFS_BUF_GETERROR(bp), XFS_BUF_COUNT(bp));
}
/*
* This isn't an absolute requirement, but it is
* just a good idea to call xfs_read_buf instead of
* directly doing a read_buf call. For one, we shouldn't
* be doing this disk read if we are in SHUTDOWN state anyway,
* so this stops that from happening. Secondly, this does all
* the error checking stuff and the brelse if appropriate for
* the caller, so the code can be a little leaner.
*/
int
xfs_read_buf(
struct xfs_mount *mp,
xfs_buftarg_t *target,
xfs_daddr_t blkno,
int len,
uint flags,
xfs_buf_t **bpp)
{
xfs_buf_t *bp;
int error;
if (!flags)
flags = XBF_LOCK | XBF_MAPPED;
bp = xfs_buf_read(target, blkno, len, flags);
if (!bp)
return XFS_ERROR(EIO);
error = XFS_BUF_GETERROR(bp);
if (bp && !error && !XFS_FORCED_SHUTDOWN(mp)) {
*bpp = bp;
} else {
*bpp = NULL;
if (error) {
xfs_ioerror_alert("xfs_read_buf", mp, bp, XFS_BUF_ADDR(bp));
} else {
error = XFS_ERROR(EIO);
}
if (bp) {
XFS_BUF_UNDONE(bp);
XFS_BUF_UNDELAYWRITE(bp);
XFS_BUF_STALE(bp);
/*
* brelse clears B_ERROR and b_error
*/
xfs_buf_relse(bp);
}
}
return (error);
}
/*
* Wrapper around bwrite() so that we can trap
* write errors, and act accordingly.
*/
int
xfs_bwrite(
struct xfs_mount *mp,
struct xfs_buf *bp)
{
int error;
/*
* XXXsup how does this work for quotas.
*/
XFS_BUF_SET_BDSTRAT_FUNC(bp, xfs_bdstrat_cb);
bp->b_mount = mp;
XFS_BUF_WRITE(bp);
if ((error = XFS_bwrite(bp))) {
ASSERT(mp);
/*
* Cannot put a buftrace here since if the buffer is not
* B_HOLD then we will brelse() the buffer before returning
* from bwrite and we could be tracing a buffer that has
* been reused.
*/
xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
}
return (error);
}
/*
* helper function to extract extent size hint from inode
*/
xfs_extlen_t
xfs_get_extsz_hint(
struct xfs_inode *ip)
{
xfs_extlen_t extsz;
if (unlikely(XFS_IS_REALTIME_INODE(ip))) {
extsz = (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)
? ip->i_d.di_extsize
: ip->i_mount->m_sb.sb_rextsize;
ASSERT(extsz);
} else {
extsz = (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)
? ip->i_d.di_extsize : 0;
}
return extsz;
}