2013-08-12 18:49:35 +08:00
|
|
|
/*
|
|
|
|
* Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
|
|
|
|
* All Rights Reserved.
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU General Public License as
|
|
|
|
* published by the Free Software Foundation.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it would be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with this program; if not, write the Free Software Foundation,
|
|
|
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
|
|
*/
|
|
|
|
#ifndef __XFS_INODE_BUF_H__
|
|
|
|
#define __XFS_INODE_BUF_H__
|
|
|
|
|
|
|
|
struct xfs_inode;
|
|
|
|
struct xfs_dinode;
|
2016-02-09 13:54:58 +08:00
|
|
|
|
|
|
|
/*
|
2016-02-09 13:54:58 +08:00
|
|
|
* In memory representation of the XFS inode. This is held in the in-core struct
|
|
|
|
* xfs_inode and represents the current on disk values but the structure is not
|
|
|
|
* in on-disk format. That is, this structure is always translated to on-disk
|
2016-02-09 13:54:58 +08:00
|
|
|
* format specific structures at the appropriate time.
|
|
|
|
*/
|
|
|
|
struct xfs_icdinode {
|
|
|
|
__int8_t di_version; /* inode version */
|
|
|
|
__int8_t di_format; /* format of di_c data */
|
2016-02-09 13:54:58 +08:00
|
|
|
__uint16_t di_flushiter; /* incremented on flush */
|
2016-02-09 13:54:58 +08:00
|
|
|
__uint32_t di_uid; /* owner's user id */
|
|
|
|
__uint32_t di_gid; /* owner's group id */
|
|
|
|
__uint16_t di_projid_lo; /* lower part of owner's project id */
|
|
|
|
__uint16_t di_projid_hi; /* higher part of owner's project id */
|
|
|
|
xfs_fsize_t di_size; /* number of bytes in file */
|
|
|
|
xfs_rfsblock_t di_nblocks; /* # of direct & btree blocks used */
|
|
|
|
xfs_extlen_t di_extsize; /* basic/minimum extent size for file */
|
|
|
|
xfs_extnum_t di_nextents; /* number of extents in data fork */
|
|
|
|
xfs_aextnum_t di_anextents; /* number of extents in attribute fork*/
|
|
|
|
__uint8_t di_forkoff; /* attr fork offs, <<3 for 64b align */
|
|
|
|
__int8_t di_aformat; /* format of attr fork's data */
|
|
|
|
__uint32_t di_dmevmask; /* DMIG event mask */
|
|
|
|
__uint16_t di_dmstate; /* DMIG state info */
|
|
|
|
__uint16_t di_flags; /* random flags, XFS_DIFLAG_... */
|
|
|
|
|
|
|
|
__uint64_t di_flags2; /* more random flags */
|
|
|
|
|
|
|
|
xfs_ictimestamp_t di_crtime; /* time created */
|
|
|
|
};
|
2013-08-12 18:49:35 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Inode location information. Stored in the inode and passed to
|
|
|
|
* xfs_imap_to_bp() to get a buffer and dinode for a given inode.
|
|
|
|
*/
|
|
|
|
struct xfs_imap {
|
|
|
|
xfs_daddr_t im_blkno; /* starting BB of inode chunk */
|
|
|
|
ushort im_len; /* length in BBs of inode chunk */
|
|
|
|
ushort im_boffset; /* inode offset in block in bytes */
|
|
|
|
};
|
|
|
|
|
xfs: recovery of swap extents operations for CRC filesystems
This is the recovery side of the btree block owner change operation
performed by swapext on CRC enabled filesystems. We detect that an
owner change is needed by the flag that has been placed on the inode
log format flag field. Because the inode recovery is being replayed
after the buffers that make up the BMBT in the given checkpoint, we
can walk all the buffers and directly modify them when we see the
flag set on an inode.
Because the inode can be relogged and hence present in multiple
checkpoints with the "change owner" flag set, we could do multiple
passes across the inode to do this change. While this isn't optimal,
we can't directly ignore the flag as there may be multiple
independent swap extent operations being replayed on the same inode
in different checkpoints so we can't ignore them.
Further, because the owner change operation uses ordered buffers, we
might have buffers that are newer on disk than the current
checkpoint and so already have the owner changed in them. Hence we
cannot just peek at a buffer in the tree and check that it has the
correct owner and assume that the change was completed.
So, for the moment just brute force the owner change every time we
see an inode with the flag set. Note that we have to be careful here
because the owner of the buffers may point to either the old owner
or the new owner. Currently the verifier can't verify the owner
directly, so there is no failure case here right now. If we verify
the owner exactly in future, then we'll have to take this into
account.
This was tested in terms of normal operation via xfstests - all of
the fsr tests now pass without failure. However, we really need to
modify xfs/227 to stress v3 inodes correctly to ensure we fully
cover this case for v5 filesystems.
In terms of recovery testing, I used a hacked version of xfs_fsr
that held the temp inode open for a few seconds before exiting so
that the filesystem could be shut down with an open owner change
recovery flags set on at least the temp inode. fsr leaves the temp
inode unlinked and in btree format, so this was necessary for the
owner change to be reliably replayed.
logprint confirmed the tmp inode in the log had the correct flag set:
INO: cnt:3 total:3 a:0x69e9e0 len:56 a:0x69ea20 len:176 a:0x69eae0 len:88
INODE: #regs:3 ino:0x44 flags:0x209 dsize:88
^^^^^
0x200 is set, indicating a data fork owner change needed to be
replayed on inode 0x44. A printk in the recovery code confirmed that
the inode change was recovered:
XFS (vdc): Mounting Filesystem
XFS (vdc): Starting recovery (logdev: internal)
recovering owner change ino 0x44
XFS (vdc): Version 5 superblock detected. This kernel has EXPERIMENTAL support enabled!
Use of these features in this kernel is at your own risk!
XFS (vdc): Ending recovery (logdev: internal)
The script used to test this was:
$ cat ./recovery-fsr.sh
#!/bin/bash
dev=/dev/vdc
mntpt=/mnt/scratch
testfile=$mntpt/testfile
umount $mntpt
mkfs.xfs -f -m crc=1 $dev
mount $dev $mntpt
chmod 777 $mntpt
for i in `seq 10000 -1 0`; do
xfs_io -f -d -c "pwrite $(($i * 4096)) 4096" $testfile > /dev/null 2>&1
done
xfs_bmap -vp $testfile |head -20
xfs_fsr -d -v $testfile &
sleep 10
/home/dave/src/xfstests-dev/src/godown -f $mntpt
wait
umount $mntpt
xfs_logprint -t $dev |tail -20
time mount $dev $mntpt
xfs_bmap -vp $testfile
umount $mntpt
$
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
2013-08-30 08:23:45 +08:00
|
|
|
int xfs_imap_to_bp(struct xfs_mount *, struct xfs_trans *,
|
|
|
|
struct xfs_imap *, struct xfs_dinode **,
|
|
|
|
struct xfs_buf **, uint, uint);
|
|
|
|
int xfs_iread(struct xfs_mount *, struct xfs_trans *,
|
|
|
|
struct xfs_inode *, uint);
|
|
|
|
void xfs_dinode_calc_crc(struct xfs_mount *, struct xfs_dinode *);
|
2016-02-09 13:54:58 +08:00
|
|
|
void xfs_inode_to_disk(struct xfs_inode *ip, struct xfs_dinode *to,
|
|
|
|
xfs_lsn_t lsn);
|
2016-02-09 13:54:58 +08:00
|
|
|
void xfs_inode_from_disk(struct xfs_inode *ip, struct xfs_dinode *from);
|
|
|
|
void xfs_log_dinode_to_disk(struct xfs_log_dinode *from,
|
|
|
|
struct xfs_dinode *to);
|
2013-08-12 18:49:35 +08:00
|
|
|
|
|
|
|
#if defined(DEBUG)
|
xfs: recovery of swap extents operations for CRC filesystems
This is the recovery side of the btree block owner change operation
performed by swapext on CRC enabled filesystems. We detect that an
owner change is needed by the flag that has been placed on the inode
log format flag field. Because the inode recovery is being replayed
after the buffers that make up the BMBT in the given checkpoint, we
can walk all the buffers and directly modify them when we see the
flag set on an inode.
Because the inode can be relogged and hence present in multiple
checkpoints with the "change owner" flag set, we could do multiple
passes across the inode to do this change. While this isn't optimal,
we can't directly ignore the flag as there may be multiple
independent swap extent operations being replayed on the same inode
in different checkpoints so we can't ignore them.
Further, because the owner change operation uses ordered buffers, we
might have buffers that are newer on disk than the current
checkpoint and so already have the owner changed in them. Hence we
cannot just peek at a buffer in the tree and check that it has the
correct owner and assume that the change was completed.
So, for the moment just brute force the owner change every time we
see an inode with the flag set. Note that we have to be careful here
because the owner of the buffers may point to either the old owner
or the new owner. Currently the verifier can't verify the owner
directly, so there is no failure case here right now. If we verify
the owner exactly in future, then we'll have to take this into
account.
This was tested in terms of normal operation via xfstests - all of
the fsr tests now pass without failure. However, we really need to
modify xfs/227 to stress v3 inodes correctly to ensure we fully
cover this case for v5 filesystems.
In terms of recovery testing, I used a hacked version of xfs_fsr
that held the temp inode open for a few seconds before exiting so
that the filesystem could be shut down with an open owner change
recovery flags set on at least the temp inode. fsr leaves the temp
inode unlinked and in btree format, so this was necessary for the
owner change to be reliably replayed.
logprint confirmed the tmp inode in the log had the correct flag set:
INO: cnt:3 total:3 a:0x69e9e0 len:56 a:0x69ea20 len:176 a:0x69eae0 len:88
INODE: #regs:3 ino:0x44 flags:0x209 dsize:88
^^^^^
0x200 is set, indicating a data fork owner change needed to be
replayed on inode 0x44. A printk in the recovery code confirmed that
the inode change was recovered:
XFS (vdc): Mounting Filesystem
XFS (vdc): Starting recovery (logdev: internal)
recovering owner change ino 0x44
XFS (vdc): Version 5 superblock detected. This kernel has EXPERIMENTAL support enabled!
Use of these features in this kernel is at your own risk!
XFS (vdc): Ending recovery (logdev: internal)
The script used to test this was:
$ cat ./recovery-fsr.sh
#!/bin/bash
dev=/dev/vdc
mntpt=/mnt/scratch
testfile=$mntpt/testfile
umount $mntpt
mkfs.xfs -f -m crc=1 $dev
mount $dev $mntpt
chmod 777 $mntpt
for i in `seq 10000 -1 0`; do
xfs_io -f -d -c "pwrite $(($i * 4096)) 4096" $testfile > /dev/null 2>&1
done
xfs_bmap -vp $testfile |head -20
xfs_fsr -d -v $testfile &
sleep 10
/home/dave/src/xfstests-dev/src/godown -f $mntpt
wait
umount $mntpt
xfs_logprint -t $dev |tail -20
time mount $dev $mntpt
xfs_bmap -vp $testfile
umount $mntpt
$
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
2013-08-30 08:23:45 +08:00
|
|
|
/* Real function, declared only when DEBUG is defined. */
void xfs_inobp_check(struct xfs_mount *mp, struct xfs_buf *bp);
|
2013-08-12 18:49:35 +08:00
|
|
|
#else
|
|
|
|
/*
 * Without DEBUG the check expands to nothing; the arguments are dropped by
 * the preprocessor and are never evaluated.
 */
#define xfs_inobp_check(mp, bp)
|
|
|
|
#endif /* DEBUG */
|
|
|
|
|
|
|
|
#endif /* __XFS_INODE_BUF_H__ */
|