xfs: add version 3 inode format with CRCs
Add a new inode version with a larger core. The primary objective is to allow for a crc of the inode, and location information (uuid and ino) to verify it was written in the right place. We also extend it by: a creation time (for Samba); a changecount (for NFSv4); a flush sequence (in LSN format for recovery); an additional inode flags field; and some additional padding. These additional fields are not implemented yet, but already laid out in the structure. [dchinner@redhat.com] Added LSN and flags field, some factoring and rework to capture all the necessary information in the crc calculation. Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Ben Myers <bpm@sgi.com> Signed-off-by: Ben Myers <bpm@sgi.com>
This commit is contained in:
parent
3fe58f30b4
commit
93848a999c
|
@ -48,6 +48,7 @@ extern kmem_zone_t *xfs_buf_item_zone;
|
||||||
#define XFS_BLF_AGF_BUF (1<<6)
|
#define XFS_BLF_AGF_BUF (1<<6)
|
||||||
#define XFS_BLF_AGFL_BUF (1<<7)
|
#define XFS_BLF_AGFL_BUF (1<<7)
|
||||||
#define XFS_BLF_AGI_BUF (1<<8)
|
#define XFS_BLF_AGI_BUF (1<<8)
|
||||||
|
#define XFS_BLF_DINO_BUF (1<<9)
|
||||||
|
|
||||||
#define XFS_BLF_TYPE_MASK \
|
#define XFS_BLF_TYPE_MASK \
|
||||||
(XFS_BLF_UDQUOT_BUF | \
|
(XFS_BLF_UDQUOT_BUF | \
|
||||||
|
@ -56,7 +57,8 @@ extern kmem_zone_t *xfs_buf_item_zone;
|
||||||
XFS_BLF_BTREE_BUF | \
|
XFS_BLF_BTREE_BUF | \
|
||||||
XFS_BLF_AGF_BUF | \
|
XFS_BLF_AGF_BUF | \
|
||||||
XFS_BLF_AGFL_BUF | \
|
XFS_BLF_AGFL_BUF | \
|
||||||
XFS_BLF_AGI_BUF)
|
XFS_BLF_AGI_BUF | \
|
||||||
|
XFS_BLF_DINO_BUF)
|
||||||
|
|
||||||
#define XFS_BLF_CHUNK 128
|
#define XFS_BLF_CHUNK 128
|
||||||
#define XFS_BLF_SHIFT 7
|
#define XFS_BLF_SHIFT 7
|
||||||
|
|
|
@ -19,7 +19,7 @@
|
||||||
#define __XFS_DINODE_H__
|
#define __XFS_DINODE_H__
|
||||||
|
|
||||||
#define XFS_DINODE_MAGIC 0x494e /* 'IN' */
|
#define XFS_DINODE_MAGIC 0x494e /* 'IN' */
|
||||||
#define XFS_DINODE_GOOD_VERSION(v) (((v) == 1 || (v) == 2))
|
#define XFS_DINODE_GOOD_VERSION(v) ((v) >= 1 && (v) <= 3)
|
||||||
|
|
||||||
typedef struct xfs_timestamp {
|
typedef struct xfs_timestamp {
|
||||||
__be32 t_sec; /* timestamp seconds */
|
__be32 t_sec; /* timestamp seconds */
|
||||||
|
@ -70,10 +70,35 @@ typedef struct xfs_dinode {
|
||||||
|
|
||||||
/* di_next_unlinked is the only non-core field in the old dinode */
|
/* di_next_unlinked is the only non-core field in the old dinode */
|
||||||
__be32 di_next_unlinked;/* agi unlinked list ptr */
|
__be32 di_next_unlinked;/* agi unlinked list ptr */
|
||||||
} __attribute__((packed)) xfs_dinode_t;
|
|
||||||
|
/* start of the extended dinode, writable fields */
|
||||||
|
__le32 di_crc; /* CRC of the inode */
|
||||||
|
__be64 di_changecount; /* number of attribute changes */
|
||||||
|
__be64 di_lsn; /* flush sequence */
|
||||||
|
__be64 di_flags2; /* more random flags */
|
||||||
|
__u8 di_pad2[16]; /* more padding for future expansion */
|
||||||
|
|
||||||
|
/* fields only written to during inode creation */
|
||||||
|
xfs_timestamp_t di_crtime; /* time created */
|
||||||
|
__be64 di_ino; /* inode number */
|
||||||
|
uuid_t di_uuid; /* UUID of the filesystem */
|
||||||
|
|
||||||
|
/* structure must be padded to 64 bit alignment */
|
||||||
|
} xfs_dinode_t;
|
||||||
|
|
||||||
#define DI_MAX_FLUSH 0xffff
|
#define DI_MAX_FLUSH 0xffff
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Size of the core inode on disk. Version 1 and 2 inodes have
|
||||||
|
* the same size, but version 3 has grown a few additional fields.
|
||||||
|
*/
|
||||||
|
static inline uint xfs_dinode_size(int version)
|
||||||
|
{
|
||||||
|
if (version == 3)
|
||||||
|
return sizeof(struct xfs_dinode);
|
||||||
|
return offsetof(struct xfs_dinode, di_crc);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The 32 bit link count in the inode theoretically maxes out at UINT_MAX.
|
* The 32 bit link count in the inode theoretically maxes out at UINT_MAX.
|
||||||
* Since the pathconf interface is signed, we use 2^31 - 1 instead.
|
* Since the pathconf interface is signed, we use 2^31 - 1 instead.
|
||||||
|
@ -105,7 +130,7 @@ typedef enum xfs_dinode_fmt {
|
||||||
* Inode size for given fs.
|
* Inode size for given fs.
|
||||||
*/
|
*/
|
||||||
#define XFS_LITINO(mp, version) \
|
#define XFS_LITINO(mp, version) \
|
||||||
((int)(((mp)->m_sb.sb_inodesize) - sizeof(struct xfs_dinode)))
|
((int)(((mp)->m_sb.sb_inodesize) - xfs_dinode_size(version)))
|
||||||
|
|
||||||
#define XFS_BROOT_SIZE_ADJ(ip) \
|
#define XFS_BROOT_SIZE_ADJ(ip) \
|
||||||
(XFS_BMBT_BLOCK_LEN((ip)->i_mount) - sizeof(xfs_bmdr_block_t))
|
(XFS_BMBT_BLOCK_LEN((ip)->i_mount) - sizeof(xfs_bmdr_block_t))
|
||||||
|
@ -133,7 +158,7 @@ typedef enum xfs_dinode_fmt {
|
||||||
* Return pointers to the data or attribute forks.
|
* Return pointers to the data or attribute forks.
|
||||||
*/
|
*/
|
||||||
#define XFS_DFORK_DPTR(dip) \
|
#define XFS_DFORK_DPTR(dip) \
|
||||||
((char *)(dip) + sizeof(struct xfs_dinode))
|
((char *)dip + xfs_dinode_size(dip->di_version))
|
||||||
#define XFS_DFORK_APTR(dip) \
|
#define XFS_DFORK_APTR(dip) \
|
||||||
(XFS_DFORK_DPTR(dip) + XFS_DFORK_BOFF(dip))
|
(XFS_DFORK_DPTR(dip) + XFS_DFORK_BOFF(dip))
|
||||||
#define XFS_DFORK_PTR(dip,w) \
|
#define XFS_DFORK_PTR(dip,w) \
|
||||||
|
|
|
@ -167,6 +167,7 @@ xfs_ialloc_inode_init(
|
||||||
int version;
|
int version;
|
||||||
int i, j;
|
int i, j;
|
||||||
xfs_daddr_t d;
|
xfs_daddr_t d;
|
||||||
|
xfs_ino_t ino = 0;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Loop over the new block(s), filling in the inodes.
|
* Loop over the new block(s), filling in the inodes.
|
||||||
|
@ -185,13 +186,29 @@ xfs_ialloc_inode_init(
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Figure out what version number to use in the inodes we create.
|
* Figure out what version number to use in the inodes we create. If
|
||||||
* If the superblock version has caught up to the one that supports
|
* the superblock version has caught up to the one that supports the new
|
||||||
* the new inode format, then use the new inode version. Otherwise
|
* inode format, then use the new inode version. Otherwise use the old
|
||||||
* use the old version so that old kernels will continue to be
|
* version so that old kernels will continue to be able to use the file
|
||||||
* able to use the file system.
|
* system.
|
||||||
|
*
|
||||||
|
* For v3 inodes, we also need to write the inode number into the inode,
|
||||||
|
* so calculate the first inode number of the chunk here as
|
||||||
|
* XFS_OFFBNO_TO_AGINO() only works within a filesystem block, not
|
||||||
|
* across multiple filesystem blocks (such as a cluster) and so cannot
|
||||||
|
* be used in the cluster buffer loop below.
|
||||||
|
*
|
||||||
|
* Further, because we are writing the inode directly into the buffer
|
||||||
|
* and calculating a CRC on the entire inode, we have ot log the entire
|
||||||
|
* inode so that the entire range the CRC covers is present in the log.
|
||||||
|
* That means for v3 inode we log the entire buffer rather than just the
|
||||||
|
* inode cores.
|
||||||
*/
|
*/
|
||||||
if (xfs_sb_version_hasnlink(&mp->m_sb))
|
if (xfs_sb_version_hascrc(&mp->m_sb)) {
|
||||||
|
version = 3;
|
||||||
|
ino = XFS_AGINO_TO_INO(mp, agno,
|
||||||
|
XFS_OFFBNO_TO_AGINO(mp, agbno, 0));
|
||||||
|
} else if (xfs_sb_version_hasnlink(&mp->m_sb))
|
||||||
version = 2;
|
version = 2;
|
||||||
else
|
else
|
||||||
version = 1;
|
version = 1;
|
||||||
|
@ -214,17 +231,32 @@ xfs_ialloc_inode_init(
|
||||||
* individual transactions causing a lot of log traffic.
|
* individual transactions causing a lot of log traffic.
|
||||||
*/
|
*/
|
||||||
fbuf->b_ops = &xfs_inode_buf_ops;
|
fbuf->b_ops = &xfs_inode_buf_ops;
|
||||||
xfs_buf_zero(fbuf, 0, ninodes << mp->m_sb.sb_inodelog);
|
xfs_buf_zero(fbuf, 0, BBTOB(fbuf->b_length));
|
||||||
for (i = 0; i < ninodes; i++) {
|
for (i = 0; i < ninodes; i++) {
|
||||||
int ioffset = i << mp->m_sb.sb_inodelog;
|
int ioffset = i << mp->m_sb.sb_inodelog;
|
||||||
uint isize = sizeof(struct xfs_dinode);
|
uint isize = xfs_dinode_size(version);
|
||||||
|
|
||||||
free = xfs_make_iptr(mp, fbuf, i);
|
free = xfs_make_iptr(mp, fbuf, i);
|
||||||
free->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
|
free->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
|
||||||
free->di_version = version;
|
free->di_version = version;
|
||||||
free->di_gen = cpu_to_be32(gen);
|
free->di_gen = cpu_to_be32(gen);
|
||||||
free->di_next_unlinked = cpu_to_be32(NULLAGINO);
|
free->di_next_unlinked = cpu_to_be32(NULLAGINO);
|
||||||
xfs_trans_log_buf(tp, fbuf, ioffset, ioffset + isize - 1);
|
|
||||||
|
if (version == 3) {
|
||||||
|
free->di_ino = cpu_to_be64(ino);
|
||||||
|
ino++;
|
||||||
|
uuid_copy(&free->di_uuid, &mp->m_sb.sb_uuid);
|
||||||
|
xfs_dinode_calc_crc(mp, free);
|
||||||
|
} else {
|
||||||
|
/* just log the inode core */
|
||||||
|
xfs_trans_log_buf(tp, fbuf, ioffset,
|
||||||
|
ioffset + isize - 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (version == 3) {
|
||||||
|
/* need to log the entire buffer */
|
||||||
|
xfs_trans_log_buf(tp, fbuf, 0,
|
||||||
|
BBTOB(fbuf->b_length) - 1);
|
||||||
}
|
}
|
||||||
xfs_trans_inode_alloc_buf(tp, fbuf);
|
xfs_trans_inode_alloc_buf(tp, fbuf);
|
||||||
}
|
}
|
||||||
|
|
|
@ -44,6 +44,7 @@
|
||||||
#include "xfs_quota.h"
|
#include "xfs_quota.h"
|
||||||
#include "xfs_filestream.h"
|
#include "xfs_filestream.h"
|
||||||
#include "xfs_vnodeops.h"
|
#include "xfs_vnodeops.h"
|
||||||
|
#include "xfs_cksum.h"
|
||||||
#include "xfs_trace.h"
|
#include "xfs_trace.h"
|
||||||
#include "xfs_icache.h"
|
#include "xfs_icache.h"
|
||||||
|
|
||||||
|
@ -866,6 +867,17 @@ xfs_dinode_from_disk(
|
||||||
to->di_dmstate = be16_to_cpu(from->di_dmstate);
|
to->di_dmstate = be16_to_cpu(from->di_dmstate);
|
||||||
to->di_flags = be16_to_cpu(from->di_flags);
|
to->di_flags = be16_to_cpu(from->di_flags);
|
||||||
to->di_gen = be32_to_cpu(from->di_gen);
|
to->di_gen = be32_to_cpu(from->di_gen);
|
||||||
|
|
||||||
|
if (to->di_version == 3) {
|
||||||
|
to->di_changecount = be64_to_cpu(from->di_changecount);
|
||||||
|
to->di_crtime.t_sec = be32_to_cpu(from->di_crtime.t_sec);
|
||||||
|
to->di_crtime.t_nsec = be32_to_cpu(from->di_crtime.t_nsec);
|
||||||
|
to->di_flags2 = be64_to_cpu(from->di_flags2);
|
||||||
|
to->di_ino = be64_to_cpu(from->di_ino);
|
||||||
|
to->di_lsn = be64_to_cpu(from->di_lsn);
|
||||||
|
memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
|
||||||
|
uuid_copy(&to->di_uuid, &from->di_uuid);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
@ -902,6 +914,17 @@ xfs_dinode_to_disk(
|
||||||
to->di_dmstate = cpu_to_be16(from->di_dmstate);
|
to->di_dmstate = cpu_to_be16(from->di_dmstate);
|
||||||
to->di_flags = cpu_to_be16(from->di_flags);
|
to->di_flags = cpu_to_be16(from->di_flags);
|
||||||
to->di_gen = cpu_to_be32(from->di_gen);
|
to->di_gen = cpu_to_be32(from->di_gen);
|
||||||
|
|
||||||
|
if (from->di_version == 3) {
|
||||||
|
to->di_changecount = cpu_to_be64(from->di_changecount);
|
||||||
|
to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec);
|
||||||
|
to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec);
|
||||||
|
to->di_flags2 = cpu_to_be64(from->di_flags2);
|
||||||
|
to->di_ino = cpu_to_be64(from->di_ino);
|
||||||
|
to->di_lsn = cpu_to_be64(from->di_lsn);
|
||||||
|
memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
|
||||||
|
uuid_copy(&to->di_uuid, &from->di_uuid);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
STATIC uint
|
STATIC uint
|
||||||
|
@ -962,6 +985,47 @@ xfs_dic2xflags(
|
||||||
(XFS_DFORK_Q(dip) ? XFS_XFLAG_HASATTR : 0);
|
(XFS_DFORK_Q(dip) ? XFS_XFLAG_HASATTR : 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
xfs_dinode_verify(
|
||||||
|
struct xfs_mount *mp,
|
||||||
|
struct xfs_inode *ip,
|
||||||
|
struct xfs_dinode *dip)
|
||||||
|
{
|
||||||
|
if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/* only version 3 or greater inodes are extensively verified here */
|
||||||
|
if (dip->di_version < 3)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
if (!xfs_sb_version_hascrc(&mp->m_sb))
|
||||||
|
return false;
|
||||||
|
if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
|
||||||
|
offsetof(struct xfs_dinode, di_crc)))
|
||||||
|
return false;
|
||||||
|
if (be64_to_cpu(dip->di_ino) != ip->i_ino)
|
||||||
|
return false;
|
||||||
|
if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_uuid))
|
||||||
|
return false;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
xfs_dinode_calc_crc(
|
||||||
|
struct xfs_mount *mp,
|
||||||
|
struct xfs_dinode *dip)
|
||||||
|
{
|
||||||
|
__uint32_t crc;
|
||||||
|
|
||||||
|
if (dip->di_version < 3)
|
||||||
|
return;
|
||||||
|
|
||||||
|
ASSERT(xfs_sb_version_hascrc(&mp->m_sb));
|
||||||
|
crc = xfs_start_cksum((char *)dip, mp->m_sb.sb_inodesize,
|
||||||
|
offsetof(struct xfs_dinode, di_crc));
|
||||||
|
dip->di_crc = xfs_end_cksum(crc);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Read the disk inode attributes into the in-core inode structure.
|
* Read the disk inode attributes into the in-core inode structure.
|
||||||
*/
|
*/
|
||||||
|
@ -990,17 +1054,13 @@ xfs_iread(
|
||||||
if (error)
|
if (error)
|
||||||
return error;
|
return error;
|
||||||
|
|
||||||
/*
|
/* even unallocated inodes are verified */
|
||||||
* If we got something that isn't an inode it means someone
|
if (!xfs_dinode_verify(mp, ip, dip)) {
|
||||||
* (nfs or dmi) has a stale handle.
|
xfs_alert(mp, "%s: validation failed for inode %lld failed",
|
||||||
*/
|
__func__, ip->i_ino);
|
||||||
if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) {
|
|
||||||
#ifdef DEBUG
|
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, dip);
|
||||||
xfs_alert(mp,
|
error = XFS_ERROR(EFSCORRUPTED);
|
||||||
"%s: dip->di_magic (0x%x) != XFS_DINODE_MAGIC (0x%x)",
|
|
||||||
__func__, be16_to_cpu(dip->di_magic), XFS_DINODE_MAGIC);
|
|
||||||
#endif /* DEBUG */
|
|
||||||
error = XFS_ERROR(EINVAL);
|
|
||||||
goto out_brelse;
|
goto out_brelse;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1022,10 +1082,20 @@ xfs_iread(
|
||||||
goto out_brelse;
|
goto out_brelse;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
/*
|
||||||
|
* Partial initialisation of the in-core inode. Just the bits
|
||||||
|
* that xfs_ialloc won't overwrite or relies on being correct.
|
||||||
|
*/
|
||||||
ip->i_d.di_magic = be16_to_cpu(dip->di_magic);
|
ip->i_d.di_magic = be16_to_cpu(dip->di_magic);
|
||||||
ip->i_d.di_version = dip->di_version;
|
ip->i_d.di_version = dip->di_version;
|
||||||
ip->i_d.di_gen = be32_to_cpu(dip->di_gen);
|
ip->i_d.di_gen = be32_to_cpu(dip->di_gen);
|
||||||
ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter);
|
ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter);
|
||||||
|
|
||||||
|
if (dip->di_version == 3) {
|
||||||
|
ip->i_d.di_ino = be64_to_cpu(dip->di_ino);
|
||||||
|
uuid_copy(&ip->i_d.di_uuid, &dip->di_uuid);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Make sure to pull in the mode here as well in
|
* Make sure to pull in the mode here as well in
|
||||||
* case the inode is released without being used.
|
* case the inode is released without being used.
|
||||||
|
@ -1161,6 +1231,7 @@ xfs_ialloc(
|
||||||
xfs_buf_t **ialloc_context,
|
xfs_buf_t **ialloc_context,
|
||||||
xfs_inode_t **ipp)
|
xfs_inode_t **ipp)
|
||||||
{
|
{
|
||||||
|
struct xfs_mount *mp = tp->t_mountp;
|
||||||
xfs_ino_t ino;
|
xfs_ino_t ino;
|
||||||
xfs_inode_t *ip;
|
xfs_inode_t *ip;
|
||||||
uint flags;
|
uint flags;
|
||||||
|
@ -1187,7 +1258,7 @@ xfs_ialloc(
|
||||||
* This is because we're setting fields here we need
|
* This is because we're setting fields here we need
|
||||||
* to prevent others from looking at until we're done.
|
* to prevent others from looking at until we're done.
|
||||||
*/
|
*/
|
||||||
error = xfs_iget(tp->t_mountp, tp, ino, XFS_IGET_CREATE,
|
error = xfs_iget(mp, tp, ino, XFS_IGET_CREATE,
|
||||||
XFS_ILOCK_EXCL, &ip);
|
XFS_ILOCK_EXCL, &ip);
|
||||||
if (error)
|
if (error)
|
||||||
return error;
|
return error;
|
||||||
|
@ -1208,7 +1279,7 @@ xfs_ialloc(
|
||||||
* the inode version number now. This way we only do the conversion
|
* the inode version number now. This way we only do the conversion
|
||||||
* here rather than here and in the flush/logging code.
|
* here rather than here and in the flush/logging code.
|
||||||
*/
|
*/
|
||||||
if (xfs_sb_version_hasnlink(&tp->t_mountp->m_sb) &&
|
if (xfs_sb_version_hasnlink(&mp->m_sb) &&
|
||||||
ip->i_d.di_version == 1) {
|
ip->i_d.di_version == 1) {
|
||||||
ip->i_d.di_version = 2;
|
ip->i_d.di_version = 2;
|
||||||
/*
|
/*
|
||||||
|
@ -1258,6 +1329,19 @@ xfs_ialloc(
|
||||||
ip->i_d.di_dmevmask = 0;
|
ip->i_d.di_dmevmask = 0;
|
||||||
ip->i_d.di_dmstate = 0;
|
ip->i_d.di_dmstate = 0;
|
||||||
ip->i_d.di_flags = 0;
|
ip->i_d.di_flags = 0;
|
||||||
|
|
||||||
|
if (ip->i_d.di_version == 3) {
|
||||||
|
ASSERT(ip->i_d.di_ino == ino);
|
||||||
|
ASSERT(uuid_equal(&ip->i_d.di_uuid, &mp->m_sb.sb_uuid));
|
||||||
|
ip->i_d.di_crc = 0;
|
||||||
|
ip->i_d.di_changecount = 1;
|
||||||
|
ip->i_d.di_lsn = 0;
|
||||||
|
ip->i_d.di_flags2 = 0;
|
||||||
|
memset(&(ip->i_d.di_pad2[0]), 0, sizeof(ip->i_d.di_pad2));
|
||||||
|
ip->i_d.di_crtime = ip->i_d.di_mtime;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
flags = XFS_ILOG_CORE;
|
flags = XFS_ILOG_CORE;
|
||||||
switch (mode & S_IFMT) {
|
switch (mode & S_IFMT) {
|
||||||
case S_IFIFO:
|
case S_IFIFO:
|
||||||
|
@ -2716,20 +2800,18 @@ abort_out:
|
||||||
|
|
||||||
STATIC int
|
STATIC int
|
||||||
xfs_iflush_int(
|
xfs_iflush_int(
|
||||||
xfs_inode_t *ip,
|
struct xfs_inode *ip,
|
||||||
xfs_buf_t *bp)
|
struct xfs_buf *bp)
|
||||||
{
|
{
|
||||||
xfs_inode_log_item_t *iip;
|
struct xfs_inode_log_item *iip = ip->i_itemp;
|
||||||
xfs_dinode_t *dip;
|
struct xfs_dinode *dip;
|
||||||
xfs_mount_t *mp;
|
struct xfs_mount *mp = ip->i_mount;
|
||||||
|
|
||||||
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
|
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
|
||||||
ASSERT(xfs_isiflocked(ip));
|
ASSERT(xfs_isiflocked(ip));
|
||||||
ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
|
ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
|
||||||
ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
|
ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
|
||||||
|
ASSERT(iip != NULL && iip->ili_fields != 0);
|
||||||
iip = ip->i_itemp;
|
|
||||||
mp = ip->i_mount;
|
|
||||||
|
|
||||||
/* set *dip = inode's place in the buffer */
|
/* set *dip = inode's place in the buffer */
|
||||||
dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);
|
dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);
|
||||||
|
@ -2790,9 +2872,9 @@ xfs_iflush_int(
|
||||||
}
|
}
|
||||||
/*
|
/*
|
||||||
* bump the flush iteration count, used to detect flushes which
|
* bump the flush iteration count, used to detect flushes which
|
||||||
* postdate a log record during recovery.
|
* postdate a log record during recovery. This is redundant as we now
|
||||||
|
* log every change and hence this can't happen. Still, it doesn't hurt.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
ip->i_d.di_flushiter++;
|
ip->i_d.di_flushiter++;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -2868,7 +2950,6 @@ xfs_iflush_int(
|
||||||
* need the AIL lock, because it is a 64 bit value that cannot be read
|
* need the AIL lock, because it is a 64 bit value that cannot be read
|
||||||
* atomically.
|
* atomically.
|
||||||
*/
|
*/
|
||||||
if (iip != NULL && iip->ili_fields != 0) {
|
|
||||||
iip->ili_last_fields = iip->ili_fields;
|
iip->ili_last_fields = iip->ili_fields;
|
||||||
iip->ili_fields = 0;
|
iip->ili_fields = 0;
|
||||||
iip->ili_logged = 1;
|
iip->ili_logged = 1;
|
||||||
|
@ -2884,25 +2965,15 @@ xfs_iflush_int(
|
||||||
*/
|
*/
|
||||||
xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item);
|
xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item);
|
||||||
|
|
||||||
|
/* update the lsn in the on disk inode if required */
|
||||||
|
if (ip->i_d.di_version == 3)
|
||||||
|
dip->di_lsn = cpu_to_be64(iip->ili_item.li_lsn);
|
||||||
|
|
||||||
|
/* generate the checksum. */
|
||||||
|
xfs_dinode_calc_crc(mp, dip);
|
||||||
|
|
||||||
ASSERT(bp->b_fspriv != NULL);
|
ASSERT(bp->b_fspriv != NULL);
|
||||||
ASSERT(bp->b_iodone != NULL);
|
ASSERT(bp->b_iodone != NULL);
|
||||||
} else {
|
|
||||||
/*
|
|
||||||
* We're flushing an inode which is not in the AIL and has
|
|
||||||
* not been logged. For this case we can immediately drop
|
|
||||||
* the inode flush lock because we can avoid the whole
|
|
||||||
* AIL state thing. It's OK to drop the flush lock now,
|
|
||||||
* because we've already locked the buffer and to do anything
|
|
||||||
* you really need both.
|
|
||||||
*/
|
|
||||||
if (iip != NULL) {
|
|
||||||
ASSERT(iip->ili_logged == 0);
|
|
||||||
ASSERT(iip->ili_last_fields == 0);
|
|
||||||
ASSERT((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0);
|
|
||||||
}
|
|
||||||
xfs_ifunlock(ip);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
corrupt_out:
|
corrupt_out:
|
||||||
|
|
|
@ -150,13 +150,38 @@ typedef struct xfs_icdinode {
|
||||||
__uint16_t di_dmstate; /* DMIG state info */
|
__uint16_t di_dmstate; /* DMIG state info */
|
||||||
__uint16_t di_flags; /* random flags, XFS_DIFLAG_... */
|
__uint16_t di_flags; /* random flags, XFS_DIFLAG_... */
|
||||||
__uint32_t di_gen; /* generation number */
|
__uint32_t di_gen; /* generation number */
|
||||||
|
|
||||||
|
/* di_next_unlinked is the only non-core field in the old dinode */
|
||||||
|
xfs_agino_t di_next_unlinked;/* agi unlinked list ptr */
|
||||||
|
|
||||||
|
/* start of the extended dinode, writable fields */
|
||||||
|
__uint32_t di_crc; /* CRC of the inode */
|
||||||
|
__uint64_t di_changecount; /* number of attribute changes */
|
||||||
|
xfs_lsn_t di_lsn; /* flush sequence */
|
||||||
|
__uint64_t di_flags2; /* more random flags */
|
||||||
|
__uint8_t di_pad2[16]; /* more padding for future expansion */
|
||||||
|
|
||||||
|
/* fields only written to during inode creation */
|
||||||
|
xfs_ictimestamp_t di_crtime; /* time created */
|
||||||
|
xfs_ino_t di_ino; /* inode number */
|
||||||
|
uuid_t di_uuid; /* UUID of the filesystem */
|
||||||
|
|
||||||
|
/* structure must be padded to 64 bit alignment */
|
||||||
} xfs_icdinode_t;
|
} xfs_icdinode_t;
|
||||||
|
|
||||||
|
static inline uint xfs_icdinode_size(int version)
|
||||||
|
{
|
||||||
|
if (version == 3)
|
||||||
|
return sizeof(struct xfs_icdinode);
|
||||||
|
return offsetof(struct xfs_icdinode, di_next_unlinked);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Flags for xfs_ichgtime().
|
* Flags for xfs_ichgtime().
|
||||||
*/
|
*/
|
||||||
#define XFS_ICHGTIME_MOD 0x1 /* data fork modification timestamp */
|
#define XFS_ICHGTIME_MOD 0x1 /* data fork modification timestamp */
|
||||||
#define XFS_ICHGTIME_CHG 0x2 /* inode field change timestamp */
|
#define XFS_ICHGTIME_CHG 0x2 /* inode field change timestamp */
|
||||||
|
#define XFS_ICHGTIME_CREATE 0x4 /* inode create timestamp */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Per-fork incore inode flags.
|
* Per-fork incore inode flags.
|
||||||
|
@ -556,6 +581,7 @@ int xfs_imap_to_bp(struct xfs_mount *, struct xfs_trans *,
|
||||||
struct xfs_buf **, uint, uint);
|
struct xfs_buf **, uint, uint);
|
||||||
int xfs_iread(struct xfs_mount *, struct xfs_trans *,
|
int xfs_iread(struct xfs_mount *, struct xfs_trans *,
|
||||||
struct xfs_inode *, uint);
|
struct xfs_inode *, uint);
|
||||||
|
void xfs_dinode_calc_crc(struct xfs_mount *, struct xfs_dinode *);
|
||||||
void xfs_dinode_to_disk(struct xfs_dinode *,
|
void xfs_dinode_to_disk(struct xfs_dinode *,
|
||||||
struct xfs_icdinode *);
|
struct xfs_icdinode *);
|
||||||
void xfs_idestroy_fork(struct xfs_inode *, int);
|
void xfs_idestroy_fork(struct xfs_inode *, int);
|
||||||
|
|
|
@ -179,7 +179,7 @@ xfs_inode_item_format(
|
||||||
nvecs = 1;
|
nvecs = 1;
|
||||||
|
|
||||||
vecp->i_addr = &ip->i_d;
|
vecp->i_addr = &ip->i_d;
|
||||||
vecp->i_len = sizeof(struct xfs_icdinode);
|
vecp->i_len = xfs_icdinode_size(ip->i_d.di_version);
|
||||||
vecp->i_type = XLOG_REG_TYPE_ICORE;
|
vecp->i_type = XLOG_REG_TYPE_ICORE;
|
||||||
vecp++;
|
vecp++;
|
||||||
nvecs++;
|
nvecs++;
|
||||||
|
|
|
@ -1786,6 +1786,7 @@ xlog_recover_do_inode_buffer(
|
||||||
xfs_agino_t *buffer_nextp;
|
xfs_agino_t *buffer_nextp;
|
||||||
|
|
||||||
trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f);
|
trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f);
|
||||||
|
bp->b_ops = &xfs_inode_buf_ops;
|
||||||
|
|
||||||
inodes_per_buf = BBTOB(bp->b_io_length) >> mp->m_sb.sb_inodelog;
|
inodes_per_buf = BBTOB(bp->b_io_length) >> mp->m_sb.sb_inodelog;
|
||||||
for (i = 0; i < inodes_per_buf; i++) {
|
for (i = 0; i < inodes_per_buf; i++) {
|
||||||
|
@ -1989,6 +1990,18 @@ xlog_recover_do_reg_buffer(
|
||||||
}
|
}
|
||||||
bp->b_ops = &xfs_dquot_buf_ops;
|
bp->b_ops = &xfs_dquot_buf_ops;
|
||||||
break;
|
break;
|
||||||
|
case XFS_BLF_DINO_BUF:
|
||||||
|
/*
|
||||||
|
* we get here with inode allocation buffers, not buffers that
|
||||||
|
* track unlinked list changes.
|
||||||
|
*/
|
||||||
|
if (*(__be16 *)bp->b_addr != cpu_to_be16(XFS_DINODE_MAGIC)) {
|
||||||
|
xfs_warn(mp, "Bad INODE block magic!");
|
||||||
|
ASSERT(0);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
bp->b_ops = &xfs_inode_buf_ops;
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -2277,6 +2290,7 @@ xlog_recover_inode_pass2(
|
||||||
int attr_index;
|
int attr_index;
|
||||||
uint fields;
|
uint fields;
|
||||||
xfs_icdinode_t *dicp;
|
xfs_icdinode_t *dicp;
|
||||||
|
uint isize;
|
||||||
int need_free = 0;
|
int need_free = 0;
|
||||||
|
|
||||||
if (item->ri_buf[0].i_len == sizeof(xfs_inode_log_format_t)) {
|
if (item->ri_buf[0].i_len == sizeof(xfs_inode_log_format_t)) {
|
||||||
|
@ -2302,7 +2316,7 @@ xlog_recover_inode_pass2(
|
||||||
trace_xfs_log_recover_inode_recover(log, in_f);
|
trace_xfs_log_recover_inode_recover(log, in_f);
|
||||||
|
|
||||||
bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len, 0,
|
bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len, 0,
|
||||||
NULL);
|
&xfs_inode_buf_ops);
|
||||||
if (!bp) {
|
if (!bp) {
|
||||||
error = ENOMEM;
|
error = ENOMEM;
|
||||||
goto error;
|
goto error;
|
||||||
|
@ -2413,7 +2427,8 @@ xlog_recover_inode_pass2(
|
||||||
error = EFSCORRUPTED;
|
error = EFSCORRUPTED;
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
if (unlikely(item->ri_buf[1].i_len > sizeof(struct xfs_icdinode))) {
|
isize = xfs_icdinode_size(dicp->di_version);
|
||||||
|
if (unlikely(item->ri_buf[1].i_len > isize)) {
|
||||||
XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)",
|
XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)",
|
||||||
XFS_ERRLEVEL_LOW, mp, dicp);
|
XFS_ERRLEVEL_LOW, mp, dicp);
|
||||||
xfs_buf_relse(bp);
|
xfs_buf_relse(bp);
|
||||||
|
@ -2425,13 +2440,13 @@ xlog_recover_inode_pass2(
|
||||||
}
|
}
|
||||||
|
|
||||||
/* The core is in in-core format */
|
/* The core is in in-core format */
|
||||||
xfs_dinode_to_disk(dip, item->ri_buf[1].i_addr);
|
xfs_dinode_to_disk(dip, dicp);
|
||||||
|
|
||||||
/* the rest is in on-disk format */
|
/* the rest is in on-disk format */
|
||||||
if (item->ri_buf[1].i_len > sizeof(struct xfs_icdinode)) {
|
if (item->ri_buf[1].i_len > isize) {
|
||||||
memcpy((xfs_caddr_t) dip + sizeof(struct xfs_icdinode),
|
memcpy((char *)dip + isize,
|
||||||
item->ri_buf[1].i_addr + sizeof(struct xfs_icdinode),
|
item->ri_buf[1].i_addr + isize,
|
||||||
item->ri_buf[1].i_len - sizeof(struct xfs_icdinode));
|
item->ri_buf[1].i_len - isize);
|
||||||
}
|
}
|
||||||
|
|
||||||
fields = in_f->ilf_fields;
|
fields = in_f->ilf_fields;
|
||||||
|
@ -2515,6 +2530,9 @@ xlog_recover_inode_pass2(
|
||||||
}
|
}
|
||||||
|
|
||||||
write_inode_buffer:
|
write_inode_buffer:
|
||||||
|
/* re-generate the checksum. */
|
||||||
|
xfs_dinode_calc_crc(log->l_mp, dip);
|
||||||
|
|
||||||
ASSERT(bp->b_target->bt_mount == mp);
|
ASSERT(bp->b_target->bt_mount == mp);
|
||||||
bp->b_iodone = xlog_recover_iodone;
|
bp->b_iodone = xlog_recover_iodone;
|
||||||
xfs_buf_delwri_queue(bp, buffer_list);
|
xfs_buf_delwri_queue(bp, buffer_list);
|
||||||
|
|
|
@ -704,6 +704,7 @@ xfs_trans_inode_buf(
|
||||||
ASSERT(atomic_read(&bip->bli_refcount) > 0);
|
ASSERT(atomic_read(&bip->bli_refcount) > 0);
|
||||||
|
|
||||||
bip->bli_flags |= XFS_BLI_INODE_BUF;
|
bip->bli_flags |= XFS_BLI_INODE_BUF;
|
||||||
|
xfs_trans_buf_set_type(tp, bp, XFS_BLF_DINO_BUF);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -728,6 +729,7 @@ xfs_trans_stale_inode_buf(
|
||||||
|
|
||||||
bip->bli_flags |= XFS_BLI_STALE_INODE;
|
bip->bli_flags |= XFS_BLI_STALE_INODE;
|
||||||
bip->bli_item.li_cb = xfs_buf_iodone;
|
bip->bli_item.li_cb = xfs_buf_iodone;
|
||||||
|
xfs_trans_buf_set_type(tp, bp, XFS_BLF_DINO_BUF);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -751,6 +753,7 @@ xfs_trans_inode_alloc_buf(
|
||||||
ASSERT(atomic_read(&bip->bli_refcount) > 0);
|
ASSERT(atomic_read(&bip->bli_refcount) > 0);
|
||||||
|
|
||||||
bip->bli_flags |= XFS_BLI_INODE_ALLOC_BUF;
|
bip->bli_flags |= XFS_BLI_INODE_ALLOC_BUF;
|
||||||
|
xfs_trans_buf_set_type(tp, bp, XFS_BLF_DINO_BUF);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
Loading…
Reference in New Issue