xfs: updates for 4.2-rc4
- remote attribute log recovery corruption fixes - DAX page faults need to use direct mappings, not a page cache mapping. -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.12 (GNU/Linux) iQIcBAABAgAGBQJVutbbAAoJEK3oKUf0dfode50QANILLE7Rq9rKTp9ogHZKxTfv Y3SJLCpwsuZ9GQuvqgRk2MbZDgNsQxbKg4lCVlf+lbXWDoKINkLw73g1qPmEMA8q 3EQ+GNYcvsoGKlyIRod6HMuqIa9zIfremj+qk4/PCDhD50IjI/1QH5LqVz9lnaas 3N1zuU1t5dJvmUMPdLUvj46s51O9stiKTN0gbLnv5CEbTbyH1PiyqGubecJkcFVU 3oEdp0PyxeoxUgUWLh24tgQXzeLQTR/95viJsUILcdPBd3geCBbW3/pgke5Rg/3+ G/8aowNtDvNrg8sT6FuDgSR8kYudEssleQnfWO7rX7yigKLzSnNpD7m4779ZmEXP ey32NVbXwslg92AYwM9A8EqzGInm+cMDWcRu22WpHKzWs1wpVlpdkJXbP1SE9fKm pXVw1bT4XNArPjew/cclLMI2T0Tc6LMTUR5WxcF1N8RCTvdTm2qf+l43zXYFcBvX IxaD6U8z7tyShbgG2xllM3D/ANznvIs4pAb9zK6vJK7ZcfCgTVY/2YkHZeoKGKdG 2Mo1SUQpaG8Xhm53IDfSLtEmTp9eQAdDooHCwjugsLXe9A65787EIabTK9oTsNG1 W90CBQzyrIliQ9v291eAB/e5gRmExVxyV9VnVPMw3jXJ9mwxyfKOEd9mW+pv8RfH x3mj0CxjO14vQrrfCLjd =kjas -----END PGP SIGNATURE----- Merge tag 'xfs-for-linus-4.2-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs Pull xfs fixes from Dave Chinner: "There are a couple of recently found, long standing remote attribute corruption fixes caused by log recovery getting confused after a crash, and the new DAX code in XFS (merged in 4.2-rc1) needs to actually use the DAX fault path on read faults. Summary: - remote attribute log recovery corruption fixes - DAX page faults need to use direct mappings, not a page cache mapping" * tag 'xfs-for-linus-4.2-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs: xfs: remote attributes need to be considered data xfs: remote attribute headers contain an invalid LSN xfs: call dax_fault on read page faults for DAX
This commit is contained in:
commit
8400935737
14
fs/dax.c
14
fs/dax.c
|
@ -319,6 +319,12 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
|
|||
* @vma: The virtual memory area where the fault occurred
|
||||
* @vmf: The description of the fault
|
||||
* @get_block: The filesystem method used to translate file offsets to blocks
|
||||
* @complete_unwritten: The filesystem method used to convert unwritten blocks
|
||||
* to written so the data written to them is exposed. This is
|
||||
* required by write faults for filesystems that will return unwritten
|
||||
* extent mappings from @get_block, but it is optional for reads as
|
||||
* dax_insert_mapping() will always zero unwritten blocks. If the fs does
|
||||
* not support unwritten extents, then it should pass NULL.
|
||||
*
|
||||
* When a page fault occurs, filesystems may call this helper in their
|
||||
* fault handler for DAX files. __dax_fault() assumes the caller has done all
|
||||
|
@ -437,8 +443,12 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
|
|||
* as for normal BH based IO completions.
|
||||
*/
|
||||
error = dax_insert_mapping(inode, &bh, vma, vmf);
|
||||
if (buffer_unwritten(&bh))
|
||||
complete_unwritten(&bh, !error);
|
||||
if (buffer_unwritten(&bh)) {
|
||||
if (complete_unwritten)
|
||||
complete_unwritten(&bh, !error);
|
||||
else
|
||||
WARN_ON_ONCE(!(vmf->flags & FAULT_FLAG_WRITE));
|
||||
}
|
||||
|
||||
out:
|
||||
if (error == -ENOMEM)
|
||||
|
|
|
@ -159,11 +159,10 @@ xfs_attr3_rmt_write_verify(
|
|||
struct xfs_buf *bp)
|
||||
{
|
||||
struct xfs_mount *mp = bp->b_target->bt_mount;
|
||||
struct xfs_buf_log_item *bip = bp->b_fspriv;
|
||||
int blksize = mp->m_attr_geo->blksize;
|
||||
char *ptr;
|
||||
int len;
|
||||
xfs_daddr_t bno;
|
||||
int blksize = mp->m_attr_geo->blksize;
|
||||
|
||||
/* no verification of non-crc buffers */
|
||||
if (!xfs_sb_version_hascrc(&mp->m_sb))
|
||||
|
@ -175,16 +174,22 @@ xfs_attr3_rmt_write_verify(
|
|||
ASSERT(len >= blksize);
|
||||
|
||||
while (len > 0) {
|
||||
struct xfs_attr3_rmt_hdr *rmt = (struct xfs_attr3_rmt_hdr *)ptr;
|
||||
|
||||
if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) {
|
||||
xfs_buf_ioerror(bp, -EFSCORRUPTED);
|
||||
xfs_verifier_error(bp);
|
||||
return;
|
||||
}
|
||||
if (bip) {
|
||||
struct xfs_attr3_rmt_hdr *rmt;
|
||||
|
||||
rmt = (struct xfs_attr3_rmt_hdr *)ptr;
|
||||
rmt->rm_lsn = cpu_to_be64(bip->bli_item.li_lsn);
|
||||
/*
|
||||
* Ensure we aren't writing bogus LSNs to disk. See
|
||||
* xfs_attr3_rmt_hdr_set() for the explanation.
|
||||
*/
|
||||
if (rmt->rm_lsn != cpu_to_be64(NULLCOMMITLSN)) {
|
||||
xfs_buf_ioerror(bp, -EFSCORRUPTED);
|
||||
xfs_verifier_error(bp);
|
||||
return;
|
||||
}
|
||||
xfs_update_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF);
|
||||
|
||||
|
@ -221,6 +226,18 @@ xfs_attr3_rmt_hdr_set(
|
|||
rmt->rm_owner = cpu_to_be64(ino);
|
||||
rmt->rm_blkno = cpu_to_be64(bno);
|
||||
|
||||
/*
|
||||
* Remote attribute blocks are written synchronously, so we don't
|
||||
* have an LSN that we can stamp in them that makes any sense to log
|
||||
* recovery. To ensure that log recovery handles overwrites of these
|
||||
* blocks sanely (i.e. once they've been freed and reallocated as some
|
||||
* other type of metadata) we need to ensure that the LSN has a value
|
||||
* that tells log recovery to ignore the LSN and overwrite the buffer
|
||||
* with whatever is in its log. To do this, we use the magic
|
||||
* NULLCOMMITLSN to indicate that the LSN is invalid.
|
||||
*/
|
||||
rmt->rm_lsn = cpu_to_be64(NULLCOMMITLSN);
|
||||
|
||||
return sizeof(struct xfs_attr3_rmt_hdr);
|
||||
}
|
||||
|
||||
|
@ -434,14 +451,21 @@ xfs_attr_rmtval_set(
|
|||
|
||||
/*
|
||||
* Allocate a single extent, up to the size of the value.
|
||||
*
|
||||
* Note that we have to consider this a data allocation as we
|
||||
* write the remote attribute without logging the contents.
|
||||
* Hence we must ensure that we aren't using blocks that are on
|
||||
* the busy list so that we don't overwrite blocks which have
|
||||
* recently been freed but their transactions are not yet
|
||||
* committed to disk. If we overwrite the contents of a busy
|
||||
* extent and then crash then the block may not contain the
|
||||
* correct metadata after log recovery occurs.
|
||||
*/
|
||||
xfs_bmap_init(args->flist, args->firstblock);
|
||||
nmap = 1;
|
||||
error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno,
|
||||
blkcnt,
|
||||
XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
|
||||
args->firstblock, args->total, &map, &nmap,
|
||||
args->flist);
|
||||
blkcnt, XFS_BMAPI_ATTRFORK, args->firstblock,
|
||||
args->total, &map, &nmap, args->flist);
|
||||
if (!error) {
|
||||
error = xfs_bmap_finish(&args->trans, args->flist,
|
||||
&committed);
|
||||
|
|
|
@ -1514,18 +1514,27 @@ xfs_filemap_fault(
|
|||
struct vm_area_struct *vma,
|
||||
struct vm_fault *vmf)
|
||||
{
|
||||
struct xfs_inode *ip = XFS_I(file_inode(vma->vm_file));
|
||||
struct inode *inode = file_inode(vma->vm_file);
|
||||
int ret;
|
||||
|
||||
trace_xfs_filemap_fault(ip);
|
||||
trace_xfs_filemap_fault(XFS_I(inode));
|
||||
|
||||
/* DAX can shortcut the normal fault path on write faults! */
|
||||
if ((vmf->flags & FAULT_FLAG_WRITE) && IS_DAX(VFS_I(ip)))
|
||||
if ((vmf->flags & FAULT_FLAG_WRITE) && IS_DAX(inode))
|
||||
return xfs_filemap_page_mkwrite(vma, vmf);
|
||||
|
||||
xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
|
||||
ret = filemap_fault(vma, vmf);
|
||||
xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
|
||||
xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
|
||||
if (IS_DAX(inode)) {
|
||||
/*
|
||||
* we do not want to trigger unwritten extent conversion on read
|
||||
* faults - that is unnecessary overhead and would also require
|
||||
* changes to xfs_get_blocks_direct() to map unwritten extent
|
||||
* ioend for conversion on read-only mappings.
|
||||
*/
|
||||
ret = __dax_fault(vma, vmf, xfs_get_blocks_direct, NULL);
|
||||
} else
|
||||
ret = filemap_fault(vma, vmf);
|
||||
xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
|
|
@ -1886,9 +1886,14 @@ xlog_recover_get_buf_lsn(
|
|||
uuid = &((struct xfs_dir3_blk_hdr *)blk)->uuid;
|
||||
break;
|
||||
case XFS_ATTR3_RMT_MAGIC:
|
||||
lsn = be64_to_cpu(((struct xfs_attr3_rmt_hdr *)blk)->rm_lsn);
|
||||
uuid = &((struct xfs_attr3_rmt_hdr *)blk)->rm_uuid;
|
||||
break;
|
||||
/*
|
||||
* Remote attr blocks are written synchronously, rather than
|
||||
* being logged. That means they do not contain a valid LSN
|
||||
* (i.e. transactionally ordered) in them, and hence any time we
|
||||
* see a buffer to replay over the top of a remote attribute
|
||||
* block we should simply do so.
|
||||
*/
|
||||
goto recover_immediately;
|
||||
case XFS_SB_MAGIC:
|
||||
lsn = be64_to_cpu(((struct xfs_dsb *)blk)->sb_lsn);
|
||||
uuid = &((struct xfs_dsb *)blk)->sb_uuid;
|
||||
|
|
Loading…
Reference in New Issue