diff --git a/Documentation/filesystems/dax.rst b/Documentation/filesystems/dax.rst index e3b30429d703..c04609d8ee24 100644 --- a/Documentation/filesystems/dax.rst +++ b/Documentation/filesystems/dax.rst @@ -23,11 +23,11 @@ on it as usual. The `DAX` code currently only supports files with a block size equal to your kernel's `PAGE_SIZE`, so you may need to specify a block size when creating the filesystem. -Currently 4 filesystems support `DAX`: ext2, ext4, xfs and virtiofs. +Currently 5 filesystems support `DAX`: ext2, ext4, xfs, virtiofs and erofs. Enabling `DAX` on them is different. -Enabling DAX on ext2 --------------------- +Enabling DAX on ext2 and erofs +------------------------------ When mounting the filesystem, use the ``-o dax`` option on the command line or add 'dax' to the options in ``/etc/fstab``. This works to enable `DAX` on all files diff --git a/Documentation/filesystems/erofs.rst b/Documentation/filesystems/erofs.rst index 7119aa213be7..bef6d3040ce4 100644 --- a/Documentation/filesystems/erofs.rst +++ b/Documentation/filesystems/erofs.rst @@ -40,7 +40,7 @@ Here is the main features of EROFS: Inode metadata size 32 bytes 64 bytes Max file size 4 GB 16 EB (also limited by max. 
vol size) Max uids/gids 65536 4294967296 - File change time no yes (64 + 32-bit timestamp) + Per-inode timestamp no yes (64 + 32-bit timestamp) Max hardlinks 65536 4294967296 Metadata reserved 4 bytes 14 bytes ===================== ============ ===================================== diff --git a/fs/erofs/data.c b/fs/erofs/data.c index 226a57c57ee6..780db1e5f4b7 100644 --- a/fs/erofs/data.c +++ b/fs/erofs/data.c @@ -28,10 +28,10 @@ void erofs_put_metabuf(struct erofs_buf *buf) buf->page = NULL; } -void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb, - erofs_blk_t blkaddr, enum erofs_kmap_type type) +void *erofs_bread(struct erofs_buf *buf, struct inode *inode, + erofs_blk_t blkaddr, enum erofs_kmap_type type) { - struct address_space *const mapping = sb->s_bdev->bd_inode->i_mapping; + struct address_space *const mapping = inode->i_mapping; erofs_off_t offset = blknr_to_addr(blkaddr); pgoff_t index = offset >> PAGE_SHIFT; struct page *page = buf->page; @@ -60,6 +60,12 @@ void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb, return buf->base + (offset & ~PAGE_MASK); } +void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb, + erofs_blk_t blkaddr, enum erofs_kmap_type type) +{ + return erofs_bread(buf, sb->s_bdev->bd_inode, blkaddr, type); +} + static int erofs_map_blocks_flatmode(struct inode *inode, struct erofs_map_blocks *map, int flags) diff --git a/fs/erofs/dir.c b/fs/erofs/dir.c index eee9b0b31b63..18e59821c597 100644 --- a/fs/erofs/dir.c +++ b/fs/erofs/dir.c @@ -2,6 +2,7 @@ /* * Copyright (C) 2017-2018 HUAWEI, Inc. 
* https://www.huawei.com/ + * Copyright (C) 2022, Alibaba Cloud */ #include "internal.h" @@ -67,7 +68,7 @@ static int erofs_fill_dentries(struct inode *dir, struct dir_context *ctx, static int erofs_readdir(struct file *f, struct dir_context *ctx) { struct inode *dir = file_inode(f); - struct address_space *mapping = dir->i_mapping; + struct erofs_buf buf = __EROFS_BUF_INITIALIZER; const size_t dirsize = i_size_read(dir); unsigned int i = ctx->pos / EROFS_BLKSIZ; unsigned int ofs = ctx->pos % EROFS_BLKSIZ; @@ -75,26 +76,19 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx) bool initial = true; while (ctx->pos < dirsize) { - struct page *dentry_page; struct erofs_dirent *de; unsigned int nameoff, maxsize; - dentry_page = read_mapping_page(mapping, i, NULL); - if (dentry_page == ERR_PTR(-ENOMEM)) { - err = -ENOMEM; - break; - } else if (IS_ERR(dentry_page)) { + de = erofs_bread(&buf, dir, i, EROFS_KMAP); + if (IS_ERR(de)) { erofs_err(dir->i_sb, "fail to readdir of logical block %u of nid %llu", i, EROFS_I(dir)->nid); - err = -EFSCORRUPTED; + err = PTR_ERR(de); break; } - de = (struct erofs_dirent *)kmap(dentry_page); - nameoff = le16_to_cpu(de->nameoff); - if (nameoff < sizeof(struct erofs_dirent) || nameoff >= PAGE_SIZE) { erofs_err(dir->i_sb, @@ -119,10 +113,6 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx) err = erofs_fill_dentries(dir, ctx, de, &ofs, nameoff, maxsize); skip_this: - kunmap(dentry_page); - - put_page(dentry_page); - ctx->pos = blknr_to_addr(i) + ofs; if (err) @@ -130,6 +120,7 @@ skip_this: ++i; ofs = 0; } + erofs_put_metabuf(&buf); return err < 0 ? 
err : 0; } diff --git a/fs/erofs/erofs_fs.h b/fs/erofs/erofs_fs.h index 3ea62c6fb00a..1238ca104f09 100644 --- a/fs/erofs/erofs_fs.h +++ b/fs/erofs/erofs_fs.h @@ -12,6 +12,7 @@ #define EROFS_SUPER_OFFSET 1024 #define EROFS_FEATURE_COMPAT_SB_CHKSUM 0x00000001 +#define EROFS_FEATURE_COMPAT_MTIME 0x00000002 /* * Any bits that aren't in EROFS_ALL_FEATURE_INCOMPAT should @@ -186,8 +187,8 @@ struct erofs_inode_extended { __le32 i_uid; __le32 i_gid; - __le64 i_ctime; - __le32 i_ctime_nsec; + __le64 i_mtime; + __le32 i_mtime_nsec; __le32 i_nlink; __u8 i_reserved2[16]; }; diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c index ff62f84f47d3..e8b37ba5e9ad 100644 --- a/fs/erofs/inode.c +++ b/fs/erofs/inode.c @@ -113,8 +113,8 @@ static void *erofs_read_inode(struct erofs_buf *buf, set_nlink(inode, le32_to_cpu(die->i_nlink)); /* extended inode has its own timestamp */ - inode->i_ctime.tv_sec = le64_to_cpu(die->i_ctime); - inode->i_ctime.tv_nsec = le32_to_cpu(die->i_ctime_nsec); + inode->i_ctime.tv_sec = le64_to_cpu(die->i_mtime); + inode->i_ctime.tv_nsec = le32_to_cpu(die->i_mtime_nsec); inode->i_size = le64_to_cpu(die->i_size); diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h index 5aa2cf2c2f80..5298c4ee277d 100644 --- a/fs/erofs/internal.h +++ b/fs/erofs/internal.h @@ -479,6 +479,8 @@ struct erofs_map_dev { extern const struct file_operations erofs_file_fops; void erofs_unmap_metabuf(struct erofs_buf *buf); void erofs_put_metabuf(struct erofs_buf *buf); +void *erofs_bread(struct erofs_buf *buf, struct inode *inode, + erofs_blk_t blkaddr, enum erofs_kmap_type type); void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb, erofs_blk_t blkaddr, enum erofs_kmap_type type); int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *dev); diff --git a/fs/erofs/namei.c b/fs/erofs/namei.c index 8629e616028c..554efa363317 100644 --- a/fs/erofs/namei.c +++ b/fs/erofs/namei.c @@ -2,6 +2,7 @@ /* * Copyright (C) 2017-2018 HUAWEI, Inc. 
* https://www.huawei.com/ + * Copyright (C) 2022, Alibaba Cloud */ #include "xattr.h" @@ -86,14 +87,14 @@ static struct erofs_dirent *find_target_dirent(struct erofs_qstr *name, return ERR_PTR(-ENOENT); } -static struct page *find_target_block_classic(struct inode *dir, - struct erofs_qstr *name, - int *_ndirents) +static void *find_target_block_classic(struct erofs_buf *target, + struct inode *dir, + struct erofs_qstr *name, + int *_ndirents) { unsigned int startprfx, endprfx; int head, back; - struct address_space *const mapping = dir->i_mapping; - struct page *candidate = ERR_PTR(-ENOENT); + void *candidate = ERR_PTR(-ENOENT); startprfx = endprfx = 0; head = 0; @@ -101,10 +102,11 @@ static struct page *find_target_block_classic(struct inode *dir, while (head <= back) { const int mid = head + (back - head) / 2; - struct page *page = read_mapping_page(mapping, mid, NULL); + struct erofs_buf buf = __EROFS_BUF_INITIALIZER; + struct erofs_dirent *de; - if (!IS_ERR(page)) { - struct erofs_dirent *de = kmap_atomic(page); + de = erofs_bread(&buf, dir, mid, EROFS_KMAP); + if (!IS_ERR(de)) { const int nameoff = nameoff_from_disk(de->nameoff, EROFS_BLKSIZ); const int ndirents = nameoff / sizeof(*de); @@ -113,13 +115,12 @@ static struct page *find_target_block_classic(struct inode *dir, struct erofs_qstr dname; if (!ndirents) { - kunmap_atomic(de); - put_page(page); + erofs_put_metabuf(&buf); erofs_err(dir->i_sb, "corrupted dir block %d @ nid %llu", mid, EROFS_I(dir)->nid); DBG_BUGON(1); - page = ERR_PTR(-EFSCORRUPTED); + de = ERR_PTR(-EFSCORRUPTED); goto out; } @@ -135,7 +136,6 @@ static struct page *find_target_block_classic(struct inode *dir, /* string comparison without already matched prefix */ diff = erofs_dirnamecmp(name, &dname, &matched); - kunmap_atomic(de); if (!diff) { *_ndirents = 0; @@ -145,11 +145,12 @@ static struct page *find_target_block_classic(struct inode *dir, startprfx = matched; if (!IS_ERR(candidate)) - put_page(candidate); - candidate = page; + 
erofs_put_metabuf(target); + *target = buf; + candidate = de; *_ndirents = ndirents; } else { - put_page(page); + erofs_put_metabuf(&buf); back = mid - 1; endprfx = matched; @@ -158,8 +159,10 @@ static struct page *find_target_block_classic(struct inode *dir, } out: /* free if the candidate is valid */ if (!IS_ERR(candidate)) - put_page(candidate); - return page; + erofs_put_metabuf(target); + /* the block backing de (if any) now belongs to the caller */ + *target = buf; + return de; } return candidate; } @@ -169,8 +172,7 @@ int erofs_namei(struct inode *dir, erofs_nid_t *nid, unsigned int *d_type) { int ndirents; - struct page *page; - void *data; + struct erofs_buf buf = __EROFS_BUF_INITIALIZER; struct erofs_dirent *de; struct erofs_qstr qn; @@ -181,26 +183,20 @@ int erofs_namei(struct inode *dir, qn.end = name->name + name->len; ndirents = 0; - page = find_target_block_classic(dir, &qn, &ndirents); - if (IS_ERR(page)) - return PTR_ERR(page); + de = find_target_block_classic(&buf, dir, &qn, &ndirents); + if (IS_ERR(de)) + return PTR_ERR(de); - data = kmap_atomic(page); /* the target page has been mapped */ if (ndirents) - de = find_target_dirent(&qn, data, EROFS_BLKSIZ, ndirents); - else - de = (struct erofs_dirent *)data; + de = find_target_dirent(&qn, (u8 *)de, EROFS_BLKSIZ, ndirents); if (!IS_ERR(de)) { *nid = le64_to_cpu(de->nid); *d_type = de->file_type; } - - kunmap_atomic(data); - put_page(page); - + erofs_put_metabuf(&buf); return PTR_ERR_OR_ZERO(de); } diff --git a/fs/erofs/super.c b/fs/erofs/super.c index 915eefe0d7e2..e178100c162a 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -281,21 +281,19 @@ static int erofs_init_devices(struct super_block *sb, static int erofs_read_superblock(struct super_block *sb) { struct erofs_sb_info *sbi; - struct page *page; + struct erofs_buf buf = __EROFS_BUF_INITIALIZER; struct erofs_super_block *dsb; unsigned int blkszbits; void *data; int ret; - page = read_mapping_page(sb->s_bdev->bd_inode->i_mapping, 0, NULL); - if (IS_ERR(page)) { + data = erofs_read_metabuf(&buf, sb, 0, EROFS_KMAP); + 
if (IS_ERR(data)) { erofs_err(sb, "cannot read erofs superblock"); - return PTR_ERR(page); + return PTR_ERR(data); } sbi = EROFS_SB(sb); - - data = kmap(page); dsb = (struct erofs_super_block *)(data + EROFS_SUPER_OFFSET); ret = -EINVAL; @@ -365,8 +363,7 @@ static int erofs_read_superblock(struct super_block *sb) if (erofs_sb_has_ztailpacking(sbi)) erofs_info(sb, "EXPERIMENTAL compressed inline data feature in use. Use at your own risk!"); out: - kunmap(page); - put_page(page); + erofs_put_metabuf(&buf); return ret; } @@ -535,6 +532,11 @@ static int erofs_managed_cache_releasepage(struct page *page, gfp_t gfp_mask) return ret; } +/* + * It will be called only on inode eviction. In case that there are still some + * decompression requests in progress, wait with rescheduling for a bit here. + * We could introduce an extra locking instead but it seems unnecessary. + */ static void erofs_managed_cache_invalidatepage(struct page *page, unsigned int offset, unsigned int length) @@ -568,8 +570,7 @@ static int erofs_init_managed_cache(struct super_block *sb) inode->i_size = OFFSET_MAX; inode->i_mapping->a_ops = &managed_cache_aops; - mapping_set_gfp_mask(inode->i_mapping, - GFP_NOFS | __GFP_HIGHMEM | __GFP_MOVABLE); + mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); sbi->managed_cache = inode; return 0; } diff --git a/fs/erofs/sysfs.c b/fs/erofs/sysfs.c index dac252bc9228..f3babf1e6608 100644 --- a/fs/erofs/sysfs.c +++ b/fs/erofs/sysfs.c @@ -221,9 +221,11 @@ void erofs_unregister_sysfs(struct super_block *sb) { struct erofs_sb_info *sbi = EROFS_SB(sb); - kobject_del(&sbi->s_kobj); - kobject_put(&sbi->s_kobj); - wait_for_completion(&sbi->s_kobj_unregister); + if (sbi->s_kobj.state_in_sysfs) { + kobject_del(&sbi->s_kobj); + kobject_put(&sbi->s_kobj); + wait_for_completion(&sbi->s_kobj_unregister); + } } int __init erofs_init_sysfs(void) diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index 27d42ffdafd2..0ed880f42525 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ 
-192,7 +192,10 @@ enum z_erofs_collectmode { COLLECT_PRIMARY_FOLLOWED, }; -struct z_erofs_collector { +struct z_erofs_decompress_frontend { + struct inode *const inode; + struct erofs_map_blocks map; + struct z_erofs_pagevec_ctor vector; struct z_erofs_pcluster *pcl, *tailpcl; @@ -202,13 +205,6 @@ struct z_erofs_collector { z_erofs_next_pcluster_t owned_head; enum z_erofs_collectmode mode; -}; - -struct z_erofs_decompress_frontend { - struct inode *const inode; - - struct z_erofs_collector clt; - struct erofs_map_blocks map; bool readahead; /* used for applying cache strategy on the fly */ @@ -216,30 +212,30 @@ struct z_erofs_decompress_frontend { erofs_off_t headoffset; }; -#define COLLECTOR_INIT() { \ - .owned_head = Z_EROFS_PCLUSTER_TAIL, \ - .mode = COLLECT_PRIMARY_FOLLOWED } - #define DECOMPRESS_FRONTEND_INIT(__i) { \ - .inode = __i, .clt = COLLECTOR_INIT(), \ - .backmost = true, } + .inode = __i, .owned_head = Z_EROFS_PCLUSTER_TAIL, \ + .mode = COLLECT_PRIMARY_FOLLOWED, .backmost = true } static struct page *z_pagemap_global[Z_EROFS_VMAP_GLOBAL_PAGES]; static DEFINE_MUTEX(z_pagemap_global_lock); -static void preload_compressed_pages(struct z_erofs_collector *clt, - struct address_space *mc, - enum z_erofs_cache_alloctype type, - struct page **pagepool) +static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe, + enum z_erofs_cache_alloctype type, + struct page **pagepool) { - struct z_erofs_pcluster *pcl = clt->pcl; + struct address_space *mc = MNGD_MAPPING(EROFS_I_SB(fe->inode)); + struct z_erofs_pcluster *pcl = fe->pcl; bool standalone = true; + /* + * optimistic allocation without direct reclaim since inplace I/O + * can be used if low memory otherwise. 
+ */ gfp_t gfp = (mapping_gfp_mask(mc) & ~__GFP_DIRECT_RECLAIM) | __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN; struct page **pages; pgoff_t index; - if (clt->mode < COLLECT_PRIMARY_FOLLOWED) + if (fe->mode < COLLECT_PRIMARY_FOLLOWED) return; pages = pcl->compressed_pages; @@ -288,7 +284,7 @@ static void preload_compressed_pages(struct z_erofs_collector *clt, * managed cache since it can be moved to the bypass queue instead. */ if (standalone) - clt->mode = COLLECT_PRIMARY_FOLLOWED_NOINPLACE; + fe->mode = COLLECT_PRIMARY_FOLLOWED_NOINPLACE; } /* called by erofs_shrinker to get rid of all compressed_pages */ @@ -350,47 +346,47 @@ int erofs_try_to_free_cached_page(struct page *page) } /* page_type must be Z_EROFS_PAGE_TYPE_EXCLUSIVE */ -static bool z_erofs_try_inplace_io(struct z_erofs_collector *clt, +static bool z_erofs_try_inplace_io(struct z_erofs_decompress_frontend *fe, struct page *page) { - struct z_erofs_pcluster *const pcl = clt->pcl; + struct z_erofs_pcluster *const pcl = fe->pcl; - while (clt->icpage_ptr > pcl->compressed_pages) - if (!cmpxchg(--clt->icpage_ptr, NULL, page)) + while (fe->icpage_ptr > pcl->compressed_pages) + if (!cmpxchg(--fe->icpage_ptr, NULL, page)) return true; return false; } /* callers must be with collection lock held */ -static int z_erofs_attach_page(struct z_erofs_collector *clt, +static int z_erofs_attach_page(struct z_erofs_decompress_frontend *fe, struct page *page, enum z_erofs_page_type type, bool pvec_safereuse) { int ret; /* give priority for inplaceio */ - if (clt->mode >= COLLECT_PRIMARY && + if (fe->mode >= COLLECT_PRIMARY && type == Z_EROFS_PAGE_TYPE_EXCLUSIVE && - z_erofs_try_inplace_io(clt, page)) + z_erofs_try_inplace_io(fe, page)) return 0; - ret = z_erofs_pagevec_enqueue(&clt->vector, page, type, + ret = z_erofs_pagevec_enqueue(&fe->vector, page, type, pvec_safereuse); - clt->cl->vcnt += (unsigned int)ret; + fe->cl->vcnt += (unsigned int)ret; return ret ? 
0 : -EAGAIN; } -static void z_erofs_try_to_claim_pcluster(struct z_erofs_collector *clt) +static void z_erofs_try_to_claim_pcluster(struct z_erofs_decompress_frontend *f) { - struct z_erofs_pcluster *pcl = clt->pcl; - z_erofs_next_pcluster_t *owned_head = &clt->owned_head; + struct z_erofs_pcluster *pcl = f->pcl; + z_erofs_next_pcluster_t *owned_head = &f->owned_head; /* type 1, nil pcluster (this pcluster doesn't belong to any chain.) */ if (cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_NIL, *owned_head) == Z_EROFS_PCLUSTER_NIL) { *owned_head = &pcl->next; /* so we can attach this pcluster to our submission chain. */ - clt->mode = COLLECT_PRIMARY_FOLLOWED; + f->mode = COLLECT_PRIMARY_FOLLOWED; return; } @@ -401,24 +397,24 @@ static void z_erofs_try_to_claim_pcluster(struct z_erofs_collector *clt) if (cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_TAIL, *owned_head) == Z_EROFS_PCLUSTER_TAIL) { *owned_head = Z_EROFS_PCLUSTER_TAIL; - clt->mode = COLLECT_PRIMARY_HOOKED; - clt->tailpcl = NULL; + f->mode = COLLECT_PRIMARY_HOOKED; + f->tailpcl = NULL; return; } /* type 3, it belongs to a chain, but it isn't the end of the chain */ - clt->mode = COLLECT_PRIMARY; + f->mode = COLLECT_PRIMARY; } -static int z_erofs_lookup_collection(struct z_erofs_collector *clt, +static int z_erofs_lookup_collection(struct z_erofs_decompress_frontend *fe, struct inode *inode, struct erofs_map_blocks *map) { - struct z_erofs_pcluster *pcl = clt->pcl; + struct z_erofs_pcluster *pcl = fe->pcl; struct z_erofs_collection *cl; unsigned int length; /* to avoid unexpected loop formed by corrupted images */ - if (clt->owned_head == &pcl->next || pcl == clt->tailpcl) { + if (fe->owned_head == &pcl->next || pcl == fe->tailpcl) { DBG_BUGON(1); return -EFSCORRUPTED; } @@ -449,15 +445,15 @@ static int z_erofs_lookup_collection(struct z_erofs_collector *clt, } mutex_lock(&cl->lock); /* used to check tail merging loop due to corrupted images */ - if (clt->owned_head == Z_EROFS_PCLUSTER_TAIL) - clt->tailpcl = pcl; + if 
(fe->owned_head == Z_EROFS_PCLUSTER_TAIL) + fe->tailpcl = pcl; - z_erofs_try_to_claim_pcluster(clt); - clt->cl = cl; + z_erofs_try_to_claim_pcluster(fe); + fe->cl = cl; return 0; } -static int z_erofs_register_collection(struct z_erofs_collector *clt, +static int z_erofs_register_collection(struct z_erofs_decompress_frontend *fe, struct inode *inode, struct erofs_map_blocks *map) { @@ -485,8 +481,8 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt, Z_EROFS_PCLUSTER_FULL_LENGTH : 0); /* new pclusters should be claimed as type 1, primary and followed */ - pcl->next = clt->owned_head; - clt->mode = COLLECT_PRIMARY_FOLLOWED; + pcl->next = fe->owned_head; + fe->mode = COLLECT_PRIMARY_FOLLOWED; cl = z_erofs_primarycollection(pcl); cl->pageofs = map->m_la & ~PAGE_MASK; @@ -512,18 +508,18 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt, } if (grp != &pcl->obj) { - clt->pcl = container_of(grp, + fe->pcl = container_of(grp, struct z_erofs_pcluster, obj); err = -EEXIST; goto err_out; } } /* used to check tail merging loop due to corrupted images */ - if (clt->owned_head == Z_EROFS_PCLUSTER_TAIL) - clt->tailpcl = pcl; - clt->owned_head = &pcl->next; - clt->pcl = pcl; - clt->cl = cl; + if (fe->owned_head == Z_EROFS_PCLUSTER_TAIL) + fe->tailpcl = pcl; + fe->owned_head = &pcl->next; + fe->pcl = pcl; + fe->cl = cl; return 0; err_out: @@ -532,18 +528,18 @@ err_out: return err; } -static int z_erofs_collector_begin(struct z_erofs_collector *clt, +static int z_erofs_collector_begin(struct z_erofs_decompress_frontend *fe, struct inode *inode, struct erofs_map_blocks *map) { struct erofs_workgroup *grp; int ret; - DBG_BUGON(clt->cl); + DBG_BUGON(fe->cl); /* must be Z_EROFS_PCLUSTER_TAIL or pointed to previous collection */ - DBG_BUGON(clt->owned_head == Z_EROFS_PCLUSTER_NIL); - DBG_BUGON(clt->owned_head == Z_EROFS_PCLUSTER_TAIL_CLOSED); + DBG_BUGON(fe->owned_head == Z_EROFS_PCLUSTER_NIL); + DBG_BUGON(fe->owned_head == 
Z_EROFS_PCLUSTER_TAIL_CLOSED); if (map->m_flags & EROFS_MAP_META) { if ((map->m_pa & ~PAGE_MASK) + map->m_plen > PAGE_SIZE) { @@ -555,28 +551,28 @@ static int z_erofs_collector_begin(struct z_erofs_collector *clt, grp = erofs_find_workgroup(inode->i_sb, map->m_pa >> PAGE_SHIFT); if (grp) { - clt->pcl = container_of(grp, struct z_erofs_pcluster, obj); + fe->pcl = container_of(grp, struct z_erofs_pcluster, obj); } else { tailpacking: - ret = z_erofs_register_collection(clt, inode, map); + ret = z_erofs_register_collection(fe, inode, map); if (!ret) goto out; if (ret != -EEXIST) return ret; } - ret = z_erofs_lookup_collection(clt, inode, map); + ret = z_erofs_lookup_collection(fe, inode, map); if (ret) { - erofs_workgroup_put(&clt->pcl->obj); + erofs_workgroup_put(&fe->pcl->obj); return ret; } out: - z_erofs_pagevec_ctor_init(&clt->vector, Z_EROFS_NR_INLINE_PAGEVECS, - clt->cl->pagevec, clt->cl->vcnt); + z_erofs_pagevec_ctor_init(&fe->vector, Z_EROFS_NR_INLINE_PAGEVECS, + fe->cl->pagevec, fe->cl->vcnt); /* since file-backed online pages are traversed in reverse order */ - clt->icpage_ptr = clt->pcl->compressed_pages + - z_erofs_pclusterpages(clt->pcl); + fe->icpage_ptr = fe->pcl->compressed_pages + + z_erofs_pclusterpages(fe->pcl); return 0; } @@ -610,24 +606,24 @@ static void z_erofs_collection_put(struct z_erofs_collection *cl) erofs_workgroup_put(&pcl->obj); } -static bool z_erofs_collector_end(struct z_erofs_collector *clt) +static bool z_erofs_collector_end(struct z_erofs_decompress_frontend *fe) { - struct z_erofs_collection *cl = clt->cl; + struct z_erofs_collection *cl = fe->cl; if (!cl) return false; - z_erofs_pagevec_ctor_exit(&clt->vector, false); + z_erofs_pagevec_ctor_exit(&fe->vector, false); mutex_unlock(&cl->lock); /* * if all pending pages are added, don't hold its reference * any longer if the pcluster isn't hosted by ourselves. 
*/ - if (clt->mode < COLLECT_PRIMARY_FOLLOWED_NOINPLACE) + if (fe->mode < COLLECT_PRIMARY_FOLLOWED_NOINPLACE) z_erofs_collection_put(cl); - clt->cl = NULL; + fe->cl = NULL; return true; } @@ -651,7 +647,6 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe, struct inode *const inode = fe->inode; struct erofs_sb_info *const sbi = EROFS_I_SB(inode); struct erofs_map_blocks *const map = &fe->map; - struct z_erofs_collector *const clt = &fe->clt; const loff_t offset = page_offset(page); bool tight = true; @@ -672,7 +667,7 @@ repeat: if (offset + cur >= map->m_la && offset + cur < map->m_la + map->m_llen) { /* didn't get a valid collection previously (very rare) */ - if (!clt->cl) + if (!fe->cl) goto restart_now; goto hitted; } @@ -680,7 +675,7 @@ repeat: /* go ahead the next map_blocks */ erofs_dbg("%s: [out-of-range] pos %llu", __func__, offset + cur); - if (z_erofs_collector_end(clt)) + if (z_erofs_collector_end(fe)) fe->backmost = false; map->m_la = offset + cur; @@ -693,11 +688,11 @@ restart_now: if (!(map->m_flags & EROFS_MAP_MAPPED)) goto hitted; - err = z_erofs_collector_begin(clt, inode, map); + err = z_erofs_collector_begin(fe, inode, map); if (err) goto err_out; - if (z_erofs_is_inline_pcluster(clt->pcl)) { + if (z_erofs_is_inline_pcluster(fe->pcl)) { void *mp; mp = erofs_read_metabuf(&fe->map.buf, inode->i_sb, @@ -709,20 +704,18 @@ restart_now: goto err_out; } get_page(fe->map.buf.page); - WRITE_ONCE(clt->pcl->compressed_pages[0], fe->map.buf.page); - clt->mode = COLLECT_PRIMARY_FOLLOWED_NOINPLACE; + WRITE_ONCE(fe->pcl->compressed_pages[0], fe->map.buf.page); + fe->mode = COLLECT_PRIMARY_FOLLOWED_NOINPLACE; } else { - /* preload all compressed pages (can change mode if needed) */ + /* bind cache first when cached decompression is preferred */ if (should_alloc_managed_pages(fe, sbi->opt.cache_strategy, map->m_la)) cache_strategy = TRYALLOC; else cache_strategy = DONTALLOC; - preload_compressed_pages(clt, MNGD_MAPPING(sbi), - 
cache_strategy, pagepool); + z_erofs_bind_cache(fe, cache_strategy, pagepool); } - hitted: /* * Ensure the current partial page belongs to this submit chain rather @@ -730,8 +723,8 @@ hitted: * those chains are handled asynchronously thus the page cannot be used * for inplace I/O or pagevec (should be processed in strict order.) */ - tight &= (clt->mode >= COLLECT_PRIMARY_HOOKED && - clt->mode != COLLECT_PRIMARY_FOLLOWED_NOINPLACE); + tight &= (fe->mode >= COLLECT_PRIMARY_HOOKED && + fe->mode != COLLECT_PRIMARY_FOLLOWED_NOINPLACE); cur = end - min_t(unsigned int, offset + end - map->m_la, end); if (!(map->m_flags & EROFS_MAP_MAPPED)) { @@ -746,18 +739,18 @@ hitted: Z_EROFS_VLE_PAGE_TYPE_TAIL_SHARED)); if (cur) - tight &= (clt->mode >= COLLECT_PRIMARY_FOLLOWED); + tight &= (fe->mode >= COLLECT_PRIMARY_FOLLOWED); retry: - err = z_erofs_attach_page(clt, page, page_type, - clt->mode >= COLLECT_PRIMARY_FOLLOWED); + err = z_erofs_attach_page(fe, page, page_type, + fe->mode >= COLLECT_PRIMARY_FOLLOWED); /* should allocate an additional short-lived page for pagevec */ if (err == -EAGAIN) { struct page *const newpage = alloc_page(GFP_NOFS | __GFP_NOFAIL); set_page_private(newpage, Z_EROFS_SHORTLIVED_PAGE); - err = z_erofs_attach_page(clt, newpage, + err = z_erofs_attach_page(fe, newpage, Z_EROFS_PAGE_TYPE_EXCLUSIVE, true); if (!err) goto retry; @@ -773,7 +766,7 @@ retry: /* bump up the number of spiltted parts of a page */ ++spiltted; /* also update nr_pages */ - clt->cl->nr_pages = max_t(pgoff_t, clt->cl->nr_pages, index + 1); + fe->cl->nr_pages = max_t(pgoff_t, fe->cl->nr_pages, index + 1); next_part: /* can be used for verification */ map->m_llen = offset + cur - map->m_la; @@ -1098,10 +1091,10 @@ static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io, static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl, unsigned int nr, struct page **pagepool, - struct address_space *mc, - gfp_t gfp) + struct address_space *mc) { const pgoff_t 
index = pcl->obj.index; + gfp_t gfp = mapping_gfp_mask(mc); bool tocache = false; struct address_space *mapping; @@ -1309,7 +1302,7 @@ static void z_erofs_submit_queue(struct super_block *sb, z_erofs_next_pcluster_t qtail[NR_JOBQUEUES]; struct z_erofs_decompressqueue *q[NR_JOBQUEUES]; void *bi_private; - z_erofs_next_pcluster_t owned_head = f->clt.owned_head; + z_erofs_next_pcluster_t owned_head = f->owned_head; /* bio is NULL initially, so no need to initialize last_{index,bdev} */ pgoff_t last_index; struct block_device *last_bdev; @@ -1357,8 +1350,7 @@ static void z_erofs_submit_queue(struct super_block *sb, struct page *page; page = pickup_page_for_submission(pcl, i++, pagepool, - MNGD_MAPPING(sbi), - GFP_NOFS); + MNGD_MAPPING(sbi)); if (!page) continue; @@ -1416,7 +1408,7 @@ static void z_erofs_runqueue(struct super_block *sb, { struct z_erofs_decompressqueue io[NR_JOBQUEUES]; - if (f->clt.owned_head == Z_EROFS_PCLUSTER_TAIL) + if (f->owned_head == Z_EROFS_PCLUSTER_TAIL) return; z_erofs_submit_queue(sb, f, pagepool, io, &force_fg); @@ -1516,7 +1508,7 @@ static int z_erofs_readpage(struct file *file, struct page *page) err = z_erofs_do_read_page(&f, page, &pagepool); z_erofs_pcluster_readmore(&f, NULL, 0, &pagepool, false); - (void)z_erofs_collector_end(&f.clt); + (void)z_erofs_collector_end(&f); /* if some compressed cluster ready, need submit them anyway */ z_erofs_runqueue(inode->i_sb, &f, &pagepool, @@ -1566,7 +1558,7 @@ static void z_erofs_readahead(struct readahead_control *rac) put_page(page); } z_erofs_pcluster_readmore(&f, rac, 0, &pagepool, false); - (void)z_erofs_collector_end(&f.clt); + (void)z_erofs_collector_end(&f); z_erofs_runqueue(inode->i_sb, &f, &pagepool, z_erofs_get_sync_decompress_policy(sbi, nr_pages)); diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c index 361b1d6e4bf9..572f0b8151ba 100644 --- a/fs/erofs/zmap.c +++ b/fs/erofs/zmap.c @@ -431,48 +431,47 @@ static int z_erofs_extent_lookback(struct z_erofs_maprecorder *m, unsigned int 
lookback_distance) { struct erofs_inode *const vi = EROFS_I(m->inode); - struct erofs_map_blocks *const map = m->map; const unsigned int lclusterbits = vi->z_logical_clusterbits; - unsigned long lcn = m->lcn; - int err; - if (lcn < lookback_distance) { - erofs_err(m->inode->i_sb, - "bogus lookback distance @ nid %llu", vi->nid); - DBG_BUGON(1); - return -EFSCORRUPTED; - } + while (m->lcn >= lookback_distance) { + unsigned long lcn = m->lcn - lookback_distance; + int err; - /* load extent head logical cluster if needed */ - lcn -= lookback_distance; - err = z_erofs_load_cluster_from_disk(m, lcn, false); - if (err) - return err; + /* load extent head logical cluster if needed */ + err = z_erofs_load_cluster_from_disk(m, lcn, false); + if (err) + return err; - switch (m->type) { - case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD: - if (!m->delta[0]) { + switch (m->type) { + case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD: + if (!m->delta[0]) { + erofs_err(m->inode->i_sb, + "invalid lookback distance 0 @ nid %llu", + vi->nid); + DBG_BUGON(1); + return -EFSCORRUPTED; + } + lookback_distance = m->delta[0]; + continue; + case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN: + case Z_EROFS_VLE_CLUSTER_TYPE_HEAD1: + case Z_EROFS_VLE_CLUSTER_TYPE_HEAD2: + m->headtype = m->type; + m->map->m_la = (lcn << lclusterbits) | m->clusterofs; + return 0; + default: erofs_err(m->inode->i_sb, - "invalid lookback distance 0 @ nid %llu", - vi->nid); + "unknown type %u @ lcn %lu of nid %llu", + m->type, lcn, vi->nid); DBG_BUGON(1); - return -EFSCORRUPTED; + return -EOPNOTSUPP; } - return z_erofs_extent_lookback(m, m->delta[0]); - case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN: - case Z_EROFS_VLE_CLUSTER_TYPE_HEAD1: - case Z_EROFS_VLE_CLUSTER_TYPE_HEAD2: - m->headtype = m->type; - map->m_la = (lcn << lclusterbits) | m->clusterofs; - break; - default: - erofs_err(m->inode->i_sb, - "unknown type %u @ lcn %lu of nid %llu", - m->type, lcn, vi->nid); - DBG_BUGON(1); - return -EOPNOTSUPP; } - return 0; + + erofs_err(m->inode->i_sb, "bogus 
lookback distance @ nid %llu", + vi->nid); + DBG_BUGON(1); + return -EFSCORRUPTED; } static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m, @@ -494,7 +493,7 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m, !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1)) || ((m->headtype == Z_EROFS_VLE_CLUSTER_TYPE_HEAD2) && !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_2))) { - map->m_plen = 1 << lclusterbits; + map->m_plen = 1ULL << lclusterbits; return 0; } lcn = m->lcn + 1; @@ -540,7 +539,7 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m, return -EFSCORRUPTED; } out: - map->m_plen = m->compressedlcs << lclusterbits; + map->m_plen = (u64)m->compressedlcs << lclusterbits; return 0; err_bonus_cblkcnt: erofs_err(m->inode->i_sb,