From 3b62f000c86ac7139f79912136b85eacf233b173 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 2 Jan 2020 13:27:44 -0800 Subject: [PATCH 01/24] xfs: rename compat_time_t to old_time32_t The compat_time_t type has been removed everywhere else, as most users rely on old_time32_t for both native and compat mode handling of 32-bit time_t. Remove the last one in xfs. Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Arnd Bergmann Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_ioctl32.c | 2 +- fs/xfs/xfs_ioctl32.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index c4c4f09113d3..a49bd80b2c3b 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c @@ -107,7 +107,7 @@ xfs_ioctl32_bstime_copyin( xfs_bstime_t *bstime, compat_xfs_bstime_t __user *bstime32) { - compat_time_t sec32; /* tv_sec differs on 64 vs. 32 */ + old_time32_t sec32; /* tv_sec differs on 64 vs. 32 */ if (get_user(sec32, &bstime32->tv_sec) || get_user(bstime->tv_nsec, &bstime32->tv_nsec)) diff --git a/fs/xfs/xfs_ioctl32.h b/fs/xfs/xfs_ioctl32.h index 8c7743cd490e..053de7d894cd 100644 --- a/fs/xfs/xfs_ioctl32.h +++ b/fs/xfs/xfs_ioctl32.h @@ -32,7 +32,7 @@ #endif typedef struct compat_xfs_bstime { - compat_time_t tv_sec; /* seconds */ + old_time32_t tv_sec; /* seconds */ __s32 tv_nsec; /* and nanoseconds */ } compat_xfs_bstime_t; From b8a0880a37e2f43aa3bcd147182e95a4ebd82279 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 2 Jan 2020 13:27:45 -0800 Subject: [PATCH 02/24] xfs: quota: move to time64_t interfaces As a preparation for removing the 32-bit time_t type and all associated interfaces, change xfs to use time64_t and ktime_get_real_seconds() for the quota housekeeping. This avoids one difference between 32-bit and 64-bit kernels, raising the theoretical limit for the quota grace period to year 2106 on 32-bit instead of year 2038. 
Note that common user space tools using the XFS quotactl interface instead of the generic one still use the y2038 dates. To fix quotas properly, both the on-disk format and user space still need to be changed. Signed-off-by: Arnd Bergmann Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_dquot.c | 6 +++--- fs/xfs/xfs_qm.h | 6 +++--- fs/xfs/xfs_quotaops.c | 6 +++--- fs/xfs/xfs_trans_dquot.c | 8 +++++--- 4 files changed, 14 insertions(+), 12 deletions(-) diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 2bff21ca9d78..9cfd3209f52b 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -137,7 +137,7 @@ xfs_qm_adjust_dqtimers( (d->d_blk_hardlimit && (be64_to_cpu(d->d_bcount) > be64_to_cpu(d->d_blk_hardlimit)))) { - d->d_btimer = cpu_to_be32(get_seconds() + + d->d_btimer = cpu_to_be32(ktime_get_real_seconds() + mp->m_quotainfo->qi_btimelimit); } else { d->d_bwarns = 0; @@ -160,7 +160,7 @@ xfs_qm_adjust_dqtimers( (d->d_ino_hardlimit && (be64_to_cpu(d->d_icount) > be64_to_cpu(d->d_ino_hardlimit)))) { - d->d_itimer = cpu_to_be32(get_seconds() + + d->d_itimer = cpu_to_be32(ktime_get_real_seconds() + mp->m_quotainfo->qi_itimelimit); } else { d->d_iwarns = 0; @@ -183,7 +183,7 @@ xfs_qm_adjust_dqtimers( (d->d_rtb_hardlimit && (be64_to_cpu(d->d_rtbcount) > be64_to_cpu(d->d_rtb_hardlimit)))) { - d->d_rtbtimer = cpu_to_be32(get_seconds() + + d->d_rtbtimer = cpu_to_be32(ktime_get_real_seconds() + mp->m_quotainfo->qi_rtbtimelimit); } else { d->d_rtbwarns = 0; diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h index 7823af39008b..4e57edca8bce 100644 --- a/fs/xfs/xfs_qm.h +++ b/fs/xfs/xfs_qm.h @@ -64,9 +64,9 @@ struct xfs_quotainfo { struct xfs_inode *qi_pquotaip; /* project quota inode */ struct list_lru qi_lru; int qi_dquots; - time_t qi_btimelimit; /* limit for blks timer */ - time_t qi_itimelimit; /* limit for inodes timer */ - time_t qi_rtbtimelimit;/* limit for rt blks timer */ + time64_t qi_btimelimit; /* limit 
for blks timer */ + time64_t qi_itimelimit; /* limit for inodes timer */ + time64_t qi_rtbtimelimit;/* limit for rt blks timer */ xfs_qwarncnt_t qi_bwarnlimit; /* limit for blks warnings */ xfs_qwarncnt_t qi_iwarnlimit; /* limit for inodes warnings */ xfs_qwarncnt_t qi_rtbwarnlimit;/* limit for rt blks warnings */ diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c index c7de17deeae6..38669e827206 100644 --- a/fs/xfs/xfs_quotaops.c +++ b/fs/xfs/xfs_quotaops.c @@ -37,9 +37,9 @@ xfs_qm_fill_state( tstate->flags |= QCI_SYSFILE; tstate->blocks = ip->i_d.di_nblocks; tstate->nextents = ip->i_d.di_nextents; - tstate->spc_timelimit = q->qi_btimelimit; - tstate->ino_timelimit = q->qi_itimelimit; - tstate->rt_spc_timelimit = q->qi_rtbtimelimit; + tstate->spc_timelimit = (u32)q->qi_btimelimit; + tstate->ino_timelimit = (u32)q->qi_itimelimit; + tstate->rt_spc_timelimit = (u32)q->qi_rtbtimelimit; tstate->spc_warnlimit = q->qi_bwarnlimit; tstate->ino_warnlimit = q->qi_iwarnlimit; tstate->rt_spc_warnlimit = q->qi_rtbwarnlimit; diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c index a6fe2d8dc40f..d1b9869bc5fa 100644 --- a/fs/xfs/xfs_trans_dquot.c +++ b/fs/xfs/xfs_trans_dquot.c @@ -580,7 +580,7 @@ xfs_trans_dqresv( { xfs_qcnt_t hardlimit; xfs_qcnt_t softlimit; - time_t timer; + time64_t timer; xfs_qwarncnt_t warns; xfs_qwarncnt_t warnlimit; xfs_qcnt_t total_count; @@ -635,7 +635,8 @@ xfs_trans_dqresv( goto error_return; } if (softlimit && total_count > softlimit) { - if ((timer != 0 && get_seconds() > timer) || + if ((timer != 0 && + ktime_get_real_seconds() > timer) || (warns != 0 && warns >= warnlimit)) { xfs_quota_warn(mp, dqp, QUOTA_NL_BSOFTLONGWARN); @@ -662,7 +663,8 @@ xfs_trans_dqresv( goto error_return; } if (softlimit && total_count > softlimit) { - if ((timer != 0 && get_seconds() > timer) || + if ((timer != 0 && + ktime_get_real_seconds() > timer) || (warns != 0 && warns >= warnlimit)) { xfs_quota_warn(mp, dqp, QUOTA_NL_ISOFTLONGWARN); From 
5a57c05b56b6eb2b4e3eb2a9f205e39e849325a1 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Thu, 2 Jan 2020 13:31:54 -0800 Subject: [PATCH 03/24] xfs: remove shadow variable in xfs_btree_lshift Sparse warns about a shadow variable in this function after the Fixes: commit added another int i; with larger scope. It's safe to remove the one with the smaller scope to fix this shadow, although the shadow itself is harmless. Fixes: 2c813ad66a72 ("xfs: support btrees with overlapping intervals for keys") Signed-off-by: Eric Sandeen Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_btree.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index e2cc98931552..b22c7e928eb1 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -2389,8 +2389,6 @@ xfs_btree_lshift( XFS_BTREE_STATS_ADD(cur, moves, rrecs - 1); if (level > 0) { /* It's a nonleaf. operate on keys and ptrs */ - int i; /* loop index */ - for (i = 0; i < rrecs; i++) { error = xfs_btree_debug_check_ptr(cur, rpp, i + 1, level); if (error) From 953aa9d136f53e226448dbd801a905c28f8071bf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 7 Jan 2020 15:25:37 -0800 Subject: [PATCH 04/24] xfs: clear kernel only flags in XFS_IOC_ATTRMULTI_BY_HANDLE Don't allow passing arbitrary flags as they change behavior including memory allocation that the call stack is not prepared for. Fixes: ddbca70cc45c ("xfs: allocate xattr buffer on demand") Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J.
Wong --- fs/xfs/libxfs/xfs_attr.h | 7 +++++-- fs/xfs/xfs_ioctl.c | 2 ++ fs/xfs/xfs_ioctl32.c | 2 ++ 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h index 94badfa1743e..91c2cb14276e 100644 --- a/fs/xfs/libxfs/xfs_attr.h +++ b/fs/xfs/libxfs/xfs_attr.h @@ -26,7 +26,7 @@ struct xfs_attr_list_context; *========================================================================*/ -#define ATTR_DONTFOLLOW 0x0001 /* -- unused, from IRIX -- */ +#define ATTR_DONTFOLLOW 0x0001 /* -- ignored, from IRIX -- */ #define ATTR_ROOT 0x0002 /* use attrs in root (trusted) namespace */ #define ATTR_TRUST 0x0004 /* -- unused, from IRIX -- */ #define ATTR_SECURE 0x0008 /* use attrs in security namespace */ @@ -37,7 +37,10 @@ struct xfs_attr_list_context; #define ATTR_KERNOVAL 0x2000 /* [kernel] get attr size only, not value */ #define ATTR_INCOMPLETE 0x4000 /* [kernel] return INCOMPLETE attr keys */ -#define ATTR_ALLOC 0x8000 /* allocate xattr buffer on demand */ +#define ATTR_ALLOC 0x8000 /* [kernel] allocate xattr buffer on demand */ + +#define ATTR_KERNEL_FLAGS \ + (ATTR_KERNOTIME | ATTR_KERNOVAL | ATTR_INCOMPLETE | ATTR_ALLOC) #define XFS_ATTR_FLAGS \ { ATTR_DONTFOLLOW, "DONTFOLLOW" }, \ diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 7b35d62ede9f..edfbdb8f85e2 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -462,6 +462,8 @@ xfs_attrmulti_by_handle( error = 0; for (i = 0; i < am_hreq.opcount; i++) { + ops[i].am_flags &= ~ATTR_KERNEL_FLAGS; + ops[i].am_error = strncpy_from_user((char *)attr_name, ops[i].am_attrname, MAXNAMELEN); if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN) diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index a49bd80b2c3b..20b3edc10f48 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c @@ -450,6 +450,8 @@ xfs_compat_attrmulti_by_handle( error = 0; for (i = 0; i < am_hreq.opcount; i++) { + ops[i].am_flags &= ~ATTR_KERNEL_FLAGS; + ops[i].am_error = 
strncpy_from_user((char *)attr_name, compat_ptr(ops[i].am_attrname), MAXNAMELEN); From 84fd081f8ae92e05ace47d24e58f26f7af2994c7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 7 Jan 2020 15:25:38 -0800 Subject: [PATCH 05/24] xfs: reject invalid flags combinations in XFS_IOC_ATTRMULTI_BY_HANDLE While the flags field in the ABI and the on-disk format allows for multiple namespace flags, that is a logically invalid combination that scrub complains about. Reject it at the ioctl level, as all other interfaces already get this right at higher levels. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_ioctl.c | 5 +++++ fs/xfs/xfs_ioctl32.c | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index edfbdb8f85e2..17b4a981be4d 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -462,6 +462,11 @@ xfs_attrmulti_by_handle( error = 0; for (i = 0; i < am_hreq.opcount; i++) { + if ((ops[i].am_flags & ATTR_ROOT) && + (ops[i].am_flags & ATTR_SECURE)) { + ops[i].am_error = -EINVAL; + continue; + } ops[i].am_flags &= ~ATTR_KERNEL_FLAGS; ops[i].am_error = strncpy_from_user((char *)attr_name, diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index 20b3edc10f48..769581a79c58 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c @@ -450,6 +450,11 @@ xfs_compat_attrmulti_by_handle( error = 0; for (i = 0; i < am_hreq.opcount; i++) { + if ((ops[i].am_flags & ATTR_ROOT) && + (ops[i].am_flags & ATTR_SECURE)) { + ops[i].am_error = -EINVAL; + continue; + } ops[i].am_flags &= ~ATTR_KERNEL_FLAGS; ops[i].am_error = strncpy_from_user((char *)attr_name, From 8cde9f259c7d91d05b12cc68993a9643d6b4e45f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 7 Jan 2020 15:25:38 -0800 Subject: [PATCH 06/24] xfs: also remove cached ACLs when removing the underlying attr We should not just invalidate the ACL when setting the underlying attribute, but also when removing it.
The ioctl interface gets that right, but the normal xattr interface skipped the xfs_forget_acl due to an early return. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_xattr.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c index 383f0203d103..2288f20ae282 100644 --- a/fs/xfs/xfs_xattr.c +++ b/fs/xfs/xfs_xattr.c @@ -74,10 +74,11 @@ xfs_xattr_set(const struct xattr_handler *handler, struct dentry *unused, if (flags & XATTR_REPLACE) xflags |= ATTR_REPLACE; - if (!value) - return xfs_attr_remove(ip, (unsigned char *)name, xflags); - error = xfs_attr_set(ip, (unsigned char *)name, + if (value) + error = xfs_attr_set(ip, (unsigned char *)name, (void *)value, size, xflags); + else + error = xfs_attr_remove(ip, (unsigned char *)name, xflags); if (!error) xfs_forget_acl(inode, name, xflags); From 780d29057781d986cd87dbbe232cd02876ad430f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 7 Jan 2020 15:25:39 -0800 Subject: [PATCH 07/24] xfs: fix misuse of the XFS_ATTR_INCOMPLETE flag XFS_ATTR_INCOMPLETE is a flag in the on-disk attribute format, and thus in a different namespace as the ATTR_* flags in xfs_da_args.flags. Switch to using a XFS_DA_OP_INCOMPLETE flag in op_flags instead. Without this users might be able to inject this flag into operations using the attr by handle ioctl. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_attr.c | 2 +- fs/xfs/libxfs/xfs_attr_leaf.c | 4 ++-- fs/xfs/libxfs/xfs_da_btree.h | 4 +++- fs/xfs/libxfs/xfs_da_format.h | 2 -- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 0d7fcc983b3d..2368a1bfe7e8 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -1007,7 +1007,7 @@ restart: * The INCOMPLETE flag means that we will find the "old" * attr, not the "new" one.
*/ - args->flags |= XFS_ATTR_INCOMPLETE; + args->op_flags |= XFS_DA_OP_INCOMPLETE; state = xfs_da_state_alloc(); state->args = args; state->mp = mp; diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index 08d4b10ae2d5..fed537a4353d 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -2403,8 +2403,8 @@ xfs_attr3_leaf_lookup_int( * If we are looking for INCOMPLETE entries, show only those. * If we are looking for complete entries, show only those. */ - if ((args->flags & XFS_ATTR_INCOMPLETE) != - (entry->flags & XFS_ATTR_INCOMPLETE)) { + if (!!(args->op_flags & XFS_DA_OP_INCOMPLETE) != + !!(entry->flags & XFS_ATTR_INCOMPLETE)) { continue; } if (entry->flags & XFS_ATTR_LOCAL) { diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h index e16610d1c14f..0f4fbb0889ff 100644 --- a/fs/xfs/libxfs/xfs_da_btree.h +++ b/fs/xfs/libxfs/xfs_da_btree.h @@ -89,6 +89,7 @@ typedef struct xfs_da_args { #define XFS_DA_OP_OKNOENT 0x0008 /* lookup/add op, ENOENT ok, else die */ #define XFS_DA_OP_CILOOKUP 0x0010 /* lookup to return CI name if found */ #define XFS_DA_OP_ALLOCVAL 0x0020 /* lookup to alloc buffer if found */ +#define XFS_DA_OP_INCOMPLETE 0x0040 /* lookup INCOMPLETE attr keys */ #define XFS_DA_OP_FLAGS \ { XFS_DA_OP_JUSTCHECK, "JUSTCHECK" }, \ @@ -96,7 +97,8 @@ typedef struct xfs_da_args { { XFS_DA_OP_ADDNAME, "ADDNAME" }, \ { XFS_DA_OP_OKNOENT, "OKNOENT" }, \ { XFS_DA_OP_CILOOKUP, "CILOOKUP" }, \ - { XFS_DA_OP_ALLOCVAL, "ALLOCVAL" } + { XFS_DA_OP_ALLOCVAL, "ALLOCVAL" }, \ + { XFS_DA_OP_INCOMPLETE, "INCOMPLETE" } /* * Storage for holding state during Btree searches and split/join ops. diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h index 3dee33043e09..05615d1f4113 100644 --- a/fs/xfs/libxfs/xfs_da_format.h +++ b/fs/xfs/libxfs/xfs_da_format.h @@ -683,8 +683,6 @@ struct xfs_attr3_leafblock { /* * Flags used in the leaf_entry[i].flags field. 
- * NOTE: the INCOMPLETE bit must not collide with the flags bits specified - * on the system call, they are "or"ed together for various operations. */ #define XFS_ATTR_LOCAL_BIT 0 /* attr is stored locally */ #define XFS_ATTR_ROOT_BIT 1 /* limit access to trusted attrs */ From d29f781c32b1d1366c8ac10be31dad1e1f39c336 Mon Sep 17 00:00:00 2001 From: Allison Henderson Date: Tue, 7 Jan 2020 15:26:15 -0800 Subject: [PATCH 08/24] xfs: Remove all strlen in all xfs_attr_* functions for attr names. This helps to pre-simplify the extra handling of the null terminator in delayed operations which use memcpy rather than strlen. Later when we introduce parent pointers, attribute names will become binary, so strlen will not work at all. Removing uses of strlen now will help reduce complexities later Signed-off-by: Allison Collins Reviewed-by: Darrick J. Wong Reviewed-by: Brian Foster Reviewed-by: Christoph Hellwig Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_attr.c | 12 ++++++++---- fs/xfs/libxfs/xfs_attr.h | 8 +++++--- fs/xfs/xfs_acl.c | 11 +++++++---- fs/xfs/xfs_ioctl.c | 13 ++++++++++--- fs/xfs/xfs_iops.c | 6 ++++-- fs/xfs/xfs_xattr.c | 11 +++++++---- 6 files changed, 41 insertions(+), 20 deletions(-) diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 2368a1bfe7e8..e6149720ce02 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -62,6 +62,7 @@ xfs_attr_args_init( struct xfs_da_args *args, struct xfs_inode *dp, const unsigned char *name, + size_t namelen, int flags) { @@ -74,7 +75,7 @@ xfs_attr_args_init( args->dp = dp; args->flags = flags; args->name = name; - args->namelen = strlen((const char *)name); + args->namelen = namelen; if (args->namelen >= MAXNAMELEN) return -EFAULT; /* match IRIX behaviour */ @@ -139,6 +140,7 @@ int xfs_attr_get( struct xfs_inode *ip, const unsigned char *name, + size_t namelen, unsigned char **value, int *valuelenp, int flags) @@ -154,7 +156,7 @@ xfs_attr_get( if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 
return -EIO; - error = xfs_attr_args_init(&args, ip, name, flags); + error = xfs_attr_args_init(&args, ip, name, namelen, flags); if (error) return error; @@ -338,6 +340,7 @@ int xfs_attr_set( struct xfs_inode *dp, const unsigned char *name, + size_t namelen, unsigned char *value, int valuelen, int flags) @@ -353,7 +356,7 @@ xfs_attr_set( if (XFS_FORCED_SHUTDOWN(dp->i_mount)) return -EIO; - error = xfs_attr_args_init(&args, dp, name, flags); + error = xfs_attr_args_init(&args, dp, name, namelen, flags); if (error) return error; @@ -442,6 +445,7 @@ int xfs_attr_remove( struct xfs_inode *dp, const unsigned char *name, + size_t namelen, int flags) { struct xfs_mount *mp = dp->i_mount; @@ -453,7 +457,7 @@ xfs_attr_remove( if (XFS_FORCED_SHUTDOWN(dp->i_mount)) return -EIO; - error = xfs_attr_args_init(&args, dp, name, flags); + error = xfs_attr_args_init(&args, dp, name, namelen, flags); if (error) return error; diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h index 91c2cb14276e..4243b2272642 100644 --- a/fs/xfs/libxfs/xfs_attr.h +++ b/fs/xfs/libxfs/xfs_attr.h @@ -148,11 +148,13 @@ int xfs_attr_list_int(struct xfs_attr_list_context *); int xfs_inode_hasattr(struct xfs_inode *ip); int xfs_attr_get_ilocked(struct xfs_inode *ip, struct xfs_da_args *args); int xfs_attr_get(struct xfs_inode *ip, const unsigned char *name, - unsigned char **value, int *valuelenp, int flags); + size_t namelen, unsigned char **value, int *valuelenp, + int flags); int xfs_attr_set(struct xfs_inode *dp, const unsigned char *name, - unsigned char *value, int valuelen, int flags); + size_t namelen, unsigned char *value, int valuelen, int flags); int xfs_attr_set_args(struct xfs_da_args *args); -int xfs_attr_remove(struct xfs_inode *dp, const unsigned char *name, int flags); +int xfs_attr_remove(struct xfs_inode *dp, const unsigned char *name, + size_t namelen, int flags); int xfs_attr_remove_args(struct xfs_da_args *args); int xfs_attr_list(struct xfs_inode *dp, char *buffer, int 
bufsize, int flags, struct attrlist_cursor_kern *cursor); diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index 91693fce34a8..cd743fad8478 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -145,7 +145,8 @@ xfs_get_acl(struct inode *inode, int type) * go out to the disk. */ len = XFS_ACL_MAX_SIZE(ip->i_mount); - error = xfs_attr_get(ip, ea_name, (unsigned char **)&xfs_acl, &len, + error = xfs_attr_get(ip, ea_name, strlen(ea_name), + (unsigned char **)&xfs_acl, &len, ATTR_ALLOC | ATTR_ROOT); if (error) { /* @@ -196,15 +197,17 @@ __xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) len -= sizeof(struct xfs_acl_entry) * (XFS_ACL_MAX_ENTRIES(ip->i_mount) - acl->a_count); - error = xfs_attr_set(ip, ea_name, (unsigned char *)xfs_acl, - len, ATTR_ROOT); + error = xfs_attr_set(ip, ea_name, strlen(ea_name), + (unsigned char *)xfs_acl, len, ATTR_ROOT); kmem_free(xfs_acl); } else { /* * A NULL ACL argument means we want to remove the ACL. */ - error = xfs_attr_remove(ip, ea_name, ATTR_ROOT); + error = xfs_attr_remove(ip, ea_name, + strlen(ea_name), + ATTR_ROOT); /* * If the attribute didn't exist to start with that's fine. 
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 17b4a981be4d..d42de92cb283 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -357,6 +357,7 @@ xfs_attrmulti_attr_get( { unsigned char *kbuf; int error = -EFAULT; + size_t namelen; if (*len > XFS_XATTR_SIZE_MAX) return -EINVAL; @@ -364,7 +365,9 @@ xfs_attrmulti_attr_get( if (!kbuf) return -ENOMEM; - error = xfs_attr_get(XFS_I(inode), name, &kbuf, (int *)len, flags); + namelen = strlen(name); + error = xfs_attr_get(XFS_I(inode), name, namelen, &kbuf, (int *)len, + flags); if (error) goto out_kfree; @@ -386,6 +389,7 @@ xfs_attrmulti_attr_set( { unsigned char *kbuf; int error; + size_t namelen; if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) return -EPERM; @@ -396,7 +400,8 @@ xfs_attrmulti_attr_set( if (IS_ERR(kbuf)) return PTR_ERR(kbuf); - error = xfs_attr_set(XFS_I(inode), name, kbuf, len, flags); + namelen = strlen(name); + error = xfs_attr_set(XFS_I(inode), name, namelen, kbuf, len, flags); if (!error) xfs_forget_acl(inode, name, flags); kfree(kbuf); @@ -410,10 +415,12 @@ xfs_attrmulti_attr_remove( uint32_t flags) { int error; + size_t namelen; if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) return -EPERM; - error = xfs_attr_remove(XFS_I(inode), name, flags); + namelen = strlen(name); + error = xfs_attr_remove(XFS_I(inode), name, namelen, flags); if (!error) xfs_forget_acl(inode, name, flags); return error; diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 8afe69ca188b..81f2f93caec0 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -50,8 +50,10 @@ xfs_initxattrs( int error = 0; for (xattr = xattr_array; xattr->name != NULL; xattr++) { - error = xfs_attr_set(ip, xattr->name, xattr->value, - xattr->value_len, ATTR_SECURE); + error = xfs_attr_set(ip, xattr->name, + strlen(xattr->name), + xattr->value, xattr->value_len, + ATTR_SECURE); if (error < 0) break; } diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c index 2288f20ae282..b0fedb543f97 100644 --- a/fs/xfs/xfs_xattr.c +++ 
b/fs/xfs/xfs_xattr.c @@ -24,6 +24,7 @@ xfs_xattr_get(const struct xattr_handler *handler, struct dentry *unused, int xflags = handler->flags; struct xfs_inode *ip = XFS_I(inode); int error, asize = size; + size_t namelen = strlen(name); /* Convert Linux syscall to XFS internal ATTR flags */ if (!size) { @@ -31,7 +32,8 @@ xfs_xattr_get(const struct xattr_handler *handler, struct dentry *unused, value = NULL; } - error = xfs_attr_get(ip, name, (unsigned char **)&value, &asize, xflags); + error = xfs_attr_get(ip, name, namelen, (unsigned char **)&value, + &asize, xflags); if (error) return error; return asize; @@ -67,6 +69,7 @@ xfs_xattr_set(const struct xattr_handler *handler, struct dentry *unused, int xflags = handler->flags; struct xfs_inode *ip = XFS_I(inode); int error; + size_t namelen = strlen(name); /* Convert Linux syscall to XFS internal ATTR flags */ if (flags & XATTR_CREATE) @@ -75,10 +78,10 @@ xfs_xattr_set(const struct xattr_handler *handler, struct dentry *unused, xflags |= ATTR_REPLACE; if (value) - error = xfs_attr_set(ip, (unsigned char *)name, - (void *)value, size, xflags); + error = xfs_attr_set(ip, name, namelen, (void *)value, size, + xflags); else - error = xfs_attr_remove(ip, (unsigned char *)name, xflags); + error = xfs_attr_remove(ip, name, namelen, xflags); if (!error) xfs_forget_acl(inode, name, xflags); From 7cb41b1d14e139f7f4247fc29af25e59139bc1ac Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 7 Jan 2020 16:11:30 -0800 Subject: [PATCH 09/24] xfs: remove bogus assertion when online repair isn't enabled We don't need to assert on !REPAIR in the stub version of xrep_calc_ag_resblks that is called when online repair hasn't been compiled into the kernel because none of the repair code will ever run. Reported-by: Eryu Guan Signed-off-by: Darrick J. 
Wong Reviewed-by: Christoph Hellwig --- fs/xfs/scrub/repair.h | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h index 60c61d7052a8..c3422403b169 100644 --- a/fs/xfs/scrub/repair.h +++ b/fs/xfs/scrub/repair.h @@ -75,7 +75,6 @@ static inline xfs_extlen_t xrep_calc_ag_resblks( struct xfs_scrub *sc) { - ASSERT(!(sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR)); return 0; } From a5084865524dee1fe8ea1fee17c60b4369ad4f5e Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 2 Jan 2020 13:25:00 -0800 Subject: [PATCH 10/24] xfs: introduce XFS_MAX_FILEOFF Introduce a new #define for the maximum supported file block offset. We'll use this in the next patch to make it more obvious that we're doing some operation for all possible inode fork mappings after a given offset. We can't use ULLONG_MAX here because bunmapi uses that to detect when it's done. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_format.h | 7 +++++++ fs/xfs/xfs_reflink.c | 3 ++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index 1b7dcbae051c..77e9fa385980 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -1540,6 +1540,13 @@ typedef struct xfs_bmdr_block { #define BMBT_BLOCKCOUNT_BITLEN 21 #define BMBT_STARTOFF_MASK ((1ULL << BMBT_STARTOFF_BITLEN) - 1) +#define BMBT_BLOCKCOUNT_MASK ((1ULL << BMBT_BLOCKCOUNT_BITLEN) - 1) + +/* + * bmbt records have a file offset (block) field that is 54 bits wide, so this + * is the largest xfs_fileoff_t that we ever expect to see. 
+ */ +#define XFS_MAX_FILEOFF (BMBT_STARTOFF_MASK + BMBT_BLOCKCOUNT_MASK) typedef struct xfs_bmbt_rec { __be64 l0, l1; diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index de451235c4ee..7a6c94295b8a 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -1457,7 +1457,8 @@ xfs_reflink_clear_inode_flag( * We didn't find any shared blocks so turn off the reflink flag. * First, get rid of any leftover CoW mappings. */ - error = xfs_reflink_cancel_cow_blocks(ip, tpp, 0, NULLFILEOFF, true); + error = xfs_reflink_cancel_cow_blocks(ip, tpp, 0, XFS_MAX_FILEOFF, + true); if (error) return error; From 4bbb04abb4ee2e1f7d65e52557ba1c4038ea43ed Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 2 Jan 2020 13:20:13 -0800 Subject: [PATCH 11/24] xfs: truncate should remove all blocks, not just to the end of the page cache xfs_itruncate_extents_flags() is supposed to unmap every block in a file from EOF onwards. Oddly, it uses s_maxbytes as the upper limit to the bunmapi range, even though s_maxbytes reflects the highest offset the pagecache can support, not the highest offset that XFS supports. The result of this confusion is that if you create a 20T file on a 64-bit machine, mount the filesystem on a 32-bit machine, and remove the file, we leak everything above 16T. Fix this by capping the bunmapi request at the maximum possible block offset, not s_maxbytes. Signed-off-by: Darrick J. 
Wong Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_inode.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 401da197f012..1309f25c0d2b 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1518,7 +1518,6 @@ xfs_itruncate_extents_flags( struct xfs_mount *mp = ip->i_mount; struct xfs_trans *tp = *tpp; xfs_fileoff_t first_unmap_block; - xfs_fileoff_t last_block; xfs_filblks_t unmap_len; int error = 0; int done = 0; @@ -1541,21 +1540,22 @@ xfs_itruncate_extents_flags( * the end of the file (in a crash where the space is allocated * but the inode size is not yet updated), simply remove any * blocks which show up between the new EOF and the maximum - * possible file size. If the first block to be removed is - * beyond the maximum file size (ie it is the same as last_block), - * then there is nothing to do. + * possible file size. + * + * We have to free all the blocks to the bmbt maximum offset, even if + * the page cache can't scale that far. */ first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size); - last_block = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes); - if (first_unmap_block == last_block) + if (first_unmap_block >= XFS_MAX_FILEOFF) { + WARN_ON_ONCE(first_unmap_block > XFS_MAX_FILEOFF); return 0; + } - ASSERT(first_unmap_block < last_block); - unmap_len = last_block - first_unmap_block + 1; - while (!done) { + unmap_len = XFS_MAX_FILEOFF - first_unmap_block + 1; + while (unmap_len > 0) { ASSERT(tp->t_firstblock == NULLFSBLOCK); - error = xfs_bunmapi(tp, ip, first_unmap_block, unmap_len, flags, - XFS_ITRUNC_MAX_EXTENTS, &done); + error = __xfs_bunmapi(tp, ip, first_unmap_block, &unmap_len, + flags, XFS_ITRUNC_MAX_EXTENTS); if (error) goto out; @@ -1575,7 +1575,7 @@ xfs_itruncate_extents_flags( if (whichfork == XFS_DATA_FORK) { /* Remove all pending CoW reservations. 
*/ error = xfs_reflink_cancel_cow_blocks(ip, &tp, - first_unmap_block, last_block, true); + first_unmap_block, XFS_MAX_FILEOFF, true); if (error) goto out; From 932befe39ddea29cf47f4f1dc080d3dba668f0ca Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 2 Jan 2020 13:20:13 -0800 Subject: [PATCH 12/24] xfs: fix s_maxbytes computation on 32-bit kernels I observed a hang in generic/308 while running fstests on a i686 kernel. The hang occurred when trying to purge the pagecache on a large sparse file that had a page created past MAX_LFS_FILESIZE, which caused an integer overflow in the pagecache xarray and resulted in an infinite loop. I then noticed that Linus changed the definition of MAX_LFS_FILESIZE in commit 0cc3b0ec23ce ("Clarify (and fix) MAX_LFS_FILESIZE macros") so that it is now one page short of the maximum page index on 32-bit kernels. Because the XFS function to compute max offset open-codes the 2005-era MAX_LFS_FILESIZE computation and neither the vfs nor mm perform any sanity checking of s_maxbytes, the code in generic/308 can create a page above the pagecache's limit and kaboom. Fix all this by setting s_maxbytes to MAX_LFS_FILESIZE directly and aborting the mount with a warning if our assumptions ever break. I have no answer for why this seems to have been broken for years and nobody noticed. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_super.c | 48 ++++++++++++++++++++-------------------------- 1 file changed, 21 insertions(+), 27 deletions(-) diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index d9ae27ddf253..760901783944 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -193,32 +193,6 @@ xfs_fs_show_options( return 0; } -static uint64_t -xfs_max_file_offset( - unsigned int blockshift) -{ - unsigned int pagefactor = 1; - unsigned int bitshift = BITS_PER_LONG - 1; - - /* Figure out maximum filesize, on Linux this can depend on - * the filesystem blocksize (on 32 bit platforms). 
- * __block_write_begin does this in an [unsigned] long long... - * page->index << (PAGE_SHIFT - bbits) - * So, for page sized blocks (4K on 32 bit platforms), - * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is - * (((u64)PAGE_SIZE << (BITS_PER_LONG-1))-1) - * but for smaller blocksizes it is less (bbits = log2 bsize). - */ - -#if BITS_PER_LONG == 32 - ASSERT(sizeof(sector_t) == 8); - pagefactor = PAGE_SIZE; - bitshift = BITS_PER_LONG; -#endif - - return (((uint64_t)pagefactor) << bitshift) - 1; -} - /* * Set parameters for inode allocation heuristics, taking into account * filesystem size and inode32/inode64 mount options; i.e. specifically @@ -1424,6 +1398,26 @@ xfs_fc_fill_super( if (error) goto out_free_sb; + /* + * XFS block mappings use 54 bits to store the logical block offset. + * This should suffice to handle the maximum file size that the VFS + * supports (currently 2^63 bytes on 64-bit and ULONG_MAX << PAGE_SHIFT + * bytes on 32-bit), but as XFS and VFS have gotten the s_maxbytes + * calculation wrong on 32-bit kernels in the past, we'll add a WARN_ON + * to check this assertion. + * + * Avoid integer overflow by comparing the maximum bmbt offset to the + * maximum pagecache offset in units of fs blocks. 
+ */ + if (XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE) > XFS_MAX_FILEOFF) { + xfs_warn(mp, +"MAX_LFS_FILESIZE block offset (%llu) exceeds extent map maximum (%llu)!", + XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE), + XFS_MAX_FILEOFF); + error = -EINVAL; + goto out_free_sb; + } + error = xfs_filestream_mount(mp); if (error) goto out_free_sb; @@ -1435,7 +1429,7 @@ xfs_fc_fill_super( sb->s_magic = XFS_SUPER_MAGIC; sb->s_blocksize = mp->m_sb.sb_blocksize; sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1; - sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits); + sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_max_links = XFS_MAXLINK; sb->s_time_gran = 1; sb->s_time_min = S32_MIN; From ca78eee7b4ac13b63f5e872f7c3a5ca66b2df8da Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Tue, 14 Jan 2020 08:03:05 -0800 Subject: [PATCH 13/24] xfs: Add __packed to xfs_dir2_sf_entry_t definition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit xfs_check_ondisk_structs() verifies that the sizes of the data types used by xfs are correct via the XFS_CHECK_STRUCT_SIZE() macro. Since structure padding can vary depending on the ABI (e.g. on ARM OABI structures are padded to a multiple of 32 bits), it may happen that the xfs_dir2_sf_entry_t size check breaks the compilation with the assertion below: In file included from linux/include/linux/string.h:6, from linux/include/linux/uuid.h:12, from linux/fs/xfs/xfs_linux.h:10, from linux/fs/xfs/xfs.h:22, from linux/fs/xfs/xfs_super.c:7: In function ‘xfs_check_ondisk_structs’, inlined from ‘init_xfs_fs’ at linux/fs/xfs/xfs_super.c:2025:2: linux/include/linux/compiler.h:350:38: error: call to ‘__compiletime_assert_107’ declared with attribute error: XFS: sizeof(xfs_dir2_sf_entry_t) is wrong, expected 3 _compiletime_assert(condition, msg, __compiletime_assert_, __LINE__) Restore the correct behavior by adding __packed to the structure definition. Cc: Darrick J.
Wong Suggested-by: Christoph Hellwig Signed-off-by: Vincenzo Frascino Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_da_format.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h index 05615d1f4113..734837a9b51a 100644 --- a/fs/xfs/libxfs/xfs_da_format.h +++ b/fs/xfs/libxfs/xfs_da_format.h @@ -217,7 +217,7 @@ typedef struct xfs_dir2_sf_entry { * A 64-bit or 32-bit inode number follows here, at a variable offset * after the name. */ -} xfs_dir2_sf_entry_t; +} __packed xfs_dir2_sf_entry_t; static inline int xfs_dir2_sf_hdr_size(int i8count) { From 7b53b868a1812a9a6ab5e69249394bd37f29ce2c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 15 Jan 2020 09:11:17 -0800 Subject: [PATCH 14/24] xfs: fix IOCB_NOWAIT handling in xfs_file_dio_aio_read Direct I/O reads can also be used with RWF_NOWAIT & co. Fix the inode locking in xfs_file_dio_aio_read to take IOCB_NOWAIT into account. Signed-off-by: Christoph Hellwig Reviewed-by: Carlos Maiolino Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_file.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index c93250108952..b8a4a3f29b36 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -187,7 +187,12 @@ xfs_file_dio_aio_read( file_accessed(iocb->ki_filp); - xfs_ilock(ip, XFS_IOLOCK_SHARED); + if (iocb->ki_flags & IOCB_NOWAIT) { + if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) + return -EAGAIN; + } else { + xfs_ilock(ip, XFS_IOLOCK_SHARED); + } ret = iomap_dio_rw(iocb, to, &xfs_read_iomap_ops, NULL, is_sync_kiocb(iocb)); xfs_iunlock(ip, XFS_IOLOCK_SHARED); From 8edbb26b06023de31ad7d4c9b984d99f66577929 Mon Sep 17 00:00:00 2001 From: "Darrick J. 
Wong" Date: Wed, 8 Jan 2020 09:08:07 -0800 Subject: [PATCH 15/24] xfs: refactor remote attr value buffer invalidation Hoist the code that invalidates remote extended attribute value buffers into a separate helper function. This prepares us for a memory corruption fix in the next patch. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_attr_remote.c | 52 ++++++++++++++++++++------------- fs/xfs/libxfs/xfs_attr_remote.h | 2 ++ 2 files changed, 34 insertions(+), 20 deletions(-) diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c index a6ef5df42669..df1ab0569481 100644 --- a/fs/xfs/libxfs/xfs_attr_remote.c +++ b/fs/xfs/libxfs/xfs_attr_remote.c @@ -552,6 +552,33 @@ xfs_attr_rmtval_set( return 0; } +/* Mark stale any incore buffers for the remote value. */ +int +xfs_attr_rmtval_stale( + struct xfs_inode *ip, + struct xfs_bmbt_irec *map, + xfs_buf_flags_t incore_flags) +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_buf *bp; + + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + + if (XFS_IS_CORRUPT(mp, map->br_startblock == DELAYSTARTBLOCK) || + XFS_IS_CORRUPT(mp, map->br_startblock == HOLESTARTBLOCK)) + return -EFSCORRUPTED; + + bp = xfs_buf_incore(mp->m_ddev_targp, + XFS_FSB_TO_DADDR(mp, map->br_startblock), + XFS_FSB_TO_BB(mp, map->br_blockcount), incore_flags); + if (bp) { + xfs_buf_stale(bp); + xfs_buf_relse(bp); + } + + return 0; +} + /* * Remove the value associated with an attribute by deleting the * out-of-line buffer that it is stored on. 
@@ -560,7 +587,6 @@ int xfs_attr_rmtval_remove( struct xfs_da_args *args) { - struct xfs_mount *mp = args->dp->i_mount; xfs_dablk_t lblkno; int blkcnt; int error; @@ -575,9 +601,6 @@ xfs_attr_rmtval_remove( blkcnt = args->rmtblkcnt; while (blkcnt > 0) { struct xfs_bmbt_irec map; - struct xfs_buf *bp; - xfs_daddr_t dblkno; - int dblkcnt; int nmap; /* @@ -588,22 +611,11 @@ xfs_attr_rmtval_remove( blkcnt, &map, &nmap, XFS_BMAPI_ATTRFORK); if (error) return error; - ASSERT(nmap == 1); - ASSERT((map.br_startblock != DELAYSTARTBLOCK) && - (map.br_startblock != HOLESTARTBLOCK)); - - dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock), - dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount); - - /* - * If the "remote" value is in the cache, remove it. - */ - bp = xfs_buf_incore(mp->m_ddev_targp, dblkno, dblkcnt, XBF_TRYLOCK); - if (bp) { - xfs_buf_stale(bp); - xfs_buf_relse(bp); - bp = NULL; - } + if (XFS_IS_CORRUPT(args->dp->i_mount, nmap != 1)) + return -EFSCORRUPTED; + error = xfs_attr_rmtval_stale(args->dp, &map, XBF_TRYLOCK); + if (error) + return error; lblkno += map.br_blockcount; blkcnt -= map.br_blockcount; diff --git a/fs/xfs/libxfs/xfs_attr_remote.h b/fs/xfs/libxfs/xfs_attr_remote.h index 9d20b66ad379..6fb4572845ce 100644 --- a/fs/xfs/libxfs/xfs_attr_remote.h +++ b/fs/xfs/libxfs/xfs_attr_remote.h @@ -11,5 +11,7 @@ int xfs_attr3_rmt_blocks(struct xfs_mount *mp, int attrlen); int xfs_attr_rmtval_get(struct xfs_da_args *args); int xfs_attr_rmtval_set(struct xfs_da_args *args); int xfs_attr_rmtval_remove(struct xfs_da_args *args); +int xfs_attr_rmtval_stale(struct xfs_inode *ip, struct xfs_bmbt_irec *map, + xfs_buf_flags_t incore_flags); #endif /* __XFS_ATTR_REMOTE_H__ */ From e8db2aafcedb7d88320ab83f1000f1606b26d4d7 Mon Sep 17 00:00:00 2001 From: "Darrick J. 
Wong" Date: Tue, 7 Jan 2020 16:11:45 -0800 Subject: [PATCH 16/24] xfs: fix memory corruption during remote attr value buffer invalidation While running generic/103, I observed what looks like memory corruption and (with slub debugging turned on) a slub redzone warning on i386 when inactivating an inode with a 64k remote attr value. On a v5 filesystem, maximally sized remote attr values require one block more than 64k worth of space to hold both the value and the remote attribute value header (64 bytes). On a 4k block filesystem this results in a 68k buffer; on a 64k block filesystem, this would be a 128k buffer. Note that even though we'll never use more than 65,600 bytes of this buffer, XFS_MAX_BLOCKSIZE is 64k. This is a problem because the definition of struct xfs_buf_log_format allows for XFS_MAX_BLOCKSIZE worth of dirty bitmap (64k). On i386 when we invalidate a remote attribute, xfs_trans_binval zeroes all 68k worth of the dirty map, writing right off the end of the log item and corrupting memory. We've gotten away with this on x86_64 for years because the compiler inserts a u32 padding on the end of struct xfs_buf_log_format. Fortunately for us, remote attribute values are written to disk with xfs_bwrite(), which is to say that they are not logged. Fix the problem by removing all places where we could end up creating a buffer log item for a remote attribute value and leave a note explaining why. Next, replace the open-coded buffer invalidation with a call to the helper we created in the previous patch that does better checking for bad metadata before marking the buffer stale. Signed-off-by: Darrick J.
Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_attr_remote.c | 37 ++++++++++++++++++++---- fs/xfs/xfs_attr_inactive.c | 50 ++++++++++----------------------- 2 files changed, 46 insertions(+), 41 deletions(-) diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c index df1ab0569481..a266d05df146 100644 --- a/fs/xfs/libxfs/xfs_attr_remote.c +++ b/fs/xfs/libxfs/xfs_attr_remote.c @@ -25,6 +25,23 @@ #define ATTR_RMTVALUE_MAPSIZE 1 /* # of map entries at once */ +/* + * Remote Attribute Values + * ======================= + * + * Remote extended attribute values are conceptually simple -- they're written + * to data blocks mapped by an inode's attribute fork, and they have an upper + * size limit of 64k. Setting a value does not involve the XFS log. + * + * However, on a v5 filesystem, maximally sized remote attr values require one + * block more than 64k worth of space to hold both the remote attribute value + * header (64 bytes). On a 4k block filesystem this results in a 68k buffer; + * on a 64k block filesystem, this would be a 128k buffer. Note that the log + * format can only handle a dirty buffer of XFS_MAX_BLOCKSIZE length (64k). + * Therefore, we /must/ ensure that remote attribute value buffers never touch + * the logging system and therefore never have a log item. + */ + /* * Each contiguous block has a header, so it is not just a simple attribute * length to FSB conversion. 
@@ -401,17 +418,25 @@ xfs_attr_rmtval_get( (map[i].br_startblock != HOLESTARTBLOCK)); dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock); dblkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount); - error = xfs_trans_read_buf(mp, args->trans, - mp->m_ddev_targp, - dblkno, dblkcnt, 0, &bp, - &xfs_attr3_rmt_buf_ops); - if (error) + bp = xfs_buf_read(mp->m_ddev_targp, dblkno, dblkcnt, 0, + &xfs_attr3_rmt_buf_ops); + if (!bp) + return -ENOMEM; + error = bp->b_error; + if (error) { + xfs_buf_ioerror_alert(bp, __func__); + xfs_buf_relse(bp); + + /* bad CRC means corrupted metadata */ + if (error == -EFSBADCRC) + error = -EFSCORRUPTED; return error; + } error = xfs_attr_rmtval_copyout(mp, bp, args->dp->i_ino, &offset, &valuelen, &dst); - xfs_trans_brelse(args->trans, bp); + xfs_buf_relse(bp); if (error) return error; diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c index 5ff49523d8ea..edb079087a0c 100644 --- a/fs/xfs/xfs_attr_inactive.c +++ b/fs/xfs/xfs_attr_inactive.c @@ -25,22 +25,20 @@ #include "xfs_error.h" /* - * Look at all the extents for this logical region, - * invalidate any buffers that are incore/in transactions. + * Invalidate any incore buffers associated with this remote attribute value + * extent. We never log remote attribute value buffers, which means that they + * won't be attached to a transaction and are therefore safe to mark stale. + * The actual bunmapi will be taken care of later. 
*/ STATIC int -xfs_attr3_leaf_freextent( - struct xfs_trans **trans, +xfs_attr3_rmt_stale( struct xfs_inode *dp, xfs_dablk_t blkno, int blkcnt) { struct xfs_bmbt_irec map; - struct xfs_buf *bp; xfs_dablk_t tblkno; - xfs_daddr_t dblkno; int tblkcnt; - int dblkcnt; int nmap; int error; @@ -57,35 +55,19 @@ xfs_attr3_leaf_freextent( nmap = 1; error = xfs_bmapi_read(dp, (xfs_fileoff_t)tblkno, tblkcnt, &map, &nmap, XFS_BMAPI_ATTRFORK); - if (error) { + if (error) return error; - } - ASSERT(nmap == 1); - ASSERT(map.br_startblock != DELAYSTARTBLOCK); + if (XFS_IS_CORRUPT(dp->i_mount, nmap != 1)) + return -EFSCORRUPTED; /* - * If it's a hole, these are already unmapped - * so there's nothing to invalidate. + * Mark any incore buffers for the remote value as stale. We + * never log remote attr value buffers, so the buffer should be + * easy to kill. */ - if (map.br_startblock != HOLESTARTBLOCK) { - - dblkno = XFS_FSB_TO_DADDR(dp->i_mount, - map.br_startblock); - dblkcnt = XFS_FSB_TO_BB(dp->i_mount, - map.br_blockcount); - bp = xfs_trans_get_buf(*trans, - dp->i_mount->m_ddev_targp, - dblkno, dblkcnt, 0); - if (!bp) - return -ENOMEM; - xfs_trans_binval(*trans, bp); - /* - * Roll to next transaction. - */ - error = xfs_trans_roll_inode(trans, dp); - if (error) - return error; - } + error = xfs_attr_rmtval_stale(dp, &map, 0); + if (error) + return error; tblkno += map.br_blockcount; tblkcnt -= map.br_blockcount; @@ -174,9 +156,7 @@ xfs_attr3_leaf_inactive( */ error = 0; for (lp = list, i = 0; i < count; i++, lp++) { - tmp = xfs_attr3_leaf_freextent(trans, dp, - lp->valueblk, lp->valuelen); - + tmp = xfs_attr3_rmt_stale(dp, lp->valueblk, lp->valuelen); if (error == 0) error = tmp; /* save only the 1st errno */ } From 0bb9d159bd018b271e783d3b2d3bc82fa0727321 Mon Sep 17 00:00:00 2001 From: "Darrick J. 
Wong" Date: Tue, 14 Jan 2020 14:31:49 -0800 Subject: [PATCH 17/24] xfs: streamline xfs_attr3_leaf_inactive Now that we know we don't have to take a transaction to stale the incore buffers for a remote value, get rid of the unnecessary memory allocation in the leaf walker and call the rmt_stale function directly. Flatten the loop while we're at it. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_attr_leaf.h | 9 --- fs/xfs/xfs_attr_inactive.c | 107 ++++++++++------------------------ 2 files changed, 32 insertions(+), 84 deletions(-) diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h index f4a188e28b7b..73615b1dd1a8 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.h +++ b/fs/xfs/libxfs/xfs_attr_leaf.h @@ -39,15 +39,6 @@ struct xfs_attr3_icleaf_hdr { } freemap[XFS_ATTR_LEAF_MAPSIZE]; }; -/* - * Used to keep a list of "remote value" extents when unlinking an inode. - */ -typedef struct xfs_attr_inactive_list { - xfs_dablk_t valueblk; /* block number of value bytes */ - int valuelen; /* number of bytes in value */ -} xfs_attr_inactive_list_t; - - /*======================================================================== * Function prototypes for the kernel. *========================================================================*/ diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c index edb079087a0c..c75840a9e478 100644 --- a/fs/xfs/xfs_attr_inactive.c +++ b/fs/xfs/xfs_attr_inactive.c @@ -37,8 +37,6 @@ xfs_attr3_rmt_stale( int blkcnt) { struct xfs_bmbt_irec map; - xfs_dablk_t tblkno; - int tblkcnt; int nmap; int error; @@ -46,14 +44,12 @@ xfs_attr3_rmt_stale( * Roll through the "value", invalidating the attribute value's * blocks. */ - tblkno = blkno; - tblkcnt = blkcnt; - while (tblkcnt > 0) { + while (blkcnt > 0) { /* * Try to remember where we decided to put the value. 
*/ nmap = 1; - error = xfs_bmapi_read(dp, (xfs_fileoff_t)tblkno, tblkcnt, + error = xfs_bmapi_read(dp, (xfs_fileoff_t)blkno, blkcnt, &map, &nmap, XFS_BMAPI_ATTRFORK); if (error) return error; @@ -69,8 +65,8 @@ xfs_attr3_rmt_stale( if (error) return error; - tblkno += map.br_blockcount; - tblkcnt -= map.br_blockcount; + blkno += map.br_blockcount; + blkcnt -= map.br_blockcount; } return 0; @@ -84,84 +80,45 @@ xfs_attr3_rmt_stale( */ STATIC int xfs_attr3_leaf_inactive( - struct xfs_trans **trans, - struct xfs_inode *dp, - struct xfs_buf *bp) + struct xfs_trans **trans, + struct xfs_inode *dp, + struct xfs_buf *bp) { - struct xfs_attr_leafblock *leaf; - struct xfs_attr3_icleaf_hdr ichdr; - struct xfs_attr_leaf_entry *entry; + struct xfs_attr3_icleaf_hdr ichdr; + struct xfs_mount *mp = bp->b_mount; + struct xfs_attr_leafblock *leaf = bp->b_addr; + struct xfs_attr_leaf_entry *entry; struct xfs_attr_leaf_name_remote *name_rmt; - struct xfs_attr_inactive_list *list; - struct xfs_attr_inactive_list *lp; - int error; - int count; - int size; - int tmp; - int i; - struct xfs_mount *mp = bp->b_mount; + int error; + int i; - leaf = bp->b_addr; xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf); /* - * Count the number of "remote" value extents. + * Find the remote value extents for this leaf and invalidate their + * incore buffers. 
*/ - count = 0; entry = xfs_attr3_leaf_entryp(leaf); for (i = 0; i < ichdr.count; entry++, i++) { - if (be16_to_cpu(entry->nameidx) && - ((entry->flags & XFS_ATTR_LOCAL) == 0)) { - name_rmt = xfs_attr3_leaf_name_remote(leaf, i); - if (name_rmt->valueblk) - count++; - } + int blkcnt; + + if (!entry->nameidx || (entry->flags & XFS_ATTR_LOCAL)) + continue; + + name_rmt = xfs_attr3_leaf_name_remote(leaf, i); + if (!name_rmt->valueblk) + continue; + + blkcnt = xfs_attr3_rmt_blocks(dp->i_mount, + be32_to_cpu(name_rmt->valuelen)); + error = xfs_attr3_rmt_stale(dp, + be32_to_cpu(name_rmt->valueblk), blkcnt); + if (error) + goto err; } - /* - * If there are no "remote" values, we're done. - */ - if (count == 0) { - xfs_trans_brelse(*trans, bp); - return 0; - } - - /* - * Allocate storage for a list of all the "remote" value extents. - */ - size = count * sizeof(xfs_attr_inactive_list_t); - list = kmem_alloc(size, 0); - - /* - * Identify each of the "remote" value extents. - */ - lp = list; - entry = xfs_attr3_leaf_entryp(leaf); - for (i = 0; i < ichdr.count; entry++, i++) { - if (be16_to_cpu(entry->nameidx) && - ((entry->flags & XFS_ATTR_LOCAL) == 0)) { - name_rmt = xfs_attr3_leaf_name_remote(leaf, i); - if (name_rmt->valueblk) { - lp->valueblk = be32_to_cpu(name_rmt->valueblk); - lp->valuelen = xfs_attr3_rmt_blocks(dp->i_mount, - be32_to_cpu(name_rmt->valuelen)); - lp++; - } - } - } - xfs_trans_brelse(*trans, bp); /* unlock for trans. in freextent() */ - - /* - * Invalidate each of the "remote" value extents. - */ - error = 0; - for (lp = list, i = 0; i < count; i++, lp++) { - tmp = xfs_attr3_rmt_stale(dp, lp->valueblk, lp->valuelen); - if (error == 0) - error = tmp; /* save only the 1st errno */ - } - - kmem_free(list); + xfs_trans_brelse(*trans, bp); +err: return error; } From c64dd49b5112215730db8a2c3ac38c2e03b09e73 Mon Sep 17 00:00:00 2001 From: "Darrick J. 
Wong" Date: Wed, 8 Jan 2020 09:21:22 -0800 Subject: [PATCH 18/24] xfs: clean up xfs_buf_item_get_format return value The only thing that can cause a nonzero return from xfs_buf_item_get_format is if the kmem_alloc fails, which it can't. Get rid of all the unnecessary error handling. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_buf_item.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 3984779e5911..9737f177a49b 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -688,7 +688,7 @@ static const struct xfs_item_ops xfs_buf_item_ops = { .iop_push = xfs_buf_item_push, }; -STATIC int +STATIC void xfs_buf_item_get_format( struct xfs_buf_log_item *bip, int count) @@ -698,14 +698,11 @@ xfs_buf_item_get_format( if (count == 1) { bip->bli_formats = &bip->__bli_format; - return 0; + return; } bip->bli_formats = kmem_zalloc(count * sizeof(struct xfs_buf_log_format), 0); - if (!bip->bli_formats) - return -ENOMEM; - return 0; } STATIC void @@ -731,7 +728,6 @@ xfs_buf_item_init( struct xfs_buf_log_item *bip = bp->b_log_item; int chunks; int map_size; - int error; int i; /* @@ -760,13 +756,7 @@ xfs_buf_item_init( * Discontiguous buffer support follows the layout of the underlying * buffer. This makes the implementation as simple as possible. */ - error = xfs_buf_item_get_format(bip, bp->b_map_count); - ASSERT(error == 0); - if (error) { /* to stop gcc throwing set-but-unused warnings */ - kmem_cache_free(xfs_buf_item_zone, bip); - return error; - } - + xfs_buf_item_get_format(bip, bp->b_map_count); for (i = 0; i < bip->bli_format_count; i++) { chunks = DIV_ROUND_UP(BBTOB(bp->b_maps[i].bm_len), From c3d5f0c2fb85351a1017b23692d3d6ab561b1f32 Mon Sep 17 00:00:00 2001 From: "Darrick J. 
Wong" Date: Tue, 7 Jan 2020 16:12:24 -0800 Subject: [PATCH 19/24] xfs: complain if anyone tries to create a too-large buffer log item Complain if someone calls xfs_buf_item_init on a buffer that is larger than the dirty bitmap can handle, or tries to log a region that's past the end of the dirty bitmap. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_buf_item.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 9737f177a49b..be691d1d9fad 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -763,6 +763,15 @@ xfs_buf_item_init( XFS_BLF_CHUNK); map_size = DIV_ROUND_UP(chunks, NBWORD); + if (map_size > XFS_BLF_DATAMAP_SIZE) { + kmem_cache_free(xfs_buf_item_zone, bip); + xfs_err(mp, + "buffer item dirty bitmap (%u uints) too small to reflect %u bytes!", + map_size, + BBTOB(bp->b_maps[i].bm_len)); + return -EFSCORRUPTED; + } + bip->bli_formats[i].blf_type = XFS_LI_BUF; bip->bli_formats[i].blf_blkno = bp->b_maps[i].bm_bn; bip->bli_formats[i].blf_len = bp->b_maps[i].bm_len; @@ -795,6 +804,9 @@ xfs_buf_item_log_segment( uint end_bit; uint mask; + ASSERT(first < XFS_BLF_DATAMAP_SIZE * XFS_BLF_CHUNK * NBWORD); + ASSERT(last < XFS_BLF_DATAMAP_SIZE * XFS_BLF_CHUNK * NBWORD); + /* * Convert byte offsets to bit numbers. */ From b7df5e92055c69666e3c82f31f193120d98f04e3 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 7 Jan 2020 16:12:25 -0800 Subject: [PATCH 20/24] xfs: make struct xfs_buf_log_format have a consistent size Increase XFS_BLF_DATAMAP_SIZE by 1 to fill in the implied padding at the end of struct xfs_buf_log_format. This makes the size consistent so that we can check it in xfs_ondisk.h, and will be needed once we start logging attribute values. 
On amd64 we get the following pahole: struct xfs_buf_log_format { short unsigned int blf_type; /* 0 2 */ short unsigned int blf_size; /* 2 2 */ short unsigned int blf_flags; /* 4 2 */ short unsigned int blf_len; /* 6 2 */ long long int blf_blkno; /* 8 8 */ unsigned int blf_map_size; /* 16 4 */ unsigned int blf_data_map[16]; /* 20 64 */ /* --- cacheline 1 boundary (64 bytes) was 20 bytes ago --- */ /* size: 88, cachelines: 2, members: 7 */ /* padding: 4 */ /* last cacheline: 24 bytes */ }; But on i386 we get the following: struct xfs_buf_log_format { short unsigned int blf_type; /* 0 2 */ short unsigned int blf_size; /* 2 2 */ short unsigned int blf_flags; /* 4 2 */ short unsigned int blf_len; /* 6 2 */ long long int blf_blkno; /* 8 8 */ unsigned int blf_map_size; /* 16 4 */ unsigned int blf_data_map[16]; /* 20 64 */ /* --- cacheline 1 boundary (64 bytes) was 20 bytes ago --- */ /* size: 84, cachelines: 2, members: 7 */ /* last cacheline: 20 bytes */ }; Notice how the amd64 compiler inserts 4 bytes of padding to the end of the structure to ensure 8-byte alignment. Prior to "xfs: fix memory corruption during remote attr value buffer invalidation" we would try to write to blf_data_map[17], which is harmless on amd64 but really bad on i386. This shouldn't cause any changes in the ondisk logging formats because the log code writes out the log vectors with the appropriate size for the log item's map_size, and log recovery treats the data_map array as a VLA. Signed-off-by: Darrick J. 
Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_log_format.h | 17 +++++++++++++---- fs/xfs/xfs_ondisk.h | 1 + 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h index 8ef31d71a9c7..9bac0d2e56dc 100644 --- a/fs/xfs/libxfs/xfs_log_format.h +++ b/fs/xfs/libxfs/xfs_log_format.h @@ -462,11 +462,20 @@ static inline uint xfs_log_dinode_size(int version) #define XFS_BLF_GDQUOT_BUF (1<<4) /* - * This is the structure used to lay out a buf log item in the - * log. The data map describes which 128 byte chunks of the buffer - * have been logged. + * This is the structure used to lay out a buf log item in the log. The data + * map describes which 128 byte chunks of the buffer have been logged. + * + * The placement of blf_map_size causes blf_data_map to start at an odd + * multiple of sizeof(unsigned int) offset within the struct. Because the data + * bitmap size will always be an even number, the end of the data_map (and + * therefore the structure) will also be at an odd multiple of sizeof(unsigned + * int). Some 64-bit compilers will insert padding at the end of the struct to + * ensure 64-bit alignment of blf_blkno, but 32-bit ones will not. Therefore, + * XFS_BLF_DATAMAP_SIZE must be an odd number to make the padding explicit and + * keep the structure size consistent between 32-bit and 64-bit platforms. 
*/ -#define XFS_BLF_DATAMAP_SIZE ((XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK) / NBWORD) +#define __XFS_BLF_DATAMAP_SIZE ((XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK) / NBWORD) +#define XFS_BLF_DATAMAP_SIZE (__XFS_BLF_DATAMAP_SIZE + 1) typedef struct xfs_buf_log_format { unsigned short blf_type; /* buf log item type indicator */ diff --git a/fs/xfs/xfs_ondisk.h b/fs/xfs/xfs_ondisk.h index b6701b4f59a9..5f04d8a5ab2a 100644 --- a/fs/xfs/xfs_ondisk.h +++ b/fs/xfs/xfs_ondisk.h @@ -111,6 +111,7 @@ xfs_check_ondisk_structs(void) XFS_CHECK_STRUCT_SIZE(xfs_dir2_sf_hdr_t, 10); /* log structures */ + XFS_CHECK_STRUCT_SIZE(struct xfs_buf_log_format, 88); XFS_CHECK_STRUCT_SIZE(struct xfs_dq_logformat, 24); XFS_CHECK_STRUCT_SIZE(struct xfs_efd_log_format_32, 28); XFS_CHECK_STRUCT_SIZE(struct xfs_efd_log_format_64, 32); From 8a6453a89dc10269adb1fc0cd0ceba928d557c21 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 13 Jan 2020 16:33:46 -0800 Subject: [PATCH 21/24] xfs: check log iovec size to make sure it's plausibly a buffer log format When log recovery is processing buffer log items, we should check that the incoming iovec actually describes a region of memory large enough to contain the log format and the dirty map. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_buf_item.c | 17 +++++++++++++++++ fs/xfs/xfs_buf_item.h | 1 + fs/xfs/xfs_log_recover.c | 6 ++++++ 3 files changed, 24 insertions(+) diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index be691d1d9fad..5be8973a452c 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -27,6 +27,23 @@ static inline struct xfs_buf_log_item *BUF_ITEM(struct xfs_log_item *lip) STATIC void xfs_buf_do_callbacks(struct xfs_buf *bp); +/* Is this log iovec plausibly large enough to contain the buffer log format? 
*/ +bool +xfs_buf_log_check_iovec( + struct xfs_log_iovec *iovec) +{ + struct xfs_buf_log_format *blfp = iovec->i_addr; + char *bmp_end; + char *item_end; + + if (offsetof(struct xfs_buf_log_format, blf_data_map) > iovec->i_len) + return false; + + item_end = (char *)iovec->i_addr + iovec->i_len; + bmp_end = (char *)&blfp->blf_data_map[blfp->blf_map_size]; + return bmp_end <= item_end; +} + static inline int xfs_buf_log_format_size( struct xfs_buf_log_format *blfp) diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h index 4a054b11011a..30114b510332 100644 --- a/fs/xfs/xfs_buf_item.h +++ b/fs/xfs/xfs_buf_item.h @@ -61,6 +61,7 @@ void xfs_buf_iodone_callbacks(struct xfs_buf *); void xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *); bool xfs_buf_resubmit_failed_buffers(struct xfs_buf *, struct list_head *); +bool xfs_buf_log_check_iovec(struct xfs_log_iovec *iovec); extern kmem_zone_t *xfs_buf_item_zone; diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 99ec3fba4548..0d683fb96396 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -1934,6 +1934,12 @@ xlog_recover_buffer_pass1( struct list_head *bucket; struct xfs_buf_cancel *bcp; + if (!xfs_buf_log_check_iovec(&item->ri_buf[0])) { + xfs_err(log->l_mp, "bad buffer log item size (%d)", + item->ri_buf[0].i_len); + return -EFSCORRUPTED; + } + /* * If this isn't a cancel buffer item, then just return. */ From aa124436f329cc23e88ce67c81b5f6b7f4930529 Mon Sep 17 00:00:00 2001 From: zhengbin Date: Mon, 20 Jan 2020 14:34:47 -0800 Subject: [PATCH 22/24] xfs: change return value of xfs_inode_need_cow to int Fixes coccicheck warning: fs/xfs/xfs_reflink.c:236:9-10: WARNING: return of 0/1 in function 'xfs_inode_need_cow' with return type bool Reported-by: Hulk Robot Signed-off-by: zhengbin [darrick: rename the function so it doesn't sound like a predicate] Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. 
Wong --- fs/xfs/xfs_iomap.c | 2 +- fs/xfs/xfs_reflink.c | 6 +++--- fs/xfs/xfs_reflink.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 28e2d1f37267..bb590a267a7f 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -923,7 +923,7 @@ xfs_buffered_write_iomap_begin( xfs_trim_extent(&imap, offset_fsb, end_fsb - offset_fsb); /* Trim the mapping to the nearest shared extent boundary. */ - error = xfs_inode_need_cow(ip, &imap, &shared); + error = xfs_bmap_trim_cow(ip, &imap, &shared); if (error) goto out_unlock; diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 7a6c94295b8a..e723b267a247 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -223,8 +223,8 @@ xfs_reflink_trim_around_shared( } } -bool -xfs_inode_need_cow( +int +xfs_bmap_trim_cow( struct xfs_inode *ip, struct xfs_bmbt_irec *imap, bool *shared) @@ -327,7 +327,7 @@ xfs_find_trim_cow_extent( if (cmap->br_startoff > offset_fsb) { xfs_trim_extent(imap, imap->br_startoff, cmap->br_startoff - imap->br_startoff); - return xfs_inode_need_cow(ip, imap, shared); + return xfs_bmap_trim_cow(ip, imap, shared); } *shared = true; diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h index d18ad7f4fb64..3e4fd46373ab 100644 --- a/fs/xfs/xfs_reflink.h +++ b/fs/xfs/xfs_reflink.h @@ -22,7 +22,7 @@ extern int xfs_reflink_find_shared(struct xfs_mount *mp, struct xfs_trans *tp, xfs_agblock_t *fbno, xfs_extlen_t *flen, bool find_maximal); extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip, struct xfs_bmbt_irec *irec, bool *shared); -bool xfs_inode_need_cow(struct xfs_inode *ip, struct xfs_bmbt_irec *imap, +int xfs_bmap_trim_cow(struct xfs_inode *ip, struct xfs_bmbt_irec *imap, bool *shared); int xfs_reflink_allocate_cow(struct xfs_inode *ip, struct xfs_bmbt_irec *imap, From 54027a49938bbee1af62fad191139b14d4ee5cd2 Mon Sep 17 00:00:00 2001 From: "Darrick J. 
Wong" Date: Thu, 23 Jan 2020 07:54:09 -0800 Subject: [PATCH 23/24] xfs: fix uninitialized variable in xfs_attr3_leaf_inactive Dan Carpenter pointed out that error is uninitialized. While there never should be an attr leaf block with zero entries, let's not leave that logic bomb there. Fixes: 0bb9d159bd01 ("xfs: streamline xfs_attr3_leaf_inactive") Reported-by: Dan Carpenter Signed-off-by: Darrick J. Wong Reviewed-by: Allison Collins Reviewed-by: Eric Sandeen --- fs/xfs/xfs_attr_inactive.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c index c75840a9e478..8fbb841cd6fe 100644 --- a/fs/xfs/xfs_attr_inactive.c +++ b/fs/xfs/xfs_attr_inactive.c @@ -89,7 +89,7 @@ xfs_attr3_leaf_inactive( struct xfs_attr_leafblock *leaf = bp->b_addr; struct xfs_attr_leaf_entry *entry; struct xfs_attr_leaf_name_remote *name_rmt; - int error; + int error = 0; int i; xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf); From b3531f5fc16d4df2b12567bce48cd9f3ab5f9131 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 23 Jan 2020 21:22:49 -0800 Subject: [PATCH 24/24] xfs: remove unused variable 'done' fs/xfs/xfs_inode.c: In function 'xfs_itruncate_extents_flags': fs/xfs/xfs_inode.c:1523:8: warning: unused variable 'done' [-Wunused-variable] commit 4bbb04abb4ee ("xfs: truncate should remove all blocks, not just to the end of the page cache") left behind this, so remove it. Fixes: 4bbb04abb4ee ("xfs: truncate should remove all blocks, not just to the end of the page cache") Reported-by: Hulk Robot Reported-by: Stephen Rothwell Signed-off-by: YueHaibing Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. 
Wong --- fs/xfs/xfs_inode.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 1309f25c0d2b..1979a0055763 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1520,7 +1520,6 @@ xfs_itruncate_extents_flags( xfs_fileoff_t first_unmap_block; xfs_filblks_t unmap_len; int error = 0; - int done = 0; ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); ASSERT(!atomic_read(&VFS_I(ip)->i_count) ||