2005-04-17 06:20:36 +08:00
|
|
|
/*
|
|
|
|
* linux/include/linux/nfs_fs.h
|
|
|
|
*
|
|
|
|
* Copyright (C) 1992 Rick Sladkey
|
|
|
|
*
|
|
|
|
* OS-specific nfs filesystem definitions and declarations
|
|
|
|
*/
|
|
|
|
#ifndef _LINUX_NFS_FS_H
|
|
|
|
#define _LINUX_NFS_FS_H
|
|
|
|
|
2012-10-13 17:46:48 +08:00
|
|
|
#include <uapi/linux/nfs_fs.h>
|
2005-08-26 07:25:55 +08:00
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2012-03-19 02:07:42 +08:00
|
|
|
/*
 * Client-side dprintk() debugging: NFS_DEBUG is defined exactly when the
 * kernel configuration sets CONFIG_NFS_DEBUG.
 */
#ifdef CONFIG_NFS_DEBUG
# define NFS_DEBUG
#endif
|
|
|
|
|
2006-09-13 11:36:02 +08:00
|
|
|
#include <linux/in.h>
|
|
|
|
#include <linux/mm.h>
|
|
|
|
#include <linux/pagemap.h>
|
2006-07-25 23:28:18 +08:00
|
|
|
#include <linux/rbtree.h>
|
2006-09-13 11:36:02 +08:00
|
|
|
#include <linux/rwsem.h>
|
|
|
|
#include <linux/wait.h>
|
|
|
|
|
|
|
|
#include <linux/sunrpc/debug.h>
|
|
|
|
#include <linux/sunrpc/auth.h>
|
|
|
|
#include <linux/sunrpc/clnt.h>
|
|
|
|
|
|
|
|
#include <linux/nfs.h>
|
|
|
|
#include <linux/nfs2.h>
|
|
|
|
#include <linux/nfs3.h>
|
|
|
|
#include <linux/nfs4.h>
|
|
|
|
#include <linux/nfs_xdr.h>
|
|
|
|
#include <linux/nfs_fs_sb.h>
|
|
|
|
|
|
|
|
#include <linux/mempool.h>
|
|
|
|
|
|
|
|
/*
 * Default RPC task flags for swap requests: the task is marked as a
 * swapper and uses root credentials.
 */
#define NFS_RPC_SWAPFLAGS		(RPC_TASK_SWAPPER|RPC_TASK_ROOTCREDS)
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
|
|
|
|
* NFSv3/v4 Access mode cache entry
|
|
|
|
*/
|
|
|
|
struct nfs_access_entry {
|
2006-07-25 23:28:18 +08:00
|
|
|
struct rb_node rb_node;
|
2006-07-25 23:28:18 +08:00
|
|
|
struct list_head lru;
|
2005-04-17 06:20:36 +08:00
|
|
|
unsigned long jiffies;
|
|
|
|
struct rpc_cred * cred;
|
|
|
|
int mask;
|
2014-07-14 09:28:20 +08:00
|
|
|
struct rcu_head rcu_head;
|
2005-04-17 06:20:36 +08:00
|
|
|
};
|
|
|
|
|
2010-06-26 04:35:53 +08:00
|
|
|
struct nfs_lock_context {
|
|
|
|
atomic_t count;
|
|
|
|
struct list_head list;
|
|
|
|
struct nfs_open_context *open_context;
|
2016-10-13 12:26:47 +08:00
|
|
|
fl_owner_t lockowner;
|
2016-01-06 23:40:18 +08:00
|
|
|
atomic_t io_count;
|
2010-06-26 04:35:53 +08:00
|
|
|
};
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
struct nfs4_state;
|
|
|
|
struct nfs_open_context {
|
2010-06-26 04:35:53 +08:00
|
|
|
struct nfs_lock_context lock_context;
|
2016-10-13 12:26:47 +08:00
|
|
|
fl_owner_t flock_owner;
|
2011-06-23 06:40:12 +08:00
|
|
|
struct dentry *dentry;
|
2005-04-17 06:20:36 +08:00
|
|
|
struct rpc_cred *cred;
|
|
|
|
struct nfs4_state *state;
|
2008-12-24 04:21:56 +08:00
|
|
|
fmode_t mode;
|
2007-07-26 02:09:54 +08:00
|
|
|
|
|
|
|
unsigned long flags;
|
|
|
|
#define NFS_CONTEXT_ERROR_WRITE (0)
|
2012-09-12 04:01:22 +08:00
|
|
|
#define NFS_CONTEXT_RESEND_WRITES (1)
|
2013-03-19 07:45:14 +08:00
|
|
|
#define NFS_CONTEXT_BAD (2)
|
2017-04-12 00:50:10 +08:00
|
|
|
#define NFS_CONTEXT_UNLOCK (3)
|
2005-04-17 06:20:36 +08:00
|
|
|
int error;
|
|
|
|
|
|
|
|
struct list_head list;
|
2012-05-23 17:02:35 +08:00
|
|
|
struct nfs4_threshold *mdsthreshold;
|
2011-03-24 02:48:29 +08:00
|
|
|
};
|
2005-06-23 01:16:29 +08:00
|
|
|
|
2011-03-24 02:48:29 +08:00
|
|
|
struct nfs_open_dir_context {
|
2014-02-08 06:02:08 +08:00
|
|
|
struct list_head list;
|
2011-03-24 02:48:29 +08:00
|
|
|
struct rpc_cred *cred;
|
2011-07-31 00:45:35 +08:00
|
|
|
unsigned long attr_gencount;
|
2005-06-23 01:16:29 +08:00
|
|
|
__u64 dir_cookie;
|
2011-03-24 03:04:31 +08:00
|
|
|
__u64 dup_cookie;
|
2011-07-31 00:45:35 +08:00
|
|
|
signed char duped;
|
2005-04-17 06:20:36 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Forward declarations: the NFSv4 delegation and POSIX ACL structures
 * are only referred to by pointer in this header.
 */
struct nfs_delegation;

struct posix_acl;
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
|
|
|
|
* nfs fs inode data in memory
|
|
|
|
*/
|
|
|
|
struct nfs_inode {
|
|
|
|
/*
|
|
|
|
* The 64bit 'inode number'
|
|
|
|
*/
|
|
|
|
__u64 fileid;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* NFS file handle
|
|
|
|
*/
|
|
|
|
struct nfs_fh fh;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Various flags
|
|
|
|
*/
|
2005-08-19 02:24:11 +08:00
|
|
|
unsigned long flags; /* atomic bit ops */
|
|
|
|
unsigned long cache_validity; /* bit mask */
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/*
|
2006-03-21 02:44:11 +08:00
|
|
|
* read_cache_jiffies is when we started read-caching this inode.
|
2005-04-17 06:20:36 +08:00
|
|
|
* attrtimeo is for how long the cached information is assumed
|
|
|
|
* to be valid. A successful attribute revalidation doubles
|
|
|
|
* attrtimeo (up to acregmax/acdirmax), a failure resets it to
|
|
|
|
* acregmin/acdirmin.
|
|
|
|
*
|
|
|
|
* We need to revalidate the cached attrs for this inode if
|
|
|
|
*
|
optimize attribute timeouts for "noac" and "actimeo=0"
Hi.
I've been looking at a bugzilla which describes a problem where
a customer was advised to use either the "noac" or "actimeo=0"
mount options to solve a consistency problem that they were
seeing in the file attributes. It turned out that this solution
did not work reliably for them because sometimes, the local
attribute cache was believed to be valid and not timed out.
(With an attribute cache timeout of 0, the cache should always
appear to be timed out.)
In looking at this situation, it appears to me that the problem
is that the attribute cache timeout code has an off-by-one
error in it. It is assuming that the cache is valid in the
region, [read_cache_jiffies, read_cache_jiffies + attrtimeo]. The
cache should be considered valid only in the region,
[read_cache_jiffies, read_cache_jiffies + attrtimeo). With this
change, the options, "noac" and "actimeo=0", work as originally
expected.
This problem was previously addressed by special casing the
attrtimeo == 0 case. However, since the problem is only an off-
by-one error, the cleaner solution is address the off-by-one
error and thus, not require the special case.
Thanx...
ps
Signed-off-by: Peter Staubach <staubach@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
2008-12-24 04:21:56 +08:00
|
|
|
* jiffies - read_cache_jiffies >= attrtimeo
|
|
|
|
*
|
|
|
|
* Please note the comparison is greater than or equal
|
|
|
|
* so that zero timeout values can be specified.
|
2005-04-17 06:20:36 +08:00
|
|
|
*/
|
|
|
|
unsigned long read_cache_jiffies;
|
|
|
|
unsigned long attrtimeo;
|
|
|
|
unsigned long attrtimeo_timestamp;
|
|
|
|
|
2008-10-15 07:16:07 +08:00
|
|
|
unsigned long attr_gencount;
|
2005-04-17 06:20:36 +08:00
|
|
|
/* "Generation counter" for the attribute cache. This is
|
|
|
|
* bumped whenever we update the metadata on the
|
|
|
|
* server.
|
|
|
|
*/
|
|
|
|
unsigned long cache_change_attribute;
|
|
|
|
|
2006-07-25 23:28:18 +08:00
|
|
|
struct rb_root access_cache;
|
2006-07-25 23:28:18 +08:00
|
|
|
struct list_head access_cache_entry_lru;
|
|
|
|
struct list_head access_cache_inode_lru;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* This is the cookie verifier used for NFSv3 readdir
|
|
|
|
* operations
|
|
|
|
*/
|
2006-10-20 14:28:51 +08:00
|
|
|
__be32 cookieverf[2];
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2014-11-13 01:08:00 +08:00
|
|
|
unsigned long nrequests;
|
2012-04-21 02:47:53 +08:00
|
|
|
struct nfs_mds_commit_info commit_info;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/* Open contexts for shared mmap writes */
|
|
|
|
struct list_head open_files;
|
|
|
|
|
2016-04-29 11:56:31 +08:00
|
|
|
/* Readers: in-flight sillydelete RPC calls */
|
|
|
|
/* Writers: rmdir */
|
|
|
|
struct rw_semaphore rmdir_sem;
|
2007-10-16 06:17:53 +08:00
|
|
|
|
2012-07-31 04:05:25 +08:00
|
|
|
#if IS_ENABLED(CONFIG_NFS_V4)
|
2005-06-23 01:16:23 +08:00
|
|
|
struct nfs4_cached_acl *nfs4_acl;
|
2005-04-17 06:20:36 +08:00
|
|
|
/* NFSv4 state */
|
|
|
|
struct list_head open_states;
|
2010-03-03 17:20:10 +08:00
|
|
|
struct nfs_delegation __rcu *delegation;
|
2005-04-17 06:20:36 +08:00
|
|
|
struct rw_semaphore rwsem;
|
2010-10-20 12:18:01 +08:00
|
|
|
|
|
|
|
/* pNFS layout information */
|
|
|
|
struct pnfs_layout_hdr *layout;
|
2005-04-17 06:20:36 +08:00
|
|
|
#endif /* CONFIG_NFS_V4*/
|
2012-05-25 01:13:24 +08:00
|
|
|
/* how many bytes have been written/read and how many bytes queued up */
|
|
|
|
__u64 write_io;
|
|
|
|
__u64 read_io;
|
2009-04-03 23:42:43 +08:00
|
|
|
#ifdef CONFIG_NFS_FSCACHE
|
|
|
|
struct fscache_cookie *fscache;
|
|
|
|
#endif
|
2005-04-17 06:20:36 +08:00
|
|
|
struct inode vfs_inode;
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Cache validity flags: bit values stored in nfs_inode.cache_validity.
 */
#define NFS_INO_INVALID_ATTR	0x0001	/* cached attrs are invalid */
#define NFS_INO_INVALID_DATA	0x0002	/* cached data is invalid */
#define NFS_INO_INVALID_ATIME	0x0004	/* cached atime is invalid */
#define NFS_INO_INVALID_ACCESS	0x0008	/* cached access cred invalid */
#define NFS_INO_INVALID_ACL	0x0010	/* cached acls are invalid */
#define NFS_INO_REVAL_PAGECACHE	0x0020	/* must revalidate pagecache */
#define NFS_INO_REVAL_FORCED	0x0040	/* force revalidation ignoring a delegation */
#define NFS_INO_INVALID_LABEL	0x0080	/* cached label is invalid */
|
2005-08-19 02:24:09 +08:00
|
|
|
|
|
|
|
/*
 * Bit numbers (not masks) within nfs_inode.flags, for use with the
 * atomic bit operations.  Offsets 4, 7 and 8 are not assigned here.
 */
#define NFS_INO_ADVISE_RDPLUS	(0)		/* advise readdirplus */
#define NFS_INO_STALE		(1)		/* possible stale inode */
#define NFS_INO_ACL_LRU_SET	(2)		/* Inode is on the LRU list */
#define NFS_INO_INVALIDATING	(3)		/* inode is being invalidated */
#define NFS_INO_FSCACHE		(5)		/* inode can be cached by FS-Cache */
#define NFS_INO_FSCACHE_LOCK	(6)		/* FS-Cache cookie management lock */
#define NFS_INO_LAYOUTCOMMIT	(9)		/* layoutcommit required */
#define NFS_INO_LAYOUTCOMMITTING (10)		/* layoutcommit inflight */
#define NFS_INO_LAYOUTSTATS	(11)		/* layoutstats inflight */
#define NFS_INO_ODIRECT		(12)		/* I/O setting is O_DIRECT */
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2008-01-23 14:59:08 +08:00
|
|
|
static inline struct nfs_inode *NFS_I(const struct inode *inode)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
|
|
|
return container_of(inode, struct nfs_inode, vfs_inode);
|
|
|
|
}
|
2008-01-23 14:59:08 +08:00
|
|
|
|
|
|
|
static inline struct nfs_server *NFS_SB(const struct super_block *s)
|
|
|
|
{
|
|
|
|
return (struct nfs_server *)(s->s_fs_info);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline struct nfs_fh *NFS_FH(const struct inode *inode)
|
|
|
|
{
|
|
|
|
return &NFS_I(inode)->fh;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline struct nfs_server *NFS_SERVER(const struct inode *inode)
|
|
|
|
{
|
|
|
|
return NFS_SB(inode->i_sb);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline struct rpc_clnt *NFS_CLIENT(const struct inode *inode)
|
|
|
|
{
|
|
|
|
return NFS_SERVER(inode)->client;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline const struct nfs_rpc_ops *NFS_PROTO(const struct inode *inode)
|
|
|
|
{
|
|
|
|
return NFS_SERVER(inode)->nfs_client->rpc_ops;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline unsigned NFS_MINATTRTIMEO(const struct inode *inode)
|
|
|
|
{
|
|
|
|
struct nfs_server *nfss = NFS_SERVER(inode);
|
|
|
|
return S_ISDIR(inode->i_mode) ? nfss->acdirmin : nfss->acregmin;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline unsigned NFS_MAXATTRTIMEO(const struct inode *inode)
|
|
|
|
{
|
|
|
|
struct nfs_server *nfss = NFS_SERVER(inode);
|
|
|
|
return S_ISDIR(inode->i_mode) ? nfss->acdirmax : nfss->acregmax;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline int NFS_STALE(const struct inode *inode)
|
|
|
|
{
|
|
|
|
return test_bit(NFS_INO_STALE, &NFS_I(inode)->flags);
|
|
|
|
}
|
|
|
|
|
2013-09-27 18:20:03 +08:00
|
|
|
static inline struct fscache_cookie *nfs_i_fscache(struct inode *inode)
|
2009-04-03 23:42:43 +08:00
|
|
|
{
|
2013-09-27 18:20:03 +08:00
|
|
|
#ifdef CONFIG_NFS_FSCACHE
|
|
|
|
return NFS_I(inode)->fscache;
|
|
|
|
#else
|
|
|
|
return NULL;
|
|
|
|
#endif
|
2009-04-03 23:42:43 +08:00
|
|
|
}
|
|
|
|
|
2008-01-23 14:59:08 +08:00
|
|
|
static inline __u64 NFS_FILEID(const struct inode *inode)
|
|
|
|
{
|
|
|
|
return NFS_I(inode)->fileid;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Store @fileid as the 64-bit fileid of @inode's nfs_inode. */
static inline void set_nfs_fileid(struct inode *inode, __u64 fileid)
{
	struct nfs_inode *nfsi = NFS_I(inode);

	nfsi->fileid = fileid;
}
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2005-10-28 10:12:39 +08:00
|
|
|
static inline void nfs_mark_for_revalidate(struct inode *inode)
|
|
|
|
{
|
2006-05-25 13:40:57 +08:00
|
|
|
struct nfs_inode *nfsi = NFS_I(inode);
|
|
|
|
|
2005-10-28 10:12:39 +08:00
|
|
|
spin_lock(&inode->i_lock);
|
2015-07-06 00:36:34 +08:00
|
|
|
nfsi->cache_validity |= NFS_INO_INVALID_ATTR |
|
|
|
|
NFS_INO_REVAL_PAGECACHE |
|
|
|
|
NFS_INO_INVALID_ACCESS |
|
|
|
|
NFS_INO_INVALID_ACL;
|
2007-10-01 22:00:23 +08:00
|
|
|
if (S_ISDIR(inode->i_mode))
|
2015-07-06 00:36:34 +08:00
|
|
|
nfsi->cache_validity |= NFS_INO_INVALID_DATA;
|
2005-10-28 10:12:39 +08:00
|
|
|
spin_unlock(&inode->i_lock);
|
|
|
|
}
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
static inline int nfs_server_capable(struct inode *inode, int cap)
|
|
|
|
{
|
|
|
|
return NFS_SERVER(inode)->caps & cap;
|
|
|
|
}
|
|
|
|
|
2007-10-02 09:42:01 +08:00
|
|
|
static inline void nfs_set_verifier(struct dentry * dentry, unsigned long verf)
|
|
|
|
{
|
|
|
|
dentry->d_time = verf;
|
|
|
|
}
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/**
|
|
|
|
* nfs_save_change_attribute - Returns the inode attribute change cookie
|
2007-09-30 05:14:03 +08:00
|
|
|
* @dir - pointer to parent directory inode
|
2005-04-17 06:20:36 +08:00
|
|
|
* The "change attribute" is updated every time we finish an operation
|
|
|
|
* that will result in a metadata change on the server.
|
|
|
|
*/
|
2007-09-30 05:14:03 +08:00
|
|
|
static inline unsigned long nfs_save_change_attribute(struct inode *dir)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
2007-09-30 05:14:03 +08:00
|
|
|
return NFS_I(dir)->cache_change_attribute;
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2007-09-30 05:15:01 +08:00
|
|
|
* nfs_verify_change_attribute - Detects NFS remote directory changes
|
|
|
|
* @dir - pointer to parent directory inode
|
2005-04-17 06:20:36 +08:00
|
|
|
* @chattr - previously saved change attribute
|
2007-09-30 05:15:01 +08:00
|
|
|
* Return "false" if the verifiers doesn't match the change attribute.
|
|
|
|
* This would usually indicate that the directory contents have changed on
|
|
|
|
* the server, and that any dentries need revalidating.
|
2005-04-17 06:20:36 +08:00
|
|
|
*/
|
2007-09-30 05:15:01 +08:00
|
|
|
static inline int nfs_verify_change_attribute(struct inode *dir, unsigned long chattr)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
2007-09-30 05:15:01 +08:00
|
|
|
return chattr == NFS_I(dir)->cache_change_attribute;
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * linux/fs/nfs/inode.c
 *
 * Cache invalidation, inode lookup and attribute refresh entry points.
 */
extern int nfs_sync_mapping(struct address_space *mapping);
extern void nfs_zap_mapping(struct inode *inode, struct address_space *mapping);
extern void nfs_zap_caches(struct inode *);
extern void nfs_invalidate_atime(struct inode *);
extern struct inode *nfs_fhget(struct super_block *, struct nfs_fh *,
				struct nfs_fattr *, struct nfs4_label *);
extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *);
extern int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr);
extern int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr);
extern int nfs_post_op_update_inode_force_wcc_locked(struct inode *inode, struct nfs_fattr *fattr);
|
statx: Add a system call to make enhanced file info available
Add a system call to make extended file information available, including
file creation and some attribute flags where available through the
underlying filesystem.
The getattr inode operation is altered to take two additional arguments: a
u32 request_mask and an unsigned int flags that indicate the
synchronisation mode. This change is propagated to the vfs_getattr*()
function.
Functions like vfs_stat() are now inline wrappers around new functions
vfs_statx() and vfs_statx_fd() to reduce stack usage.
========
OVERVIEW
========
The idea was initially proposed as a set of xattrs that could be retrieved
with getxattr(), but the general preference proved to be for a new syscall
with an extended stat structure.
A number of requests were gathered for features to be included. The
following have been included:
(1) Make the fields a consistent size on all arches and make them large.
(2) Spare space, request flags and information flags are provided for
future expansion.
(3) Better support for the y2038 problem [Arnd Bergmann] (tv_sec is an
__s64).
(4) Creation time: The SMB protocol carries the creation time, which could
be exported by Samba, which will in turn help CIFS make use of
FS-Cache as that can be used for coherency data (stx_btime).
This is also specified in NFSv4 as a recommended attribute and could
be exported by NFSD [Steve French].
(5) Lightweight stat: Ask for just those details of interest, and allow a
netfs (such as NFS) to approximate anything not of interest, possibly
without going to the server [Trond Myklebust, Ulrich Drepper, Andreas
Dilger] (AT_STATX_DONT_SYNC).
(6) Heavyweight stat: Force a netfs to go to the server, even if it thinks
its cached attributes are up to date [Trond Myklebust]
(AT_STATX_FORCE_SYNC).
And the following have been left out for future extension:
(7) Data version number: Could be used by userspace NFS servers [Aneesh
Kumar].
Can also be used to modify fill_post_wcc() in NFSD which retrieves
i_version directly, but has just called vfs_getattr(). It could get
it from the kstat struct if it used vfs_xgetattr() instead.
(There's disagreement on the exact semantics of a single field, since
not all filesystems do this the same way).
(8) BSD stat compatibility: Including more fields from the BSD stat such
as creation time (st_btime) and inode generation number (st_gen)
[Jeremy Allison, Bernd Schubert].
(9) Inode generation number: Useful for FUSE and userspace NFS servers
[Bernd Schubert].
(This was asked for but later deemed unnecessary with the
open-by-handle capability available and caused disagreement as to
whether it's a security hole or not).
(10) Extra coherency data may be useful in making backups [Andreas Dilger].
(No particular data were offered, but things like last backup
timestamp, the data version number and the DOS archive bit would come
into this category).
(11) Allow the filesystem to indicate what it can/cannot provide: A
filesystem can now say it doesn't support a standard stat feature if
that isn't available, so if, for instance, inode numbers or UIDs don't
exist or are fabricated locally...
(This requires a separate system call - I have an fsinfo() call idea
for this).
(12) Store a 16-byte volume ID in the superblock that can be returned in
struct xstat [Steve French].
(Deferred to fsinfo).
(13) Include granularity fields in the time data to indicate the
granularity of each of the times (NFSv4 time_delta) [Steve French].
(Deferred to fsinfo).
(14) FS_IOC_GETFLAGS value. These could be translated to BSD's st_flags.
Note that the Linux IOC flags are a mess and filesystems such as Ext4
define flags that aren't in linux/fs.h, so translation in the kernel
may be a necessity (or, possibly, we provide the filesystem type too).
(Some attributes are made available in stx_attributes, but the general
feeling was that the IOC flags were too ext[234]-specific and shouldn't
be exposed through statx this way).
(15) Mask of features available on file (eg: ACLs, seclabel) [Brad Boyer,
Michael Kerrisk].
(Deferred, probably to fsinfo. Finding out if there's an ACL or
seclabel might require extra filesystem operations).
(16) Femtosecond-resolution timestamps [Dave Chinner].
(A __reserved field has been left in the statx_timestamp struct for
this - if there proves to be a need).
(17) A set multiple attributes syscall to go with this.
===============
NEW SYSTEM CALL
===============
The new system call is:
int ret = statx(int dfd,
const char *filename,
unsigned int flags,
unsigned int mask,
struct statx *buffer);
The dfd, filename and flags parameters indicate the file to query, in a
similar way to fstatat(). There is no equivalent of lstat() as that can be
emulated with statx() by passing AT_SYMLINK_NOFOLLOW in flags. There is
also no equivalent of fstat() as that can be emulated by passing a NULL
filename to statx() with the fd of interest in dfd.
Whether or not statx() synchronises the attributes with the backing store
can be controlled by OR'ing a value into the flags argument (this typically
only affects network filesystems):
(1) AT_STATX_SYNC_AS_STAT tells statx() to behave as stat() does in this
respect.
(2) AT_STATX_FORCE_SYNC will require a network filesystem to synchronise
its attributes with the server - which might require data writeback to
occur to get the timestamps correct.
(3) AT_STATX_DONT_SYNC will suppress synchronisation with the server in a
network filesystem. The resulting values should be considered
approximate.
mask is a bitmask indicating the fields in struct statx that are of
interest to the caller. The user should set this to STATX_BASIC_STATS to
get the basic set returned by stat(). It should be noted that asking for
more information may entail extra I/O operations.
buffer points to the destination for the data. This must be 256 bytes in
size.
======================
MAIN ATTRIBUTES RECORD
======================
The following structures are defined in which to return the main attribute
set:
struct statx_timestamp {
__s64 tv_sec;
__s32 tv_nsec;
__s32 __reserved;
};
struct statx {
__u32 stx_mask;
__u32 stx_blksize;
__u64 stx_attributes;
__u32 stx_nlink;
__u32 stx_uid;
__u32 stx_gid;
__u16 stx_mode;
__u16 __spare0[1];
__u64 stx_ino;
__u64 stx_size;
__u64 stx_blocks;
__u64 __spare1[1];
struct statx_timestamp stx_atime;
struct statx_timestamp stx_btime;
struct statx_timestamp stx_ctime;
struct statx_timestamp stx_mtime;
__u32 stx_rdev_major;
__u32 stx_rdev_minor;
__u32 stx_dev_major;
__u32 stx_dev_minor;
__u64 __spare2[14];
};
The defined bits in request_mask and stx_mask are:
STATX_TYPE Want/got stx_mode & S_IFMT
STATX_MODE Want/got stx_mode & ~S_IFMT
STATX_NLINK Want/got stx_nlink
STATX_UID Want/got stx_uid
STATX_GID Want/got stx_gid
STATX_ATIME Want/got stx_atime{,_ns}
STATX_MTIME Want/got stx_mtime{,_ns}
STATX_CTIME Want/got stx_ctime{,_ns}
STATX_INO Want/got stx_ino
STATX_SIZE Want/got stx_size
STATX_BLOCKS Want/got stx_blocks
STATX_BASIC_STATS [The stuff in the normal stat struct]
STATX_BTIME Want/got stx_btime{,_ns}
STATX_ALL [All currently available stuff]
stx_btime is the file creation time, stx_mask is a bitmask indicating the
data provided and __spares*[] are where as-yet undefined fields can be
placed.
Time fields are structures with separate seconds and nanoseconds fields
plus a reserved field in case we want to add even finer resolution. Note
that times will be negative if before 1970; in such a case, the nanosecond
fields will also be negative if not zero.
The bits defined in the stx_attributes field convey information about a
file, how it is accessed, where it is and what it does. The following
attributes map to FS_*_FL flags and are the same numerical value:
STATX_ATTR_COMPRESSED File is compressed by the fs
STATX_ATTR_IMMUTABLE File is marked immutable
STATX_ATTR_APPEND File is append-only
STATX_ATTR_NODUMP File is not to be dumped
STATX_ATTR_ENCRYPTED File requires key to decrypt in fs
Within the kernel, the supported flags are listed by:
KSTAT_ATTR_FS_IOC_FLAGS
[Are any other IOC flags of sufficient general interest to be exposed
through this interface?]
New flags include:
STATX_ATTR_AUTOMOUNT Object is an automount trigger
These are for the use of GUI tools that might want to mark files specially,
depending on what they are.
Fields in struct statx come in a number of classes:
(0) stx_dev_*, stx_blksize.
These are local system information and are always available.
(1) stx_mode, stx_nlinks, stx_uid, stx_gid, stx_[amc]time, stx_ino,
stx_size, stx_blocks.
These will be returned whether the caller asks for them or not. The
corresponding bits in stx_mask will be set to indicate whether they
actually have valid values.
If the caller didn't ask for them, then they may be approximated. For
example, NFS won't waste any time updating them from the server,
unless as a byproduct of updating something requested.
If the values don't actually exist for the underlying object (such as
UID or GID on a DOS file), then the bit won't be set in the stx_mask,
even if the caller asked for the value. In such a case, the returned
value will be a fabrication.
Note that there are instances where the type might not be valid, for
instance Windows reparse points.
(2) stx_rdev_*.
This will be set only if stx_mode indicates we're looking at a
blockdev or a chardev, otherwise will be 0.
(3) stx_btime.
Similar to (1), except this will be set to 0 if it doesn't exist.
=======
TESTING
=======
The following test program can be used to test the statx system call:
samples/statx/test-statx.c
Just compile and run, passing it paths to the files you want to examine.
The file is built automatically if CONFIG_SAMPLES is enabled.
Here's some example output. Firstly, an NFS directory that crosses to
another FSID. Note that the AUTOMOUNT attribute is set because transiting
this directory will cause d_automount to be invoked by the VFS.
[root@andromeda ~]# /tmp/test-statx -A /warthog/data
statx(/warthog/data) = 0
results=7ff
Size: 4096 Blocks: 8 IO Block: 1048576 directory
Device: 00:26 Inode: 1703937 Links: 125
Access: (3777/drwxrwxrwx) Uid: 0 Gid: 4041
Access: 2016-11-24 09:02:12.219699527+0000
Modify: 2016-11-17 10:44:36.225653653+0000
Change: 2016-11-17 10:44:36.225653653+0000
Attributes: 0000000000001000 (-------- -------- -------- -------- -------- -------- ---m---- --------)
Secondly, the result of automounting on that directory.
[root@andromeda ~]# /tmp/test-statx /warthog/data
statx(/warthog/data) = 0
results=7ff
Size: 4096 Blocks: 8 IO Block: 1048576 directory
Device: 00:27 Inode: 2 Links: 125
Access: (3777/drwxrwxrwx) Uid: 0 Gid: 4041
Access: 2016-11-24 09:02:12.219699527+0000
Modify: 2016-11-17 10:44:36.225653653+0000
Change: 2016-11-17 10:44:36.225653653+0000
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2017-02-01 00:46:22 +08:00
|
|
|
extern int nfs_getattr(const struct path *, struct kstat *, u32, unsigned int);
|
2012-09-11 02:00:46 +08:00
|
|
|
extern void nfs_access_add_cache(struct inode *, struct nfs_access_entry *);
|
|
|
|
extern void nfs_access_set_mask(struct nfs_access_entry *, u32);
|
2011-06-21 07:28:19 +08:00
|
|
|
extern int nfs_permission(struct inode *, int);
|
2005-04-17 06:20:36 +08:00
|
|
|
extern int nfs_open(struct inode *, struct file *);
|
2013-07-06 05:49:30 +08:00
|
|
|
extern int nfs_attribute_cache_expired(struct inode *inode);
|
2005-04-17 06:20:36 +08:00
|
|
|
extern int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode);
|
|
|
|
extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *);
|
2016-12-05 07:34:34 +08:00
|
|
|
extern bool nfs_mapping_need_revalidate_inode(struct inode *inode);
|
2006-05-25 13:40:59 +08:00
|
|
|
extern int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping);
|
2015-11-18 10:14:24 +08:00
|
|
|
extern int nfs_revalidate_mapping_rcu(struct inode *inode);
|
2005-04-17 06:20:36 +08:00
|
|
|
extern int nfs_setattr(struct dentry *, struct iattr *);
|
2015-02-27 05:09:04 +08:00
|
|
|
extern void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, struct nfs_fattr *);
|
2013-05-23 00:50:44 +08:00
|
|
|
extern void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr,
|
|
|
|
struct nfs4_label *label);
|
2005-04-17 06:20:36 +08:00
|
|
|
extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx);
|
|
|
|
extern void put_nfs_open_context(struct nfs_open_context *ctx);
|
2008-12-24 04:21:56 +08:00
|
|
|
extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, fmode_t mode);
|
2016-10-13 12:26:47 +08:00
|
|
|
extern struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, fmode_t f_mode, struct file *filp);
|
2013-05-30 01:34:46 +08:00
|
|
|
extern void nfs_inode_attach_open_context(struct nfs_open_context *ctx);
|
2010-09-17 22:56:50 +08:00
|
|
|
extern void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx);
|
2015-07-14 02:01:33 +08:00
|
|
|
extern void nfs_file_clear_open_context(struct file *flip);
|
2010-06-26 04:35:53 +08:00
|
|
|
extern struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx);
|
|
|
|
extern void nfs_put_lock_context(struct nfs_lock_context *l_ctx);
|
2007-10-10 00:01:04 +08:00
|
|
|
extern u64 nfs_compat_user_ino64(u64 fileid);
|
2008-10-15 07:16:07 +08:00
|
|
|
extern void nfs_fattr_init(struct nfs_fattr *fattr);
|
2015-02-27 06:42:42 +08:00
|
|
|
extern void nfs_fattr_set_barrier(struct nfs_fattr *fattr);
|
2010-09-17 22:54:37 +08:00
|
|
|
extern unsigned long nfs_inc_attr_generation_counter(void);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2010-04-17 04:22:45 +08:00
|
|
|
extern struct nfs_fattr *nfs_alloc_fattr(void);
|
|
|
|
|
|
|
|
/*
 * nfs_free_fattr - release an nfs_fattr structure
 * @fattr: structure to release
 *
 * Hands @fattr straight to kfree(); nothing else is torn down here.
 * Presumably the counterpart of nfs_alloc_fattr() declared just above
 * (its definition is not visible in this header).
 */
static inline void nfs_free_fattr(const struct nfs_fattr *fattr)
{
	kfree(fattr);
}
|
|
|
|
|
|
|
|
extern struct nfs_fh *nfs_alloc_fhandle(void);
|
|
|
|
|
|
|
|
/*
 * nfs_free_fhandle - release an nfs_fh structure
 * @fh: file handle to release
 *
 * Hands @fh straight to kfree(); nothing else is torn down here.
 * Presumably the counterpart of nfs_alloc_fhandle() declared just above
 * (its definition is not visible in this header).
 */
static inline void nfs_free_fhandle(const struct nfs_fh *fh)
{
	kfree(fh);
}
|
|
|
|
|
2012-03-19 02:07:42 +08:00
|
|
|
#ifdef NFS_DEBUG
|
2012-03-07 09:46:43 +08:00
|
|
|
extern u32 _nfs_display_fhandle_hash(const struct nfs_fh *fh);
|
|
|
|
/*
 * nfs_display_fhandle_hash - NFS_DEBUG variant
 * @fh: file handle to hash
 *
 * Forwards to the out-of-line _nfs_display_fhandle_hash() and returns
 * whatever value it produces.
 */
static inline u32 nfs_display_fhandle_hash(const struct nfs_fh *fh)
{
	const u32 fh_hash = _nfs_display_fhandle_hash(fh);

	return fh_hash;
}
|
2012-03-02 06:01:31 +08:00
|
|
|
extern void _nfs_display_fhandle(const struct nfs_fh *fh, const char *caption);
|
|
|
|
/*
 * nfs_display_fhandle - NFS_DEBUG variant
 *
 * Calls _nfs_display_fhandle(fh, caption) only when the NFSDBG_FACILITY
 * bit is set in nfs_debug.  NFSDBG_FACILITY is not defined in this part
 * of the header; presumably each including source file provides it.
 * Wrapped in do { } while (0) so the macro behaves as one statement.
 */
#define nfs_display_fhandle(fh, caption)				\
do {									\
	if (unlikely(nfs_debug & NFSDBG_FACILITY))			\
		_nfs_display_fhandle(fh, caption);			\
} while (0)
|
|
|
|
#else
|
2012-03-07 09:46:43 +08:00
|
|
|
/*
 * nfs_display_fhandle_hash - variant used when NFS_DEBUG is not defined
 * @fh: file handle (ignored)
 *
 * No hash is computed in this configuration; the result is always 0.
 */
static inline u32 nfs_display_fhandle_hash(const struct nfs_fh *fh)
{
	(void)fh;	/* intentionally unused */
	return 0;
}
|
2012-03-02 06:01:31 +08:00
|
|
|
/*
 * nfs_display_fhandle - variant used when NFS_DEBUG is not defined
 * @fh: file handle (ignored)
 * @caption: caption string (ignored)
 *
 * Empty stub so that callers need no #ifdef of their own.
 */
static inline void nfs_display_fhandle(const struct nfs_fh *fh,
				       const char *caption)
{
	/* nothing to do without NFS_DEBUG */
}
|
|
|
|
#endif
|
|
|
|
|
2010-09-17 22:54:37 +08:00
|
|
|
/*
|
|
|
|
* linux/fs/nfs/nfsroot.c
|
|
|
|
*/
|
|
|
|
extern int nfs_root_data(char **root_device, char **root_data); /*__init*/
|
2005-04-17 06:20:36 +08:00
|
|
|
/* linux/net/ipv4/ipconfig.c: trims ip addr off front of name, too. */
|
2006-11-08 16:19:38 +08:00
|
|
|
extern __be32 root_nfs_parse_addr(char *name); /*__init*/
|
2005-10-28 10:12:38 +08:00
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
|
|
|
|
* linux/fs/nfs/file.c
|
|
|
|
*/
|
2006-03-28 17:56:42 +08:00
|
|
|
extern const struct file_operations nfs_file_operations;
|
2012-07-31 04:05:25 +08:00
|
|
|
#if IS_ENABLED(CONFIG_NFS_V4)
|
nfs: when attempting to open a directory, fall back on normal lookup (try #5)
commit d953126 changed how nfs_atomic_lookup handles an -EISDIR return
from an OPEN call. Prior to that patch, that caused the client to fall
back to doing a normal lookup. When that patch went in, the code began
returning that error to userspace. The d_revalidate codepath however
never had the corresponding change, so it was still possible to end up
with a NULL ctx->state pointer after that.
That patch caused a regression. When we attempt to open a directory that
does not have a cached dentry, that open now errors out with EISDIR. If
you attempt the same open with a cached dentry, it will succeed.
Fix this by reverting the change in nfs_atomic_lookup and allowing
attempts to open directories to fall back to a normal lookup.
Also, add a NFSv4-specific f_ops->open routine that just returns
-ENOTDIR. This should never be called if things are working properly,
but if it ever is, then the dprintk may help in debugging.
To facilitate this, a new file_operations field is also added to the
nfs_rpc_ops struct.
Cc: stable@kernel.org
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
2011-11-05 01:31:21 +08:00
|
|
|
extern const struct file_operations nfs4_file_operations;
|
|
|
|
#endif /* CONFIG_NFS_V4 */
|
2006-06-28 19:26:44 +08:00
|
|
|
extern const struct address_space_operations nfs_file_aops;
|
2010-12-02 03:17:06 +08:00
|
|
|
extern const struct address_space_operations nfs_dir_aops;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2007-08-11 05:44:28 +08:00
|
|
|
static inline struct nfs_open_context *nfs_file_open_context(struct file *filp)
|
|
|
|
{
|
|
|
|
return filp->private_data;
|
|
|
|
}
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
static inline struct rpc_cred *nfs_file_cred(struct file *file)
|
|
|
|
{
|
2008-10-16 11:15:16 +08:00
|
|
|
if (file != NULL) {
|
|
|
|
struct nfs_open_context *ctx =
|
|
|
|
nfs_file_open_context(file);
|
|
|
|
if (ctx)
|
|
|
|
return ctx->cred;
|
|
|
|
}
|
2005-04-17 06:20:36 +08:00
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* linux/fs/nfs/direct.c
|
|
|
|
*/
|
2016-04-07 23:51:58 +08:00
|
|
|
extern ssize_t nfs_direct_IO(struct kiocb *, struct iov_iter *);
|
2006-10-01 14:28:46 +08:00
|
|
|
extern ssize_t nfs_file_direct_read(struct kiocb *iocb,
|
2016-04-07 23:51:58 +08:00
|
|
|
struct iov_iter *iter);
|
2006-10-01 14:28:46 +08:00
|
|
|
extern ssize_t nfs_file_direct_write(struct kiocb *iocb,
|
2015-04-10 02:11:08 +08:00
|
|
|
struct iov_iter *iter);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* linux/fs/nfs/dir.c
|
|
|
|
*/
|
2006-03-28 17:56:42 +08:00
|
|
|
extern const struct file_operations nfs_dir_operations;
|
2009-02-20 13:51:22 +08:00
|
|
|
extern const struct dentry_operations nfs_dentry_operations;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2007-10-16 06:18:29 +08:00
|
|
|
extern void nfs_force_lookup_revalidate(struct inode *dir);
|
2013-05-23 00:50:42 +08:00
|
|
|
extern int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fh,
|
|
|
|
struct nfs_fattr *fattr, struct nfs4_label *label);
|
2007-08-11 05:45:10 +08:00
|
|
|
extern int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags);
|
|
|
|
extern void nfs_access_zap_cache(struct inode *inode);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* linux/fs/nfs/symlink.c
|
|
|
|
*/
|
2007-02-12 16:55:40 +08:00
|
|
|
extern const struct inode_operations nfs_symlink_inode_operations;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2006-01-03 16:55:41 +08:00
|
|
|
/*
|
|
|
|
* linux/fs/nfs/sysctl.c
|
|
|
|
*/
|
|
|
|
#ifdef CONFIG_SYSCTL
/* Real implementations are provided out of line. */
extern int	nfs_register_sysctl(void);
extern void	nfs_unregister_sysctl(void);
#else
/* Without CONFIG_SYSCTL: registration evaluates to 0, unregistration is a no-op. */
#define nfs_register_sysctl()		0
#define nfs_unregister_sysctl()		do { } while(0)
#endif
|
|
|
|
|
2006-06-09 21:34:19 +08:00
|
|
|
/*
|
|
|
|
* linux/fs/nfs/namespace.c
|
|
|
|
*/
|
2007-02-12 16:55:40 +08:00
|
|
|
extern const struct inode_operations nfs_mountpoint_inode_operations;
|
|
|
|
extern const struct inode_operations nfs_referral_inode_operations;
|
2006-06-09 21:34:20 +08:00
|
|
|
extern int nfs_mountpoint_expiry_timeout;
|
|
|
|
extern void nfs_release_automount_timer(void);
|
2006-06-09 21:34:19 +08:00
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
|
|
|
|
* linux/fs/nfs/unlink.c
|
|
|
|
*/
|
2007-07-15 03:39:58 +08:00
|
|
|
extern void nfs_complete_unlink(struct dentry *dentry, struct inode *);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* linux/fs/nfs/write.c
|
|
|
|
*/
|
2007-03-17 05:38:26 +08:00
|
|
|
extern int nfs_congestion_kb;
|
2005-04-17 06:20:36 +08:00
|
|
|
extern int nfs_writepage(struct page *page, struct writeback_control *wbc);
|
|
|
|
extern int nfs_writepages(struct address_space *, struct writeback_control *);
|
|
|
|
extern int nfs_flush_incompatible(struct file *file, struct page *page);
|
|
|
|
extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Try to write back everything synchronously (but check the
|
|
|
|
* return value!)
|
|
|
|
*/
|
2015-03-26 05:23:31 +08:00
|
|
|
extern int nfs_sync_inode(struct inode *inode);
|
2006-10-10 04:18:38 +08:00
|
|
|
extern int nfs_wb_all(struct inode *inode);
|
2017-04-27 00:26:22 +08:00
|
|
|
extern int nfs_wb_page(struct inode *inode, struct page *page);
|
2007-08-28 22:29:36 +08:00
|
|
|
extern int nfs_wb_page_cancel(struct inode *inode, struct page* page);
|
2010-07-31 03:31:54 +08:00
|
|
|
extern int nfs_commit_inode(struct inode *, int);
|
NFS: fix usage of mempools.
When passed GFP flags that allow sleeping (such as
GFP_NOIO), mempool_alloc() will never return NULL, it will
wait until memory is available.
This means that we don't need to handle failure, but that we
do need to ensure one thread doesn't call mempool_alloc()
twice on the one pool without queuing or freeing the first
allocation. If multiple threads did this during times of
high memory pressure, the pool could be exhausted and a
deadlock could result.
pnfs_generic_alloc_ds_commits() attempts to allocate from
the nfs_commit_mempool while already holding an allocation
from that pool. This is not safe. So change
nfs_commitdata_alloc() to take a flag that indicates whether
failure is acceptable.
In pnfs_generic_alloc_ds_commits(), accept failure and
handle it as we currently do. Elsewhere, do not accept
failure, and do not handle it.
Even when failure is acceptable, we want to succeed if
possible. That means both
- using an entry from the pool if there is one
- waiting for direct reclaim if there isn't.
We call mempool_alloc(GFP_NOWAIT) to achieve the first, then
kmem_cache_alloc(GFP_NOIO|__GFP_NORETRY) to achieve the
second. Each of these can fail, but together they do the
best they can without blocking indefinitely.
The objects returned by kmem_cache_alloc() will still be freed
by mempool_free(). This is safe as mempool_alloc() uses
exactly the same function to allocate objects (since the mempool
was created with mempool_create_slab_pool()). The objects returned
by mempool_alloc() and kmem_cache_alloc() are indistinguishable
so mempool_free() will handle both identically, either adding to the
pool or calling kmem_cache_free().
Also, don't test for failure when allocating from
nfs_wdata_mempool.
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
2017-04-10 10:22:09 +08:00
|
|
|
extern struct nfs_commit_data *nfs_commitdata_alloc(bool never_fail);
|
2012-04-21 02:47:39 +08:00
|
|
|
extern void nfs_commit_free(struct nfs_commit_data *data);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
static inline int
|
|
|
|
nfs_have_writebacks(struct inode *inode)
|
|
|
|
{
|
2014-11-13 01:08:00 +08:00
|
|
|
return NFS_I(inode)->nrequests != 0;
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* linux/fs/nfs/read.c
|
|
|
|
*/
|
|
|
|
extern int nfs_readpage(struct file *, struct page *);
|
|
|
|
extern int nfs_readpages(struct file *, struct address_space *,
|
|
|
|
struct list_head *, unsigned);
|
2009-04-03 23:42:44 +08:00
|
|
|
extern int nfs_readpage_async(struct nfs_open_context *, struct inode *,
|
|
|
|
struct page *);
|
2006-01-03 16:55:04 +08:00
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
|
|
|
|
* inline functions
|
|
|
|
*/
|
|
|
|
|
2007-07-02 00:12:14 +08:00
|
|
|
/*
 * nfs_size_to_loff_t - convert a 64-bit size to loff_t
 * @size: unsigned 64-bit size
 *
 * Returns the smaller of @size and OFFSET_MAX, with the comparison done
 * in u64, so the result never exceeds OFFSET_MAX.
 */
static inline loff_t nfs_size_to_loff_t(__u64 size)
{
	const u64 clamped = min_t(u64, size, OFFSET_MAX);

	return clamped;
}
|
|
|
|
|
|
|
|
static inline ino_t
|
|
|
|
nfs_fileid_to_ino_t(u64 fileid)
|
|
|
|
{
|
|
|
|
ino_t ino = (ino_t) fileid;
|
|
|
|
if (sizeof(ino_t) < sizeof(u64))
|
|
|
|
ino ^= fileid >> (sizeof(u64)-sizeof(ino_t)) * 8;
|
|
|
|
return ino;
|
|
|
|
}
|
|
|
|
|
|
|
|
#define NFS_JUKEBOX_RETRY_TIME (5 * HZ)
|
|
|
|
|
2006-12-21 04:29:46 +08:00
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
 * Debug helpers.  Any earlier definition of ifdebug is dropped first.
 *
 * With NFS_DEBUG:
 *   ifdebug(fac)   - "if" guard that tests the NFSDBG_<fac> bit in nfs_debug
 *   NFS_IFDEBUG(x) - expands to x
 * Without NFS_DEBUG:
 *   ifdebug(fac)   - "if (0)", so the guarded statement never runs
 *   NFS_IFDEBUG(x) - expands to nothing
 */
# undef ifdebug
# ifdef NFS_DEBUG
#  define ifdebug(fac)		if (unlikely(nfs_debug & NFSDBG_##fac))
#  define NFS_IFDEBUG(x)	x
# else
#  define ifdebug(fac)		if (0)
#  define NFS_IFDEBUG(x)
# endif
|
|
|
|
#endif
|